ctags/asm.c

388 lines
7.9 KiB
C
Raw Normal View History

/*
* $Id$
*
* Copyright (c) 2000-2003, Darren Hiebert
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for assembly language
* files.
*/
/*
* INCLUDE FILES
*/
#include "general.h" /* must always come first */
#include <string.h>
#include "debug.h"
#include "keyword.h"
#include "parse.h"
#include "read.h"
#include "routines.h"
#include "vstring.h"
/*
* DATA DECLARATIONS
*/
/*
*   Kinds of tags this parser can emit.  The non-negative values are passed
*   to makeSimpleTag() together with the AsmKinds table, so their order is
*   presumably expected to match the order of the entries in AsmKinds.
*   K_NONE is used for operators that are recognized but for which
*   makeAsmTag() emits no tag.
*/
typedef enum {
K_NONE = -1, K_DEFINE, K_LABEL, K_MACRO, K_TYPE
} AsmKind;
/*
*   Identifiers for the assembler operators/directives this parser knows.
*   OP_UNDEFINED is what operatorKind() compares the keyword lookup result
*   against to detect an unrecognized operator.  The non-negative values are
*   used directly as indices into OpKinds, so the two must stay in the same
*   order.  OP_LAST is not referenced by the code in this file; presumably it
*   is an end-of-enumeration sentinel.
*/
typedef enum {
OP_UNDEFINED = -1,
OP_ALIGN,
OP_COLON_EQUAL,
OP_END,
OP_ENDM,
OP_ENDMACRO,
OP_ENDP,
OP_ENDS,
OP_EQU,
OP_EQUAL,
OP_LABEL,
OP_MACRO,
OP_PROC,
OP_RECORD,
OP_SECTIONS,
OP_SET,
OP_STRUCT,
OP_LAST
} opKeyword;
/*
*   One entry of the AsmKeywords table: the spelling of an operator and the
*   opKeyword identifier that buildAsmKeywordHash() registers for it.
*/
typedef struct {
const char *operator;
opKeyword keyword;
} asmKeyword;
/*
*   One entry of the OpKinds table: an operator identifier and the kind of
*   tag that operator produces (K_NONE when it produces none).
*/
typedef struct {
opKeyword keyword;
AsmKind kind;
} opKind;
/*
* DATA DEFINITIONS
*/
/* Language identifier of this parser; assigned in initialize() and passed
*  to addKeyword() and lookupKeyword(). */
static langType Lang_asm;
/*
*   Tag kinds published by this parser through AsmParser().  Entries are
*   listed in the same order as the non-negative AsmKind values (define,
*   label, macro, type).  The field layout is defined by kindOption, which
*   is declared elsewhere; presumably {enabled, letter, name, description}.
*/
static kindOption AsmKinds [] = {
{ TRUE, 'd', "define", "defines" },
{ TRUE, 'l', "label", "labels" },
{ TRUE, 'm', "macro", "macros" },
{ TRUE, 't', "type", "types (structs and records)" }
};
/*
*   Operator spellings and their identifiers, registered for Lang_asm by
*   buildAsmKeywordHash().  The spellings are lowercase; analyzeOperator()
*   lowercases an operator before looking it up, so source files may use
*   any letter case.
*/
static const asmKeyword AsmKeywords [] = {
{ "align", OP_ALIGN },
{ "endmacro", OP_ENDMACRO },
{ "endm", OP_ENDM },
{ "end", OP_END },
{ "endp", OP_ENDP },
{ "ends", OP_ENDS },
{ "equ", OP_EQU },
{ "label", OP_LABEL },
{ "macro", OP_MACRO },
{ ":=", OP_COLON_EQUAL },
{ "=", OP_EQUAL },
{ "proc", OP_PROC },
{ "record", OP_RECORD },
{ "sections", OP_SECTIONS },
{ "set", OP_SET },
{ "struct", OP_STRUCT }
};
/*
*   Maps each operator to the kind of tag it produces.  operatorKind()
*   indexes this table directly with the opKeyword value and asserts that
*   the entry found carries that same keyword, so an entry's position must
*   equal its keyword's enumeration value.
*/
static const opKind OpKinds [] = {
/* must be ordered same as opKeyword enumeration */
{ OP_ALIGN, K_NONE },
{ OP_COLON_EQUAL, K_DEFINE },
{ OP_END, K_NONE },
{ OP_ENDM, K_NONE },
{ OP_ENDMACRO, K_NONE },
{ OP_ENDP, K_NONE },
{ OP_ENDS, K_NONE },
{ OP_EQU, K_DEFINE },
{ OP_EQUAL, K_DEFINE },
{ OP_LABEL, K_LABEL },
{ OP_MACRO, K_MACRO },
{ OP_PROC, K_LABEL },
{ OP_RECORD, K_TYPE },
{ OP_SECTIONS, K_NONE },
{ OP_SET, K_DEFINE },
{ OP_STRUCT, K_TYPE }
};
/*
* FUNCTION DEFINITIONS
*/
/*
*   Registers every entry of AsmKeywords with the keyword table under
*   Lang_asm, storing the entry's opKeyword value as the keyword id.
*/
static void buildAsmKeywordHash (void)
{
	const asmKeyword *entry = AsmKeywords;
	const asmKeyword *const end =
		AsmKeywords + sizeof (AsmKeywords) / sizeof (AsmKeywords [0]);

	for ( ; entry != end ; ++entry)
		addKeyword (entry->operator, Lang_asm, (int) entry->keyword);
}
static opKeyword analyzeOperator (const vString *const op)
{
trunk/args.c: trunk/asm.c: trunk/beta.c: trunk/eiffel.c: trunk/fortran.c: trunk/jscript.c: trunk/lregex.c: trunk/main.c: trunk/options.c: trunk/pascal.c: trunk/read.c: trunk/routines.c: trunk/routines.h: trunk/sml.c: trunk/sql.c: fix almost all our current memory leaks. Based on a patch from Dmitry Antipov, but also using his vString leak detector and valgrind(1) to find new ones and ones he missed. Three known leaks remain. The first is in parseLongOption. There's also one in "fortran.c" and another in "sql.c": helium:~/Projects/ctags/trunk$ valgrind --leak-check=full --show-reachable=yes ./dctags -f - Test/* ==3056== Memcheck, a memory error detector. ==3056== Copyright (C) 2002-2006, and GNU GPL'd, by Julian Seward et al. ==3056== Using LibVEX rev 1658, a library for dynamic binary translation. ==3056== Copyright (C) 2004-2006, and GNU GPL'd, by OpenWorks LLP. ==3056== Using valgrind-3.2.1-Debian, a dynamic binary instrumentation framework. ==3056== Copyright (C) 2000-2006, and GNU GPL'd, by Julian Seward et al. ==3056== For more details, rerun with: -v ==3056== . . . ==3056== ==3056== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 13 from 1) ==3056== malloc/free: in use at exit: 708 bytes in 22 blocks. ==3056== malloc/free: 36,126 allocs, 36,104 frees, 1,584,216 bytes allocated. ==3056== For counts of detected errors, rerun with: -v ==3056== searching for pointers to 22 not-freed blocks. ==3056== checked 68,184 bytes. 
==3056== ==3056== 68 bytes in 2 blocks are definitely lost in loss record 1 of 2 ==3056== at 0x4021620: malloc (vg_replace_malloc.c:149) ==3056== by 0x806347E: eMalloc (routines.c:238) ==3056== by 0x8065B68: newToken (sql.c:347) ==3056== by 0x80662BB: parseSubProgram (sql.c:688) ==3056== by 0x8067867: parseSqlFile (sql.c:1782) ==3056== by 0x8067934: findSqlTags (sql.c:1810) ==3056== by 0x8060760: createTagsForFile (parse.c:618) ==3056== by 0x8060810: createTagsWithFallback (parse.c:640) ==3056== by 0x80608DC: parseFile (parse.c:667) ==3056== by 0x805B7D6: createTagsForEntry (main.c:303) ==3056== by 0x805B811: createTagsForArgs (main.c:348) ==3056== by 0x805BD6F: makeTags (main.c:494) ==3056== ==3056== ==3056== 640 bytes in 20 blocks are still reachable in loss record 2 of 2 ==3056== at 0x4021620: malloc (vg_replace_malloc.c:149) ==3056== by 0x806347E: eMalloc (routines.c:238) ==3056== by 0x806AEE2: vStringNew (vstring.c:116) ==3056== by 0x80542C0: newToken (fortran.c:419) ==3056== by 0x8054309: newTokenFrom (fortran.c:429) ==3056== by 0x80562E5: parseInterfaceBlock (fortran.c:1709) ==3056== by 0x805661D: parseDeclarationConstruct (fortran.c:1834) ==3056== by 0x805679F: parseSpecificationPart (fortran.c:1901) ==3056== by 0x80569F5: parseModule (fortran.c:1990) ==3056== by 0x8056E05: parseProgramUnit (fortran.c:2142) ==3056== by 0x8056F37: findFortranTags (fortran.c:2183) ==3056== by 0x806077A: createTagsForFile (parse.c:620) ==3056== ==3056== LEAK SUMMARY: ==3056== definitely lost: 68 bytes in 2 blocks. ==3056== possibly lost: 0 bytes in 0 blocks. ==3056== still reachable: 640 bytes in 20 blocks. ==3056== suppressed: 0 bytes in 0 blocks. I think they're both awkward longjmp(3)/setjmp(3)-related leaks, and I don't currently have a good solution. ("eiffel.c" cunningly only calls newToken once, before calling setjmp(3).) git-svn-id: svn://svn.code.sf.net/p/ctags/code/trunk@536 c5d04d22-be80-434c-894e-aa346cc9e8e8
2007-06-02 06:09:00 +00:00
vString *keyword = vStringNew ();
opKeyword result;
vStringCopyToLower (keyword, op);
result = (opKeyword) lookupKeyword (vStringValue (keyword), Lang_asm);
trunk/args.c: trunk/asm.c: trunk/beta.c: trunk/eiffel.c: trunk/fortran.c: trunk/jscript.c: trunk/lregex.c: trunk/main.c: trunk/options.c: trunk/pascal.c: trunk/read.c: trunk/routines.c: trunk/routines.h: trunk/sml.c: trunk/sql.c: fix almost all our current memory leaks. Based on a patch from Dmitry Antipov, but also using his vString leak detector and valgrind(1) to find new ones and ones he missed. Three known leaks remain. The first is in parseLongOption. There's also one in "fortran.c" and another in "sql.c": helium:~/Projects/ctags/trunk$ valgrind --leak-check=full --show-reachable=yes ./dctags -f - Test/* ==3056== Memcheck, a memory error detector. ==3056== Copyright (C) 2002-2006, and GNU GPL'd, by Julian Seward et al. ==3056== Using LibVEX rev 1658, a library for dynamic binary translation. ==3056== Copyright (C) 2004-2006, and GNU GPL'd, by OpenWorks LLP. ==3056== Using valgrind-3.2.1-Debian, a dynamic binary instrumentation framework. ==3056== Copyright (C) 2000-2006, and GNU GPL'd, by Julian Seward et al. ==3056== For more details, rerun with: -v ==3056== . . . ==3056== ==3056== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 13 from 1) ==3056== malloc/free: in use at exit: 708 bytes in 22 blocks. ==3056== malloc/free: 36,126 allocs, 36,104 frees, 1,584,216 bytes allocated. ==3056== For counts of detected errors, rerun with: -v ==3056== searching for pointers to 22 not-freed blocks. ==3056== checked 68,184 bytes. 
==3056== ==3056== 68 bytes in 2 blocks are definitely lost in loss record 1 of 2 ==3056== at 0x4021620: malloc (vg_replace_malloc.c:149) ==3056== by 0x806347E: eMalloc (routines.c:238) ==3056== by 0x8065B68: newToken (sql.c:347) ==3056== by 0x80662BB: parseSubProgram (sql.c:688) ==3056== by 0x8067867: parseSqlFile (sql.c:1782) ==3056== by 0x8067934: findSqlTags (sql.c:1810) ==3056== by 0x8060760: createTagsForFile (parse.c:618) ==3056== by 0x8060810: createTagsWithFallback (parse.c:640) ==3056== by 0x80608DC: parseFile (parse.c:667) ==3056== by 0x805B7D6: createTagsForEntry (main.c:303) ==3056== by 0x805B811: createTagsForArgs (main.c:348) ==3056== by 0x805BD6F: makeTags (main.c:494) ==3056== ==3056== ==3056== 640 bytes in 20 blocks are still reachable in loss record 2 of 2 ==3056== at 0x4021620: malloc (vg_replace_malloc.c:149) ==3056== by 0x806347E: eMalloc (routines.c:238) ==3056== by 0x806AEE2: vStringNew (vstring.c:116) ==3056== by 0x80542C0: newToken (fortran.c:419) ==3056== by 0x8054309: newTokenFrom (fortran.c:429) ==3056== by 0x80562E5: parseInterfaceBlock (fortran.c:1709) ==3056== by 0x805661D: parseDeclarationConstruct (fortran.c:1834) ==3056== by 0x805679F: parseSpecificationPart (fortran.c:1901) ==3056== by 0x80569F5: parseModule (fortran.c:1990) ==3056== by 0x8056E05: parseProgramUnit (fortran.c:2142) ==3056== by 0x8056F37: findFortranTags (fortran.c:2183) ==3056== by 0x806077A: createTagsForFile (parse.c:620) ==3056== ==3056== LEAK SUMMARY: ==3056== definitely lost: 68 bytes in 2 blocks. ==3056== possibly lost: 0 bytes in 0 blocks. ==3056== still reachable: 640 bytes in 20 blocks. ==3056== suppressed: 0 bytes in 0 blocks. I think they're both awkward longjmp(3)/setjmp(3)-related leaks, and I don't currently have a good solution. ("eiffel.c" cunningly only calls newToken once, before calling setjmp(3).) git-svn-id: svn://svn.code.sf.net/p/ctags/code/trunk@536 c5d04d22-be80-434c-894e-aa346cc9e8e8
2007-06-02 06:09:00 +00:00
vStringDelete (keyword);
return result;
}
static boolean isInitialSymbolCharacter (int c)
{
return (boolean) (c != '\0' && (isalpha (c) || strchr ("_$", c) != NULL));
}
static boolean isSymbolCharacter (int c)
{
/* '?' character is allowed in AMD 29K family */
return (boolean) (c != '\0' && (isalnum (c) || strchr ("_$?", c) != NULL));
}
/*
*   Handles the text that follows a '#'.  `line' points just past the '#'.
*   If the directive name found there is exactly "define", the symbol that
*   follows it (after optional white space) is passed to makeSimpleTag() as
*   a K_DEFINE tag.
*
*   Returns whether the directive was "define".
*/
static boolean readPreProc (const unsigned char *const line)
{
	const unsigned char *pos;
	vString *const word = vStringNew ();
	boolean isDefine;

	/* collect the directive name */
	for (pos = line ; isSymbolCharacter ((int) *pos) ; ++pos)
		vStringPut (word, *pos);
	vStringTerminate (word);

	isDefine = (boolean) (strcmp (vStringValue (word), "define") == 0);
	if (isDefine)
	{
		while (isspace ((int) *pos))
			++pos;

		/* reuse the buffer for the name being defined */
		vStringClear (word);
		for ( ; isSymbolCharacter ((int) *pos) ; ++pos)
			vStringPut (word, *pos);
		vStringTerminate (word);
		makeSimpleTag (word, AsmKinds, K_DEFINE);
	}
	vStringDelete (word);
	return isDefine;
}
/*
*   Determines the tag kind associated with `operator'.
*
*   `*found' is set to tell whether the operator is a known keyword.  When it
*   is not, K_NONE is returned.  When it is, the kind recorded in OpKinds for
*   that keyword is returned, which may itself be K_NONE.
*/
static AsmKind operatorKind (
		const vString *const operator,
		boolean *const found)
{
	const opKeyword kw = analyzeOperator (operator);

	*found = (boolean) (kw != OP_UNDEFINED);
	if (! *found)
		return K_NONE;

	/* OpKinds is indexed by keyword value; verify the table is in step */
	Assert (OpKinds [kw].keyword == kw);
	return OpKinds [kw].kind;
}
/* We must check for "DB", "DB.L", "DCB.W" (68000)
*/
static boolean isDefineOperator (const vString *const operator)
{
const unsigned char *const op =
(unsigned char*) vStringValue (operator);
const size_t length = vStringLength (operator);
const boolean result = (boolean) (length > 0 &&
toupper ((int) *op) == 'D' &&
(length == 2 ||
(length == 4 && (int) op [2] == '.') ||
(length == 5 && (int) op [3] == '.')));
return result;
}
/*
*   Decides whether the symbol `name', seen together with `operator' on one
*   line, deserves a tag, and emits it.  Nothing happens for an empty name.
*
*   - Known operator: tag with the kind recorded for it, unless that kind is
*     K_NONE.
*   - Otherwise, operator shaped like a data definition: K_DEFINE tag,
*     unless the name was read after the operator (`nameFollows').
*   - Otherwise, if the name is a label candidate and is not itself a known
*     operator: K_LABEL tag.
*/
static void makeAsmTag (
		const vString *const name,
		const vString *const operator,
		const boolean labelCandidate,
		const boolean nameFollows)
{
	boolean known;
	AsmKind kind;

	if (vStringLength (name) == 0)
		return;

	kind = operatorKind (operator, &known);
	if (known)
	{
		if (kind != K_NONE)
			makeSimpleTag (name, AsmKinds, kind);
		return;
	}

	if (isDefineOperator (operator))
	{
		if (! nameFollows)
			makeSimpleTag (name, AsmKinds, K_DEFINE);
		return;
	}

	if (labelCandidate)
	{
		/* a lone keyword in label position is not a label */
		operatorKind (name, &known);
		if (! known)
			makeSimpleTag (name, AsmKinds, K_LABEL);
	}
}
/*
*   Reads a symbol beginning at `start' into `sym'.  `sym' is always cleared
*   first; it stays empty when the character at `start' cannot begin a
*   symbol.
*
*   Returns a pointer to the first character not consumed, which is `start'
*   itself when no symbol was read.
*/
static const unsigned char *readSymbol (
		const unsigned char *const start,
		vString *const sym)
{
	const unsigned char *pos = start;

	vStringClear (sym);
	if (! isInitialSymbolCharacter ((int) *pos))
		return pos;

	for ( ; isSymbolCharacter ((int) *pos) ; ++pos)
		vStringPut (sym, *pos);
	vStringTerminate (sym);
	return pos;
}
/*
*   Copies the run of non-white-space characters beginning at `start' into
*   `operator', replacing its previous contents.
*
*   Returns a pointer to the character that stopped the scan: either white
*   space or the terminating NUL.
*/
static const unsigned char *readOperator (
		const unsigned char *const start,
		vString *const operator)
{
	const unsigned char *pos;

	vStringClear (operator);
	for (pos = start ; *pos != '\0' && ! isspace ((int) *pos) ; ++pos)
		vStringPut (operator, *pos);
	vStringTerminate (operator);
	return pos;
}
/*
*   Parser entry point: reads the input one line at a time with
*   fileReadLine() and emits tags through makeAsmTag() and readPreProc().
*
*   For each line:
*   - A line whose first character is ';', '*' or '@' is a comment and is
*     skipped.
*   - A C style comment is recognized only when its opener is at the very
*     start of a line; it may span several lines, all of which are skipped,
*     including the line on which it is opened.
*   - A line starting with '#' is handed to readPreProc().
*   - Otherwise the line is read as "[symbol[:]] operator [symbol]".  The
*     symbol is a label candidate if it starts in the first column or is
*     followed by ':'.  When no symbol precedes the operator, the one that
*     follows it is used instead and flagged through `nameFollows'.
*/
static void findAsmTags (void)
{
	vString *name = vStringNew ();
	vString *operator = vStringNew ();
	const unsigned char *line;
	boolean inCComment = FALSE;

	while ((line = fileReadLine ()) != NULL)
	{
		const unsigned char *cp = line;
		boolean labelCandidate = (boolean) (! isspace ((int) *cp));
		boolean nameFollows = FALSE;
		const boolean isComment = (boolean)
				(*cp != '\0' && strchr (";*@", *cp) != NULL);

		/* skip comments */
		if (strncmp ((const char*) cp, "/*", (size_t) 2) == 0)
		{
			inCComment = TRUE;
			cp += 2;
		}
		if (inCComment)
		{
			/* Test for the end of the line BEFORE advancing: the line may
			 * already be exhausted here (an empty line inside a comment, or
			 * a line consisting of the comment opener only), and stepping
			 * past the terminating NUL would read outside the buffer.
			 */
			while (*cp != '\0')
			{
				if (strncmp ((const char*) cp, "*/", (size_t) 2) == 0)
				{
					inCComment = FALSE;
					cp += 2;
					break;
				}
				++cp;
			}
		}
		if (isComment || inCComment)
			continue;

		/* read preprocessor defines */
		if (*cp == '#')
		{
			++cp;
			readPreProc (cp);
			continue;
		}

		/* skip white space */
		while (isspace ((int) *cp))
			++cp;

		/* read symbol */
		cp = readSymbol (cp, name);
		if (vStringLength (name) > 0 && *cp == ':')
		{
			labelCandidate = TRUE;
			++cp;
		}

		/* the symbol must be followed by white space or the end of line */
		if (! isspace ((int) *cp) && *cp != '\0')
			continue;

		/* skip white space */
		while (isspace ((int) *cp))
			++cp;

		/* skip leading dot */
#if 0
		if (*cp == '.')
			++cp;
#endif

		cp = readOperator (cp, operator);

		/* attempt second read of symbol */
		if (vStringLength (name) == 0)
		{
			while (isspace ((int) *cp))
				++cp;
			cp = readSymbol (cp, name);
			nameFollows = TRUE;
		}
		makeAsmTag (name, operator, labelCandidate, nameFollows);
	}
	vStringDelete (name);
	vStringDelete (operator);
}
/*
*   Initialization hook installed as `def->initialize' by AsmParser().
*   Records the language identifier handed in and registers the assembler
*   keywords under it.
*/
static void initialize (const langType language)
{
/* must be set first: buildAsmKeywordHash() registers under Lang_asm */
Lang_asm = language;
buildAsmKeywordHash ();
}
/*
*   Creates and returns the parser definition for assembly language: its
*   name ("Asm"), tag kinds, the file extensions and file name patterns it
*   applies to, and its parse and initialization callbacks.
*/
extern parserDefinition* AsmParser (void)
{
	static const char *const patterns [] = {
		"*.A51",
		"*.29[kK]",
		"*.[68][68][kKsSxX]",
		"*.[xX][68][68]",
		NULL
	};
	static const char *const extensions [] = {
		"asm", "ASM", "s", "S", NULL
	};
	parserDefinition *const parser = parserNew ("Asm");

	/* callbacks */
	parser->initialize = initialize;
	parser->parser     = findAsmTags;

	/* file selection */
	parser->patterns   = patterns;
	parser->extensions = extensions;

	/* tag kinds */
	parser->kinds      = AsmKinds;
	parser->kindCount  = KIND_COUNT (AsmKinds);

	return parser;
}
/* vi:set tabstop=4 shiftwidth=4: */