Another set of changes to the JavaScript parser courtesy of Columban Wendling.

Fix a questionable use of the "const" type qualifier resulting
        mostly in improper line numbers with some parsers (seen in the
        JavaScript parser).  Details in the patch, but basically the
        compilers may "improperly" optimize the content of the File
        global (seen with GCC and CLang when optimizing).


        Set the tag's scope rather than including it in the tag name.
        (Geany bug #3570192 [3])


        Just an optimization actually -- but also somewhat works around
        what 0002 above properly fixes: fewer calls means less room for
        the values to be improperly cached.

        
        Patch from a user, fixes generation of function tags inside
        methods.  (Geany bug #3571233 [4]).

        Fix for unbraced if/else, and adds more tests to 1880687.js
        (Geany bug #3568542 [5]).

        Just to make the parseJsFile() code a little simpler.

        Fix findCmdTerm() when it encounters an open curly.

        Fix some more unterminated statements problems, still in
        1880687.js.

        Lowercase "object" isn't actually a JavaScript keyword.
        (Geany bug #3036476 [6])

        Properly handle properties values.
        (Geany bug #3470609 [7])

        Generate class tag for variables for which we generated
        children tags.

	This one includes the tests for
	0005-JavaScript-Fix-recognizing-functions-inside-methods.patch
	and also fixes the scope of the tags this patch added -- proof
	that writing test cases is indeed better than simply testing
	against existing code :)


NOTE: The following tests are broken with these changes.
    Test\jsFunc_tutorial.js
         D4 function breaks the parser.

    Test\simple.js
        var invalidInnerFunction = function(a,b) {}
        A function tag was generated for this tag.

        Actually there are quite a few differences with
        simple.js, I am not certain whether the changes
        are correct or not.  My JavaScript knowledge is
        not that complete.




git-svn-id: svn://svn.code.sf.net/p/ctags/code/trunk@797 c5d04d22-be80-434c-894e-aa346cc9e8e8
This commit is contained in:
dfishburn 2012-11-07 01:40:08 +00:00
parent 3d8fff34b0
commit 00e6482df8
12 changed files with 420 additions and 100 deletions

View File

@ -1,5 +1,7 @@
// This file should generate the following tags:
// classes
// Test.RPC
// methods
// Test.RPC.asyncMethod
// Test.RPC.asyncRequest

View File

@ -1,5 +1,7 @@
// This file should generate the following tags:
//
// classes
// objectLiteral
// methods
// objectLiteral.objLiteralMethod
// properties
@ -20,6 +22,3 @@ objLiteralProperty : 1,
objLiteralMethod : function(){}
}
// When I run ctags on this (ctags -f - test.js) I get no output. I expect it
// to give me something for both "objectLiteral" and "objLiteralMethod".

View File

@ -1,3 +1,11 @@
// Tags should include:
// functions
// my_function
// classes
// RE
// global variables
// foo
//
// This should handle the escaped quote
var RE={/foo\"/: "bar"};

View File

@ -20,6 +20,24 @@
// l
// m
// n
// o
// p
// q
// r
// s
// t
// u
// v
// w
// w.w_sub1
// w.w_sub2
//
// classes
// MyClass
//
// methods
// MyClass.MyClass_sub1
// MyClass.MyClass_sub2
function a(flag){
if(flag)
@ -48,6 +66,12 @@ function e(flag){
function f(){
var b= 33;
if (foo)
bar();
else
test(2);
function baz() {
}
}
function g(flag){
@ -95,4 +119,74 @@ function n(){
var b= 33;
}
if (1)
l();
function o(){
var b= 33;
}
if (1){
l();
}
function p(){
var b= 33;
}
if (1){
l();
} else
l();
function q(){
var b= 33;
}
function r(flag){
if (flag) {
value = 33
}
}
function s(){
var b= 33;
}
function t(flag){
if (flag) {
b= new Object()
}
}
function u(flag){
if (flag) {
b= ({})
}
}
function v(flag){
if (flag) {
b= {}
}
}
function w(){
function w_sub1(x){
if (! x)
x = {foo:bar};
var dummy1, dummy2;
}
function w_sub2(){
}
}
MyClass = {
MyClass_sub1: function(x){
if (! x)
x = { };
var dummy3, dummy4;
},
MyClass_sub2: function(x){
var dummy5 = 42;
}
};

View File

@ -7,9 +7,9 @@
*/
function f1() {
var str = 'This function will be listed.';
var str = 'This function will be listed.';
}
function f2() {
var str = 'This function won\'t.';
var str = 'This function won\'t.';
}

20
Test/bug3036476.js Normal file
View File

@ -0,0 +1,20 @@
/*
* "Lowercase "object" isn't a keyword"
*
* ctags -f - bug3036476.js should output:
*
* variables:
* container
*
* functions:
* container.object
* container.object.method1
* container.object.method2
*/
var container = {};
container.object = function() {}
container.object.method1 = function() {}
container.object.method2 = function() {}

49
Test/bug3470609.js Normal file
View File

@ -0,0 +1,49 @@
/*
* Test for properties values. Everything is valid here and must be
* correctly parsed.
*
* Output of ctags -f - bug3470609.js should be:
*
* Properties:
* root.array
* root.decimal
* root.id
* root.neg
* root.parentheses
* root.string
* root.subObject.subProperty
*
* Classes:
* root
* root.subObject
*
* Methods:
* root.method
* root.subObject.subFunction
*
* Functions:
* f
*/
var root = {
'string' : 'hello world',
'method' : function() {
x = 42;
},
'id' : 1,
'neg' : -1,
'decimal' : 1.3,
'subObject' : {
'subProperty': 42,
'subFunction': function() {
y = 43;
}
},
'array' : [1, 2, 3],
'parentheses' : (2 * (2 + 3))
}
function f() {
}

56
Test/bug3571233.js Normal file
View File

@ -0,0 +1,56 @@
/*
* "Functions nested inside methods show improper scope with the parent method
* being reported as "function""
*
* ctags -f - bug3571233.js should output:
*
* classes
* MyClass
*
* methods
* MyClass.method2
*
* functions
* MyClass.method2.nestedFunction1
* MyClass.method2.nestedFunction2
* function1
* function1.nestedFunction3
* function2
* function2.nestedFunction4
* function2.nestedFunction5
*
*
* Note that MyClass is shown both as a class and as a function (the parser
* discovers it actually is a class only later on). This isn't really easy to
* fix because a JavaScript function is only a class if it happens to be used as
* one, for example it has prototypes.
*/
function MyClass() {
}
MyClass.prototype.method2 = function() {
// these functions have improper scope
function nestedFunction1() {
}
function nestedFunction2() {
}
};
// following work fine, just here as a reference
function function1() {
function nestedFunction3() {
}
};
function2 = function() {
function nestedFunction4() {
}
function nestedFunction5() {
}
};

View File

@ -103,7 +103,6 @@ alert(D3(3,4)); // produces 7
// Example D4
var D4=new Function("a", "b",
"alert" + // chop string using "+"
"('adding '+a+' and ' +b);\ // separate string using "\"
return a+b;");
alert(D4(3,4)); // produces 7

38
Test/regexp.js Normal file
View File

@ -0,0 +1,38 @@
/*
* ctags should return the following for parsing this file using:
* ctags -f - regexp.js
*
* variables:
* no_re1
* no_re2
* no_re3
* no_re4
* no_re5
* no_re6
* re1
* re2
* re3
* re4
* re5
* re6
* str1
* str2
*/
var no_re1 = 1 / 2;
var no_re2 = 1 + (1 + 2) / 3;
var no_re3 = 1 + {0:1}[0] / 2;
var no_re4 = 1 + {0:1} / 8; // gives NaN
var no_re5 = "foo" / 2; // so does this
var no_re6 = no_re1 / 2;
var re1 = /foo/;
var re2 = /\//;
var re3 = /[/]/;
var re4 = /'/;
var re5 = /["'/]/;
var re6 = /\(([a-z]*_)+/;
var str1 = "a/b/c".replace(/\//g, '-');
var str2 = "Hello".replace(/O/ig, 'O');

View File

@ -7,7 +7,7 @@
* D2A
* theAdd
* variables
* global
* my_global_var1
*/
function D1(a, b)
{

241
jscript.c
View File

@ -58,7 +58,6 @@ typedef enum eKeywordId {
KEYWORD_NONE = -1,
KEYWORD_function,
KEYWORD_capital_function,
KEYWORD_object,
KEYWORD_capital_object,
KEYWORD_prototype,
KEYWORD_var,
@ -101,7 +100,8 @@ typedef enum eTokenType {
TOKEN_EQUAL_SIGN,
TOKEN_FORWARD_SLASH,
TOKEN_OPEN_SQUARE,
TOKEN_CLOSE_SQUARE
TOKEN_CLOSE_SQUARE,
TOKEN_REGEXP
} tokenType;
typedef struct sTokenInfo {
@ -119,6 +119,8 @@ typedef struct sTokenInfo {
* DATA DEFINITIONS
*/
static tokenType LastTokenType;
static langType Lang_js;
static jmp_buf Exception;
@ -144,7 +146,6 @@ static const keywordDesc JsKeywordTable [] = {
/* keyword keyword ID */
{ "function", KEYWORD_function },
{ "Function", KEYWORD_capital_function },
{ "object", KEYWORD_object },
{ "Object", KEYWORD_capital_object },
{ "prototype", KEYWORD_prototype },
{ "var", KEYWORD_var },
@ -339,6 +340,32 @@ static void parseString (vString *const string, const int delimiter)
vStringTerminate (string);
}
/* Skips over the body and flags of a JavaScript regular expression literal.
 *
 * Expects the opening '/' to have been consumed already, so the next
 * character read is the first character of the pattern.  Reads up to the
 * closing '/' (one that is neither escaped nor inside a [...] range), then
 * consumes any alphabetic flag characters and pushes back the first
 * character that is not a flag.
 *
 * A regular expression literal cannot contain a line terminator.  If one is
 * met before the closing '/', the caller most likely mistook a division
 * operator for the start of a regexp; the scan stops there and the line
 * terminator is pushed back, so the rest of the file is not swallowed up to
 * the next '/'.
 *
 * Scanning also stops at EOF.
 */
static void parseRegExp (void)
{
	int c;
	boolean in_range = FALSE;	/* TRUE while between '[' and ']' */

	do
	{
		c = fileGetc ();
		if (! in_range && c == '/')
		{
			do /* skip flags */
			{
				c = fileGetc ();
			} while (isalpha (c));
			/* give back the character that ended the flags */
			fileUngetc (c);
			break;
		}
		else if (c == '\n' || c == '\r')
		{
			/* invalid inside a regexp literal: give up on this line */
			fileUngetc (c);
			break;
		}
		else if (c == '\\')
			c = fileGetc (); /* skip next character, so an escaped '/', '[' or ']' is ignored */
		else if (c == '[')
			in_range = TRUE;
		else if (c == ']')
			in_range = FALSE;
	} while (c != EOF);
}
/* Read a C identifier beginning with "firstChar" and places it into
* "name".
*/
@ -414,6 +441,25 @@ getNextChar:
{
token->type = TOKEN_FORWARD_SLASH;
fileUngetc (d);
switch (LastTokenType)
{
case TOKEN_CHARACTER:
case TOKEN_KEYWORD:
case TOKEN_IDENTIFIER:
case TOKEN_STRING:
case TOKEN_CLOSE_CURLY:
case TOKEN_CLOSE_PAREN:
case TOKEN_CLOSE_SQUARE:
token->type = TOKEN_FORWARD_SLASH;
break;
default:
token->type = TOKEN_REGEXP;
parseRegExp ();
token->lineNumber = getSourceLineNumber ();
token->filePosition = getInputFilePosition ();
break;
}
}
else
{
@ -455,6 +501,8 @@ getNextChar:
}
break;
}
LastTokenType = token->type;
}
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
@ -574,6 +622,7 @@ static void findCmdTerm (tokenInfo *const token)
if ( isType (token, TOKEN_OPEN_CURLY))
{
parseBlock (token, token);
readToken (token);
}
else if ( isType (token, TOKEN_OPEN_PAREN) )
{
@ -789,36 +838,9 @@ static boolean parseIf (tokenInfo *const token)
{
findCmdTerm (token);
/*
* The IF could be followed by an ELSE statement.
* This too could have two formats, a curly braced
* multiline section, or another single line.
*/
if (isType (token, TOKEN_CLOSE_CURLY))
{
/*
* This statement did not have a line terminator.
*/
read_next_token = FALSE;
}
else
{
readToken (token);
if (isType (token, TOKEN_CLOSE_CURLY))
{
/*
* This statement did not have a line terminator.
*/
read_next_token = FALSE;
}
else
{
if (isKeyword (token, KEYWORD_else))
read_next_token = parseIf (token);
}
}
/* The next token should only be read if this statement had its own
* terminator */
read_next_token = isType (token, TOKEN_SEMICOLON);
}
return read_next_token;
}
@ -838,17 +860,14 @@ static void parseFunction (tokenInfo *const token)
addToScope(name, token->scope);
readToken (token);
if (isType (token, TOKEN_PERIOD))
while (isType (token, TOKEN_PERIOD))
{
do
readToken (token);
if ( isKeyword(token, KEYWORD_NONE) )
{
addContext (name, token);
readToken (token);
if ( isKeyword(token, KEYWORD_NONE) )
{
addContext (name, token);
readToken (token);
}
} while (isType (token, TOKEN_PERIOD));
}
}
if ( isType (token, TOKEN_OPEN_PAREN) )
@ -1022,15 +1041,43 @@ static boolean parseMethods (tokenInfo *const token, tokenInfo *const class)
}
else
{
vString * saveScope = vStringNew ();
boolean has_child_methods = FALSE;
/* skip whatever is the value */
while (! isType (token, TOKEN_COMMA) &&
! isType (token, TOKEN_CLOSE_CURLY))
{
if (isType (token, TOKEN_OPEN_CURLY))
{
/* Recurse to find child properties/methods */
vStringCopy (saveScope, token->scope);
addToScope (token, class->string);
has_child_methods = parseMethods (token, name);
vStringCopy (token->scope, saveScope);
readToken (token);
}
else if (isType (token, TOKEN_OPEN_PAREN))
{
skipArgumentList (token);
}
else if (isType (token, TOKEN_OPEN_SQUARE))
{
skipArrayList (token);
}
else
{
readToken (token);
}
}
vStringDelete (saveScope);
has_methods = TRUE;
addToScope (name, class->string);
makeJsTag (name, JSTAG_PROPERTY);
/*
* Read the next token, if a comma
* we must loop again
*/
readToken (token);
if (has_child_methods)
makeJsTag (name, JSTAG_CLASS);
else
makeJsTag (name, JSTAG_PROPERTY);
}
}
}
@ -1048,6 +1095,7 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
{
tokenInfo *const name = newToken ();
tokenInfo *const secondary_name = newToken ();
tokenInfo *const method_body_token = newToken ();
vString * saveScope = vStringNew ();
boolean is_class = FALSE;
boolean is_terminated = TRUE;
@ -1150,7 +1198,7 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
* CASE 1
* Specified function name: "build"
* BindAgent.prototype.build = function( mode ) {
* ignore everything within this function
* maybe parse nested functions
* }
*
* CASE 2
@ -1178,20 +1226,24 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
{
vStringCopy(saveScope, token->scope);
addToScope(token, name->string);
makeJsTag (token, JSTAG_METHOD);
/*
* We can read until the end of the block / statement.
* We need to correctly parse any nested blocks, but
* we do NOT want to create any tags based on what is
* within the blocks.
*/
token->ignoreTag = TRUE;
/*
* Find to the end of the statement
*/
findCmdTerm (token);
token->ignoreTag = FALSE;
readToken (method_body_token);
vStringCopy (method_body_token->scope, token->scope);
while (! ( isType (method_body_token, TOKEN_SEMICOLON) ||
isType (method_body_token, TOKEN_CLOSE_CURLY) ||
isType (method_body_token, TOKEN_OPEN_CURLY)) )
{
if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
skipArgumentList(method_body_token);
else
readToken (method_body_token);
}
if ( isType (method_body_token, TOKEN_OPEN_CURLY))
parseBlock (method_body_token, token);
is_terminated = TRUE;
goto cleanUp;
}
@ -1356,6 +1408,8 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
makeJsTag (name, JSTAG_METHOD);
parseBlock (token, name);
}
else if (isType (token, TOKEN_CLOSE_CURLY))
is_terminated = FALSE;
}
else if (isType (token, TOKEN_OPEN_CURLY))
{
@ -1369,7 +1423,9 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
* var z = {};
*/
has_methods = parseMethods(token, name);
if ( ! has_methods )
if (has_methods)
makeJsTag (name, JSTAG_CLASS);
else
{
/*
* Only create variables for global scope
@ -1427,11 +1483,9 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
readToken (token);
if ( isKeyword (token, KEYWORD_function) ||
isKeyword (token, KEYWORD_capital_function) ||
isKeyword (token, KEYWORD_object) ||
isKeyword (token, KEYWORD_capital_object) )
{
if ( isKeyword (token, KEYWORD_object) ||
isKeyword (token, KEYWORD_capital_object) )
if ( isKeyword (token, KEYWORD_capital_object) )
is_class = TRUE;
readToken (token);
@ -1450,6 +1504,8 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
}
}
}
else if (isType (token, TOKEN_CLOSE_CURLY))
is_terminated = FALSE;
}
}
else if (isKeyword (token, KEYWORD_NONE))
@ -1495,28 +1551,34 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
}
}
}
findCmdTerm (token);
/*
* Statements can be optionally terminated in the case of
* statement prior to a close curly brace as in the
* document.write line below:
*
* function checkForUpdate() {
* if( 1==1 ) {
* document.write("hello from checkForUpdate<br>")
* }
* return 1;
* }
*/
if ( ! is_terminated && isType (token, TOKEN_CLOSE_CURLY))
is_terminated = FALSE;
/* if we aren't already at the cmd end, advance to it and check whether
* the statement was terminated */
if (! isType (token, TOKEN_CLOSE_CURLY) &&
! isType (token, TOKEN_SEMICOLON))
{
findCmdTerm (token);
/*
* Statements can be optionally terminated in the case of
* statement prior to a close curly brace as in the
* document.write line below:
*
* function checkForUpdate() {
* if( 1==1 ) {
* document.write("hello from checkForUpdate<br>")
* }
* return 1;
* }
*/
if (isType (token, TOKEN_CLOSE_CURLY))
is_terminated = FALSE;
}
cleanUp:
vStringCopy(token->scope, saveScope);
deleteToken (name);
deleteToken (secondary_name);
deleteToken (method_body_token);
vStringDelete(saveScope);
return is_terminated;
@ -1578,18 +1640,10 @@ static void parseJsFile (tokenInfo *const token)
{
readToken (token);
if (isType(token, TOKEN_KEYWORD))
{
switch (token->keyword)
{
case KEYWORD_function: parseFunction (token); break;
default: parseLine (token, FALSE); break;
}
}
if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_function)
parseFunction (token);
else
{
parseLine (token, FALSE);
}
} while (TRUE);
}
@ -1607,6 +1661,7 @@ static void findJsTags (void)
ClassNames = stringListNew ();
FunctionNames = stringListNew ();
LastTokenType = TOKEN_UNDEFINED;
exception = (exception_t) (setjmp (Exception));
while (exception == ExceptionNone)