X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fas3%2Ftokenizer.lex;h=7e936b65cdb3834459d60686269523dbc741b285;hb=c63b2bf21dc1df9a736f0b4c08f6cba828cdab92;hp=491a5f25af9ddacaae818ab62f0db4ac3fe39c98;hpb=639ac6b9f7a89f10d02c5d9ef41bca3bad4eaf2b;p=swftools.git diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex index 491a5f2..7e936b6 100644 --- a/lib/as3/tokenizer.lex +++ b/lib/as3/tokenizer.lex @@ -110,10 +110,10 @@ void handleInclude(char*text, int len, char quotes) } else { int i1=0,i2=len; // find start - while(!strchr(" \n\r\t", text[i1])) i1++; + while(!strchr(" \n\r\t\xa0", text[i1])) i1++; // strip - while(strchr(" \n\r\t", text[i1])) i1++; - while(strchr(" \n\r\t", text[i2-1])) i2--; + while(strchr(" \n\r\t\xa0", text[i1])) i1++; + while(strchr(" \n\r\t\xa0", text[i2-1])) i2--; if(i2!=len) text[i2]=0; filename = strdup(&text[i1]); } @@ -470,30 +470,13 @@ static inline void c() { current_column+=yyleng; } -trie_t*active_namespaces = 0; -/*void tokenizer_register_namespace(const char*id) -{ - trie_put(namespaces, id, 0); -} -void tokenizer_unregister_namespace(const char*id) -{ - trie_remove(namespaces, id); -}*/ -static inline char tokenizer_is_namespace(const char*id) -{ - return trie_contains(active_namespaces, id); -} - static inline int handleIdentifier() { char*s = malloc(yyleng+1); memcpy(s, yytext, yyleng); s[yyleng]=0; a3_lval.id = s; - if(tokenizer_is_namespace(s)) - return T_NAMESPACE; - else - return T_IDENTIFIER; + return T_IDENTIFIER; } static int tokenerror(); @@ -505,6 +488,7 @@ static int tokenerror(); //XMLCOMMENT //{XMLCOMMENT} + %} %s REGEXPOK @@ -513,8 +497,20 @@ static int tokenerror(); %x XMLTEXT %x XML -NAME [a-zA-Z_][a-zA-Z0-9_\\]* -_ [^a-zA-Z0-9_\\] +X1 parsing identifiers with a non unicode lexer is a knightmare we have to skip all possible +X2 combinations of byte order markers or utf8 space chars and i dont quite like the fact that +X3 lex doesnt support proper comments in this section either... +X4 {NAME_HEAD}{NAME_TAIL} + +NAME_NOC2EF [a-zA-Z_\x80-\xc1\xc3-\xee\xf0-\xff] +NAME_EF [\xef][a-zA-Z0-9_\\\x80-\xba\xbc-\xff] +NAME_C2 [\xc2][a-zA-Z0-9_\\\x80-\x9f\xa1-\xff] +NAME_EFBB [\xef][\xbb][a-zA-Z0-9_\\\x80-\xbe\xc0-\xff] +NAME_TAIL [a-zA-Z_0-9\\\x80-\xff]* +NAME_HEAD (({NAME_NOC2EF})|({NAME_EF})|({NAME_C2})|({NAME_EFBB})) +NAME {NAME_HEAD}{NAME_TAIL} + +_ [^a-zA-Z0-9_\\\x80-\xff] HEXINT 0x[a-zA-Z0-9]+ HEXFLOAT 0x[a-zA-Z0-9]*\.[a-zA-Z0-9]* @@ -529,9 +525,11 @@ FLOATWITHSIGN [+-]?({FLOAT}) CDATA ])*\]*\]\]\> XMLCOMMENT XML <[^>]+{S}> +XMLID [A-Za-z0-9_\x80-\xff]+([:][A-Za-z0-9_\x80-\xff]+)? +XMLSTRING ["][^"]*["] STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*['] -S [ \n\r\t] +S ([ \n\r\t\xa0]|[\xc2][\xa0]) MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/] SINGLELINE_COMMENT \/\/[^\n\r]*[\n\r] REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* @@ -543,7 +541,7 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* [/][*] {syntaxerror("syntax error: unterminated comment", yytext);} ^include{S}+{STRING}{S}*/\n {l();handleInclude(yytext, yyleng, 1);} -^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);} +^include{S}+[^" \t\xa0\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);} {STRING} {l(); BEGIN(DEFAULT);handleString(yytext, yyleng);return T_STRING;} {CDATA} {l(); BEGIN(DEFAULT);handleCData(yytext, yyleng);return T_STRING;} @@ -552,13 +550,13 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* } { -{STRING} {l(); handleString(yytext, yyleng);return T_STRING;} +{XMLSTRING} {l(); handleRaw(yytext, yyleng);return T_STRING;} [{] {c(); BEGIN(REGEXPOK);return m('{');} [<] {c(); return m('<');} [/] {c(); return m('/');} [>] {c(); return m('>');} [=] {c(); return m('=');} -{NAME} {c(); handleRaw(yytext, yyleng);return T_IDENTIFIER;} +{XMLID} {c(); handleRaw(yytext, yyleng);return T_IDENTIFIER;} {S} {l();} <> {syntaxerror("unexpected end of file");} } @@ -584,7 +582,7 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* [\{] {c(); BEGIN(REGEXPOK);return m(T_DICTSTART);} [\{] {c(); BEGIN(DEFAULT); return m('{');} -\xef\xbb\xbf {/* utf 8 bom */} +\xef\xbb\xbf {/* utf 8 bom (0xfeff) */} {S} {l();} {HEXINT}/{_} {c(); BEGIN(DEFAULT);return handlehex();} @@ -600,6 +598,7 @@ NaN {c(); BEGIN(DEFAULT);return m(KW_NAN);} {NAME}{S}*:{S}*do/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-2);return T_DO;} {NAME}{S}*:{S}*while/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-5);return T_WHILE;} {NAME}{S}*:{S}*switch/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-6);return T_SWITCH;} +default{S}xml {l();BEGIN(DEFAULT);return m(KW_DEFAULT_XML);} for {c();BEGIN(DEFAULT);a3_lval.id="";return T_FOR;} do {c();BEGIN(DEFAULT);a3_lval.id="";return T_DO;} while {c();BEGIN(DEFAULT);a3_lval.id="";return T_WHILE;} @@ -637,8 +636,8 @@ switch {c();BEGIN(DEFAULT);a3_lval.id="";return T_SWITCH;} instanceof {c();BEGIN(REGEXPOK);return m(KW_INSTANCEOF);} implements {c();BEGIN(REGEXPOK);return m(KW_IMPLEMENTS);} interface {c();BEGIN(DEFAULT);return m(KW_INTERFACE);} -namespace {c();BEGIN(DEFAULT);return m(KW_NAMESPACE);} protected {c();BEGIN(DEFAULT);return m(KW_PROTECTED);} +namespace {c();BEGIN(DEFAULT);return m(KW_NAMESPACE);} undefined {c();BEGIN(DEFAULT);return m(KW_UNDEFINED);} arguments {c();BEGIN(DEFAULT);return m(KW_ARGUMENTS);} continue {c();BEGIN(DEFAULT);return m(KW_CONTINUE);} @@ -727,7 +726,7 @@ static int tokenerror() if(c1>='0' && c1<='9') syntaxerror("syntax error: %s (identifiers must not start with a digit)"); else - syntaxerror("syntax error [%d]: %s", (yy_start-1)/2, buf); + syntaxerror("syntax error [state=%d]: %s", (yy_start-1)/2, buf); printf("\n"); exit(1); yyterminate();