X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fas3%2Ftokenizer.lex;h=4647d3065879a21526a0920bc56b8e96b5eeba5b;hb=34e395874f66f71ed68a3d74ef9d6b9f706eff06;hp=ee71c965b13088bb7641a53d1d83fc1eb6140563;hpb=e60ab7763330304f1e23966cd3d29240b33913ec;p=swftools.git diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex index ee71c96..4647d30 100644 --- a/lib/as3/tokenizer.lex +++ b/lib/as3/tokenizer.lex @@ -129,16 +129,13 @@ void handleInclude(char*text, int len, char quotes) //BEGIN(INITIAL); keep context } -string_t string_unescape(const char*in, int l) +static int do_unescape(const char*s, const char*end, char*n) { - int len=0; - const char*s = in; - const char*end = &in[l]; - char*n = (char*)malloc(l); char*o = n; + int len=0; while(s256) syntaxerror("octal number out of range (0-255): %d", num); - o[len++] = num; + if(o) o[len] = num;len++; continue; } case 'x': case 'u': { @@ -214,12 +211,12 @@ string_t string_unescape(const char*in, int l) if(unicode) { char*utf8 = getUTF8(num); while(*utf8) { - o[len++] = *utf8++; + if(o) o[len] = *utf8;utf8++;len++; } } else { if(num>256) syntaxerror("byte out of range (0-255): %d", num); - o[len++] = num; + if(o) o[len] = num;len++; } break; } @@ -227,8 +224,19 @@ string_t string_unescape(const char*in, int l) syntaxerror("unknown escape sequence: \"\\%c\"", *s); } } + if(o) o[len]=0; + return len; +} + +static string_t string_unescape(const char*in, int l) +{ + const char*s = in; + const char*end = &in[l]; + + int len = do_unescape(s, end, 0); + char*n = (char*)malloc(len+1); + do_unescape(s, end, n); string_t out = string_new(n, len); - o[len]=0; return out; } @@ -268,30 +276,47 @@ static inline int m(int type) static char numberbuf[64]; -static inline int handlenumber() +static char*nrbuf() { if(yyleng>sizeof(numberbuf)-1) syntaxerror("decimal number overflow"); - char*s = numberbuf; memcpy(s, yytext, yyleng); s[yyleng]=0; + return s; +} - int t; - char is_float=0; - for(t=0;t-128) + return T_BYTE; + else if(v>=-32768) + return T_SHORT; + else + return T_INT; +} +static inline int setuint(unsigned int v) +{ + avm2_lval.number_uint = v; + if(v<128) + return T_BYTE; + else if(v<32768) + return T_SHORT; + else + return T_UINT; +} + +static inline int handlefloat() +{ + char*s = nrbuf(); + avm2_lval.number_float = atof(s); + return T_FLOAT; +} + +static inline int handleint() +{ + char*s = nrbuf(); char l = (yytext[0]=='-'); char*max = l?"1073741824":"2147483647"; @@ -308,29 +333,61 @@ static inline int handlenumber() } if(yytext[0]=='-') { int v = atoi(s); - avm2_lval.number_int = v; - if(v>-128) - return T_BYTE; - else if(v>=-32768) - return T_SHORT; - else - return T_INT; + return setint(v); } else { unsigned int v = 0; + int t; for(t=0;t8) + syntaxerror("integer overflow"); + int t; + unsigned int v = 0; + for(t=l;t='0' && c<='9') + v|=(c&15); + else if(c>='a' && c<='f' || + c>='A' && c<='F') + v|=(c&0x0f)+9; + } + if(l && v>1073741824) + syntaxerror("signed integer overflow"); + if(!l && v>2147483647) + syntaxerror("unsigned integer overflow"); + + if(l==3) { + return setint(-(int)v); + } else { + return setuint(v); } } +void handleLabel(char*text, int len) +{ + int t; + for(t=len-1;t>=0;--t) { + if(text[t]!=' ' && + text[t]!='.') + break; + } + char*s = malloc(t+1); + memcpy(s, yytext, t); + s[t]=0; + avm2_lval.id = s; +} + void initialize_scanner(); #define YY_USER_INIT initialize_scanner(); @@ -340,15 +397,23 @@ void initialize_scanner(); //int {c();return m(KW_INT);} //uint {c();return m(KW_UINT);} //Number {c();return m(KW_NUMBER);} + + %} %s REGEXPOK %s BEGINNING NAME [a-zA-Z_][a-zA-Z0-9_\\]* +_ [^a-zA-Z0-9_\\] -NUMBER [0-9]+(\.[0-9]*)?|-?\.[0-9]+ -NUMBERWITHSIGN [+-]?({NUMBER}) +HEXINT 0x[a-zA-Z0-9]+ +INT [0-9]+ +FLOAT [0-9]+(\.[0-9]*)?|\.[0-9]+ + +HEXWITHSIGN [+-]?({HEXINT}) +INTWITHSIGN [+-]?({INT}) +FLOATWITHSIGN [+-]?({FLOAT}) STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*['] S [ \n\r\t] @@ -368,17 +433,30 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* { {REGEXP} {c(); BEGIN(INITIAL);return m(T_REGEXP);} -{NUMBERWITHSIGN} {c(); BEGIN(INITIAL);return handlenumber();} +{HEXWITHSIGN} {c(); BEGIN(INITIAL);return handlehex();} +{INTWITHSIGN} {c(); BEGIN(INITIAL);return handleint();} +{FLOATWITHSIGN} {c(); BEGIN(INITIAL);return handlefloat();} } \xef\xbb\xbf {/* utf 8 bom */} {S} {c();} -{NUMBER} {c(); BEGIN(INITIAL);return handlenumber();} +{HEXINT} {c(); BEGIN(INITIAL);return handlehex();} +{INT} {c(); BEGIN(INITIAL);return handleint();} +{FLOAT} {c(); BEGIN(INITIAL);return handlefloat();} 3rr0r {/* for debugging: generates a tokenizer-level error */ syntaxerror("3rr0r");} +{NAME}{S}*:{S}*for/{_} {c();handleLabel(yytext, yyleng-3);return T_FOR;} +{NAME}{S}*:{S}*do/{_} {c();handleLabel(yytext, yyleng-2);return T_DO;} +{NAME}{S}*:{S}*while/{_} {c();handleLabel(yytext, yyleng-5);return T_WHILE;} +{NAME}{S}*:{S}*switch/{_} {c();handleLabel(yytext, yyleng-6);return T_SWITCH;} +for {c();avm2_lval.id="";return T_FOR;} +do {c();avm2_lval.id="";return T_DO;} +while {c();avm2_lval.id="";return T_WHILE;} +switch {c();avm2_lval.id="";return T_SWITCH;} + [&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);} [|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);} [!][=] {c();BEGIN(REGEXPOK);return m(T_NE);} @@ -405,13 +483,17 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* \. {c();return m('.');} :: {c();return m(T_COLONCOLON);} : {c();return m(':');} +instanceof {c();return m(KW_INSTANCEOF);} implements {c();return m(KW_IMPLEMENTS);} interface {c();return m(KW_INTERFACE);} namespace {c();return m(KW_NAMESPACE);} protected {c();return m(KW_PROTECTED);} +undefined {c();return m(KW_UNDEFINED);} +continue {c();return m(KW_CONTINUE);} override {c();return m(KW_OVERRIDE);} internal {c();return m(KW_INTERNAL);} function {c();return m(KW_FUNCTION);} +default {c();return m(KW_DEFAULT);} package {c();return m(KW_PACKAGE);} private {c();return m(KW_PRIVATE);} dynamic {c();return m(KW_DYNAMIC);} @@ -422,22 +504,30 @@ public {c();return m(KW_PUBLIC);} native {c();return m(KW_NATIVE);} static {c();return m(KW_STATIC);} import {c();return m(KW_IMPORT);} -while {c();return m(KW_WHILE);} +typeof {c();return m(KW_TYPEOF);} +throw {c();return m(KW_THROW);} class {c();return m(KW_CLASS);} const {c();return m(KW_CONST);} +catch {c();return m(KW_CATCH);} final {c();return m(KW_FINAL);} false {c();return m(KW_FALSE);} break {c();return m(KW_BREAK);} +super {c();return m(KW_SUPER);} +each {c();return m(KW_EACH);} +void {c();return m(KW_VOID);} true {c();return m(KW_TRUE);} null {c();return m(KW_NULL);} else {c();return m(KW_ELSE);} +case {c();return m(KW_CASE);} +with {c();return m(KW_WITH);} use {c();return m(KW_USE);} new {c();return m(KW_NEW);} get {c();return m(KW_GET);} -for {c();return m(KW_FOR);} set {c();return m(KW_SET);} var {c();return m(KW_VAR);} +try {c();return m(KW_TRY);} is {c();return m(KW_IS) ;} +in {c();return m(KW_IN) ;} if {c();return m(KW_IF) ;} as {c();return m(KW_AS);} {NAME} {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);} @@ -526,7 +616,6 @@ char*token2string(enum yytokentype nr, YYSTYPE v) else if(nr==KW_INT) return "int"; else if(nr==KW_NEW) return "new"; else if(nr==KW_GET) return "get"; - else if(nr==KW_FOR) return "for"; else if(nr==KW_SET) return "set"; else if(nr==KW_VAR) return "var"; else if(nr==KW_IS) return "is";