X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fas3%2Ftokenizer.lex;h=4647d3065879a21526a0920bc56b8e96b5eeba5b;hb=34e395874f66f71ed68a3d74ef9d6b9f706eff06;hp=0da86e900d96e9945b764f8053ee068f65720987;hpb=24d7875c692fef75179e12c7b1eeeba81eaea825;p=swftools.git diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex index 0da86e9..4647d30 100644 --- a/lib/as3/tokenizer.lex +++ b/lib/as3/tokenizer.lex @@ -129,6 +129,117 @@ void handleInclude(char*text, int len, char quotes) //BEGIN(INITIAL); keep context } +static int do_unescape(const char*s, const char*end, char*n) +{ + char*o = n; + int len=0; + while(s256) + syntaxerror("octal number out of range (0-255): %d", num); + if(o) o[len] = num;len++; + continue; + } + case 'x': case 'u': { + int max=2; + char bracket = 0; + char unicode = 0; + if(*s == 'u') { + max = 6; + unicode = 1; + } + s++; + if(s==end) syntaxerror("invalid \\u or \\x at end of string"); + if(*s == '{') { + s++; + if(s==end) syntaxerror("invalid \\u{ at end of string"); + bracket=1; + } + unsigned int num=0; + int nr = 0; + while(strchr("0123456789abcdefABCDEF", *s) && (bracket || nr < max) && s='0' && *s<='9') num |= *s - '0'; + if(*s>='a' && *s<='f') num |= *s - 'a' + 10; + if(*s>='A' && *s<='F') num |= *s - 'A' + 10; + nr++; + s++; + } + if(bracket) { + if(*s=='}' && s256) + syntaxerror("byte out of range (0-255): %d", num); + if(o) o[len] = num;len++; + } + break; + } + default: + syntaxerror("unknown escape sequence: \"\\%c\"", *s); + } + } + if(o) o[len]=0; + return len; +} + +static string_t string_unescape(const char*in, int l) +{ + const char*s = in; + const char*end = &in[l]; + + int len = do_unescape(s, end, 0); + char*n = (char*)malloc(len+1); + do_unescape(s, end, n); + string_t out = string_new(n, len); + return out; +} + static void handleString(char*s, int len) { if(s[0]=='"') { @@ -140,51 +251,72 @@ static void handleString(char*s, int len) s++;len-=2; } else syntaxerror("String incorrectly terminated"); - s[len] = 0; - avm2_lval.string = s; + + + avm2_lval.str = string_unescape(s, len); } char start_of_expression; -static inline int m(int type) +static inline int mkid(int type) { char*s = malloc(yyleng+1); memcpy(s, yytext, yyleng); s[yyleng]=0; + avm2_lval.id = s; + return type; +} - NEW(token_t,t); - t->type = type; - t->text = s; - avm2_lval.token = t; +static inline int m(int type) +{ + avm2_lval.token = type; return type; } + static char numberbuf[64]; -static inline int handlenumber() +static char*nrbuf() { if(yyleng>sizeof(numberbuf)-1) syntaxerror("decimal number overflow"); - char*s = numberbuf; memcpy(s, yytext, yyleng); s[yyleng]=0; + return s; +} - int t; - char is_float=0; - for(t=0;t-128) + return T_BYTE; + else if(v>=-32768) + return T_SHORT; + else + return T_INT; +} +static inline int setuint(unsigned int v) +{ + avm2_lval.number_uint = v; + if(v<128) + return T_BYTE; + else if(v<32768) + return T_SHORT; + else + return T_UINT; +} + +static inline int handlefloat() +{ + char*s = nrbuf(); + avm2_lval.number_float = atof(s); + return T_FLOAT; +} + +static inline int handleint() +{ + char*s = nrbuf(); char l = (yytext[0]=='-'); char*max = l?"1073741824":"2147483647"; @@ -201,42 +333,87 @@ static inline int handlenumber() } if(yytext[0]=='-') { int v = atoi(s); - avm2_lval.number_int = v; - if(v>-128) - return T_BYTE; - else if(v>=-32768) - return T_SHORT; - else - return T_INT; + return setint(v); } else { unsigned int v = 0; + int t; for(t=0;t8) + syntaxerror("integer overflow"); + int t; + unsigned int v = 0; + for(t=l;t='0' && c<='9') + v|=(c&15); + else if(c>='a' && c<='f' || + c>='A' && c<='F') + v|=(c&0x0f)+9; + } + if(l && v>1073741824) + syntaxerror("signed integer overflow"); + if(!l && v>2147483647) + syntaxerror("unsigned integer overflow"); + + if(l==3) { + return setint(-(int)v); + } else { + return setuint(v); + } +} + +void handleLabel(char*text, int len) +{ + int t; + for(t=len-1;t>=0;--t) { + if(text[t]!=' ' && + text[t]!='.') + break; + } + char*s = malloc(t+1); + memcpy(s, yytext, t); + s[t]=0; + avm2_lval.id = s; +} + void initialize_scanner(); #define YY_USER_INIT initialize_scanner(); #define c() {countlines(yytext, yyleng);} +//Boolean {c();return m(KW_BOOLEAN);} +//int {c();return m(KW_INT);} +//uint {c();return m(KW_UINT);} +//Number {c();return m(KW_NUMBER);} + + %} %s REGEXPOK %s BEGINNING NAME [a-zA-Z_][a-zA-Z0-9_\\]* +_ [^a-zA-Z0-9_\\] + +HEXINT 0x[a-zA-Z0-9]+ +INT [0-9]+ +FLOAT [0-9]+(\.[0-9]*)?|\.[0-9]+ -NUMBER -?[0-9]+(\.[0-9]*)? +HEXWITHSIGN [+-]?({HEXINT}) +INTWITHSIGN [+-]?({INT}) +FLOATWITHSIGN [+-]?({FLOAT}) STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*['] S [ \n\r\t] @@ -256,19 +433,34 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* { {REGEXP} {c(); BEGIN(INITIAL);return m(T_REGEXP);} +{HEXWITHSIGN} {c(); BEGIN(INITIAL);return handlehex();} +{INTWITHSIGN} {c(); BEGIN(INITIAL);return handleint();} +{FLOATWITHSIGN} {c(); BEGIN(INITIAL);return handlefloat();} } \xef\xbb\xbf {/* utf 8 bom */} {S} {c();} -{NUMBER} {c(); BEGIN(INITIAL);return handlenumber();} +{HEXINT} {c(); BEGIN(INITIAL);return handlehex();} +{INT} {c(); BEGIN(INITIAL);return handleint();} +{FLOAT} {c(); BEGIN(INITIAL);return handlefloat();} 3rr0r {/* for debugging: generates a tokenizer-level error */ syntaxerror("3rr0r");} +{NAME}{S}*:{S}*for/{_} {c();handleLabel(yytext, yyleng-3);return T_FOR;} +{NAME}{S}*:{S}*do/{_} {c();handleLabel(yytext, yyleng-2);return T_DO;} +{NAME}{S}*:{S}*while/{_} {c();handleLabel(yytext, yyleng-5);return T_WHILE;} +{NAME}{S}*:{S}*switch/{_} {c();handleLabel(yytext, yyleng-6);return T_SWITCH;} +for {c();avm2_lval.id="";return T_FOR;} +do {c();avm2_lval.id="";return T_DO;} +while {c();avm2_lval.id="";return T_WHILE;} +switch {c();avm2_lval.id="";return T_SWITCH;} + [&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);} [|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);} [!][=] {c();BEGIN(REGEXPOK);return m(T_NE);} +[!][=][=] {c();BEGIN(REGEXPOK);return m(T_NEE);} [=][=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);} [=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);} [>][=] {c();return m(T_GE);} @@ -291,47 +483,56 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* \. {c();return m('.');} :: {c();return m(T_COLONCOLON);} : {c();return m(':');} +instanceof {c();return m(KW_INSTANCEOF);} implements {c();return m(KW_IMPLEMENTS);} interface {c();return m(KW_INTERFACE);} namespace {c();return m(KW_NAMESPACE);} protected {c();return m(KW_PROTECTED);} +undefined {c();return m(KW_UNDEFINED);} +continue {c();return m(KW_CONTINUE);} override {c();return m(KW_OVERRIDE);} internal {c();return m(KW_INTERNAL);} function {c();return m(KW_FUNCTION);} +default {c();return m(KW_DEFAULT);} package {c();return m(KW_PACKAGE);} private {c();return m(KW_PRIVATE);} -Boolean {c();return m(KW_BOOLEAN);} dynamic {c();return m(KW_DYNAMIC);} extends {c();return m(KW_EXTENDS);} +delete {c();return m(KW_DELETE);} return {c();return m(KW_RETURN);} public {c();return m(KW_PUBLIC);} native {c();return m(KW_NATIVE);} static {c();return m(KW_STATIC);} import {c();return m(KW_IMPORT);} -Number {c();return m(KW_NUMBER);} -while {c();return m(KW_WHILE);} +typeof {c();return m(KW_TYPEOF);} +throw {c();return m(KW_THROW);} class {c();return m(KW_CLASS);} const {c();return m(KW_CONST);} +catch {c();return m(KW_CATCH);} final {c();return m(KW_FINAL);} false {c();return m(KW_FALSE);} break {c();return m(KW_BREAK);} +super {c();return m(KW_SUPER);} +each {c();return m(KW_EACH);} +void {c();return m(KW_VOID);} true {c();return m(KW_TRUE);} -uint {c();return m(KW_UINT);} null {c();return m(KW_NULL);} else {c();return m(KW_ELSE);} +case {c();return m(KW_CASE);} +with {c();return m(KW_WITH);} use {c();return m(KW_USE);} -int {c();return m(KW_INT);} new {c();return m(KW_NEW);} get {c();return m(KW_GET);} -for {c();return m(KW_FOR);} set {c();return m(KW_SET);} var {c();return m(KW_VAR);} +try {c();return m(KW_TRY);} is {c();return m(KW_IS) ;} +in {c();return m(KW_IN) ;} if {c();return m(KW_IF) ;} as {c();return m(KW_AS);} -{NAME} {c();BEGIN(INITIAL);return m(T_IDENTIFIER);} +{NAME} {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);} -[+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} +[+-\/*^~@$!%&\(=\[\]\{\}|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} [\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);} . {char c1=yytext[0]; @@ -373,12 +574,12 @@ int yywrap() } static char mbuf[256]; -char*token2string(token_t*t) +char*token2string(enum yytokentype nr, YYSTYPE v) { - int nr=t->type; if(nr==T_STRING) return ""; else if(nr==T_INT) return ""; else if(nr==T_UINT) return ""; + else if(nr==T_BYTE) return ""; else if(nr==T_FLOAT) return ""; else if(nr==T_REGEXP) return "REGEXP"; else if(nr==T_EOF) return "***END***"; @@ -415,17 +616,12 @@ char*token2string(token_t*t) else if(nr==KW_INT) return "int"; else if(nr==KW_NEW) return "new"; else if(nr==KW_GET) return "get"; - else if(nr==KW_FOR) return "for"; else if(nr==KW_SET) return "set"; else if(nr==KW_VAR) return "var"; else if(nr==KW_IS) return "is"; else if(nr==KW_AS) return "as"; - else if(nr==T_IDENTIFIER) { - if(strlen(t->text)>sizeof(mbuf)-1) - return "ID(...)"; - sprintf(mbuf, "ID(%s)", t->text); - return mbuf; - } else { + else if(nr==T_IDENTIFIER) return "ID"; + else { sprintf(mbuf, "%d", nr); return mbuf; }