X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fas3%2Ftokenizer.lex;h=ee71c965b13088bb7641a53d1d83fc1eb6140563;hb=e60ab7763330304f1e23966cd3d29240b33913ec;hp=0da86e900d96e9945b764f8053ee068f65720987;hpb=24d7875c692fef75179e12c7b1eeeba81eaea825;p=swftools.git diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex index 0da86e9..ee71c96 100644 --- a/lib/as3/tokenizer.lex +++ b/lib/as3/tokenizer.lex @@ -129,6 +129,109 @@ void handleInclude(char*text, int len, char quotes) //BEGIN(INITIAL); keep context } +string_t string_unescape(const char*in, int l) +{ + int len=0; + const char*s = in; + const char*end = &in[l]; + char*n = (char*)malloc(l); + char*o = n; + while(s256) + syntaxerror("octal number out of range (0-255): %d", num); + o[len++] = num; + continue; + } + case 'x': case 'u': { + int max=2; + char bracket = 0; + char unicode = 0; + if(*s == 'u') { + max = 6; + unicode = 1; + } + s++; + if(s==end) syntaxerror("invalid \\u or \\x at end of string"); + if(*s == '{') { + s++; + if(s==end) syntaxerror("invalid \\u{ at end of string"); + bracket=1; + } + unsigned int num=0; + int nr = 0; + while(strchr("0123456789abcdefABCDEF", *s) && (bracket || nr < max) && s='0' && *s<='9') num |= *s - '0'; + if(*s>='a' && *s<='f') num |= *s - 'a' + 10; + if(*s>='A' && *s<='F') num |= *s - 'A' + 10; + nr++; + s++; + } + if(bracket) { + if(*s=='}' && s256) + syntaxerror("byte out of range (0-255): %d", num); + o[len++] = num; + } + break; + } + default: + syntaxerror("unknown escape sequence: \"\\%c\"", *s); + } + } + string_t out = string_new(n, len); + o[len]=0; + return out; +} + static void handleString(char*s, int len) { if(s[0]=='"') { @@ -140,26 +243,30 @@ static void handleString(char*s, int len) s++;len-=2; } else syntaxerror("String incorrectly terminated"); - s[len] = 0; - avm2_lval.string = s; + + + avm2_lval.str = string_unescape(s, len); } char start_of_expression; -static inline int m(int type) +static inline int mkid(int type) { char*s = malloc(yyleng+1); memcpy(s, yytext, yyleng); s[yyleng]=0; + avm2_lval.id = s; + return type; +} - NEW(token_t,t); - t->type = type; - t->text = s; - avm2_lval.token = t; +static inline int m(int type) +{ + avm2_lval.token = type; return type; } + static char numberbuf[64]; static inline int handlenumber() { @@ -229,6 +336,10 @@ void initialize_scanner(); #define c() {countlines(yytext, yyleng);} +//Boolean {c();return m(KW_BOOLEAN);} +//int {c();return m(KW_INT);} +//uint {c();return m(KW_UINT);} +//Number {c();return m(KW_NUMBER);} %} %s REGEXPOK @@ -236,7 +347,8 @@ void initialize_scanner(); NAME [a-zA-Z_][a-zA-Z0-9_\\]* -NUMBER -?[0-9]+(\.[0-9]*)? +NUMBER [0-9]+(\.[0-9]*)?|-?\.[0-9]+ +NUMBERWITHSIGN [+-]?({NUMBER}) STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*['] S [ \n\r\t] @@ -256,6 +368,7 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* { {REGEXP} {c(); BEGIN(INITIAL);return m(T_REGEXP);} +{NUMBERWITHSIGN} {c(); BEGIN(INITIAL);return handlenumber();} } \xef\xbb\xbf {/* utf 8 bom */} @@ -269,6 +382,7 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* [&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);} [|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);} [!][=] {c();BEGIN(REGEXPOK);return m(T_NE);} +[!][=][=] {c();BEGIN(REGEXPOK);return m(T_NEE);} [=][=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);} [=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);} [>][=] {c();return m(T_GE);} @@ -300,15 +414,14 @@ internal {c();return m(KW_INTERNAL);} function {c();return m(KW_FUNCTION);} package {c();return m(KW_PACKAGE);} private {c();return m(KW_PRIVATE);} -Boolean {c();return m(KW_BOOLEAN);} dynamic {c();return m(KW_DYNAMIC);} extends {c();return m(KW_EXTENDS);} +delete {c();return m(KW_DELETE);} return {c();return m(KW_RETURN);} public {c();return m(KW_PUBLIC);} native {c();return m(KW_NATIVE);} static {c();return m(KW_STATIC);} import {c();return m(KW_IMPORT);} -Number {c();return m(KW_NUMBER);} while {c();return m(KW_WHILE);} class {c();return m(KW_CLASS);} const {c();return m(KW_CONST);} @@ -316,11 +429,9 @@ final {c();return m(KW_FINAL);} false {c();return m(KW_FALSE);} break {c();return m(KW_BREAK);} true {c();return m(KW_TRUE);} -uint {c();return m(KW_UINT);} null {c();return m(KW_NULL);} else {c();return m(KW_ELSE);} use {c();return m(KW_USE);} -int {c();return m(KW_INT);} new {c();return m(KW_NEW);} get {c();return m(KW_GET);} for {c();return m(KW_FOR);} @@ -329,9 +440,9 @@ var {c();return m(KW_VAR);} is {c();return m(KW_IS) ;} if {c();return m(KW_IF) ;} as {c();return m(KW_AS);} -{NAME} {c();BEGIN(INITIAL);return m(T_IDENTIFIER);} +{NAME} {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);} -[+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} +[+-\/*^~@$!%&\(=\[\]\{\}|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} [\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);} . {char c1=yytext[0]; @@ -373,12 +484,12 @@ int yywrap() } static char mbuf[256]; -char*token2string(token_t*t) +char*token2string(enum yytokentype nr, YYSTYPE v) { - int nr=t->type; if(nr==T_STRING) return ""; else if(nr==T_INT) return ""; else if(nr==T_UINT) return ""; + else if(nr==T_BYTE) return ""; else if(nr==T_FLOAT) return ""; else if(nr==T_REGEXP) return "REGEXP"; else if(nr==T_EOF) return "***END***"; @@ -420,12 +531,8 @@ char*token2string(token_t*t) else if(nr==KW_VAR) return "var"; else if(nr==KW_IS) return "is"; else if(nr==KW_AS) return "as"; - else if(nr==T_IDENTIFIER) { - if(strlen(t->text)>sizeof(mbuf)-1) - return "ID(...)"; - sprintf(mbuf, "ID(%s)", t->text); - return mbuf; - } else { + else if(nr==T_IDENTIFIER) return "ID"; + else { sprintf(mbuf, "%d", nr); return mbuf; }