X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fas3%2Ftokenizer.lex;h=bc4f01248265ed1d3763defc33378849f7b6c4a4;hb=0fd17f47ee30d90181e51f30fc17a6c31646137e;hp=dcae2f2b3e0e4b002518170d996814b1a7bb669d;hpb=21ef4c306d38969e8f5fe821a27d155b855735b9;p=swftools.git diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex index dcae2f2..bc4f012 100644 --- a/lib/as3/tokenizer.lex +++ b/lib/as3/tokenizer.lex @@ -31,59 +31,109 @@ #include "tokenizer.h" #include "files.h" -static void countlines(char*text, int len) { - int t; - for(t=0;t as3_buffer_len) \ + to_read = as3_buffer_len - as3_buffer_pos; \ + memcpy(buf, as3_buffer+as3_buffer_pos, to_read); \ + as3_buffer_pos += to_read; \ + result=to_read; \ + } \ +} + void handleInclude(char*text, int len, char quotes) { char*filename = 0; @@ -106,7 +156,8 @@ void handleInclude(char*text, int len, char quotes) filename = strdup(&text[i1]); } - char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER); + char*fullfilename = find_file(filename, 1); + enter_file2(filename, fullfilename, YY_CURRENT_BUFFER); yyin = fopen(fullfilename, "rb"); if (!yyin) { syntaxerror("Couldn't open include file \"%s\"\n", fullfilename); @@ -116,89 +167,382 @@ void handleInclude(char*text, int len, char quotes) //BEGIN(INITIAL); keep context } +static int do_unescape(const char*s, const char*end, char*n) +{ + char*o = n; + int len=0; + while(s256) + syntaxerror("octal number out of range (0-255): %d", num); + if(o) o[len] = num;len++; + continue; + } + case 'x': case 'u': { + int max=2; + char bracket = 0; + char unicode = 0; + if(*s == 'u') { + max = 6; + unicode = 1; + } + s++; + if(s==end) syntaxerror("invalid \\u or \\x at end of string"); + if(*s == '{') { + s++; + if(s==end) syntaxerror("invalid \\u{ at end of string"); + bracket=1; + } + unsigned int num=0; + int nr = 0; + while(strchr("0123456789abcdefABCDEF", *s) && (bracket || nr < max) && s='0' && *s<='9') num |= *s - '0'; + if(*s>='a' && *s<='f') num |= *s - 'a' + 10; + if(*s>='A' && *s<='F') num |= *s - 'A' + 10; + nr++; + s++; + } + if(bracket) { + if(*s=='}' && s256) + syntaxerror("byte out of range (0-255): %d", num); + if(o) o[len] = num;len++; + } + break; + } + default: + syntaxerror("unknown escape sequence: \"\\%c\"", *s); + } + } + if(o) o[len]=0; + return len; +} + +static string_t string_unescape(const char*in, int l) +{ + const char*s = in; + const char*end = &in[l]; + + int len = do_unescape(s, end, 0); + char*n = (char*)malloc(len+1); + do_unescape(s, end, n); + string_t out = string_new(n, len); + return out; +} + +static void handleCData(char*s, int len) +{ + a3_lval.str.str = s+9; // + a3_lval.str.str = strdup_n(a3_lval.str.str, a3_lval.str.len); +} + +static void handleString(char*s, int len) +{ + if(as3_pass < 2) { + // don't bother decoding strings in pass 1 + memset(&a3_lval, 0, sizeof(a3_lval)); + return; + } + + if(s[0]=='"') { + if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'"); + s++;len-=2; + } + else if(s[0]=='\'') { + if(s[len-1]!='\'') syntaxerror("String doesn't end with '\"'"); + s++;len-=2; + } + else syntaxerror("String incorrectly terminated"); + + + a3_lval.str = string_unescape(s, len); +} + + char start_of_expression; static inline int m(int type) { - char*s = malloc(yyleng+1); - memcpy(s, yytext, yyleng); - s[yyleng]=0; - - NEW(token_t,t); - t->type = type; - t->text = s; - avm2_lval.token = t; + a3_lval.token = type; return type; } + static char numberbuf[64]; -static inline int handlenumber() +static char*nrbuf() { if(yyleng>sizeof(numberbuf)-1) syntaxerror("decimal number overflow"); - char*s = numberbuf; memcpy(s, yytext, yyleng); s[yyleng]=0; + return s; +} - int t; - char is_float=0; - for(t=0;t-128) + return T_BYTE; + else if(v>=-32768) + return T_SHORT; + else + return T_INT; +} +static inline int setuint(unsigned int v) +{ + a3_lval.number_uint = v; + if(v<128) + return T_BYTE; + else if(v<32768) + return T_SHORT; + else + return T_UINT; +} +static inline int setfloat(double v) +{ + a3_lval.number_float = v; + return T_FLOAT; +} + +static inline int handlefloat() +{ + char*s = nrbuf(); + a3_lval.number_float = atof(s); + return T_FLOAT; +} + +static inline int handleint() +{ + char*s = nrbuf(); char l = (yytext[0]=='-'); char*max = l?"1073741824":"2147483647"; - if(yyleng-l>10) - syntaxerror("integer overflow"); + if(yyleng-l>10) { + as3_softwarning("integer overflow: %s (converted to Number)", s); + return handlefloat(); + } if(yyleng-l==10) { int t; for(t=0;tmax[t]) - syntaxerror("integer overflow %s > %s", s+l,max); + if(yytext[l+t]>max[t]) { + as3_softwarning("integer overflow: %s (converted to Number)", s); + return handlefloat(); + } else if(yytext[l+t]-128) - return T_BYTE; - else if(v>=-32768) - return T_SHORT; - else - return T_INT; + return setint(v); } else { unsigned int v = 0; + int t; for(t=0;t='0' && c<='9') + d+=(c&15)*base; + else if((c>='a' && c<='f') || (c>='A' && c<='F')) + d+=((c&0x0f)+9)*base; + } + return setfloat(d); +} +static inline int handlehex() +{ + char l = (yytext[0]=='-')+2; + int len = yyleng; + + if(len-l>8) { + char*s = nrbuf(); + syntaxerror("integer overflow %s", s); + } + + int t; + unsigned int v = 0; + for(t=l;t='0' && c<='9') + v|=(c&15); + else if((c>='a' && c<='f') || (c>='A' && c<='F')) + v|=(c&0x0f)+9; + } + if(l && v>1073741824) { + char*s = nrbuf(); + as3_softwarning("signed integer overflow: %s (converted to Number)", s); + return setfloat(v); + } + if(!l && v>2147483647) { + char*s = nrbuf(); + as3_softwarning("unsigned integer overflow: %s (converted to Number)", s); + return setfloat(v); + } + + if(l==3) { + return setint(-(int)v); + } else { + return setuint(v); + } +} + +void handleLabel(char*text, int len) +{ + int t; + for(t=len-1;t>=0;--t) { + if(text[t]!=' ' && + text[t]!=':') + break; + } + char*s = malloc(t+1); + memcpy(s, yytext, t); + s[t]=0; + a3_lval.id = s; +} + +static int handleregexp() +{ + char*s = malloc(yyleng); + int len=yyleng-1; + memcpy(s, yytext+1, len); + s[len] = 0; + int t; + for(t=len;t>=0;--t) { + if(s[t]=='/') { + s[t] = 0; + break; + } + } + a3_lval.regexp.pattern = s; + if(t==len) { + a3_lval.regexp.options = 0; + } else { + a3_lval.regexp.options = s+t+1; + } + return T_REGEXP; +} + void initialize_scanner(); #define YY_USER_INIT initialize_scanner(); -#define c() {countlines(yytext, yyleng);} +/* count the number of lines+columns consumed by this token */ +static inline void l() { + int t; + for(t=0;t])*\]*\]\]\> STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*['] S [ \n\r\t] -MULTILINE_COMMENT [/][*]([*][^/]|[^*]|[\x00-\x31])*[*]+[/] -SINGLELINE_COMMENT \/\/[^\n]*\n +MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/] +SINGLELINE_COMMENT \/\/[^\n\r]*[\n\r] REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* %% -{SINGLELINE_COMMENT} {c(); /* single line comment */} -{MULTILINE_COMMENT} {c(); /* multi line comment */} +{SINGLELINE_COMMENT} {l(); /* single line comment */} +{MULTILINE_COMMENT} {l(); /* multi line comment */} [/][*] {syntaxerror("syntax error: unterminated comment", yytext);} -^include{S}+{STRING}{S}*/\n {c();handleInclude(yytext, yyleng, 1);} -^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {c();handleInclude(yytext, yyleng, 0);} -{STRING} {c(); BEGIN(INITIAL);return m(T_STRING);} +^include{S}+{STRING}{S}*/\n {l();handleInclude(yytext, yyleng, 1);} +^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);} +{STRING} {l(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;} +{CDATA} {l(); BEGIN(INITIAL);handleCData(yytext, yyleng);return T_STRING;} { -{REGEXP} {c(); BEGIN(INITIAL);return m(T_REGEXP);} +{REGEXP} {c(); BEGIN(INITIAL);return handleregexp();} +{HEXWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlehex();} +{HEXFLOATWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlehexfloat();} +{INTWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handleint();} +{FLOATWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlefloat();} } +[\{] {c(); BEGIN(REGEXPOK);return m(T_DICTSTART);} +[\{] {c(); BEGIN(INITIAL); return m('{');} + \xef\xbb\xbf {/* utf 8 bom */} -{S} {c();} - -{NUMBER} {c(); BEGIN(INITIAL);return handlenumber();} - -[>][=] {return m(T_GE);} -[<][=] {return m(T_LE);} -[-][-] {BEGIN(INITIAL);return m(T_MINUSMINUS);} -[+][+] {BEGIN(INITIAL);return m(T_PLUSPLUS);} -== {BEGIN(REGEXPOK);return m(T_EQEQ);} -\.\. {return m(T_DOTDOT);} -\. {return m('.');} -:: {return m(T_COLONCOLON);} -: {return m(':');} -implements {return m(KW_IMPLEMENTS);} -interface {return m(KW_INTERFACE);} -namespace {return m(KW_NAMESPACE);} -protected {return m(KW_PROTECTED);} -override {return m(KW_OVERRIDE);} -internal {return m(KW_INTERNAL);} -function {return m(KW_FUNCTION);} -package {return m(KW_PACKAGE);} -private {return m(KW_PRIVATE);} -Boolean {return m(KW_BOOLEAN);} -dynamic {return m(KW_DYNAMIC);} -extends {return m(KW_EXTENDS);} -public {return m(KW_PUBLIC);} -native {return m(KW_NATIVE);} -static {return m(KW_STATIC);} -import {return m(KW_IMPORT);} -Number {return m(KW_NUMBER);} -class {return m(KW_CLASS);} -const {return m(KW_CONST);} -final {return m(KW_FINAL);} -False {return m(KW_FALSE);} -True {return m(KW_TRUE);} -uint {return m(KW_UINT);} -null {return m(KW_NULL);} -use {return m(KW_USE);} -int {return m(KW_INT);} -new {return m(KW_NEW);} -get {return m(KW_GET);} -for {return m(KW_FOR);} -set {return m(KW_SET);} -var {return m(KW_VAR);} -is {return m(KW_IS) ;} -as {return m(KW_AS);} -{NAME} {c();BEGIN(INITIAL);return m(T_IDENTIFIER);} - -[+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} -[\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);} - -. {char c1=yytext[0]; +{S} {l();} + +{HEXINT}/{_} {c(); BEGIN(INITIAL);return handlehex();} +{HEXFLOAT}/{_} {c(); BEGIN(INITIAL);return handlehexfloat();} +{INT}/{_} {c(); BEGIN(INITIAL);return handleint();} +{FLOAT}/{_} {c(); BEGIN(INITIAL);return handlefloat();} + +3rr0r {/* for debugging: generates a tokenizer-level error */ + syntaxerror("3rr0r");} + +{NAME}{S}*:{S}*for/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-3);return T_FOR;} +{NAME}{S}*:{S}*do/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-2);return T_DO;} +{NAME}{S}*:{S}*while/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-5);return T_WHILE;} +{NAME}{S}*:{S}*switch/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-6);return T_SWITCH;} +for {c();BEGIN(INITIAL);a3_lval.id="";return T_FOR;} +do {c();BEGIN(INITIAL);a3_lval.id="";return T_DO;} +while {c();BEGIN(INITIAL);a3_lval.id="";return T_WHILE;} +switch {c();BEGIN(INITIAL);a3_lval.id="";return T_SWITCH;} + +[&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);} +[|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);} +[!][=] {c();BEGIN(REGEXPOK);return m(T_NE);} +[!][=][=] {c();BEGIN(REGEXPOK);return m(T_NEE);} +[=][=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);} +[=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);} +[>][=] {c();BEGIN(REGEXPOK);return m(T_GE);} +[<][=] {c();BEGIN(REGEXPOK);return m(T_LE);} +[-][-] {c();BEGIN(INITIAL);return m(T_MINUSMINUS);} +[+][+] {c();BEGIN(INITIAL);return m(T_PLUSPLUS);} +[+][=] {c();BEGIN(REGEXPOK);return m(T_PLUSBY);} +[\^][=] {c();BEGIN(REGEXPOK);return m(T_XORBY);} +[-][=] {c();BEGIN(REGEXPOK);return m(T_MINUSBY);} +[/][=] {c();BEGIN(REGEXPOK);return m(T_DIVBY);} +[%][=] {c();BEGIN(REGEXPOK);return m(T_MODBY);} +[*][=] {c();BEGIN(REGEXPOK);return m(T_MULBY);} +[|][=] {c();BEGIN(REGEXPOK);return m(T_ORBY);} +[>][>][=] {c();BEGIN(REGEXPOK);return m(T_SHRBY);} +[<][<][=] {c();BEGIN(REGEXPOK);return m(T_SHLBY);} +[>][>][>][=] {c();BEGIN(REGEXPOK);return m(T_USHRBY);} +[<][<] {c();BEGIN(REGEXPOK);return m(T_SHL);} +[>][>][>] {c();BEGIN(REGEXPOK);return m(T_USHR);} +[>][>] {c();BEGIN(REGEXPOK);return m(T_SHR);} +\.\.\. {c();BEGIN(REGEXPOK);return m(T_DOTDOTDOT);} +\.\. {c();BEGIN(REGEXPOK);return m(T_DOTDOT);} +\. {c();BEGIN(REGEXPOK);return m('.');} +:: {c();BEGIN(REGEXPOK);return m(T_COLONCOLON);} +: {c();BEGIN(REGEXPOK);return m(':');} +instanceof {c();BEGIN(REGEXPOK);return m(KW_INSTANCEOF);} +implements {c();BEGIN(REGEXPOK);return m(KW_IMPLEMENTS);} +interface {c();BEGIN(INITIAL);return m(KW_INTERFACE);} +namespace {c();BEGIN(INITIAL);return m(KW_NAMESPACE);} +protected {c();BEGIN(INITIAL);return m(KW_PROTECTED);} +undefined {c();BEGIN(INITIAL);return m(KW_UNDEFINED);} +continue {c();BEGIN(INITIAL);return m(KW_CONTINUE);} +override {c();BEGIN(INITIAL);return m(KW_OVERRIDE);} +internal {c();BEGIN(INITIAL);return m(KW_INTERNAL);} +function {c();BEGIN(INITIAL);return m(KW_FUNCTION);} +finally {c();BEGIN(INITIAL);return m(KW_FINALLY);} +default {c();BEGIN(INITIAL);return m(KW_DEFAULT);} +package {c();BEGIN(INITIAL);return m(KW_PACKAGE);} +private {c();BEGIN(INITIAL);return m(KW_PRIVATE);} +dynamic {c();BEGIN(INITIAL);return m(KW_DYNAMIC);} +extends {c();BEGIN(INITIAL);return m(KW_EXTENDS);} +delete {c();BEGIN(REGEXPOK);return m(KW_DELETE);} +return {c();BEGIN(REGEXPOK);return m(KW_RETURN);} +public {c();BEGIN(INITIAL);return m(KW_PUBLIC);} +native {c();BEGIN(INITIAL);return m(KW_NATIVE);} +static {c();BEGIN(INITIAL);return m(KW_STATIC);} +import {c();BEGIN(REGEXPOK);return m(KW_IMPORT);} +typeof {c();BEGIN(REGEXPOK);return m(KW_TYPEOF);} +throw {c();BEGIN(REGEXPOK);return m(KW_THROW);} +class {c();BEGIN(INITIAL);return m(KW_CLASS);} +const {c();BEGIN(INITIAL);return m(KW_CONST);} +catch {c();BEGIN(INITIAL);return m(KW_CATCH);} +final {c();BEGIN(INITIAL);return m(KW_FINAL);} +false {c();BEGIN(INITIAL);return m(KW_FALSE);} +break {c();BEGIN(INITIAL);return m(KW_BREAK);} +super {c();BEGIN(INITIAL);return m(KW_SUPER);} +each {c();BEGIN(INITIAL);return m(KW_EACH);} +void {c();BEGIN(INITIAL);return m(KW_VOID);} +true {c();BEGIN(INITIAL);return m(KW_TRUE);} +null {c();BEGIN(INITIAL);return m(KW_NULL);} +else {c();BEGIN(INITIAL);return m(KW_ELSE);} +case {c();BEGIN(REGEXPOK);return m(KW_CASE);} +with {c();BEGIN(REGEXPOK);return m(KW_WITH);} +use {c();BEGIN(REGEXPOK);return m(KW_USE);} +new {c();BEGIN(REGEXPOK);return m(KW_NEW);} +get {c();BEGIN(INITIAL);return m(KW_GET);} +set {c();BEGIN(INITIAL);return m(KW_SET);} +var {c();BEGIN(INITIAL);return m(KW_VAR);} +try {c();BEGIN(INITIAL);return m(KW_TRY);} +is {c();BEGIN(REGEXPOK);return m(KW_IS) ;} +in {c();BEGIN(REGEXPOK);return m(KW_IN) ;} +if {c();BEGIN(INITIAL);return m(KW_IF) ;} +as {c();BEGIN(REGEXPOK);return m(KW_AS);} +$?{NAME} {c();BEGIN(INITIAL);return handleIdentifier();} + +[\]\}*] {c();BEGIN(INITIAL);return m(yytext[0]);} +[+-\/^~@$!%&\(=\[|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} +[\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);} + +. {/* ERROR */ + char c1=yytext[0]; char buf[128]; buf[0] = yytext[0]; int t; @@ -300,7 +713,7 @@ as {return m(KW_AS);} exit(1); yyterminate(); } -<> {c(); +<> {l(); void*b = leave_file(); if (!b) { yyterminate(); @@ -320,14 +733,29 @@ int yywrap() } static char mbuf[256]; -char*token2string(token_t*t) +char*token2string(enum yytokentype nr, YYSTYPE v) { - int nr=t->type; - if(nr==T_STRING) return ""; + if(nr==T_STRING) { + char*s = malloc(v.str.len+10); + strcpy(s, ""); + memcpy(s+8, v.str.str, v.str.len); + sprintf(s+8+v.str.len, " (%d bytes)", v.str.len); + return s; + } + else if(nr==T_REGEXP) { + char*s = malloc(strlen(v.regexp.pattern)+10); + sprintf(s, "%s", v.regexp.pattern); + return s; + } + else if(nr==T_IDENTIFIER) { + char*s = malloc(strlen(v.id)+10); + sprintf(s, "%s", v.id); + return s; + } else if(nr==T_INT) return ""; else if(nr==T_UINT) return ""; + else if(nr==T_BYTE) return ""; else if(nr==T_FLOAT) return ""; - else if(nr==T_REGEXP) return "REGEXP"; else if(nr==T_EOF) return "***END***"; else if(nr==T_GE) return ">="; else if(nr==T_LE) return "<="; @@ -357,21 +785,16 @@ char*token2string(token_t*t) else if(nr==KW_TRUE) return "True"; else if(nr==KW_UINT) return "uint"; else if(nr==KW_NULL) return "null"; + else if(nr==KW_ELSE) return "else"; else if(nr==KW_USE) return "use"; else if(nr==KW_INT) return "int"; else if(nr==KW_NEW) return "new"; else if(nr==KW_GET) return "get"; - else if(nr==KW_FOR) return "for"; else if(nr==KW_SET) return "set"; else if(nr==KW_VAR) return "var"; else if(nr==KW_IS) return "is"; else if(nr==KW_AS) return "as"; - else if(nr==T_IDENTIFIER) { - if(strlen(t->text)>sizeof(mbuf)-1) - return "ID(...)"; - sprintf(mbuf, "ID(%s)", t->text); - return mbuf; - } else { + else { sprintf(mbuf, "%d", nr); return mbuf; } @@ -382,3 +805,4 @@ void initialize_scanner() BEGIN(BEGINNING); } +