X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fas3%2Ftokenizer.lex;h=127379a75d1f65c38cfffbab01ede12e4a86d6c1;hb=71b5bc980e9f3fa962167da70043f0b16e253906;hp=d9eb6da5a057522c7b8156e6ae4a37c7ccfe7587;hpb=c038b6cb3bba62c610da32885e1589011227e6ad;p=swftools.git diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex index d9eb6da..127379a 100644 --- a/lib/as3/tokenizer.lex +++ b/lib/as3/tokenizer.lex @@ -28,53 +28,12 @@ #include #include #include "../utf8.h" +#include "common.h" #include "tokenizer.h" #include "files.h" -int as3_pass = 0; -int as3_verbosity = 1; unsigned int as3_tokencount = 0; -void as3_error(const char*format, ...) -{ - char buf[1024]; - int l; - va_list arglist; - if(as3_verbosity<0) - exit(1); - va_start(arglist, format); - vsprintf(buf, format, arglist); - va_end(arglist); - fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename_short, current_line, current_column, buf); - fflush(stderr); - exit(1); -} -void as3_warning(const char*format, ...) -{ - char buf[1024]; - int l; - va_list arglist; - if(as3_verbosity<1) - return; - va_start(arglist, format); - vsprintf(buf, format, arglist); - va_end(arglist); - fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf); - fflush(stderr); -} -void as3_softwarning(const char*format, ...) -{ - char buf[1024]; - int l; - va_list arglist; - if(as3_verbosity<2) - return; - va_start(arglist, format); - vsprintf(buf, format, arglist); - va_end(arglist); - fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf); - fflush(stderr); -} static void dbg(const char*format, ...) { char buf[1024]; @@ -156,7 +115,7 @@ void handleInclude(char*text, int len, char quotes) filename = strdup(&text[i1]); } - char*fullfilename = find_file(filename); + char*fullfilename = find_file(filename, 1); enter_file2(filename, fullfilename, YY_CURRENT_BUFFER); yyin = fopen(fullfilename, "rb"); if (!yyin) { @@ -259,8 +218,15 @@ static int do_unescape(const char*s, const char*end, char*n) } break; } - default: - syntaxerror("unknown escape sequence: \"\\%c\"", *s); + default: { + if(o) { + o[len+0] = '\\'; + o[len+1] = *s; + } + s++; + len+=2; + break; + } } } if(o) o[len]=0; @@ -288,12 +254,6 @@ static void handleCData(char*s, int len) static void handleString(char*s, int len) { - if(as3_pass < 2) { - // don't bother decoding strings in pass 1 - memset(&a3_lval, 0, sizeof(a3_lval)); - return; - } - if(s[0]=='"') { if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'"); s++;len-=2; @@ -303,7 +263,6 @@ static void handleString(char*s, int len) s++;len-=2; } else syntaxerror("String incorrectly terminated"); - a3_lval.str = string_unescape(s, len); } @@ -332,22 +291,7 @@ static char*nrbuf() static inline int setint(int v) { a3_lval.number_int = v; - if(v>-128) - return T_BYTE; - else if(v>=-32768) - return T_SHORT; - else - return T_INT; -} -static inline int setuint(unsigned int v) -{ - a3_lval.number_uint = v; - if(v<128) - return T_BYTE; - else if(v<32768) - return T_SHORT; - else - return T_UINT; + return T_INT; } static inline int setfloat(double v) { @@ -367,16 +311,18 @@ static inline int handleint() char*s = nrbuf(); char l = (yytext[0]=='-'); - char*max = l?"1073741824":"2147483647"; + //char*max = l?"1073741824":"2147483647"; + char*max = l?"2147483648":"2147483647"; + if(yyleng-l>10) { - as3_warning("integer overflow: %s (converted to Number)", s); + as3_softwarning("integer overflow: %s (converted to Number)", s); return handlefloat(); } if(yyleng-l==10) { int t; for(t=0;tmax[t]) { - as3_warning("integer overflow: %s (converted to Number)", s); + as3_softwarning("integer overflow: %s (converted to Number)", s); return handlefloat(); } else if(yytext[l+t]='a' && c<='f') || (c>='A' && c<='F')) v|=(c&0x0f)+9; } - if(l && v>1073741824) { + if(l && v>=0x80000000) { char*s = nrbuf(); - as3_warning("signed integer overflow: %s (converted to Number)", s); + as3_softwarning("integer overflow: %s (converted to Number)", s); return setfloat(v); } - if(!l && v>2147483647) { + if(!l && v>0x7fffffff) { char*s = nrbuf(); - as3_warning("unsigned integer overflow: %s (converted to Number)", s); + as3_softwarning("integer overflow: %s (converted to Number)", s); return setfloat(v); } if(l==3) { return setint(-(int)v); } else { - return setuint(v); + return setint(v); } } @@ -516,14 +462,18 @@ static inline void c() { current_column+=yyleng; } -static trie_t*namespaces = 0; -void tokenizer_register_namespace(const char*id) +trie_t*active_namespaces = 0; +/*void tokenizer_register_namespace(const char*id) { - trie_put(&namespaces, id); + trie_put(namespaces, id, 0); } +void tokenizer_unregister_namespace(const char*id) +{ + trie_remove(namespaces, id); +}*/ static inline tokenizer_is_namespace(const char*id) { - return trie_lookup(namespaces, id); + return trie_contains(active_namespaces, id); } static inline int handleIdentifier() @@ -566,7 +516,7 @@ CDATA ])*\]*\]\]\> STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*['] S [ \n\r\t] MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/] -SINGLELINE_COMMENT \/\/[^\n]*\n +SINGLELINE_COMMENT \/\/[^\n\r]*[\n\r] REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* %% @@ -588,6 +538,9 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* {FLOATWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlefloat();} } +[\{] {c(); BEGIN(REGEXPOK);return m(T_DICTSTART);} +[\{] {c(); BEGIN(INITIAL); return m('{');} + \xef\xbb\xbf {/* utf 8 bom */} {S} {l();} @@ -595,18 +548,19 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* {HEXFLOAT}/{_} {c(); BEGIN(INITIAL);return handlehexfloat();} {INT}/{_} {c(); BEGIN(INITIAL);return handleint();} {FLOAT}/{_} {c(); BEGIN(INITIAL);return handlefloat();} +NaN {c(); BEGIN(INITIAL);return m(KW_NAN);} 3rr0r {/* for debugging: generates a tokenizer-level error */ syntaxerror("3rr0r");} -{NAME}{S}*:{S}*for/{_} {l();handleLabel(yytext, yyleng-3);return T_FOR;} -{NAME}{S}*:{S}*do/{_} {l();handleLabel(yytext, yyleng-2);return T_DO;} -{NAME}{S}*:{S}*while/{_} {l();handleLabel(yytext, yyleng-5);return T_WHILE;} -{NAME}{S}*:{S}*switch/{_} {l();handleLabel(yytext, yyleng-6);return T_SWITCH;} -for {c();a3_lval.id="";return T_FOR;} -do {c();a3_lval.id="";return T_DO;} -while {c();a3_lval.id="";return T_WHILE;} -switch {c();a3_lval.id="";return T_SWITCH;} +{NAME}{S}*:{S}*for/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-3);return T_FOR;} +{NAME}{S}*:{S}*do/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-2);return T_DO;} +{NAME}{S}*:{S}*while/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-5);return T_WHILE;} +{NAME}{S}*:{S}*switch/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-6);return T_SWITCH;} +for {c();BEGIN(INITIAL);a3_lval.id="";return T_FOR;} +do {c();BEGIN(INITIAL);a3_lval.id="";return T_DO;} +while {c();BEGIN(INITIAL);a3_lval.id="";return T_WHILE;} +switch {c();BEGIN(INITIAL);a3_lval.id="";return T_SWITCH;} [&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);} [|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);} @@ -614,79 +568,81 @@ switch {c();a3_lval.id="";return T_SWITCH;} [!][=][=] {c();BEGIN(REGEXPOK);return m(T_NEE);} [=][=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);} [=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);} -[>][=] {c();return m(T_GE);} -[<][=] {c();return m(T_LE);} +[>][=] {c();BEGIN(REGEXPOK);return m(T_GE);} +[<][=] {c();BEGIN(REGEXPOK);return m(T_LE);} [-][-] {c();BEGIN(INITIAL);return m(T_MINUSMINUS);} [+][+] {c();BEGIN(INITIAL);return m(T_PLUSPLUS);} -[+][=] {c();return m(T_PLUSBY);} -[-][=] {c();return m(T_MINUSBY);} -[/][=] {c();return m(T_DIVBY);} -[%][=] {c();return m(T_MODBY);} -[*][=] {c();return m(T_MULBY);} -[|][=] {c();return m(T_ORBY);} -[>][>][=] {c();return m(T_SHRBY);} -[<][<][=] {c();return m(T_SHLBY);} -[>][>][>][=] {c();return m(T_USHRBY);} -[<][<] {c();return m(T_SHL);} -[>][>][>] {c();return m(T_USHR);} -[>][>] {c();return m(T_SHR);} -\.\.\. {c();return m(T_DOTDOTDOT);} -\.\. {c();return m(T_DOTDOT);} -\. {c();return m('.');} -:: {c();return m(T_COLONCOLON);} -: {c();return m(':');} -instanceof {c();return m(KW_INSTANCEOF);} -implements {c();return m(KW_IMPLEMENTS);} -interface {c();return m(KW_INTERFACE);} -namespace {c();return m(KW_NAMESPACE);} -protected {c();return m(KW_PROTECTED);} -undefined {c();return m(KW_UNDEFINED);} -continue {c();return m(KW_CONTINUE);} -override {c();return m(KW_OVERRIDE);} -internal {c();return m(KW_INTERNAL);} -function {c();return m(KW_FUNCTION);} -finally {c();return m(KW_FINALLY);} -default {c();return m(KW_DEFAULT);} -package {c();return m(KW_PACKAGE);} -private {c();return m(KW_PRIVATE);} -dynamic {c();return m(KW_DYNAMIC);} -extends {c();return m(KW_EXTENDS);} -delete {c();return m(KW_DELETE);} -return {c();return m(KW_RETURN);} -public {c();return m(KW_PUBLIC);} -native {c();return m(KW_NATIVE);} -static {c();return m(KW_STATIC);} -import {c();return m(KW_IMPORT);} -typeof {c();return m(KW_TYPEOF);} -throw {c();return m(KW_THROW);} -class {c();return m(KW_CLASS);} -const {c();return m(KW_CONST);} -catch {c();return m(KW_CATCH);} -final {c();return m(KW_FINAL);} -false {c();return m(KW_FALSE);} -break {c();return m(KW_BREAK);} -super {c();return m(KW_SUPER);} -each {c();return m(KW_EACH);} -void {c();return m(KW_VOID);} -true {c();return m(KW_TRUE);} -null {c();return m(KW_NULL);} -else {c();return m(KW_ELSE);} -case {c();return m(KW_CASE);} -with {c();return m(KW_WITH);} -use {c();return m(KW_USE);} -new {c();return m(KW_NEW);} -get {c();return m(KW_GET);} -set {c();return m(KW_SET);} -var {c();return m(KW_VAR);} -try {c();return m(KW_TRY);} -is {c();return m(KW_IS) ;} -in {c();return m(KW_IN) ;} -if {c();return m(KW_IF) ;} -as {c();return m(KW_AS);} -{NAME} {c();BEGIN(INITIAL);return handleIdentifier();} - -[\]\}] {c();BEGIN(INITIAL);return m(yytext[0]);} -[+-\/*^~@$!%&\(=\[\{|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} +[+][=] {c();BEGIN(REGEXPOK);return m(T_PLUSBY);} +[\^][=] {c();BEGIN(REGEXPOK);return m(T_XORBY);} +[-][=] {c();BEGIN(REGEXPOK);return m(T_MINUSBY);} +[/][=] {c();BEGIN(REGEXPOK);return m(T_DIVBY);} +[%][=] {c();BEGIN(REGEXPOK);return m(T_MODBY);} +[*][=] {c();BEGIN(REGEXPOK);return m(T_MULBY);} +[|][=] {c();BEGIN(REGEXPOK);return m(T_ORBY);} +[&][=] {c();BEGIN(REGEXPOK);return m(T_ANDBY);} +[>][>][=] {c();BEGIN(REGEXPOK);return m(T_SHRBY);} +[<][<][=] {c();BEGIN(REGEXPOK);return m(T_SHLBY);} +[>][>][>][=] {c();BEGIN(REGEXPOK);return m(T_USHRBY);} +[<][<] {c();BEGIN(REGEXPOK);return m(T_SHL);} +[>][>][>] {c();BEGIN(REGEXPOK);return m(T_USHR);} +[>][>] {c();BEGIN(REGEXPOK);return m(T_SHR);} +\.\.\. {c();BEGIN(REGEXPOK);return m(T_DOTDOTDOT);} +\.\. {c();BEGIN(REGEXPOK);return m(T_DOTDOT);} +\. {c();BEGIN(REGEXPOK);return m('.');} +:: {c();BEGIN(REGEXPOK);return m(T_COLONCOLON);} +: {c();BEGIN(REGEXPOK);return m(':');} +instanceof {c();BEGIN(REGEXPOK);return m(KW_INSTANCEOF);} +implements {c();BEGIN(REGEXPOK);return m(KW_IMPLEMENTS);} +interface {c();BEGIN(INITIAL);return m(KW_INTERFACE);} +namespace {c();BEGIN(INITIAL);return m(KW_NAMESPACE);} +protected {c();BEGIN(INITIAL);return m(KW_PROTECTED);} +undefined {c();BEGIN(INITIAL);return m(KW_UNDEFINED);} +continue {c();BEGIN(INITIAL);return m(KW_CONTINUE);} +override {c();BEGIN(INITIAL);return m(KW_OVERRIDE);} +internal {c();BEGIN(INITIAL);return m(KW_INTERNAL);} +function {c();BEGIN(INITIAL);return m(KW_FUNCTION);} +finally {c();BEGIN(INITIAL);return m(KW_FINALLY);} +default {c();BEGIN(INITIAL);return m(KW_DEFAULT);} +package {c();BEGIN(INITIAL);return m(KW_PACKAGE);} +private {c();BEGIN(INITIAL);return m(KW_PRIVATE);} +dynamic {c();BEGIN(INITIAL);return m(KW_DYNAMIC);} +extends {c();BEGIN(INITIAL);return m(KW_EXTENDS);} +delete {c();BEGIN(REGEXPOK);return m(KW_DELETE);} +return {c();BEGIN(REGEXPOK);return m(KW_RETURN);} +public {c();BEGIN(INITIAL);return m(KW_PUBLIC);} +native {c();BEGIN(INITIAL);return m(KW_NATIVE);} +static {c();BEGIN(INITIAL);return m(KW_STATIC);} +import {c();BEGIN(REGEXPOK);return m(KW_IMPORT);} +typeof {c();BEGIN(REGEXPOK);return m(KW_TYPEOF);} +throw {c();BEGIN(REGEXPOK);return m(KW_THROW);} +class {c();BEGIN(INITIAL);return m(KW_CLASS);} +const {c();BEGIN(INITIAL);return m(KW_CONST);} +catch {c();BEGIN(INITIAL);return m(KW_CATCH);} +final {c();BEGIN(INITIAL);return m(KW_FINAL);} +false {c();BEGIN(INITIAL);return m(KW_FALSE);} +break {c();BEGIN(INITIAL);return m(KW_BREAK);} +super {c();BEGIN(INITIAL);return m(KW_SUPER);} +each {c();BEGIN(INITIAL);return m(KW_EACH);} +void {c();BEGIN(INITIAL);return m(KW_VOID);} +true {c();BEGIN(INITIAL);return m(KW_TRUE);} +null {c();BEGIN(INITIAL);return m(KW_NULL);} +else {c();BEGIN(INITIAL);return m(KW_ELSE);} +case {c();BEGIN(REGEXPOK);return m(KW_CASE);} +with {c();BEGIN(REGEXPOK);return m(KW_WITH);} +use {c();BEGIN(REGEXPOK);return m(KW_USE);} +new {c();BEGIN(REGEXPOK);return m(KW_NEW);} +get {c();BEGIN(INITIAL);return m(KW_GET);} +set {c();BEGIN(INITIAL);return m(KW_SET);} +var {c();BEGIN(INITIAL);return m(KW_VAR);} +try {c();BEGIN(INITIAL);return m(KW_TRY);} +is {c();BEGIN(REGEXPOK);return m(KW_IS) ;} +in {c();BEGIN(REGEXPOK);return m(KW_IN) ;} +if {c();BEGIN(INITIAL);return m(KW_IF) ;} +as {c();BEGIN(REGEXPOK);return m(KW_AS);} +$?{NAME} {c();BEGIN(INITIAL);return handleIdentifier();} + +[\]\}*] {c();BEGIN(INITIAL);return m(yytext[0]);} +[+-\/^~@$!%&\(=\[|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} [\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);} . {/* ERROR */ @@ -731,12 +687,26 @@ int yywrap() static char mbuf[256]; char*token2string(enum yytokentype nr, YYSTYPE v) { - if(nr==T_STRING) return ""; + if(nr==T_STRING) { + char*s = malloc(v.str.len+10); + strcpy(s, ""); + memcpy(s+8, v.str.str, v.str.len); + sprintf(s+8+v.str.len, " (%d bytes)", v.str.len); + return s; + } + else if(nr==T_REGEXP) { + char*s = malloc(strlen(v.regexp.pattern)+10); + sprintf(s, "%s", v.regexp.pattern); + return s; + } + else if(nr==T_IDENTIFIER) { + char*s = malloc(strlen(v.id)+10); + sprintf(s, "%s", v.id); + return s; + } else if(nr==T_INT) return ""; else if(nr==T_UINT) return ""; - else if(nr==T_BYTE) return ""; else if(nr==T_FLOAT) return ""; - else if(nr==T_REGEXP) return "REGEXP"; else if(nr==T_EOF) return "***END***"; else if(nr==T_GE) return ">="; else if(nr==T_LE) return "<="; @@ -775,7 +745,6 @@ char*token2string(enum yytokentype nr, YYSTYPE v) else if(nr==KW_VAR) return "var"; else if(nr==KW_IS) return "is"; else if(nr==KW_AS) return "as"; - else if(nr==T_IDENTIFIER) return "ID"; else { sprintf(mbuf, "%d", nr); return mbuf;