X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fas3%2Ftokenizer.lex;h=2238ace660a17c8e58051a37a50318ee30f9ab58;hb=9e103111a918b0d69c2b25a8a199c54eb719b8ac;hp=dcae2f2b3e0e4b002518170d996814b1a7bb669d;hpb=21ef4c306d38969e8f5fe821a27d155b855735b9;p=swftools.git diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex index dcae2f2..2238ace 100644 --- a/lib/as3/tokenizer.lex +++ b/lib/as3/tokenizer.lex @@ -74,10 +74,23 @@ void syntaxerror(const char*format, ...) va_start(arglist, format); vsprintf(buf, format, arglist); va_end(arglist); - fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename, current_line, current_column, buf); + fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename_short, current_line, current_column, buf); fflush(stderr); exit(1); } +void warning(const char*format, ...) +{ + char buf[1024]; + int l; + va_list arglist; + if(!verbose) + return; + va_start(arglist, format); + vsprintf(buf, format, arglist); + va_end(arglist); + fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf); + fflush(stderr); +} #ifndef YY_CURRENT_BUFFER @@ -116,21 +129,144 @@ void handleInclude(char*text, int len, char quotes) //BEGIN(INITIAL); keep context } +string_t string_unescape(const char*in, int l) +{ + int len=0; + const char*s = in; + const char*end = &in[l]; + char*n = (char*)malloc(l); + char*o = n; + while(s256) + syntaxerror("octal number out of range (0-255): %d", num); + o[len++] = num; + continue; + } + case 'x': case 'u': { + int max=2; + char bracket = 0; + char unicode = 0; + if(*s == 'u') { + max = 6; + unicode = 1; + } + s++; + if(s==end) syntaxerror("invalid \\u or \\x at end of string"); + if(*s == '{') { + s++; + if(s==end) syntaxerror("invalid \\u{ at end of string"); + bracket=1; + } + unsigned int num=0; + int nr = 0; + while(strchr("0123456789abcdefABCDEF", *s) && (bracket || nr < max) && s='0' && *s<='9') num |= *s - '0'; + if(*s>='a' && *s<='f') num |= *s - 'a' + 10; + if(*s>='A' && *s<='F') num |= *s - 'A' + 10; + nr++; + s++; + } + if(bracket) { + if(*s=='}' && s256) + syntaxerror("byte out of range (0-255): %d", num); + o[len++] = num; + } + break; + } + default: + syntaxerror("unknown escape sequence: \"\\%c\"", *s); + } + } + string_t out = string_new(n, len); + o[len]=0; + return out; +} + +static void handleString(char*s, int len) +{ + if(s[0]=='"') { + if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'"); + s++;len-=2; + } + else if(s[0]=='\'') { + if(s[len-1]!='\'') syntaxerror("String doesn't end with '\"'"); + s++;len-=2; + } + else syntaxerror("String incorrectly terminated"); + + + avm2_lval.str = string_unescape(s, len); +} + + char start_of_expression; -static inline int m(int type) +static inline int mkid(int type) { char*s = malloc(yyleng+1); memcpy(s, yytext, yyleng); s[yyleng]=0; + avm2_lval.id = s; + return type; +} - NEW(token_t,t); - t->type = type; - t->text = s; - avm2_lval.token = t; +static inline int m(int type) +{ + avm2_lval.token = type; return type; } + static char numberbuf[64]; static inline int handlenumber() { @@ -211,7 +347,7 @@ NUMBER -?[0-9]+(\.[0-9]*)? STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*['] S [ \n\r\t] -MULTILINE_COMMENT [/][*]([*][^/]|[^*]|[\x00-\x31])*[*]+[/] +MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/] SINGLELINE_COMMENT \/\/[^\n]*\n REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* %% @@ -223,7 +359,7 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* ^include{S}+{STRING}{S}*/\n {c();handleInclude(yytext, yyleng, 1);} ^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {c();handleInclude(yytext, yyleng, 0);} -{STRING} {c(); BEGIN(INITIAL);return m(T_STRING);} +{STRING} {c(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;} { {REGEXP} {c(); BEGIN(INITIAL);return m(T_REGEXP);} @@ -234,49 +370,73 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* {NUMBER} {c(); BEGIN(INITIAL);return handlenumber();} -[>][=] {return m(T_GE);} -[<][=] {return m(T_LE);} -[-][-] {BEGIN(INITIAL);return m(T_MINUSMINUS);} -[+][+] {BEGIN(INITIAL);return m(T_PLUSPLUS);} -== {BEGIN(REGEXPOK);return m(T_EQEQ);} -\.\. {return m(T_DOTDOT);} -\. {return m('.');} -:: {return m(T_COLONCOLON);} -: {return m(':');} -implements {return m(KW_IMPLEMENTS);} -interface {return m(KW_INTERFACE);} -namespace {return m(KW_NAMESPACE);} -protected {return m(KW_PROTECTED);} -override {return m(KW_OVERRIDE);} -internal {return m(KW_INTERNAL);} -function {return m(KW_FUNCTION);} -package {return m(KW_PACKAGE);} -private {return m(KW_PRIVATE);} -Boolean {return m(KW_BOOLEAN);} -dynamic {return m(KW_DYNAMIC);} -extends {return m(KW_EXTENDS);} -public {return m(KW_PUBLIC);} -native {return m(KW_NATIVE);} -static {return m(KW_STATIC);} -import {return m(KW_IMPORT);} -Number {return m(KW_NUMBER);} -class {return m(KW_CLASS);} -const {return m(KW_CONST);} -final {return m(KW_FINAL);} -False {return m(KW_FALSE);} -True {return m(KW_TRUE);} -uint {return m(KW_UINT);} -null {return m(KW_NULL);} -use {return m(KW_USE);} -int {return m(KW_INT);} -new {return m(KW_NEW);} -get {return m(KW_GET);} -for {return m(KW_FOR);} -set {return m(KW_SET);} -var {return m(KW_VAR);} -is {return m(KW_IS) ;} -as {return m(KW_AS);} -{NAME} {c();BEGIN(INITIAL);return m(T_IDENTIFIER);} +3rr0r {/* for debugging: generates a tokenizer-level error */ + syntaxerror("3rr0r");} + +[&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);} +[|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);} +[!][=] {c();BEGIN(REGEXPOK);return m(T_NE);} +[=][=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);} +[=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);} +[>][=] {c();return m(T_GE);} +[<][=] {c();return m(T_LE);} +[-][-] {c();BEGIN(INITIAL);return m(T_MINUSMINUS);} +[+][+] {c();BEGIN(INITIAL);return m(T_PLUSPLUS);} +[+][=] {c();return m(T_PLUSBY);} +[-][=] {c();return m(T_MINUSBY);} +[/][=] {c();return m(T_DIVBY);} +[%][=] {c();return m(T_MODBY);} +[*][=] {c();return m(T_MULBY);} +[>][>][=] {c();return m(T_SHRBY);} +[<][<][=] {c();return m(T_SHLBY);} +[>][>][>][=] {c();return m(T_USHRBY);} +[<][<] {c();return m(T_SHL);} +[>][>][>] {c();return m(T_USHR);} +[>][>] {c();return m(T_SHR);} +\.\.\. {c();return m(T_DOTDOTDOT);} +\.\. {c();return m(T_DOTDOT);} +\. {c();return m('.');} +:: {c();return m(T_COLONCOLON);} +: {c();return m(':');} +implements {c();return m(KW_IMPLEMENTS);} +interface {c();return m(KW_INTERFACE);} +namespace {c();return m(KW_NAMESPACE);} +protected {c();return m(KW_PROTECTED);} +override {c();return m(KW_OVERRIDE);} +internal {c();return m(KW_INTERNAL);} +function {c();return m(KW_FUNCTION);} +package {c();return m(KW_PACKAGE);} +private {c();return m(KW_PRIVATE);} +Boolean {c();return m(KW_BOOLEAN);} +dynamic {c();return m(KW_DYNAMIC);} +extends {c();return m(KW_EXTENDS);} +return {c();return m(KW_RETURN);} +public {c();return m(KW_PUBLIC);} +native {c();return m(KW_NATIVE);} +static {c();return m(KW_STATIC);} +import {c();return m(KW_IMPORT);} +Number {c();return m(KW_NUMBER);} +while {c();return m(KW_WHILE);} +class {c();return m(KW_CLASS);} +const {c();return m(KW_CONST);} +final {c();return m(KW_FINAL);} +false {c();return m(KW_FALSE);} +break {c();return m(KW_BREAK);} +true {c();return m(KW_TRUE);} +uint {c();return m(KW_UINT);} +null {c();return m(KW_NULL);} +else {c();return m(KW_ELSE);} +use {c();return m(KW_USE);} +int {c();return m(KW_INT);} +new {c();return m(KW_NEW);} +get {c();return m(KW_GET);} +for {c();return m(KW_FOR);} +set {c();return m(KW_SET);} +var {c();return m(KW_VAR);} +is {c();return m(KW_IS) ;} +if {c();return m(KW_IF) ;} +as {c();return m(KW_AS);} +{NAME} {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);} [+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} [\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);} @@ -320,9 +480,8 @@ int yywrap() } static char mbuf[256]; -char*token2string(token_t*t) +char*token2string(enum yytokentype nr) { - int nr=t->type; if(nr==T_STRING) return ""; else if(nr==T_INT) return ""; else if(nr==T_UINT) return ""; @@ -357,6 +516,7 @@ char*token2string(token_t*t) else if(nr==KW_TRUE) return "True"; else if(nr==KW_UINT) return "uint"; else if(nr==KW_NULL) return "null"; + else if(nr==KW_ELSE) return "else"; else if(nr==KW_USE) return "use"; else if(nr==KW_INT) return "int"; else if(nr==KW_NEW) return "new"; @@ -366,12 +526,8 @@ char*token2string(token_t*t) else if(nr==KW_VAR) return "var"; else if(nr==KW_IS) return "is"; else if(nr==KW_AS) return "as"; - else if(nr==T_IDENTIFIER) { - if(strlen(t->text)>sizeof(mbuf)-1) - return "ID(...)"; - sprintf(mbuf, "ID(%s)", t->text); - return mbuf; - } else { + else if(nr==T_IDENTIFIER) return "ID"; + else { sprintf(mbuf, "%d", nr); return mbuf; }