X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fas3%2Ftokenizer.lex;h=8002657a200fb1f859827772ed9d08d007e24c44;hb=d55df27a66db460b764ba10b3f474d7b91fb4814;hp=54d04c5e4605dfff114c5c77e6e27a1f1b93966c;hpb=4b34eeaf1908b9978c072ca4e8e5b3eee8a1c0f1;p=swftools.git diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex index 54d04c5..8002657 100644 --- a/lib/as3/tokenizer.lex +++ b/lib/as3/tokenizer.lex @@ -95,12 +95,45 @@ static void dbg(const char*format, ...) fflush(stdout); } - - #ifndef YY_CURRENT_BUFFER #define YY_CURRENT_BUFFER yy_current_buffer #endif +static void*as3_buffer=0; +static int as3_buffer_pos=0; +static int as3_buffer_len=0; +void as3_file_input(FILE*fi) +{ + as3_in = fi; + as3_buffer = 0; +} +void as3_buffer_input(void*buffer, int len) +{ + if(!buffer) + syntaxerror("trying to parse zero bytearray"); + as3_buffer = buffer; + as3_buffer_len = len; + as3_buffer_pos = 0; + as3_in = 0; +} + +#define YY_INPUT(buf,result,max_size) { \ + if(!as3_buffer) { \ + errno=0; \ + while((result = fread(buf, 1, max_size, as3_in))==0 && ferror(as3_in)) \ + { if(errno != EINTR) {YY_FATAL_ERROR("input in flex scanner failed"); break;} \ + errno=0; clearerr(as3_in); \ + } \ + } else { \ + int to_read = max_size; \ + if(to_read + as3_buffer_pos > as3_buffer_len) \ + to_read = as3_buffer_len - as3_buffer_pos; \ + memcpy(buf, as3_buffer+as3_buffer_pos, to_read); \ + as3_buffer_pos += to_read; \ + result=to_read; \ + } \ +} + void handleInclude(char*text, int len, char quotes) { char*filename = 0; @@ -123,7 +156,8 @@ void handleInclude(char*text, int len, char quotes) filename = strdup(&text[i1]); } - char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER); + char*fullfilename = find_file(filename); + enter_file2(filename, fullfilename, YY_CURRENT_BUFFER); yyin = fopen(fullfilename, "rb"); if (!yyin) { syntaxerror("Couldn't open include file \"%s\"\n", fullfilename); @@ -249,6 +283,7 @@ static void handleCData(char*s, int len) { a3_lval.str.str = s+9; // + a3_lval.str.str = strdup_n(a3_lval.str.str, a3_lval.str.len); } static void handleString(char*s, int len) @@ -276,15 +311,6 @@ static void handleString(char*s, int len) char start_of_expression; -static inline int mkid(int type) -{ - char*s = malloc(yyleng+1); - memcpy(s, yytext, yyleng); - s[yyleng]=0; - a3_lval.id = s; - return type; -} - static inline int m(int type) { a3_lval.token = type; @@ -490,12 +516,34 @@ static inline void c() { current_column+=yyleng; } +static trie_t*namespaces = 0; +void tokenizer_register_namespace(const char*id) +{ + trie_put(&namespaces, id); +} +static inline tokenizer_is_namespace(const char*id) +{ + return trie_lookup(namespaces, id); +} + +static inline int handleIdentifier() +{ + char*s = malloc(yyleng+1); + memcpy(s, yytext, yyleng); + s[yyleng]=0; + a3_lval.id = s; + if(tokenizer_is_namespace(s)) + return T_NAMESPACE; + else + return T_IDENTIFIER; +} + + //Boolean {c();return m(KW_BOOLEAN);} //int {c();return m(KW_INT);} //uint {c();return m(KW_UINT);} //Number {c();return m(KW_NUMBER);} - %} %s REGEXPOK @@ -507,7 +555,7 @@ _ [^a-zA-Z0-9_\\] HEXINT 0x[a-zA-Z0-9]+ HEXFLOAT 0x[a-zA-Z0-9]*\.[a-zA-Z0-9]* INT [0-9]+ -FLOAT [0-9]+(\.[0-9]*)?|\.[0-9]+ +FLOAT ([0-9]+(\.[0-9]*)?|\.[0-9]+)(e[0-9]+)? HEXWITHSIGN [+-]?({HEXINT}) HEXFLOATWITHSIGN [+-]?({HEXFLOAT}) @@ -518,7 +566,7 @@ CDATA ])*\]*\]\]\> STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*['] S [ \n\r\t] MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/] -SINGLELINE_COMMENT \/\/[^\n]*\n +SINGLELINE_COMMENT \/\/[^\n\r]*[\n\r] REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* %% @@ -534,19 +582,19 @@ REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* { {REGEXP} {c(); BEGIN(INITIAL);return handleregexp();} -{HEXWITHSIGN} {c(); BEGIN(INITIAL);return handlehex();} -{HEXFLOATWITHSIGN} {c(); BEGIN(INITIAL);return handlehexfloat();} -{INTWITHSIGN} {c(); BEGIN(INITIAL);return handleint();} -{FLOATWITHSIGN} {c(); BEGIN(INITIAL);return handlefloat();} +{HEXWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlehex();} +{HEXFLOATWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlehexfloat();} +{INTWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handleint();} +{FLOATWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlefloat();} } \xef\xbb\xbf {/* utf 8 bom */} {S} {l();} -{HEXINT} {c(); BEGIN(INITIAL);return handlehex();} -{HEXFLOAT} {c(); BEGIN(INITIAL);return handlehexfloat();} -{INT} {c(); BEGIN(INITIAL);return handleint();} -{FLOAT} {c(); BEGIN(INITIAL);return handlefloat();} +{HEXINT}/{_} {c(); BEGIN(INITIAL);return handlehex();} +{HEXFLOAT}/{_} {c(); BEGIN(INITIAL);return handlehexfloat();} +{INT}/{_} {c(); BEGIN(INITIAL);return handleint();} +{FLOAT}/{_} {c(); BEGIN(INITIAL);return handlefloat();} 3rr0r {/* for debugging: generates a tokenizer-level error */ syntaxerror("3rr0r");} @@ -635,9 +683,10 @@ is {c();return m(KW_IS) ;} in {c();return m(KW_IN) ;} if {c();return m(KW_IF) ;} as {c();return m(KW_AS);} -{NAME} {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);} +{NAME} {c();BEGIN(INITIAL);return handleIdentifier();} -[+-\/*^~@$!%&\(=\[\]\{\}|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} +[\]\}] {c();BEGIN(INITIAL);return m(yytext[0]);} +[+-\/*^~@$!%&\(=\[\{|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} [\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);} . {/* ERROR */ @@ -682,12 +731,27 @@ int yywrap() static char mbuf[256]; char*token2string(enum yytokentype nr, YYSTYPE v) { - if(nr==T_STRING) return ""; + if(nr==T_STRING) { + char*s = malloc(v.str.len+10); + strcpy(s, ""); + memcpy(s+8, v.str.str, v.str.len); + sprintf(s+8+v.str.len, " (%d bytes)", v.str.len); + return s; + } + else if(nr==T_REGEXP) { + char*s = malloc(strlen(v.regexp.pattern)+10); + sprintf(s, "%s", v.regexp.pattern); + return s; + } + else if(nr==T_IDENTIFIER) { + char*s = malloc(strlen(v.id)+10); + sprintf(s, "%s", v.id); + return s; + } else if(nr==T_INT) return ""; else if(nr==T_UINT) return ""; else if(nr==T_BYTE) return ""; else if(nr==T_FLOAT) return ""; - else if(nr==T_REGEXP) return "REGEXP"; else if(nr==T_EOF) return "***END***"; else if(nr==T_GE) return ">="; else if(nr==T_LE) return "<="; @@ -726,7 +790,6 @@ char*token2string(enum yytokentype nr, YYSTYPE v) else if(nr==KW_VAR) return "var"; else if(nr==KW_IS) return "is"; else if(nr==KW_AS) return "as"; - else if(nr==T_IDENTIFIER) return "ID"; else { sprintf(mbuf, "%d", nr); return mbuf; @@ -738,3 +801,4 @@ void initialize_scanner() BEGIN(BEGINNING); } +