X-Git-Url: http://git.asbjorn.biz/?p=swftools.git;a=blobdiff_plain;f=lib%2Fas3%2Ftokenizer.lex;h=ece94d035a742f3a1bd0a5d393ac422cf221e758;hp=4647d3065879a21526a0920bc56b8e96b5eeba5b;hb=3e303bea7eb10c99a7b3808f0c355ee63188eb9b;hpb=34e395874f66f71ed68a3d74ef9d6b9f706eff06 diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex index 4647d30..ece94d0 100644 --- a/lib/as3/tokenizer.lex +++ b/lib/as3/tokenizer.lex @@ -28,31 +28,21 @@ #include #include #include "../utf8.h" +#include "common.h" #include "tokenizer.h" #include "files.h" -static void countlines(char*text, int len) { - int t; - for(t=0;t as3_buffer_len) \ + to_read = as3_buffer_len - as3_buffer_pos; \ + memcpy(buf, as3_buffer+as3_buffer_pos, to_read); \ + as3_buffer_pos += to_read; \ + result=to_read; \ + } \ +} void handleInclude(char*text, int len, char quotes) { @@ -111,22 +110,23 @@ void handleInclude(char*text, int len, char quotes) } else { int i1=0,i2=len; // find start - while(!strchr(" \n\r\t", text[i1])) i1++; + while(!strchr(" \n\r\t\xa0", text[i1])) i1++; // strip - while(strchr(" \n\r\t", text[i1])) i1++; - while(strchr(" \n\r\t", text[i2-1])) i2--; + while(strchr(" \n\r\t\xa0", text[i1])) i1++; + while(strchr(" \n\r\t\xa0", text[i2-1])) i2--; if(i2!=len) text[i2]=0; filename = strdup(&text[i1]); } - char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER); + char*fullfilename = find_file(filename, 1); + enter_file2(filename, fullfilename, YY_CURRENT_BUFFER); yyin = fopen(fullfilename, "rb"); if (!yyin) { syntaxerror("Couldn't open include file \"%s\"\n", fullfilename); } yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) ); - //BEGIN(INITIAL); keep context + //BEGIN(DEFAULT); keep context } static int do_unescape(const char*s, const char*end, char*n) @@ -157,6 +157,7 @@ static int do_unescape(const char*s, const char*end, char*n) switch(*s) { case '\\': if(o) o[len] = '\\';s++;len++; break; case '"': if(o) o[len] = '"';s++;len++; break; + case '\'': if(o) o[len] = '\'';s++;len++; break; case 'b': if(o) o[len] = '\b';s++;len++; break; case 'f': if(o) o[len] = '\f';s++;len++; break; case 'n': if(o) o[len] = '\n';s++;len++; break; @@ -220,8 +221,15 @@ static int do_unescape(const char*s, const char*end, char*n) } break; } - default: - syntaxerror("unknown escape sequence: \"\\%c\"", *s); + default: { + if(o) { + o[len+0] = '\\'; + o[len+1] = *s; + } + s++; + len+=2; + break; + } } } if(o) o[len]=0; @@ -240,6 +248,19 @@ static string_t string_unescape(const char*in, int l) return out; } +static void handleCData(char*s, int len) +{ + a3_lval.str.str = s+9; // + a3_lval.str.str = strdup_n(a3_lval.str.str, a3_lval.str.len); +} + +static void handleRaw(char*s, int len) +{ + a3_lval.str.len = len; + a3_lval.str.str = strdup_n(s, a3_lval.str.len); +} + static void handleString(char*s, int len) { if(s[0]=='"') { @@ -251,30 +272,19 @@ static void handleString(char*s, int len) s++;len-=2; } else syntaxerror("String incorrectly terminated"); - - avm2_lval.str = string_unescape(s, len); + a3_lval.str = string_unescape(s, len); } char start_of_expression; -static inline int mkid(int type) -{ - char*s = malloc(yyleng+1); - memcpy(s, yytext, yyleng); - s[yyleng]=0; - avm2_lval.id = s; - return type; -} - static inline int m(int type) { - avm2_lval.token = type; + a3_lval.token = type; return type; } - static char numberbuf[64]; static char*nrbuf() { @@ -288,29 +298,19 @@ static char*nrbuf() static inline int setint(int v) { - avm2_lval.number_int = v; - if(v>-128) - return T_BYTE; - else if(v>=-32768) - return T_SHORT; - else - return T_INT; + a3_lval.number_int = v; + return T_INT; } -static inline int setuint(unsigned int v) +static inline int setfloat(double v) { - avm2_lval.number_uint = v; - if(v<128) - return T_BYTE; - else if(v<32768) - return T_SHORT; - else - return T_UINT; + a3_lval.number_float = v; + return T_FLOAT; } static inline int handlefloat() { char*s = nrbuf(); - avm2_lval.number_float = atof(s); + a3_lval.number_float = atof(s); return T_FLOAT; } @@ -319,14 +319,20 @@ static inline int handleint() char*s = nrbuf(); char l = (yytext[0]=='-'); - char*max = l?"1073741824":"2147483647"; - if(yyleng-l>10) - syntaxerror("integer overflow"); + //char*max = l?"1073741824":"2147483647"; + char*max = l?"2147483648":"2147483647"; + + if(yyleng-l>10) { + as3_softwarning("integer overflow: %s (converted to Number)", s); + return handlefloat(); + } if(yyleng-l==10) { int t; for(t=0;tmax[t]) - syntaxerror("integer overflow %s > %s", s+l,max); + if(yytext[l+t]>max[t]) { + as3_softwarning("integer overflow: %s (converted to Number)", s); + return handlefloat(); + } else if(yytext[l+t]='0' && c<='9') + d+=(c&15)*base; + else if((c>='a' && c<='f') || (c>='A' && c<='F')) + d+=((c&0x0f)+9)*base; + } + return setfloat(d); +} static inline int handlehex() { char l = (yytext[0]=='-')+2; + int len = yyleng; + + if(len-l>8) { + char*s = nrbuf(); + syntaxerror("integer overflow %s", s); + } - if(yyleng-l>8) - syntaxerror("integer overflow"); int t; unsigned int v = 0; - for(t=l;t='0' && c<='9') v|=(c&15); - else if(c>='a' && c<='f' || - c>='A' && c<='F') + else if((c>='a' && c<='f') || (c>='A' && c<='F')) v|=(c&0x0f)+9; } - if(l && v>1073741824) - syntaxerror("signed integer overflow"); - if(!l && v>2147483647) - syntaxerror("unsigned integer overflow"); + if(l && v>=0x80000000) { + char*s = nrbuf(); + as3_softwarning("integer overflow: %s (converted to Number)", s); + return setfloat(v); + } + if(!l && v>0x7fffffff) { + char*s = nrbuf(); + as3_softwarning("integer overflow: %s (converted to Number)", s); + return setfloat(v); + } if(l==3) { return setint(-(int)v); } else { - return setuint(v); + return setint(v); } } @@ -379,83 +419,195 @@ void handleLabel(char*text, int len) int t; for(t=len-1;t>=0;--t) { if(text[t]!=' ' && - text[t]!='.') + text[t]!=':') break; } char*s = malloc(t+1); memcpy(s, yytext, t); s[t]=0; - avm2_lval.id = s; + a3_lval.id = s; +} + +static int handleregexp() +{ + char*s = malloc(yyleng); + int len=yyleng-1; + memcpy(s, yytext+1, len); + s[len] = 0; + int t; + for(t=len;t>=0;--t) { + if(s[t]=='/') { + s[t] = 0; + break; + } + } + a3_lval.regexp.pattern = s; + if(t==len) { + a3_lval.regexp.options = 0; + } else { + a3_lval.regexp.options = s+t+1; + } + return T_REGEXP; } void initialize_scanner(); #define YY_USER_INIT initialize_scanner(); -#define c() {countlines(yytext, yyleng);} +/* count the number of lines+columns consumed by this token */ +static inline void l() { + int t; + for(t=0;t]|(-/[^-])|(--/[^>]))*--> +//{XMLCOMMENT} %} %s REGEXPOK %s BEGINNING +%s DEFAULT +%x XMLTEXT +%x XML NAME [a-zA-Z_][a-zA-Z0-9_\\]* _ [^a-zA-Z0-9_\\] HEXINT 0x[a-zA-Z0-9]+ +HEXFLOAT 0x[a-zA-Z0-9]*\.[a-zA-Z0-9]* INT [0-9]+ -FLOAT [0-9]+(\.[0-9]*)?|\.[0-9]+ +FLOAT ([0-9]+(\.[0-9]*)?|\.[0-9]+)(e[0-9]+)? HEXWITHSIGN [+-]?({HEXINT}) +HEXFLOATWITHSIGN [+-]?({HEXFLOAT}) INTWITHSIGN [+-]?({INT}) FLOATWITHSIGN [+-]?({FLOAT}) +CDATA ])*\]*\]\]\> +XMLCOMMENT +XML <[^>]+{S}> +XMLID [A-Za-z0-9_\x80-\xff]+([:][A-Za-z0-9_\x80-\xff]+)? +XMLSTRING ["][^"]*["] + STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*['] -S [ \n\r\t] +S [ \n\r\t\xa0] MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/] -SINGLELINE_COMMENT \/\/[^\n]*\n +SINGLELINE_COMMENT \/\/[^\n\r]*[\n\r] REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]* %% -{SINGLELINE_COMMENT} {c(); /* single line comment */} -{MULTILINE_COMMENT} {c(); /* multi line comment */} +{SINGLELINE_COMMENT} {l(); /* single line comment */} +{MULTILINE_COMMENT} {l(); /* multi line comment */} [/][*] {syntaxerror("syntax error: unterminated comment", yytext);} -^include{S}+{STRING}{S}*/\n {c();handleInclude(yytext, yyleng, 1);} -^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {c();handleInclude(yytext, yyleng, 0);} -{STRING} {c(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;} +^include{S}+{STRING}{S}*/\n {l();handleInclude(yytext, yyleng, 1);} +^include{S}+[^" \t\xa0\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);} +{STRING} {l(); BEGIN(DEFAULT);handleString(yytext, yyleng);return T_STRING;} +{CDATA} {l(); BEGIN(DEFAULT);handleCData(yytext, yyleng);return T_STRING;} + +{ +{XMLCOMMENT} {l(); BEGIN(DEFAULT);handleRaw(yytext, yyleng);return T_STRING;} +} + +{ +{XMLSTRING} {l(); handleRaw(yytext, yyleng);return T_STRING;} +[{] {c(); BEGIN(REGEXPOK);return m('{');} +[<] {c(); return m('<');} +[/] {c(); return m('/');} +[>] {c(); return m('>');} +[=] {c(); return m('=');} +{XMLID} {c(); handleRaw(yytext, yyleng);return T_IDENTIFIER;} +{S} {l();} +<> {syntaxerror("unexpected end of file");} +} + +{ +[^<>{]+ {l(); handleRaw(yytext, yyleng);return T_STRING;} +[{] {c(); BEGIN(REGEXPOK);return m('{');} +[<] {c(); BEGIN(XML);return m('<');} +[>] {c(); return m('>');} +{XMLCOMMENT} {l(); handleRaw(yytext, yyleng);return T_STRING;} +{CDATA} {l(); handleRaw(yytext, yyleng);return T_STRING;} +<> {syntaxerror("unexpected end of file");} +} { -{REGEXP} {c(); BEGIN(INITIAL);return m(T_REGEXP);} -{HEXWITHSIGN} {c(); BEGIN(INITIAL);return handlehex();} -{INTWITHSIGN} {c(); BEGIN(INITIAL);return handleint();} -{FLOATWITHSIGN} {c(); BEGIN(INITIAL);return handlefloat();} +{REGEXP} {c(); BEGIN(DEFAULT);return handleregexp();} +{HEXWITHSIGN}/{_} {c(); BEGIN(DEFAULT);return handlehex();} +{HEXFLOATWITHSIGN}/{_} {c(); BEGIN(DEFAULT);return handlehexfloat();} +{INTWITHSIGN}/{_} {c(); BEGIN(DEFAULT);return handleint();} +{FLOATWITHSIGN}/{_} {c(); BEGIN(DEFAULT);return handlefloat();} } +[\{] {c(); BEGIN(REGEXPOK);return m(T_DICTSTART);} +[\{] {c(); BEGIN(DEFAULT); return m('{');} + \xef\xbb\xbf {/* utf 8 bom */} -{S} {c();} +{S} {l();} -{HEXINT} {c(); BEGIN(INITIAL);return handlehex();} -{INT} {c(); BEGIN(INITIAL);return handleint();} -{FLOAT} {c(); BEGIN(INITIAL);return handlefloat();} +{HEXINT}/{_} {c(); BEGIN(DEFAULT);return handlehex();} +{HEXFLOAT}/{_} {c(); BEGIN(DEFAULT);return handlehexfloat();} +{INT}/{_} {c(); BEGIN(DEFAULT);return handleint();} +{FLOAT}/{_} {c(); BEGIN(DEFAULT);return handlefloat();} +NaN {c(); BEGIN(DEFAULT);return m(KW_NAN);} 3rr0r {/* for debugging: generates a tokenizer-level error */ syntaxerror("3rr0r");} -{NAME}{S}*:{S}*for/{_} {c();handleLabel(yytext, yyleng-3);return T_FOR;} -{NAME}{S}*:{S}*do/{_} {c();handleLabel(yytext, yyleng-2);return T_DO;} -{NAME}{S}*:{S}*while/{_} {c();handleLabel(yytext, yyleng-5);return T_WHILE;} -{NAME}{S}*:{S}*switch/{_} {c();handleLabel(yytext, yyleng-6);return T_SWITCH;} -for {c();avm2_lval.id="";return T_FOR;} -do {c();avm2_lval.id="";return T_DO;} -while {c();avm2_lval.id="";return T_WHILE;} -switch {c();avm2_lval.id="";return T_SWITCH;} +{NAME}{S}*:{S}*for/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-3);return T_FOR;} +{NAME}{S}*:{S}*do/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-2);return T_DO;} +{NAME}{S}*:{S}*while/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-5);return T_WHILE;} +{NAME}{S}*:{S}*switch/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-6);return T_SWITCH;} +default{S}xml {l();BEGIN(DEFAULT);return m(KW_DEFAULT_XML);} +for {c();BEGIN(DEFAULT);a3_lval.id="";return T_FOR;} +do {c();BEGIN(DEFAULT);a3_lval.id="";return T_DO;} +while {c();BEGIN(DEFAULT);a3_lval.id="";return T_WHILE;} +switch {c();BEGIN(DEFAULT);a3_lval.id="";return T_SWITCH;} [&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);} [|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);} @@ -463,98 +615,88 @@ switch {c();avm2_lval.id="";return T_SWITCH;} [!][=][=] {c();BEGIN(REGEXPOK);return m(T_NEE);} [=][=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);} [=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);} -[>][=] {c();return m(T_GE);} -[<][=] {c();return m(T_LE);} -[-][-] {c();BEGIN(INITIAL);return m(T_MINUSMINUS);} -[+][+] {c();BEGIN(INITIAL);return m(T_PLUSPLUS);} -[+][=] {c();return m(T_PLUSBY);} -[-][=] {c();return m(T_MINUSBY);} -[/][=] {c();return m(T_DIVBY);} -[%][=] {c();return m(T_MODBY);} -[*][=] {c();return m(T_MULBY);} -[>][>][=] {c();return m(T_SHRBY);} -[<][<][=] {c();return m(T_SHLBY);} -[>][>][>][=] {c();return m(T_USHRBY);} -[<][<] {c();return m(T_SHL);} -[>][>][>] {c();return m(T_USHR);} -[>][>] {c();return m(T_SHR);} -\.\.\. {c();return m(T_DOTDOTDOT);} -\.\. {c();return m(T_DOTDOT);} -\. {c();return m('.');} -:: {c();return m(T_COLONCOLON);} -: {c();return m(':');} -instanceof {c();return m(KW_INSTANCEOF);} -implements {c();return m(KW_IMPLEMENTS);} -interface {c();return m(KW_INTERFACE);} -namespace {c();return m(KW_NAMESPACE);} -protected {c();return m(KW_PROTECTED);} -undefined {c();return m(KW_UNDEFINED);} -continue {c();return m(KW_CONTINUE);} -override {c();return m(KW_OVERRIDE);} -internal {c();return m(KW_INTERNAL);} -function {c();return m(KW_FUNCTION);} -default {c();return m(KW_DEFAULT);} -package {c();return m(KW_PACKAGE);} -private {c();return m(KW_PRIVATE);} -dynamic {c();return m(KW_DYNAMIC);} -extends {c();return m(KW_EXTENDS);} -delete {c();return m(KW_DELETE);} -return {c();return m(KW_RETURN);} -public {c();return m(KW_PUBLIC);} -native {c();return m(KW_NATIVE);} -static {c();return m(KW_STATIC);} -import {c();return m(KW_IMPORT);} -typeof {c();return m(KW_TYPEOF);} -throw {c();return m(KW_THROW);} -class {c();return m(KW_CLASS);} -const {c();return m(KW_CONST);} -catch {c();return m(KW_CATCH);} -final {c();return m(KW_FINAL);} -false {c();return m(KW_FALSE);} -break {c();return m(KW_BREAK);} -super {c();return m(KW_SUPER);} -each {c();return m(KW_EACH);} -void {c();return m(KW_VOID);} -true {c();return m(KW_TRUE);} -null {c();return m(KW_NULL);} -else {c();return m(KW_ELSE);} -case {c();return m(KW_CASE);} -with {c();return m(KW_WITH);} -use {c();return m(KW_USE);} -new {c();return m(KW_NEW);} -get {c();return m(KW_GET);} -set {c();return m(KW_SET);} -var {c();return m(KW_VAR);} -try {c();return m(KW_TRY);} -is {c();return m(KW_IS) ;} -in {c();return m(KW_IN) ;} -if {c();return m(KW_IF) ;} -as {c();return m(KW_AS);} -{NAME} {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);} - -[+-\/*^~@$!%&\(=\[\]\{\}|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} -[\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);} - -. {char c1=yytext[0]; - char buf[128]; - buf[0] = yytext[0]; - int t; - for(t=1;t<128;t++) { - char c = buf[t]=input(); - if(c=='\n' || c==EOF) { - buf[t] = 0; - break; - } - } - if(c1>='0' && c1<='9') - syntaxerror("syntax error: %s (identifiers must not start with a digit)"); - else - syntaxerror("syntax error: %s", buf); - printf("\n"); - exit(1); - yyterminate(); - } -<> {c(); +[>][=] {c();BEGIN(REGEXPOK);return m(T_GE);} +[<][=] {c();BEGIN(REGEXPOK);return m(T_LE);} +[-][-] {c();BEGIN(DEFAULT);return m(T_MINUSMINUS);} +[+][+] {c();BEGIN(DEFAULT);return m(T_PLUSPLUS);} +[+][=] {c();BEGIN(REGEXPOK);return m(T_PLUSBY);} +[\^][=] {c();BEGIN(REGEXPOK);return m(T_XORBY);} +[-][=] {c();BEGIN(REGEXPOK);return m(T_MINUSBY);} +[/][=] {c();BEGIN(REGEXPOK);return m(T_DIVBY);} +[%][=] {c();BEGIN(REGEXPOK);return m(T_MODBY);} +[*][=] {c();BEGIN(REGEXPOK);return m(T_MULBY);} +[|][=] {c();BEGIN(REGEXPOK);return m(T_ORBY);} +[&][=] {c();BEGIN(REGEXPOK);return m(T_ANDBY);} +[>][>][=] {c();BEGIN(REGEXPOK);return m(T_SHRBY);} +[<][<][=] {c();BEGIN(REGEXPOK);return m(T_SHLBY);} +[>][>][>][=] {c();BEGIN(REGEXPOK);return m(T_USHRBY);} +[<][<] {c();BEGIN(REGEXPOK);return m(T_SHL);} +[>][>][>] {c();BEGIN(REGEXPOK);return m(T_USHR);} +[>][>] {c();BEGIN(REGEXPOK);return m(T_SHR);} +\.\.\. {c();BEGIN(REGEXPOK);return m(T_DOTDOTDOT);} +\.\. {c();BEGIN(REGEXPOK);return m(T_DOTDOT);} +\. {c();BEGIN(REGEXPOK);return m('.');} +:: {c();BEGIN(REGEXPOK);return m(T_COLONCOLON);} +: {c();BEGIN(REGEXPOK);return m(':');} +instanceof {c();BEGIN(REGEXPOK);return m(KW_INSTANCEOF);} +implements {c();BEGIN(REGEXPOK);return m(KW_IMPLEMENTS);} +interface {c();BEGIN(DEFAULT);return m(KW_INTERFACE);} +protected {c();BEGIN(DEFAULT);return m(KW_PROTECTED);} +namespace {c();BEGIN(DEFAULT);return m(KW_NAMESPACE);} +undefined {c();BEGIN(DEFAULT);return m(KW_UNDEFINED);} +arguments {c();BEGIN(DEFAULT);return m(KW_ARGUMENTS);} +continue {c();BEGIN(DEFAULT);return m(KW_CONTINUE);} +override {c();BEGIN(DEFAULT);return m(KW_OVERRIDE);} +internal {c();BEGIN(DEFAULT);return m(KW_INTERNAL);} +function {c();BEGIN(DEFAULT);return m(KW_FUNCTION);} +finally {c();BEGIN(DEFAULT);return m(KW_FINALLY);} +default {c();BEGIN(DEFAULT);return m(KW_DEFAULT);} +package {c();BEGIN(DEFAULT);return m(KW_PACKAGE);} +private {c();BEGIN(DEFAULT);return m(KW_PRIVATE);} +dynamic {c();BEGIN(DEFAULT);return m(KW_DYNAMIC);} +extends {c();BEGIN(DEFAULT);return m(KW_EXTENDS);} +delete {c();BEGIN(REGEXPOK);return m(KW_DELETE);} +return {c();BEGIN(REGEXPOK);return m(KW_RETURN);} +public {c();BEGIN(DEFAULT);return m(KW_PUBLIC);} +native {c();BEGIN(DEFAULT);return m(KW_NATIVE);} +static {c();BEGIN(DEFAULT);return m(KW_STATIC);} +import {c();BEGIN(REGEXPOK);return m(KW_IMPORT);} +typeof {c();BEGIN(REGEXPOK);return m(KW_TYPEOF);} +throw {c();BEGIN(REGEXPOK);return m(KW_THROW);} +class {c();BEGIN(DEFAULT);return m(KW_CLASS);} +const {c();BEGIN(DEFAULT);return m(KW_CONST);} +catch {c();BEGIN(DEFAULT);return m(KW_CATCH);} +final {c();BEGIN(DEFAULT);return m(KW_FINAL);} +false {c();BEGIN(DEFAULT);return m(KW_FALSE);} +break {c();BEGIN(DEFAULT);return m(KW_BREAK);} +super {c();BEGIN(DEFAULT);return m(KW_SUPER);} +each {c();BEGIN(DEFAULT);return m(KW_EACH);} +void {c();BEGIN(DEFAULT);return m(KW_VOID);} +true {c();BEGIN(DEFAULT);return m(KW_TRUE);} +null {c();BEGIN(DEFAULT);return m(KW_NULL);} +else {c();BEGIN(DEFAULT);return m(KW_ELSE);} +case {c();BEGIN(REGEXPOK);return m(KW_CASE);} +with {c();BEGIN(REGEXPOK);return m(KW_WITH);} +use {c();BEGIN(REGEXPOK);return m(KW_USE);} +new {c();BEGIN(REGEXPOK);return m(KW_NEW);} +get {c();BEGIN(DEFAULT);return m(KW_GET);} +set {c();BEGIN(DEFAULT);return m(KW_SET);} +var {c();BEGIN(DEFAULT);return m(KW_VAR);} +try {c();BEGIN(DEFAULT);return m(KW_TRY);} +is {c();BEGIN(REGEXPOK);return m(KW_IS) ;} +in {c();BEGIN(REGEXPOK);return m(KW_IN) ;} +if {c();BEGIN(DEFAULT);return m(KW_IF) ;} +as {c();BEGIN(REGEXPOK);return m(KW_AS);} +$?{NAME} {c();BEGIN(DEFAULT);return handleIdentifier();} + +[\]\}*] {c();BEGIN(DEFAULT);return m(yytext[0]);} +[+-\/^~@$!%&\(=\[|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);} +[\)\]] {c();BEGIN(DEFAULT);return m(yytext[0]);} + +{ +. {tokenerror();} +} +<> {l(); void*b = leave_file(); if (!b) { yyterminate(); @@ -573,15 +715,52 @@ int yywrap() return 1; } +static int tokenerror() +{ + char c1=yytext[0]; + char buf[128]; + buf[0] = yytext[0]; + int t; + for(t=1;t<128;t++) { + char c = buf[t]=input(); + if(c=='\n' || c==EOF) { + buf[t] = 0; + break; + } + } + if(c1>='0' && c1<='9') + syntaxerror("syntax error: %s (identifiers must not start with a digit)"); + else + syntaxerror("syntax error [%d]: %s", (yy_start-1)/2, buf); + printf("\n"); + exit(1); + yyterminate(); +} + + static char mbuf[256]; char*token2string(enum yytokentype nr, YYSTYPE v) { - if(nr==T_STRING) return ""; + if(nr==T_STRING) { + char*s = malloc(v.str.len+10); + strcpy(s, ""); + memcpy(s+8, v.str.str, v.str.len); + sprintf(s+8+v.str.len, " (%d bytes)", v.str.len); + return s; + } + else if(nr==T_REGEXP) { + char*s = malloc(strlen(v.regexp.pattern)+10); + sprintf(s, "%s", v.regexp.pattern); + return s; + } + else if(nr==T_IDENTIFIER) { + char*s = malloc(strlen(v.id)+10); + sprintf(s, "%s", v.id); + return s; + } else if(nr==T_INT) return ""; else if(nr==T_UINT) return ""; - else if(nr==T_BYTE) return ""; else if(nr==T_FLOAT) return ""; - else if(nr==T_REGEXP) return "REGEXP"; else if(nr==T_EOF) return "***END***"; else if(nr==T_GE) return ">="; else if(nr==T_LE) return "<="; @@ -620,15 +799,36 @@ char*token2string(enum yytokentype nr, YYSTYPE v) else if(nr==KW_VAR) return "var"; else if(nr==KW_IS) return "is"; else if(nr==KW_AS) return "as"; - else if(nr==T_IDENTIFIER) return "ID"; else { sprintf(mbuf, "%d", nr); return mbuf; } } +void tokenizer_begin_xml() +{ + dbg("begin reading xml"); + BEGIN(XML); +} +void tokenizer_begin_xmltext() +{ + dbg("begin reading xml text"); + BEGIN(XMLTEXT); +} +void tokenizer_end_xmltext() +{ + dbg("end reading xml text"); + BEGIN(XML); +} +void tokenizer_end_xml() +{ + dbg("end reading xml"); + BEGIN(DEFAULT); +} + void initialize_scanner() { BEGIN(BEGINNING); } +