switched more integer overflows from error reporting to float casting
[swftools.git] / lib / as3 / tokenizer.lex
1 /* tokenizer.lex
2
3    Routines for compiling Flash2 AVM2 ABC Actionscript
4
5    Extension module for the rfxswf library.
6    Part of the swftools package.
7
8    Copyright (c) 2008 Matthias Kramm <kramm@quiss.org>
9  
10    This program is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2 of the License, or
13    (at your option) any later version.
14
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
23 %{
24
25
26 #include <string.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include "../utf8.h"
31 #include "tokenizer.h"
32 #include "files.h"
33
34 static void countlines(char*text, int len) {
35     int t;
36     for(t=0;t<len;t++) {
37         if(text[t]=='\n') {
38             current_line++;
39             current_column=0;
40         } else {
41             current_column++;
42         }
43     }
44 }
45
/* When zero, dbg(), syntaxerror() and warning() produce no output. */
static int verbose = 1;

/* Print a printf-style debug message to stdout, prefixed with
   "(tokenizer) ". Trailing newlines in the formatted message are
   stripped and exactly one newline is written. Does nothing when
   verbose is 0. Messages longer than the internal buffer are
   truncated. */
static void dbg(const char*format, ...)
{
    char buf[1024];
    size_t l;
    va_list arglist;
    if(!verbose)
        return;
    va_start(arglist, format);
    /* bounded formatting: an over-long message must not overrun buf */
    vsnprintf(buf, sizeof(buf), format, arglist);
    va_end(arglist);
    l = strlen(buf);
    while(l && buf[l-1]=='\n') {
        buf[l-1] = 0;
        l--;
    }
    printf("(tokenizer) ");
    printf("%s\n", buf);
    fflush(stdout);
}
66
67 void syntaxerror(const char*format, ...)
68 {
69     char buf[1024];
70     int l;
71     va_list arglist;
72     if(!verbose)
73         return;
74     va_start(arglist, format);
75     vsprintf(buf, format, arglist);
76     va_end(arglist);
77     fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename_short, current_line, current_column, buf);
78     fflush(stderr);
79     exit(1);
80 }
81 void warning(const char*format, ...)
82 {
83     char buf[1024];
84     int l;
85     va_list arglist;
86     if(!verbose)
87         return;
88     va_start(arglist, format);
89     vsprintf(buf, format, arglist);
90     va_end(arglist);
91     fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf);
92     fflush(stderr);
93 }
94
95
96 #ifndef YY_CURRENT_BUFFER
97 #define YY_CURRENT_BUFFER yy_current_buffer
98 #endif
99
100 void handleInclude(char*text, int len, char quotes)
101 {
102     char*filename = 0;
103     if(quotes) {
104         char*p1 = strchr(text, '"');
105         char*p2 = strrchr(text, '"');
106         if(!p1 || !p2 || p1==p2) {
107             syntaxerror("Invalid include in line %d\n", current_line);
108         }
109         *p2 = 0;
110         filename = strdup(p1+1);
111     } else {
112         int i1=0,i2=len;
113         // find start
114         while(!strchr(" \n\r\t", text[i1])) i1++;
115         // strip
116         while(strchr(" \n\r\t", text[i1])) i1++;
117         while(strchr(" \n\r\t", text[i2-1])) i2--;
118         if(i2!=len) text[i2]=0;
119         filename = strdup(&text[i1]);
120     }
121     
122     char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
123     yyin = fopen(fullfilename, "rb");
124     if (!yyin) {
125         syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
126     }
127
128     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
129     //BEGIN(INITIAL); keep context
130 }
131
/* Decode the backslash escapes in the string body [s, end).

   s, end - start and one-past-the-end of the raw string contents
            (without the surrounding quotes)
   n      - output buffer, or null to only measure; when non-null it
            must hold the returned length plus one byte

   Returns the number of decoded bytes (the terminating 0 written to n
   is not counted).

   Supported escapes: \\ \" \' \b \f \n \r \t, up to three octal
   digits, \xHH, \uHHHHHH, the bracketed forms \x{...} and \u{...},
   and a backslash followed by a line ending (the line ending is
   dropped). \u values are stored in the form returned by getUTF8().
   Malformed or out-of-range escapes are reported with syntaxerror(). */
static int do_unescape(const char*s, const char*end, char*n) 
{
    char*o = n;
    int len=0;
    while(s<end) {
        if(*s!='\\') {
            if(o) o[len] = *s;
            len++;
            s++;
            continue;
        }
        s++; //skip past '\'
        if(s==end) syntaxerror("invalid \\ at end of string");

        /* handle the various line endings (mac, dos, unix) */
        if(*s=='\r') { 
            s++; 
            if(s==end) break;
            if(*s=='\n') 
                s++;
            continue;
        }
        if(*s=='\n')  {
            s++;
            continue;
        }
        switch(*s) {
            case '\\': if(o) o[len] = '\\'; s++; len++; break;
            case '"':  if(o) o[len] = '"';  s++; len++; break;
            /* needed for quotes inside single-quoted strings */
            case '\'': if(o) o[len] = '\''; s++; len++; break;
            case 'b':  if(o) o[len] = '\b'; s++; len++; break;
            case 'f':  if(o) o[len] = '\f'; s++; len++; break;
            case 'n':  if(o) o[len] = '\n'; s++; len++; break;
            case 'r':  if(o) o[len] = '\r'; s++; len++; break;
            case 't':  if(o) o[len] = '\t'; s++; len++; break;
            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
                unsigned int num=0;
                int nr = 0;
                /* check the bounds before looking at *s, and compare
                   the digit range directly so that a 0 byte is never
                   mistaken for a digit */
                while(s<end && nr<3 && *s>='0' && *s<='7') {
                    num <<= 3;
                    num |= *s-'0';
                    nr++;
                    s++;
                }
                if(num>255) 
                    syntaxerror("octal number out of range (0-255): %u", num);
                if(o) o[len] = num;
                len++;
                continue;
            }
            case 'x': case 'u': {
                int max=2;
                char bracket = 0;
                char unicode = 0;
                if(*s == 'u') {
                    max = 6;
                    unicode = 1;
                }
                s++;
                if(s==end) syntaxerror("invalid \\u or \\x at end of string");
                if(*s == '{')  {
                    s++;
                    if(s==end) syntaxerror("invalid \\u{ at end of string");
                    bracket=1;
                }
                unsigned int num=0;
                int nr = 0;
                /* the bracketed form has no digit limit */
                while(s<end && (bracket || nr < max)) {
                    unsigned int digit;
                    if(*s>='0' && *s<='9') digit = *s - '0';
                    else if(*s>='a' && *s<='f') digit = *s - 'a' + 10;
                    else if(*s>='A' && *s<='F') digit = *s - 'A' + 10;
                    else break;
                    num <<= 4;
                    num |= digit;
                    nr++;
                    s++;
                }
                if(bracket) {
                    if(s<end && *s=='}') {
                        s++;
                    } else {
                        syntaxerror("missing terminating '}'");
                    }
                }
                if(unicode) {
                    char*utf8 = getUTF8(num);
                    while(*utf8) {
                        if(o) o[len] = *utf8;
                        utf8++;
                        len++;
                    }
                } else {
                    if(num>255) 
                        syntaxerror("byte out of range (0-255): %u", num);
                    if(o) o[len] = num;
                    len++;
                }
                break;
            }
            default:
                syntaxerror("unknown escape sequence: \"\\%c\"", *s);
        }
    }
    if(o) o[len]=0;
    return len;
}
230
231 static string_t string_unescape(const char*in, int l)
232 {
233     const char*s = in;
234     const char*end = &in[l];
235
236     int len = do_unescape(s, end, 0);
237     char*n = (char*)malloc(len+1);
238     do_unescape(s, end, n);
239     string_t out = string_new(n, len);
240     return out; 
241 }
242
243 static void handleString(char*s, int len)
244 {
245     if(s[0]=='"') {
246         if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'");
247         s++;len-=2;
248     }
249     else if(s[0]=='\'') {
250         if(s[len-1]!='\'') syntaxerror("String doesn't end with '\"'");
251         s++;len-=2;
252     }
253     else syntaxerror("String incorrectly terminated");
254
255     
256     avm2_lval.str = string_unescape(s, len);
257 }
258
259
260 char start_of_expression;
261
262 static inline int mkid(int type)
263 {
264     char*s = malloc(yyleng+1);
265     memcpy(s, yytext, yyleng);
266     s[yyleng]=0;
267     avm2_lval.id = s;
268     return type;
269 }
270
271 static inline int m(int type)
272 {
273     avm2_lval.token = type;
274     return type;
275 }
276
277
278 static char numberbuf[64];
279 static char*nrbuf()
280 {
281     if(yyleng>sizeof(numberbuf)-1)
282         syntaxerror("decimal number overflow");
283     char*s = numberbuf;
284     memcpy(s, yytext, yyleng);
285     s[yyleng]=0;
286     return s;
287 }
288
289 static inline int setint(int v)
290 {
291     avm2_lval.number_int = v;
292     if(v>-128)
293         return T_BYTE;
294     else if(v>=-32768)
295         return T_SHORT;
296     else
297         return T_INT;
298 }
299 static inline int setuint(unsigned int v)
300 {
301     avm2_lval.number_uint = v;
302     if(v<128)
303         return T_BYTE;
304     else if(v<32768)
305         return T_SHORT;
306     else
307         return T_UINT;
308 }
309 static inline int setfloat(double v)
310 {
311     avm2_lval.number_float = v;
312     return T_FLOAT;
313 }
314
315 static inline int handlefloat()
316 {
317     char*s = nrbuf();
318     avm2_lval.number_float = atof(s);
319     return T_FLOAT;
320 }
321
322 static inline int handleint()
323 {
324     char*s = nrbuf();
325     char l = (yytext[0]=='-');
326
327     char*max = l?"1073741824":"2147483647";
328     if(yyleng-l>10) {
329         warning("integer overflow: %s", s);
330         return handlefloat();
331     }
332     if(yyleng-l==10) {
333         int t;
334         for(t=0;t<yyleng-l;t++) {
335             if(yytext[l+t]>max[t]) {
336                 warning("integer overflow: %s", s);
337                 return handlefloat();
338             }
339             else if(yytext[l+t]<max[t])
340                 break;
341         }
342     }
343     if(yytext[0]=='-') {
344         int v = atoi(s);
345         return setint(v);
346     } else {
347         unsigned int v = 0;
348         int t;
349         for(t=0;t<yyleng;t++) {
350             v*=10;
351             v+=yytext[t]-'0';
352         }
353         return setuint(v);
354     }
355 }
356
357 static inline int handlehex()
358 {
359     char l = (yytext[0]=='-')+2;
360
361     if(yyleng-l>8) {
362         char*s = nrbuf();
363         syntaxerror("integer overflow %s", s);
364     }
365
366     int t;
367     unsigned int v = 0;
368     for(t=l;t<yyleng;t++) {
369         v<<=4;
370         char c = yytext[t];
371         if(c>='0' && c<='9')
372             v|=(c&15);
373         else if(c>='a' && c<='f' ||
374                 c>='A' && c<='F')
375             v|=(c&0x0f)+9;
376     }
377     if(l && v>1073741824) {
378         char*s = nrbuf();
379         warning("signed integer overflow: %s", s);
380         return setfloat(v);
381     }
382     if(!l && v>2147483647) {
383         char*s = nrbuf();
384         warning("unsigned integer overflow: %s", s);
385         return setfloat(v);
386     }
387
388     if(l==3) {
389         return setint(-(int)v);
390     } else {
391         return setuint(v);
392     }
393 }
394
395 void handleLabel(char*text, int len)
396 {
397     int t;
398     for(t=len-1;t>=0;--t) {
399         if(text[t]!=' ' &&
400            text[t]!='.')
401             break;
402     }
403     char*s = malloc(t+1);
404     memcpy(s, yytext, t);
405     s[t]=0;
406     avm2_lval.id = s;
407 }
408
409 void initialize_scanner();
410 #define YY_USER_INIT initialize_scanner();
411
412 #define c() {countlines(yytext, yyleng);}
413
414 //Boolean                      {c();return m(KW_BOOLEAN);}
415 //int                          {c();return m(KW_INT);}
416 //uint                         {c();return m(KW_UINT);}
417 //Number                       {c();return m(KW_NUMBER);}
418
419
420 %}
421
/* Inclusive start conditions. The rules restricted to BEGINNING and
   REGEXPOK recognise regular expression literals and signed numbers;
   BEGINNING is entered by initialize_scanner(). */
%s REGEXPOK
%s BEGINNING

/* NAME: an identifier; a backslash is accepted after the first
   character. "_" is any character that cannot continue a NAME and is
   used as trailing context after keywords. */
NAME     [a-zA-Z_][a-zA-Z0-9_\\]*
_        [^a-zA-Z0-9_\\]

/* Note that HEXINT accepts any letter or digit after "0x", not only
   hexadecimal digits. */
HEXINT    0x[a-zA-Z0-9]+
INT       [0-9]+
FLOAT     [0-9]+(\.[0-9]*)?|\.[0-9]+

/* The same numbers with an optional leading '+' or '-'. */
HEXWITHSIGN [+-]?({HEXINT})
INTWITHSIGN [+-]?({INT})
FLOATWITHSIGN [+-]?({FLOAT})

/* STRING: a double- or single-quoted literal on one line; a backslash
   may be followed by any byte, including a newline. S: one whitespace
   character. SINGLELINE_COMMENT includes the terminating newline.
   REGEXP: a slash-delimited pattern followed by optional letter flags. */
STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
S        [ \n\r\t]
MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
SINGLELINE_COMMENT \/\/[^\n]*\n
REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
441 %%
442
443
444 {SINGLELINE_COMMENT}         {c(); /* single line comment */}
445 {MULTILINE_COMMENT}          {c(); /* multi line comment */}
446 [/][*]                       {syntaxerror("syntax error: unterminated comment", yytext);}
447
448 ^include{S}+{STRING}{S}*/\n    {c();handleInclude(yytext, yyleng, 1);}
449 ^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {c();handleInclude(yytext, yyleng, 0);}
450 {STRING}                     {c(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;}
451
452 <BEGINNING,REGEXPOK>{
453 {REGEXP}                     {c(); BEGIN(INITIAL);return m(T_REGEXP);} 
454 {HEXWITHSIGN}                {c(); BEGIN(INITIAL);return handlehex();}
455 {INTWITHSIGN}                {c(); BEGIN(INITIAL);return handleint();}
456 {FLOATWITHSIGN}              {c(); BEGIN(INITIAL);return handlefloat();}
457 }
458
459 \xef\xbb\xbf                 {/* utf 8 bom */}
460 {S}                          {c();}
461
462 {HEXINT}                     {c(); BEGIN(INITIAL);return handlehex();}
463 {INT}                        {c(); BEGIN(INITIAL);return handleint();}
464 {FLOAT}                      {c(); BEGIN(INITIAL);return handlefloat();}
465
466 3rr0r                        {/* for debugging: generates a tokenizer-level error */
467                               syntaxerror("3rr0r");}
468
{NAME}{S}*:{S}*for/{_}        {/* labelled loop: the length passed to handleLabel() excludes the keyword ("for", 3 bytes) */ c();handleLabel(yytext, yyleng-3);return T_FOR;}
{NAME}{S}*:{S}*do/{_}         {/* as above, "do" is 2 bytes */ c();handleLabel(yytext, yyleng-2);return T_DO;}
{NAME}{S}*:{S}*while/{_}      {/* as above, "while" is 5 bytes */ c();handleLabel(yytext, yyleng-5);return T_WHILE;}
{NAME}{S}*:{S}*switch/{_}     {/* as above, "switch" is 6 bytes */ c();handleLabel(yytext, yyleng-6);return T_SWITCH;}
473 for                          {c();avm2_lval.id="";return T_FOR;}
474 do                           {c();avm2_lval.id="";return T_DO;}
475 while                        {c();avm2_lval.id="";return T_WHILE;}
476 switch                       {c();avm2_lval.id="";return T_SWITCH;}
477
478 [&][&]                       {c();BEGIN(REGEXPOK);return m(T_ANDAND);}
479 [|][|]                       {c();BEGIN(REGEXPOK);return m(T_OROR);}
480 [!][=]                       {c();BEGIN(REGEXPOK);return m(T_NE);}
481 [!][=][=]                    {c();BEGIN(REGEXPOK);return m(T_NEE);}
482 [=][=][=]                    {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);}
483 [=][=]                       {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
484 [>][=]                       {c();return m(T_GE);}
485 [<][=]                       {c();return m(T_LE);}
486 [-][-]                       {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
487 [+][+]                       {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
488 [+][=]                       {c();return m(T_PLUSBY);}
489 [-][=]                       {c();return m(T_MINUSBY);}
490 [/][=]                       {c();return m(T_DIVBY);}
491 [%][=]                       {c();return m(T_MODBY);}
492 [*][=]                       {c();return m(T_MULBY);}
493 [|][=]                       {c();return m(T_ORBY);}
494 [>][>][=]                    {c();return m(T_SHRBY);}
495 [<][<][=]                    {c();return m(T_SHLBY);}
496 [>][>][>][=]                 {c();return m(T_USHRBY);}
497 [<][<]                       {c();return m(T_SHL);}
498 [>][>][>]                    {c();return m(T_USHR);}
499 [>][>]                       {c();return m(T_SHR);}
500 \.\.\.                       {c();return m(T_DOTDOTDOT);}
501 \.\.                         {c();return m(T_DOTDOT);}
502 \.                           {c();return m('.');}
503 ::                           {c();return m(T_COLONCOLON);}
504 :                            {c();return m(':');}
505 instanceof                   {c();return m(KW_INSTANCEOF);}
506 implements                   {c();return m(KW_IMPLEMENTS);}
507 interface                    {c();return m(KW_INTERFACE);}
508 namespace                    {c();return m(KW_NAMESPACE);}
509 protected                    {c();return m(KW_PROTECTED);}
510 undefined                    {c();return m(KW_UNDEFINED);}
511 continue                     {c();return m(KW_CONTINUE);}
512 override                     {c();return m(KW_OVERRIDE);}
513 internal                     {c();return m(KW_INTERNAL);}
514 function                     {c();return m(KW_FUNCTION);}
515 default                      {c();return m(KW_DEFAULT);}
516 package                      {c();return m(KW_PACKAGE);}
517 private                      {c();return m(KW_PRIVATE);}
518 dynamic                      {c();return m(KW_DYNAMIC);}
519 extends                      {c();return m(KW_EXTENDS);}
520 delete                       {c();return m(KW_DELETE);}
521 return                       {c();return m(KW_RETURN);}
522 public                       {c();return m(KW_PUBLIC);}
523 native                       {c();return m(KW_NATIVE);}
524 static                       {c();return m(KW_STATIC);}
525 import                       {c();return m(KW_IMPORT);}
526 typeof                       {c();return m(KW_TYPEOF);}
527 throw                        {c();return m(KW_THROW);}
528 class                        {c();return m(KW_CLASS);}
529 const                        {c();return m(KW_CONST);}
530 catch                        {c();return m(KW_CATCH);}
531 final                        {c();return m(KW_FINAL);}
532 false                        {c();return m(KW_FALSE);}
533 break                        {c();return m(KW_BREAK);}
534 super                        {c();return m(KW_SUPER);}
535 each                         {c();return m(KW_EACH);}
536 void                         {c();return m(KW_VOID);}
537 true                         {c();return m(KW_TRUE);}
538 null                         {c();return m(KW_NULL);}
539 else                         {c();return m(KW_ELSE);}
540 case                         {c();return m(KW_CASE);}
541 with                         {c();return m(KW_WITH);}
542 use                          {c();return m(KW_USE);}
543 new                          {c();return m(KW_NEW);}
544 get                          {c();return m(KW_GET);}
545 set                          {c();return m(KW_SET);}
546 var                          {c();return m(KW_VAR);}
547 try                          {c();return m(KW_TRY);}
548 is                           {c();return m(KW_IS) ;}
549 in                           {c();return m(KW_IN) ;}
550 if                           {c();return m(KW_IF) ;}
551 as                           {c();return m(KW_AS);}
552 {NAME}                       {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);}
553
554 [+-\/*^~@$!%&\(=\[\]\{\}|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
555 [\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
556
.                            {/* Anything no other rule matched: collect the rest of
                                 the line for the error message and abort. */
                              char c1=yytext[0];
                              char buf[128];
                              buf[0] = yytext[0];
                              int t;
                              /* stop one short of the buffer size so the
                                 terminator always fits */
                              for(t=1;t<127;t++) {
                                  /* keep the int result: input() signals the
                                     end of input with EOF or 0, depending on
                                     the flex version */
                                  int c = input();
                                  if(c=='\n' || c==EOF || c==0)
                                      break;
                                  buf[t] = c;
                              }
                              buf[t] = 0;
                              if(c1>='0' && c1<='9')
                                  syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
                              else
                                  syntaxerror("syntax error: %s", buf);
                              printf("\n");
                              exit(1);
                              yyterminate();
                             }
<<EOF>>                      {c();
                              /* End of the current input. leave_file() presumably
                                 returns the scanner buffer that was passed to
                                 enter_file() by the include that opened this file,
                                 or null if there is none -- verify in files.c */
                              void*b = leave_file();
                              if (!b) {
                                 /* NOTE(review): with flex's default definition,
                                    yyterminate() is a return statement, so the two
                                    statements after it would never run -- confirm
                                    which behaviour is intended */
                                 yyterminate();
                                 yy_delete_buffer(YY_CURRENT_BUFFER);
                                 return m(T_EOF);
                              } else {
                                  /* discard the finished buffer and continue
                                     with the one returned above */
                                  yy_delete_buffer(YY_CURRENT_BUFFER);
                                  yy_switch_to_buffer(b);
                              }
                             }
587
588 %%
589
/* Called by the scanner when it runs out of input. Always returns 1. */
int yywrap()
{
    enum { WRAP_RESULT = 1 };
    return WRAP_RESULT;
}
594
595 static char mbuf[256];
596 char*token2string(enum yytokentype nr, YYSTYPE v)
597 {
598     if(nr==T_STRING)     return "<string>";
599     else if(nr==T_INT)     return "<int>";
600     else if(nr==T_UINT)     return "<uint>";
601     else if(nr==T_BYTE)     return "<byte>";
602     else if(nr==T_FLOAT)     return "<float>";
603     else if(nr==T_REGEXP)     return "REGEXP";
604     else if(nr==T_EOF)        return "***END***";
605     else if(nr==T_GE)         return ">=";
606     else if(nr==T_LE)         return "<=";
607     else if(nr==T_MINUSMINUS) return "--";
608     else if(nr==T_PLUSPLUS)   return "++";
609     else if(nr==KW_IMPLEMENTS) return "implements";
610     else if(nr==KW_INTERFACE)  return "interface";
611     else if(nr==KW_NAMESPACE)  return "namespace";
612     else if(nr==KW_PROTECTED)  return "protected";
613     else if(nr==KW_OVERRIDE)   return "override";
614     else if(nr==KW_INTERNAL)   return "internal";
615     else if(nr==KW_FUNCTION)   return "function";
616     else if(nr==KW_PACKAGE)    return "package";
617     else if(nr==KW_PRIVATE)    return "private";
618     else if(nr==KW_BOOLEAN)    return "Boolean";
619     else if(nr==KW_DYNAMIC)    return "dynamic";
620     else if(nr==KW_EXTENDS)    return "extends";
621     else if(nr==KW_PUBLIC)     return "public";
622     else if(nr==KW_NATIVE)     return "native";
623     else if(nr==KW_STATIC)     return "static";
624     else if(nr==KW_IMPORT)     return "import";
625     else if(nr==KW_NUMBER)     return "number";
626     else if(nr==KW_CLASS)      return "class";
627     else if(nr==KW_CONST)      return "const";
628     else if(nr==KW_FINAL)      return "final";
629     else if(nr==KW_FALSE)      return "False";
630     else if(nr==KW_TRUE)       return "True";
631     else if(nr==KW_UINT)       return "uint";
632     else if(nr==KW_NULL)       return "null";
633     else if(nr==KW_ELSE)       return "else";
634     else if(nr==KW_USE)        return "use";
635     else if(nr==KW_INT)        return "int";
636     else if(nr==KW_NEW)        return "new";
637     else if(nr==KW_GET)        return "get";
638     else if(nr==KW_SET)        return "set";
639     else if(nr==KW_VAR)        return "var";
640     else if(nr==KW_IS)         return "is";
641     else if(nr==KW_AS)         return "as";
642     else if(nr==T_IDENTIFIER)  return "ID";
643     else {
644         sprintf(mbuf, "%d", nr);
645         return mbuf;
646     }
647 }
648
649 void initialize_scanner()
650 {
651     BEGIN(BEGINNING);
652 }
653