fixed bug in for/do/while/switch
[swftools.git] / lib / as3 / tokenizer.lex
1 /* tokenizer.lex
2
3    Routines for compiling Flash2 AVM2 ABC Actionscript
4
5    Extension module for the rfxswf library.
6    Part of the swftools package.
7
8    Copyright (c) 2008 Matthias Kramm <kramm@quiss.org>
9  
10    This program is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2 of the License, or
13    (at your option) any later version.
14
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
23 %{
24
25
26 #include <string.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include "../utf8.h"
31 #include "tokenizer.h"
32 #include "files.h"
33
34 static void countlines(char*text, int len) {
35     int t;
36     for(t=0;t<len;t++) {
37         if(text[t]=='\n') {
38             current_line++;
39             current_column=0;
40         } else {
41             current_column++;
42         }
43     }
44 }
45
static int verbose = 1;

/* Print a printf-style debug message to stdout, prefixed with "(tokenizer) ".
   Trailing newlines of the formatted text are dropped and exactly one is
   appended. Does nothing when verbose is 0.
   The message is formatted with vsnprintf, so text longer than the local
   buffer is truncated instead of overflowing the stack. */
static void dbg(const char*format, ...)
{
    char buf[1024];
    size_t l;
    va_list arglist;
    if(!verbose)
        return;
    buf[0] = 0; /* stays an empty string if formatting fails */
    va_start(arglist, format);
    vsnprintf(buf, sizeof(buf), format, arglist);
    va_end(arglist);
    l = strlen(buf);
    while(l && buf[l-1]=='\n') {
        buf[l-1] = 0;
        l--;
    }
    printf("(tokenizer) ");
    printf("%s\n", buf);
    fflush(stdout);
}
66
67 void syntaxerror(const char*format, ...)
68 {
69     char buf[1024];
70     int l;
71     va_list arglist;
72     if(!verbose)
73         return;
74     va_start(arglist, format);
75     vsprintf(buf, format, arglist);
76     va_end(arglist);
77     fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename_short, current_line, current_column, buf);
78     fflush(stderr);
79     exit(1);
80 }
81 void warning(const char*format, ...)
82 {
83     char buf[1024];
84     int l;
85     va_list arglist;
86     if(!verbose)
87         return;
88     va_start(arglist, format);
89     vsprintf(buf, format, arglist);
90     va_end(arglist);
91     fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf);
92     fflush(stderr);
93 }
94
95
96 #ifndef YY_CURRENT_BUFFER
97 #define YY_CURRENT_BUFFER yy_current_buffer
98 #endif
99
100 void handleInclude(char*text, int len, char quotes)
101 {
102     char*filename = 0;
103     if(quotes) {
104         char*p1 = strchr(text, '"');
105         char*p2 = strrchr(text, '"');
106         if(!p1 || !p2 || p1==p2) {
107             syntaxerror("Invalid include in line %d\n", current_line);
108         }
109         *p2 = 0;
110         filename = strdup(p1+1);
111     } else {
112         int i1=0,i2=len;
113         // find start
114         while(!strchr(" \n\r\t", text[i1])) i1++;
115         // strip
116         while(strchr(" \n\r\t", text[i1])) i1++;
117         while(strchr(" \n\r\t", text[i2-1])) i2--;
118         if(i2!=len) text[i2]=0;
119         filename = strdup(&text[i1]);
120     }
121     
122     char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
123     yyin = fopen(fullfilename, "rb");
124     if (!yyin) {
125         syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
126     }
127
128     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
129     //BEGIN(INITIAL); keep context
130 }
131
132 string_t string_unescape(const char*in, int l)
133 {
134     int len=0;
135     const char*s = in;
136     const char*end = &in[l];
137     char*n = (char*)malloc(l);
138     char*o = n;
139     while(s<end) {
140         if(*s!='\\') {
141             o[len++] = *s;
142             s++;
143             continue;
144         }
145         s++; //skip past '\'
146         if(s==end) syntaxerror("invalid \\ at end of string");
147
148         /* handle the various line endings (mac, dos, unix) */
149         if(*s=='\r') { 
150             s++; 
151             if(s==end) break;
152             if(*s=='\n') 
153                 s++;
154             continue;
155         }
156         if(*s=='\n')  {
157             s++;
158             continue;
159         }
160         switch(*s) {
161             case '\\': o[len++] = '\\';s++; break;
162             case '"': o[len++] = '"';s++; break;
163             case 'b': o[len++] = '\b';s++; break;
164             case 'f': o[len++] = '\f';s++; break;
165             case 'n': o[len++] = '\n';s++; break;
166             case 'r': o[len++] = '\r';s++; break;
167             case 't': o[len++] = '\t';s++; break;
168             case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
169                 unsigned int num=0;
170                 int nr = 0;
171                 while(strchr("01234567", *s) && nr<3 && s<end) {
172                     num <<= 3;
173                     num |= *s-'0';
174                     nr++;
175                     s++;
176                 }
177                 if(num>256) 
178                     syntaxerror("octal number out of range (0-255): %d", num);
179                 o[len++] = num;
180                 continue;
181             }
182             case 'x': case 'u': {
183                 int max=2;
184                 char bracket = 0;
185                 char unicode = 0;
186                 if(*s == 'u') {
187                     max = 6;
188                     unicode = 1;
189                 }
190                 s++;
191                 if(s==end) syntaxerror("invalid \\u or \\x at end of string");
192                 if(*s == '{')  {
193                     s++;
194                     if(s==end) syntaxerror("invalid \\u{ at end of string");
195                     bracket=1;
196                 }
197                 unsigned int num=0;
198                 int nr = 0;
199                 while(strchr("0123456789abcdefABCDEF", *s) && (bracket || nr < max) && s<end) {
200                     num <<= 4;
201                     if(*s>='0' && *s<='9') num |= *s - '0';
202                     if(*s>='a' && *s<='f') num |= *s - 'a' + 10;
203                     if(*s>='A' && *s<='F') num |= *s - 'A' + 10;
204                     nr++;
205                     s++;
206                 }
207                 if(bracket) {
208                     if(*s=='}' && s<end) {
209                         s++;
210                     } else {
211                         syntaxerror("missing terminating '}'");
212                     }
213                 }
214                 if(unicode) {
215                     char*utf8 = getUTF8(num);
216                     while(*utf8) {
217                         o[len++] = *utf8++;
218                     }
219                 } else {
220                     if(num>256) 
221                         syntaxerror("byte out of range (0-255): %d", num);
222                     o[len++] = num;
223                 }
224                 break;
225             }
226             default:
227                 syntaxerror("unknown escape sequence: \"\\%c\"", *s);
228         }
229     }
230     string_t out = string_new(n, len);
231     o[len]=0;
232     return out; 
233 }
234
235 static void handleString(char*s, int len)
236 {
237     if(s[0]=='"') {
238         if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'");
239         s++;len-=2;
240     }
241     else if(s[0]=='\'') {
242         if(s[len-1]!='\'') syntaxerror("String doesn't end with '\"'");
243         s++;len-=2;
244     }
245     else syntaxerror("String incorrectly terminated");
246
247     
248     avm2_lval.str = string_unescape(s, len);
249 }
250
251
252 char start_of_expression;
253
254 static inline int mkid(int type)
255 {
256     char*s = malloc(yyleng+1);
257     memcpy(s, yytext, yyleng);
258     s[yyleng]=0;
259     avm2_lval.id = s;
260     return type;
261 }
262
263 static inline int m(int type)
264 {
265     avm2_lval.token = type;
266     return type;
267 }
268
269
270 static char numberbuf[64];
271 static char*nrbuf()
272 {
273     if(yyleng>sizeof(numberbuf)-1)
274         syntaxerror("decimal number overflow");
275     char*s = numberbuf;
276     memcpy(s, yytext, yyleng);
277     s[yyleng]=0;
278     return s;
279 }
280
281 static inline int setint(int v)
282 {
283     avm2_lval.number_int = v;
284     if(v>-128)
285         return T_BYTE;
286     else if(v>=-32768)
287         return T_SHORT;
288     else
289         return T_INT;
290 }
291 static inline int setuint(unsigned int v)
292 {
293     avm2_lval.number_uint = v;
294     if(v<128)
295         return T_BYTE;
296     else if(v<32768)
297         return T_SHORT;
298     else
299         return T_UINT;
300 }
301
302 static inline int handlefloat()
303 {
304     char*s = nrbuf();
305     avm2_lval.number_float = atof(s);
306     return T_FLOAT;
307 }
308
309 static inline int handleint()
310 {
311     char*s = nrbuf();
312     char l = (yytext[0]=='-');
313
314     char*max = l?"1073741824":"2147483647";
315     if(yyleng-l>10)
316         syntaxerror("integer overflow");
317     if(yyleng-l==10) {
318         int t;
319         for(t=0;t<yyleng-l;t++) {
320             if(yytext[l+t]>max[t])
321                 syntaxerror("integer overflow %s > %s", s+l,max);
322             else if(yytext[l+t]<max[t])
323                 break;
324         }
325     }
326     if(yytext[0]=='-') {
327         int v = atoi(s);
328         return setint(v);
329     } else {
330         unsigned int v = 0;
331         int t;
332         for(t=0;t<yyleng;t++) {
333             v*=10;
334             v+=yytext[t]-'0';
335         }
336         return setuint(v);
337     }
338 }
339
340 static inline int handlehex()
341 {
342     char l = (yytext[0]=='-')+2;
343
344     if(yyleng-l>8)
345         syntaxerror("integer overflow");
346     int t;
347     unsigned int v = 0;
348     for(t=l;t<yyleng;t++) {
349         v<<=4;
350         char c = yytext[t];
351         if(c>='0' && c<='9')
352             v|=(c&15);
353         else if(c>='a' && c<='f' ||
354                 c>='A' && c<='F')
355             v|=(c&0x0f)+9;
356     }
357     if(l && v>1073741824)
358         syntaxerror("signed integer overflow");
359     if(!l && v>2147483647)
360         syntaxerror("unsigned integer overflow");
361
362     if(l==3) {
363         return setint(-(int)v);
364     } else {
365         return setuint(v);
366     }
367 }
368
369 void handleLabel(char*text, int len)
370 {
371     int t;
372     for(t=len-1;t>=0;--t) {
373         if(text[t]!=' ' &&
374            text[t]!='.')
375             break;
376     }
377     char*s = malloc(t+1);
378     memcpy(s, yytext, t);
379     s[t]=0;
380     avm2_lval.id = s;
381 }
382
/* forward declaration; the definition is at the end of this file */
void initialize_scanner();
/* hook initialize_scanner() into flex's one-time initialization macro */
#define YY_USER_INIT initialize_scanner();

/* advance the line/column counters over the text of the current match */
#define c() {countlines(yytext, yyleng);}
387
388 //Boolean                      {c();return m(KW_BOOLEAN);}
389 //int                          {c();return m(KW_INT);}
390 //uint                         {c();return m(KW_UINT);}
391 //Number                       {c();return m(KW_NUMBER);}
392
393
394 %}
395
/* Start conditions. The rules for regexp literals and signed numbers are
   only active in these two; REGEXPOK is entered by several operator rules,
   BEGINNING by initialize_scanner(). */
%s REGEXPOK
%s BEGINNING

/* identifier: letter or '_', then letters, digits, '_' or backslash */
NAME     [a-zA-Z_][a-zA-Z0-9_\\]*
/* one character that cannot continue a NAME (used as trailing context) */
_        [^a-zA-Z0-9_\\]

/* unsigned numbers; note that HEXINT lets any letter through, not just a-f */
HEXINT    0x[a-zA-Z0-9]+
INT       [0-9]+
FLOAT     [0-9]+(\.[0-9]*)?|\.[0-9]+

/* the same with an optional sign */
HEXWITHSIGN [+-]?({HEXINT})
INTWITHSIGN [+-]?({INT})
FLOATWITHSIGN [+-]?({FLOAT})

/* double- or single-quoted literal on one line; a backslash escapes any
   following byte, including a newline */
STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
/* a single whitespace character */
S        [ \n\r\t]
MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
/* a line comment includes its terminating newline */
SINGLELINE_COMMENT \/\/[^\n]*\n
/* slash-delimited regexp on one line, followed by optional flag letters */
REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
415 %%
416
417
{SINGLELINE_COMMENT}         {c(); /* single line comment */}
{MULTILINE_COMMENT}          {c(); /* multi line comment */}
[/][*]                       {/* a comment opener that is not part of a complete comment;
                                 the message has no conversion, so no argument is passed */
                              syntaxerror("syntax error: unterminated comment");}
421
422 ^include{S}+{STRING}{S}*/\n    {c();handleInclude(yytext, yyleng, 1);}
423 ^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {c();handleInclude(yytext, yyleng, 0);}
424 {STRING}                     {c(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;}
425
426 <BEGINNING,REGEXPOK>{
427 {REGEXP}                     {c(); BEGIN(INITIAL);return m(T_REGEXP);} 
428 {HEXWITHSIGN}                {c(); BEGIN(INITIAL);return handlehex();}
429 {INTWITHSIGN}                {c(); BEGIN(INITIAL);return handleint();}
430 {FLOATWITHSIGN}              {c(); BEGIN(INITIAL);return handlefloat();}
431 }
432
433 \xef\xbb\xbf                 {/* utf 8 bom */}
434 {S}                          {c();}
435
436 {HEXINT}                     {c(); BEGIN(INITIAL);return handlehex();}
437 {INT}                        {c(); BEGIN(INITIAL);return handleint();}
438 {FLOAT}                      {c(); BEGIN(INITIAL);return handlefloat();}
439
440 3rr0r                        {/* for debugging: generates a tokenizer-level error */
441                               syntaxerror("3rr0r");}
442
443 {NAME}{S}*:{S}*for/{_}        {c();handleLabel(yytext, yyleng-3);return T_FOR;}
444 {NAME}{S}*:{S}*do/{_}         {c();handleLabel(yytext, yyleng-2);return T_DO;}
445 {NAME}{S}*:{S}*while/{_}      {c();handleLabel(yytext, yyleng-5);return T_WHILE;}
446 {NAME}{S}*:{S}*switch/{_}     {c();handleLabel(yytext, yyleng-6);return T_SWITCH;}
447 for                          {c();avm2_lval.id="";return T_FOR;}
448 do                           {c();avm2_lval.id="";return T_DO;}
449 while                        {c();avm2_lval.id="";return T_WHILE;}
450 switch                       {c();avm2_lval.id="";return T_SWITCH;}
451
452 [&][&]                       {c();BEGIN(REGEXPOK);return m(T_ANDAND);}
453 [|][|]                       {c();BEGIN(REGEXPOK);return m(T_OROR);}
454 [!][=]                       {c();BEGIN(REGEXPOK);return m(T_NE);}
455 [!][=][=]                    {c();BEGIN(REGEXPOK);return m(T_NEE);}
456 [=][=][=]                    {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);}
457 [=][=]                       {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
458 [>][=]                       {c();return m(T_GE);}
459 [<][=]                       {c();return m(T_LE);}
460 [-][-]                       {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
461 [+][+]                       {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
462 [+][=]                       {c();return m(T_PLUSBY);}
463 [-][=]                       {c();return m(T_MINUSBY);}
464 [/][=]                       {c();return m(T_DIVBY);}
465 [%][=]                       {c();return m(T_MODBY);}
466 [*][=]                       {c();return m(T_MULBY);}
467 [>][>][=]                    {c();return m(T_SHRBY);}
468 [<][<][=]                    {c();return m(T_SHLBY);}
469 [>][>][>][=]                 {c();return m(T_USHRBY);}
470 [<][<]                       {c();return m(T_SHL);}
471 [>][>][>]                    {c();return m(T_USHR);}
472 [>][>]                       {c();return m(T_SHR);}
473 \.\.\.                       {c();return m(T_DOTDOTDOT);}
474 \.\.                         {c();return m(T_DOTDOT);}
475 \.                           {c();return m('.');}
476 ::                           {c();return m(T_COLONCOLON);}
477 :                            {c();return m(':');}
478 instanceof                   {c();return m(KW_INSTANCEOF);}
479 implements                   {c();return m(KW_IMPLEMENTS);}
480 interface                    {c();return m(KW_INTERFACE);}
481 namespace                    {c();return m(KW_NAMESPACE);}
482 protected                    {c();return m(KW_PROTECTED);}
483 undefined                    {c();return m(KW_UNDEFINED);}
484 continue                     {c();return m(KW_CONTINUE);}
485 override                     {c();return m(KW_OVERRIDE);}
486 internal                     {c();return m(KW_INTERNAL);}
487 function                     {c();return m(KW_FUNCTION);}
488 default                      {c();return m(KW_DEFAULT);}
489 package                      {c();return m(KW_PACKAGE);}
490 private                      {c();return m(KW_PRIVATE);}
491 dynamic                      {c();return m(KW_DYNAMIC);}
492 extends                      {c();return m(KW_EXTENDS);}
493 delete                       {c();return m(KW_DELETE);}
494 return                       {c();return m(KW_RETURN);}
495 public                       {c();return m(KW_PUBLIC);}
496 native                       {c();return m(KW_NATIVE);}
497 static                       {c();return m(KW_STATIC);}
498 import                       {c();return m(KW_IMPORT);}
499 typeof                       {c();return m(KW_TYPEOF);}
500 class                        {c();return m(KW_CLASS);}
501 const                        {c();return m(KW_CONST);}
502 catch                        {c();return m(KW_CATCH);}
503 final                        {c();return m(KW_FINAL);}
504 false                        {c();return m(KW_FALSE);}
505 break                        {c();return m(KW_BREAK);}
506 super                        {c();return m(KW_SUPER);}
507 void                         {c();return m(KW_VOID);}
508 true                         {c();return m(KW_TRUE);}
509 null                         {c();return m(KW_NULL);}
510 else                         {c();return m(KW_ELSE);}
511 case                         {c();return m(KW_CASE);}
512 use                          {c();return m(KW_USE);}
513 new                          {c();return m(KW_NEW);}
514 get                          {c();return m(KW_GET);}
515 set                          {c();return m(KW_SET);}
516 var                          {c();return m(KW_VAR);}
517 try                          {c();return m(KW_TRY);}
518 is                           {c();return m(KW_IS) ;}
519 if                           {c();return m(KW_IF) ;}
520 as                           {c();return m(KW_AS);}
521 {NAME}                       {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);}
522
[+-\/*^~@$!%&\(=\[\{\}|?:;,<>] {/* an operand may follow: allow regexps and signed numbers.
                                      ']' is not listed here; a rule that comes first wins
                                      ties, and ']' must reach the rule below */
                                   c();BEGIN(REGEXPOK);return m(yytext[0]);}
[\)\]]                            {/* end of an operand: a following '/' is a division */
                                   c();BEGIN(INITIAL);return m(yytext[0]);}
525
.                            {/* no other rule matched: collect the rest of the line (at most
                                 127 characters) for the error message and give up */
                              char c1=yytext[0];
                              char buf[128];
                              buf[0] = yytext[0];
                              int t;
                              for(t=1;t<127;t++) {
                                  /* keep the result as int: the end of input is reported
                                     as EOF or 0, depending on the flex version */
                                  int c = input();
                                  if(c=='\n' || c==EOF || c==0)
                                      break;
                                  buf[t] = c;
                              }
                              buf[t] = 0;
                              if(c1>='0' && c1<='9')
                                  syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
                              else
                                  syntaxerror("syntax error: %s", buf);
                              exit(1);
                             }
<<EOF>>                      {c();
                              /* presumably leave_file() returns the buffer of the including
                                 file, or 0 once the outermost file is finished; verify
                                 against files.h */
                              void*b = leave_file();
                              if (!b) {
                                 /* NOTE(review): yyterminate() normally expands to a return
                                    statement, which would make the next two lines
                                    unreachable; confirm before relying on T_EOF */
                                 yyterminate();
                                 yy_delete_buffer(YY_CURRENT_BUFFER);
                                 return m(T_EOF);
                              } else {
                                  /* drop the finished include and continue with the buffer
                                     that was handed back */
                                  yy_delete_buffer(YY_CURRENT_BUFFER);
                                  yy_switch_to_buffer(b);
                              }
                             }
556
557 %%
558
/* Called by the generated scanner when an input stream is exhausted.
   Always answers 1. */
int yywrap()
{
    const int no_more_input = 1;
    return no_more_input;
}
563
564 static char mbuf[256];
565 char*token2string(enum yytokentype nr, YYSTYPE v)
566 {
567     if(nr==T_STRING)     return "<string>";
568     else if(nr==T_INT)     return "<int>";
569     else if(nr==T_UINT)     return "<uint>";
570     else if(nr==T_BYTE)     return "<byte>";
571     else if(nr==T_FLOAT)     return "<float>";
572     else if(nr==T_REGEXP)     return "REGEXP";
573     else if(nr==T_EOF)        return "***END***";
574     else if(nr==T_GE)         return ">=";
575     else if(nr==T_LE)         return "<=";
576     else if(nr==T_MINUSMINUS) return "--";
577     else if(nr==T_PLUSPLUS)   return "++";
578     else if(nr==KW_IMPLEMENTS) return "implements";
579     else if(nr==KW_INTERFACE)  return "interface";
580     else if(nr==KW_NAMESPACE)  return "namespace";
581     else if(nr==KW_PROTECTED)  return "protected";
582     else if(nr==KW_OVERRIDE)   return "override";
583     else if(nr==KW_INTERNAL)   return "internal";
584     else if(nr==KW_FUNCTION)   return "function";
585     else if(nr==KW_PACKAGE)    return "package";
586     else if(nr==KW_PRIVATE)    return "private";
587     else if(nr==KW_BOOLEAN)    return "Boolean";
588     else if(nr==KW_DYNAMIC)    return "dynamic";
589     else if(nr==KW_EXTENDS)    return "extends";
590     else if(nr==KW_PUBLIC)     return "public";
591     else if(nr==KW_NATIVE)     return "native";
592     else if(nr==KW_STATIC)     return "static";
593     else if(nr==KW_IMPORT)     return "import";
594     else if(nr==KW_NUMBER)     return "number";
595     else if(nr==KW_CLASS)      return "class";
596     else if(nr==KW_CONST)      return "const";
597     else if(nr==KW_FINAL)      return "final";
598     else if(nr==KW_FALSE)      return "False";
599     else if(nr==KW_TRUE)       return "True";
600     else if(nr==KW_UINT)       return "uint";
601     else if(nr==KW_NULL)       return "null";
602     else if(nr==KW_ELSE)       return "else";
603     else if(nr==KW_USE)        return "use";
604     else if(nr==KW_INT)        return "int";
605     else if(nr==KW_NEW)        return "new";
606     else if(nr==KW_GET)        return "get";
607     else if(nr==KW_SET)        return "set";
608     else if(nr==KW_VAR)        return "var";
609     else if(nr==KW_IS)         return "is";
610     else if(nr==KW_AS)         return "as";
611     else if(nr==T_IDENTIFIER)  return "ID";
612     else {
613         sprintf(mbuf, "%d", nr);
614         return mbuf;
615     }
616 }
617
/* Put the scanner into the BEGINNING start condition. Invoked through the
   YY_USER_INIT macro defined in the prologue of this file. */
void initialize_scanner()
{
    BEGIN(BEGINNING);
}
622