uses compiler.h now instead of doing everything itself
[swftools.git] / lib / as3 / tokenizer.lex
1 /* tokenizer.lex
2
3    Routines for compiling Flash2 AVM2 ABC Actionscript
4
5    Extension module for the rfxswf library.
6    Part of the swftools package.
7
8    Copyright (c) 2008 Matthias Kramm <kramm@quiss.org>
9  
10    This program is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2 of the License, or
13    (at your option) any later version.
14
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
23 %{
24
25
26 #include <string.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include "../utf8.h"
31 #include "tokenizer.h"
32 #include "files.h"
33
34 static void countlines(char*text, int len) {
35     int t;
36     for(t=0;t<len;t++) {
37         if(text[t]=='\n') {
38             current_line++;
39             current_column=0;
40         } else {
41             current_column++;
42         }
43     }
44 }
45
/* Nonzero enables the diagnostic output of dbg(), syntaxerror() and warning(). */
static int verbose = 1;

/* Print a printf-style debug message to stdout, prefixed with "(tokenizer) ".
   Trailing newlines in the formatted text are stripped and exactly one is
   emitted. Does nothing when `verbose` is 0.
   The message is formatted with vsnprintf, so text longer than the local
   buffer is truncated instead of overflowing the stack. */
static void dbg(const char*format, ...)
{
    char buf[1024];
    size_t l;
    va_list arglist;
    if(!verbose)
        return;
    va_start(arglist, format);
    vsnprintf(buf, sizeof(buf), format, arglist);
    va_end(arglist);
    l = strlen(buf);
    while(l && buf[l-1]=='\n') {
        buf[l-1] = 0;
        l--;
    }
    printf("(tokenizer) %s\n", buf);
    fflush(stdout);
}
66
67 void syntaxerror(const char*format, ...)
68 {
69     char buf[1024];
70     int l;
71     va_list arglist;
72     if(!verbose)
73         return;
74     va_start(arglist, format);
75     vsprintf(buf, format, arglist);
76     va_end(arglist);
77     fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename_short, current_line, current_column, buf);
78     fflush(stderr);
79     exit(1);
80 }
81 void warning(const char*format, ...)
82 {
83     char buf[1024];
84     int l;
85     va_list arglist;
86     if(!verbose)
87         return;
88     va_start(arglist, format);
89     vsprintf(buf, format, arglist);
90     va_end(arglist);
91     fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf);
92     fflush(stderr);
93 }
94
95
96 #ifndef YY_CURRENT_BUFFER
97 #define YY_CURRENT_BUFFER yy_current_buffer
98 #endif
99
100 void handleInclude(char*text, int len, char quotes)
101 {
102     char*filename = 0;
103     if(quotes) {
104         char*p1 = strchr(text, '"');
105         char*p2 = strrchr(text, '"');
106         if(!p1 || !p2 || p1==p2) {
107             syntaxerror("Invalid include in line %d\n", current_line);
108         }
109         *p2 = 0;
110         filename = strdup(p1+1);
111     } else {
112         int i1=0,i2=len;
113         // find start
114         while(!strchr(" \n\r\t", text[i1])) i1++;
115         // strip
116         while(strchr(" \n\r\t", text[i1])) i1++;
117         while(strchr(" \n\r\t", text[i2-1])) i2--;
118         if(i2!=len) text[i2]=0;
119         filename = strdup(&text[i1]);
120     }
121     
122     char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
123     yyin = fopen(fullfilename, "rb");
124     if (!yyin) {
125         syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
126     }
127
128     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
129     //BEGIN(INITIAL); keep context
130 }
131
/* Decode the backslash escapes of the string body [s, end).

   s, end: start and one-past-the-end of the raw text (without the
           surrounding quotes).
   n:      destination buffer, or NULL to only measure. When non-NULL it
           must hold the returned length plus one byte; it is
           NUL-terminated on return.

   Returns the number of bytes the decoded string occupies (terminator not
   counted). Supported escapes: \\ \" \' \b \f \n \r \t, up to three octal
   digits, \xHH, \uHHHHHH, the braced forms \x{...} / \u{...}, and a
   backslash followed by a line ending, which produces no output. \u values
   are emitted as the bytes returned by getUTF8(). Malformed input is
   reported through syntaxerror(). */
static int do_unescape(const char*s, const char*end, char*n) 
{
    char*o = n;
    int len=0;
    while(s<end) {
        if(*s!='\\') {
            if(o) o[len] = *s;
            len++;
            s++;
            continue;
        }
        s++; //skip past '\'
        if(s==end) syntaxerror("invalid \\ at end of string");

        /* handle the various line endings (mac, dos, unix) */
        if(*s=='\r') { 
            s++; 
            if(s==end) break;
            if(*s=='\n') 
                s++;
            continue;
        }
        if(*s=='\n')  {
            s++;
            continue;
        }
        switch(*s) {
            case '\\': if(o) o[len] = '\\'; s++; len++; break;
            case '"':  if(o) o[len] = '"';  s++; len++; break;
            /* single-quoted literals reach this function too, so \' must decode */
            case '\'': if(o) o[len] = '\''; s++; len++; break;
            case 'b':  if(o) o[len] = '\b'; s++; len++; break;
            case 'f':  if(o) o[len] = '\f'; s++; len++; break;
            case 'n':  if(o) o[len] = '\n'; s++; len++; break;
            case 'r':  if(o) o[len] = '\r'; s++; len++; break;
            case 't':  if(o) o[len] = '\t'; s++; len++; break;
            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
                unsigned int num=0;
                int nr = 0;
                /* bounds check first: *s must not be read at or past end */
                while(s<end && nr<3 && *s>='0' && *s<='7') {
                    num <<= 3;
                    num |= *s-'0';
                    nr++;
                    s++;
                }
                if(num>255) 
                    syntaxerror("octal number out of range (0-255): %u", num);
                if(o) o[len] = num;
                len++;
                break;
            }
            case 'x': case 'u': {
                int max=2;
                char bracket = 0;
                char unicode = 0;
                if(*s == 'u') {
                    max = 6;
                    unicode = 1;
                }
                s++;
                if(s==end) syntaxerror("invalid \\u or \\x at end of string");
                if(*s == '{')  {
                    s++;
                    if(s==end) syntaxerror("invalid \\u{ at end of string");
                    bracket=1;
                }
                unsigned int num=0;
                int nr = 0;
                while(s<end && (bracket || nr < max)) {
                    unsigned int digit;
                    if(*s>='0' && *s<='9') digit = *s - '0';
                    else if(*s>='a' && *s<='f') digit = *s - 'a' + 10;
                    else if(*s>='A' && *s<='F') digit = *s - 'A' + 10;
                    else break;
                    num = (num << 4) | digit;
                    nr++;
                    s++;
                }
                if(bracket) {
                    if(s<end && *s=='}') {
                        s++;
                    } else {
                        syntaxerror("missing terminating '}'");
                    }
                }
                if(unicode) {
                    char*utf8 = getUTF8(num);
                    while(*utf8) {
                        if(o) o[len] = *utf8;
                        utf8++;
                        len++;
                    }
                } else {
                    if(num>255) 
                        syntaxerror("byte out of range (0-255): %u", num);
                    if(o) o[len] = num;
                    len++;
                }
                break;
            }
            default:
                syntaxerror("unknown escape sequence: \"\\%c\"", *s);
        }
    }
    if(o) o[len]=0;
    return len;
}
230
231 static string_t string_unescape(const char*in, int l)
232 {
233     const char*s = in;
234     const char*end = &in[l];
235
236     int len = do_unescape(s, end, 0);
237     char*n = (char*)malloc(len+1);
238     do_unescape(s, end, n);
239     string_t out = string_new(n, len);
240     return out; 
241 }
242
243 static void handleString(char*s, int len)
244 {
245     if(s[0]=='"') {
246         if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'");
247         s++;len-=2;
248     }
249     else if(s[0]=='\'') {
250         if(s[len-1]!='\'') syntaxerror("String doesn't end with '\"'");
251         s++;len-=2;
252     }
253     else syntaxerror("String incorrectly terminated");
254
255     
256     avm2_lval.str = string_unescape(s, len);
257 }
258
259
260 char start_of_expression;
261
262 static inline int mkid(int type)
263 {
264     char*s = malloc(yyleng+1);
265     memcpy(s, yytext, yyleng);
266     s[yyleng]=0;
267     avm2_lval.id = s;
268     return type;
269 }
270
271 static inline int m(int type)
272 {
273     avm2_lval.token = type;
274     return type;
275 }
276
277
278 static char numberbuf[64];
279 static char*nrbuf()
280 {
281     if(yyleng>sizeof(numberbuf)-1)
282         syntaxerror("decimal number overflow");
283     char*s = numberbuf;
284     memcpy(s, yytext, yyleng);
285     s[yyleng]=0;
286     return s;
287 }
288
289 static inline int setint(int v)
290 {
291     avm2_lval.number_int = v;
292     if(v>-128)
293         return T_BYTE;
294     else if(v>=-32768)
295         return T_SHORT;
296     else
297         return T_INT;
298 }
299 static inline int setuint(unsigned int v)
300 {
301     avm2_lval.number_uint = v;
302     if(v<128)
303         return T_BYTE;
304     else if(v<32768)
305         return T_SHORT;
306     else
307         return T_UINT;
308 }
309
310 static inline int handlefloat()
311 {
312     char*s = nrbuf();
313     avm2_lval.number_float = atof(s);
314     return T_FLOAT;
315 }
316
317 static inline int handleint()
318 {
319     char*s = nrbuf();
320     char l = (yytext[0]=='-');
321
322     char*max = l?"1073741824":"2147483647";
323     if(yyleng-l>10)
324         syntaxerror("integer overflow");
325     if(yyleng-l==10) {
326         int t;
327         for(t=0;t<yyleng-l;t++) {
328             if(yytext[l+t]>max[t])
329                 syntaxerror("integer overflow %s > %s", s+l,max);
330             else if(yytext[l+t]<max[t])
331                 break;
332         }
333     }
334     if(yytext[0]=='-') {
335         int v = atoi(s);
336         return setint(v);
337     } else {
338         unsigned int v = 0;
339         int t;
340         for(t=0;t<yyleng;t++) {
341             v*=10;
342             v+=yytext[t]-'0';
343         }
344         return setuint(v);
345     }
346 }
347
348 static inline int handlehex()
349 {
350     char l = (yytext[0]=='-')+2;
351
352     if(yyleng-l>8)
353         syntaxerror("integer overflow");
354     int t;
355     unsigned int v = 0;
356     for(t=l;t<yyleng;t++) {
357         v<<=4;
358         char c = yytext[t];
359         if(c>='0' && c<='9')
360             v|=(c&15);
361         else if(c>='a' && c<='f' ||
362                 c>='A' && c<='F')
363             v|=(c&0x0f)+9;
364     }
365     if(l && v>1073741824)
366         syntaxerror("signed integer overflow");
367     if(!l && v>2147483647)
368         syntaxerror("unsigned integer overflow");
369
370     if(l==3) {
371         return setint(-(int)v);
372     } else {
373         return setuint(v);
374     }
375 }
376
377 void handleLabel(char*text, int len)
378 {
379     int t;
380     for(t=len-1;t>=0;--t) {
381         if(text[t]!=' ' &&
382            text[t]!='.')
383             break;
384     }
385     char*s = malloc(t+1);
386     memcpy(s, yytext, t);
387     s[t]=0;
388     avm2_lval.id = s;
389 }
390
391 void initialize_scanner();
392 #define YY_USER_INIT initialize_scanner();
393
394 #define c() {countlines(yytext, yyleng);}
395
396 //Boolean                      {c();return m(KW_BOOLEAN);}
397 //int                          {c();return m(KW_INT);}
398 //uint                         {c();return m(KW_UINT);}
399 //Number                       {c();return m(KW_NUMBER);}
400
401
402 %}
403
404 %s REGEXPOK
405 %s BEGINNING
406
407 NAME     [a-zA-Z_][a-zA-Z0-9_\\]*
408 _        [^a-zA-Z0-9_\\]
409
410 HEXINT    0x[a-zA-Z0-9]+
411 INT       [0-9]+
412 FLOAT     [0-9]+(\.[0-9]*)?|\.[0-9]+
413
414 HEXWITHSIGN [+-]?({HEXINT})
415 INTWITHSIGN [+-]?({INT})
416 FLOATWITHSIGN [+-]?({FLOAT})
417
418 STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
419 S        [ \n\r\t]
420 MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
421 SINGLELINE_COMMENT \/\/[^\n]*\n
422 REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
423 %%
424
425
426 {SINGLELINE_COMMENT}         {c(); /* single line comment */}
427 {MULTILINE_COMMENT}          {c(); /* multi line comment */}
428 [/][*]                       {syntaxerror("syntax error: unterminated comment", yytext);}
429
430 ^include{S}+{STRING}{S}*/\n    {c();handleInclude(yytext, yyleng, 1);}
431 ^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {c();handleInclude(yytext, yyleng, 0);}
432 {STRING}                     {c(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;}
433
434 <BEGINNING,REGEXPOK>{
435 {REGEXP}                     {c(); BEGIN(INITIAL);return m(T_REGEXP);} 
436 {HEXWITHSIGN}                {c(); BEGIN(INITIAL);return handlehex();}
437 {INTWITHSIGN}                {c(); BEGIN(INITIAL);return handleint();}
438 {FLOATWITHSIGN}              {c(); BEGIN(INITIAL);return handlefloat();}
439 }
440
441 \xef\xbb\xbf                 {/* utf 8 bom */}
442 {S}                          {c();}
443
444 {HEXINT}                     {c(); BEGIN(INITIAL);return handlehex();}
445 {INT}                        {c(); BEGIN(INITIAL);return handleint();}
446 {FLOAT}                      {c(); BEGIN(INITIAL);return handlefloat();}
447
448 3rr0r                        {/* for debugging: generates a tokenizer-level error */
449                               syntaxerror("3rr0r");}
450
451 {NAME}{S}*:{S}*for/{_}        {c();handleLabel(yytext, yyleng-3);return T_FOR;}
452 {NAME}{S}*:{S}*do/{_}         {c();handleLabel(yytext, yyleng-2);return T_DO;}
453 {NAME}{S}*:{S}*while/{_}      {c();handleLabel(yytext, yyleng-5);return T_WHILE;}
454 {NAME}{S}*:{S}*switch/{_}     {c();handleLabel(yytext, yyleng-6);return T_SWITCH;}
455 for                          {c();avm2_lval.id="";return T_FOR;}
456 do                           {c();avm2_lval.id="";return T_DO;}
457 while                        {c();avm2_lval.id="";return T_WHILE;}
458 switch                       {c();avm2_lval.id="";return T_SWITCH;}
459
460 [&][&]                       {c();BEGIN(REGEXPOK);return m(T_ANDAND);}
461 [|][|]                       {c();BEGIN(REGEXPOK);return m(T_OROR);}
462 [!][=]                       {c();BEGIN(REGEXPOK);return m(T_NE);}
463 [!][=][=]                    {c();BEGIN(REGEXPOK);return m(T_NEE);}
464 [=][=][=]                    {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);}
465 [=][=]                       {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
466 [>][=]                       {c();return m(T_GE);}
467 [<][=]                       {c();return m(T_LE);}
468 [-][-]                       {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
469 [+][+]                       {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
470 [+][=]                       {c();return m(T_PLUSBY);}
471 [-][=]                       {c();return m(T_MINUSBY);}
472 [/][=]                       {c();return m(T_DIVBY);}
473 [%][=]                       {c();return m(T_MODBY);}
474 [*][=]                       {c();return m(T_MULBY);}
475 [>][>][=]                    {c();return m(T_SHRBY);}
476 [<][<][=]                    {c();return m(T_SHLBY);}
477 [>][>][>][=]                 {c();return m(T_USHRBY);}
478 [<][<]                       {c();return m(T_SHL);}
479 [>][>][>]                    {c();return m(T_USHR);}
480 [>][>]                       {c();return m(T_SHR);}
481 \.\.\.                       {c();return m(T_DOTDOTDOT);}
482 \.\.                         {c();return m(T_DOTDOT);}
483 \.                           {c();return m('.');}
484 ::                           {c();return m(T_COLONCOLON);}
485 :                            {c();return m(':');}
486 instanceof                   {c();return m(KW_INSTANCEOF);}
487 implements                   {c();return m(KW_IMPLEMENTS);}
488 interface                    {c();return m(KW_INTERFACE);}
489 namespace                    {c();return m(KW_NAMESPACE);}
490 protected                    {c();return m(KW_PROTECTED);}
491 undefined                    {c();return m(KW_UNDEFINED);}
492 continue                     {c();return m(KW_CONTINUE);}
493 override                     {c();return m(KW_OVERRIDE);}
494 internal                     {c();return m(KW_INTERNAL);}
495 function                     {c();return m(KW_FUNCTION);}
496 default                      {c();return m(KW_DEFAULT);}
497 package                      {c();return m(KW_PACKAGE);}
498 private                      {c();return m(KW_PRIVATE);}
499 dynamic                      {c();return m(KW_DYNAMIC);}
500 extends                      {c();return m(KW_EXTENDS);}
501 delete                       {c();return m(KW_DELETE);}
502 return                       {c();return m(KW_RETURN);}
503 public                       {c();return m(KW_PUBLIC);}
504 native                       {c();return m(KW_NATIVE);}
505 static                       {c();return m(KW_STATIC);}
506 import                       {c();return m(KW_IMPORT);}
507 typeof                       {c();return m(KW_TYPEOF);}
508 throw                        {c();return m(KW_THROW);}
509 class                        {c();return m(KW_CLASS);}
510 const                        {c();return m(KW_CONST);}
511 catch                        {c();return m(KW_CATCH);}
512 final                        {c();return m(KW_FINAL);}
513 false                        {c();return m(KW_FALSE);}
514 break                        {c();return m(KW_BREAK);}
515 super                        {c();return m(KW_SUPER);}
516 each                         {c();return m(KW_EACH);}
517 void                         {c();return m(KW_VOID);}
518 true                         {c();return m(KW_TRUE);}
519 null                         {c();return m(KW_NULL);}
520 else                         {c();return m(KW_ELSE);}
521 case                         {c();return m(KW_CASE);}
522 with                         {c();return m(KW_WITH);}
523 use                          {c();return m(KW_USE);}
524 new                          {c();return m(KW_NEW);}
525 get                          {c();return m(KW_GET);}
526 set                          {c();return m(KW_SET);}
527 var                          {c();return m(KW_VAR);}
528 try                          {c();return m(KW_TRY);}
529 is                           {c();return m(KW_IS) ;}
530 in                           {c();return m(KW_IN) ;}
531 if                           {c();return m(KW_IF) ;}
532 as                           {c();return m(KW_AS);}
533 {NAME}                       {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);}
534
535 [+-\/*^~@$!%&\(=\[\]\{\}|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
536 [\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
537
.                            {/* No other rule matched: collect the rest of the
                                 current line (truncated to fit buf) and report
                                 it as a syntax error. */
                              char c1=yytext[0];
                              char buf[128];
                              buf[0] = yytext[0];
                              int t;
                              for(t=1;t<(int)sizeof(buf)-1;t++) {
                                  /* keep the int result so EOF is not confused
                                     with a data byte; some flex versions
                                     return 0 instead of EOF at end of input */
                                  int c = input();
                                  if(c=='\n' || c==EOF || c==0)  {
                                      break;
                                  }
                                  buf[t] = c;
                              }
                              buf[t] = 0;
                              if(c1>='0' && c1<='9')
                                  syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
                              else
                                  syntaxerror("syntax error: %s", buf);
                              printf("\n");
                              exit(1);
                              yyterminate();
                             }
557 <<EOF>>                      {c();
558                               void*b = leave_file();
559                               if (!b) {
560                                  yyterminate();
561                                  yy_delete_buffer(YY_CURRENT_BUFFER);
562                                  return m(T_EOF);
563                               } else {
564                                   yy_delete_buffer(YY_CURRENT_BUFFER);
565                                   yy_switch_to_buffer(b);
566                               }
567                              }
568
569 %%
570
/* Called by the scanner when the current input is exhausted. Always
   returns 1, telling it that no further input follows. Defined with a
   (void) prototype rather than an empty, non-prototype parameter list. */
int yywrap(void)
{
    return 1;
}
575
576 static char mbuf[256];
577 char*token2string(enum yytokentype nr, YYSTYPE v)
578 {
579     if(nr==T_STRING)     return "<string>";
580     else if(nr==T_INT)     return "<int>";
581     else if(nr==T_UINT)     return "<uint>";
582     else if(nr==T_BYTE)     return "<byte>";
583     else if(nr==T_FLOAT)     return "<float>";
584     else if(nr==T_REGEXP)     return "REGEXP";
585     else if(nr==T_EOF)        return "***END***";
586     else if(nr==T_GE)         return ">=";
587     else if(nr==T_LE)         return "<=";
588     else if(nr==T_MINUSMINUS) return "--";
589     else if(nr==T_PLUSPLUS)   return "++";
590     else if(nr==KW_IMPLEMENTS) return "implements";
591     else if(nr==KW_INTERFACE)  return "interface";
592     else if(nr==KW_NAMESPACE)  return "namespace";
593     else if(nr==KW_PROTECTED)  return "protected";
594     else if(nr==KW_OVERRIDE)   return "override";
595     else if(nr==KW_INTERNAL)   return "internal";
596     else if(nr==KW_FUNCTION)   return "function";
597     else if(nr==KW_PACKAGE)    return "package";
598     else if(nr==KW_PRIVATE)    return "private";
599     else if(nr==KW_BOOLEAN)    return "Boolean";
600     else if(nr==KW_DYNAMIC)    return "dynamic";
601     else if(nr==KW_EXTENDS)    return "extends";
602     else if(nr==KW_PUBLIC)     return "public";
603     else if(nr==KW_NATIVE)     return "native";
604     else if(nr==KW_STATIC)     return "static";
605     else if(nr==KW_IMPORT)     return "import";
606     else if(nr==KW_NUMBER)     return "number";
607     else if(nr==KW_CLASS)      return "class";
608     else if(nr==KW_CONST)      return "const";
609     else if(nr==KW_FINAL)      return "final";
610     else if(nr==KW_FALSE)      return "False";
611     else if(nr==KW_TRUE)       return "True";
612     else if(nr==KW_UINT)       return "uint";
613     else if(nr==KW_NULL)       return "null";
614     else if(nr==KW_ELSE)       return "else";
615     else if(nr==KW_USE)        return "use";
616     else if(nr==KW_INT)        return "int";
617     else if(nr==KW_NEW)        return "new";
618     else if(nr==KW_GET)        return "get";
619     else if(nr==KW_SET)        return "set";
620     else if(nr==KW_VAR)        return "var";
621     else if(nr==KW_IS)         return "is";
622     else if(nr==KW_AS)         return "as";
623     else if(nr==T_IDENTIFIER)  return "ID";
624     else {
625         sprintf(mbuf, "%d", nr);
626         return mbuf;
627     }
628 }
629
630 void initialize_scanner()
631 {
632     BEGIN(BEGINNING);
633 }
634