added GPL headers
[swftools.git] / lib / as3 / tokenizer.lex
1 /* tokenizer.lex
2
3    Routines for compiling Flash2 AVM2 ABC Actionscript
4
5    Extension module for the rfxswf library.
6    Part of the swftools package.
7
8    Copyright (c) 2008 Matthias Kramm <kramm@quiss.org>
9  
10    This program is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2 of the License, or
13    (at your option) any later version.
14
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
23 %{
24
25
26 #include <string.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include "../utf8.h"
31 #include "tokenizer.h"
32 #include "files.h"
33
34 static void countlines(char*text, int len) {
35     int t;
36     for(t=0;t<len;t++) {
37         if(text[t]=='\n') {
38             current_line++;
39             current_column=0;
40         } else {
41             current_column++;
42         }
43     }
44 }
45
/* Non-zero enables the trace output of dbg(). */
static int verbose = 1;

/* printf-style debug trace.

   Formats the message, strips all trailing newlines and prints it to
   stdout as "(tokenizer) <message>\n". Does nothing when `verbose` is 0.
   Messages longer than the internal buffer are truncated. */
static void dbg(const char*format, ...)
{
    char buf[1024];
    size_t l;
    va_list arglist;
    if(!verbose)
        return;
    buf[0] = 0; /* stays a valid string even if formatting fails */
    va_start(arglist, format);
    /* bounded write: an oversized message is cut off instead of
       overrunning the stack buffer */
    vsnprintf(buf, sizeof(buf), format, arglist);
    va_end(arglist);
    l = strlen(buf);
    while(l && buf[l-1]=='\n') {
        buf[--l] = 0;
    }
    printf("(tokenizer) ");
    printf("%s\n", buf);
    fflush(stdout);
}
66
67 void syntaxerror(const char*format, ...)
68 {
69     char buf[1024];
70     int l;
71     va_list arglist;
72     if(!verbose)
73         return;
74     va_start(arglist, format);
75     vsprintf(buf, format, arglist);
76     va_end(arglist);
77     fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename, current_line, current_column, buf);
78     fflush(stderr);
79     exit(1);
80 }
81
82
83 #ifndef YY_CURRENT_BUFFER
84 #define YY_CURRENT_BUFFER yy_current_buffer
85 #endif
86
87 void handleInclude(char*text, int len, char quotes)
88 {
89     char*filename = 0;
90     if(quotes) {
91         char*p1 = strchr(text, '"');
92         char*p2 = strrchr(text, '"');
93         if(!p1 || !p2 || p1==p2) {
94             syntaxerror("Invalid include in line %d\n", current_line);
95         }
96         *p2 = 0;
97         filename = strdup(p1+1);
98     } else {
99         int i1=0,i2=len;
100         // find start
101         while(!strchr(" \n\r\t", text[i1])) i1++;
102         // strip
103         while(strchr(" \n\r\t", text[i1])) i1++;
104         while(strchr(" \n\r\t", text[i2-1])) i2--;
105         if(i2!=len) text[i2]=0;
106         filename = strdup(&text[i1]);
107     }
108     
109     char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
110     yyin = fopen(fullfilename, "rb");
111     if (!yyin) {
112         syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
113     }
114
115     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
116     //BEGIN(INITIAL); keep context
117 }
118
119 char start_of_expression;
120
121 static inline int m(int type)
122 {
123     char*s = malloc(yyleng+1);
124     memcpy(s, yytext, yyleng);
125     s[yyleng]=0;
126
127     NEW(token_t,t);
128     t->type = type;
129     t->text = s;
130     avm2_lval.token = t;
131     return type;
132 }
133
134 static char numberbuf[64];
135 static inline int handlenumber()
136 {
137     if(yyleng>sizeof(numberbuf)-1)
138         syntaxerror("decimal number overflow");
139
140     char*s = numberbuf;
141     memcpy(s, yytext, yyleng);
142     s[yyleng]=0;
143
144     int t;
145     char is_float=0;
146     for(t=0;t<yyleng;t++) {
147         if(yytext[t]=='.') {
148             if(is_float)
149                 syntaxerror("Invalid number");
150             is_float=1;
151         } else if(!strchr("-0123456789", yytext[t])) {
152             syntaxerror("Invalid number");
153         }
154     }
155     if(is_float) {
156         avm2_lval.number_float = atof(s);
157         return T_FLOAT;
158     } 
159     char l = (yytext[0]=='-');
160
161     char*max = l?"1073741824":"2147483647";
162     if(yyleng-l>10)
163         syntaxerror("integer overflow");
164     if(yyleng-l==10) {
165         int t;
166         for(t=0;t<yyleng-l;t++) {
167             if(yytext[l+t]>max[t])
168                 syntaxerror("integer overflow %s > %s", s+l,max);
169             else if(yytext[l+t]<max[t])
170                 break;
171         }
172     }
173     if(yytext[0]=='-') {
174         int v = atoi(s);
175         avm2_lval.number_int = v;
176         if(v>-128)
177             return T_BYTE;
178         else if(v>=-32768)
179             return T_SHORT;
180         else
181             return T_INT;
182     } else {
183         unsigned int v = 0;
184         for(t=0;t<yyleng;t++) {
185             v*=10;
186             v+=yytext[t]-'0';
187         }
188         avm2_lval.number_uint = v;
189         if(v<128)
190             return T_BYTE;
191         else if(v<32768)
192             return T_SHORT;
193         else
194             return T_UINT;
195     }
196 }
197
/* Defined at the end of this file; selects the scanner's first start
   condition. */
void initialize_scanner(void);
/* Hook that flex executes once, before the first scan. */
#define YY_USER_INIT initialize_scanner();

/* Account for the text of the current match in current_line and
   current_column. Wrapped in do/while(0) so that `c();` is a single
   statement wherever it is used. */
#define c() do {countlines(yytext, yyleng);} while(0)
202
203 %}
204
/* Inclusive start conditions. REGEXPOK: a regular expression literal may
   start at this point. BEGINNING: state selected by initialize_scanner(). */
%s REGEXPOK
%s BEGINNING

NAME     [a-zA-Z_][a-zA-Z0-9_\\]*

NUMBER   -?[0-9]+(\.[0-9]*)?

STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
S        [ \n\r\t]
/* A comment body is any run of non-star characters, or stars followed by
   something that is neither a star nor a slash. This makes the pattern
   stop at the first terminator; a body that can match arbitrary
   characters would let the longest-match rule run on to the last
   terminator in the file and swallow the code in between. */
MULTILINE_COMMENT [/][*]([^*]|[*]+[^*/])*[*]+[/]
/* The newline is optional so that a comment on the very last line of a
   file without a trailing newline is still recognized. */
SINGLELINE_COMMENT \/\/[^\n]*\n?
REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
217 %%
218
219
{SINGLELINE_COMMENT}         {c(); /* single line comment: skipped */}
{MULTILINE_COMMENT}          {c(); /* multi line comment: skipped */}
[/][*]                       {/* comment opener that the rule above could not complete */
                              syntaxerror("syntax error: unterminated comment");}

^include{S}+{STRING}{S}*/\n    {/* include with a quoted file name */
                                c();handleInclude(yytext, yyleng, 1);}
^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {/* include with a bare file name */
                                c();handleInclude(yytext, yyleng, 0);}
{STRING}                     {c(); BEGIN(INITIAL);return m(T_STRING);}

<BEGINNING,REGEXPOK>{
{REGEXP}                     {c(); BEGIN(INITIAL);return m(T_REGEXP);} 
}

\xef\xbb\xbf                 {/* utf 8 bom: ignored, does not move the column */}
{S}                          {c();}

{NUMBER}                     {c(); BEGIN(INITIAL);return handlenumber();}
236
237 [>][=]                       {return m(T_GE);}
238 [<][=]                       {return m(T_LE);}
239 [-][-]                       {BEGIN(INITIAL);return m(T_MINUSMINUS);}
240 [+][+]                       {BEGIN(INITIAL);return m(T_PLUSPLUS);}
241 ==                           {BEGIN(REGEXPOK);return m(T_EQEQ);}
242 \.\.                         {return m(T_DOTDOT);}
243 \.                           {return m('.');}
244 ::                           {return m(T_COLONCOLON);}
245 :                            {return m(':');}
246 implements                   {return m(KW_IMPLEMENTS);}
247 interface                    {return m(KW_INTERFACE);}
248 namespace                    {return m(KW_NAMESPACE);}
249 protected                    {return m(KW_PROTECTED);}
250 override                     {return m(KW_OVERRIDE);}
251 internal                     {return m(KW_INTERNAL);}
252 function                     {return m(KW_FUNCTION);}
253 package                      {return m(KW_PACKAGE);}
254 private                      {return m(KW_PRIVATE);}
255 Boolean                      {return m(KW_BOOLEAN);}
256 dynamic                      {return m(KW_DYNAMIC);}
257 extends                      {return m(KW_EXTENDS);}
258 public                       {return m(KW_PUBLIC);}
259 native                       {return m(KW_NATIVE);}
260 static                       {return m(KW_STATIC);}
261 import                       {return m(KW_IMPORT);}
262 Number                       {return m(KW_NUMBER);}
263 class                        {return m(KW_CLASS);}
264 const                        {return m(KW_CONST);}
265 final                        {return m(KW_FINAL);}
266 False                        {return m(KW_FALSE);}
267 True                         {return m(KW_TRUE);}
268 uint                         {return m(KW_UINT);}
269 null                         {return m(KW_NULL);}
270 use                          {return m(KW_USE);}
271 int                          {return m(KW_INT);}
272 new                          {return m(KW_NEW);}
273 get                          {return m(KW_GET);}
274 for                          {return m(KW_FOR);}
275 set                          {return m(KW_SET);}
276 var                          {return m(KW_VAR);}
277 is                           {return m(KW_IS) ;}
278 as                           {return m(KW_AS);}
279 {NAME}                       {c();BEGIN(INITIAL);return m(T_IDENTIFIER);}
280
281 [+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
282 [\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
283
.                            {/* Anything no other rule matched: collect the rest of
                                 the line, bounded by the buffer size, and report it
                                 as a syntax error. */
                              char c1=yytext[0];
                              char buf[128];
                              int t;
                              buf[0] = c1;
                              for(t=1;t<(int)sizeof(buf)-1;t++) {
                                  /* int, so that EOF stays distinguishable from a
                                     character; 0 is treated as end of input too */
                                  int ch = input();
                                  if(ch=='\n' || ch==EOF || ch==0)
                                      break;
                                  buf[t] = (char)ch;
                              }
                              buf[t] = 0; /* always terminated, also on a full buffer */
                              if(c1>='0' && c1<='9')
                                  syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
                              else
                                  syntaxerror("syntax error: %s", buf);
                              printf("\n");
                              exit(1);
                             }
<<EOF>>                      {/* End of the current input: resume the buffer handed
                                 back by leave_file(), or stop scanning if there is
                                 none. */
                              c();
                              void*resume = leave_file();
                              if (resume) {
                                  yy_delete_buffer(YY_CURRENT_BUFFER);
                                  yy_switch_to_buffer(resume);
                              } else {
                                  yyterminate();
                                  /* NOTE(review): yyterminate() returns from the
                                     scanner, so the two statements below are never
                                     executed - confirm whether T_EOF was meant to
                                     be delivered instead. */
                                  yy_delete_buffer(YY_CURRENT_BUFFER);
                                  return m(T_EOF);
                              }
                             }
314
315 %%
316
/* flex end-of-input hook. Always returns 1, i.e. there is no further
   input to wrap around to. */
int yywrap(void)
{
    return 1;
}
321
322 static char mbuf[256];
323 char*token2string(token_t*t)
324 {
325     int nr=t->type;
326     if(nr==T_STRING)     return "<string>";
327     else if(nr==T_INT)     return "<int>";
328     else if(nr==T_UINT)     return "<uint>";
329     else if(nr==T_FLOAT)     return "<float>";
330     else if(nr==T_REGEXP)     return "REGEXP";
331     else if(nr==T_EOF)        return "***END***";
332     else if(nr==T_GE)         return ">=";
333     else if(nr==T_LE)         return "<=";
334     else if(nr==T_MINUSMINUS) return "--";
335     else if(nr==T_PLUSPLUS)   return "++";
336     else if(nr==KW_IMPLEMENTS) return "implements";
337     else if(nr==KW_INTERFACE)  return "interface";
338     else if(nr==KW_NAMESPACE)  return "namespace";
339     else if(nr==KW_PROTECTED)  return "protected";
340     else if(nr==KW_OVERRIDE)   return "override";
341     else if(nr==KW_INTERNAL)   return "internal";
342     else if(nr==KW_FUNCTION)   return "function";
343     else if(nr==KW_PACKAGE)    return "package";
344     else if(nr==KW_PRIVATE)    return "private";
345     else if(nr==KW_BOOLEAN)    return "Boolean";
346     else if(nr==KW_DYNAMIC)    return "dynamic";
347     else if(nr==KW_EXTENDS)    return "extends";
348     else if(nr==KW_PUBLIC)     return "public";
349     else if(nr==KW_NATIVE)     return "native";
350     else if(nr==KW_STATIC)     return "static";
351     else if(nr==KW_IMPORT)     return "import";
352     else if(nr==KW_NUMBER)     return "number";
353     else if(nr==KW_CLASS)      return "class";
354     else if(nr==KW_CONST)      return "const";
355     else if(nr==KW_FINAL)      return "final";
356     else if(nr==KW_FALSE)      return "False";
357     else if(nr==KW_TRUE)       return "True";
358     else if(nr==KW_UINT)       return "uint";
359     else if(nr==KW_NULL)       return "null";
360     else if(nr==KW_USE)        return "use";
361     else if(nr==KW_INT)        return "int";
362     else if(nr==KW_NEW)        return "new";
363     else if(nr==KW_GET)        return "get";
364     else if(nr==KW_FOR)        return "for";
365     else if(nr==KW_SET)        return "set";
366     else if(nr==KW_VAR)        return "var";
367     else if(nr==KW_IS)         return "is";
368     else if(nr==KW_AS)         return "as";
369     else if(nr==T_IDENTIFIER) {
370         if(strlen(t->text)>sizeof(mbuf)-1)
371             return "ID(...)";
372         sprintf(mbuf, "ID(%s)", t->text);
373         return mbuf;
374     } else {
375         sprintf(mbuf, "%d", nr);
376         return mbuf;
377     }
378 }
379
380 void initialize_scanner()
381 {
382     BEGIN(BEGINNING);
383 }
384