fixed number parsing
[swftools.git] / lib / as3 / tokenizer.lex
1 %{
2 #include <string.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <stdarg.h>
6 #include "../utf8.h"
7 #include "tokenizer.h"
8 #include "files.h"
9
10 static void countlines(char*text, int len) {
11     int t;
12     for(t=0;t<len;t++) {
13         if(text[t]=='\n') {
14             current_line++;
15             current_column=0;
16         } else {
17             current_column++;
18         }
19     }
20 }
21
/* Non-zero enables dbg() output; syntaxerror() is gated on it as well. */
static int verbose = 1;

/* Print a printf-style debug message to stdout as one line prefixed
   with "(tokenizer) ". Trailing newlines of the formatted message are
   stripped so that exactly one newline is emitted. Does nothing when
   verbose is zero. Messages longer than the internal buffer are
   truncated rather than written past its end. */
static void dbg(const char*format, ...)
{
    char buf[1024];
    size_t l;
    va_list arglist;
    if(!verbose)
        return;
    va_start(arglist, format);
    /* bounded formatting: the message length depends on the arguments */
    if(vsnprintf(buf, sizeof(buf), format, arglist) < 0)
        buf[0] = 0;
    va_end(arglist);
    l = strlen(buf);
    while(l && buf[l-1]=='\n') {
        buf[--l] = 0;
    }
    printf("(tokenizer) %s\n", buf);
    fflush(stdout);
}
42
43 void syntaxerror(const char*format, ...)
44 {
45     char buf[1024];
46     int l;
47     va_list arglist;
48     if(!verbose)
49         return;
50     va_start(arglist, format);
51     vsprintf(buf, format, arglist);
52     va_end(arglist);
53     fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename, current_line, current_column, buf);
54     fflush(stderr);
55     exit(1);
56 }
57
58
59 #ifndef YY_CURRENT_BUFFER
60 #define YY_CURRENT_BUFFER yy_current_buffer
61 #endif
62
63 void handleInclude(char*text, int len, char quotes)
64 {
65     char*filename = 0;
66     if(quotes) {
67         char*p1 = strchr(text, '"');
68         char*p2 = strrchr(text, '"');
69         if(!p1 || !p2 || p1==p2) {
70             syntaxerror("Invalid include in line %d\n", current_line);
71         }
72         *p2 = 0;
73         filename = strdup(p1+1);
74     } else {
75         int i1=0,i2=len;
76         // find start
77         while(!strchr(" \n\r\t", text[i1])) i1++;
78         // strip
79         while(strchr(" \n\r\t", text[i1])) i1++;
80         while(strchr(" \n\r\t", text[i2-1])) i2--;
81         if(i2!=len) text[i2]=0;
82         filename = strdup(&text[i1]);
83     }
84     
85     char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
86     yyin = fopen(fullfilename, "rb");
87     if (!yyin) {
88         syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
89     }
90
91     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
92     //BEGIN(INITIAL); keep context
93 }
94
95 char start_of_expression;
96
97 static inline int m(int type)
98 {
99     char*s = malloc(yyleng+1);
100     memcpy(s, yytext, yyleng);
101     s[yyleng]=0;
102
103     NEW(token_t,t);
104     t->type = type;
105     t->text = s;
106     avm2_lval.token = t;
107     return type;
108 }
109
110 static char numberbuf[64];
111 static inline int handlenumber()
112 {
113     if(yyleng>sizeof(numberbuf)-1)
114         syntaxerror("decimal number overflow");
115
116     char*s = numberbuf;
117     memcpy(s, yytext, yyleng);
118     s[yyleng]=0;
119
120     int t;
121     char is_float=0;
122     for(t=0;t<yyleng;t++) {
123         if(yytext[t]=='.') {
124             if(is_float)
125                 syntaxerror("Invalid number");
126             is_float=1;
127         } else if(!strchr("-0123456789", yytext[t])) {
128             syntaxerror("Invalid number");
129         }
130     }
131     if(is_float) {
132         avm2_lval.number_float = atof(s);
133         return T_FLOAT;
134     } 
135     char l = (yytext[0]=='-');
136
137     char*max = l?"1073741824":"2147483647";
138     if(yyleng-l>10)
139         syntaxerror("integer overflow");
140     if(yyleng-l==10) {
141         int t;
142         for(t=0;t<yyleng-l;t++) {
143             if(yytext[l+t]>max[t])
144                 syntaxerror("integer overflow %s > %s", s+l,max);
145             else if(yytext[l+t]<max[t])
146                 break;
147         }
148     }
149     if(yytext[0]=='-') {
150         int v = atoi(s);
151         avm2_lval.number_int = v;
152         if(v>-128)
153             return T_BYTE;
154         else if(v>=-32768)
155             return T_SHORT;
156         else
157             return T_INT;
158     } else {
159         unsigned int v = 0;
160         for(t=0;t<yyleng;t++) {
161             v*=10;
162             v+=yytext[t]-'0';
163         }
164         avm2_lval.number_uint = v;
165         if(v<128)
166             return T_BYTE;
167         else if(v<32768)
168             return T_SHORT;
169         else
170             return T_UINT;
171     }
172 }
173
174 void initialize_scanner();
175 #define YY_USER_INIT initialize_scanner();
176
177 #define c() {countlines(yytext, yyleng);}
178
179 %}
180
%s REGEXPOK
%s BEGINNING

NAME     [a-zA-Z_][a-zA-Z0-9_\\]*

NUMBER   -?[0-9]+(\.[0-9]*)?

STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
S        [ \n\r\t]
/* A block comment body is any sequence of non-star characters and of
   star runs that are followed by something other than a star or a
   slash; the comment ends at the first star run followed by a slash.
   Written this way the match can never run past the first terminator,
   even when the terminator is preceded by several stars. */
MULTILINE_COMMENT [/][*]([^*]|[*]+[^*/])*[*]+[/]
SINGLELINE_COMMENT \/\/[^\n]*\n
REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
%%
193 %%
194
195
{SINGLELINE_COMMENT}         {c(); /* single line comment; every action in this
                                      section starts with c() so that the line and
                                      column counters cover all consumed text */}
{MULTILINE_COMMENT}          {c(); /* multi line comment */}
[/][*]                       {syntaxerror("syntax error: unterminated comment");}

^include{S}+{STRING}{S}*/\n    {c();handleInclude(yytext, yyleng, 1);}
^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {c();handleInclude(yytext, yyleng, 0);}
{STRING}                     {c(); BEGIN(INITIAL);return m(T_STRING);}

<BEGINNING,REGEXPOK>{
{REGEXP}                     {c(); BEGIN(INITIAL);return m(T_REGEXP);}
}

\xef\xbb\xbf                 {/* utf 8 bom */}
{S}                          {c();}

{NUMBER}                     {c(); BEGIN(INITIAL);return handlenumber();}

[>][=]                       {c();return m(T_GE);}
[<][=]                       {c();return m(T_LE);}
[-][-]                       {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
[+][+]                       {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
==                           {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
\.\.                         {c();return m(T_DOTDOT);}
\.                           {c();return m('.');}
::                           {c();return m(T_COLONCOLON);}
:                            {c();return m(':');}
implements                   {c();return m(KW_IMPLEMENTS);}
interface                    {c();return m(KW_INTERFACE);}
namespace                    {c();return m(KW_NAMESPACE);}
protected                    {c();return m(KW_PROTECTED);}
override                     {c();return m(KW_OVERRIDE);}
internal                     {c();return m(KW_INTERNAL);}
function                     {c();return m(KW_FUNCTION);}
package                      {c();return m(KW_PACKAGE);}
private                      {c();return m(KW_PRIVATE);}
Boolean                      {c();return m(KW_BOOLEAN);}
dynamic                      {c();return m(KW_DYNAMIC);}
extends                      {c();return m(KW_EXTENDS);}
public                       {c();return m(KW_PUBLIC);}
native                       {c();return m(KW_NATIVE);}
static                       {c();return m(KW_STATIC);}
import                       {c();return m(KW_IMPORT);}
Number                       {c();return m(KW_NUMBER);}
class                        {c();return m(KW_CLASS);}
const                        {c();return m(KW_CONST);}
final                        {c();return m(KW_FINAL);}
False                        {c();return m(KW_FALSE);}
True                         {c();return m(KW_TRUE);}
uint                         {c();return m(KW_UINT);}
null                         {c();return m(KW_NULL);}
use                          {c();return m(KW_USE);}
int                          {c();return m(KW_INT);}
new                          {c();return m(KW_NEW);}
get                          {c();return m(KW_GET);}
for                          {c();return m(KW_FOR);}
set                          {c();return m(KW_SET);}
var                          {c();return m(KW_VAR);}
is                           {c();return m(KW_IS) ;}
as                           {c();return m(KW_AS);}
{NAME}                       {c();BEGIN(INITIAL);return m(T_IDENTIFIER);}

[+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
[\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
259
.                            {/* Catch-all for a character no other rule accepts:
                                 collect the rest of the line (at most 127
                                 characters) and report it as a syntax error. */
                              char c1=yytext[0];
                              char buf[128];
                              int t;
                              buf[0] = yytext[0];
                              for(t=1;t<(int)sizeof(buf)-1;t++) {
                                  /* keep the int result: a char cannot be
                                     compared reliably with EOF. Some flex
                                     versions reportedly return 0 instead of
                                     EOF at end of input, so stop on both. */
                                  int ch = input();
                                  if(ch=='\n' || ch==EOF || ch==0)
                                      break;
                                  buf[t] = (char)ch;
                              }
                              /* always terminated, also when the line is longer than buf */
                              buf[t] = 0;
                              if(c1>='0' && c1<='9')
                                  syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
                              else
                                  syntaxerror("syntax error: %s", buf);
                              /* only reached if syntaxerror() returned */
                              printf("\n");
                              exit(1);
                              yyterminate();
                             }
<<EOF>>                      {c();
                              /* End of the current input: leave_file() yields the
                                 buffer to resume, or NULL when there is none. With
                                 no buffer left the scanner terminates; otherwise the
                                 finished buffer is deleted and scanning continues in
                                 the returned one.
                                 NOTE(review): the original had a buffer delete and a
                                 "return m(T_EOF)" after yyterminate(). flex documents
                                 yyterminate() as returning from the scanner, so those
                                 statements never ran and were dropped; confirm that
                                 T_EOF is not expected by the parser. */
                              void*b = leave_file();
                              if (!b) {
                                 yyterminate();
                              } else {
                                  yy_delete_buffer(YY_CURRENT_BUFFER);
                                  yy_switch_to_buffer(b);
                              }
                             }
290
291 %%
292
/* End-of-input hook called by the generated scanner.
   Always answers 1. */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };
    return NO_MORE_INPUT;
}
297
298 static char mbuf[256];
299 char*token2string(token_t*t)
300 {
301     int nr=t->type;
302     if(nr==T_STRING)     return "<string>";
303     else if(nr==T_INT)     return "<int>";
304     else if(nr==T_UINT)     return "<uint>";
305     else if(nr==T_FLOAT)     return "<float>";
306     else if(nr==T_REGEXP)     return "REGEXP";
307     else if(nr==T_EOF)        return "***END***";
308     else if(nr==T_GE)         return ">=";
309     else if(nr==T_LE)         return "<=";
310     else if(nr==T_MINUSMINUS) return "--";
311     else if(nr==T_PLUSPLUS)   return "++";
312     else if(nr==KW_IMPLEMENTS) return "implements";
313     else if(nr==KW_INTERFACE)  return "interface";
314     else if(nr==KW_NAMESPACE)  return "namespace";
315     else if(nr==KW_PROTECTED)  return "protected";
316     else if(nr==KW_OVERRIDE)   return "override";
317     else if(nr==KW_INTERNAL)   return "internal";
318     else if(nr==KW_FUNCTION)   return "function";
319     else if(nr==KW_PACKAGE)    return "package";
320     else if(nr==KW_PRIVATE)    return "private";
321     else if(nr==KW_BOOLEAN)    return "Boolean";
322     else if(nr==KW_DYNAMIC)    return "dynamic";
323     else if(nr==KW_EXTENDS)    return "extends";
324     else if(nr==KW_PUBLIC)     return "public";
325     else if(nr==KW_NATIVE)     return "native";
326     else if(nr==KW_STATIC)     return "static";
327     else if(nr==KW_IMPORT)     return "import";
328     else if(nr==KW_NUMBER)     return "number";
329     else if(nr==KW_CLASS)      return "class";
330     else if(nr==KW_CONST)      return "const";
331     else if(nr==KW_FINAL)      return "final";
332     else if(nr==KW_FALSE)      return "False";
333     else if(nr==KW_TRUE)       return "True";
334     else if(nr==KW_UINT)       return "uint";
335     else if(nr==KW_NULL)       return "null";
336     else if(nr==KW_USE)        return "use";
337     else if(nr==KW_INT)        return "int";
338     else if(nr==KW_NEW)        return "new";
339     else if(nr==KW_GET)        return "get";
340     else if(nr==KW_FOR)        return "for";
341     else if(nr==KW_SET)        return "set";
342     else if(nr==KW_VAR)        return "var";
343     else if(nr==KW_IS)         return "is";
344     else if(nr==KW_AS)         return "as";
345     else if(nr==T_IDENTIFIER) {
346         if(strlen(t->text)>sizeof(mbuf)-1)
347             return "ID(...)";
348         sprintf(mbuf, "ID(%s)", t->text);
349         return mbuf;
350     } else {
351         sprintf(mbuf, "%d", nr);
352         return mbuf;
353     }
354 }
355
356 void initialize_scanner()
357 {
358     BEGIN(BEGINNING);
359 }
360