fixed some bugs in handlenumber()
[swftools.git] / lib / as3 / tokenizer.lex
1 %{
2 #include <string.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <stdarg.h>
6 #include "../utf8.h"
7 #include "tokenizer.h"
8 #include "files.h"
9
10 static void countlines(char*text, int len) {
11     int t;
12     for(t=0;t<len;t++) {
13         if(text[t]=='\n') {
14             current_line++;
15             current_column=0;
16         } else {
17             current_column++;
18         }
19     }
20 }
21
/* When zero, dbg() prints nothing. */
static int verbose = 1;

/*
 * Print a printf-style debug message to stdout, prefixed with
 * "(tokenizer) " and terminated by exactly one newline (any trailing
 * newlines produced by the format are stripped first).
 *
 * Messages longer than the internal buffer are truncated.
 */
static void dbg(const char*format, ...)
{
    char buf[1024];
    size_t l;
    va_list arglist;
    if(!verbose)
        return;
    va_start(arglist, format);
    /* bounded: an over-long message is cut off instead of smashing the stack */
    vsnprintf(buf, sizeof(buf), format, arglist);
    va_end(arglist);
    l = strlen(buf);
    while(l && buf[l-1]=='\n') {
        buf[l-1] = 0;
        l--;
    }
    printf("(tokenizer) ");
    printf("%s\n", buf);
    fflush(stdout);
}
42
43 void syntaxerror(const char*format, ...)
44 {
45     char buf[1024];
46     int l;
47     va_list arglist;
48     if(!verbose)
49         return;
50     va_start(arglist, format);
51     vsprintf(buf, format, arglist);
52     va_end(arglist);
53     fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename, current_line, current_column, buf);
54     fflush(stderr);
55     exit(1);
56 }
57
58
59 #ifndef YY_CURRENT_BUFFER
60 #define YY_CURRENT_BUFFER yy_current_buffer
61 #endif
62
63 void handleInclude(char*text, int len, char quotes)
64 {
65     char*filename = 0;
66     if(quotes) {
67         char*p1 = strchr(text, '"');
68         char*p2 = strrchr(text, '"');
69         if(!p1 || !p2 || p1==p2) {
70             syntaxerror("Invalid include in line %d\n", current_line);
71         }
72         *p2 = 0;
73         filename = strdup(p1+1);
74     } else {
75         int i1=0,i2=len;
76         // find start
77         while(!strchr(" \n\r\t", text[i1])) i1++;
78         // strip
79         while(strchr(" \n\r\t", text[i1])) i1++;
80         while(strchr(" \n\r\t", text[i2-1])) i2--;
81         if(i2!=len) text[i2]=0;
82         filename = strdup(&text[i1]);
83     }
84     
85     char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
86     yyin = fopen(fullfilename, "rb");
87     if (!yyin) {
88         syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
89     }
90
91     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
92     //BEGIN(INITIAL); keep context
93 }
94
95 char start_of_expression;
96
97 static inline int m(int type)
98 {
99     char*s = malloc(yyleng+1);
100     memcpy(s, yytext, yyleng);
101     s[yyleng]=0;
102
103     NEW(token_t,t);
104     t->type = type;
105     t->text = s;
106     avm2_lval.token = t;
107     return type;
108 }
109
110 static char numberbuf[64];
111 static inline int handlenumber()
112 {
113     if(yyleng>sizeof(numberbuf)-1)
114         syntaxerror("decimal number overflow");
115
116     char*s = numberbuf;
117     memcpy(s, yytext, yyleng);
118     s[yyleng]=0;
119
120     int t;
121     char is_float=0;
122     for(t=0;t<yyleng;t++) {
123         if(yytext[t]=='.') {
124             if(is_float)
125                 syntaxerror("Invalid number");
126             is_float=1;
127         } else if(!strchr("-0123456789", yytext[t])) {
128             syntaxerror("Invalid number");
129         }
130     }
131     if(is_float) {
132         avm2_lval.number_float = atof(s);
133         return T_FLOAT;
134     } 
135     char l = (yytext[0]=='-');
136
137     char*max = l?"1073741824":"2147483647";
138     if(yyleng-l>10)
139         syntaxerror("integer overflow");
140     if(yyleng-l==10) {
141         int t;
142         for(t=0;t<yyleng-l;t++) {
143             if(yytext[l+t]>max[t])
144                 syntaxerror("integer overflow %s > %s", s+l,max);
145             else if(yytext[l+t]<max[t])
146                 break;
147         }
148     }
149     if(yytext[0]=='-') {
150         avm2_lval.number_int = atoi(s);
151         return T_INT;
152     } else {
153         unsigned int v = 0;
154         for(t=0;t<yyleng;t++) {
155             v*=10;
156             v+=yytext[t]-'0';
157         }
158         avm2_lval.number_uint = v;
159         if(v<256)
160             return T_BYTE;
161         /* useless- numbers are usually smaller if stored in the constant pool
162           else if(v<0x80000000u)
163             return T_SHORT;*/
164         else
165             return T_UINT;
166     }
167 }
168
/* Run initialize_scanner() through flex's YY_USER_INIT hook. */
169 void initialize_scanner();
170 #define YY_USER_INIT initialize_scanner();
171
/* c(): account for the current match in the line/column counters.
   Used at the start of rule actions. */
172 #define c() {countlines(yytext, yyleng);}
173
174 %}
175
    /* Inclusive start conditions.  The {REGEXP} rule is only active in
       these two; initialize_scanner() starts the scanner in BEGINNING and
       the operator/punctuation rules switch to REGEXPOK. */
176 %s REGEXPOK
177 %s BEGINNING
178
    /* Identifier: letter or underscore first, then letters, digits,
       underscores or backslashes. */
179 NAME     [a-zA-Z_][a-zA-Z0-9_\\]*
180
    /* Optional minus sign, digits, optional fractional part. */
181 NUMBER   -?[0-9]+(\.[0-9]*)?
182
    /* Double- or single-quoted string on one line; a backslash escapes
       any following byte. */
183 STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
    /* A single whitespace character. */
184 S        [ \n\r\t]
    /* NOTE(review): the [\x00-\x31] range ends at 0x31 (the digit '1');
       0x1f may have been intended -- confirm. */
185 MULTILINE_COMMENT [/][*]([*][^/]|[^*]|[\x00-\x31])*[*]+[/]
    /* Two slashes up to and including the terminating newline. */
186 SINGLELINE_COMMENT \/\/[^\n]*\n
    /* Slash-delimited regular expression on one line, followed by
       optional flag letters. */
187 REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
188 %%
189
190
    /* Comments are consumed without producing a token; a comment opener
       that did not match a complete comment is an error.
       NOTE(review): the yytext argument below is not used by the format
       string. */
191 {SINGLELINE_COMMENT}         {c(); /* single line comment */}
192 {MULTILINE_COMMENT}          {c(); /* multi line comment */}
193 [/][*]                       {syntaxerror("syntax error: unterminated comment", yytext);}
194
    /* include directives at the start of a line, with a quoted or a bare
       file name; the newline is trailing context and is not consumed. */
195 ^include{S}+{STRING}{S}*/\n    {c();handleInclude(yytext, yyleng, 1);}
196 ^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {c();handleInclude(yytext, yyleng, 0);}
197 {STRING}                     {c(); BEGIN(INITIAL);return m(T_STRING);}
198
    /* A regular expression literal is only recognised in the BEGINNING
       and REGEXPOK start conditions. */
199 <BEGINNING,REGEXPOK>{
200 {REGEXP}                     {c(); BEGIN(INITIAL);return m(T_REGEXP);} 
201 }
202
    /* The UTF-8 byte order mark is skipped without touching the position
       counters; whitespace only updates them. */
203 \xef\xbb\xbf                 {/* utf 8 bom */}
204 {S}                          {c();}
205
206 {NUMBER}                     {c(); BEGIN(INITIAL);return handlenumber();}
207
208 [>][=]                       {return m(T_GE);}
209 [<][=]                       {return m(T_LE);}
210 [-][-]                       {BEGIN(INITIAL);return m(T_MINUSMINUS);}
211 [+][+]                       {BEGIN(INITIAL);return m(T_PLUSPLUS);}
212 ==                           {BEGIN(REGEXPOK);return m(T_EQEQ);}
213 \.\.                         {return m(T_DOTDOT);}
214 \.                           {return m('.');}
215 ::                           {return m(T_COLONCOLON);}
216 :                            {return m(':');}
217 implements                   {return m(KW_IMPLEMENTS);}
218 interface                    {return m(KW_INTERFACE);}
219 namespace                    {return m(KW_NAMESPACE);}
220 protected                    {return m(KW_PROTECTED);}
221 override                     {return m(KW_OVERRIDE);}
222 internal                     {return m(KW_INTERNAL);}
223 function                     {return m(KW_FUNCTION);}
224 package                      {return m(KW_PACKAGE);}
225 private                      {return m(KW_PRIVATE);}
226 Boolean                      {return m(KW_BOOLEAN);}
227 dynamic                      {return m(KW_DYNAMIC);}
228 extends                      {return m(KW_EXTENDS);}
229 public                       {return m(KW_PUBLIC);}
230 native                       {return m(KW_NATIVE);}
231 static                       {return m(KW_STATIC);}
232 import                       {return m(KW_IMPORT);}
233 Number                       {return m(KW_NUMBER);}
234 class                        {return m(KW_CLASS);}
235 const                        {return m(KW_CONST);}
236 final                        {return m(KW_FINAL);}
237 False                        {return m(KW_FALSE);}
238 True                         {return m(KW_TRUE);}
239 uint                         {return m(KW_UINT);}
240 null                         {return m(KW_NULL);}
241 use                          {return m(KW_USE);}
242 int                          {return m(KW_INT);}
243 new                          {return m(KW_NEW);}
244 get                          {return m(KW_GET);}
245 for                          {return m(KW_FOR);}
246 set                          {return m(KW_SET);}
247 var                          {return m(KW_VAR);}
248 is                           {return m(KW_IS) ;}
249 as                           {return m(KW_AS);}
250 {NAME}                       {c();BEGIN(INITIAL);return m(T_IDENTIFIER);}
251
252 [+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
253 [\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
254
.                            {/* Catch-all: no other rule matched.  Collect the
                                 rest of the line for the error message and
                                 abort. */
                              char c1=yytext[0];
                              char buf[128];
                              int t;
                              buf[0] = yytext[0];
                              for(t=1;t<(int)sizeof(buf)-1;t++) {
                                  /* keep the result in an int so that EOF
                                     is distinguishable from a data byte;
                                     some flex versions return 0 instead of
                                     EOF at end of input */
                                  int ch = input();
                                  if(ch=='\n' || ch==EOF || ch==0)
                                      break;
                                  buf[t] = ch;
                              }
                              /* always terminated, even if the line is
                                 longer than the buffer */
                              buf[t] = 0;
                              if(c1>='0' && c1<='9')
                                  syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
                              else
                                  syntaxerror("syntax error: %s", buf);
                              printf("\n");
                              exit(1);
                              yyterminate();
                             }
274 <<EOF>>                      {c();
                              /* End of the current input buffer.  leave_file()
                                 yields the buffer to resume (presumably the one
                                 passed to enter_file() by handleInclude --
                                 confirm in files.h), or a null pointer when
                                 there is nothing left to resume. */
275                               void*b = leave_file();
276                               if (!b) {
                                 /* NOTE(review): yyterminate() returns from
                                    the scanner, so the two statements after it
                                    are never reached and T_EOF is not
                                    returned from here. */
277                                  yyterminate();
278                                  yy_delete_buffer(YY_CURRENT_BUFFER);
279                                  return m(T_EOF);
280                               } else {
                                  /* drop the finished buffer and continue
                                     scanning the one we came from */
281                                   yy_delete_buffer(YY_CURRENT_BUFFER);
282                                   yy_switch_to_buffer(b);
283                               }
284                              }
285
286 %%
287
/*
 * flex end-of-input hook.  Always returns 1: there is never a further
 * input to continue with at this level.
 */
int yywrap()
{
    const int no_more_input = 1;
    return no_more_input;
}
292
293 static char mbuf[256];
294 char*token2string(token_t*t)
295 {
296     int nr=t->type;
297     if(nr==T_STRING)     return "<string>";
298     else if(nr==T_INT)     return "<int>";
299     else if(nr==T_UINT)     return "<uint>";
300     else if(nr==T_FLOAT)     return "<float>";
301     else if(nr==T_REGEXP)     return "REGEXP";
302     else if(nr==T_EOF)        return "***END***";
303     else if(nr==T_GE)         return ">=";
304     else if(nr==T_LE)         return "<=";
305     else if(nr==T_MINUSMINUS) return "--";
306     else if(nr==T_PLUSPLUS)   return "++";
307     else if(nr==KW_IMPLEMENTS) return "implements";
308     else if(nr==KW_INTERFACE)  return "interface";
309     else if(nr==KW_NAMESPACE)  return "namespace";
310     else if(nr==KW_PROTECTED)  return "protected";
311     else if(nr==KW_OVERRIDE)   return "override";
312     else if(nr==KW_INTERNAL)   return "internal";
313     else if(nr==KW_FUNCTION)   return "function";
314     else if(nr==KW_PACKAGE)    return "package";
315     else if(nr==KW_PRIVATE)    return "private";
316     else if(nr==KW_BOOLEAN)    return "Boolean";
317     else if(nr==KW_DYNAMIC)    return "dynamic";
318     else if(nr==KW_EXTENDS)    return "extends";
319     else if(nr==KW_PUBLIC)     return "public";
320     else if(nr==KW_NATIVE)     return "native";
321     else if(nr==KW_STATIC)     return "static";
322     else if(nr==KW_IMPORT)     return "import";
323     else if(nr==KW_NUMBER)     return "number";
324     else if(nr==KW_CLASS)      return "class";
325     else if(nr==KW_CONST)      return "const";
326     else if(nr==KW_FINAL)      return "final";
327     else if(nr==KW_FALSE)      return "False";
328     else if(nr==KW_TRUE)       return "True";
329     else if(nr==KW_UINT)       return "uint";
330     else if(nr==KW_NULL)       return "null";
331     else if(nr==KW_USE)        return "use";
332     else if(nr==KW_INT)        return "int";
333     else if(nr==KW_NEW)        return "new";
334     else if(nr==KW_GET)        return "get";
335     else if(nr==KW_FOR)        return "for";
336     else if(nr==KW_SET)        return "set";
337     else if(nr==KW_VAR)        return "var";
338     else if(nr==KW_IS)         return "is";
339     else if(nr==KW_AS)         return "as";
340     else if(nr==T_IDENTIFIER) {
341         if(strlen(t->text)>sizeof(mbuf)-1)
342             return "ID(...)";
343         sprintf(mbuf, "ID(%s)", t->text);
344         return mbuf;
345     } else {
346         sprintf(mbuf, "%d", nr);
347         return mbuf;
348     }
349 }
350
351 void initialize_scanner()
352 {
353     BEGIN(BEGINNING);
354 }
355