renamed all keywords KW_KEYWORD, added number conversion
[swftools.git] / lib / as3 / tokenizer.lex
1 %{
2 #include <string.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <stdarg.h>
6 #include "../utf8.h"
7 #include "tokenizer.h"
8 #include "files.h"
9
10 static void countlines(char*text, int len) {
11     int t;
12     for(t=0;t<len;t++) {
13         if(text[t]=='\n') {
14             current_line++;
15             current_column=0;
16         } else {
17             current_column++;
18         }
19     }
20 }
21
/* Non-zero enables the output of dbg(). */
static int verbose = 1;

/*
 * Print a printf-style debug message to stdout, prefixed with
 * "(tokenizer) ". Does nothing when verbose is 0.
 *
 * Trailing newlines of the formatted message are removed and exactly one
 * newline is written after it. The message is formatted into a fixed-size
 * buffer; anything that does not fit is truncated instead of overflowing
 * the stack.
 */
static void dbg(const char*format, ...)
{
    char buf[1024];
    size_t l;
    va_list arglist;
    if(!verbose)
        return;
    buf[0] = 0; /* keep buf a valid string even if formatting fails */
    va_start(arglist, format);
    vsnprintf(buf, sizeof(buf), format, arglist);
    va_end(arglist);
    l = strlen(buf);
    while(l && buf[l-1]=='\n') {
        buf[l-1] = 0;
        l--;
    }
    printf("(tokenizer) ");
    printf("%s\n", buf);
    fflush(stdout);
}
42
43 void syntaxerror(const char*format, ...)
44 {
45     char buf[1024];
46     int l;
47     va_list arglist;
48     if(!verbose)
49         return;
50     va_start(arglist, format);
51     vsprintf(buf, format, arglist);
52     va_end(arglist);
53     fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename, current_line, current_column, buf);
54     fflush(stderr);
55     exit(1);
56 }
57
58
59 #ifndef YY_CURRENT_BUFFER
60 #define YY_CURRENT_BUFFER yy_current_buffer
61 #endif
62
63 void handleInclude(char*text, int len, char quotes)
64 {
65     char*filename = 0;
66     if(quotes) {
67         char*p1 = strchr(text, '"');
68         char*p2 = strrchr(text, '"');
69         if(!p1 || !p2 || p1==p2) {
70             syntaxerror("Invalid include in line %d\n", current_line);
71         }
72         *p2 = 0;
73         filename = strdup(p1+1);
74     } else {
75         int i1=0,i2=len;
76         // find start
77         while(!strchr(" \n\r\t", text[i1])) i1++;
78         // strip
79         while(strchr(" \n\r\t", text[i1])) i1++;
80         while(strchr(" \n\r\t", text[i2-1])) i2--;
81         if(i2!=len) text[i2]=0;
82         filename = strdup(&text[i1]);
83     }
84     
85     char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
86     yyin = fopen(fullfilename, "rb");
87     if (!yyin) {
88         syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
89     }
90
91     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
92     //BEGIN(INITIAL); keep context
93 }
94
95 char start_of_expression;
96
97 static inline int m(int type)
98 {
99     char*s = malloc(yyleng+1);
100     memcpy(s, yytext, yyleng);
101     s[yyleng]=0;
102
103     NEW(token_t,t);
104     t->type = type;
105     t->text = s;
106     avm2_lval.token = t;
107     return type;
108 }
109
110 static char numberbuf[64];
111 static inline int handlenumber()
112 {
113     if(yyleng>sizeof(numberbuf)-1)
114         syntaxerror("decimal number overflow");
115
116     char*s = numberbuf;
117     memcpy(s, yytext, yyleng);
118     s[yyleng]=0;
119
120     int t;
121     char is_float=0;
122     for(t=0;t<yyleng;t++) {
123         if(yytext[t]=='.') {
124             is_float=1;
125         } 
126         if(!strchr("0123456789", yytext[t])) {
127             syntaxerror("Invalid number");
128         }
129     }
130     if(is_float) {
131         avm2_lval.number_float = atof(s);
132         return T_FLOAT;
133     } 
134     int l=0;
135     if(yytext[0]=='-')
136         l++;
137
138     char*max = l?"2147483648":"4294967296";
139     if(yyleng>10)
140         syntaxerror("integer overflow");
141     if(yyleng==10) {
142         int t;
143         for(t=0;t<yyleng-l;t++) {
144             if(yytext[l+t]>max[t])
145                 syntaxerror("integer overflow");
146             else if(yytext[l+t]<max[t])
147                 break;
148         }
149     }
150     if(yytext[0]=='-') {
151         avm2_lval.number_int = atoi(s);
152         return T_INT;
153     } else {
154         unsigned int v = atoi(s);
155         avm2_lval.number_uint = v;
156         if(v<256)
157             return T_BYTE;
158         else if(v<0x80000000)
159             return T_SHORT;
160         else
161             return T_UINT;
162     }
163 }
164
/* Defined in the user code section at the end of this file. */
void initialize_scanner(void);

/* Expanded by the generated scanner before the first scan: selects the
   initial start condition. */
#define YY_USER_INIT initialize_scanner();

/* Account for the text of the current match in the line/column counters.
   Must be used as a statement: c(); */
#define c() do { countlines(yytext, yyleng); } while(0)
169
170 %}
171
/* Start conditions (inclusive). In REGEXPOK and BEGINNING a '/' may start
   a regular expression literal; BEGINNING is selected by
   initialize_scanner(). */
%s REGEXPOK
%s BEGINNING

NAME     [a-zA-Z_][a-zA-Z0-9_\\]*

NUMBER   -?[0-9]+(\.[0-9]*)?

STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
S        [ \n\r\t]
/* The comment body must never be able to step over a terminating star
   slash pair: a star inside the body has to be followed by something other
   than a star or a slash. Otherwise the longest-match rule makes a single
   comment extend to the last comment terminator in the file. */
MULTILINE_COMMENT [/][*]([^*]|[*]+[^*/])*[*]+[/]
/* The newline is optional so that a comment on the last line of a file
   that lacks a final newline is still recognized. */
SINGLELINE_COMMENT \/\/[^\n]*\n?
REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
184 %%
185
186
{SINGLELINE_COMMENT}         {c(); /* single line comment */}
{MULTILINE_COMMENT}          {c(); /* multi line comment */}
[/][*]                       {syntaxerror("syntax error: unterminated comment");}

    /* include directives: only recognized at the start of a line; the
       third argument of handleInclude() says whether the name is quoted */
^include{S}+{STRING}{S}*/\n    {c();handleInclude(yytext, yyleng, 1);}
^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {c();handleInclude(yytext, yyleng, 0);}
{STRING}                     {c(); BEGIN(INITIAL);return m(T_STRING);}

    /* a regular expression literal is only tried where the preceding
       token allows one (start conditions BEGINNING and REGEXPOK) */
<BEGINNING,REGEXPOK>{
{REGEXP}                     {c(); BEGIN(INITIAL);return m(T_REGEXP);} 
}

\xef\xbb\xbf                 {/* utf 8 bom */}
{S}                          {c();}

{NUMBER}                     {c(); BEGIN(INITIAL);return handlenumber();}

    /* operators; c() keeps current_column in step with the consumed text */
[>][=]                       {c();return m(T_GE);}
[<][=]                       {c();return m(T_LE);}
[-][-]                       {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
[+][+]                       {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
==                           {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
\.\.                         {c();return m(T_DOTDOT);}
\.                           {c();return m('.');}
::                           {c();return m(T_COLONCOLON);}
:                            {c();return m(':');}
implements                   {c();return m(KW_IMPLEMENTS);}
    /* Keywords. They are listed before {NAME} so that a keyword wins over
       an identifier match of the same length. Each action calls c() so
       that current_column advances past the keyword. */
interface                    {c();return m(KW_INTERFACE);}
namespace                    {c();return m(KW_NAMESPACE);}
protected                    {c();return m(KW_PROTECTED);}
override                     {c();return m(KW_OVERRIDE);}
internal                     {c();return m(KW_INTERNAL);}
function                     {c();return m(KW_FUNCTION);}
package                      {c();return m(KW_PACKAGE);}
private                      {c();return m(KW_PRIVATE);}
Boolean                      {c();return m(KW_BOOLEAN);}
dynamic                      {c();return m(KW_DYNAMIC);}
extends                      {c();return m(KW_EXTENDS);}
public                       {c();return m(KW_PUBLIC);}
native                       {c();return m(KW_NATIVE);}
static                       {c();return m(KW_STATIC);}
import                       {c();return m(KW_IMPORT);}
Number                       {c();return m(KW_NUMBER);}
class                        {c();return m(KW_CLASS);}
const                        {c();return m(KW_CONST);}
final                        {c();return m(KW_FINAL);}
False                        {c();return m(KW_FALSE);}
True                         {c();return m(KW_TRUE);}
uint                         {c();return m(KW_UINT);}
null                         {c();return m(KW_NULL);}
use                          {c();return m(KW_USE);}
int                          {c();return m(KW_INT);}
new                          {c();return m(KW_NEW);}
get                          {c();return m(KW_GET);}
for                          {c();return m(KW_FOR);}
set                          {c();return m(KW_SET);}
var                          {c();return m(KW_VAR);}
is                           {c();return m(KW_IS) ;}
as                           {c();return m(KW_AS);}
{NAME}                       {c();BEGIN(INITIAL);return m(T_IDENTIFIER);}
247
[+\-\/*^~@$!%&\(=\[\{\}|?:;,.<>]  {c();BEGIN(REGEXPOK);return m(yytext[0]); /* a regexp literal may follow these */}
    /* ')' and ']' end an operand, so a following '/' is a division. ']'
       must not also appear in the class above: the earlier rule would win
       and switch to REGEXPOK. */
[\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
250
.                            {/* No other rule matched: collect the rest of
                                 the line (at most sizeof(buf)-1 characters)
                                 for the error message and abort. */
                              char c1=yytext[0];
                              char buf[128];
                              buf[0] = yytext[0];
                              int t;
                              for(t=1;t<(int)sizeof(buf)-1;t++) {
                                  /* keep the result in an int so that EOF
                                     cannot be confused with a character;
                                     depending on the flex version input()
                                     signals end of input with EOF or 0 */
                                  int ch = input();
                                  if(ch=='\n' || ch==EOF || ch==0)
                                      break;
                                  buf[t] = (char)ch;
                              }
                              buf[t] = 0; /* always terminated, also when the line is longer than buf */
                              if(c1>='0' && c1<='9')
                                  syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
                              else
                                  syntaxerror("syntax error: %s", buf);
                              printf("\n");
                              exit(1);
                              yyterminate();
                             }
<<EOF>>                      {c();
                              /* End of the current input. leave_file()
                                 yields the buffer to resume with; a null
                                 result means there is nothing to return
                                 to and scanning ends. */
                              void*resume = leave_file();
                              if (resume) {
                                  yy_delete_buffer(YY_CURRENT_BUFFER);
                                  yy_switch_to_buffer(resume);
                              } else {
                                  yyterminate();
                                  /* NOTE(review): with flex's default
                                     yyterminate(), which returns from
                                     yylex(), the two statements below are
                                     never reached. */
                                  yy_delete_buffer(YY_CURRENT_BUFFER);
                                  return m(T_EOF);
                              }
                             }
281
282 %%
283
/*
 * Called by the generated scanner when it reaches the end of its input.
 * Always returns 1, i.e. there is no further input to continue with.
 */
int yywrap()
{
    const int no_more_input = 1;
    return no_more_input;
}
288
289 static char mbuf[256];
290 char*token2string(token_t*t)
291 {
292     int nr=t->type;
293     if(nr==T_STRING)     return "<string>";
294     else if(nr==T_INT)     return "<int>";
295     else if(nr==T_UINT)     return "<uint>";
296     else if(nr==T_FLOAT)     return "<float>";
297     else if(nr==T_REGEXP)     return "REGEXP";
298     else if(nr==T_EOF)        return "***END***";
299     else if(nr==T_GE)         return ">=";
300     else if(nr==T_LE)         return "<=";
301     else if(nr==T_MINUSMINUS) return "--";
302     else if(nr==T_PLUSPLUS)   return "++";
303     else if(nr==KW_IMPLEMENTS) return "implements";
304     else if(nr==KW_INTERFACE)  return "interface";
305     else if(nr==KW_NAMESPACE)  return "namespace";
306     else if(nr==KW_PROTECTED)  return "protected";
307     else if(nr==KW_OVERRIDE)   return "override";
308     else if(nr==KW_INTERNAL)   return "internal";
309     else if(nr==KW_FUNCTION)   return "function";
310     else if(nr==KW_PACKAGE)    return "package";
311     else if(nr==KW_PRIVATE)    return "private";
312     else if(nr==KW_BOOLEAN)    return "Boolean";
313     else if(nr==KW_DYNAMIC)    return "dynamic";
314     else if(nr==KW_EXTENDS)    return "extends";
315     else if(nr==KW_PUBLIC)     return "public";
316     else if(nr==KW_NATIVE)     return "native";
317     else if(nr==KW_STATIC)     return "static";
318     else if(nr==KW_IMPORT)     return "import";
319     else if(nr==KW_NUMBER)     return "number";
320     else if(nr==KW_CLASS)      return "class";
321     else if(nr==KW_CONST)      return "const";
322     else if(nr==KW_FINAL)      return "final";
323     else if(nr==KW_FALSE)      return "False";
324     else if(nr==KW_TRUE)       return "True";
325     else if(nr==KW_UINT)       return "uint";
326     else if(nr==KW_NULL)       return "null";
327     else if(nr==KW_USE)        return "use";
328     else if(nr==KW_INT)        return "int";
329     else if(nr==KW_NEW)        return "new";
330     else if(nr==KW_GET)        return "get";
331     else if(nr==KW_FOR)        return "for";
332     else if(nr==KW_SET)        return "set";
333     else if(nr==KW_VAR)        return "var";
334     else if(nr==KW_IS)         return "is";
335     else if(nr==KW_AS)         return "as";
336     else if(nr==T_IDENTIFIER) {
337         if(strlen(t->text)>sizeof(mbuf)-1)
338             return "ID(...)";
339         sprintf(mbuf, "ID(%s)", t->text);
340         return mbuf;
341     } else {
342         sprintf(mbuf, "%d", nr);
343         return mbuf;
344     }
345 }
346
347 void initialize_scanner()
348 {
349     BEGIN(BEGINNING);
350 }
351