switched to %union
[swftools.git] / lib / as3 / tokenizer.lex
1 %{
2 #include <string.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <stdarg.h>
6 #include "../utf8.h"
7 #include "tokenizer.h"
8 #include "files.h"
9
10 static void countlines(char*text, int len) {
11     int t;
12     for(t=0;t<len;t++) {
13         if(text[t]=='\n') {
14             current_line++;
15             current_column=0;
16         } else {
17             current_column++;
18         }
19     }
20 }
21
/* When zero, dbg() prints nothing. */
static int verbose = 1;

/* Prints a printf-style debug message to stdout, prefixed with "(tokenizer) ".
   Trailing newlines in the formatted message are stripped and exactly one
   newline is emitted. Messages longer than the internal buffer are truncated
   rather than overflowing it. */
static void dbg(const char*format, ...)
{
    char buf[1024];
    size_t l;
    va_list arglist;
    if(!verbose)
        return;
    buf[0] = 0; /* keep buf a valid string even if formatting fails */
    va_start(arglist, format);
    vsnprintf(buf, sizeof(buf), format, arglist);
    va_end(arglist);
    l = strlen(buf);
    while(l && buf[l-1]=='\n') {
        buf[l-1] = 0;
        l--;
    }
    printf("(tokenizer) ");
    printf("%s\n", buf);
    fflush(stdout);
}
42
43 void syntaxerror(const char*format, ...)
44 {
45     char buf[1024];
46     int l;
47     va_list arglist;
48     if(!verbose)
49         return;
50     va_start(arglist, format);
51     vsprintf(buf, format, arglist);
52     va_end(arglist);
53     fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename, current_line, current_column, buf);
54     fflush(stderr);
55     exit(1);
56 }
57
58
59 #ifndef YY_CURRENT_BUFFER
60 #define YY_CURRENT_BUFFER yy_current_buffer
61 #endif
62
63 void handleInclude(char*text, int len, char quotes)
64 {
65     char*filename = 0;
66     if(quotes) {
67         char*p1 = strchr(text, '"');
68         char*p2 = strrchr(text, '"');
69         if(!p1 || !p2 || p1==p2) {
70             syntaxerror("Invalid include in line %d\n", current_line);
71         }
72         *p2 = 0;
73         filename = strdup(p1+1);
74     } else {
75         int i1=0,i2=len;
76         // find start
77         while(!strchr(" \n\r\t", text[i1])) i1++;
78         // strip
79         while(strchr(" \n\r\t", text[i1])) i1++;
80         while(strchr(" \n\r\t", text[i2-1])) i2--;
81         if(i2!=len) text[i2]=0;
82         filename = strdup(&text[i1]);
83     }
84     
85     char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
86     yyin = fopen(fullfilename, "rb");
87     if (!yyin) {
88         syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
89     }
90
91     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
92     //BEGIN(INITIAL); keep context
93 }
94
95 char start_of_expression;
96
97 static inline int m(int type)
98 {
99     char*s = malloc(yyleng+1);
100     memcpy(s, yytext, yyleng);
101     s[yyleng]=0;
102
103     NEW(token_t,t);
104     t->type = type;
105     t->text = s;
106     avm2_lval.token = t;
107     return type;
108 }
109
110 void initialize_scanner();
111 #define YY_USER_INIT initialize_scanner();
112
113 #define c() {countlines(yytext, yyleng);}
114
115 %}
116
/* Start conditions. REGEXPOK: the next slash may open a regular expression
   literal. BEGINNING: state entered by initialize_scanner() before the first
   token; regular expression literals are accepted there as well. */
%s REGEXPOK
%s BEGINNING

/* Identifier: letter or underscore, then letters, digits, underscores or
   backslashes. */
NAME     [a-zA-Z_][a-zA-Z0-9_\\]*
/* Integer or decimal literal with an optional leading minus sign. */
NUMBER   -?[0-9]+(\.[0-9]*)?
/* Double or single quoted string on one line; a backslash escapes any byte. */
STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
/* One blank character. */
S        [ \n\r\t]
/* Block comment. A run of stars inside the body must be followed by a
   character that is neither a star nor a slash; otherwise the run belongs to
   the terminator. This keeps a comment whose terminator is preceded by extra
   stars from running on into a later comment. */
MULTILINE_COMMENT [/][*]([*]+[^*/]|[^*])*[*]+[/]
/* Line comment including its terminating newline. */
SINGLELINE_COMMENT \/\/[^\n]*\n
/* Regular expression literal on one line followed by optional flag letters. */
REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
127 %%
128
129
130 {SINGLELINE_COMMENT}         {c(); /* single line comment */}
131 {MULTILINE_COMMENT}          {c(); /* multi line comment */}
132 [/][*]                       {syntaxerror("syntax error: unterminated comment", yytext);}
133
134 ^include{S}+{STRING}{S}*/\n    {c();handleInclude(yytext, yyleng, 1);}
135 ^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {c();handleInclude(yytext, yyleng, 0);}
136 {STRING}                     {c(); return m(T_STRING);BEGIN(INITIAL);}
137
138 <BEGINNING,REGEXPOK>{
139 {REGEXP}                     {c(); return m(T_REGEXP);BEGIN(INITIAL);} 
140 }
141
142 \xef\xbb\xbf                 {/* utf 8 bom */}
143 {S}                          {c();}
144
145 {NUMBER}                     {c();return m(T_NUMBER);BEGIN(INITIAL);}
146 [>][=]                       {return m(T_GE);}
147 [<][=]                       {return m(T_LE);}
148 [-][-]                       {return m(T_MINUSMINUS);BEGIN(INITIAL);}
149 [+][+]                       {return m(T_PLUSPLUS);BEGIN(INITIAL);}
150 ==                           {return m(T_EQEQ);BEGIN(REGEXPOK);}
151 \.\.                         {return m(T_DOTDOT);}
152 \.                           {return m('.');}
153 ::                           {return m(T_COLONCOLON);}
154 :                            {return m(':');}
155 implements                   {return m(T_IMPLEMENTS);}
156 interface                    {return m(T_INTERFACE);}
157 namespace                    {return m(T_NAMESPACE);}
158 protected                    {return m(T_PROTECTED);}
159 override                     {return m(T_OVERRIDE);}
160 internal                     {return m(T_INTERNAL);}
161 function                     {return m(T_FUNCTION);}
162 package                      {return m(T_PACKAGE);}
163 private                      {return m(T_PRIVATE);}
164 Boolean                      {return m(T_BOOLEAN);}
165 dynamic                      {return m(T_DYNAMIC);}
166 extends                      {return m(T_EXTENDS);}
167 public                       {return m(T_PUBLIC);}
168 native                       {return m(T_NATIVE);}
169 static                       {return m(T_STATIC);}
170 import                       {return m(T_IMPORT);}
171 number                       {return m(T_NUMBER);}
172 class                        {return m(T_CLASS);}
173 const                        {return m(T_CONST);}
174 final                        {return m(T_FINAL);}
175 False                        {return m(T_FALSE);}
176 True                         {return m(T_TRUE);}
177 uint                         {return m(T_UINT);}
178 null                         {return m(T_NULL);}
179 use                          {return m(T_USE);}
180 int                          {return m(T_INT);}
181 new                          {return m(T_NEW);}
182 get                          {return m(T_GET);}
183 for                          {return m(T_FOR);}
184 set                          {return m(T_SET);}
185 var                          {return m(T_VAR);}
186 is                           {return m(T_IS) ;}
187 as                           {return m(T_AS);}
188 {NAME}                       {c();BEGIN(INITIAL);return m(T_IDENTIFIER);}
189
190 [+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
191 [\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
192
193 .                            {char c1=yytext[0];
194                               char buf[128];
195                               buf[0] = yytext[0];
196                               int t;
197                               for(t=1;t<128;t++) {
198                                   char c = buf[t]=input();
199                                   if(c=='\n' || c==EOF)  {
200                                       buf[t] = 0;
201                                       break;
202                                   }
203                               }
204                               if(c1>='0' && c1<='9')
205                                   syntaxerror("syntax error: %s (identifiers must not start with a digit)");
206                               else
207                                   syntaxerror("syntax error: %s", buf);
208                               printf("\n");
209                               exit(1);
210                               yyterminate();
211                              }
212 <<EOF>>                      {c();
213                               void*b = leave_file();
214                               if (!b) {
215                                  yyterminate();
216                                  yy_delete_buffer(YY_CURRENT_BUFFER);
217                                  return m(T_EOF);
218                               } else {
219                                   yy_delete_buffer(YY_CURRENT_BUFFER);
220                                   yy_switch_to_buffer(b);
221                               }
222                              }
223
224 %%
225
/* Called by the generated scanner when the current input is exhausted.
   Always returns 1, which tells flex that there is no further input. */
int yywrap(void)
{
    const int no_more_input = 1;
    return no_more_input;
}
230
231 static char mbuf[256];
232 char*token2string(token_t*t)
233 {
234     int nr=t->type;
235     if(nr==T_STRING)     return "STRING";
236     else if(nr==T_NUMBER)     return "NUMBER";
237     else if(nr==T_REGEXP)     return "REGEXP";
238     else if(nr==T_EOF)        return "***END***";
239     else if(nr==T_GE)         return ">=";
240     else if(nr==T_LE)         return "<=";
241     else if(nr==T_MINUSMINUS) return "--";
242     else if(nr==T_PLUSPLUS)   return "++";
243     else if(nr==T_IMPLEMENTS) return "implements";
244     else if(nr==T_INTERFACE)  return "interface";
245     else if(nr==T_NAMESPACE)  return "namespace";
246     else if(nr==T_PROTECTED)  return "protected";
247     else if(nr==T_OVERRIDE)   return "override";
248     else if(nr==T_INTERNAL)   return "internal";
249     else if(nr==T_FUNCTION)   return "function";
250     else if(nr==T_PACKAGE)    return "package";
251     else if(nr==T_PRIVATE)    return "private";
252     else if(nr==T_BOOLEAN)    return "Boolean";
253     else if(nr==T_DYNAMIC)    return "dynamic";
254     else if(nr==T_EXTENDS)    return "extends";
255     else if(nr==T_PUBLIC)     return "public";
256     else if(nr==T_NATIVE)     return "native";
257     else if(nr==T_STATIC)     return "static";
258     else if(nr==T_IMPORT)     return "import";
259     else if(nr==T_NUMBER)     return "number";
260     else if(nr==T_CLASS)      return "class";
261     else if(nr==T_CONST)      return "const";
262     else if(nr==T_FINAL)      return "final";
263     else if(nr==T_FALSE)      return "False";
264     else if(nr==T_TRUE)       return "True";
265     else if(nr==T_UINT)       return "uint";
266     else if(nr==T_NULL)       return "null";
267     else if(nr==T_USE)        return "use";
268     else if(nr==T_INT)        return "int";
269     else if(nr==T_NEW)        return "new";
270     else if(nr==T_GET)        return "get";
271     else if(nr==T_FOR)        return "for";
272     else if(nr==T_SET)        return "set";
273     else if(nr==T_VAR)        return "var";
274     else if(nr==T_IS)         return "is";
275     else if(nr==T_AS)         return "as";
276     else if(nr==T_IDENTIFIER) {
277         if(strlen(t->text)>sizeof(mbuf)-1)
278             return "ID(...)";
279         sprintf(mbuf, "ID(%s)", t->text);
280         return mbuf;
281     } else {
282         sprintf(mbuf, "%d", nr);
283         return mbuf;
284     }
285 }
286
287 void initialize_scanner()
288 {
289     BEGIN(BEGINNING);
290 }
291