initial checkin
[swftools.git] / lib / as3 / tokenizer.lex
1 %{
2 #include <string.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <stdarg.h>
6 #include "../utf8.h"
7 #include "tokenizer.h"
8 #include "files.h"
9
10 static void countlines(char*text, int len) {
11     int t;
12     for(t=0;t<len;t++) {
13         if(text[t]=='\n') {
14             current_line++;
15             current_column=0;
16         } else {
17             current_column++;
18         }
19     }
20 }
21
/* Debug output switch: dbg() prints only while this is non-zero. */
static int verbose = 1;

/*
 * printf-style debug message.
 *
 * The formatted text is stripped of trailing newlines and written to stdout
 * as "(tokenizer) <text>\n". Messages longer than the internal buffer are
 * truncated instead of overflowing it. Does nothing when verbose is 0.
 */
static void dbg(const char*format, ...)
{
    char buf[1024];
    size_t l;
    va_list arglist;
    if(!verbose)
        return;
    buf[0] = 0; /* stays a valid string even if formatting fails */
    va_start(arglist, format);
    /* bounded: vsprintf() would write past buf for long messages */
    vsnprintf(buf, sizeof(buf), format, arglist);
    va_end(arglist);
    l = strlen(buf);
    while(l && buf[l-1]=='\n') {
        buf[l-1] = 0;
        l--;
    }
    printf("(tokenizer) ");
    printf("%s\n", buf);
    fflush(stdout);
}
42
43 void syntaxerror(const char*format, ...)
44 {
45     char buf[1024];
46     int l;
47     va_list arglist;
48     if(!verbose)
49         return;
50     va_start(arglist, format);
51     vsprintf(buf, format, arglist);
52     va_end(arglist);
53     fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename, current_line, current_column, buf);
54     fflush(stderr);
55     exit(1);
56 }
57
58
59 #ifndef YY_CURRENT_BUFFER
60 #define YY_CURRENT_BUFFER yy_current_buffer
61 #endif
62
63 void handleInclude(char*text, int len, char quotes)
64 {
65     char*filename = 0;
66     if(quotes) {
67         char*p1 = strchr(text, '"');
68         char*p2 = strrchr(text, '"');
69         if(!p1 || !p2 || p1==p2) {
70             syntaxerror("Invalid include in line %d\n", current_line);
71         }
72         *p2 = 0;
73         filename = strdup(p1+1);
74     } else {
75         int i1=0,i2=len;
76         // find start
77         while(!strchr(" \n\r\t", text[i1])) i1++;
78         // strip
79         while(strchr(" \n\r\t", text[i1])) i1++;
80         while(strchr(" \n\r\t", text[i2-1])) i2--;
81         if(i2!=len) text[i2]=0;
82         filename = strdup(&text[i1]);
83     }
84     
85     char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
86     yyin = fopen(fullfilename, "rb");
87     if (!yyin) {
88         syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
89     }
90
91     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
92     //BEGIN(INITIAL); keep context
93 }
94
95 static inline int m(int type)
96 {
97     char*s = malloc(yyleng+1);
98     memcpy(s, yytext, yyleng);
99     s[yyleng]=0;
100
101     NEW(token_t,t);
102     t->type = type;
103     t->text = s;
104     avm2_lval = t;
105     return type;
106 }
107
/* Hook that flex runs through YY_USER_INIT; defined at the end of this file. */
void initialize_scanner(void);
#define YY_USER_INIT initialize_scanner();

/* Account for the text of the current match in the line/column counters.
   Wrapped in do/while(0) so that "c();" is a single statement everywhere. */
#define c() do { countlines(yytext, yyleng); } while(0)
112
113 %}
114
/* REGEXPOK: a '/' at this point may start a regular expression literal.
   BEGINNING: state the scanner starts in (see initialize_scanner()). */
%s REGEXPOK
%s BEGINNING

/* NOTE(review): NUMBER includes an optional leading '-', so "a-1" scans as
   the identifier a followed by the number -1 (longest match wins). */
NAME     [a-zA-Z_][a-zA-Z0-9_\\]*
NUMBER   -?[0-9]+(\.[0-9]*)?
STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
S        [ \n\r\t]
/* The comment body may contain anything except the sequence star-slash:
   either a non-star character, or a run of stars followed by a character
   that is neither a star nor a slash. This makes the match end at the
   FIRST terminator instead of the last one in the file. */
MULTILINE_COMMENT [/][*]([^*]|[*]+[^*/])*[*]+[/]
/* The newline is not part of the comment (it is consumed as whitespace),
   so a comment on the last line of a file without a final newline is
   recognised as well. */
SINGLELINE_COMMENT \/\/[^\n]*
REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
125 %%
126
127
{SINGLELINE_COMMENT}         {c(); /* single line comment: dropped, only the position is updated */}
{MULTILINE_COMMENT}          {c(); /* multi line comment: dropped, only the position is updated */}
[/][*]                       {/* a comment opener that is not part of any complete comment */
                              syntaxerror("syntax error: unterminated comment");}

    /* include directives are only recognised at the start of a line and
       must be followed by a newline (trailing context); the last argument
       of handleInclude() says whether the file name is quoted */
^include{S}+{STRING}{S}*/\n    {c();handleInclude(yytext, yyleng, 1);}
^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {c();handleInclude(yytext, yyleng, 0);}
    /* the start condition has to be changed BEFORE returning, statements
       after the return are never executed */
{STRING}                     {c(); BEGIN(INITIAL); return m(T_STRING);}
135
<BEGINNING,REGEXPOK>{
    /* regular expression literals are only recognised where an operand may
       start; afterwards a '/' is an operator again, so the start condition
       is reset before (not after) the return */
{REGEXP}                     {c(); BEGIN(INITIAL); return m(T_REGEXP);}
}

\xef\xbb\xbf                 {/* utf 8 bom: ignored, does not advance the column */}
{S}                          {c(); /* whitespace: only the position is updated */}
142
{NUMBER}                     {c();BEGIN(INITIAL);return m(T_NUMBER); /* after a number, '/' is a division */}
[>][=]                       {c();return m(T_GE);}
[<][=]                       {c();return m(T_LE);}
[-][-]                       {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
[+][+]                       {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
==                           {c();BEGIN(REGEXPOK);return m(T_EQEQ); /* an operand (possibly a regexp) follows */}
\.\.                         {c();return m(T_DOTDOT);}
\.                           {c();return m('.');}
::                           {c();return m(T_COLONCOLON);}
:                            {c();return m(':');}
implements                   {c();return m(T_IMPLEMENTS); /* every keyword rule calls c() so that current_column stays correct */}
interface                    {c();return m(T_INTERFACE);}
namespace                    {c();return m(T_NAMESPACE);}
protected                    {c();return m(T_PROTECTED);}
override                     {c();return m(T_OVERRIDE);}
internal                     {c();return m(T_INTERNAL);}
function                     {c();return m(T_FUNCTION);}
package                      {c();return m(T_PACKAGE);}
private                      {c();return m(T_PRIVATE);}
Boolean                      {c();return m(T_BOOLEAN);}
dynamic                      {c();return m(T_DYNAMIC);}
extends                      {c();return m(T_EXTENDS);}
public                       {c();return m(T_PUBLIC);}
native                       {c();return m(T_NATIVE);}
static                       {c();return m(T_STATIC);}
import                       {c();return m(T_IMPORT);}
number                       {c();return m(T_NUMBER); /* NOTE(review): same token type as a numeric literal ({NUMBER}) -- confirm this is intended */}
class                        {c();return m(T_CLASS);}
const                        {c();return m(T_CONST);}
final                        {c();return m(T_FINAL);}
False                        {c();return m(T_FALSE);}
True                         {c();return m(T_TRUE);}
uint                         {c();return m(T_UINT);}
null                         {c();return m(T_NULL);}
use                          {c();return m(T_USE);}
int                          {c();return m(T_INT);}
new                          {c();return m(T_NEW);}
get                          {c();return m(T_GET);}
for                          {c();return m(T_FOR);}
set                          {c();return m(T_SET);}
var                          {c();return m(T_VAR);}
is                           {c();return m(T_IS) ;}
as                           {c();return m(T_AS);}
    /* any other word; longer matches (e.g. "interfaces") end up here because
       flex prefers the longest match, equal-length matches go to the keyword
       rules above because they are listed first */
{NAME}                       {c();BEGIN(INITIAL);return m(T_IDENTIFIER);}
187
[-+\/*^~@$!%&\(=\[\{\}|?:;,.<>]   {c();BEGIN(REGEXPOK);return m(yytext[0]); /* an operand may follow: '/' can start a regexp */}
    /* closing brackets end an operand, so a following '/' is a division;
       ']' must not be listed in the class above, the first matching rule
       would win */
[\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
190
.                            {/* a character no other rule accepts: report it
                                 together with the rest of the line */
                              char c1=yytext[0];
                              char buf[128];
                              int t;
                              buf[0] = yytext[0];
                              for(t=1;t<(int)sizeof(buf)-1;t++) {
                                  /* keep the result as int so that EOF is
                                     distinguishable from a character; 0 is
                                     treated as end of input as well */
                                  int ch = input();
                                  if(ch=='\n' || ch==EOF || ch==0)
                                      break;
                                  buf[t] = (char)ch;
                              }
                              buf[t] = 0; /* always terminated, also for long lines */
                              if(c1>='0' && c1<='9')
                                  syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
                              else
                                  syntaxerror("syntax error: %s", buf);
                              /* syntaxerror() exits; this is only a backstop */
                              exit(1);
                             }
<<EOF>>                      {c();
                              /* leave_file() yields the buffer to continue
                                 with (presumably the one that was active
                                 when the include was entered), or NULL when
                                 there is none */
                              void*parent = leave_file();
                              if (parent) {
                                  /* end of an included file: drop its buffer
                                     and resume scanning the outer one */
                                  yy_delete_buffer(YY_CURRENT_BUFFER);
                                  yy_switch_to_buffer(parent);
                              } else {
                                  /* no more input: yyterminate() returns
                                     from the scanner immediately.
                                     NOTE(review): a T_EOF token is therefore
                                     never produced and the last buffer is not
                                     deleted here -- confirm that this is what
                                     the parser expects. */
                                  yyterminate();
                              }
                             }
221
222 %%
223
/*
 * Called by the generated scanner when the current input is exhausted.
 * Always returns 1: there is no further input to continue with (included
 * files are handled by the EOF rule instead).
 */
int yywrap(void)
{
    return 1;
}
228
229 static char mbuf[256];
230 char*token2string(token_t*t)
231 {
232     int nr=t->type;
233     if(nr==T_STRING)     return "STRING";
234     else if(nr==T_NUMBER)     return "NUMBER";
235     else if(nr==T_REGEXP)     return "REGEXP";
236     else if(nr==T_EOF)        return "***END***";
237     else if(nr==T_GE)         return ">=";
238     else if(nr==T_LE)         return "<=";
239     else if(nr==T_MINUSMINUS) return "--";
240     else if(nr==T_PLUSPLUS)   return "++";
241     else if(nr==T_IMPLEMENTS) return "implements";
242     else if(nr==T_INTERFACE)  return "interface";
243     else if(nr==T_NAMESPACE)  return "namespace";
244     else if(nr==T_PROTECTED)  return "protected";
245     else if(nr==T_OVERRIDE)   return "override";
246     else if(nr==T_INTERNAL)   return "internal";
247     else if(nr==T_FUNCTION)   return "function";
248     else if(nr==T_PACKAGE)    return "package";
249     else if(nr==T_PRIVATE)    return "private";
250     else if(nr==T_BOOLEAN)    return "Boolean";
251     else if(nr==T_DYNAMIC)    return "dynamic";
252     else if(nr==T_EXTENDS)    return "extends";
253     else if(nr==T_PUBLIC)     return "public";
254     else if(nr==T_NATIVE)     return "native";
255     else if(nr==T_STATIC)     return "static";
256     else if(nr==T_IMPORT)     return "import";
257     else if(nr==T_NUMBER)     return "number";
258     else if(nr==T_CLASS)      return "class";
259     else if(nr==T_CONST)      return "const";
260     else if(nr==T_FINAL)      return "final";
261     else if(nr==T_FALSE)      return "False";
262     else if(nr==T_TRUE)       return "True";
263     else if(nr==T_UINT)       return "uint";
264     else if(nr==T_NULL)       return "null";
265     else if(nr==T_USE)        return "use";
266     else if(nr==T_INT)        return "int";
267     else if(nr==T_NEW)        return "new";
268     else if(nr==T_GET)        return "get";
269     else if(nr==T_FOR)        return "for";
270     else if(nr==T_SET)        return "set";
271     else if(nr==T_VAR)        return "var";
272     else if(nr==T_IS)         return "is";
273     else if(nr==T_AS)         return "as";
274     else if(nr==T_IDENTIFIER) {
275         if(strlen(t->text)>sizeof(mbuf)-1)
276             return "ID(...)";
277         sprintf(mbuf, "ID(%s)", t->text);
278         return mbuf;
279     } else {
280         sprintf(mbuf, "%d", nr);
281         return mbuf;
282     }
283 }
284
285 void initialize_scanner()
286 {
287     BEGIN(BEGINNING);
288 }
289