optimizations and bugfixes for finally
[swftools.git] / src / parser.lex
1 %{
2
3 #include <string.h>
4 #include <stdlib.h>
5 #include <stdio.h>
6 #include "../lib/q.h"
7 #include "parser.h"
8 #include "../lib/utf8.h"
9
10 //RVALUE         {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM}
11 //<a>.                {printf("<a>%s\n", yytext);}
12 // %x: exclusive, %s: inclusive
13 char*type_names[] = {"twip","number","command","string","assignment","identifier","label","end"};
14 static int line=1;
15 static int column=1;
16
17 mem_t strings;
18 mem_t tokens;
19
20 static void count(char*text, int len, int condition)
21 {
22     int t;
23     for(t=0;t<len;t++) {
24         if(text[t]=='\n') {
25             line++;
26             column=1;
27         } else {
28             column++;
29         }
30     }
31 }
32
/* Pending text ("<plus>" / "<minus>") that store() prepends to the next
   TWIP, NUMBER or IDENTIFIER token; a null pointer means "no prefix". */
static char *prefix = NULL;
34
35 static void unescapeString(string_t * tmp)
36 {
37     char *p, *p1;
38     /* fixme - this routine expects the string to be
39        null-terminated */
40
41     for (p1=(char*)tmp->str; (p=strchr(p1, '\\')); p1 = p+1) 
42     {
43         int nr=2;
44         int new=1;
45         switch(p[1])
46         {
47             case '\\': p[0] = '\\'; break;
48             case '"': p[0] = '"'; break;
49             case 'b': p[0] = '\b'; break;
50             case 'f': p[0] = '\f'; break;
51             case 'n': p[0] = '\n'; break;
52             case 'r': p[0] = '\r'; break;
53             case 't': p[0] = '\t'; break;
54             case 'x':  case 'u': {
55                 int max=4;
56                 int num=0;
57                 char*utf8;
58                 char bracket = 0;
59                 if(p[1] == 'u')
60                     max = 6;
61                 if(p[2] == '{')  {
62                     bracket = 1;nr++;max++;
63                 }
64                 while(strchr("0123456789abcdefABCDEF", p[nr]) && (bracket || nr < max)) {
65                     num <<= 4;
66                     if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0';
67                     if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10;
68                     if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10;
69                     nr++;
70                 }
71                 if(bracket && p[nr]=='}') {
72                     bracket = 0;
73                     nr++;
74                 }
75                 utf8 = getUTF8(num);
76                 new = strlen(utf8);
77                 memcpy(p, utf8, new); // do not copy the terminating zero
78                 break;
79             }
80             default:
81                 continue;
82         }
83         tmp->len -= (nr-new); 
84         {
85             int t;
86             char*to=p+new,*from=p+nr;
87             while(*from) {
88                 *to = *from;
89                 to++;
90                 from++;
91             }
92         }
93     }
94 }
95
96 static void store(enum type_t type, int line, int column, char*text, int length)
97 {
98     struct token_t token;
99     string_t tmp;
100     token.type = type;
101     token.line = line;
102     token.column = column;
103     //printf("->%d(%s) %s\n", type, type_names[type], text);fflush(stdout);
104
105     token.text = 0;
106     switch(type) {
107         case END:
108             string_set2(&tmp, "", 0);
109             token.text = (char*)mem_putstring(&strings, tmp);
110         break;
111         case STRING:
112             string_set2(&tmp, text+1, length-2);
113             unescapeString(&tmp);
114             token.text = (char*)mem_putstring(&strings, tmp);
115         break;
116         case TWIP: 
117         case NUMBER: 
118         case IDENTIFIER:
119             string_set2(&tmp, text, length);
120             if(prefix) {
121                 //strcat
122                 token.text = (char*)mem_put(&strings, prefix, strlen(prefix));
123                 mem_putstring(&strings, tmp);
124             } else {
125                 token.text = (char*)mem_putstring(&strings, tmp);
126             }
127             prefix = 0;
128         break;
129         case RAWDATA:
130             string_set2(&tmp, text+1/*:*/, length-5/*.end*/);
131             token.text = (char*)mem_putstring(&strings, tmp);
132         break;
133         case COMMAND:
134             string_set2(&tmp, text+1, length-1);
135             token.text = (char*)mem_putstring(&strings, tmp);
136         break;
137         case ASSIGNMENT: {
138             char*x = &text[length-1];
139             if(x[-1] == '-' || x[-1] == '+')
140                 x--;
141             do{x--;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
142             x++; //first space
143             string_set2(&tmp, text, x-text);
144             token.text = (char*)mem_putstring(&strings, tmp);
145             /*char*y,*x = strchr(text, '=');
146             if(!x) exit(1);
147             y=x;
148             do{y--;} while(*y==32 || *y==10 || *y==13 || *y=='\t');
149             do{x++;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
150             token.text1 = (char*)put(&strings, text, y-text + 1, 1);
151             token.text2 = (char*)put(&strings, x, length-(x-text), 1);*/
152         } break;
153     }
154
155     mem_put(&tokens, &token, sizeof(struct token_t));
156     prefix = 0;
157 }
158
159 #define MAX_INCLUDE_DEPTH 16
160 YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
161 int line_stack[MAX_INCLUDE_DEPTH];
162 int column_stack[MAX_INCLUDE_DEPTH];
163 int include_stack_ptr = 0;
164
165 void handleInclude(char*text, int len)
166 {
167     text+=9;len-=9;
168     while(len >=1 && (text[0] == ' ' || text[0] == '\t')) {
169         text++;len--;
170     }
171     while(len >= 1 && 
172           (text[len-1] == ' ' || 
173            text[len-1] == '\r' || 
174            text[len-1] == '\n')) {
175         len--;
176     }
177     if(len >= 2 && text[0] == '"' && text[len-1] == '"') {
178         text++; len-=2;
179     }
180     text[len] = 0;
181     if(include_stack_ptr >= MAX_INCLUDE_DEPTH) {
182         fprintf( stderr, "Includes nested too deeply" );
183         exit( 1 );
184     }
185     include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
186     line_stack[include_stack_ptr] = line;
187     column_stack[include_stack_ptr] = column;
188     include_stack_ptr++;
189     yyin = fopen(text, "rb");
190     if (!yyin) {
191         fprintf(stderr, "Couldn't open %s\n", text);
192         exit(1);
193     }
194     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
195
196 #ifdef INITIAL
197     BEGIN(INITIAL);
198 #else
199     // best guess
200     BEGIN(0);
201 #endif
202 }
203
/* c(): advance the line/column position over the text just matched. */
#define c() do { count(yytext, yyleng, YY_START); } while(0)
/* s(type): record the text just matched as a token of the given type,
   stamped with the current line/column. */
#define s(type) do { store(type, line, column, yytext, yyleng); } while(0)
206 %}
207
/* Start conditions.  R (inclusive) is entered after an assignment operator
   and covers the value on its right-hand side; BINARY (exclusive) is entered
   at "[" and left again at "]". */
%s R
%x BINARY

/* Named patterns.
   NAME     starts with a letter, '_', '.' or '/'; further characters may also
            be digits, and each of them may be preceded by dashes
   TWIP     optional '-', digits, and optionally a '.' followed by at most two
            digits, the second of which must be 0 or 5
   NUMBER   optional '-', digits, and optionally a '.' followed by any number
            of digits
   PERCENT  a NUMBER followed by '%'
   STRING   the inside of a quoted string: backslash escapes, or any character
            except backslash, double quote and newline
   S        one whitespace character
   RVALUE   a quoted string, or a run of non-whitespace characters
   PERCENT and RVALUE are not referenced by any rule in this file. */
NAME     [a-zA-Z_./](-*[a-zA-Z0-9_./])*
TWIP     (-?[0-9]+(\.([0-9]([05])?)?)?)
NUMBER   -?[0-9]+(\.[0-9]*)?
PERCENT  {NUMBER}%
STRING   (\\.|[^\\"\n])*
S        [ \n\r\t]
RVALUE   \"{STRING}\"|([^ \n\r\t]+)
218
219 %%
220
<BINARY>\] {c();BEGIN(0);}
<BINARY>.  {c();}
<BINARY>\n {c();}
    /* The three rules above handle a [...] section: its content is only
       counted for line/column tracking, and "]" returns to state 0. */

    /* Numeric literals; the trailing context requires whitespace behind them. */
{TWIP}/[ \n\r\t]            {s(TWIP);c();BEGIN(0);}
{NUMBER}/[ \n\r\t]          {s(NUMBER);c();BEGIN(0);}
    /* Comments: a '#' at the start of a line or behind a blank, tab or CR,
       up to and including the end of the line.  Counted but not stored. */
^#[^\n]*\n                  {c();}
[ \t\r]#[^\n]*\n            {c();}
    /* Quoted strings.  A string that reaches the end of the line without a
       closing quote is reported and ends the program. */
\"{STRING}\"                {s(STRING);c();BEGIN(0);}
\"{STRING}$                 {c();printf("unterminated string in line %d: %s\n", line, yytext);exit(1);yyterminate();}
    /* Assignments: the name is stored as an ASSIGNMENT token and the scanner
       enters R for the value.  For += and -= a prefix is set, which store()
       prepends to the next TWIP, NUMBER or IDENTIFIER token. */
{NAME}{S}*\+=               {s(ASSIGNMENT);prefix="<plus>";c();BEGIN(R);}
{NAME}{S}*-=                {s(ASSIGNMENT);prefix="<minus>";c();BEGIN(R);}
{NAME}{S}*=                 {s(ASSIGNMENT);c();BEGIN(R);}
<R>{ /* values which appear only on the right-hand side of assignments, like: x=50% */
    [^ :\n\t\r]*                    {s(IDENTIFIER);c();BEGIN(0);}
}
    /* ".include <file>": handleInclude() switches the input to that file. */
\.include{S}.*\n                    {handleInclude(yytext, yyleng);}
    /* Any other ".name" is a command. */
\.{NAME}                    {s(COMMAND);c();}
    /* Raw data: everything from a ':' up to a terminating ".end". */
:([^.]|\.[^e]|\.e[^n]|\.en[^d]|\.end[^ \n\r\t]|[ \n\r\t])*\.end     {s(RAWDATA);c();}
    /* Bare names, the start of a [...] section, and whitespace. */
{NAME}                      {s(IDENTIFIER);c();}
"["                         {c();BEGIN(BINARY);}
{S}                         {c();}
.                           {/* Catch-all: no other rule matched, so report a syntax error,
                                echo the rest of the offending line and exit. */
                             int c; /* int, not char: EOF must stay distinct from every byte value */
                             char c1=yytext[0];
                             printf("Syntax error in line %d, %d: %s", line, column, yytext);
                             while(1) {
                                 c=input();
                                 /* some flex versions return 0 instead of EOF
                                    at the end of the input */
                                 if(c=='\n' || c==EOF || c==0)
                                     break;
                                 printf("%c", c);
                             }
                             if(c1>='0' && c1<='9')
                                 printf(" (identifiers must not start with a digit)");
                             printf("\n");
                             exit(1);
                             yyterminate();
                            }
<<EOF>>                     {c();
                             if ( --include_stack_ptr < 0 ) {
                                /* end of the top-level input: emit the END
                                   token and stop scanning */
                                s(END);
                                yyterminate();
                             } else {
                                 /* end of an included file: drop its buffer,
                                    resume the including file at the saved
                                    position, and close the FILE that
                                    handleInclude() opened - nothing else
                                    closes it */
                                 FILE *finished = yyin;
                                 yy_delete_buffer( YY_CURRENT_BUFFER );
                                 yy_switch_to_buffer(
                                      include_stack[include_stack_ptr] );
                                 fclose(finished);
                                 column = column_stack[include_stack_ptr];
                                 line = line_stack[include_stack_ptr];
                             }
                            }
268
269 %%
270
/* Called by the scanner when the current input is exhausted.  Always
   returns 1, i.e. there is no further input to continue with. */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
275
276 void freeTokens(struct token_t*file)
277 {
278     mem_clear(&strings);
279     mem_clear(&tokens);
280 }
281
282 struct token_t* generateTokens(char*filename)
283 {
284     FILE*fi;
285     int t;
286     struct token_t*result;
287     int num;
288
289     if(!filename)
290         return 0;
291
292     if(!strcmp(filename,"-"))
293         fi = stdin;
294     else
295         fi = fopen(filename, "rb");
296
297     if(!fi) {
298         printf("Couldn't find file %s\n", filename);
299         return 0;
300     }
301     yyin = fi;
302
303     mem_init(&strings);
304     mem_init(&tokens);
305     mem_put(&strings, &t, 1); //hack- make all valid strings start at position >0
306
307     line=1;
308     column=1;
309
310     yylex();
311 #ifdef YY_CURRENT_BUFFER
312     // some newer flex versions require it like this:
313     yy_delete_buffer(YY_CURRENT_BUFFER);
314 #else
315     yy_delete_buffer(yy_current_buffer);
316 #endif
317
318     result = (struct token_t*)tokens.buffer;
319     num = tokens.pos/sizeof(struct token_t);
320
321     for(t=0;t<tokens.pos/sizeof(struct token_t);t++) {
322         if(result[t].text)
323             result[t].text += (int)strings.buffer;
324     }
325
326     if(fi!=stdin)
327         fclose(fi);
328     return result;
329 }
330