* fix for newer lex versions
[swftools.git] / src / parser.lex
1 %{
2
3 #include <string.h>
4 #include <stdlib.h>
5 #include <stdio.h>
6 #include "../lib/q.h"
7 #include "parser.h"
8
9 //RVALUE         {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM}
10 //<a>.                {printf("<a>%s\n", yytext);}
11 // %x: exclusive, %s: inclusive
/* Printable names for token types.
   NOTE(review): presumably indexed by enum type_t from parser.h - confirm
   that the order matches and that every enumerator used in this file
   (e.g. RAWDATA) has an entry. */
12 char*type_names[] = {"twip","number","command","string","assignment","identifier","label","end"};
/* Current scanner position. Advanced by count(), reset to 1 at the start
   of generateTokens(), saved/restored around .include files. */
13 static int line=1;
14 static int column=1;
15
/* Pool holding the text of all tokens (filled by store()). */
16 mem_t strings;
/* Pool holding the struct token_t records themselves (filled by store()). */
17 mem_t tokens;
18
19 static void count(char*text, int len, int condition)
20 {
21     int t;
22     for(t=0;t<len;t++) {
23         if(text[t]=='\n') {
24             line++;
25             column=1;
26         } else {
27             column++;
28         }
29     }
30 }
31
/* Pending text prefix ("<plus>" / "<minus>"), set by the += and -= rules.
   store() prepends it to the text of a TWIP/NUMBER/IDENTIFIER token;
   every store() call resets it to 0 before returning. */
32 static char*prefix = 0;
33
/* Scratch buffer returned by getUTF8(); overwritten on every call. */
static char utf8buf[16];

/* Encode `charnum` as a NUL-terminated UTF-8 sequence (the original
   1..6 byte scheme, covering values up to 0x7FFFFFFF) and return a
   pointer to the shared static buffer.
   Values of 0x80000000 and above are reported on stderr and yield an
   empty string. Note that charnum 0 also yields an empty string. */
static char* getUTF8(unsigned int charnum)
{
    /* lead-byte marker, indexed by the total sequence length */
    static const unsigned char lead[] = {0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
    unsigned int rest = charnum;
    int nbytes;
    int i;

    memset(utf8buf, 0, sizeof(utf8buf));

    if(charnum >= 0x80000000) {
        fprintf(stderr, "Illegal character: 0x%08x\n", charnum);
        return utf8buf;
    }

    if(charnum < 0x80)
        nbytes = 1;     /* 0000 0000-0000 007F   0xxxxxxx */
    else if(charnum < 0x800)
        nbytes = 2;     /* 0000 0080-0000 07FF   110xxxxx 10xxxxxx */
    else if(charnum < 0x10000)
        nbytes = 3;     /* 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx */
    else if(charnum < 0x200000)
        nbytes = 4;     /* 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    else if(charnum < 0x4000000)
        nbytes = 5;     /* 0020 0000-03FF FFFF   111110xx 10xxxxxx ... 10xxxxxx */
    else
        nbytes = 6;     /* 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx */

    /* continuation bytes carry six payload bits each, lowest bits last */
    for(i = nbytes - 1; i > 0; i--) {
        utf8buf[i] = 0x80 | (rest & 0x3f);
        rest >>= 6;
    }
    /* whatever is left fits next to the length marker of the lead byte */
    utf8buf[0] = lead[nbytes] | rest;
    return utf8buf;
}
82
83 static void unescapeString(string_t * tmp)
84 {
85     char *p, *p1;
86     /* fixme - this routine expects the string to be
87        null-terminated */
88
89     for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1) 
90     {
91         int nr=2;
92         int new=1;
93         switch(p[1])
94         {
95             case '\\': p[0] = '\\'; break;
96             case '"': p[0] = '"'; break;
97             case 'b': p[0] = '\b'; break;
98             case 'f': p[0] = '\f'; break;
99             case 'n': p[0] = '\n'; break;
100             case 'r': p[0] = '\r'; break;
101             case 't': p[0] = '\t'; break;
102             case 'x':  case 'u': {
103                 int max=4;
104                 int num=0;
105                 char*utf8;
106                 char bracket = 0;
107                 if(p[1] == 'u')
108                     max = 6;
109                 if(p[2] == '{')  {
110                     bracket = 1;nr++;max++;
111                 }
112                 while(strchr("0123456789abcdefABCDEF", p[nr]) && (bracket || nr < max)) {
113                     num <<= 4;
114                     if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0';
115                     if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10;
116                     if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10;
117                     nr++;
118                 }
119                 if(bracket && p[nr]=='}') {
120                     bracket = 0;
121                     nr++;
122                 }
123                 utf8 = getUTF8(num);
124                 new = strlen(utf8);
125                 memcpy(p, utf8, new); // do not copy the terminating zero
126                 break;
127             }
128             default:
129                 continue;
130         }
131         tmp->len -= (nr-new); 
132         {
133             int t;
134             char*to=p+new,*from=p+nr;
135             while(*from) {
136                 *to = *from;
137                 to++;
138                 from++;
139             }
140         }
141     }
142 }
143
144 static void store(enum type_t type, int line, int column, char*text, int length)
145 {
146     struct token_t token;
147     string_t tmp;
148     token.type = type;
149     token.line = line;
150     token.column = column;
151     //printf("->%d(%s) %s\n", type, type_names[type], text);fflush(stdout);
152
153     token.text = 0;
154     switch(type) {
155         case END:
156             string_set2(&tmp, "", 0);
157             token.text = (char*)mem_putstring(&strings, tmp);
158         break;
159         case STRING:
160             string_set2(&tmp, text+1, length-2);
161             unescapeString(&tmp);
162             token.text = (char*)mem_putstring(&strings, tmp);
163         break;
164         case TWIP: 
165         case NUMBER: 
166         case IDENTIFIER:
167             string_set2(&tmp, text, length);
168             if(prefix) {
169                 //strcat
170                 token.text = (char*)mem_put(&strings, prefix, strlen(prefix));
171                 mem_putstring(&strings, tmp);
172             } else {
173                 token.text = (char*)mem_putstring(&strings, tmp);
174             }
175             prefix = 0;
176         break;
177         case RAWDATA:
178             string_set2(&tmp, text+1/*:*/, length-5/*.end*/);
179             token.text = (char*)mem_putstring(&strings, tmp);
180         break;
181         case COMMAND:
182             string_set2(&tmp, text+1, length-1);
183             token.text = (char*)mem_putstring(&strings, tmp);
184         break;
185         case ASSIGNMENT: {
186             char*x = &text[length-1];
187             if(x[-1] == '-' || x[-1] == '+')
188                 x--;
189             do{x--;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
190             x++; //first space
191             string_set2(&tmp, text, x-text);
192             token.text = (char*)mem_putstring(&strings, tmp);
193             /*char*y,*x = strchr(text, '=');
194             if(!x) exit(1);
195             y=x;
196             do{y--;} while(*y==32 || *y==10 || *y==13 || *y=='\t');
197             do{x++;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
198             token.text1 = (char*)put(&strings, text, y-text + 1, 1);
199             token.text2 = (char*)put(&strings, x, length-(x-text), 1);*/
200         } break;
201     }
202
203     mem_put(&tokens, &token, sizeof(struct token_t));
204     prefix = 0;
205 }
206
207 #define MAX_INCLUDE_DEPTH 16
208 YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
209 int line_stack[MAX_INCLUDE_DEPTH];
210 int column_stack[MAX_INCLUDE_DEPTH];
211 int include_stack_ptr = 0;
212
213 void handleInclude(char*text, int len)
214 {
215     text+=9;len-=9;
216     while(len >=1 && (text[0] == ' ' || text[0] == '\t')) {
217         text++;len--;
218     }
219     while(len >= 1 && (text[len-1] == ' ' || text[len-1] == '\n')) {
220         len--;
221     }
222     if(len >= 2 && text[0] == '"' && text[len-1] == '"') {
223         text++; len-=2;
224     }
225     text[len] = 0;
226     if(include_stack_ptr >= MAX_INCLUDE_DEPTH) {
227         fprintf( stderr, "Includes nested too deeply" );
228         exit( 1 );
229     }
230     include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
231     line_stack[include_stack_ptr] = line;
232     column_stack[include_stack_ptr] = column;
233     include_stack_ptr++;
234     yyin = fopen(text, "rb");
235     if (!yyin) {
236         fprintf(stderr, "Couldn't open %s\n", text);
237         exit(1);
238     }
239     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
240
241 #ifdef INITIAL
242     BEGIN(INITIAL);
243 #else
244     // best guess
245     BEGIN(0);
246 #endif
247 }
248
/* c(): advance the global line/column counters over the current match. */
249 #define c() {count(yytext, yyleng, YY_START);}
/* s(type): store the current match as a token of the given type, tagged
   with the current line/column. Rules call s() before c() so that the
   token carries the position where the match starts. */
250 #define s(type) {store(type, line, column, yytext, yyleng);}
251 %}
252
/* R: inclusive start condition, entered after an assignment operator so
   that the right-hand side value is scanned by the <R> rule. */
253 %s R
/* BINARY: exclusive start condition, entered on "[" and left on "]";
   everything in between is consumed without producing tokens. */
254 %x BINARY
255
/* NAME: starts with a letter, '_', '.' or '/'; further characters may also
   be digits, each optionally preceded by one or more '-'. */
256 NAME     [a-zA-Z_./](-*[a-zA-Z0-9_./])*
/* TWIP: optionally signed integer with at most two decimals, the second
   of which must be 0 or 5. */
257 TWIP     (-?[0-9]+(\.([0-9]([05])?)?)?)
/* NUMBER: optionally signed integer with an optional fractional part. */
258 NUMBER   -?[0-9]+(\.[0-9]*)?
/* PERCENT: a NUMBER followed by '%' (not referenced by the rules visible
   in this file). */
259 PERCENT  {NUMBER}%
/* STRING: body of a quoted string - backslash escapes or any character
   except backslash, double quote and newline. */
260 STRING   (\\.|[^\\"\n])*
/* S: a single whitespace character. */
261 S        [ \n\r\t]
/* RVALUE: a quoted string or a run of non-whitespace characters (not
   referenced by the rules visible in this file). */
262 RVALUE   \"{STRING}\"|([^ \n\r\t]+)
263
264 %%
265
<BINARY>\] {c();BEGIN(0);}
<BINARY>.  {c();}
<BINARY>\n {c();}
{TWIP}/[ \n\r\t]            {s(TWIP);c();BEGIN(0);}
{NUMBER}/[ \n\r\t]          {s(NUMBER);c();BEGIN(0);}
^#[^\n]*\n                  {c();}
[ \t\r]#[^\n]*\n            {c();}
\"{STRING}\"                {
                             /* store() resolves escapes inside yytext, which
                                turns "\n" escapes into real newlines. Count the
                                untouched text first so those don't advance the
                                line counter, but keep the start position for
                                the token. */
                             int startline = line;
                             int startcolumn = column;
                             c();
                             store(STRING, startline, startcolumn, yytext, yyleng);
                             BEGIN(0);
                            }
\"{STRING}$                 {c();printf("unterminated string in line %d: %s\n", line, yytext);exit(1);yyterminate();}
{NAME}{S}*\+=               {s(ASSIGNMENT);prefix="<plus>";c();BEGIN(R);}
{NAME}{S}*-=                {s(ASSIGNMENT);prefix="<minus>";c();BEGIN(R);}
{NAME}{S}*=                 {s(ASSIGNMENT);c();BEGIN(R);}
<R>{ /* values which appear only on the right-hand side of assignments, like: x=50% */
    [^ :\n\t\r]*                    {s(IDENTIFIER);c();BEGIN(0);}
}
\.include{S}.*\n                    {
                             /* count the directive (including its newline)
                                before handleInclude() saves the position, so
                                that scanning resumes on the correct line once
                                the included file is done */
                             c();
                             handleInclude(yytext, yyleng);
                            }
\.{NAME}                    {s(COMMAND);c();}
:([^.]|\.[^e]|\.e[^n]|\.en[^d]|\.end[^ \n\r\t]|[ \n\r\t])*\.end     {s(RAWDATA);c();}
{NAME}                      {s(IDENTIFIER);c();}
"["                         {c();BEGIN(BINARY);}
{S}                         {c();}
.                           {
                             /* anything else is a syntax error: print the rest
                                of the offending line and give up */
                             int ch;
                             char first=yytext[0];
                             printf("Syntax error in line %d, %d: %s", line, column, yytext);
                             while(1) {
                                 /* keep the result in an int: with a plain char
                                    the comparison against EOF can never succeed
                                    where char is unsigned. Some flex versions
                                    report the end of input as 0 instead of EOF,
                                    so stop on both. */
                                 ch=input();
                                 if(ch=='\n' || ch==EOF || ch==0)
                                     break;
                                 printf("%c", ch);
                             }
                             if(first>='0' && first<='9')
                                 printf(" (identifiers must not start with a digit)");
                             printf("\n");
                             exit(1);
                             yyterminate();
                            }
<<EOF>>                     {c();
                             if ( include_stack_ptr <= 0 ) {
                                /* end of the top-level file; the stack pointer
                                   stays at 0 so that the scanner can be run
                                   again */
                                include_stack_ptr = 0;
                                s(END);
                                yyterminate();
                             } else {
                                 /* end of an included file: close it and
                                    continue in the file that included it */
                                 FILE*finished = yyin;
                                 include_stack_ptr--;
                                 yy_delete_buffer( YY_CURRENT_BUFFER );
                                 yy_switch_to_buffer(
                                      include_stack[include_stack_ptr] );
                                 fclose(finished);
                                 column = column_stack[include_stack_ptr];
                                 line = line_stack[include_stack_ptr];
                             }
                            }
313
314 %%
315
/* End-of-input hook of the generated scanner. Always returns 1;
   switching back from included files is done by the <<EOF>> rule. */
int yywrap()
{
    const int no_more_input = 1;
    return no_more_input;
}
320
321 void freeTokens(struct token_t*file)
322 {
323     mem_clear(&strings);
324     mem_clear(&tokens);
325 }
326
327 struct token_t* generateTokens(char*filename)
328 {
329     FILE*fi = fopen(filename, "rb");
330     int t;
331     struct token_t*result;
332     int num;
333     if(!fi) {
334         printf("Couldn't find file %s\n", filename);
335         return 0;
336     }
337     yyin = fi;
338
339     mem_init(&strings);
340     mem_init(&tokens);
341     mem_put(&strings, &t, 1); //hack- make all valid strings start at position >0
342
343     line=1;
344     column=1;
345
346     yylex();
347 #ifdef YY_CURRENT_BUFFER
348     // some newer flex versions require it like this:
349     yy_delete_buffer(YY_CURRENT_BUFFER);
350 #else
351     yy_delete_buffer(yy_current_buffer);
352 #endif
353
354     result = (struct token_t*)tokens.buffer;
355     num = tokens.pos/sizeof(struct token_t);
356
357     for(t=0;t<tokens.pos/sizeof(struct token_t);t++) {
358         if(result[t].text)
359             result[t].text += (int)strings.buffer;
360     }
361
362     fclose(fi);
363     return result;
364 }
365