moved utf8 stuff to ../lib/utf8.h
[swftools.git] / src / parser.lex
1 %{
2
3 #include <string.h>
4 #include <stdlib.h>
5 #include <stdio.h>
6 #include "../lib/q.h"
7 #include "parser.h"
8 #include "../lib/utf8.h"
9
10 //RVALUE         {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM}
11 //<a>.                {printf("<a>%s\n", yytext);}
12 // %x: exclusive, %s: inclusive
/* Printable names for token types. Presumably indexed by enum type_t from
   parser.h (TODO confirm the order); in this file it is only referenced from
   a commented-out debug print in store().
   NOTE(review): there is no entry for RAWDATA although store() handles it. */
13 char*type_names[] = {"twip","number","command","string","assignment","identifier","label","end"};
/* Current scanner position. Both start at 1, are advanced by count() and
   are reset to 1 by generateTokens(). */
14 static int line=1;
15 static int column=1;
16
/* Pool that store() appends token text to. */
17 mem_t strings;
/* Pool that store() appends struct token_t records to. */
18 mem_t tokens;
19
20 static void count(char*text, int len, int condition)
21 {
22     int t;
23     for(t=0;t<len;t++) {
24         if(text[t]=='\n') {
25             line++;
26             column=1;
27         } else {
28             column++;
29         }
30     }
31 }
32
/* Marker set by the += / -= rules ("<plus>" / "<minus>") right after the
   assignment token has been stored. store() puts it in front of the text of
   the next TWIP/NUMBER/IDENTIFIER token and resets it to 0 after every token. */
33 static char*prefix = 0;
34
35 static void unescapeString(string_t * tmp)
36 {
37     char *p, *p1;
38     /* fixme - this routine expects the string to be
39        null-terminated */
40
41     for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1) 
42     {
43         int nr=2;
44         int new=1;
45         switch(p[1])
46         {
47             case '\\': p[0] = '\\'; break;
48             case '"': p[0] = '"'; break;
49             case 'b': p[0] = '\b'; break;
50             case 'f': p[0] = '\f'; break;
51             case 'n': p[0] = '\n'; break;
52             case 'r': p[0] = '\r'; break;
53             case 't': p[0] = '\t'; break;
54             case 'x':  case 'u': {
55                 int max=4;
56                 int num=0;
57                 char*utf8;
58                 char bracket = 0;
59                 if(p[1] == 'u')
60                     max = 6;
61                 if(p[2] == '{')  {
62                     bracket = 1;nr++;max++;
63                 }
64                 while(strchr("0123456789abcdefABCDEF", p[nr]) && (bracket || nr < max)) {
65                     num <<= 4;
66                     if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0';
67                     if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10;
68                     if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10;
69                     nr++;
70                 }
71                 if(bracket && p[nr]=='}') {
72                     bracket = 0;
73                     nr++;
74                 }
75                 utf8 = getUTF8(num);
76                 new = strlen(utf8);
77                 memcpy(p, utf8, new); // do not copy the terminating zero
78                 break;
79             }
80             default:
81                 continue;
82         }
83         tmp->len -= (nr-new); 
84         {
85             int t;
86             char*to=p+new,*from=p+nr;
87             while(*from) {
88                 *to = *from;
89                 to++;
90                 from++;
91             }
92         }
93     }
94 }
95
96 static void store(enum type_t type, int line, int column, char*text, int length)
97 {
98     struct token_t token;
99     string_t tmp;
100     token.type = type;
101     token.line = line;
102     token.column = column;
103     //printf("->%d(%s) %s\n", type, type_names[type], text);fflush(stdout);
104
105     token.text = 0;
106     switch(type) {
107         case END:
108             string_set2(&tmp, "", 0);
109             token.text = (char*)mem_putstring(&strings, tmp);
110         break;
111         case STRING:
112             string_set2(&tmp, text+1, length-2);
113             unescapeString(&tmp);
114             token.text = (char*)mem_putstring(&strings, tmp);
115         break;
116         case TWIP: 
117         case NUMBER: 
118         case IDENTIFIER:
119             string_set2(&tmp, text, length);
120             if(prefix) {
121                 //strcat
122                 token.text = (char*)mem_put(&strings, prefix, strlen(prefix));
123                 mem_putstring(&strings, tmp);
124             } else {
125                 token.text = (char*)mem_putstring(&strings, tmp);
126             }
127             prefix = 0;
128         break;
129         case RAWDATA:
130             string_set2(&tmp, text+1/*:*/, length-5/*.end*/);
131             token.text = (char*)mem_putstring(&strings, tmp);
132         break;
133         case COMMAND:
134             string_set2(&tmp, text+1, length-1);
135             token.text = (char*)mem_putstring(&strings, tmp);
136         break;
137         case ASSIGNMENT: {
138             char*x = &text[length-1];
139             if(x[-1] == '-' || x[-1] == '+')
140                 x--;
141             do{x--;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
142             x++; //first space
143             string_set2(&tmp, text, x-text);
144             token.text = (char*)mem_putstring(&strings, tmp);
145             /*char*y,*x = strchr(text, '=');
146             if(!x) exit(1);
147             y=x;
148             do{y--;} while(*y==32 || *y==10 || *y==13 || *y=='\t');
149             do{x++;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
150             token.text1 = (char*)put(&strings, text, y-text + 1, 1);
151             token.text2 = (char*)put(&strings, x, length-(x-text), 1);*/
152         } break;
153     }
154
155     mem_put(&tokens, &token, sizeof(struct token_t));
156     prefix = 0;
157 }
158
/* Maximum nesting depth of .include directives; handleInclude() exits the
   process when it is exceeded. */
159 #define MAX_INCLUDE_DEPTH 16
/* State of the including files: handleInclude() saves the current flex
   buffer and the line/column position here, the <<EOF>> rule restores them. */
160 YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
161 int line_stack[MAX_INCLUDE_DEPTH];
162 int column_stack[MAX_INCLUDE_DEPTH];
/* Index of the next free slot in the three arrays above. */
163 int include_stack_ptr = 0;
164
165 void handleInclude(char*text, int len)
166 {
167     text+=9;len-=9;
168     while(len >=1 && (text[0] == ' ' || text[0] == '\t')) {
169         text++;len--;
170     }
171     while(len >= 1 && (text[len-1] == ' ' || text[len-1] == '\n')) {
172         len--;
173     }
174     if(len >= 2 && text[0] == '"' && text[len-1] == '"') {
175         text++; len-=2;
176     }
177     text[len] = 0;
178     if(include_stack_ptr >= MAX_INCLUDE_DEPTH) {
179         fprintf( stderr, "Includes nested too deeply" );
180         exit( 1 );
181     }
182     include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
183     line_stack[include_stack_ptr] = line;
184     column_stack[include_stack_ptr] = column;
185     include_stack_ptr++;
186     yyin = fopen(text, "rb");
187     if (!yyin) {
188         fprintf(stderr, "Couldn't open %s\n", text);
189         exit(1);
190     }
191     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
192
193 #ifdef INITIAL
194     BEGIN(INITIAL);
195 #else
196     // best guess
197     BEGIN(0);
198 #endif
199 }
200
/* c(): advance line/column over the text that was just matched. */
201 #define c() {count(yytext, yyleng, YY_START);}
/* s(type): store the matched text as a token of the given type, tagged with
   the current line/column. The rules call it before c(), i.e. while
   line/column still describe the start of the match. */
202 #define s(type) {store(type, line, column, yytext, yyleng);}
203 %}
204
/* Start conditions: R (inclusive) is entered after an assignment operator
   and adds a rule that takes the right-hand side as a single IDENTIFIER.
   BINARY (exclusive) is entered on an opening square bracket and discards
   input up to the closing one. */
205 %s R
206 %x BINARY
207
/* Named patterns.
   NAME:   starts with a letter, underscore, dot or slash; later characters
           may also be digits, and dashes are allowed between them.
   TWIP:   optionally signed integer with at most two decimals, the second
           of which must be 0 or 5.
   NUMBER: optionally signed integer with any number of decimals.
   STRING: body of a quoted string; a backslash escapes the next character.
   S:      one whitespace character.
   PERCENT and RVALUE are not referenced by any rule visible in this file. */
208 NAME     [a-zA-Z_./](-*[a-zA-Z0-9_./])*
209 TWIP     (-?[0-9]+(\.([0-9]([05])?)?)?)
210 NUMBER   -?[0-9]+(\.[0-9]*)?
211 PERCENT  {NUMBER}%
212 STRING   (\\.|[^\\"\n])*
213 S        [ \n\r\t]
214 RVALUE   \"{STRING}\"|([^ \n\r\t]+)
215
216 %%
217
218 <BINARY>\] {c();BEGIN(0); /* closing bracket: leave BINARY */}
219 <BINARY>.  {c(); /* bracketed content is only counted, no token is stored */}
220 <BINARY>\n {c();}
221 {TWIP}/[ \n\r\t]            {s(TWIP);c();BEGIN(0); /* only when whitespace follows; the whitespace is not consumed */}
222 {NUMBER}/[ \n\r\t]          {s(NUMBER);c();BEGIN(0); /* listed after TWIP, so it wins only when its match is longer */}
223 ^#[^\n]*\n                  {c(); /* comment line: hash at line start up to the newline */}
224 [ \t\r]#[^\n]*\n            {c(); /* trailing comment: hash after a blank, tab or CR */}
225 \"{STRING}\"                {s(STRING);c();BEGIN(0); /* NOTE(review): store() unescapes before c() counts; if string_set2 aliases yytext, decoded newlines are counted as line breaks - confirm */}
226 \"{STRING}$                 {c();printf("unterminated string in line %d: %s\n", line, yytext);exit(1);yyterminate(); /* yyterminate is never reached after exit */}
227 {NAME}{S}*\+=               {s(ASSIGNMENT);prefix="<plus>";c();BEGIN(R); /* prefix is set after store(), so it tags the next value token */}
228 {NAME}{S}*-=                {s(ASSIGNMENT);prefix="<minus>";c();BEGIN(R); /* same as above with the minus marker */}
229 {NAME}{S}*=                 {s(ASSIGNMENT);c();BEGIN(R); /* plain assignment: no prefix */}
230 <R>{ /* values which appear only on the right-hand side of assignments, like: x=50% */
231     [^ :\n\t\r]*                    {s(IDENTIFIER);c();BEGIN(0); /* everything up to whitespace or a colon is one token */}
232 }
233 \.include{S}.*\n                    {handleInclude(yytext, yyleng); /* c() is not called by this rule */}
234 \.{NAME}                    {s(COMMAND);c(); /* store() drops the leading dot */}
235 :([^.]|\.[^e]|\.e[^n]|\.en[^d]|\.end[^ \n\r\t]|[ \n\r\t])*\.end     {s(RAWDATA);c(); /* raw block from the colon through .end; store() strips both */}
236 {NAME}                      {s(IDENTIFIER);c();}
237 "["                         {c();BEGIN(BINARY); /* opening bracket: skip until the closing one */}
238 {S}                         {c(); /* whitespace only moves the position */}
.                           {/* Catch-all: nothing else matched, so report a
                                syntax error together with the rest of the
                                offending line and exit. */
                             int c; /* int, not char: a char cannot reliably hold EOF */
                             char c1=yytext[0];
                             printf("Syntax error in line %d, %d: %s", line, column, yytext);
                             while(1) {
                                 c=input();
                                 /* depending on the flex release, input()
                                    signals end of input with EOF or with 0 */
                                 if(c=='\n' || c==EOF || c==0)
                                     break;
                                 printf("%c", c);
                             }
                             if(c1>='0' && c1<='9')
                                 printf(" (identifiers must not start with a digit)");
                             printf("\n");
                             exit(1);
                             yyterminate();
                            }
<<EOF>>                     {/* End of the current input. At top level, store
                                the END token and stop scanning. Inside an
                                include, drop the finished buffer, close its
                                file and resume the including file at the
                                position saved by handleInclude(). */
                             c();
                             if ( include_stack_ptr <= 0 ) {
                                /* leave the stack pointer at 0 instead of
                                   letting it go negative */
                                s(END);
                                yyterminate();
                             } else {
                                 /* yyin is the file handleInclude() opened
                                    for this buffer; remember it before the
                                    buffer switch replaces yyin */
                                 FILE*finished = yyin;
                                 include_stack_ptr--;
                                 yy_delete_buffer( YY_CURRENT_BUFFER );
                                 yy_switch_to_buffer(
                                      include_stack[include_stack_ptr] );
                                 column = column_stack[include_stack_ptr];
                                 line = line_stack[include_stack_ptr];
                                 /* the buffer functions do not close the
                                    stream, so do it here */
                                 if(finished)
                                     fclose(finished);
                             }
                            }
265
266 %%
267
/*
 * Called by the scanner when it reaches the end of its input. Returning 1
 * tells it that no further input follows; included files are resumed by
 * the <<EOF>> rule instead.
 */
int yywrap(void)
{
    return 1;
}
272
273 void freeTokens(struct token_t*file)
274 {
275     mem_clear(&strings);
276     mem_clear(&tokens);
277 }
278
279 struct token_t* generateTokens(char*filename)
280 {
281     FILE*fi;
282     int t;
283     struct token_t*result;
284     int num;
285
286     if(!filename)
287         return 0;
288
289     if(!strcmp(filename,"-"))
290         fi = stdin;
291     else
292         fi = fopen(filename, "rb");
293
294     if(!fi) {
295         printf("Couldn't find file %s\n", filename);
296         return 0;
297     }
298     yyin = fi;
299
300     mem_init(&strings);
301     mem_init(&tokens);
302     mem_put(&strings, &t, 1); //hack- make all valid strings start at position >0
303
304     line=1;
305     column=1;
306
307     yylex();
308 #ifdef YY_CURRENT_BUFFER
309     // some newer flex versions require it like this:
310     yy_delete_buffer(YY_CURRENT_BUFFER);
311 #else
312     yy_delete_buffer(yy_current_buffer);
313 #endif
314
315     result = (struct token_t*)tokens.buffer;
316     num = tokens.pos/sizeof(struct token_t);
317
318     for(t=0;t<tokens.pos/sizeof(struct token_t);t++) {
319         if(result[t].text)
320             result[t].text += (int)strings.buffer;
321     }
322
323     if(fi!=stdin)
324         fclose(fi);
325     return result;
326 }
327