initial revision
[swftools.git] / src / parser.lex
1 %{
2
3 #include <string.h>
4 #include <stdlib.h>
5 #include <stdio.h>
6 #include "../lib/q.h"
7 #include "parser.h"
8
9 //RVALUE         {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM}
10 //<a>.                {printf("<a>%s\n", yytext);}
11 // %x: exclusive, %s: inclusive
/* Printable names for token types. Within this file they are only referenced
   by the commented-out debug printf in store().
   NOTE(review): presumably indexed by enum type_t from parser.h - confirm
   that the order and count match that enum. */
char*type_names[] = {"twip","number","command","string","assignment","identifier","label","end"};
/* Current scan position. Both counters start at 1 and are advanced by count(). */
static int line=1;
static int column=1;

/* strings receives the text of every token, tokens receives the
   struct token_t records themselves; both are filled by store(). */
mem_t strings;
mem_t tokens;
18
/*
 * Advance the global line/column counters over `len` bytes of `text`.
 * A newline starts a new line at column 1; any other byte moves one
 * column to the right. `condition` is accepted but not used.
 */
static void count(char*text, int len, int condition)
{
    for(; len > 0; len--, text++) {
        if(*text != '\n') {
            column++;
            continue;
        }
        line++;
        column=1;
    }
}
31
/* Text that store() prepends to the next TWIP, NUMBER or IDENTIFIER token.
   The "+=" and "-=" rules set it; store() resets it to 0 on every call. */
static char*prefix = 0;
33
/*
 * Replace backslash escapes in tmp->str in place and shrink tmp->len by one
 * for every escape that was collapsed.
 *
 * Recognised escapes: \\ \" \b \f \n \r \t. Any other character after a
 * backslash is left untouched, backslash included.
 *
 * fixme - this routine expects the string to be null-terminated: it scans
 * with strchr() and therefore looks past tmp->len up to the next NUL byte.
 */
static void unescapeString(string_t * tmp)
{
    char *p, *p1;

    for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1)
    {
        char replacement;
        switch(p[1])
        {
            case '\\': replacement = '\\'; break;
            case '"':  replacement = '"';  break;
            case 'b':  replacement = '\b'; break;
            case 'f':  replacement = '\f'; break;
            case 'n':  replacement = '\n'; break;
            case 'r':  replacement = '\r'; break;
            case 't':  replacement = '\t'; break;
            default:
                /* unknown escape: keep it and resume after the backslash */
                continue;
        }
        p[1] = replacement;
        tmp->len--;
        /* Drop the backslash by shifting the tail (including the NUL) one
           byte to the left. Source and destination overlap, so strcpy()
           would be undefined behaviour here; memmove() is not. */
        memmove(p, p+1, strlen(p+1)+1);
    }
}
57
/*
 * Build a token of the given type and append it to the global `tokens`
 * buffer; the token text, if any, is appended to the global `strings`
 * buffer.
 *
 *   type    token type; decides which part of `text` is kept
 *   line    line number recorded in the token
 *   column  column number recorded in the token
 *   text    the matched input text
 *   length  number of bytes in `text`
 *
 * Text kept per type:
 *   END                      the empty string
 *   STRING                   text without its first and last byte, unescaped
 *   TWIP/NUMBER/IDENTIFIER   the whole text, preceded by `prefix` if one is set
 *   RAWDATA                  text without the leading ':' and trailing ".end"
 *   COMMAND                  text without its first byte
 *   ASSIGNMENT               text up to the whitespace/operator before '='
 *   anything else            no text (token.text stays 0)
 *
 * token.text holds the value returned by mem_put()/mem_putstring() cast to
 * a pointer; generateTokens() later adds the address of strings.buffer to it.
 * The global `prefix` is always 0 when this function returns.
 */
static void store(enum type_t type, int line, int column, char*text, int length)
{
    struct token_t tok;
    string_t payload;
    int have_payload = 1;  /* cleared for types that carry no text */
    int prefixed = 0;      /* set once tok.text points at the prefix */

    tok.type = type;
    tok.line = line;
    tok.column = column;
    tok.text = 0;

    if(type == END) {
        string_set2(&payload, "", 0);
    } else if(type == STRING) {
        string_set2(&payload, text+1, length-2);
        unescapeString(&payload);
    } else if(type == TWIP || type == NUMBER || type == IDENTIFIER) {
        string_set2(&payload, text, length);
        if(prefix) {
            /* the prefix goes first; the payload is appended right after it */
            tok.text = (char*)mem_put(&strings, prefix, strlen(prefix));
            prefixed = 1;
        }
    } else if(type == RAWDATA) {
        string_set2(&payload, text+1/*:*/, length-5/*.end*/);
    } else if(type == COMMAND) {
        string_set2(&payload, text+1, length-1);
    } else if(type == ASSIGNMENT) {
        int end = length-1;    /* index of the '=' */
        if(text[end-1] == '-' || text[end-1] == '+')
            end--;             /* step over the operator of "-=" / "+=" */
        while(text[end-1]==' ' || text[end-1]=='\n' || text[end-1]=='\r' || text[end-1]=='\t')
            end--;             /* strip whitespace after the name */
        string_set2(&payload, text, end);
    } else {
        have_payload = 0;
    }

    if(have_payload) {
        if(prefixed)
            mem_putstring(&strings, payload);
        else
            tok.text = (char*)mem_putstring(&strings, payload);
    }

    mem_put(&tokens, &tok, sizeof(struct token_t));
    prefix = 0;
}
120
/* State saved by handleInclude() while an included file is being scanned and
   restored by the <<EOF>> rule: the interrupted flex buffer together with the
   line and column counters. include_stack_ptr is the number of saved entries. */
#define MAX_INCLUDE_DEPTH 16
YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
int line_stack[MAX_INCLUDE_DEPTH];
int column_stack[MAX_INCLUDE_DEPTH];
int include_stack_ptr = 0;
126
/*
 * Handle a ".include <file>" line: push the current scanner state and
 * continue scanning from the named file.
 *
 *   text  the matched line, starting with ".include" plus one whitespace
 *         character; it is modified in place to terminate the file name
 *   len   number of bytes in `text`
 *
 * The file name may be surrounded by blanks and optionally enclosed in
 * double quotes. The current flex buffer and the line/column counters are
 * saved on the include stack; the <<EOF>> rule restores them. The counters
 * themselves are not changed here.
 *
 * Exits the process if includes are nested deeper than MAX_INCLUDE_DEPTH or
 * if the file cannot be opened.
 */
void handleInclude(char*text, int len)
{
    text+=9;len-=9; /* skip ".include" and the whitespace character after it */
    while(len >=1 && (text[0] == ' ' || text[0] == '\t')) {
        text++;len--;
    }
    /* Strip all trailing whitespace. '\r' matters for files with CRLF line
       endings: otherwise it would stay part of the file name and hide the
       closing quote. */
    while(len >= 1 &&
          (text[len-1] == ' ' ||
           text[len-1] == '\t' ||
           text[len-1] == '\r' ||
           text[len-1] == '\n')) {
        len--;
    }
    if(len >= 2 && text[0] == '"' && text[len-1] == '"') {
        text++; len-=2;
    }
    text[len] = 0;
    if(include_stack_ptr >= MAX_INCLUDE_DEPTH) {
        fprintf( stderr, "Includes nested too deeply" );
        exit( 1 );
    }
    include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
    line_stack[include_stack_ptr] = line;
    column_stack[include_stack_ptr] = column;
    include_stack_ptr++;
    yyin = fopen(text, "rb");
    if (!yyin) {
        fprintf(stderr, "Couldn't open %s\n", text);
        exit(1);
    }
    yy_switch_to_buffer(
        yy_create_buffer( yyin, YY_BUF_SIZE ) );
    BEGIN(INITIAL);
}
157
/* c(): advance the line/column counters over the text that was just matched. */
#define c() {count(yytext, yyleng, YY_START);}
/* s(type): store the matched text as a token of the given type at the current
   line/column. The rules invoke s() before c(), so the recorded position is
   the one the counters had before the match was counted. */
#define s(type) {store(type, line, column, yytext, yyleng);}
160 %}
161
/* Start conditions: R is inclusive (rules without a start condition stay
   active in it), BINARY is exclusive (only rules tagged BINARY are active). */
%s R
%x BINARY

/* Named patterns.
   NAME     a letter, '_', '.' or '/' followed by letters, digits, '_', '.'
            or '/'; runs of '-' are allowed inside but not at the end
   TWIP     optionally negative integer, optionally followed by '.', at most
            one digit and then an optional 0 or 5
   NUMBER   optionally negative integer with an optional fraction
   PERCENT  NUMBER followed by '%' (not referenced by any rule in this file)
   STRING   body of a quoted string: backslash pairs, or anything except a
            backslash, a double quote or a newline
   S        one whitespace character
   RVALUE   quoted string or a run of non-whitespace (not referenced by any
            rule in this file) */
NAME     [a-zA-Z_./](-*[a-zA-Z0-9_./])*
TWIP     (-?[0-9]+(\.([0-9]([05])?)?)?)
NUMBER   -?[0-9]+(\.[0-9]*)?
PERCENT  {NUMBER}%
STRING   (\\.|[^\\"\n])*
S        [ \n\r\t]
RVALUE   \"{STRING}\"|([^ \n\r\t]+)
172
173 %%
174
<BINARY>\] {c();BEGIN(0);}
<BINARY>.  {c();}
<BINARY>\n {c();}
    /* The three BINARY rules above: text after "[" is only counted, never
       stored; the closing "]" returns to the initial state. */

    /* Numbers are only recognised when followed by whitespace, which is
       matched as trailing context and not consumed. */
{TWIP}/[ \n\r\t]            {s(TWIP);c();BEGIN(0);}
{NUMBER}/[ \n\r\t]          {s(NUMBER);c();BEGIN(0);}
    /* Comments run from '#' to the end of the line; the '#' must be at the
       start of a line or follow a blank, tab or carriage return. */
^#[^\n]*\n                  {c();}
[ \t\r]#[^\n]*\n            {c();}
    /* Quoted strings. A string that reaches the end of its line without a
       closing quote is reported and aborts the program. */
\"{STRING}\"                {s(STRING);c();BEGIN(0);}
\"{STRING}$                 {c();printf("unterminated string in line %d: %s\n", line, yytext);exit(1);yyterminate();}
    /* Assignments switch to start condition R for the value. For += and -=
       the prefix is set after s(), because store() clears it; it is then
       picked up by the token stored for the value. */
{NAME}{S}*\+=               {s(ASSIGNMENT);prefix="<plus>";c();BEGIN(R);}
{NAME}{S}*-=                {s(ASSIGNMENT);prefix="<minus>";c();BEGIN(R);}
{NAME}{S}*=                 {s(ASSIGNMENT);c();BEGIN(R);}
<R>{ /* values which appear only on the right-hand side of assignments, like: x=50% */
    [^ \n\t\r]*             {s(IDENTIFIER);c();BEGIN(0);}
}
    /* .include lines are handed to handleInclude(); this rule does not
       call c(), so the include line itself is not counted. */
\.include{S}.*\n                    {handleInclude(yytext, yyleng);}
    /* A name with a leading '.' is a command. */
\.{NAME}                    {s(COMMAND);c();}
    /* Raw data: everything from ':' up to the first ".end". */
:([^.]|\.[^e]|\.e[^n]|\.en[^d]|[ \n\r\t])*\.end     {s(RAWDATA);c();}
{NAME}                      {s(IDENTIFIER);c();}
"["                         {c();BEGIN(BINARY);}
{S}                         {c();}
.                           {int c; /* int so that EOF is distinct from every character */
                             char c1=yytext[0];
                             /* Any character no other rule accepts: print the
                                rest of the offending line and abort. */
                             printf("Syntax error in line %d, %d: %s", line, column, yytext);
                             while(1) {
                                 c=input();
                                 /* end of input is reported as EOF or, by some
                                    flex versions, as 0 */
                                 if(c=='\n' || c==EOF || c==0)
                                     break;
                                 printf("%c", c);
                             }
                             if(c1>='0' && c1<='9')
                                 printf(" (identifiers must not start with a digit)");
                             printf("\n");
                             exit(1);
                             yyterminate();
                            }
<<EOF>>                     {c();
                             /* End of the current input. At the outermost
                                level store the END token and stop; otherwise
                                resume the file that contained the .include. */
                             if ( --include_stack_ptr < 0 ) {
                                s(END);
                                yyterminate();
                             } else {
                                 /* yyin still refers to the file opened by
                                    handleInclude; remember it so it can be
                                    closed once its buffer is gone */
                                 FILE*finished = yyin;
                                 yy_delete_buffer( YY_CURRENT_BUFFER );
                                 yy_switch_to_buffer(
                                      include_stack[include_stack_ptr] );
                                 fclose(finished);
                                 column = column_stack[include_stack_ptr];
                                 line = line_stack[include_stack_ptr];
                             }
                            }
222
223 %%
224
/* End-of-input hook expected by flex scanners. Always returns 1, i.e. there is
   never a further input to continue with; included files are handled by the
   <<EOF>> rule instead. */
int yywrap()
{
    return 1;
}
229
/* Release the token data produced by generateTokens() by clearing the global
   strings and tokens buffers. The `file` argument is not used. */
void freeTokens(struct token_t*file)
{
    mem_clear(&strings);
    mem_clear(&tokens);
}
235
/*
 * Scan `filename` and return its tokens.
 *
 * Returns a pointer to the token array held in the global `tokens` buffer,
 * or 0 if the file cannot be opened (a message is printed to stdout). Each
 * token's text pointer refers into the global `strings` buffer. The scanner
 * stores an END token when it reaches the end of the outermost file. Both
 * buffers are re-initialised on every call; freeTokens() clears them.
 */
struct token_t* generateTokens(char*filename)
{
    FILE*fi = fopen(filename, "rb");
    int t;
    int num;
    char pad = 0;
    struct token_t*result;
    if(!fi) {
        printf("Couldn't find file %s\n", filename);
        return 0;
    }
    yyin = fi;

    mem_init(&strings);
    mem_init(&tokens);
    mem_put(&strings, &pad, 1); //hack- make all valid strings start at position >0

    /* start from a clean scanner state, also when called more than once:
       the <<EOF>> rule leaves include_stack_ptr at -1 */
    line=1;
    column=1;
    include_stack_ptr = 0;
    BEGIN(INITIAL);

    yylex();
    yy_delete_buffer(YY_CURRENT_BUFFER);

    result = (struct token_t*)tokens.buffer;
    num = (int)(tokens.pos/sizeof(struct token_t));

    /* store() recorded every text as an offset into strings.buffer, because
       the buffer may still have moved while tokens were being added. Turn the
       offsets into real pointers now. The offset is widened to size_t rather
       than narrowing the buffer address to int, which would cut off the upper
       half of a 64-bit pointer. */
    for(t=0;t<num;t++) {
        if(result[t].text)
            result[t].text = (char*)strings.buffer + (size_t)result[t].text;
    }

    fclose(fi);
    return result;
}
269