added string escaping.
[swftools.git] / src / parser.lex
1 %{
2
3 #include <string.h>
4 #include "q.h"
5 #include "parser.h"
6
7 //RVALUE         {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM}
8 //<a>.                {printf("<a>%s\n", yytext);}
9 // %x: exclusive, %s: inclusive
10 char*type_names[] = {"twip","number","command","string","assignment","identifier","label","end"};
11 static int line=1;
12 static int column=1;
13
14 mem_t strings;
15 mem_t tokens;
16
17 static void count(char*text, int len, int condition)
18 {
19     int t;
20     for(t=0;t<len;t++) {
21         if(text[t]=='\n') {
22             line++;
23             column=1;
24         } else {
25             column++;
26         }
27     }
28 }
29
30 static char*prefix = 0;
31
32 static void unescapeString(string_t * tmp)
33 {
34     char *p, *p1;
35
36     for (p1=tmp->str; (p=strchr(p1, '\\')) != 0; p1 = p+1) 
37     {
38         switch(p[1])
39         {
40             case '\\': p[1] = '\\'; break;
41             case 'b': p[1] = '\b'; break;
42             case 'f': p[1] = '\f'; break;
43             case 'n': p[1] = '\n'; break;
44             case 'r': p[1] = '\r'; break;
45             case 't': p[1] = '\t'; break;
46             default:
47                 continue;
48         }
49         strcpy(p, p+1);
50     }
51 }
52
53 static void store(enum type_t type, int line, int column, char*text, int length)
54 {
55     struct token_t token;
56     string_t tmp;
57     token.type = type;
58     token.line = line;
59     token.column = column;
60     //printf("->%d(%s) %s\n", type, type_names[type], text);fflush(stdout);
61
62     token.text = 0;
63     switch(type) {
64         case END:
65             string_set2(&tmp, "", 0);
66             token.text = (char*)mem_putstring(&strings, tmp);
67         break;
68         case STRING:
69             string_set2(&tmp, text+1, length-2);
70             unescapeString(&tmp);
71             token.text = (char*)mem_putstring(&strings, tmp);
72         break;
73         case TWIP: 
74         case NUMBER: 
75         case IDENTIFIER:
76             string_set2(&tmp, text, length);
77             if(prefix) {
78                 //strcat
79                 token.text = (char*)mem_put(&strings, prefix, strlen(prefix));
80                 mem_putstring(&strings, tmp);
81             } else {
82                 token.text = (char*)mem_putstring(&strings, tmp);
83             }
84             prefix = 0;
85         break;
86         case LABEL:
87             string_set2(&tmp, text, length-1);
88             token.text = (char*)mem_putstring(&strings, tmp);
89         break;
90         case COMMAND:
91             string_set2(&tmp, text+1, length-1);
92             token.text = (char*)mem_putstring(&strings, tmp);
93         break;
94         case ASSIGNMENT: {
95             char*x = &text[length-1];
96             if(x[-1] == '-' || x[-1] == '+')
97                 x--;
98             do{x--;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
99             x++; //first space
100             string_set2(&tmp, text, x-text);
101             token.text = (char*)mem_putstring(&strings, tmp);
102             /*char*y,*x = strchr(text, '=');
103             if(!x) exit(1);
104             y=x;
105             do{y--;} while(*y==32 || *y==10 || *y==13 || *y=='\t');
106             do{x++;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
107             token.text1 = (char*)put(&strings, text, y-text + 1, 1);
108             token.text2 = (char*)put(&strings, x, length-(x-text), 1);*/
109         } break;
110     }
111
112     mem_put(&tokens, &token, sizeof(struct token_t));
113     prefix = 0;
114 }
115
116 #define c() {count(yytext, yyleng, YY_START);}
117 #define s(type) {store(type, line, column, yytext, yyleng);}
118 %}
119
120 %s R
121 %x BINARY
122
123 NAME     [a-zA-Z_./](-*[a-zA-Z0-9_./])*
124 TWIP     ([0-9]+(\.([0-9]([05])?)?)?)
125 NUMBER   [0-9]+(\.[0-9]*)?
126 PERCENT  {NUMBER}%
127 STRING   (\\.|[^\\"\n])*
128 S        [ \n\r\t]
129 RVALUE   \"{STRING}\"|([^ \n\r\t]+)
130
131 %%
132
133 <BINARY>\] {c();BEGIN(0);}
134 <BINARY>.  {c();}
135 <BINARY>\n {c();}
136 {TWIP}/[ \n\r\t]            {s(TWIP);c();BEGIN(0);}
137 {NUMBER}/[ \n\r\t]          {s(NUMBER);c();BEGIN(0);}
138 ^#[^\n]*\n                  {c();}
139 [ \t\r]#[^\n]*\n            {c();}
140 \"{STRING}\"                {s(STRING);c();BEGIN(0);}
141 \"{STRING}$                 {c();printf("unterminated string in line %d: %s\n", line, yytext);exit(1);yyterminate();}
142 {NAME}{S}*\+=               {s(ASSIGNMENT);prefix="<plus>";c();BEGIN(R);}
143 {NAME}{S}*-=                {s(ASSIGNMENT);prefix="<minus>";c();BEGIN(R);}
144 {NAME}{S}*=                 {s(ASSIGNMENT);c();BEGIN(R);}
145 <R>{ /* values which appear only on the right-hand side of assignments, like: x=50% */
146     [^ \n\t\r]*             {s(IDENTIFIER);c();BEGIN(0);}
147 }
148 \.{NAME}                    {s(COMMAND);c();}
149 {NAME}{S}*:                 {s(LABEL);c();}
150 {NAME}                      {s(IDENTIFIER);c();}
151 "["                         {c();BEGIN(BINARY);}
152 {S}                         {c();}
.                           {/* Catch-all: anything no other rule matched is a syntax error.
                                Print the offending character plus the rest of the line,
                                then abort.  The characters are held in ints so that the
                                end-of-input marker returned by input() is never confused
                                with a data byte and the loop cannot spin forever where
                                plain char is unsigned. */
                             int c,c1=0;
                             printf("Syntax error in line %d, %d: %s", line, column, yytext);
                             while(1) {
                                 c=input();
                                 if(!c1) c1=c;
                                 /* NOTE(review): some flex versions reportedly return 0
                                    rather than EOF at end of input; stop on both */
                                 if(c=='\n' || c==EOF || c==0)
                                     break;
                                 printf("%c", c);
                             }
                             if(c1>='0' && c1<='9')
                                 printf(" (identifiers must not start with a digit)");
                             printf("\n");
                             exit(1);
                             yyterminate();
                            }
168 <<EOF>>                     {c();s(END);yyterminate();}
169 %%
170
/*
 * End-of-input hook for the scanner.  Always returns 1.
 */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
175
176 void freeTokens(struct token_t*file)
177 {
178     mem_clear(&strings);
179     mem_clear(&tokens);
180 }
181
182 struct token_t* generateTokens(char*filename)
183 {
184     FILE*fi = fopen(filename, "rb");
185     int t;
186     struct token_t*result;
187     int num;
188     if(!fi) {
189         printf("Couldn't find file %s\n", filename);
190         return 0;
191     }
192     yyin = fi;
193
194     mem_init(&strings);
195     mem_init(&tokens);
196     mem_put(&strings, &t, 1); //hack- make all valid strings start at position >0
197
198     line=1;
199     column=1;
200
201     yylex();
202     yy_delete_buffer(yy_current_buffer);
203
204     result = (struct token_t*)tokens.buffer;
205     num = tokens.pos/sizeof(struct token_t);
206
207     for(t=0;t<tokens.pos/sizeof(struct token_t);t++) {
208         if(result[t].text)
209             result[t].text += (int)strings.buffer;
210     }
211
212     fclose(fi);
213     return result;
214 }
215