X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=src%2Fparser.lex;h=52449684e16acc732d08a1d68889253e3c6f9f37;hb=31b5a031b1a39be4dc03e20beead77b351587f89;hp=f29926f4c4f0636d79376226bc7fcd6cbf424123;hpb=73168a7f7ca3a242980071be43b5454456374aae;p=swftools.git diff --git a/src/parser.lex b/src/parser.lex index f29926f..5244968 100644 --- a/src/parser.lex +++ b/src/parser.lex @@ -1,7 +1,9 @@ %{ #include -#include "q.h" +#include +#include +#include "../lib/q.h" #include "parser.h" //RVALUE {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM} @@ -29,6 +31,106 @@ static void count(char*text, int len, int condition) static char*prefix = 0; +static char utf8buf[16]; +static char* getUTF8(unsigned int charnum) +{ + memset(utf8buf, 0, sizeof(utf8buf)); + + if(charnum < 0x80) { + utf8buf[0] = charnum; + return utf8buf; + } else if(charnum <0x800) { + /* 0000 0080-0000 07FF 110xxxxx 10xxxxxx */ + utf8buf[0] = 0xc0 | (charnum >> 6); + utf8buf[1] = 0x80 | (charnum & 0x3f); + return utf8buf; + } else if(charnum < 0x10000) { + /* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */ + utf8buf[0] = 0xe0 | (charnum >> 12); + utf8buf[1] = 0x80 |((charnum >> 6)&0x3f); + utf8buf[2] = 0x80 |((charnum )&0x3f); + return utf8buf; + } else if(charnum < 0x200000) { + /* 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8buf[0] = 0xf0 | (charnum >> 18); + utf8buf[1] = 0x80 |((charnum >> 12)&0x3f); + utf8buf[2] = 0x80 |((charnum >> 6 )&0x3f); + utf8buf[3] = 0x80 |((charnum )&0x3f); + return utf8buf; + } else if(charnum < 0x4000000) { + /* 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8buf[0] = 0xf8 | (charnum >> 24); + utf8buf[1] = 0x80 |((charnum >> 18)&0x3f); + utf8buf[2] = 0x80 |((charnum >> 12)&0x3f); + utf8buf[3] = 0x80 |((charnum >> 6 )&0x3f); + utf8buf[4] = 0x80 |((charnum )&0x3f); + return utf8buf; + } else if(charnum < 0x80000000) { + /* 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx */ + utf8buf[0] = 0xfc | (charnum >> 30); + utf8buf[1] = 0x80 |((charnum >> 24)&0x3f); + utf8buf[2] = 0x80 |((charnum >> 18)&0x3f); + utf8buf[3] = 0x80 |((charnum >> 12)&0x3f); + utf8buf[4] = 0x80 |((charnum >> 6 )&0x3f); + utf8buf[5] = 0x80 |((charnum )&0x3f); + return utf8buf; + } else { + fprintf(stderr, "Illegal character: 0x%08x\n", charnum); + return utf8buf; + } +} + +static void unescapeString(string_t * tmp) +{ + char *p, *p1; + /* fixme - this routine expects the string to be + null-terminated */ + + for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1) + { + int nr=2; + int new=1; + switch(p[1]) + { + case '\\': p[0] = '\\'; break; + case '"': p[0] = '"'; break; + case 'b': p[0] = '\b'; break; + case 'f': p[0] = '\f'; break; + case 'n': p[0] = '\n'; break; + case 'r': p[0] = '\r'; break; + case 't': p[0] = '\t'; break; + case 'x': { + int num=0; + char*utf8; + while(strchr("0123456789abcdefABCDEF", p[nr])) { + num <<= 4; + if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0'; + if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10; + if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10; + nr++; + } + utf8 = getUTF8(num); + new = strlen(utf8); + + memcpy(p, utf8, new); // do not copy the terminating zero + break; + } + default: + continue; + } + tmp->len -= (nr-new); + { + int t; + char*to=p+new,*from=p+nr; + while(*from) { + *to = *from; + to++; + from++; + } + } + } +} + static void store(enum type_t type, int line, int column, char*text, int length) { struct token_t token; @@ -46,6 +148,7 @@ static void store(enum type_t type, int line, int column, char*text, int length) break; case STRING: string_set2(&tmp, text+1, length-2); + unescapeString(&tmp); token.text = (char*)mem_putstring(&strings, tmp); break; case TWIP: @@ -61,8 +164,8 @@ static void store(enum type_t type, int line, int column, char*text, int length) } prefix = 0; break; - case LABEL: - string_set2(&tmp, text, length-1); + case RAWDATA: + string_set2(&tmp, text+1/*:*/, length-5/*.end*/); token.text = (char*)mem_putstring(&strings, tmp); break; case COMMAND: @@ -91,6 +194,43 @@ static void store(enum type_t type, int line, int column, char*text, int length) prefix = 0; } +#define MAX_INCLUDE_DEPTH 16 +YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; +int line_stack[MAX_INCLUDE_DEPTH]; +int column_stack[MAX_INCLUDE_DEPTH]; +int include_stack_ptr = 0; + +void handleInclude(char*text, int len) +{ + text+=9;len-=9; + while(len >=1 && (text[0] == ' ' || text[0] == '\t')) { + text++;len--; + } + while(len >= 1 && (text[len-1] == ' ' || text[len-1] == '\n')) { + len--; + } + if(len >= 2 && text[0] == '"' && text[len-1] == '"') { + text++; len-=2; + } + text[len] = 0; + if(include_stack_ptr >= MAX_INCLUDE_DEPTH) { + fprintf( stderr, "Includes nested too deeply" ); + exit( 1 ); + } + include_stack[include_stack_ptr] = YY_CURRENT_BUFFER; + line_stack[include_stack_ptr] = line; + column_stack[include_stack_ptr] = column; + include_stack_ptr++; + yyin = fopen(text, "rb"); + if (!yyin) { + fprintf(stderr, "Couldn't open %s\n", text); + exit(1); + } + yy_switch_to_buffer( + yy_create_buffer( yyin, YY_BUF_SIZE ) ); + BEGIN(INITIAL); +} + #define c() {count(yytext, yyleng, YY_START);} #define s(type) {store(type, line, column, yytext, yyleng);} %} @@ -99,8 +239,8 @@ static void store(enum type_t type, int line, int column, char*text, int length) %x BINARY NAME [a-zA-Z_./](-*[a-zA-Z0-9_./])* -TWIP ([0-9]+(\.([0-9]([05])?)?)?) -NUMBER [0-9]+(\.[0-9]*)? +TWIP (-?[0-9]+(\.([0-9]([05])?)?)?) +NUMBER -?[0-9]+(\.[0-9]*)? PERCENT {NUMBER}% STRING (\\.|[^\\"\n])* S [ \n\r\t] @@ -121,18 +261,18 @@ RVALUE \"{STRING}\"|([^ \n\r\t]+) {NAME}{S}*-= {s(ASSIGNMENT);prefix="";c();BEGIN(R);} {NAME}{S}*= {s(ASSIGNMENT);c();BEGIN(R);} { /* values which appear only on the right-hand side of assignments, like: x=50% */ - [^ \n\t\r]* {s(IDENTIFIER);c();BEGIN(0);} + [^ :\n\t\r]* {s(IDENTIFIER);c();BEGIN(0);} } +\.include{S}.*\n {handleInclude(yytext, yyleng);} \.{NAME} {s(COMMAND);c();} -{NAME}{S}*: {s(LABEL);c();} +:([^.]|\.[^e]|\.e[^n]|\.en[^d]|[ \n\r\t])*\.end {s(RAWDATA);c();} {NAME} {s(IDENTIFIER);c();} "[" {c();BEGIN(BINARY);} {S} {c();} -. {char c,c1=0; +. {char c,c1=yytext[0]; printf("Syntax error in line %d, %d: %s", line, column, yytext); while(1) { c=input(); - if(!c1) c1=c; if(c=='\n' || c==EOF) break; printf("%c", c); @@ -143,7 +283,19 @@ RVALUE \"{STRING}\"|([^ \n\r\t]+) exit(1); yyterminate(); } -<> {c();s(END);yyterminate();} +<> {c(); + if ( --include_stack_ptr < 0 ) { + s(END); + yyterminate(); + } else { + yy_delete_buffer( YY_CURRENT_BUFFER ); + yy_switch_to_buffer( + include_stack[include_stack_ptr] ); + column = column_stack[include_stack_ptr]; + line = line_stack[include_stack_ptr]; + } + } + %% int yywrap()