X-Git-Url: http://git.asbjorn.biz/?p=swftools.git;a=blobdiff_plain;f=src%2Fparser.lex;h=a4338e8f56b72974e4a4c31e267d0750aff8e951;hp=63142034d8bba33f7a51434545e551f0f3f565f0;hb=c63b2bf21dc1df9a736f0b4c08f6cba828cdab92;hpb=e8458c3ccc1168fc2b40fc851284da352c5405dd diff --git a/src/parser.lex b/src/parser.lex index 6314203..a4338e8 100644 --- a/src/parser.lex +++ b/src/parser.lex @@ -5,6 +5,7 @@ #include #include "../lib/q.h" #include "parser.h" +#include "../lib/utf8.h" //RVALUE {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM} //. {printf("%s\n", yytext);} @@ -31,62 +32,13 @@ static void count(char*text, int len, int condition) static char*prefix = 0; -static char utf8buf[16]; -static char* getUTF8(unsigned int charnum) -{ - memset(utf8buf, 0, sizeof(utf8buf)); - - if(charnum < 0x80) { - utf8buf[0] = charnum; - return utf8buf; - } else if(charnum <0x800) { - /* 0000 0080-0000 07FF 110xxxxx 10xxxxxx */ - utf8buf[0] = 0xc0 | (charnum >> 6); - utf8buf[1] = 0x80 | (charnum & 0x3f); - return utf8buf; - } else if(charnum < 0x10000) { - /* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */ - utf8buf[0] = 0xe0 | (charnum >> 12); - utf8buf[1] = 0x80 |((charnum >> 6)&0x3f); - utf8buf[2] = 0x80 |((charnum )&0x3f); - return utf8buf; - } else if(charnum < 0x200000) { - /* 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - utf8buf[0] = 0xf0 | (charnum >> 18); - utf8buf[1] = 0x80 |((charnum >> 12)&0x3f); - utf8buf[2] = 0x80 |((charnum >> 6 )&0x3f); - utf8buf[3] = 0x80 |((charnum )&0x3f); - return utf8buf; - } else if(charnum < 0x4000000) { - /* 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - utf8buf[0] = 0xf8 | (charnum >> 24); - utf8buf[1] = 0x80 |((charnum >> 18)&0x3f); - utf8buf[2] = 0x80 |((charnum >> 12)&0x3f); - utf8buf[3] = 0x80 |((charnum >> 6 )&0x3f); - utf8buf[4] = 0x80 |((charnum )&0x3f); - return utf8buf; - } else if(charnum < 0x80000000) { - /* 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx */ - utf8buf[0] = 0xfc | (charnum >> 30); - utf8buf[1] = 0x80 |((charnum >> 24)&0x3f); - utf8buf[2] = 0x80 |((charnum >> 18)&0x3f); - utf8buf[3] = 0x80 |((charnum >> 12)&0x3f); - utf8buf[4] = 0x80 |((charnum >> 6 )&0x3f); - utf8buf[5] = 0x80 |((charnum )&0x3f); - return utf8buf; - } else { - fprintf(stderr, "Illegal character: 0x%08x\n", charnum); - return utf8buf; - } -} - static void unescapeString(string_t * tmp) { char *p, *p1; /* fixme - this routine expects the string to be null-terminated */ - for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1) + for (p1=(char*)tmp->str; (p=strchr(p1, '\\')); p1 = p+1) { int nr=2; int new=1; @@ -99,18 +51,29 @@ static void unescapeString(string_t * tmp) case 'n': p[0] = '\n'; break; case 'r': p[0] = '\r'; break; case 't': p[0] = '\t'; break; - case 'x': { + case 'x': case 'u': { + int max=4; int num=0; - while(strchr("0123456789abcdefABCDEF", p[nr])) { + char*utf8; + char bracket = 0; + if(p[1] == 'u') + max = 6; + if(p[2] == '{') { + bracket = 1;nr++;max++; + } + while(strchr("0123456789abcdefABCDEF", p[nr]) && (bracket || nr < max)) { num <<= 4; if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0'; if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10; if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10; nr++; } - char*utf8 = getUTF8(num); + if(bracket && p[nr]=='}') { + bracket = 0; + nr++; + } + utf8 = getUTF8(num); new = strlen(utf8); - memcpy(p, utf8, new); // do not copy the terminating zero break; } @@ -118,12 +81,14 @@ static void unescapeString(string_t * tmp) continue; } tmp->len -= (nr-new); - int t; - char*to=p+new,*from=p+nr; - while(*from) { - *to = *from; - to++; - from++; + { + int t; + char*to=p+new,*from=p+nr; + while(*from) { + *to = *from; + to++; + from++; + } } } } @@ -192,18 +157,21 @@ static void store(enum type_t type, int line, int column, char*text, int length) } #define MAX_INCLUDE_DEPTH 16 -YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; -int line_stack[MAX_INCLUDE_DEPTH]; -int column_stack[MAX_INCLUDE_DEPTH]; -int include_stack_ptr = 0; +static YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; +static int line_stack[MAX_INCLUDE_DEPTH]; +static int column_stack[MAX_INCLUDE_DEPTH]; +static int include_stack_ptr = 0; -void handleInclude(char*text, int len) +static void handleInclude(char*text, int len) { text+=9;len-=9; while(len >=1 && (text[0] == ' ' || text[0] == '\t')) { text++;len--; } - while(len >= 1 && (text[len-1] == ' ' || text[len-1] == '\n')) { + while(len >= 1 && + (text[len-1] == ' ' || + text[len-1] == '\r' || + text[len-1] == '\n')) { len--; } if(len >= 2 && text[0] == '"' && text[len-1] == '"') { @@ -223,9 +191,14 @@ void handleInclude(char*text, int len) fprintf(stderr, "Couldn't open %s\n", text); exit(1); } - yy_switch_to_buffer( - yy_create_buffer( yyin, YY_BUF_SIZE ) ); + yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) ); + +#ifdef INITIAL BEGIN(INITIAL); +#else + // best guess + BEGIN(0); +#endif } #define c() {count(yytext, yyleng, YY_START);} @@ -262,7 +235,7 @@ RVALUE \"{STRING}\"|([^ \n\r\t]+) } \.include{S}.*\n {handleInclude(yytext, yyleng);} \.{NAME} {s(COMMAND);c();} -:([^.]|\.[^e]|\.e[^n]|\.en[^d]|[ \n\r\t])*\.end {s(RAWDATA);c();} +:([^.]|\.[^e]|\.e[^n]|\.en[^d]|\.end[^ \n\r\t]|[ \n\r\t])*\.end {s(RAWDATA);c();} {NAME} {s(IDENTIFIER);c();} "[" {c();BEGIN(BINARY);} {S} {c();} @@ -308,10 +281,19 @@ void freeTokens(struct token_t*file) struct token_t* generateTokens(char*filename) { - FILE*fi = fopen(filename, "rb"); + FILE*fi; int t; struct token_t*result; int num; + + if(!filename) + return 0; + + if(!strcmp(filename,"-")) + fi = stdin; + else + fi = fopen(filename, "rb"); + if(!fi) { printf("Couldn't find file %s\n", filename); return 0; @@ -326,7 +308,12 @@ struct token_t* generateTokens(char*filename) column=1; yylex(); +#ifdef YY_CURRENT_BUFFER + // some newer flex versions require it like this: + yy_delete_buffer(YY_CURRENT_BUFFER); +#else yy_delete_buffer(yy_current_buffer); +#endif result = (struct token_t*)tokens.buffer; num = tokens.pos/sizeof(struct token_t); @@ -336,7 +323,8 @@ struct token_t* generateTokens(char*filename) result[t].text += (int)strings.buffer; } - fclose(fi); + if(fi!=stdin) + fclose(fi); return result; }