X-Git-Url: http://git.asbjorn.biz/?p=swftools.git;a=blobdiff_plain;f=src%2Fparser.lex;h=a4338e8f56b72974e4a4c31e267d0750aff8e951;hp=b2b49480a633ff43a7cfdf2f597b2553f9db466a;hb=c63b2bf21dc1df9a736f0b4c08f6cba828cdab92;hpb=5e98f16612cfb9cc43c2a9b71ed50fa33ecef890 diff --git a/src/parser.lex b/src/parser.lex index b2b4948..a4338e8 100644 --- a/src/parser.lex +++ b/src/parser.lex @@ -5,6 +5,7 @@ #include #include "../lib/q.h" #include "parser.h" +#include "../lib/utf8.h" //RVALUE {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM} //. {printf("%s\n", yytext);} @@ -31,62 +32,13 @@ static void count(char*text, int len, int condition) static char*prefix = 0; -static char utf8buf[16]; -static char* getUTF8(unsigned int charnum) -{ - memset(utf8buf, 0, sizeof(utf8buf)); - - if(charnum < 0x80) { - utf8buf[0] = charnum; - return utf8buf; - } else if(charnum <0x800) { - /* 0000 0080-0000 07FF 110xxxxx 10xxxxxx */ - utf8buf[0] = 0xc0 | (charnum >> 6); - utf8buf[1] = 0x80 | (charnum & 0x3f); - return utf8buf; - } else if(charnum < 0x10000) { - /* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */ - utf8buf[0] = 0xe0 | (charnum >> 12); - utf8buf[1] = 0x80 |((charnum >> 6)&0x3f); - utf8buf[2] = 0x80 |((charnum )&0x3f); - return utf8buf; - } else if(charnum < 0x200000) { - /* 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - utf8buf[0] = 0xf0 | (charnum >> 18); - utf8buf[1] = 0x80 |((charnum >> 12)&0x3f); - utf8buf[2] = 0x80 |((charnum >> 6 )&0x3f); - utf8buf[3] = 0x80 |((charnum )&0x3f); - return utf8buf; - } else if(charnum < 0x4000000) { - /* 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - utf8buf[0] = 0xf8 | (charnum >> 24); - utf8buf[1] = 0x80 |((charnum >> 18)&0x3f); - utf8buf[2] = 0x80 |((charnum >> 12)&0x3f); - utf8buf[3] = 0x80 |((charnum >> 6 )&0x3f); - utf8buf[4] = 0x80 |((charnum )&0x3f); - return utf8buf; - } else if(charnum < 0x80000000) { - /* 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx */ - utf8buf[0] = 0xfc | (charnum >> 30); - utf8buf[1] = 0x80 |((charnum >> 24)&0x3f); - utf8buf[2] = 0x80 |((charnum >> 18)&0x3f); - utf8buf[3] = 0x80 |((charnum >> 12)&0x3f); - utf8buf[4] = 0x80 |((charnum >> 6 )&0x3f); - utf8buf[5] = 0x80 |((charnum )&0x3f); - return utf8buf; - } else { - fprintf(stderr, "Illegal character: 0x%08x\n", charnum); - return utf8buf; - } -} - static void unescapeString(string_t * tmp) { char *p, *p1; /* fixme - this routine expects the string to be null-terminated */ - for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1) + for (p1=(char*)tmp->str; (p=strchr(p1, '\\')); p1 = p+1) { int nr=2; int new=1; @@ -100,18 +52,26 @@ static void unescapeString(string_t * tmp) case 'r': p[0] = '\r'; break; case 't': p[0] = '\t'; break; case 'x': case 'u': { - int max=2; + int max=4; int num=0; char*utf8; + char bracket = 0; if(p[1] == 'u') - max = 4; - while(strchr("0123456789abcdefABCDEF", p[nr]) && nr < max+2) { + max = 6; + if(p[2] == '{') { + bracket = 1;nr++;max++; + } + while(strchr("0123456789abcdefABCDEF", p[nr]) && (bracket || nr < max)) { num <<= 4; if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0'; if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10; if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10; nr++; } + if(bracket && p[nr]=='}') { + bracket = 0; + nr++; + } utf8 = getUTF8(num); new = strlen(utf8); memcpy(p, utf8, new); // do not copy the terminating zero @@ -197,18 +157,21 @@ static void store(enum type_t type, int line, int column, char*text, int length) } #define MAX_INCLUDE_DEPTH 16 -YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; -int line_stack[MAX_INCLUDE_DEPTH]; -int column_stack[MAX_INCLUDE_DEPTH]; -int include_stack_ptr = 0; +static YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; +static int line_stack[MAX_INCLUDE_DEPTH]; +static int column_stack[MAX_INCLUDE_DEPTH]; +static int include_stack_ptr = 0; -void handleInclude(char*text, int len) +static void handleInclude(char*text, int len) { text+=9;len-=9; while(len >=1 && (text[0] == ' ' || text[0] == '\t')) { text++;len--; } - while(len >= 1 && (text[len-1] == ' ' || text[len-1] == '\n')) { + while(len >= 1 && + (text[len-1] == ' ' || + text[len-1] == '\r' || + text[len-1] == '\n')) { len--; } if(len >= 2 && text[0] == '"' && text[len-1] == '"') { @@ -228,9 +191,14 @@ void handleInclude(char*text, int len) fprintf(stderr, "Couldn't open %s\n", text); exit(1); } - yy_switch_to_buffer( - yy_create_buffer( yyin, YY_BUF_SIZE ) ); + yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) ); + +#ifdef INITIAL BEGIN(INITIAL); +#else + // best guess + BEGIN(0); +#endif } #define c() {count(yytext, yyleng, YY_START);} @@ -313,10 +281,19 @@ void freeTokens(struct token_t*file) struct token_t* generateTokens(char*filename) { - FILE*fi = fopen(filename, "rb"); + FILE*fi; int t; struct token_t*result; int num; + + if(!filename) + return 0; + + if(!strcmp(filename,"-")) + fi = stdin; + else + fi = fopen(filename, "rb"); + if(!fi) { printf("Couldn't find file %s\n", filename); return 0; @@ -331,7 +308,12 @@ struct token_t* generateTokens(char*filename) column=1; yylex(); +#ifdef YY_CURRENT_BUFFER + // some newer flex versions require it like this: + yy_delete_buffer(YY_CURRENT_BUFFER); +#else yy_delete_buffer(yy_current_buffer); +#endif result = (struct token_t*)tokens.buffer; num = tokens.pos/sizeof(struct token_t); @@ -341,7 +323,8 @@ struct token_t* generateTokens(char*filename) result[t].text += (int)strings.buffer; } - fclose(fi); + if(fi!=stdin) + fclose(fi); return result; }