From: kramm Date: Wed, 22 Sep 2004 11:43:31 +0000 (+0000) Subject: added handling of \x. X-Git-Tag: release-0-6-2~28 X-Git-Url: http://git.asbjorn.biz/?p=swftools.git;a=commitdiff_plain;h=e8458c3ccc1168fc2b40fc851284da352c5405dd added handling of \x. --- diff --git a/src/parser.lex b/src/parser.lex index 38f24dc..6314203 100644 --- a/src/parser.lex +++ b/src/parser.lex @@ -31,6 +31,55 @@ static void count(char*text, int len, int condition) static char*prefix = 0; +static char utf8buf[16]; +static char* getUTF8(unsigned int charnum) +{ + memset(utf8buf, 0, sizeof(utf8buf)); + + if(charnum < 0x80) { + utf8buf[0] = charnum; + return utf8buf; + } else if(charnum <0x800) { + /* 0000 0080-0000 07FF 110xxxxx 10xxxxxx */ + utf8buf[0] = 0xc0 | (charnum >> 6); + utf8buf[1] = 0x80 | (charnum & 0x3f); + return utf8buf; + } else if(charnum < 0x10000) { + /* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */ + utf8buf[0] = 0xe0 | (charnum >> 12); + utf8buf[1] = 0x80 |((charnum >> 6)&0x3f); + utf8buf[2] = 0x80 |((charnum )&0x3f); + return utf8buf; + } else if(charnum < 0x200000) { + /* 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8buf[0] = 0xf0 | (charnum >> 18); + utf8buf[1] = 0x80 |((charnum >> 12)&0x3f); + utf8buf[2] = 0x80 |((charnum >> 6 )&0x3f); + utf8buf[3] = 0x80 |((charnum )&0x3f); + return utf8buf; + } else if(charnum < 0x4000000) { + /* 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8buf[0] = 0xf8 | (charnum >> 24); + utf8buf[1] = 0x80 |((charnum >> 18)&0x3f); + utf8buf[2] = 0x80 |((charnum >> 12)&0x3f); + utf8buf[3] = 0x80 |((charnum >> 6 )&0x3f); + utf8buf[4] = 0x80 |((charnum )&0x3f); + return utf8buf; + } else if(charnum < 0x80000000) { + /* 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx */ + utf8buf[0] = 0xfc | (charnum >> 30); + utf8buf[1] = 0x80 |((charnum >> 24)&0x3f); + utf8buf[2] = 0x80 |((charnum >> 18)&0x3f); + utf8buf[3] = 0x80 |((charnum >> 12)&0x3f); + utf8buf[4] = 0x80 |((charnum >> 6 )&0x3f); + utf8buf[5] = 0x80 |((charnum )&0x3f); + return utf8buf; + } else { + fprintf(stderr, "Illegal character: 0x%08x\n", charnum); + return utf8buf; + } +} + static void unescapeString(string_t * tmp) { char *p, *p1; @@ -39,19 +88,43 @@ static void unescapeString(string_t * tmp) for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1) { + int nr=2; + int new=1; switch(p[1]) { - case '\\': p[1] = '\\'; tmp->len--; break; - case '"': p[1] = '"'; tmp->len--; break; - case 'b': p[1] = '\b'; tmp->len--; break; - case 'f': p[1] = '\f'; tmp->len--; break; - case 'n': p[1] = '\n'; tmp->len--; break; - case 'r': p[1] = '\r'; tmp->len--; break; - case 't': p[1] = '\t'; tmp->len--; break; + case '\\': p[0] = '\\'; break; + case '"': p[0] = '"'; break; + case 'b': p[0] = '\b'; break; + case 'f': p[0] = '\f'; break; + case 'n': p[0] = '\n'; break; + case 'r': p[0] = '\r'; break; + case 't': p[0] = '\t'; break; + case 'x': { + int num=0; + while(strchr("0123456789abcdefABCDEF", p[nr])) { + num <<= 4; + if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0'; + if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10; + if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10; + nr++; + } + char*utf8 = getUTF8(num); + new = strlen(utf8); + + memcpy(p, utf8, new); // do not copy the terminating zero + break; + } default: continue; } - strcpy(p, p+1); + tmp->len -= (nr-new); + int t; + char*to=p+new,*from=p+nr; + while(*from) { + *to = *from; + to++; + from++; + } } }