X-Git-Url: http://git.asbjorn.biz/?p=swftools.git;a=blobdiff_plain;f=src%2Fparser.lex;h=a4338e8f56b72974e4a4c31e267d0750aff8e951;hp=63142034d8bba33f7a51434545e551f0f3f565f0;hb=c63b2bf21dc1df9a736f0b4c08f6cba828cdab92;hpb=e8458c3ccc1168fc2b40fc851284da352c5405dd

diff --git a/src/parser.lex b/src/parser.lex
index 6314203..a4338e8 100644
--- a/src/parser.lex
+++ b/src/parser.lex
@@ -5,6 +5,7 @@
 #include <stdio.h>
 #include "../lib/q.h"
 #include "parser.h"
+#include "../lib/utf8.h"
 
 //RVALUE	 {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM}
 //<a>.                {printf("<a>%s\n", yytext);}
@@ -31,62 +32,13 @@ static void count(char*text, int len, int condition)
 
 static char*prefix = 0;
 
-static char utf8buf[16];
-static char* getUTF8(unsigned int charnum)
-{
-    memset(utf8buf, 0, sizeof(utf8buf));
-
-    if(charnum < 0x80) {
-	utf8buf[0] = charnum;
-	return utf8buf;
-    } else if(charnum <0x800) {
-	/* 0000 0080-0000 07FF   110xxxxx 10xxxxxx */
-	utf8buf[0] = 0xc0 | (charnum >> 6);
-	utf8buf[1] = 0x80 | (charnum & 0x3f);
-	return utf8buf;
-    } else if(charnum < 0x10000) {
-	/* 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx */
-	utf8buf[0] = 0xe0 | (charnum >> 12);
-	utf8buf[1] = 0x80 |((charnum >> 6)&0x3f);
-	utf8buf[2] = 0x80 |((charnum     )&0x3f);
-	return utf8buf;
-    } else if(charnum < 0x200000) {
-	/* 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
-	utf8buf[0] = 0xf0 | (charnum >> 18);
-	utf8buf[1] = 0x80 |((charnum >> 12)&0x3f);
-	utf8buf[2] = 0x80 |((charnum >> 6 )&0x3f);
-	utf8buf[3] = 0x80 |((charnum      )&0x3f);
-	return utf8buf;
-    } else if(charnum < 0x4000000) {
-	/* 0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
-	utf8buf[0] = 0xf8 | (charnum >> 24);
-	utf8buf[1] = 0x80 |((charnum >> 18)&0x3f);
-	utf8buf[2] = 0x80 |((charnum >> 12)&0x3f);
-	utf8buf[3] = 0x80 |((charnum >> 6 )&0x3f);
-	utf8buf[4] = 0x80 |((charnum      )&0x3f);
-	return utf8buf;
-    } else if(charnum < 0x80000000) {
-	/* 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx */
-	utf8buf[0] = 0xfc | (charnum >> 30);
-	utf8buf[1] = 0x80 |((charnum >> 24)&0x3f);
-	utf8buf[2] = 0x80 |((charnum >> 18)&0x3f);
-	utf8buf[3] = 0x80 |((charnum >> 12)&0x3f);
-	utf8buf[4] = 0x80 |((charnum >> 6 )&0x3f);
-	utf8buf[5] = 0x80 |((charnum      )&0x3f);
-	return utf8buf;
-    } else {
-	fprintf(stderr, "Illegal character: 0x%08x\n", charnum);
-	return utf8buf;
-    }
-}
-
 static void unescapeString(string_t * tmp)
 {
     char *p, *p1;
     /* fixme - this routine expects the string to be
        null-terminated */
 
-    for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1) 
+    for (p1=(char*)tmp->str; (p=strchr(p1, '\\')); p1 = p+1) 
     {
 	int nr=2;
 	int new=1;
@@ -99,18 +51,29 @@ static void unescapeString(string_t * tmp)
 	    case 'n': p[0] = '\n'; break;
 	    case 'r': p[0] = '\r'; break;
 	    case 't': p[0] = '\t'; break;
-	    case 'x':  {
+	    case 'x':  case 'u': { /* hex escape -> UTF-8 bytes, written in place */
+		int max=4; /* x escape: digits sit at p[2..3] */
 		int num=0;
-		while(strchr("0123456789abcdefABCDEF", p[nr])) {
+		char*utf8;
+		char bracket = 0;
+		if(p[1] == 'u') max = 6; /* u escape: digits sit at p[2..5] */
+		if(p[2] == '{')  { /* braced form: no digit limit */
+		    bracket = 1;nr++;max++;
+		}
+		/* strchr() also matches the terminating 0, so test p[nr] first */
+		while(p[nr] && strchr("0123456789abcdefABCDEF", p[nr]) && (bracket || nr < max)) {
 		    num <<= 4;
 		    if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0';
 		    if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10;
 		    if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10;
 		    nr++;
 		}
-		char*utf8 = getUTF8(num);
+		if(bracket && p[nr]=='}') {
+		    bracket = 0;
+		    nr++;
+		}
+		utf8 = getUTF8(num);
 		new = strlen(utf8);
-
 		memcpy(p, utf8, new); // do not copy the terminating zero
 		break;
 	    }
@@ -118,12 +81,14 @@ static void unescapeString(string_t * tmp)
 		continue;
 	}
 	tmp->len -= (nr-new); 
-	int t;
-	char*to=p+new,*from=p+nr;
-	while(*from) {
-	    *to = *from;
-	    to++;
-	    from++;
+	{
+	    char*to=p+new,*from=p+nr;
+	    while(*from) {
+		*to = *from;
+		to++;
+		from++;
+	    }
+	    *to = 0; /* re-terminate, else strchr() rescans the stale tail */
 	}
     }
 }
@@ -192,18 +157,21 @@ static void store(enum type_t type, int line, int column, char*text, int length)
 }
 
 #define MAX_INCLUDE_DEPTH 16
-YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
-int line_stack[MAX_INCLUDE_DEPTH];
-int column_stack[MAX_INCLUDE_DEPTH];
-int include_stack_ptr = 0;
+static YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
+static int line_stack[MAX_INCLUDE_DEPTH];
+static int column_stack[MAX_INCLUDE_DEPTH];
+static int include_stack_ptr = 0;
 
-void handleInclude(char*text, int len)
+static void handleInclude(char*text, int len)
 {
     text+=9;len-=9;
     while(len >=1 && (text[0] == ' ' || text[0] == '\t')) {
 	text++;len--;
     }
-    while(len >= 1 && (text[len-1] == ' ' || text[len-1] == '\n')) {
+    while(len >= 1 && 
+          (text[len-1] == ' ' || 
+           text[len-1] == '\r' || 
+           text[len-1] == '\n')) {
 	len--;
     }
     if(len >= 2 && text[0] == '"' && text[len-1] == '"') {
@@ -223,9 +191,14 @@ void handleInclude(char*text, int len)
 	fprintf(stderr, "Couldn't open %s\n", text);
 	exit(1);
     }
-    yy_switch_to_buffer(
-	yy_create_buffer( yyin, YY_BUF_SIZE ) );
+    yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
+
+#ifdef INITIAL
     BEGIN(INITIAL);
+#else
+    // INITIAL not defined here; the flex manual documents BEGIN(0) as equivalent
+    BEGIN(0);
+#endif
 }
 
 #define c() {count(yytext, yyleng, YY_START);}
@@ -262,7 +235,7 @@ RVALUE	 \"{STRING}\"|([^ \n\r\t]+)
 }
 \.include{S}.*\n		    {handleInclude(yytext, yyleng);}
 \.{NAME}	            {s(COMMAND);c();}
-:([^.]|\.[^e]|\.e[^n]|\.en[^d]|[ \n\r\t])*\.end	    {s(RAWDATA);c();}
+:([^.]|\.[^e]|\.e[^n]|\.en[^d]|\.end[^ \n\r\t]|[ \n\r\t])*\.end	    {s(RAWDATA);c();}
 {NAME}                      {s(IDENTIFIER);c();}
 "["		            {c();BEGIN(BINARY);}
 {S} 		            {c();}
@@ -308,10 +281,19 @@ void freeTokens(struct token_t*file)
 
 struct token_t* generateTokens(char*filename)
 {
-    FILE*fi = fopen(filename, "rb");
+    FILE*fi;
     int t;
     struct token_t*result;
     int num;
+
+    if(!filename)
+	return 0;
+
+    if(!strcmp(filename,"-"))
+	fi = stdin;
+    else
+	fi = fopen(filename, "rb");
+
     if(!fi) {
 	printf("Couldn't find file %s\n", filename);
 	return 0;
@@ -326,7 +308,12 @@ struct token_t* generateTokens(char*filename)
     column=1;
 
     yylex();
+#ifdef YY_CURRENT_BUFFER
+    // some newer flex versions require it like this:
+    yy_delete_buffer(YY_CURRENT_BUFFER);
+#else
     yy_delete_buffer(yy_current_buffer);
+#endif
 
     result = (struct token_t*)tokens.buffer;
     num = tokens.pos/sizeof(struct token_t);
@@ -336,7 +323,8 @@ struct token_t* generateTokens(char*filename)
 	    result[t].text += (int)strings.buffer;
     }
 
-    fclose(fi);
+    if(fi!=stdin)
+	fclose(fi);
     return result;
 }