gcc 2.95.x fixes.

[swftools.git] / src / parser.lex
diff --git a/src/parser.lex b/src/parser.lex

index beca943..5244968 100644 (file)
--- a/src/parser.lex
+++ b/src/parser.lex
@@ -3,7 +3,7 @@
  #include <string.h>
  #include <stdlib.h>
  #include <stdio.h>
-#include "q.h"
+#include "../lib/q.h"
  #include "parser.h"
  
  //RVALUE        {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM}
@@ -31,6 +31,55 @@ static void count(char*text, int len, int condition)
  
  static char*prefix = 0;
  
+static char utf8buf[16];
+static char* getUTF8(unsigned int charnum)
+{
+    memset(utf8buf, 0, sizeof(utf8buf));
+
+    if(charnum < 0x80) {
+       utf8buf[0] = charnum;
+       return utf8buf;
+    } else if(charnum <0x800) {
+       /* 0000 0080-0000 07FF   110xxxxx 10xxxxxx */
+       utf8buf[0] = 0xc0 | (charnum >> 6);
+       utf8buf[1] = 0x80 | (charnum & 0x3f);
+       return utf8buf;
+    } else if(charnum < 0x10000) {
+       /* 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx */
+       utf8buf[0] = 0xe0 | (charnum >> 12);
+       utf8buf[1] = 0x80 |((charnum >> 6)&0x3f);
+       utf8buf[2] = 0x80 |((charnum     )&0x3f);
+       return utf8buf;
+    } else if(charnum < 0x200000) {
+       /* 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+       utf8buf[0] = 0xf0 | (charnum >> 18);
+       utf8buf[1] = 0x80 |((charnum >> 12)&0x3f);
+       utf8buf[2] = 0x80 |((charnum >> 6 )&0x3f);
+       utf8buf[3] = 0x80 |((charnum      )&0x3f);
+       return utf8buf;
+    } else if(charnum < 0x4000000) {
+       /* 0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
+       utf8buf[0] = 0xf8 | (charnum >> 24);
+       utf8buf[1] = 0x80 |((charnum >> 18)&0x3f);
+       utf8buf[2] = 0x80 |((charnum >> 12)&0x3f);
+       utf8buf[3] = 0x80 |((charnum >> 6 )&0x3f);
+       utf8buf[4] = 0x80 |((charnum      )&0x3f);
+       return utf8buf;
+    } else if(charnum < 0x80000000) {
+       /* 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx */
+       utf8buf[0] = 0xfc | (charnum >> 30);
+       utf8buf[1] = 0x80 |((charnum >> 24)&0x3f);
+       utf8buf[2] = 0x80 |((charnum >> 18)&0x3f);
+       utf8buf[3] = 0x80 |((charnum >> 12)&0x3f);
+       utf8buf[4] = 0x80 |((charnum >> 6 )&0x3f);
+       utf8buf[5] = 0x80 |((charnum      )&0x3f);
+       return utf8buf;
+    } else {
+       fprintf(stderr, "Illegal character: 0x%08x\n", charnum);
+       return utf8buf;
+    }
+}
+
  static void unescapeString(string_t * tmp)
  {
      char *p, *p1;
@@ -39,19 +88,46 @@ static void unescapeString(string_t * tmp)
  
      for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1) 
      {
+       int nr=2;
+       int new=1;
         switch(p[1])
         {
-           case '\\': p[1] = '\\'; tmp->len--; break;
-           case '"': p[1] = '"'; tmp->len--; break;
-           case 'b': p[1] = '\b'; tmp->len--; break;
-           case 'f': p[1] = '\f'; tmp->len--; break;
-           case 'n': p[1] = '\n'; tmp->len--; break;
-           case 'r': p[1] = '\r'; tmp->len--; break;
-           case 't': p[1] = '\t'; tmp->len--; break;
+           case '\\': p[0] = '\\'; break;
+           case '"': p[0] = '"'; break;
+           case 'b': p[0] = '\b'; break;
+           case 'f': p[0] = '\f'; break;
+           case 'n': p[0] = '\n'; break;
+           case 'r': p[0] = '\r'; break;
+           case 't': p[0] = '\t'; break;
+           case 'x':  {
+               int num=0;
+               char*utf8;
+               while(strchr("0123456789abcdefABCDEF", p[nr])) {
+                   num <<= 4;
+                   if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0';
+                   if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10;
+                   if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10;
+                   nr++;
+               }
+               utf8 = getUTF8(num);
+               new = strlen(utf8);
+
+               memcpy(p, utf8, new); // do not copy the terminating zero
+               break;
+           }
             default:
                 continue;
         }
-       strcpy(p, p+1);
+       tmp->len -= (nr-new); 
+       {
+           int t;
+           char*to=p+new,*from=p+nr;
+           while(*from) {
+               *to = *from;
+               to++;
+               from++;
+           }
+       }
      }
  }
  
@@ -88,8 +164,8 @@ static void store(enum type_t type, int line, int column, char*text, int length)
             }
             prefix = 0;
         break;
-       case LABEL:
-           string_set2(&tmp, text, length-1);
+       case RAWDATA:
+           string_set2(&tmp, text+1/*:*/, length-5/*.end*/);
             token.text = (char*)mem_putstring(&strings, tmp);
         break;
         case COMMAND:
@@ -163,8 +239,8 @@ void handleInclude(char*text, int len)
  %x BINARY
  
  NAME    [a-zA-Z_./](-*[a-zA-Z0-9_./])*
-TWIP    ([0-9]+(\.([0-9]([05])?)?)?)
-NUMBER  [0-9]+(\.[0-9]*)?
+TWIP    (-?[0-9]+(\.([0-9]([05])?)?)?)
+NUMBER  -?[0-9]+(\.[0-9]*)?
  PERCENT         {NUMBER}%
  STRING   (\\.|[^\\"\n])*
  S       [ \n\r\t]
@@ -185,11 +261,11 @@ RVALUE     \"{STRING}\"|([^ \n\r\t]+)
  {NAME}{S}*-=               {s(ASSIGNMENT);prefix="<minus>";c();BEGIN(R);}
  {NAME}{S}*=                {s(ASSIGNMENT);c();BEGIN(R);}
  <R>{ /* values which appear only on the right-hand side of assignments, like: x=50% */
-    [^ \n\t\r]*                    {s(IDENTIFIER);c();BEGIN(0);}
+    [^ :\n\t\r]*                   {s(IDENTIFIER);c();BEGIN(0);}
  }
  \.include{S}.*\n                   {handleInclude(yytext, yyleng);}
  \.{NAME}                   {s(COMMAND);c();}
-{NAME}{S}*:                 {s(LABEL);c();}
+:([^.]|\.[^e]|\.e[^n]|\.en[^d]|[ \n\r\t])*\.end            {s(RAWDATA);c();}
  {NAME}                      {s(IDENTIFIER);c();}
  "["                        {c();BEGIN(BINARY);}
  {S}                        {c();}