removed Boolean, Number, uint, int from keywords
[swftools.git] / lib / as3 / tokenizer.lex
index 643588e..ee71c96 100644 (file)
@@ -129,6 +129,109 @@ void handleInclude(char*text, int len, char quotes)
     //BEGIN(INITIAL); keep context
 }
 
+/* Decode the backslash escapes of a string literal body.
+ *
+ * in: first byte of the text to decode; need not be NUL-terminated.
+ * l:  number of bytes to decode.
+ *
+ * Recognized escapes: backslash, double quote, single quote, b, f, n, r, t,
+ * one to three octal digits, x with up to two hex digits, u with up to six
+ * hex digits, and the braced forms x{...} / u{...} with any number of hex
+ * digits. A backslash followed by a line ending (CR, LF or CRLF) is a line
+ * continuation and produces no output. Octal and x values must fit into one
+ * byte; u values are emitted as the byte sequence returned by getUTF8().
+ * Everything else is reported through syntaxerror().
+ *
+ * Returns string_new() applied to a malloc'ed, NUL-terminated buffer that
+ * holds the decoded bytes, and to the decoded length.
+ */
+string_t string_unescape(const char*in, int l)
+{
+    int len=0;
+    const char*s = in;
+    const char*end = &in[l];
+    /* Decoding never grows the text (assuming getUTF8() returns no more
+       bytes than the escape that produced them occupied -- TODO confirm),
+       so l bytes hold the payload. The extra byte is for the terminating
+       NUL, which used to be written one past the allocation whenever the
+       input contained no escapes at all. */
+    char*n = (char*)malloc(l+1);
+    if(!n) syntaxerror("out of memory while unescaping string");
+    char*o = n;
+    while(s<end) {
+        if(*s!='\\') {
+            o[len++] = *s++;
+            continue;
+        }
+        s++; /* skip the backslash */
+        if(s==end) syntaxerror("invalid \\ at end of string");
+
+        /* backslash + line ending (mac, dos, unix): drop both */
+        if(*s=='\r') {
+            s++;
+            if(s<end && *s=='\n')
+                s++;
+            continue;
+        }
+        if(*s=='\n') {
+            s++;
+            continue;
+        }
+        switch(*s) {
+            case '\\': o[len++] = '\\'; s++; break;
+            case '"':  o[len++] = '"';  s++; break;
+            /* single-quoted literals need a way to contain a quote, too */
+            case '\'': o[len++] = '\''; s++; break;
+            case 'b':  o[len++] = '\b'; s++; break;
+            case 'f':  o[len++] = '\f'; s++; break;
+            case 'n':  o[len++] = '\n'; s++; break;
+            case 'r':  o[len++] = '\r'; s++; break;
+            case 't':  o[len++] = '\t'; s++; break;
+            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
+                unsigned int num=0;
+                int nr = 0;
+                /* test the bounds before reading *s; an explicit range test
+                   (unlike strchr) does not accept a NUL byte as a digit */
+                while(s<end && nr<3 && *s>='0' && *s<='7') {
+                    num = (num<<3) | (unsigned int)(*s-'0');
+                    nr++;
+                    s++;
+                }
+                /* three octal digits reach 0777; only 0..255 fit a byte */
+                if(num>255)
+                    syntaxerror("octal number out of range (0-255): %d", (int)num);
+                o[len++] = (char)num;
+                break;
+            }
+            case 'x': case 'u': {
+                int max=2;
+                char bracket = 0;
+                char unicode = 0;
+                if(*s == 'u') {
+                    max = 6;
+                    unicode = 1;
+                }
+                s++;
+                if(s==end) syntaxerror("invalid \\u or \\x at end of string");
+                if(*s == '{') {
+                    s++;
+                    if(s==end) syntaxerror("invalid \\u{ at end of string");
+                    bracket=1;
+                }
+                unsigned int num=0;
+                int nr = 0;
+                /* braced escapes take any number of digits, bare ones at
+                   most max; the bounds test comes before any read of *s */
+                while(s<end && (bracket || nr < max)) {
+                    unsigned int digit;
+                    if(*s>='0' && *s<='9')      digit = *s - '0';
+                    else if(*s>='a' && *s<='f') digit = *s - 'a' + 10;
+                    else if(*s>='A' && *s<='F') digit = *s - 'A' + 10;
+                    else break;
+                    num = (num<<4) | digit;
+                    nr++;
+                    s++;
+                }
+                if(bracket) {
+                    if(s<end && *s=='}') {
+                        s++;
+                    } else {
+                        syntaxerror("missing terminating '}'");
+                    }
+                }
+                if(unicode) {
+                    /* copies up to the first NUL of getUTF8()'s result */
+                    char*utf8 = getUTF8(num);
+                    while(*utf8) {
+                        o[len++] = *utf8++;
+                    }
+                } else {
+                    if(num>255)
+                        syntaxerror("byte out of range (0-255): %d", (int)num);
+                    o[len++] = (char)num;
+                }
+                break;
+            }
+            default:
+                syntaxerror("unknown escape sequence: \"\\%c\"", *s);
+        }
+    }
+    o[len]=0;
+    return string_new(n, len);
+}
+
 static void handleString(char*s, int len)
 {
     if(s[0]=='"') {
@@ -140,26 +243,30 @@ static void handleString(char*s, int len)
         s++;len-=2;
     }
     else syntaxerror("String incorrectly terminated");
-    s[len] = 0;
-    avm2_lval.string = s;
+
+    
+    avm2_lval.str = string_unescape(s, len);
 }
 
 
 char start_of_expression;
 
-static inline int m(int type)
+/* Scanner action helper used by the {NAME} rule: copies the current match
+   (yyleng bytes of yytext) into a freshly malloc'ed, NUL-terminated buffer,
+   stores that copy in avm2_lval.id and returns the token type unchanged.
+   NOTE(review): the malloc() result is used without a NULL check. */
+static inline int mkid(int type)
 {
     char*s = malloc(yyleng+1);
     memcpy(s, yytext, yyleng);
     s[yyleng]=0;
+    avm2_lval.id = s;
+    return type;
+}
 
-    NEW(token_t,t);
-    t->type = type;
-    t->text = s;
-    avm2_lval.token = t;
+/* Scanner action helper used by the keyword and operator rules: records the
+   token type in avm2_lval.token and returns it unchanged. No copy of the
+   matched text is made. */
+static inline int m(int type)
+{
+    avm2_lval.token = type;
     return type;
 }
 
+
 static char numberbuf[64];
 static inline int handlenumber()
 {
@@ -229,6 +336,10 @@ void initialize_scanner();
 
 #define c() {countlines(yytext, yyleng);}
 
+//Boolean                      {c();return m(KW_BOOLEAN);}
+//int                          {c();return m(KW_INT);}
+//uint                         {c();return m(KW_UINT);}
+//Number                       {c();return m(KW_NUMBER);}
 %}
 
 %s REGEXPOK
@@ -236,11 +347,12 @@ void initialize_scanner();
 
 NAME    [a-zA-Z_][a-zA-Z0-9_\\]*
 
-NUMBER  -?[0-9]+(\.[0-9]*)?
+NUMBER  [0-9]+(\.[0-9]*)?|-?\.[0-9]+
+NUMBERWITHSIGN [+-]?({NUMBER})
 
 STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
 S       [ \n\r\t]
-MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[\x00-\x1f])*[*]+[/]
+MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
 SINGLELINE_COMMENT \/\/[^\n]*\n
 REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
 %%
@@ -256,6 +368,7 @@ REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
 
 <BEGINNING,REGEXPOK>{
 {REGEXP}                     {c(); BEGIN(INITIAL);return m(T_REGEXP);} 
+{NUMBERWITHSIGN}             {c(); BEGIN(INITIAL);return handlenumber();}
 }
 
 \xef\xbb\xbf                 {/* utf 8 bom */}
@@ -269,6 +382,7 @@ REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
 [&][&]                       {c();BEGIN(REGEXPOK);return m(T_ANDAND);}
 [|][|]                       {c();BEGIN(REGEXPOK);return m(T_OROR);}
 [!][=]                       {c();BEGIN(REGEXPOK);return m(T_NE);}
+[!][=][=]                    {c();BEGIN(REGEXPOK);return m(T_NEE);}
 [=][=][=]                    {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);}
 [=][=]                       {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
 [>][=]                       {c();return m(T_GE);}
@@ -279,12 +393,14 @@ REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
 [-][=]                       {c();return m(T_MINUSBY);}
 [/][=]                       {c();return m(T_DIVBY);}
 [%][=]                       {c();return m(T_MODBY);}
+[*][=]                       {c();return m(T_MULBY);}
 [>][>][=]                    {c();return m(T_SHRBY);}
 [<][<][=]                    {c();return m(T_SHLBY);}
 [>][>][>][=]                 {c();return m(T_USHRBY);}
 [<][<]                       {c();return m(T_SHL);}
 [>][>][>]                    {c();return m(T_USHR);}
 [>][>]                       {c();return m(T_SHR);}
+\.\.\.                       {c();return m(T_DOTDOTDOT);}
 \.\.                         {c();return m(T_DOTDOT);}
 \.                           {c();return m('.');}
 ::                           {c();return m(T_COLONCOLON);}
@@ -298,15 +414,14 @@ internal                     {c();return m(KW_INTERNAL);}
 function                     {c();return m(KW_FUNCTION);}
 package                      {c();return m(KW_PACKAGE);}
 private                      {c();return m(KW_PRIVATE);}
-Boolean                      {c();return m(KW_BOOLEAN);}
 dynamic                      {c();return m(KW_DYNAMIC);}
 extends                      {c();return m(KW_EXTENDS);}
+delete                       {c();return m(KW_DELETE);}
 return                       {c();return m(KW_RETURN);}
 public                       {c();return m(KW_PUBLIC);}
 native                       {c();return m(KW_NATIVE);}
 static                       {c();return m(KW_STATIC);}
 import                       {c();return m(KW_IMPORT);}
-Number                       {c();return m(KW_NUMBER);}
 while                        {c();return m(KW_WHILE);}
 class                        {c();return m(KW_CLASS);}
 const                        {c();return m(KW_CONST);}
@@ -314,11 +429,9 @@ final                        {c();return m(KW_FINAL);}
 false                        {c();return m(KW_FALSE);}
 break                        {c();return m(KW_BREAK);}
 true                         {c();return m(KW_TRUE);}
-uint                         {c();return m(KW_UINT);}
 null                         {c();return m(KW_NULL);}
 else                         {c();return m(KW_ELSE);}
 use                          {c();return m(KW_USE);}
-int                          {c();return m(KW_INT);}
 new                          {c();return m(KW_NEW);}
 get                          {c();return m(KW_GET);}
 for                          {c();return m(KW_FOR);}
@@ -327,9 +440,9 @@ var                          {c();return m(KW_VAR);}
 is                           {c();return m(KW_IS) ;}
 if                           {c();return m(KW_IF) ;}
 as                           {c();return m(KW_AS);}
-{NAME}                       {c();BEGIN(INITIAL);return m(T_IDENTIFIER);}
+{NAME}                       {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);}
 
-[+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
+[+-\/*^~@$!%&\(=\[\]\{\}|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
 [\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
 
 .                           {char c1=yytext[0];
@@ -371,12 +484,12 @@ int yywrap()
 }
 
 static char mbuf[256];
-char*token2string(token_t*t)
+char*token2string(enum yytokentype nr, YYSTYPE v)
 {
-    int nr=t->type;
     if(nr==T_STRING)     return "<string>";
     else if(nr==T_INT)     return "<int>";
     else if(nr==T_UINT)     return "<uint>";
+    else if(nr==T_BYTE)     return "<byte>";
     else if(nr==T_FLOAT)     return "<float>";
     else if(nr==T_REGEXP)     return "REGEXP";
     else if(nr==T_EOF)        return "***END***";
@@ -418,12 +531,8 @@ char*token2string(token_t*t)
     else if(nr==KW_VAR)        return "var";
     else if(nr==KW_IS)         return "is";
     else if(nr==KW_AS)         return "as";
-    else if(nr==T_IDENTIFIER) {
-        if(strlen(t->text)>sizeof(mbuf)-1)
-            return "ID(...)";
-        sprintf(mbuf, "ID(%s)", t->text);
-        return mbuf;
-    } else {
+    else if(nr==T_IDENTIFIER)  return "ID";
+    else {
         sprintf(mbuf, "%d", nr);
         return mbuf;
     }