added rollback functionality to trie (for namespaces)
[swftools.git] / lib / as3 / tokenizer.lex
index d9eb6da..732c97f 100644 (file)
@@ -156,7 +156,7 @@ void handleInclude(char*text, int len, char quotes)
         filename = strdup(&text[i1]);
     }
     
-    char*fullfilename = find_file(filename);
+    char*fullfilename = find_file(filename, 1);
     enter_file2(filename, fullfilename, YY_CURRENT_BUFFER);
     yyin = fopen(fullfilename, "rb");
     if (!yyin) {
@@ -259,8 +259,15 @@ static int do_unescape(const char*s, const char*end, char*n)
                 }
                break;
            }
-            default:
-                syntaxerror("unknown escape sequence: \"\\%c\"", *s);
+            default: {
+               if(o) {
+                    o[len+0] = '\\';
+                    o[len+1] = *s;
+                }
+                s++;
+                len+=2;
+                break;
+            }
         }
     }
     if(o) o[len]=0;
@@ -288,12 +295,6 @@ static void handleCData(char*s, int len)
 
 static void handleString(char*s, int len)
 {
-    if(as3_pass < 2) {
-        // don't bother decoding strings in pass 1
-        memset(&a3_lval, 0, sizeof(a3_lval));
-        return;
-    }
-
     if(s[0]=='"') {
         if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'");
         s++;len-=2;
@@ -303,7 +304,6 @@ static void handleString(char*s, int len)
         s++;len-=2;
     }
     else syntaxerror("String incorrectly terminated");
-
     
     a3_lval.str = string_unescape(s, len);
 }
@@ -369,14 +369,14 @@ static inline int handleint()
 
     char*max = l?"1073741824":"2147483647";
     if(yyleng-l>10) {
-        as3_warning("integer overflow: %s (converted to Number)", s);
+        as3_softwarning("integer overflow: %s (converted to Number)", s);
         return handlefloat();
     }
     if(yyleng-l==10) {
         int t;
         for(t=0;t<yyleng-l;t++) {
             if(yytext[l+t]>max[t]) {
-                as3_warning("integer overflow: %s (converted to Number)", s);
+                as3_softwarning("integer overflow: %s (converted to Number)", s);
                 return handlefloat();
             }
             else if(yytext[l+t]<max[t])
@@ -444,12 +444,12 @@ static inline int handlehex()
     }
     if(l && v>1073741824) {
         char*s = nrbuf();
-        as3_warning("signed integer overflow: %s (converted to Number)", s);
+        as3_softwarning("signed integer overflow: %s (converted to Number)", s);
         return setfloat(v);
     }
     if(!l && v>2147483647) {
         char*s = nrbuf();
-        as3_warning("unsigned integer overflow: %s (converted to Number)", s);
+        as3_softwarning("unsigned integer overflow: %s (converted to Number)", s);
         return setfloat(v);
     }
 
@@ -516,14 +516,18 @@ static inline void c() {
     current_column+=yyleng;
 }
 
-static trie_t*namespaces = 0;
-void tokenizer_register_namespace(const char*id)
+trie_t*active_namespaces = 0;
+/*void tokenizer_register_namespace(const char*id)
 {
-    trie_put(&namespaces, id);
+    trie_put(namespaces, id, 0);
 }
+void tokenizer_unregister_namespace(const char*id)
+{
+    trie_remove(namespaces, id);
+}*/
 static inline tokenizer_is_namespace(const char*id)
 {
-    return trie_lookup(namespaces, id);
+    return trie_contains(active_namespaces, id);
 }
 
 static inline int handleIdentifier()
@@ -566,7 +570,7 @@ CDATA    <!\[CDATA\[([^]]|\][^]]|\]\][^>])*\]*\]\]\>
 STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
 S       [ \n\r\t]
 MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
-SINGLELINE_COMMENT \/\/[^\n]*\n
+SINGLELINE_COMMENT \/\/[^\n\r]*[\n\r]
 REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
 %%
 
@@ -588,6 +592,9 @@ REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
 {FLOATWITHSIGN}/{_}          {c(); BEGIN(INITIAL);return handlefloat();}
 }
 
+<REGEXPOK>[\{]               {c(); BEGIN(REGEXPOK);return m(T_DICTSTART);}
+[\{]                         {c(); BEGIN(INITIAL); return m('{');}
+
 \xef\xbb\xbf                 {/* utf 8 bom */}
 {S}                          {l();}
 
@@ -599,14 +606,14 @@ REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
 3rr0r                        {/* for debugging: generates a tokenizer-level error */
                               syntaxerror("3rr0r");}
 
-{NAME}{S}*:{S}*for/{_}        {l();handleLabel(yytext, yyleng-3);return T_FOR;}
-{NAME}{S}*:{S}*do/{_}         {l();handleLabel(yytext, yyleng-2);return T_DO;}
-{NAME}{S}*:{S}*while/{_}      {l();handleLabel(yytext, yyleng-5);return T_WHILE;}
-{NAME}{S}*:{S}*switch/{_}     {l();handleLabel(yytext, yyleng-6);return T_SWITCH;}
-for                          {c();a3_lval.id="";return T_FOR;}
-do                           {c();a3_lval.id="";return T_DO;}
-while                        {c();a3_lval.id="";return T_WHILE;}
-switch                       {c();a3_lval.id="";return T_SWITCH;}
+{NAME}{S}*:{S}*for/{_}       {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-3);return T_FOR;}
+{NAME}{S}*:{S}*do/{_}        {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-2);return T_DO;}
+{NAME}{S}*:{S}*while/{_}     {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-5);return T_WHILE;}
+{NAME}{S}*:{S}*switch/{_}    {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-6);return T_SWITCH;}
+for                          {c();BEGIN(INITIAL);a3_lval.id="";return T_FOR;}
+do                           {c();BEGIN(INITIAL);a3_lval.id="";return T_DO;}
+while                        {c();BEGIN(INITIAL);a3_lval.id="";return T_WHILE;}
+switch                       {c();BEGIN(INITIAL);a3_lval.id="";return T_SWITCH;}
 
 [&][&]                       {c();BEGIN(REGEXPOK);return m(T_ANDAND);}
 [|][|]                       {c();BEGIN(REGEXPOK);return m(T_OROR);}
@@ -614,79 +621,80 @@ switch                       {c();a3_lval.id="";return T_SWITCH;}
 [!][=][=]                    {c();BEGIN(REGEXPOK);return m(T_NEE);}
 [=][=][=]                    {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);}
 [=][=]                       {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
-[>][=]                       {c();return m(T_GE);}
-[<][=]                       {c();return m(T_LE);}
+[>][=]                       {c();BEGIN(REGEXPOK);return m(T_GE);}
+[<][=]                       {c();BEGIN(REGEXPOK);return m(T_LE);}
 [-][-]                       {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
 [+][+]                       {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
-[+][=]                       {c();return m(T_PLUSBY);}
-[-][=]                       {c();return m(T_MINUSBY);}
-[/][=]                       {c();return m(T_DIVBY);}
-[%][=]                       {c();return m(T_MODBY);}
-[*][=]                       {c();return m(T_MULBY);}
-[|][=]                       {c();return m(T_ORBY);}
-[>][>][=]                    {c();return m(T_SHRBY);}
-[<][<][=]                    {c();return m(T_SHLBY);}
-[>][>][>][=]                 {c();return m(T_USHRBY);}
-[<][<]                       {c();return m(T_SHL);}
-[>][>][>]                    {c();return m(T_USHR);}
-[>][>]                       {c();return m(T_SHR);}
-\.\.\.                       {c();return m(T_DOTDOTDOT);}
-\.\.                         {c();return m(T_DOTDOT);}
-\.                           {c();return m('.');}
-::                           {c();return m(T_COLONCOLON);}
-:                            {c();return m(':');}
-instanceof                   {c();return m(KW_INSTANCEOF);}
-implements                   {c();return m(KW_IMPLEMENTS);}
-interface                    {c();return m(KW_INTERFACE);}
-namespace                    {c();return m(KW_NAMESPACE);}
-protected                    {c();return m(KW_PROTECTED);}
-undefined                    {c();return m(KW_UNDEFINED);}
-continue                     {c();return m(KW_CONTINUE);}
-override                     {c();return m(KW_OVERRIDE);}
-internal                     {c();return m(KW_INTERNAL);}
-function                     {c();return m(KW_FUNCTION);}
-finally                      {c();return m(KW_FINALLY);}
-default                      {c();return m(KW_DEFAULT);}
-package                      {c();return m(KW_PACKAGE);}
-private                      {c();return m(KW_PRIVATE);}
-dynamic                      {c();return m(KW_DYNAMIC);}
-extends                      {c();return m(KW_EXTENDS);}
-delete                       {c();return m(KW_DELETE);}
-return                       {c();return m(KW_RETURN);}
-public                       {c();return m(KW_PUBLIC);}
-native                       {c();return m(KW_NATIVE);}
-static                       {c();return m(KW_STATIC);}
-import                       {c();return m(KW_IMPORT);}
-typeof                       {c();return m(KW_TYPEOF);}
-throw                        {c();return m(KW_THROW);}
-class                        {c();return m(KW_CLASS);}
-const                        {c();return m(KW_CONST);}
-catch                        {c();return m(KW_CATCH);}
-final                        {c();return m(KW_FINAL);}
-false                        {c();return m(KW_FALSE);}
-break                        {c();return m(KW_BREAK);}
-super                        {c();return m(KW_SUPER);}
-each                         {c();return m(KW_EACH);}
-void                         {c();return m(KW_VOID);}
-true                         {c();return m(KW_TRUE);}
-null                         {c();return m(KW_NULL);}
-else                         {c();return m(KW_ELSE);}
-case                         {c();return m(KW_CASE);}
-with                         {c();return m(KW_WITH);}
-use                          {c();return m(KW_USE);}
-new                          {c();return m(KW_NEW);}
-get                          {c();return m(KW_GET);}
-set                          {c();return m(KW_SET);}
-var                          {c();return m(KW_VAR);}
-try                          {c();return m(KW_TRY);}
-is                           {c();return m(KW_IS) ;}
-in                           {c();return m(KW_IN) ;}
-if                           {c();return m(KW_IF) ;}
-as                           {c();return m(KW_AS);}
-{NAME}                       {c();BEGIN(INITIAL);return handleIdentifier();}
-
-[\]\}]                       {c();BEGIN(INITIAL);return m(yytext[0]);}
-[+-\/*^~@$!%&\(=\[\{|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
+[+][=]                       {c();BEGIN(REGEXPOK);return m(T_PLUSBY);}
+[\^][=]                      {c();BEGIN(REGEXPOK);return m(T_XORBY);}
+[-][=]                       {c();BEGIN(REGEXPOK);return m(T_MINUSBY);}
+[/][=]                       {c();BEGIN(REGEXPOK);return m(T_DIVBY);}
+[%][=]                       {c();BEGIN(REGEXPOK);return m(T_MODBY);}
+[*][=]                       {c();BEGIN(REGEXPOK);return m(T_MULBY);}
+[|][=]                       {c();BEGIN(REGEXPOK);return m(T_ORBY);}
+[>][>][=]                    {c();BEGIN(REGEXPOK);return m(T_SHRBY);}
+[<][<][=]                    {c();BEGIN(REGEXPOK);return m(T_SHLBY);}
+[>][>][>][=]                 {c();BEGIN(REGEXPOK);return m(T_USHRBY);}
+[<][<]                       {c();BEGIN(REGEXPOK);return m(T_SHL);}
+[>][>][>]                    {c();BEGIN(REGEXPOK);return m(T_USHR);}
+[>][>]                       {c();BEGIN(REGEXPOK);return m(T_SHR);}
+\.\.\.                       {c();BEGIN(REGEXPOK);return m(T_DOTDOTDOT);}
+\.\.                         {c();BEGIN(REGEXPOK);return m(T_DOTDOT);}
+\.                           {c();BEGIN(REGEXPOK);return m('.');}
+::                           {c();BEGIN(REGEXPOK);return m(T_COLONCOLON);}
+:                            {c();BEGIN(REGEXPOK);return m(':');}
+instanceof                   {c();BEGIN(REGEXPOK);return m(KW_INSTANCEOF);}
+implements                   {c();BEGIN(REGEXPOK);return m(KW_IMPLEMENTS);}
+interface                    {c();BEGIN(INITIAL);return m(KW_INTERFACE);}
+namespace                    {c();BEGIN(INITIAL);return m(KW_NAMESPACE);}
+protected                    {c();BEGIN(INITIAL);return m(KW_PROTECTED);}
+undefined                    {c();BEGIN(INITIAL);return m(KW_UNDEFINED);}
+continue                     {c();BEGIN(INITIAL);return m(KW_CONTINUE);}
+override                     {c();BEGIN(INITIAL);return m(KW_OVERRIDE);}
+internal                     {c();BEGIN(INITIAL);return m(KW_INTERNAL);}
+function                     {c();BEGIN(INITIAL);return m(KW_FUNCTION);}
+finally                      {c();BEGIN(INITIAL);return m(KW_FINALLY);}
+default                      {c();BEGIN(INITIAL);return m(KW_DEFAULT);}
+package                      {c();BEGIN(INITIAL);return m(KW_PACKAGE);}
+private                      {c();BEGIN(INITIAL);return m(KW_PRIVATE);}
+dynamic                      {c();BEGIN(INITIAL);return m(KW_DYNAMIC);}
+extends                      {c();BEGIN(INITIAL);return m(KW_EXTENDS);}
+delete                       {c();BEGIN(REGEXPOK);return m(KW_DELETE);}
+return                       {c();BEGIN(REGEXPOK);return m(KW_RETURN);}
+public                       {c();BEGIN(INITIAL);return m(KW_PUBLIC);}
+native                       {c();BEGIN(INITIAL);return m(KW_NATIVE);}
+static                       {c();BEGIN(INITIAL);return m(KW_STATIC);}
+import                       {c();BEGIN(REGEXPOK);return m(KW_IMPORT);}
+typeof                       {c();BEGIN(REGEXPOK);return m(KW_TYPEOF);}
+throw                        {c();BEGIN(REGEXPOK);return m(KW_THROW);}
+class                        {c();BEGIN(INITIAL);return m(KW_CLASS);}
+const                        {c();BEGIN(INITIAL);return m(KW_CONST);}
+catch                        {c();BEGIN(INITIAL);return m(KW_CATCH);}
+final                        {c();BEGIN(INITIAL);return m(KW_FINAL);}
+false                        {c();BEGIN(INITIAL);return m(KW_FALSE);}
+break                        {c();BEGIN(INITIAL);return m(KW_BREAK);}
+super                        {c();BEGIN(INITIAL);return m(KW_SUPER);}
+each                         {c();BEGIN(INITIAL);return m(KW_EACH);}
+void                         {c();BEGIN(INITIAL);return m(KW_VOID);}
+true                         {c();BEGIN(INITIAL);return m(KW_TRUE);}
+null                         {c();BEGIN(INITIAL);return m(KW_NULL);}
+else                         {c();BEGIN(INITIAL);return m(KW_ELSE);}
+case                         {c();BEGIN(REGEXPOK);return m(KW_CASE);}
+with                         {c();BEGIN(REGEXPOK);return m(KW_WITH);}
+use                          {c();BEGIN(REGEXPOK);return m(KW_USE);}
+new                          {c();BEGIN(REGEXPOK);return m(KW_NEW);}
+get                          {c();BEGIN(INITIAL);return m(KW_GET);}
+set                          {c();BEGIN(INITIAL);return m(KW_SET);}
+var                          {c();BEGIN(INITIAL);return m(KW_VAR);}
+try                          {c();BEGIN(INITIAL);return m(KW_TRY);}
+is                           {c();BEGIN(REGEXPOK);return m(KW_IS) ;}
+in                           {c();BEGIN(REGEXPOK);return m(KW_IN) ;}
+if                           {c();BEGIN(INITIAL);return m(KW_IF) ;}
+as                           {c();BEGIN(REGEXPOK);return m(KW_AS);}
+$?{NAME}                       {c();BEGIN(INITIAL);return handleIdentifier();}
+
+[\]\}*]                       {c();BEGIN(INITIAL);return m(yytext[0]);}
+[+-\/^~@$!%&\(=\[|?:;,<>]   {c();BEGIN(REGEXPOK);return m(yytext[0]);}
 [\)\]]                           {c();BEGIN(INITIAL);return m(yytext[0]);}
 
 .                           {/* ERROR */
@@ -731,12 +739,27 @@ int yywrap()
 static char mbuf[256];
 char*token2string(enum yytokentype nr, YYSTYPE v)
 {
-    if(nr==T_STRING)     return "<string>";
+    if(nr==T_STRING) {
+        char*s = malloc(v.str.len+10);
+        strcpy(s, "<string>");
+        memcpy(s+8, v.str.str, v.str.len);
+        sprintf(s+8+v.str.len, " (%d bytes)", v.str.len);
+        return s;
+    }
+    else if(nr==T_REGEXP) {
+        char*s = malloc(strlen(v.regexp.pattern)+10);
+        sprintf(s, "<regexp>%s", v.regexp.pattern);
+        return s;
+    }
+    else if(nr==T_IDENTIFIER) {
+        char*s = malloc(strlen(v.id)+10);
+        sprintf(s, "<ID>%s", v.id);
+        return s;
+    }
     else if(nr==T_INT)     return "<int>";
     else if(nr==T_UINT)     return "<uint>";
     else if(nr==T_BYTE)     return "<byte>";
     else if(nr==T_FLOAT)     return "<float>";
-    else if(nr==T_REGEXP)     return "REGEXP";
     else if(nr==T_EOF)        return "***END***";
     else if(nr==T_GE)         return ">=";
     else if(nr==T_LE)         return "<=";
@@ -775,7 +798,6 @@ char*token2string(enum yytokentype nr, YYSTYPE v)
     else if(nr==KW_VAR)        return "var";
     else if(nr==KW_IS)         return "is";
     else if(nr==KW_AS)         return "as";
-    else if(nr==T_IDENTIFIER)  return "ID";
     else {
         sprintf(mbuf, "%d", nr);
         return mbuf;