initial checkin
authorkramm <kramm>
Tue, 2 Dec 2008 17:03:09 +0000 (17:03 +0000)
committerkramm <kramm>
Tue, 2 Dec 2008 17:03:09 +0000 (17:03 +0000)
lib/as3/parser.y [new file with mode: 0644]
lib/as3/registry.c [new file with mode: 0644]
lib/as3/registry.h [new file with mode: 0644]
lib/as3/tokenizer.h [new file with mode: 0644]
lib/as3/tokenizer.lex [new file with mode: 0644]

diff --git a/lib/as3/parser.y b/lib/as3/parser.y
new file mode 100644 (file)
index 0000000..c104f55
--- /dev/null
@@ -0,0 +1,607 @@
+//%glr-parser
+//%expect-rr 1
+%error-verbose
+
+%token T_IDENTIFIER
+%token T_STRING
+%token T_REGEXP
+%token T_DOTDOT ".."
+%token T_COLONCOLON "::"
+%token T_GE ">="
+%token T_LE "<="
+%token T_EQEQ "=="
+%token T_PLUSPLUS "++"
+%token T_MINUSMINUS "--"
+%token T_IMPLEMENTS
+%token T_NAMESPACE
+%token T_PACKAGE
+%token T_PROTECTED
+%token T_PUBLIC
+%token T_PRIVATE
+%token T_UINT
+%token T_USE
+%token T_INTERNAL
+%token T_INT
+%token T_NEW
+%token T_NATIVE
+%token T_FUNCTION
+%token T_FOR
+%token T_CLASS
+%token T_CONST
+%token T_SET
+%token T_STATIC
+%token T_IMPORT
+%token T_INTERFACE
+%token T_NUMBER
+%token T_NULL
+%token T_FALSE
+%token T_TRUE
+%token T_BOOLEAN
+%token T_VAR
+%token T_AS
+%token T_IS
+%token T_DYNAMIC
+%token T_OVERRIDE
+%token T_FINAL
+%token T_GET
+%token T_EXTENDS
+
+
+%token T_EMPTY
+     
+%{
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include "abc.h"
+#include "pool.h"
+#include "files.h"
+#include "tokenizer.h"
+#include "registry.h"
+
+static int yyerror(char*s)
+{
+   syntaxerror("%s", s); 
+}
+/* Returns a newly allocated token whose text is the text of t1 followed
+   by the text of t2. Only the text member is assigned here. */
+static token_t* concat2(token_t* t1, token_t* t2)
+{
+    NEW(token_t,result);
+    int len1 = strlen(t1->text);
+    int len2 = strlen(t2->text);
+    char*joined = malloc(len1+len2+1);
+    memcpy(joined, t1->text, len1);
+    memcpy(joined+len1, t2->text, len2);
+    joined[len1+len2] = 0;
+    result->text = joined;
+    return result;
+}
+/* Returns a newly allocated token whose text is the concatenation of the
+   texts of t1, t2 and t3, in that order. Only the text member is assigned. */
+static token_t* concat3(token_t* t1, token_t* t2, token_t* t3)
+{
+    NEW(token_t,result);
+    int len1 = strlen(t1->text);
+    int len2 = strlen(t2->text);
+    int len3 = strlen(t3->text);
+    char*joined = malloc(len1+len2+len3+1);
+    char*pos = joined;
+    memcpy(pos, t1->text, len1);
+    pos += len1;
+    memcpy(pos, t2->text, len2);
+    pos += len2;
+    memcpy(pos, t3->text, len3);
+    pos[len3] = 0;
+    result->text = joined;
+    return result;
+}
+
+/* One recorded import statement. */
+typedef struct _import {
+    char*path; /* text of the imported package spec; set by addimport() from the token text */
+} import_t;
+
+DECLARE_LIST(import);
+
+/* Parser state for one nesting level (file, package, class or function).
+   new_state() creates each level as a shallow copy of its parent, so the
+   pointer members are shared with the enclosing level until reassigned. */
+typedef struct _state {
+    abc_file_t*file;     /* abc file being generated; created in initialize_state() */
+    abc_script_t*init;   /* init script of that file */
+
+    int level;           /* nesting depth: 0 at the outermost level, +1 per new_state() */
+
+    char*package;        /* current package name ("" for an unnamed package), 0 outside a package */
+    char*function;       /* name of the function being parsed; set by startfunction() */
+    import_list_t*imports; /* imports recorded by addimport() */
+   
+    /* class data */
+    char*classname;      /* name of the class being parsed; set by startclass() */
+    abc_class_t*cls;     /* abc class object for that class; 0 while outside a class */
+
+} state_t;
+
+static state_t* state = 0;
+
+DECLARE_LIST(state);
+
+static state_list_t*state_stack=0;
+
+/* Creates the outermost parser state and its stack entry, allocates the
+   abc file and its init script, and emits getlocal_0/pushscope into the
+   init script's method body. */
+static void initialize_state()
+{
+    NEW(state_t, root);
+    NEW(state_list_t, bottom);
+    bottom->state = root;
+    state_stack = bottom;
+    state = root;
+
+    state->file = abc_file_new();
+    state->level = 0;
+    state->init = abc_initscript(state->file, 0, 0);
+
+    abc_method_body_t*m = state->init->method->body;
+    __ getlocal_0(m);
+    __ pushscope(m);
+}
+static void finalize_state()
+{
+    if(state->level) {
+        syntaxerror("unexpected end of file");
+    }
+    abc_method_body_t*m = state->init->method->body;
+    //__ popscope(m);
+    __ returnvoid(m);
+}
+
+/* Pushes a new nesting level: the new state starts as a copy of the
+   current one, becomes the current state, and has its level incremented. */
+static void new_state()
+{
+    NEW(state_t, child);
+    NEW(state_list_t, entry);
+    *child = *state; // shallow copy: pointer members stay shared with the parent
+    entry->state = child;
+    entry->next = state_stack;
+    state_stack = entry;
+    state = child;
+    state->level++;
+}
+/* Pops the current nesting level and makes the enclosing state current
+   again. Reports "invalid nesting" if there is no enclosing level.
+   Both the stack entry and the state_t that new_state() allocated for the
+   popped level are released; the state was a shallow copy, so only the
+   struct itself is freed, not the objects its members point to. */
+static void old_state()
+{
+    if(!state_stack || !state_stack->next)
+        syntaxerror("invalid nesting");
+    state_list_t*old = state_stack;
+    state_stack = state_stack->next;
+    free(old->state);
+    free(old);
+    state = state_stack->state;
+}
+
+/* Enters a package scope. t is the package name token, or 0 for an
+   unnamed package (recorded as the empty string). Nested packages are
+   rejected. */
+static void startpackage(token_t*t) 
+{
+    if(state->package)
+        syntaxerror("Packages can not be nested."); 
+
+    new_state();
+
+    char*name = "";
+    if(t)
+        name = t->text;
+    printf("entering package \"%s\"\n", name);
+    state->package = name;
+}
+/* Leaves the current package scope and restores the enclosing state. */
+static void endpackage()
+{
+    char*leaving = state->package;
+    printf("leaving package \"%s\"\n", leaving);
+    old_state();
+}
+
+/* Enters a class scope: pushes a new parser state, validates the class
+   modifiers, builds the class multiname and abc class object, registers
+   the implemented interfaces and emits the class construction code into
+   the init script.
+     modifiers  - token whose list holds the modifier keywords
+     name       - class name token
+     extends    - superclass token, or a T_EMPTY token if there is none
+     implements - token whose list holds the implemented interfaces */
+static void startclass(token_t*modifiers, token_t*name, token_t*extends, token_t*implements)
+{
+    token_list_t*t;
+    if(state->cls) {
+        syntaxerror("inner classes not allowed"); 
+    }
+    new_state();
+    state->classname = name->text;
+    printf("entering class %s\n", name->text);
+    printf("  modifiers: ");for(t=modifiers->tokens;t;t=t->next) printf("%s ", t->token->text);printf("\n");
+    /* a missing extends clause is a T_EMPTY token whose text is 0; never hand that to %s */
+    printf("  extends: %s\n", extends->text?extends->text:"");
+    printf("  implements (%d): ", list_length(implements->tokens));for(t=implements->tokens;t;t=t->next) printf("%s ", t->token->text);printf("\n");
+
+    char public=0,internal=0,final=0,sealed=1;
+    for(t=modifiers->tokens;t;t=t->next) {
+        if(t->token->type == T_INTERNAL) {
+            /* the programmer is being explicit- 
+               being internal is the default anyway */
+            internal = 1;
+        } else if(t->token->type == T_PUBLIC) {
+            public = 1;
+        } else if(t->token->type == T_FINAL) {
+            final = 1;
+        } else {
+            syntaxerror("modifier \"%s\" not supported in class declaration", t->token->text);
+        }
+    }
+    if(public&&internal)
+        syntaxerror("public and internal not supported at the same time.");
+
+    /* create the class name, together with the proper attributes */
+    multiname_t* classname = 0;
+    if(!public && !state->package)
+        classname = multiname_new(namespace_new_private(current_filename), state->classname);
+    else if(!public && state->package)
+        classname = multiname_new(namespace_new_packageinternal(state->package), state->classname);
+    else if(state->package)
+        classname = multiname_new(namespace_new_package(state->package), state->classname);
+    else
+        syntaxerror("public classes only allowed inside a package");
+
+    /* try to find the superclass */
+    multiname_t* superclass = 0;
+    if(extends->type != T_EMPTY) {
+        superclass = registry_findclass(extends->text);
+    } else {
+        superclass = registry_getobjectclass();
+    }
+
+    state->cls = abc_class_new(state->file, classname, superclass);
+    if(final) abc_class_final(state->cls);
+    if(sealed) abc_class_sealed(state->cls);
+
+    for(t=implements->tokens;t;t=t->next) {
+        abc_class_add_interface(state->cls, registry_findclass(t->token->text));
+    }
+
+    /* now write the construction code for this class */
+    int slotindex = abc_initscript_addClassTrait(state->init, classname, state->cls);
+
+    abc_method_body_t*m = state->init->method->body;
+    __ getglobalscope(m);
+    multiname_t*s = superclass;
+    int count=0;
+    /* push one scope per class in the superclass chain; count remembers
+       how many scopes have to be popped again afterwards */
+    while(s) {
+        //TODO: invert
+        //TODO: take a look at the current scope stack, maybe 
+        //      we can re-use something
+        __ getlex2(m, s);
+        __ pushscope(m);
+        s = registry_getsuperclass(s);
+        count++;
+    }
+    /* TODO: if this is one of *our* classes, we can also 
+             do a getglobalscope/getslot <nr> (which references
+             the init function's slots) */
+    __ getlex2(m, superclass);
+    __ newclass(m,state->cls);
+
+    while(count--) {
+        __ popscope(m);
+    }
+    __ setslot(m, slotindex);
+}
+
+/* Leaves the current class scope and restores the enclosing state. */
+static void endclass()
+{
+    char*leaving = state->classname;
+    printf("leaving class %s\n", leaving);
+    old_state();
+}
+/* Records an import in the current state. The path is the token's text;
+   the string is referenced, not copied. */
+static void addimport(token_t*t)
+{
+    NEW(import_t,entry);
+    entry->path = t->text;
+    list_append(state->imports, entry);
+}
+/* Prints every import recorded in the current state, one per line. */
+static void print_imports()
+{
+    import_list_t*entry;
+    for(entry = state->imports; entry; entry = entry->next)
+        printf("  import %s\n", entry->import->path);
+}
+/* Enters a function scope and prints a description of the function.
+     ns     - namespace token, or 0 if none
+     mod    - token whose list holds the modifiers
+     getset - T_GET/T_SET token, or an empty token (text 0) for plain functions
+     name   - function name token
+     params - token whose list holds the parameter names
+     type   - return type token, or an empty token (text 0) if not annotated */
+static void startfunction(token_t*ns, token_t*mod, token_t*getset, token_t*name,
+                          token_t*params, token_t*type)
+{
+    token_list_t*t;
+    new_state();
+    state->function = name->text;
+    printf("entering function %s\n", name->text);
+    if(ns)
+        printf("  namespace: %s\n", ns->text);
+    /* empty tokens carry text==0, which must not be passed to %s */
+    printf("  getset: %s\n", getset->text?getset->text:"");
+    printf("  params: ");for(t=params->tokens;t;t=t->next) printf("%s ", t->token->text);printf("\n");
+    printf("  mod: ");for(t=mod->tokens;t;t=t->next) printf("%s ", t->token->text);printf("\n");
+    printf("  type: %s\n", type->text?type->text:"");
+    print_imports();
+}
+/* Leaves the current function scope and restores the enclosing state. */
+static void endfunction()
+{
+    char*leaving = state->function;
+    printf("leaving function %s\n", leaving);
+    old_state();
+}
+/* Prints a description of a variable declaration.
+     mod      - token whose list holds the modifiers
+     varconst - the "var" or "const" keyword token
+     name     - variable name token
+     type     - type token, or an empty token (text 0) if not annotated
+   Returns 0. The grammar actions store the result, so the function must
+   return a defined value. */
+static int newvariable(token_t*mod, token_t*varconst, token_t*name, token_t*type)
+{
+    token_list_t*t;
+    printf("defining new variable %s\n", name->text);
+    printf("  mod: ");for(t=mod->tokens;t;t=t->next) printf("%s ", t->token->text);printf("\n");
+    printf("  access: ");printf("%s\n", varconst->text);
+    /* an empty type token carries text==0, which must not be passed to %s */
+    printf("  type: ");printf("%s\n", type->text?type->text:"");
+    return 0;
+}
+/* Returns a new token of type T_EMPTY with no text. */
+static token_t* empty_token()
+{
+    NEW(token_t,blank);
+    blank->text=0;
+    blank->type=T_EMPTY;
+    return blank;
+}
+
+/* Appends add to list's token list. If list has no text yet, it takes
+   over add's text pointer. */
+void extend(token_t*list, token_t*add) {
+    list_append(list->tokens,add);
+    if(list->text)
+        return;
+    list->text = add->text;
+}
+/* Appends add to list's token list and replaces list's text with a newly
+   allocated string: old text + seperator + add's text. The previous text
+   buffer is left untouched. */
+void extend_s(token_t*list, char*seperator, token_t*add) {
+    list_append(list->tokens,add);
+    char*head = list->text;
+    char*tail = add->text;
+    int headlen = strlen(head);
+    int seplen = strlen(seperator);
+    int taillen = strlen(tail);
+    char*joined = malloc(headlen+seplen+taillen+1);
+    memcpy(joined, head, headlen);
+    memcpy(joined+headlen, seperator, seplen);
+    memcpy(joined+headlen+seplen, tail, taillen);
+    joined[headlen+seplen+taillen]=0;
+    list->text = joined;
+}
+
+%}
+%%
+
+PROGRAM: CODE PROGRAM
+PROGRAM: 
+
+//EMPTY: {
+//    token_t* t = malloc(sizeof(token_t));
+//    t->text = strdup("");
+//    t->type = T_EMPTY;
+//    $$ = t;
+//}
+
+CODE: CODE CODEPIECE {$$=$1;}
+CODE: CODEPIECE {$$=empty_token();}
+
+MAYBECODE: CODE
+MAYBECODE: 
+
+CODEPIECE: ';'
+CODEPIECE: VARIABLE_DECLARATION {$$=$1;}
+CODEPIECE: PACKAGE_DECLARATION
+CODEPIECE: IMPORT
+CODEPIECE: NAMESPACE_DECLARATION
+CODEPIECE: CLASS_DECLARATION
+CODEPIECE: INTERFACE_DECLARATION
+CODEPIECE: FUNCTION_DECLARATION
+CODEPIECE: EXPRESSION
+CODEPIECE: FOR
+CODEPIECE: USE
+CODEPIECE: ASSIGNMENT
+
+// a package body; the mid-rule action opens the package scope before its code is parsed
+PACKAGE_DECLARATION : T_PACKAGE MULTILEVELIDENTIFIER '{' {startpackage($2);} MAYBECODE '}' {endpackage();}
+PACKAGE_DECLARATION : T_PACKAGE '{' {startpackage(0);} MAYBECODE '}' {endpackage();}
+
+IMPORT : T_IMPORT PACKAGESPEC {addimport($2);}
+
+TYPE : BUILTIN_TYPE | '*' | T_IDENTIFIER
+// TODO: do we need this? All it does is clutter up our keyword space
+BUILTIN_TYPE : T_STRING
+BUILTIN_TYPE : T_NUMBER
+BUILTIN_TYPE : T_INT
+BUILTIN_TYPE : T_UINT
+BUILTIN_TYPE : T_BOOLEAN
+BUILTIN_TYPE : T_NULL
+
+MAYBETYPE: ':' TYPE {$$=$2;}
+MAYBETYPE:          {$$=empty_token();}
+
+//FUNCTION_HEADER:      NAMESPACE MODIFIERS T_FUNCTION GETSET T_IDENTIFIER '(' PARAMS ')' 
+FUNCTION_HEADER:      MODIFIERS T_FUNCTION GETSET T_IDENTIFIER '(' PARAMS ')' 
+                      MAYBETYPE
+// a function with a body; the mid-rule action opens the function scope before the body is parsed
+FUNCTION_DECLARATION: MODIFIERS T_FUNCTION GETSET T_IDENTIFIER '(' PARAMS ')' 
+                      MAYBETYPE '{' {startfunction(0,$1,$3,$4,$6,$8);} MAYBECODE '}' {endfunction();}
+
+NAMESPACE_DECLARATION : MODIFIERS T_NAMESPACE T_IDENTIFIER
+NAMESPACE_DECLARATION : MODIFIERS T_NAMESPACE T_IDENTIFIER '=' T_IDENTIFIER
+NAMESPACE_DECLARATION : MODIFIERS T_NAMESPACE T_IDENTIFIER '=' T_STRING
+
+//NAMESPACE :              {$$=empty_token();}
+//NAMESPACE : T_IDENTIFIER {$$=$1};
+
+CONSTANT : T_NUMBER
+CONSTANT : T_STRING
+CONSTANT : T_TRUE | T_FALSE
+CONSTANT : T_NULL
+
+VAR : T_CONST | T_VAR
+
+// type annotation
+// TODO: NAMESPACE
+
+VARIABLE_DECLARATION : MODIFIERS VAR T_IDENTIFIER MAYBETYPE {
+    int i = newvariable($1,$2,$3,$4);
+}
+VARIABLE_DECLARATION : MODIFIERS VAR T_IDENTIFIER MAYBETYPE '=' EXPRESSION {
+    int i = newvariable($1,$2,$3,$4);
+    //setvariable(i,$6);
+}
+
+// operator precedence:
+// http://livedocs.adobe.com/flash/9.0/main/wwhelp/wwhimpl/common/html/wwhelp.htm?context=LiveDocs_Parts&file=00000012.html
+EXPRESSION : EXPRESSION '<' EXPRESSION
+EXPRESSION : EXPRESSION '>' EXPRESSION
+EXPRESSION : EXPRESSION "<=" EXPRESSION
+EXPRESSION : EXPRESSION ">=" EXPRESSION
+EXPRESSION : EXPRESSION "==" EXPRESSION
+EXPRESSION : EXPRESSION '+' TERM
+EXPRESSION : EXPRESSION '-' TERM
+EXPRESSION : TERM
+EXPRESSION : '-' TERM
+TERM : TERM '*' FACTOR
+TERM : TERM '/' FACTOR
+TERM : EMOD
+EMOD: FACTOR "++"
+EMOD: FACTOR "--"
+EMOD: FACTOR
+FACTOR : '(' EXPRESSION ')'
+FACTOR : CONSTANT
+FACTOR : VARIABLE
+FACTOR : FUNCTIONCALL
+FACTOR : T_REGEXP
+FACTOR : NEW
+FACTOR : IS
+FACTOR : AS
+
+IS : EXPRESSION T_IS TYPE
+AS : EXPRESSION T_AS TYPE
+NEW : T_NEW T_IDENTIFIER | T_NEW T_IDENTIFIER '(' ')'
+NEW : T_NEW T_IDENTIFIER '(' EXPRESSIONLIST ')'
+
+FUNCTIONCALL : VARIABLE '(' EXPRESSIONLIST ')'
+FUNCTIONCALL : VARIABLE '(' ')'
+
+EXPRESSIONLIST : EXPRESSION
+EXPRESSIONLIST : EXPRESSION ',' EXPRESSIONLIST
+
+VARIABLE : T_IDENTIFIER
+VARIABLE : VARIABLE '.' T_IDENTIFIER
+VARIABLE : VARIABLE ".." T_IDENTIFIER // descendants
+VARIABLE : VARIABLE "::" VARIABLE // namespace declaration
+VARIABLE : VARIABLE "::" '[' EXPRESSION ']' // qualified expression
+VARIABLE : VARIABLE '[' EXPRESSION ']' // unqualified expression
+
+ASSIGNMENT :           VARIABLE           '=' EXPRESSION
+NEW_ASSIGNMENT : T_VAR VARIABLE MAYBETYPE '=' EXPRESSION
+
+FOR : T_FOR '(' NEW_ASSIGNMENT ';' EXPRESSION ';' EXPRESSION ')' '{' MAYBECODE '}'
+FOR : T_FOR '(' ASSIGNMENT     ';' EXPRESSION ';' EXPRESSION ')' '{' MAYBECODE '}'
+
+USE : T_USE T_NAMESPACE T_IDENTIFIER
+
+// keywords which also may be identifiers
+X_IDENTIFIER : T_IDENTIFIER | T_PACKAGE
+
+PACKAGESPEC : PACKAGESPEC '.' PACKAGESPEC {if($1->text[0]=='*') syntaxerror("wildcard in the middle of path");
+                                           $$ = concat3($1,$2,$3);}
+PACKAGESPEC : X_IDENTIFIER                {$$=$1;}
+PACKAGESPEC : '*'                         {$$=$1;}
+
+GETSET : T_GET {$$=$1;}
+       | T_SET {$$=$1;}
+       |       {$$=empty_token();}
+
+CLASS_DECLARATION : MODIFIERS T_CLASS T_IDENTIFIER EXTENDS IMPLEMENTS_LIST '{' {startclass($1,$3,$4,$5);} MAYBE_DECLARATION_LIST '}' {endclass();}
+INTERFACE_DECLARATION : MODIFIERS T_INTERFACE T_IDENTIFIER EXTENDS_LIST '{' MAYBE_IDECLARATION_LIST '}'
+
+PARAMS: {$$=empty_token();}
+PARAMS: PARAM_LIST {$$=$1;}
+PARAM_LIST: PARAM_LIST ',' PARAM {extend($1,$3);$$=$1;}
+PARAM_LIST: PARAM                {$$=empty_token();extend($$,$1);}
+PARAM:  T_IDENTIFIER ':' TYPE {$$=$1;}
+
+// zero or more modifiers, collected into the token list of a single token
+MODIFIERS : {$$=empty_token();}
+MODIFIERS : MODIFIER_LIST {$$=$1;}
+MODIFIER_LIST : MODIFIER MODIFIER_LIST {extend($2,$1);$$=$2;}
+MODIFIER_LIST : MODIFIER               {$$=empty_token();extend($$,$1);}
+MODIFIER : T_PUBLIC | T_PRIVATE | T_PROTECTED | T_STATIC | T_DYNAMIC | T_FINAL | T_OVERRIDE | T_NATIVE | T_INTERNAL
+
+DECLARATION : VARIABLE_DECLARATION
+DECLARATION : FUNCTION_DECLARATION
+
+IDECLARATION : VARIABLE_DECLARATION
+IDECLARATION : FUNCTION_DECLARATION
+
+IMPLEMENTS_LIST : {$$=empty_token();}
+IMPLEMENTS_LIST : T_IMPLEMENTS MIDENTIFIER_LIST {$$=$2;}
+
+EXTENDS : {$$=empty_token();}
+EXTENDS : T_EXTENDS MULTILEVELIDENTIFIER {$$=$2;}
+
+EXTENDS_LIST : {$$=empty_token();}
+EXTENDS_LIST : T_EXTENDS MIDENTIFIER_LIST {$$=$2;}
+
+//IDENTIFIER_LIST : T_IDENTIFIER ',' IDENTIFIER_LIST {extend($3,$1);$$=$3;}
+//IDENTIFIER_LIST : T_IDENTIFIER                     {$$=empty_token();extend($$,$1);}
+
+MULTILEVELIDENTIFIER : T_IDENTIFIER                          {$$=empty_token();extend($$,$1);}
+MULTILEVELIDENTIFIER : MULTILEVELIDENTIFIER '.' X_IDENTIFIER {extend_s($1,".",$3);$$=$1;}
+
+MIDENTIFIER_LIST : MULTILEVELIDENTIFIER                      {$$=empty_token();extend($$,$1);}
+MIDENTIFIER_LIST : MIDENTIFIER_LIST ',' MULTILEVELIDENTIFIER {extend($1,$3);$$=$1;}
+
+MAYBE_DECLARATION_LIST : 
+MAYBE_DECLARATION_LIST : DECLARATION_LIST
+DECLARATION_LIST : DECLARATION
+DECLARATION_LIST : DECLARATION_LIST DECLARATION
+
+MAYBE_IDECLARATION_LIST : 
+MAYBE_IDECLARATION_LIST : IDECLARATION_LIST
+IDECLARATION_LIST : IDECLARATION
+IDECLARATION_LIST : IDECLARATION_LIST FUNCTION_HEADER
+
+// keywords: as break case catch class const continue default delete do else extends false finally for function if implements import in instanceof interface internal is native new null package private protected public return super switch this throw to true try typeof use var void while with
+// syntactic keywords: each get set namespace include dynamic final native override static
+// chapter 14
+
+
+%%
+
+#ifdef MAIN
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <memory.h>
+
+/* Debugging aid: runs the scanner until T_EOF and prints one line per
+   token. Codes 32..255 are printed as a quoted character, everything else
+   through token2string(). */
+void test_lexer()
+{
+    int token;
+    while((token = yylex()) != T_EOF) {
+        if(token<32 || token>=256)
+            printf("%s\n", token2string(avm2_lval));
+        else
+            printf("'%c'\n", token);
+    }
+}
+
+/* Test driver (only built with MAIN defined).
+   usage: <prog> [file.as [-lex]]
+   Parses the given file (default "include.as") and writes the generated
+   abc code into "abc.swf". With "-lex" as second argument only the token
+   stream is printed. Returns 0 on success, 1 on failure. */
+int main(int argn, char*argv[])
+{
+    //FILE*fi = fopen("/home/kramm/c/flex/text.as", "rb");
+    char*filename = "include.as";
+    char buf[512];
+    if(argn>1)
+        filename=argv[1];
+
+    /* getcwd() returns 0 on failure (e.g. path longer than buf) */
+    if(!getcwd(buf, 512)) {
+        perror("getcwd");
+        return 1;
+    }
+    add_include_dir(buf);
+    char*fullfilename = enter_file(filename, 0);
+
+    FILE*fi = fopen(fullfilename, "rb");
+    if(!fi) {
+        perror(fullfilename);
+        return 1;
+    }
+    initialize_state();
+    avm2_set_in(fi);
+
+    if(argn>2 && !strcmp(argv[2], "-lex")) {
+        test_lexer();
+        return 0;
+    }
+    avm2_parse();
+
+    finalize_state();
+
+    SWF swf;
+    memset(&swf, 0, sizeof(swf));
+    swf.fileVersion = 9;
+    swf.frameRate = 0x2500;
+    swf.movieSize.xmin = swf.movieSize.ymin = 0;
+    swf.movieSize.xmax = 1024*20;
+    swf.movieSize.ymax = 768*20;
+    TAG*tag = swf.firstTag = swf_InsertTag(0, ST_DOABC);
+    swf_WriteABC(tag, state->file);
+    swf_InsertTag(tag, ST_END);
+
+    int f = open("abc.swf",O_RDWR|O_CREAT|O_TRUNC|O_BINARY,0644);
+    /* don't write to (or close) an invalid descriptor */
+    if(f<0) {
+        perror("abc.swf");
+        return 1;
+    }
+    swf_WriteSWF(f,&swf);
+    close(f);
+
+    return 0;
+}
+#endif
diff --git a/lib/as3/registry.c b/lib/as3/registry.c
new file mode 100644 (file)
index 0000000..0e3f3db
--- /dev/null
@@ -0,0 +1,26 @@
+#include "pool.h"
+#include "registry.h"
+
+/* Statically allocated multiname for the root class "Object", placed in a
+   namespace with an empty name. Returned by registry_getobjectclass() and
+   registry_getsuperclass(). */
+static namespace_t static_empty_ns = {
+    ACCESS_PACKAGE, ""
+};
+static multiname_t static_object_class = {
+    QNAME, &static_empty_ns, 0, "Object"
+};
+
+/* Returns the multiname of the root class "Object". The result points to
+   static storage. */
+multiname_t* registry_getobjectclass()
+{
+    multiname_t*object_class = &static_object_class;
+    return object_class;
+}
+/* Returns a multiname for the class name s, as produced by
+   multiname_fromstring(). */
+multiname_t* registry_findclass(const char*s)
+{
+    return multiname_fromstring(s);
+}
+/* Returns the superclass of m: 0 if m is named "Object", otherwise the
+   static "Object" multiname (including when m has no name). */
+multiname_t* registry_getsuperclass(multiname_t*m)
+{
+    int is_object = m->name && strcmp(m->name, "Object") == 0;
+    return is_object ? 0 : &static_object_class;
+}
+
diff --git a/lib/as3/registry.h b/lib/as3/registry.h
new file mode 100644 (file)
index 0000000..0419675
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef __abc_registry_h__
+#define __abc_registry_h__
+/* returns a multiname for the class with the given name */
+multiname_t* registry_findclass(const char*s);
+/* returns the multiname of the root class "Object" */
+multiname_t* registry_getobjectclass();
+/* returns the superclass of m, or 0 if m is "Object" */
+multiname_t* registry_getsuperclass(multiname_t*m);
+#endif
diff --git a/lib/as3/tokenizer.h b/lib/as3/tokenizer.h
new file mode 100644 (file)
index 0000000..fb034e7
--- /dev/null
@@ -0,0 +1,43 @@
+/* tokenizer.h
+
+   Copyright (c) 2008 Matthias Kramm <kramm@quiss.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#ifndef __parser_h__
+#define __parser_h__
+
+#include "../q.h"
+
+DECLARE(token);
+DECLARE_LIST(token);
+
+/* Semantic value shared by scanner and parser (see YYSTYPE below). */
+struct _token {
+    int type;            /* token code: a T_* constant or a single-character code */
+    char*text;           /* token text; 0 for empty tokens */
+    token_list_t*tokens; /* child tokens when this token is used as a list */
+};
+
+typedef token_t*tokenptr_t;
+
+#define YYSTYPE tokenptr_t
+
+#include "parser.tab.h"
+
+void syntaxerror(const char*format, ...);
+
+#define T_EOF 0
+
+#endif
diff --git a/lib/as3/tokenizer.lex b/lib/as3/tokenizer.lex
new file mode 100644 (file)
index 0000000..7c4c455
--- /dev/null
@@ -0,0 +1,289 @@
+%{
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include "../utf8.h"
+#include "tokenizer.h"
+#include "files.h"
+
+/* Advances current_line/current_column over the first len characters of
+   text: a newline starts a new line at column 0, any other character
+   moves one column to the right. */
+static void countlines(char*text, int len) {
+    int pos = 0;
+    while(pos<len) {
+       if(text[pos++]!='\n') {
+           current_column++;
+           continue;
+       }
+       current_line++;
+       current_column=0;
+    }
+}
+
+static int verbose = 1;
+/* Prints a printf-style debug message to stdout, prefixed with
+   "(tokenizer) " and with trailing newlines stripped. Does nothing unless
+   verbose is set. Messages longer than the internal buffer are truncated. */
+static void dbg(const char*format, ...)
+{
+    char buf[1024];
+    int l;
+    va_list arglist;
+    if(!verbose)
+       return;
+    va_start(arglist, format);
+    /* bounded formatting: never write past buf */
+    vsnprintf(buf, sizeof(buf), format, arglist);
+    va_end(arglist);
+    l = strlen(buf);
+    while(l && buf[l-1]=='\n') {
+       buf[l-1] = 0;
+       l--;
+    }
+    printf("(tokenizer) ");
+    printf("%s\n", buf);
+    fflush(stdout);
+}
+
+void syntaxerror(const char*format, ...)
+{
+    char buf[1024];
+    int l;
+    va_list arglist;
+    if(!verbose)
+       return;
+    va_start(arglist, format);
+    vsprintf(buf, format, arglist);
+    va_end(arglist);
+    fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename, current_line, current_column, buf);
+    fflush(stderr);
+    exit(1);
+}
+
+
+#ifndef YY_CURRENT_BUFFER
+#define YY_CURRENT_BUFFER yy_current_buffer
+#endif
+
+/* Handles an "include" directive matched by the scanner.
+     text   - the matched directive text; it is modified in place
+     len    - length of text
+     quotes - nonzero if the file name is enclosed in double quotes
+   Extracts the file name, registers it through enter_file() together with
+   the current scanner buffer, opens it and switches the scanner to a new
+   buffer reading from that file. */
+void handleInclude(char*text, int len, char quotes)
+{
+    char*filename = 0;
+    if(quotes) {
+        /* file name is whatever lies between the first and the last double quote */
+        char*p1 = strchr(text, '"');
+        char*p2 = strrchr(text, '"');
+        if(!p1 || !p2 || p1==p2) {
+            syntaxerror("Invalid include in line %d\n", current_line);
+        }
+        *p2 = 0;
+        filename = strdup(p1+1);
+    } else {
+        int i1=0,i2=len;
+        // find start
+        /* skips the leading word (the "include" keyword) up to the first whitespace */
+        while(!strchr(" \n\r\t", text[i1])) i1++;
+        // strip
+        /* drop whitespace before the file name and at the end of the match */
+        while(strchr(" \n\r\t", text[i1])) i1++;
+        while(strchr(" \n\r\t", text[i2-1])) i2--;
+        if(i2!=len) text[i2]=0;
+        filename = strdup(&text[i1]);
+    }
+    
+    /* NOTE(review): the strdup'ed filename is not freed here; presumably
+       enter_file() keeps it - confirm */
+    char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
+    yyin = fopen(fullfilename, "rb");
+    if (!yyin) {
+       syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
+    }
+
+    yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
+    //BEGIN(INITIAL); keep context
+}
+
+/* Creates the semantic value for the token just matched: a new token with
+   the given type and a NUL-terminated copy of yytext, stored in avm2_lval.
+   Returns type so that rules can write "return m(T_FOO);". */
+static inline int m(int type)
+{
+    NEW(token_t,tok);
+    char*copy = malloc(yyleng+1);
+    memcpy(copy, yytext, yyleng);
+    copy[yyleng]=0;
+
+    tok->text = copy;
+    tok->type = type;
+    avm2_lval = tok;
+    return type;
+}
+
+void initialize_scanner();
+#define YY_USER_INIT initialize_scanner();
+
+#define c() {countlines(yytext, yyleng);}
+
+%}
+
+%s REGEXPOK
+%s BEGINNING
+
+NAME    [a-zA-Z_][a-zA-Z0-9_\\]*
+NUMBER  -?[0-9]+(\.[0-9]*)?
+STRING   ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
+S       [ \n\r\t]
+MULTILINE_COMMENT [/][*]([*][^/]|[^*]|[\x00-\x31])*[*]+[/]
+SINGLELINE_COMMENT \/\/[^\n]*\n
+REGEXP   [/]([^/\n]|\\[/])*[/][a-zA-Z]*
+%%
+
+
+{SINGLELINE_COMMENT}         {c(); /* single line comment */}
+{MULTILINE_COMMENT}          {c(); /* multi line comment */}
+[/][*]                       {syntaxerror("syntax error: unterminated comment", yytext);}
+
+^include{S}+{STRING}{S}*/\n    {c();handleInclude(yytext, yyleng, 1);}
+^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n    {c();handleInclude(yytext, yyleng, 0);}
+{STRING}                     {c(); BEGIN(INITIAL); /* switch state before returning */ return m(T_STRING);}
+
+<BEGINNING,REGEXPOK>{
+{REGEXP}                     {c(); BEGIN(INITIAL); /* switch state before returning */ return m(T_REGEXP);} 
+}
+
+\xef\xbb\xbf                 {/* utf 8 bom */}
+{S}                          {c();}
+
+{NUMBER}                     {c(); BEGIN(INITIAL); /* switch state before returning */ return m(T_NUMBER);}
+[>][=]                       {return m(T_GE);}
+[<][=]                       {return m(T_LE);}
+[-][-]                       {BEGIN(INITIAL); return m(T_MINUSMINUS);}
+[+][+]                       {BEGIN(INITIAL); return m(T_PLUSPLUS);}
+==                           {BEGIN(REGEXPOK); return m(T_EQEQ);}
+\.\.                         {return m(T_DOTDOT);}
+\.                           {return m('.');}
+::                           {return m(T_COLONCOLON);}
+:                            {return m(':');}
+implements                   {return m(T_IMPLEMENTS);}
+interface                    {return m(T_INTERFACE);}
+namespace                    {return m(T_NAMESPACE);}
+protected                    {return m(T_PROTECTED);}
+override                     {return m(T_OVERRIDE);}
+internal                     {return m(T_INTERNAL);}
+function                     {return m(T_FUNCTION);}
+package                      {return m(T_PACKAGE);}
+private                      {return m(T_PRIVATE);}
+Boolean                      {return m(T_BOOLEAN);}
+dynamic                      {return m(T_DYNAMIC);}
+extends                      {return m(T_EXTENDS);}
+public                       {return m(T_PUBLIC);}
+native                       {return m(T_NATIVE);}
+static                       {return m(T_STATIC);}
+import                       {return m(T_IMPORT);}
+number                       {return m(T_NUMBER);}
+class                        {return m(T_CLASS);}
+const                        {return m(T_CONST);}
+final                        {return m(T_FINAL);}
+False                        {return m(T_FALSE);}
+True                         {return m(T_TRUE);}
+uint                         {return m(T_UINT);}
+null                         {return m(T_NULL);}
+use                          {return m(T_USE);}
+int                          {return m(T_INT);}
+new                          {return m(T_NEW);}
+get                          {return m(T_GET);}
+for                          {return m(T_FOR);}
+set                          {return m(T_SET);}
+var                          {return m(T_VAR);}
+is                           {return m(T_IS) ;}
+as                           {return m(T_AS);}
+{NAME}                       {c();BEGIN(INITIAL);return m(T_IDENTIFIER);}
+
+[+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
+[\)\]]                            {c();BEGIN(INITIAL);return m(yytext[0]);}
+
+.                           {/* any character no other rule accepts: report it
+                                together with the rest of its line */
+                              char c1=yytext[0];
+                              char buf[128];
+                              buf[0] = yytext[0];
+                              int t;
+                              /* leave room for the terminator; stop at end of line or input */
+                              for(t=1;t<127;t++) {
+                                  int c = input();
+                                  if(c=='\n' || c==EOF || c==0)
+                                      break;
+                                  buf[t] = c;
+                              }
+                              buf[t] = 0;
+                              if(c1>='0' && c1<='9')
+                                  syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
+                              else
+                                  syntaxerror("syntax error: %s", buf);
+                              printf("\n");
+                              exit(1);
+                              yyterminate();
+                            }
+<<EOF>>                             {c();
+                              /* end of the current input: leave_file() yields the buffer to
+                                 resume, or 0 if there is none */
+                              void*b = leave_file();
+                             if (!b) {
+                                /* NOTE(review): yyterminate() presumably returns from the
+                                   scanner right away, which would make the next two
+                                   statements unreachable - confirm against the flex manual */
+                                yyterminate();
+                                 yy_delete_buffer(YY_CURRENT_BUFFER);
+                                 return m(T_EOF);
+                             } else {
+                                 /* drop the finished buffer and continue with the resumed one */
+                                 yy_delete_buffer(YY_CURRENT_BUFFER);
+                                 yy_switch_to_buffer(b);
+                             }
+                            }
+
+%%
+
+/* flex end-of-input hook; always returns 1. */
+int yywrap()
+{
+    const int no_more_input = 1;
+    return no_more_input;
+}
+
+static char mbuf[256];
+/* Returns a printable description of token t for debugging.
+   Keywords and operators map to fixed strings, identifiers to "ID(name)"
+   and all other token types to their numeric code. The result is either a
+   string literal or the static buffer mbuf, which is overwritten by the
+   next call; the caller must not free it. */
+char*token2string(token_t*t)
+{
+    int nr=t->type;
+    if(nr==T_STRING)     return "STRING";
+    else if(nr==T_NUMBER)     return "NUMBER";
+    else if(nr==T_REGEXP)     return "REGEXP";
+    else if(nr==T_EOF)        return "***END***";
+    else if(nr==T_GE)         return ">=";
+    else if(nr==T_LE)         return "<=";
+    else if(nr==T_MINUSMINUS) return "--";
+    else if(nr==T_PLUSPLUS)   return "++";
+    else if(nr==T_IMPLEMENTS) return "implements";
+    else if(nr==T_INTERFACE)  return "interface";
+    else if(nr==T_NAMESPACE)  return "namespace";
+    else if(nr==T_PROTECTED)  return "protected";
+    else if(nr==T_OVERRIDE)   return "override";
+    else if(nr==T_INTERNAL)   return "internal";
+    else if(nr==T_FUNCTION)   return "function";
+    else if(nr==T_PACKAGE)    return "package";
+    else if(nr==T_PRIVATE)    return "private";
+    else if(nr==T_BOOLEAN)    return "Boolean";
+    else if(nr==T_DYNAMIC)    return "dynamic";
+    else if(nr==T_EXTENDS)    return "extends";
+    else if(nr==T_PUBLIC)     return "public";
+    else if(nr==T_NATIVE)     return "native";
+    else if(nr==T_STATIC)     return "static";
+    else if(nr==T_IMPORT)     return "import";
+    else if(nr==T_CLASS)      return "class";
+    else if(nr==T_CONST)      return "const";
+    else if(nr==T_FINAL)      return "final";
+    else if(nr==T_FALSE)      return "False";
+    else if(nr==T_TRUE)       return "True";
+    else if(nr==T_UINT)       return "uint";
+    else if(nr==T_NULL)       return "null";
+    else if(nr==T_USE)        return "use";
+    else if(nr==T_INT)        return "int";
+    else if(nr==T_NEW)        return "new";
+    else if(nr==T_GET)        return "get";
+    else if(nr==T_FOR)        return "for";
+    else if(nr==T_SET)        return "set";
+    else if(nr==T_VAR)        return "var";
+    else if(nr==T_IS)         return "is";
+    else if(nr==T_AS)         return "as";
+    else if(nr==T_IDENTIFIER) {
+        /* "ID(" + name + ")" + terminator must fit into mbuf */
+        if(strlen(t->text)>sizeof(mbuf)-5)
+            return "ID(...)";
+        snprintf(mbuf, sizeof(mbuf), "ID(%s)", t->text);
+        return mbuf;
+    } else {
+        snprintf(mbuf, sizeof(mbuf), "%d", nr);
+        return mbuf;
+    }
+}
+
+/* Runs once via YY_USER_INIT: puts the scanner into the BEGINNING
+   start condition. */
+void initialize_scanner()
+{
+    BEGIN(BEGINNING);
+    return;
+}
+