#include <stdio.h>
#include <stdarg.h>
#include "../utf8.h"
+#include "common.h"
#include "tokenizer.h"
#include "files.h"
-int as3_pass = 0;
-int as3_verbosity = 1;
unsigned int as3_tokencount = 0;
-void as3_error(const char*format, ...)
-{
- char buf[1024];
- int l;
- va_list arglist;
- if(as3_verbosity<0)
- exit(1);
- va_start(arglist, format);
- vsprintf(buf, format, arglist);
- va_end(arglist);
- fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename_short, current_line, current_column, buf);
- fflush(stderr);
- exit(1);
-}
-void as3_warning(const char*format, ...)
-{
- char buf[1024];
- int l;
- va_list arglist;
- if(as3_verbosity<1)
- return;
- va_start(arglist, format);
- vsprintf(buf, format, arglist);
- va_end(arglist);
- fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf);
- fflush(stderr);
-}
-void as3_softwarning(const char*format, ...)
-{
- char buf[1024];
- int l;
- va_list arglist;
- if(as3_verbosity<2)
- return;
- va_start(arglist, format);
- vsprintf(buf, format, arglist);
- va_end(arglist);
- fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf);
- fflush(stderr);
-}
static void dbg(const char*format, ...)
{
char buf[1024];
if(as3_verbosity<3)
return;
va_start(arglist, format);
- vsprintf(buf, format, arglist);
+ vsnprintf(buf, sizeof(buf)-1, format, arglist);
va_end(arglist);
l = strlen(buf);
while(l && buf[l-1]=='\n') {
as3_in = 0;
}
+//#undef BEGIN
+//#define BEGIN(x) {(yy_start) = 1 + 2 *x;dbg("entering state %d", x);}
+
#define YY_INPUT(buf,result,max_size) { \
if(!as3_buffer) { \
errno=0; \
}
yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
- //BEGIN(INITIAL); keep context
+ //BEGIN(DEFAULT); keep context
}
static int do_unescape(const char*s, const char*end, char*n)
a3_lval.str.str = strdup_n(a3_lval.str.str, a3_lval.str.len);
}
-static void handleString(char*s, int len)
+/* Store the matched text as the current token's string value without any
+   unescaping: a3_lval.str.len is set to len and a3_lval.str.str to the result
+   of strdup_n(s, len).  Used by the XML, XMLTEXT and XMLCOMMENT rules.
+   NOTE(review): strdup_n presumably returns a private copy of the first len
+   bytes -- confirm against its definition. */
+static void handleRaw(char*s, int len)
{
- if(as3_pass < 2) {
- // don't bother decoding strings in pass 1
- memset(&a3_lval, 0, sizeof(a3_lval));
- return;
- }
+ a3_lval.str.len = len;
+ a3_lval.str.str = strdup_n(s, a3_lval.str.len);
+}
+static void handleString(char*s, int len)
+{
if(s[0]=='"') {
if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'");
s++;len-=2;
s++;len-=2;
}
else syntaxerror("String incorrectly terminated");
-
a3_lval.str = string_unescape(s, len);
}
return type;
}
-
static char numberbuf[64];
static char*nrbuf()
{
+/* Record v as the current token's integer value (a3_lval.number_int) and
+   return the token type T_INT.  The removed lines below are the former
+   T_BYTE/T_SHORT size classification and the unsigned variant setuint(). */
static inline int setint(int v)
{
a3_lval.number_int = v;
- if(v>-128)
- return T_BYTE;
- else if(v>=-32768)
- return T_SHORT;
- else
- return T_INT;
-}
-static inline int setuint(unsigned int v)
-{
- a3_lval.number_uint = v;
- if(v<128)
- return T_BYTE;
- else if(v<32768)
- return T_SHORT;
- else
- return T_UINT;
+ return T_INT;
}
static inline int setfloat(double v)
{
char*s = nrbuf();
char l = (yytext[0]=='-');
- char*max = l?"1073741824":"2147483647";
+ //char*max = l?"1073741824":"2147483647";
+ char*max = l?"2147483648":"2147483647";
+
if(yyleng-l>10) {
as3_softwarning("integer overflow: %s (converted to Number)", s);
return handlefloat();
v*=10;
v+=yytext[t]-'0';
}
- return setuint(v);
+ return setint(v);
}
}
else if((c>='a' && c<='f') || (c>='A' && c<='F'))
v|=(c&0x0f)+9;
}
- if(l && v>1073741824) {
+ if(l && v>=0x80000000) {
char*s = nrbuf();
- as3_softwarning("signed integer overflow: %s (converted to Number)", s);
+ as3_softwarning("integer overflow: %s (converted to Number)", s);
return setfloat(v);
}
- if(!l && v>2147483647) {
+ if(!l && v>0x7fffffff) {
char*s = nrbuf();
- as3_softwarning("unsigned integer overflow: %s (converted to Number)", s);
+ as3_softwarning("integer overflow: %s (converted to Number)", s);
return setfloat(v);
}
if(l==3) {
return setint(-(int)v);
} else {
- return setuint(v);
+ return setint(v);
}
}
current_column+=yyleng;
}
-static trie_t*namespaces = 0;
-void tokenizer_register_namespace(const char*id)
+trie_t*active_namespaces = 0;
+/*void tokenizer_register_namespace(const char*id)
{
- trie_put(&namespaces, id);
+ trie_put(namespaces, id, 0);
}
-static inline tokenizer_is_namespace(const char*id)
+void tokenizer_unregister_namespace(const char*id)
{
- return trie_lookup(namespaces, id);
+ trie_remove(namespaces, id);
+}*/
+/* Look up id in the global active_namespaces trie and return whatever
+   trie_contains() reports for it. */
+static inline char tokenizer_is_namespace(const char*id)
+{
+ return trie_contains(active_namespaces, id);
}
static inline int handleIdentifier()
else
return T_IDENTIFIER;
}
+static int tokenerror();
//Boolean {c();return m(KW_BOOLEAN);}
//int {c();return m(KW_INT);}
//uint {c();return m(KW_UINT);}
//Number {c();return m(KW_NUMBER);}
+//XMLCOMMENT <!--([^->]|(-/[^-])|(--/[^>]))*-->
+//{XMLCOMMENT}
%}
%s REGEXPOK
%s BEGINNING
+%s DEFAULT
+%x XMLTEXT
+%x XML
NAME [a-zA-Z_][a-zA-Z0-9_\\]*
_ [^a-zA-Z0-9_\\]
INTWITHSIGN [+-]?({INT})
FLOATWITHSIGN [+-]?({FLOAT})
-CDATA <!\[CDATA\[([^]]|\][^]]|\]\][^>])*\]*\]\]\>
+CDATA <!\[CDATA\[([^]]|\][^]]|\]\][^>])*\]*\]\]\>
+XMLCOMMENT <!--([^->]|[-]+[^>-]|>)*-*-->
+XML <[^>]+{S}>
+XMLID [A-Za-z0-9_\x80-\xff]+([:][A-Za-z0-9_\x80-\xff]+)?
+XMLSTRING ["][^"]*["]
+
STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
S [ \n\r\t]
MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
^include{S}+{STRING}{S}*/\n {l();handleInclude(yytext, yyleng, 1);}
^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);}
-{STRING} {l(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;}
-{CDATA} {l(); BEGIN(INITIAL);handleCData(yytext, yyleng);return T_STRING;}
+{STRING} {l(); BEGIN(DEFAULT);handleString(yytext, yyleng);return T_STRING;}
+{CDATA} {l(); BEGIN(DEFAULT);handleCData(yytext, yyleng);return T_STRING;}
+
+<DEFAULT,BEGINNING,REGEXPOK>{
+{XMLCOMMENT} {l(); BEGIN(DEFAULT);handleRaw(yytext, yyleng);return T_STRING;}
+}
+
+<XML>{
+{XMLSTRING} {l(); handleRaw(yytext, yyleng);return T_STRING;}
+[{] {c(); BEGIN(REGEXPOK);return m('{');}
+[<] {c(); return m('<');}
+[/] {c(); return m('/');}
+[>] {c(); return m('>');}
+[=] {c(); return m('=');}
+{XMLID} {c(); handleRaw(yytext, yyleng);return T_IDENTIFIER;}
+{S} {l();}
+<<EOF>> {syntaxerror("unexpected end of file");}
+}
+
+<XMLTEXT>{
+[^<>{]+ {l(); handleRaw(yytext, yyleng);return T_STRING;}
+[{] {c(); BEGIN(REGEXPOK);return m('{');}
+[<] {c(); BEGIN(XML);return m('<');}
+[>] {c(); return m('>');}
+{XMLCOMMENT} {l(); handleRaw(yytext, yyleng);return T_STRING;}
+{CDATA} {l(); handleRaw(yytext, yyleng);return T_STRING;}
+<<EOF>> {syntaxerror("unexpected end of file");}
+}
<BEGINNING,REGEXPOK>{
-{REGEXP} {c(); BEGIN(INITIAL);return handleregexp();}
-{HEXWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlehex();}
-{HEXFLOATWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlehexfloat();}
-{INTWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handleint();}
-{FLOATWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlefloat();}
+{REGEXP} {c(); BEGIN(DEFAULT);return handleregexp();}
+{HEXWITHSIGN}/{_} {c(); BEGIN(DEFAULT);return handlehex();}
+{HEXFLOATWITHSIGN}/{_} {c(); BEGIN(DEFAULT);return handlehexfloat();}
+{INTWITHSIGN}/{_} {c(); BEGIN(DEFAULT);return handleint();}
+{FLOATWITHSIGN}/{_} {c(); BEGIN(DEFAULT);return handlefloat();}
}
<REGEXPOK>[\{] {c(); BEGIN(REGEXPOK);return m(T_DICTSTART);}
-[\{] {c(); BEGIN(INITIAL); return m('{');}
+[\{] {c(); BEGIN(DEFAULT); return m('{');}
\xef\xbb\xbf {/* utf 8 bom */}
{S} {l();}
-{HEXINT}/{_} {c(); BEGIN(INITIAL);return handlehex();}
-{HEXFLOAT}/{_} {c(); BEGIN(INITIAL);return handlehexfloat();}
-{INT}/{_} {c(); BEGIN(INITIAL);return handleint();}
-{FLOAT}/{_} {c(); BEGIN(INITIAL);return handlefloat();}
+{HEXINT}/{_} {c(); BEGIN(DEFAULT);return handlehex();}
+{HEXFLOAT}/{_} {c(); BEGIN(DEFAULT);return handlehexfloat();}
+{INT}/{_} {c(); BEGIN(DEFAULT);return handleint();}
+{FLOAT}/{_} {c(); BEGIN(DEFAULT);return handlefloat();}
+NaN {c(); BEGIN(DEFAULT);return m(KW_NAN);}
3rr0r {/* for debugging: generates a tokenizer-level error */
syntaxerror("3rr0r");}
-{NAME}{S}*:{S}*for/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-3);return T_FOR;}
-{NAME}{S}*:{S}*do/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-2);return T_DO;}
-{NAME}{S}*:{S}*while/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-5);return T_WHILE;}
-{NAME}{S}*:{S}*switch/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-6);return T_SWITCH;}
-for {c();BEGIN(INITIAL);a3_lval.id="";return T_FOR;}
-do {c();BEGIN(INITIAL);a3_lval.id="";return T_DO;}
-while {c();BEGIN(INITIAL);a3_lval.id="";return T_WHILE;}
-switch {c();BEGIN(INITIAL);a3_lval.id="";return T_SWITCH;}
+{NAME}{S}*:{S}*for/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-3);return T_FOR;}
+{NAME}{S}*:{S}*do/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-2);return T_DO;}
+{NAME}{S}*:{S}*while/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-5);return T_WHILE;}
+{NAME}{S}*:{S}*switch/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-6);return T_SWITCH;}
+default{S}xml {l();BEGIN(DEFAULT);return m(KW_DEFAULT_XML);}
+for {c();BEGIN(DEFAULT);a3_lval.id="";return T_FOR;}
+do {c();BEGIN(DEFAULT);a3_lval.id="";return T_DO;}
+while {c();BEGIN(DEFAULT);a3_lval.id="";return T_WHILE;}
+switch {c();BEGIN(DEFAULT);a3_lval.id="";return T_SWITCH;}
[&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);}
[|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);}
[=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
[>][=] {c();BEGIN(REGEXPOK);return m(T_GE);}
[<][=] {c();BEGIN(REGEXPOK);return m(T_LE);}
-[-][-] {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
-[+][+] {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
+[-][-] {c();BEGIN(DEFAULT);return m(T_MINUSMINUS);}
+[+][+] {c();BEGIN(DEFAULT);return m(T_PLUSPLUS);}
[+][=] {c();BEGIN(REGEXPOK);return m(T_PLUSBY);}
[\^][=] {c();BEGIN(REGEXPOK);return m(T_XORBY);}
[-][=] {c();BEGIN(REGEXPOK);return m(T_MINUSBY);}
[%][=] {c();BEGIN(REGEXPOK);return m(T_MODBY);}
[*][=] {c();BEGIN(REGEXPOK);return m(T_MULBY);}
[|][=] {c();BEGIN(REGEXPOK);return m(T_ORBY);}
+[&][=] {c();BEGIN(REGEXPOK);return m(T_ANDBY);}
[>][>][=] {c();BEGIN(REGEXPOK);return m(T_SHRBY);}
[<][<][=] {c();BEGIN(REGEXPOK);return m(T_SHLBY);}
[>][>][>][=] {c();BEGIN(REGEXPOK);return m(T_USHRBY);}
: {c();BEGIN(REGEXPOK);return m(':');}
instanceof {c();BEGIN(REGEXPOK);return m(KW_INSTANCEOF);}
implements {c();BEGIN(REGEXPOK);return m(KW_IMPLEMENTS);}
-interface {c();BEGIN(INITIAL);return m(KW_INTERFACE);}
-namespace {c();BEGIN(INITIAL);return m(KW_NAMESPACE);}
-protected {c();BEGIN(INITIAL);return m(KW_PROTECTED);}
-undefined {c();BEGIN(INITIAL);return m(KW_UNDEFINED);}
-continue {c();BEGIN(INITIAL);return m(KW_CONTINUE);}
-override {c();BEGIN(INITIAL);return m(KW_OVERRIDE);}
-internal {c();BEGIN(INITIAL);return m(KW_INTERNAL);}
-function {c();BEGIN(INITIAL);return m(KW_FUNCTION);}
-finally {c();BEGIN(INITIAL);return m(KW_FINALLY);}
-default {c();BEGIN(INITIAL);return m(KW_DEFAULT);}
-package {c();BEGIN(INITIAL);return m(KW_PACKAGE);}
-private {c();BEGIN(INITIAL);return m(KW_PRIVATE);}
-dynamic {c();BEGIN(INITIAL);return m(KW_DYNAMIC);}
-extends {c();BEGIN(INITIAL);return m(KW_EXTENDS);}
+interface {c();BEGIN(DEFAULT);return m(KW_INTERFACE);}
+protected {c();BEGIN(DEFAULT);return m(KW_PROTECTED);}
+namespace {c();BEGIN(DEFAULT);return m(KW_NAMESPACE);}
+undefined {c();BEGIN(DEFAULT);return m(KW_UNDEFINED);}
+arguments {c();BEGIN(DEFAULT);return m(KW_ARGUMENTS);}
+continue {c();BEGIN(DEFAULT);return m(KW_CONTINUE);}
+override {c();BEGIN(DEFAULT);return m(KW_OVERRIDE);}
+internal {c();BEGIN(DEFAULT);return m(KW_INTERNAL);}
+function {c();BEGIN(DEFAULT);return m(KW_FUNCTION);}
+finally {c();BEGIN(DEFAULT);return m(KW_FINALLY);}
+default {c();BEGIN(DEFAULT);return m(KW_DEFAULT);}
+package {c();BEGIN(DEFAULT);return m(KW_PACKAGE);}
+private {c();BEGIN(DEFAULT);return m(KW_PRIVATE);}
+dynamic {c();BEGIN(DEFAULT);return m(KW_DYNAMIC);}
+extends {c();BEGIN(DEFAULT);return m(KW_EXTENDS);}
delete {c();BEGIN(REGEXPOK);return m(KW_DELETE);}
return {c();BEGIN(REGEXPOK);return m(KW_RETURN);}
-public {c();BEGIN(INITIAL);return m(KW_PUBLIC);}
-native {c();BEGIN(INITIAL);return m(KW_NATIVE);}
-static {c();BEGIN(INITIAL);return m(KW_STATIC);}
+public {c();BEGIN(DEFAULT);return m(KW_PUBLIC);}
+native {c();BEGIN(DEFAULT);return m(KW_NATIVE);}
+static {c();BEGIN(DEFAULT);return m(KW_STATIC);}
import {c();BEGIN(REGEXPOK);return m(KW_IMPORT);}
typeof {c();BEGIN(REGEXPOK);return m(KW_TYPEOF);}
throw {c();BEGIN(REGEXPOK);return m(KW_THROW);}
-class {c();BEGIN(INITIAL);return m(KW_CLASS);}
-const {c();BEGIN(INITIAL);return m(KW_CONST);}
-catch {c();BEGIN(INITIAL);return m(KW_CATCH);}
-final {c();BEGIN(INITIAL);return m(KW_FINAL);}
-false {c();BEGIN(INITIAL);return m(KW_FALSE);}
-break {c();BEGIN(INITIAL);return m(KW_BREAK);}
-super {c();BEGIN(INITIAL);return m(KW_SUPER);}
-each {c();BEGIN(INITIAL);return m(KW_EACH);}
-void {c();BEGIN(INITIAL);return m(KW_VOID);}
-true {c();BEGIN(INITIAL);return m(KW_TRUE);}
-null {c();BEGIN(INITIAL);return m(KW_NULL);}
-else {c();BEGIN(INITIAL);return m(KW_ELSE);}
+class {c();BEGIN(DEFAULT);return m(KW_CLASS);}
+const {c();BEGIN(DEFAULT);return m(KW_CONST);}
+catch {c();BEGIN(DEFAULT);return m(KW_CATCH);}
+final {c();BEGIN(DEFAULT);return m(KW_FINAL);}
+false {c();BEGIN(DEFAULT);return m(KW_FALSE);}
+break {c();BEGIN(DEFAULT);return m(KW_BREAK);}
+super {c();BEGIN(DEFAULT);return m(KW_SUPER);}
+each {c();BEGIN(DEFAULT);return m(KW_EACH);}
+void {c();BEGIN(DEFAULT);return m(KW_VOID);}
+true {c();BEGIN(DEFAULT);return m(KW_TRUE);}
+null {c();BEGIN(DEFAULT);return m(KW_NULL);}
+else {c();BEGIN(DEFAULT);return m(KW_ELSE);}
case {c();BEGIN(REGEXPOK);return m(KW_CASE);}
with {c();BEGIN(REGEXPOK);return m(KW_WITH);}
use {c();BEGIN(REGEXPOK);return m(KW_USE);}
new {c();BEGIN(REGEXPOK);return m(KW_NEW);}
-get {c();BEGIN(INITIAL);return m(KW_GET);}
-set {c();BEGIN(INITIAL);return m(KW_SET);}
-var {c();BEGIN(INITIAL);return m(KW_VAR);}
-try {c();BEGIN(INITIAL);return m(KW_TRY);}
+get {c();BEGIN(DEFAULT);return m(KW_GET);}
+set {c();BEGIN(DEFAULT);return m(KW_SET);}
+var {c();BEGIN(DEFAULT);return m(KW_VAR);}
+try {c();BEGIN(DEFAULT);return m(KW_TRY);}
is {c();BEGIN(REGEXPOK);return m(KW_IS) ;}
in {c();BEGIN(REGEXPOK);return m(KW_IN) ;}
-if {c();BEGIN(INITIAL);return m(KW_IF) ;}
+if {c();BEGIN(DEFAULT);return m(KW_IF) ;}
as {c();BEGIN(REGEXPOK);return m(KW_AS);}
-$?{NAME} {c();BEGIN(INITIAL);return handleIdentifier();}
+$?{NAME} {c();BEGIN(DEFAULT);return handleIdentifier();}
-[\]\}*] {c();BEGIN(INITIAL);return m(yytext[0]);}
+[\]\}*] {c();BEGIN(DEFAULT);return m(yytext[0]);}
[+-\/^~@$!%&\(=\[|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
-[\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);}
-
-. {/* ERROR */
- char c1=yytext[0];
- char buf[128];
- buf[0] = yytext[0];
- int t;
- for(t=1;t<128;t++) {
- char c = buf[t]=input();
- if(c=='\n' || c==EOF) {
- buf[t] = 0;
- break;
- }
- }
- if(c1>='0' && c1<='9')
- syntaxerror("syntax error: %s (identifiers must not start with a digit)");
- else
- syntaxerror("syntax error: %s", buf);
- printf("\n");
- exit(1);
- yyterminate();
- }
+[\)\]] {c();BEGIN(DEFAULT);return m(yytext[0]);}
+
+<DEFAULT,BEGINNING,REGEXPOK,XML,XMLTEXT>{
+. {tokenerror();}
+}
<<EOF>> {l();
void*b = leave_file();
if (!b) {
return 1;
}
+/*
+ * Report an unrecognized character and abort.
+ *
+ * Collects the offending character (yytext[0]) plus the remainder of the
+ * current input line, read via input(), into a local buffer and hands it to
+ * syntaxerror().  If the offending character is a digit the message carries
+ * a hint about identifiers; otherwise it includes (yy_start-1)/2, which
+ * presumably is the number of the active start condition.
+ *
+ * The process is terminated with exit(1) afterwards, so the int return
+ * value is never actually produced.
+ */
+static int tokenerror()
+{
+    char c1 = yytext[0];
+    char buf[128];
+    int t;
+
+    buf[0] = c1;
+    /* Leave room for the terminator so buf is always a valid C string,
+       even when the rest of the line is longer than the buffer. */
+    for(t=1; t<(int)sizeof(buf)-1; t++) {
+        /* Keep the result as int: narrowing to char before comparing
+           against EOF makes the comparison unreliable. */
+        int c = input();
+        /* NOTE(review): some flex versions return 0 instead of EOF at end
+           of input -- stop on either. */
+        if(c=='\n' || c==EOF || c==0)
+            break;
+        buf[t] = (char)c;
+    }
+    buf[t] = 0;
+
+    if(c1>='0' && c1<='9')
+        /* the %s needs its argument; it was missing before */
+        syntaxerror("syntax error: %s (identifiers must not start with a digit)", buf);
+    else
+        syntaxerror("syntax error [%d]: %s", (yy_start-1)/2, buf);
+    printf("\n");
+    exit(1);
+    yyterminate(); /* not reached */
+}
+
+
static char mbuf[256];
char*token2string(enum yytokentype nr, YYSTYPE v)
{
}
else if(nr==T_INT) return "<int>";
else if(nr==T_UINT) return "<uint>";
- else if(nr==T_BYTE) return "<byte>";
else if(nr==T_FLOAT) return "<float>";
else if(nr==T_EOF) return "***END***";
else if(nr==T_GE) return ">=";
}
}
+/* Switch the scanner to the exclusive XML start condition, so that the
+   <XML> rules (quoted strings, XML identifiers and the characters
+   { < / > =) are used for the following input.  Emits a trace message
+   through dbg() first.
+   NOTE(review): presumably called by the parser when an XML literal
+   starts -- confirm against callers. */
+void tokenizer_begin_xml()
+{
+ dbg("begin reading xml");
+ BEGIN(XML);
+}
+/* Switch the scanner to the exclusive XMLTEXT start condition, so that the
+   <XMLTEXT> rules (raw text runs, XML comments, CDATA sections and the
+   characters { < >) are used for the following input.  Emits a trace
+   message through dbg() first.
+   NOTE(review): presumably called by the parser once an opening tag has
+   been read -- confirm against callers. */
+void tokenizer_begin_xmltext()
+{
+ dbg("begin reading xml text");
+ BEGIN(XMLTEXT);
+}
+/* Leave text mode: switch the scanner to the XML start condition (not to
+   DEFAULT) after emitting a trace message through dbg(). */
+void tokenizer_end_xmltext()
+{
+ dbg("end reading xml text");
+ BEGIN(XML);
+}
+/* Leave XML mode: switch the scanner to the DEFAULT start condition after
+   emitting a trace message through dbg(). */
+void tokenizer_end_xml()
+{
+ dbg("end reading xml");
+ BEGIN(DEFAULT);
+}
+
void initialize_scanner()
{
BEGIN(BEGINNING);