} else {
int i1=0,i2=len;
// find start
- while(!strchr(" \n\r\t", text[i1])) i1++;
+ while(!strchr(" \n\r\t\xa0", text[i1])) i1++;
// strip
- while(strchr(" \n\r\t", text[i1])) i1++;
- while(strchr(" \n\r\t", text[i2-1])) i2--;
+ while(strchr(" \n\r\t\xa0", text[i1])) i1++;
+ while(strchr(" \n\r\t\xa0", text[i2-1])) i2--;
if(i2!=len) text[i2]=0;
filename = strdup(&text[i1]);
}
current_column+=yyleng;
}
-trie_t*active_namespaces = 0;
-/*void tokenizer_register_namespace(const char*id)
-{
- trie_put(namespaces, id, 0);
-}
-void tokenizer_unregister_namespace(const char*id)
-{
- trie_remove(namespaces, id);
-}*/
-static inline char tokenizer_is_namespace(const char*id)
-{
- return trie_contains(active_namespaces, id);
-}
-
static inline int handleIdentifier()
{
char*s = malloc(yyleng+1);
memcpy(s, yytext, yyleng);
s[yyleng]=0;
a3_lval.id = s;
- if(tokenizer_is_namespace(s))
- return T_NAMESPACE;
- else
- return T_IDENTIFIER;
+ return T_IDENTIFIER; /* the NUL-terminated copy of yytext stored in a3_lval.id is always reported as T_IDENTIFIER; this function no longer returns T_NAMESPACE */
}
static int tokenerror();
//XMLCOMMENT <!--([^->]|(-/[^-])|(--/[^>]))*-->
//{XMLCOMMENT}
+
%}
%s REGEXPOK
%x XMLTEXT
%x XML
-NAME [a-zA-Z_][a-zA-Z0-9_\\]*
-_ [^a-zA-Z0-9_\\]
+X1 parsing identifiers with a non unicode lexer is a nightmare we have to skip all possible
+X2 combinations of byte order markers or utf8 space chars and i dont quite like the fact that
+X3 lex doesnt support proper comments in this section either...
+X4 {NAME_HEAD}{NAME_TAIL}
+
+NAME_NOC2EF [a-zA-Z_\x80-\xc1\xc3-\xee\xf0-\xff]
+NAME_EF [\xef][a-zA-Z0-9_\\\x80-\xba\xbc-\xff]
+NAME_C2 [\xc2][a-zA-Z0-9_\\\x80-\x9f\xa1-\xff]
+NAME_EFBB [\xef][\xbb][a-zA-Z0-9_\\\x80-\xbe\xc0-\xff]
+NAME_TAIL [a-zA-Z_0-9\\\x80-\xff]*
+NAME_HEAD (({NAME_NOC2EF})|({NAME_EF})|({NAME_C2})|({NAME_EFBB}))
+NAME {NAME_HEAD}{NAME_TAIL}
+
+_ [^a-zA-Z0-9_\\\x80-\xff]
HEXINT 0x[a-zA-Z0-9]+
HEXFLOAT 0x[a-zA-Z0-9]*\.[a-zA-Z0-9]*
CDATA <!\[CDATA\[([^]]|\][^]]|\]\][^>])*\]*\]\]\>
XMLCOMMENT <!--([^->]|[-]+[^>-]|>)*-*-->
XML <[^>]+{S}>
+XMLID [A-Za-z0-9_\x80-\xff]+([:][A-Za-z0-9_\x80-\xff]+)?
+XMLSTRING ["][^"]*["]
STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
-S [ \n\r\t]
+S ([ \n\r\t\xa0]|[\xc2][\xa0])
MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
SINGLELINE_COMMENT \/\/[^\n\r]*[\n\r]
REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]*
[/][*] {syntaxerror("syntax error: unterminated comment", yytext);}
^include{S}+{STRING}{S}*/\n {l();handleInclude(yytext, yyleng, 1);}
-^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);}
+^include{S}+[^" \t\xa0\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);}
{STRING} {l(); BEGIN(DEFAULT);handleString(yytext, yyleng);return T_STRING;}
{CDATA} {l(); BEGIN(DEFAULT);handleCData(yytext, yyleng);return T_STRING;}
}
<XML>{
-{STRING} {l(); handleString(yytext, yyleng);return T_STRING;}
+{XMLSTRING} {l(); handleRaw(yytext, yyleng);return T_STRING;}
[{] {c(); BEGIN(REGEXPOK);return m('{');}
[<] {c(); return m('<');}
[/] {c(); return m('/');}
[>] {c(); return m('>');}
[=] {c(); return m('=');}
-{NAME} {c(); handleRaw(yytext, yyleng);return T_IDENTIFIER;}
+{XMLID} {c(); handleRaw(yytext, yyleng);return T_IDENTIFIER;}
{S} {l();}
<<EOF>> {syntaxerror("unexpected end of file");}
}
<REGEXPOK>[\{] {c(); BEGIN(REGEXPOK);return m(T_DICTSTART);}
[\{] {c(); BEGIN(DEFAULT); return m('{');}
-\xef\xbb\xbf {/* utf 8 bom */}
+\xef\xbb\xbf {/* utf 8 bom (0xfeff) */}
{S} {l();}
{HEXINT}/{_} {c(); BEGIN(DEFAULT);return handlehex();}
{NAME}{S}*:{S}*do/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-2);return T_DO;}
{NAME}{S}*:{S}*while/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-5);return T_WHILE;}
{NAME}{S}*:{S}*switch/{_} {l();BEGIN(DEFAULT);handleLabel(yytext, yyleng-6);return T_SWITCH;}
+default{S}xml {l();BEGIN(DEFAULT);return m(KW_DEFAULT_XML);}
for {c();BEGIN(DEFAULT);a3_lval.id="";return T_FOR;}
do {c();BEGIN(DEFAULT);a3_lval.id="";return T_DO;}
while {c();BEGIN(DEFAULT);a3_lval.id="";return T_WHILE;}
instanceof {c();BEGIN(REGEXPOK);return m(KW_INSTANCEOF);}
implements {c();BEGIN(REGEXPOK);return m(KW_IMPLEMENTS);}
interface {c();BEGIN(DEFAULT);return m(KW_INTERFACE);}
-namespace {c();BEGIN(DEFAULT);return m(KW_NAMESPACE);}
protected {c();BEGIN(DEFAULT);return m(KW_PROTECTED);}
+namespace {c();BEGIN(DEFAULT);return m(KW_NAMESPACE);}
undefined {c();BEGIN(DEFAULT);return m(KW_UNDEFINED);}
arguments {c();BEGIN(DEFAULT);return m(KW_ARGUMENTS);}
continue {c();BEGIN(DEFAULT);return m(KW_CONTINUE);}
if(c1>='0' && c1<='9')
syntaxerror("syntax error: %s (identifiers must not start with a digit)");
else
- syntaxerror("syntax error [%d]: %s", (yy_start-1)/2, buf);
+ syntaxerror("syntax error [state=%d]: %s", (yy_start-1)/2, buf);
printf("\n");
exit(1);
yyterminate();