#include "tokenizer.h"
#include "files.h"
-static void countlines(char*text, int len) {
- int t;
- for(t=0;t<len;t++) {
- if(text[t]=='\n') {
- current_line++;
- current_column=0;
- } else {
- current_column++;
- }
- }
-}
-
static int verbose = 1;
static void dbg(const char*format, ...)
{
fflush(stderr);
exit(1);
}
+/* Print a printf-style warning to stderr, prefixed with the current source
+ * position (file:line:column). Does nothing when verbose is 0. */
+void warning(const char*format, ...)
+{
+    char buf[1024];
+    va_list arglist;
+    if(!verbose)
+        return;
+    va_start(arglist, format);
+    /* bounded formatting: an over-long message is truncated instead of
+       overflowing buf */
+    vsnprintf(buf, sizeof(buf), format, arglist);
+    va_end(arglist);
+    fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf);
+    fflush(stderr);
+}
#ifndef YY_CURRENT_BUFFER
//BEGIN(INITIAL); keep context
}
+/* Decode the backslash escapes in the string body [s, end).
+ *
+ * If n is non-NULL the decoded bytes followed by a terminating 0 are written
+ * to it; if n is NULL nothing is stored and only the length is computed, so
+ * a first call can size the buffer for a second one.
+ *
+ * Recognized: \\ \" \' \b \f \n \r \t, one to three octal digits, \xHH,
+ * \x{H..}, \uHHHHHH, \u{H..}, and a backslash followed by a line ending
+ * (CR, LF or CRLF), which is dropped. Everything else is reported through
+ * syntaxerror().
+ *
+ * Returns the number of decoded bytes, not counting the terminating 0. */
+static int do_unescape(const char*s, const char*end, char*n)
+{
+    char*o = n;
+    int len=0;
+    while(s<end) {
+        if(*s!='\\') {
+            if(o) o[len] = *s;
+            len++;
+            s++;
+            continue;
+        }
+        s++; //skip past '\'
+        if(s==end) syntaxerror("invalid \\ at end of string");
+
+        /* handle the various line endings (mac, dos, unix) */
+        if(*s=='\r') {
+            s++;
+            if(s==end) break;
+            if(*s=='\n')
+                s++;
+            continue;
+        }
+        if(*s=='\n') {
+            s++;
+            continue;
+        }
+        switch(*s) {
+            case '\\': if(o) o[len] = '\\'; s++; len++; break;
+            case '"':  if(o) o[len] = '"';  s++; len++; break;
+            case '\'': if(o) o[len] = '\''; s++; len++; break;
+            case 'b':  if(o) o[len] = '\b'; s++; len++; break;
+            case 'f':  if(o) o[len] = '\f'; s++; len++; break;
+            case 'n':  if(o) o[len] = '\n'; s++; len++; break;
+            case 'r':  if(o) o[len] = '\r'; s++; len++; break;
+            case 't':  if(o) o[len] = '\t'; s++; len++; break;
+            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
+                unsigned int num=0;
+                int nr = 0;
+                /* test the bound first so *s is never read at end; explicit
+                   range test so a NUL byte is not taken for a digit */
+                while(s<end && nr<3 && *s>='0' && *s<='7') {
+                    num = (num<<3) | (unsigned int)(*s-'0');
+                    nr++;
+                    s++;
+                }
+                /* three octal digits reach 0777 (511); a byte holds at most 255 */
+                if(num>255)
+                    syntaxerror("octal number out of range (0-255): %u", num);
+                if(o) o[len] = num;
+                len++;
+                continue;
+            }
+            case 'x': case 'u': {
+                int max=2;
+                char bracket = 0;
+                char unicode = 0;
+                if(*s == 'u') {
+                    max = 6;
+                    unicode = 1;
+                }
+                s++;
+                if(s==end) syntaxerror("invalid \\u or \\x at end of string");
+                if(*s == '{') {
+                    s++;
+                    if(s==end) syntaxerror("invalid \\u{ at end of string");
+                    bracket=1;
+                }
+                unsigned int num=0;
+                int nr = 0;
+                /* the bracketed form has no digit limit; the plain form
+                   takes at most max digits */
+                while(s<end && (bracket || nr < max)) {
+                    unsigned int digit;
+                    if(*s>='0' && *s<='9') digit = *s - '0';
+                    else if(*s>='a' && *s<='f') digit = *s - 'a' + 10;
+                    else if(*s>='A' && *s<='F') digit = *s - 'A' + 10;
+                    else break;
+                    num = (num<<4) | digit;
+                    nr++;
+                    s++;
+                }
+                if(bracket) {
+                    if(s<end && *s=='}') {
+                        s++;
+                    } else {
+                        syntaxerror("missing terminating '}'");
+                    }
+                }
+                if(unicode) {
+                    /* \u: emit the code point as the bytes returned by getUTF8() */
+                    char*utf8 = getUTF8(num);
+                    while(*utf8) {
+                        if(o) o[len] = *utf8;
+                        utf8++;
+                        len++;
+                    }
+                } else {
+                    if(num>255)
+                        syntaxerror("byte out of range (0-255): %u", num);
+                    if(o) o[len] = num;
+                    len++;
+                }
+                break;
+            }
+            default:
+                syntaxerror("unknown escape sequence: \"\\%c\"", *s);
+        }
+    }
+    if(o) o[len]=0;
+    return len;
+}
+
+/* Build a string_t from the unescaped form of the l bytes starting at in.
+ * do_unescape() runs twice: first without a buffer to measure the result,
+ * then again to fill the freshly malloc'ed buffer. */
+static string_t string_unescape(const char*in, int l)
+{
+    const char*stop = in + l;
+    int size = do_unescape(in, stop, 0);
+    char*buf = (char*)malloc(size+1);
+    do_unescape(in, stop, buf);
+    return string_new(buf, size);
+}
+
static void handleString(char*s, int len)
{
if(s[0]=='"') {
s++;len-=2;
}
else syntaxerror("String incorrectly terminated");
- s[len] = 0;
- avm2_lval.string = s;
+ /* s/len now cover only the text between the two double quotes; decode its escapes */
+ /* NOTE(review): the STRING pattern also matches '...' literals, which take the syntaxerror branch above - confirm that is intended */
+ avm2_lval.str = string_unescape(s, len);
}
char start_of_expression;
-static inline int m(int type)
+static inline int mkid(int type) /* identifier token: passes a heap copy of yytext to the parser in avm2_lval.id */
{
char*s = malloc(yyleng+1);
memcpy(s, yytext, yyleng);
s[yyleng]=0;
+ avm2_lval.id = s; /* s is not freed here; the pointer is handed on through avm2_lval */
+ return type;
+}
- NEW(token_t,t);
- t->type = type;
- t->text = s;
- avm2_lval.token = t;
+static inline int m(int type) /* plain token: no text is copied, only the token code is stored */
+{
+ avm2_lval.token = type;
return type;
}
+
static char numberbuf[64];
-static inline int handlenumber()
+static char*nrbuf() /* copies the current token (yytext) into the static numberbuf and NUL-terminates it */
{
if(yyleng>sizeof(numberbuf)-1)
syntaxerror("decimal number overflow");
-
char*s = numberbuf;
memcpy(s, yytext, yyleng);
s[yyleng]=0;
+ return s; /* points into numberbuf, so a later nrbuf() call overwrites the text */
+}
- int t;
- char is_float=0;
- for(t=0;t<yyleng;t++) {
- if(yytext[t]=='.') {
- if(is_float)
- syntaxerror("Invalid number");
- is_float=1;
- } else if(!strchr("-0123456789", yytext[t])) {
- syntaxerror("Invalid number");
- }
- }
- if(is_float) {
- avm2_lval.number_float = atof(s);
- return T_FLOAT;
- }
+/* Store a signed integer literal in avm2_lval.number_int and return the
+ * token type selected by its value: T_BYTE above -128, T_SHORT down to
+ * -32768, T_INT below that. */
+static inline int setint(int v)
+{
+    avm2_lval.number_int = v;
+    if(v < -32768)
+        return T_INT;
+    if(v <= -128)
+        return T_SHORT;
+    return T_BYTE;
+}
+/* Store an unsigned integer literal in avm2_lval.number_uint and return the
+ * token type selected by its value: T_BYTE below 128, T_SHORT below 32768,
+ * T_UINT otherwise. */
+static inline int setuint(unsigned int v)
+{
+    avm2_lval.number_uint = v;
+    if(v >= 32768)
+        return T_UINT;
+    return v >= 128 ? T_SHORT : T_BYTE;
+}
+/* Store a floating point literal in avm2_lval.number_float; the token type
+ * is always T_FLOAT. */
+static inline int setfloat(double v)
+{
+    int token = T_FLOAT;
+    avm2_lval.number_float = v;
+    return token;
+}
+
+/* Convert the current token to a double with atof() (on the NUL-terminated
+ * copy made by nrbuf()) and emit it as a T_FLOAT through setfloat(). */
+static inline int handlefloat()
+{
+    return setfloat(atof(nrbuf()));
+}
+
+static inline int handleint() /* classify the decimal literal in yytext: setint() for a leading '-', setuint() otherwise, handlefloat() when it exceeds the limit string */
+{
+ char*s = nrbuf(); /* NUL-terminated copy of the token, used by atoi() and in the warnings */
char l = (yytext[0]=='-');
char*max = l?"1073741824":"2147483647";
- if(yyleng-l>10)
- syntaxerror("integer overflow");
+ if(yyleng-l>10) { /* more than 10 digits: warn and hand the literal to handlefloat() instead of aborting */
+ warning("integer overflow: %s", s);
+ return handlefloat();
+ }
if(yyleng-l==10) {
int t;
for(t=0;t<yyleng-l;t++) {
- if(yytext[l+t]>max[t])
- syntaxerror("integer overflow %s > %s", s+l,max);
+ if(yytext[l+t]>max[t]) { /* first digit that exceeds the limit string: same float fallback */
+ warning("integer overflow: %s", s); /* NOTE(review): the limit used for negative literals is 1073741824 (2^30), not 2147483648 - confirm */
+ return handlefloat();
+ }
else if(yytext[l+t]<max[t])
break;
}
}
if(yytext[0]=='-') {
int v = atoi(s);
- avm2_lval.number_int = v;
- if(v>-128)
- return T_BYTE;
- else if(v>=-32768)
- return T_SHORT;
- else
- return T_INT;
+ return setint(v);
} else {
unsigned int v = 0;
+ int t; /* NOTE(review): the loop below starts at yytext[0]; INTWITHSIGN also admits a leading '+', which would be folded in as a digit - confirm */
for(t=0;t<yyleng;t++) {
v*=10;
v+=yytext[t]-'0';
}
- avm2_lval.number_uint = v;
- if(v<128)
- return T_BYTE;
- else if(v<32768)
- return T_SHORT;
- else
- return T_UINT;
+ return setuint(v);
}
}
+/* Convert the hexadecimal literal in yytext ("0x..." with an optional
+ * leading '+' or '-') into a token.
+ *
+ * More than 8 hex digits is a syntax error, as is a character that is not a
+ * hex digit (HEXINT admits any letter). A negative literal whose magnitude
+ * exceeds 2^31, or a non-negative one above 2147483647, is emitted as a
+ * T_FLOAT after a warning; everything else goes through setint()/setuint(). */
+static inline int handlehex()
+{
+    char negative = (yytext[0]=='-');
+    /* number of characters in front of the digits: optional sign plus "0x" */
+    int l = ((negative || yytext[0]=='+') ? 1 : 0) + 2;
+
+    if(yyleng-l>8) {
+        char*s = nrbuf();
+        syntaxerror("integer overflow %s", s);
+    }
+
+    int t;
+    unsigned int v = 0;
+    for(t=l;t<yyleng;t++) {
+        char c = yytext[t];
+        v<<=4;
+        if(c>='0' && c<='9')
+            v|=(c&15);
+        else if((c>='a' && c<='f') ||
+                (c>='A' && c<='F'))
+            v|=(c&0x0f)+9;
+        else
+            syntaxerror("invalid hexadecimal digit '%c'", c);
+    }
+
+    if(negative) {
+        if(v>2147483648u) {
+            char*s = nrbuf();
+            warning("signed integer overflow: %s", s);
+            return setfloat(-(double)v);
+        }
+        /* v is at most 2^31 here; this form negates it without overflowing
+           int when v is exactly 2^31 */
+        return setint(v ? -(int)(v-1)-1 : 0);
+    }
+    if(v>2147483647u) {
+        char*s = nrbuf();
+        warning("unsigned integer overflow: %s", s);
+        return setfloat(v);
+    }
+    return setuint(v);
+}
+
+/* Extract the label name from a "name : keyword" match and store a heap
+ * copy of it in avm2_lval.id. text/len describe the match with the trailing
+ * keyword already cut off by the caller. */
+void handleLabel(char*text, int len)
+{
+    int t;
+    /* walk back over the colon and any {S} whitespace that follows the name */
+    for(t=len-1;t>=0;--t) {
+        char ch = text[t];
+        if(ch!=' ' && ch!='\t' && ch!='\r' && ch!='\n' && ch!=':')
+            break;
+    }
+    /* text[t] is the last character of the name, so the name is t+1 long */
+    int namelen = t+1;
+    char*s = malloc(namelen+1);
+    memcpy(s, text, namelen);
+    s[namelen]=0;
+    avm2_lval.id = s;
+}
+
+/* Split the regexp literal in yytext ("/pattern/options") into its parts.
+ * One heap buffer receives everything after the leading '/'; the last '/'
+ * in it is overwritten with 0, so avm2_lval.regexp.pattern and
+ * avm2_lval.regexp.options both point into that buffer. options is 0 when
+ * nothing follows the closing '/'. Always returns T_REGEXP. */
+static int handleregexp()
+{
+    char*s = malloc(yyleng);
+    int len=yyleng-1;
+    memcpy(s, yytext+1, len);
+    s[len] = 0;
+    int t;
+    /* search backwards for the closing '/' */
+    for(t=len;t>=0;--t) {
+        if(s[t]=='/') {
+            s[t] = 0;
+            break;
+        }
+    }
+    avm2_lval.regexp.pattern = s;
+    /* s[len] is the terminator, so a closing '/' with no flags after it
+       sits at index len-1 */
+    if(t==len-1) {
+        avm2_lval.regexp.options = 0;
+    } else {
+        avm2_lval.regexp.options = s+t+1;
+    }
+    return T_REGEXP;
+}
+
void initialize_scanner();
#define YY_USER_INIT initialize_scanner();
-#define c() {countlines(yytext, yyleng);}
+/* Advance the source position over the current token: every '\n' starts a
+ * new line at column 0, any other character moves one column to the right. */
+static inline void l() {
+    const char*p = yytext;
+    const char*stop = yytext + yyleng;
+    while(p < stop) {
+        if(*p++ != '\n') {
+            current_column++;
+        } else {
+            current_line++;
+            current_column=0;
+        }
+    }
+}
+/* Advance only the column, by the length of the current token; the line
+ * counter is left untouched. */
+static inline void c() {
+    current_column = current_column + yyleng;
+}
+
+//Boolean {c();return m(KW_BOOLEAN);}
+//int {c();return m(KW_INT);}
+//uint {c();return m(KW_UINT);}
+//Number {c();return m(KW_NUMBER);}
+
%}
%s BEGINNING
NAME [a-zA-Z_][a-zA-Z0-9_\\]*
+_ [^a-zA-Z0-9_\\]
+
+HEXINT 0x[a-zA-Z0-9]+
+INT [0-9]+
+FLOAT [0-9]+(\.[0-9]*)?|\.[0-9]+
-NUMBER -?[0-9]+(\.[0-9]*)?
+HEXWITHSIGN [+-]?({HEXINT})
+INTWITHSIGN [+-]?({INT})
+FLOATWITHSIGN [+-]?({FLOAT})
STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
S [ \n\r\t]
-MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[\x00-\x1f])*[*]+[/]
+MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
SINGLELINE_COMMENT \/\/[^\n]*\n
REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]*
%%
-{SINGLELINE_COMMENT} {c(); /* single line comment */}
-{MULTILINE_COMMENT} {c(); /* multi line comment */}
+{SINGLELINE_COMMENT} {l(); /* single line comment */}
+{MULTILINE_COMMENT} {l(); /* multi line comment */}
[/][*] {syntaxerror("syntax error: unterminated comment", yytext);}
-^include{S}+{STRING}{S}*/\n {c();handleInclude(yytext, yyleng, 1);}
-^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {c();handleInclude(yytext, yyleng, 0);}
-{STRING} {c(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;}
+^include{S}+{STRING}{S}*/\n {l();handleInclude(yytext, yyleng, 1);}
+^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);}
+{STRING} {l(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;}
<BEGINNING,REGEXPOK>{
-{REGEXP} {c(); BEGIN(INITIAL);return m(T_REGEXP);}
+{REGEXP} {c(); BEGIN(INITIAL);return handleregexp();}
+{HEXWITHSIGN} {c(); BEGIN(INITIAL);return handlehex();}
+{INTWITHSIGN} {c(); BEGIN(INITIAL);return handleint();}
+{FLOATWITHSIGN} {c(); BEGIN(INITIAL);return handlefloat();}
}
\xef\xbb\xbf {/* utf 8 bom */}
-{S} {c();}
+{S} {l();}
-{NUMBER} {c(); BEGIN(INITIAL);return handlenumber();}
+{HEXINT} {c(); BEGIN(INITIAL);return handlehex();}
+{INT} {c(); BEGIN(INITIAL);return handleint();}
+{FLOAT} {c(); BEGIN(INITIAL);return handlefloat();}
3rr0r {/* for debugging: generates a tokenizer-level error */
syntaxerror("3rr0r");}
+{NAME}{S}*:{S}*for/{_} {l();handleLabel(yytext, yyleng-3);return T_FOR;}
+{NAME}{S}*:{S}*do/{_} {l();handleLabel(yytext, yyleng-2);return T_DO;}
+{NAME}{S}*:{S}*while/{_} {l();handleLabel(yytext, yyleng-5);return T_WHILE;}
+{NAME}{S}*:{S}*switch/{_} {l();handleLabel(yytext, yyleng-6);return T_SWITCH;}
+for {c();avm2_lval.id="";return T_FOR;}
+do {c();avm2_lval.id="";return T_DO;}
+while {c();avm2_lval.id="";return T_WHILE;}
+switch {c();avm2_lval.id="";return T_SWITCH;}
+
[&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);}
[|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);}
[!][=] {c();BEGIN(REGEXPOK);return m(T_NE);}
+[!][=][=] {c();BEGIN(REGEXPOK);return m(T_NEE);}
[=][=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);}
[=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
[>][=] {c();return m(T_GE);}
[<][=] {c();return m(T_LE);}
-[+][=] {c();return m(T_PLUSBY);}
-[-][=] {c();return m(T_MINUSBY);}
[-][-] {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
[+][+] {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
+[+][=] {c();return m(T_PLUSBY);}
+[-][=] {c();return m(T_MINUSBY);}
+[/][=] {c();return m(T_DIVBY);}
+[%][=] {c();return m(T_MODBY);}
+[*][=] {c();return m(T_MULBY);}
+[|][=] {c();return m(T_ORBY);}
+[>][>][=] {c();return m(T_SHRBY);}
+[<][<][=] {c();return m(T_SHLBY);}
+[>][>][>][=] {c();return m(T_USHRBY);}
+[<][<] {c();return m(T_SHL);}
+[>][>][>] {c();return m(T_USHR);}
+[>][>] {c();return m(T_SHR);}
+\.\.\. {c();return m(T_DOTDOTDOT);}
\.\. {c();return m(T_DOTDOT);}
\. {c();return m('.');}
:: {c();return m(T_COLONCOLON);}
: {c();return m(':');}
+instanceof {c();return m(KW_INSTANCEOF);}
implements {c();return m(KW_IMPLEMENTS);}
interface {c();return m(KW_INTERFACE);}
namespace {c();return m(KW_NAMESPACE);}
protected {c();return m(KW_PROTECTED);}
+undefined {c();return m(KW_UNDEFINED);}
+continue {c();return m(KW_CONTINUE);}
override {c();return m(KW_OVERRIDE);}
internal {c();return m(KW_INTERNAL);}
function {c();return m(KW_FUNCTION);}
+default {c();return m(KW_DEFAULT);}
package {c();return m(KW_PACKAGE);}
private {c();return m(KW_PRIVATE);}
-Boolean {c();return m(KW_BOOLEAN);}
dynamic {c();return m(KW_DYNAMIC);}
extends {c();return m(KW_EXTENDS);}
+delete {c();return m(KW_DELETE);}
return {c();return m(KW_RETURN);}
public {c();return m(KW_PUBLIC);}
native {c();return m(KW_NATIVE);}
static {c();return m(KW_STATIC);}
import {c();return m(KW_IMPORT);}
-Number {c();return m(KW_NUMBER);}
-while {c();return m(KW_WHILE);}
+typeof {c();return m(KW_TYPEOF);}
+throw {c();return m(KW_THROW);}
class {c();return m(KW_CLASS);}
const {c();return m(KW_CONST);}
+catch {c();return m(KW_CATCH);}
final {c();return m(KW_FINAL);}
false {c();return m(KW_FALSE);}
break {c();return m(KW_BREAK);}
+super {c();return m(KW_SUPER);}
+each {c();return m(KW_EACH);}
+void {c();return m(KW_VOID);}
true {c();return m(KW_TRUE);}
-uint {c();return m(KW_UINT);}
null {c();return m(KW_NULL);}
else {c();return m(KW_ELSE);}
+case {c();return m(KW_CASE);}
+with {c();return m(KW_WITH);}
use {c();return m(KW_USE);}
-int {c();return m(KW_INT);}
new {c();return m(KW_NEW);}
get {c();return m(KW_GET);}
-for {c();return m(KW_FOR);}
set {c();return m(KW_SET);}
var {c();return m(KW_VAR);}
+try {c();return m(KW_TRY);}
is {c();return m(KW_IS) ;}
+in {c();return m(KW_IN) ;}
if {c();return m(KW_IF) ;}
as {c();return m(KW_AS);}
-{NAME} {c();BEGIN(INITIAL);return m(T_IDENTIFIER);}
+{NAME} {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);}
-[+-\/*^~@$!%&\(=\[\]\{\}|?:;,.<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
-[\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);}
+[+-\/*^~@$!%&\(=\[\]\{\}|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
+[\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);}
. {char c1=yytext[0];
char buf[128];
exit(1);
yyterminate();
}
-<<EOF>> {c();
+<<EOF>> {l();
void*b = leave_file();
if (!b) {
yyterminate();
}
static char mbuf[256];
-char*token2string(token_t*t)
+char*token2string(enum yytokentype nr, YYSTYPE v)
{
- int nr=t->type;
if(nr==T_STRING) return "<string>";
else if(nr==T_INT) return "<int>";
else if(nr==T_UINT) return "<uint>";
+ else if(nr==T_BYTE) return "<byte>";
else if(nr==T_FLOAT) return "<float>";
else if(nr==T_REGEXP) return "REGEXP";
else if(nr==T_EOF) return "***END***";
else if(nr==KW_INT) return "int";
else if(nr==KW_NEW) return "new";
else if(nr==KW_GET) return "get";
- else if(nr==KW_FOR) return "for";
else if(nr==KW_SET) return "set";
else if(nr==KW_VAR) return "var";
else if(nr==KW_IS) return "is";
else if(nr==KW_AS) return "as";
- else if(nr==T_IDENTIFIER) {
- if(strlen(t->text)>sizeof(mbuf)-1)
- return "ID(...)";
- sprintf(mbuf, "ID(%s)", t->text);
- return mbuf;
- } else {
+ else if(nr==T_IDENTIFIER) return "ID";
+ else {
sprintf(mbuf, "%d", nr);
return mbuf;
}