From: Matthias Kramm Date: Mon, 4 Jan 2010 07:46:01 +0000 (-0800) Subject: added simple xml parser X-Git-Tag: version-0-9-1~200 X-Git-Url: http://git.asbjorn.biz/?p=swftools.git;a=commitdiff_plain;h=131881aa4c31c77d8ad1b915dbe267760b429573 added simple xml parser --- diff --git a/.gitignore b/.gitignore index 98c707a..4ba6893 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,18 @@ *.diff *.PDF *.wav +*.patch +*.so +gmon.out +*.orig +*.html +src/*.sc +*.log +*.abc +lib/as3/*.as +lib/as3/lib +lib/as3/.* +lib/ruby/ruby m *dSYM tags diff --git a/lib/Makefile.in b/lib/Makefile.in index f0b7ebe..d1c5375 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -18,7 +18,7 @@ gfxpoly_objects = gfxpoly/active.$(O) gfxpoly/convert.$(O) gfxpoly/poly.$(O) gfx rfxswf_modules = modules/swfbits.c modules/swfaction.c modules/swfdump.c modules/swfcgi.c modules/swfbutton.c modules/swftext.c modules/swffont.c modules/swftools.c modules/swfsound.c modules/swfshape.c modules/swfobject.c modules/swfdraw.c modules/swffilter.c modules/swfrender.c h.263/swfvideo.c modules/swfalignzones.c -base_objects=q.$(O) utf8.$(O) png.$(O) jpeg.$(O) wav.$(O) mp3.$(O) os.$(O) bitio.$(O) log.$(O) mem.$(O) MD5.$(O) +base_objects=q.$(O) utf8.$(O) png.$(O) jpeg.$(O) wav.$(O) mp3.$(O) os.$(O) bitio.$(O) log.$(O) mem.$(O) MD5.$(O) xml.$(O) gfx_objects=gfximage.$(O) gfxtools.$(O) gfxfont.$(O) devices/dummy.$(O) devices/file.$(O) devices/render.$(O) devices/text.$(O) devices/record.$(O) devices/ops.$(O) devices/polyops.$(O) devices/bbox.$(O) devices/rescale.$(O) @DEVICE_OPENGL@ @DEVICE_PDF@ rfxswf_objects=modules/swfaction.$(O) modules/swfbits.$(O) modules/swfbutton.$(O) modules/swfcgi.$(O) modules/swfdraw.$(O) modules/swfdump.$(O) modules/swffilter.$(O) modules/swffont.$(O) modules/swfobject.$(O) modules/swfrender.$(O) modules/swfshape.$(O) modules/swfsound.$(O) modules/swftext.$(O) modules/swftools.$(O) modules/swfalignzones.$(O) @@ -53,6 +53,8 @@ mp3.$(O): mp3.c mp3.h $(top_builddir)/config.h $(C) mp3.c -o $@ 
wav.$(O): wav.c wav.h $(top_builddir)/config.h $(C) wav.c -o $@ +xml.$(O): xml.c xml.h bitio.h + $(C) xml.c -o $@ os.$(O): os.c os.h $(top_builddir)/config.h $(C) -DSWFTOOLS_DATADIR=\"$(pkgdatadir)\" os.c -o $@ modules/swfaction.$(O): modules/swfaction.c rfxswf.h diff --git a/lib/bitio.c b/lib/bitio.c index 442bf48..17d9293 100644 --- a/lib/bitio.c +++ b/lib/bitio.c @@ -71,6 +71,9 @@ static int reader_fileread(reader_t*reader, void* data, int len) } static void reader_fileread_dealloc(reader_t*r) { + if(r->type == READER_TYPE_FILE2) { + close((ptroff_t)r->internal); + } memset(r, 0, sizeof(reader_t)); } void reader_init_filereader(reader_t*r, int handle) @@ -83,6 +86,16 @@ void reader_init_filereader(reader_t*r, int handle) r->bitpos = 8; r->pos = 0; } +void reader_init_filereader2(reader_t*r, const char*filename) +{ + int fi = open(filename, +#ifdef O_BINARY + O_BINARY| +#endif + O_RDONLY); + reader_init_filereader(r, fi); + r->type = READER_TYPE_FILE2; +} /* ---------------------------- mem reader ------------------------------- */ @@ -294,7 +307,7 @@ void writer_init_filewriter(writer_t*w, int handle) } void writer_init_filewriter2(writer_t*w, char*filename) { - int fi = open("movie.swf", + int fi = open(filename, #ifdef O_BINARY O_BINARY| #endif diff --git a/lib/bitio.h b/lib/bitio.h index 1b9f4f6..65618cc 100644 --- a/lib/bitio.h +++ b/lib/bitio.h @@ -31,6 +31,7 @@ #define READER_TYPE_ZLIB_C 4 #define READER_TYPE_ZLIB READER_TYPE_ZLIB_U #define READER_TYPE_NULL 5 +#define READER_TYPE_FILE2 6 #define WRITER_TYPE_FILE 1 #define WRITER_TYPE_MEM 2 @@ -90,6 +91,7 @@ void writer_writeString(writer_t*w, const char*s); /* standard readers / writers */ void reader_init_filereader(reader_t*r, int handle); +void reader_init_filereader2(reader_t*r, const char*filename); void reader_init_zlibinflate(reader_t*r, reader_t*input); void reader_init_memreader(reader_t*r, void*data, int length); void reader_init_nullreader(reader_t*r); diff --git a/lib/example/.gitignore 
b/lib/example/.gitignore index abb494a..ba657c3 100644 --- a/lib/example/.gitignore +++ b/lib/example/.gitignore @@ -12,4 +12,5 @@ transtest hexfont protect zlibtest +alignzones test.html diff --git a/lib/xml.c b/lib/xml.c new file mode 100644 index 0000000..25a83c2 --- /dev/null +++ b/lib/xml.c @@ -0,0 +1,375 @@ +/* xml.c + Lightweight and fast xml parser. + + Part of the swftools package. + + Copyright (c) 2010 Matthias Kramm + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include +#include +#include +#include +#include "xml.h" + +/* +group: 0=data 1=whitespace 2='"' 3='<' 4='>' 5='&' 6=';' 7='?' 8='/' 9='=' 10=EOF +*/ + +static int group[256] = +{ +// 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f +// \t \n \r + 10, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, +// 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f +// + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f +// ! " # $ % & ' ( ) * + , - . / + 1, 0, 2, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 8, +// 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f +// 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 9, 4, 7, +// 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f +// @ A B C D E F G H I J K L M N O + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f +// P Q R S T U V W X Y Z [ \ ] ^ _ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f +// ` a b c d e f g h i j k l m n o + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f +// p q r s t u v w x y z { | } ~ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f +// + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f +// + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af +// + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf +// + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf +// + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df +// + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef +// + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff +// + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static const char*errors[]= +{ + 0, +#define E1 -0x41 + /*E1*/"xml file must start with & ; ? / = EOB*/ + /* 0 */{ E1, 0,E1, 1,E1,E1,E1,E1,E1,E1,-63}, // .< + /* 1 */{ E1,E1,E1,E1,E1,E1,E1, 9,E1,E1,-63}, // <.? + /* 2 */{ -3, 2,E3,E2,E2,E2,E2,E2,12,E2,-63}, // <. 
+ /* 3 */{ E3,E3,E3,E3,-1,E3,E3,E3,E3,E1,-63}, // < /.> + /* 4 */{ E3,E3,E3,E3,-2,E3,E3,E3,E3,E1,-63}, // < .> + /* 5 */{ 5, 5, 5,-4, 5, 5, 5, 5, 5, 5,-63}, // da.ta + /* 6 */{ 6,-7,E3,E2,-6,E2,E2,E3,-9,E3,-63}, // %d\n", + buffer[pos], state, new_state[state][group[buffer[pos]]]);*/ + + /* inner loop */ + do { + state = new_state[old=state][group[buffer[pos++]]]; + } while(state>=0); + + switch(state) { + tag_stack_t*st; + xmlattribute_t*a; + case -63: // end of buffer + if(pos!=num+1) { + // we could backtrace, but the spec says this is indeed illegal + fprintf(stderr, "error: xml contains \\0 chars\n"); + return 0; + } + // undo + pos = num; + state = old; + break; + case -1: // self closing tag + attributes = attributes_reverse(attributes); + out->start_tag(out, tagname.result, attributes); + out->end_tag(out, tagname.result); + stringstate_clear(&tagname); + attributes_free(attributes);attributes = 0; + stringstate_start(&data, buffer, pos); + state = 5; + break; + case -6: // after + stringstate_finish(&tagname, buffer, pos-1); + // fallthrough + case -2: // . 
+ st = malloc(sizeof(tag_stack_t)); + st->name = tagname.result; + st->prev = stack; + stack = st; + attributes = attributes_reverse(attributes); + if(!first) out->start_tag(out, tagname.result, attributes); + attributes_free(attributes);attributes = 0; + stringstate_start(&data, buffer, pos); + state = 5; + break; + case -3: case -13: // after <, start of tag name + first=0; + stringstate_start(&tagname, buffer, pos-1); + state = state==-3?6:13; + break; + case -14: // after , end of tag name + stringstate_finish(&tagname, buffer, pos-1); + // fallthrough + case -15: // after + out->end_tag(out, tagname.result); + stringstate_clear(&tagname); + stringstate_start(&data, buffer, pos); + state = 5; + break; + case -4: // end of data + stringstate_finish(&data, buffer, pos-1); + if(!first) out->data(out, data.result, data.len); + stringstate_clear(&data); + state = 2; + break; + case -7: // after <, at whitespace, end of tag name + stringstate_finish(&tagname, buffer, pos-1); + state = 7; + break; + case -8: // inside tag, start of attribute name + stringstate_start(&attr_name, buffer, pos-1); + state = 8; + break; + case -9: + stringstate_finish(&tagname, buffer, pos-1); + state = 3; + break; + case -10: // end of attribute name + stringstate_finish(&attr_name, buffer, pos-1); + state = 10; + break; + case -11: // start of attribute value + stringstate_start(&attr_value, buffer, pos); + state = 11; + break; + case -5: // end of attribute value + stringstate_finish(&attr_value, buffer, pos-1); + a = malloc(sizeof(xmlattribute_t)); + a->name = attr_name.result;attr_name.result=0; + a->value = attr_value.result;attr_value.result=0; + a->next = attributes; + attributes = a; + state = 7; + break; + default: + if(-state&0x40) { + fprintf(stderr, "%s\n", errors[(-state)&0x3f]); + return 0; + } else { + fprintf(stderr, "internal error: no action %d\n", state); + } + return 0; + break; + } + } + stringstate_save(&tagname, buffer, pos); + stringstate_save(&attr_name, buffer, 
pos); + stringstate_save(&attr_value, buffer, pos); + stringstate_save(&data, buffer, pos); + } + + /* note: any of these except data *has* to be empty for a well formed xml */ + stringstate_clear(&tagname); + stringstate_clear(&attr_name); + stringstate_clear(&attr_value); + stringstate_clear(&data); + + while(stack) { + tag_stack_t*next = stack->prev; + free((void*)stack->name); + free(stack); + stack = next; + } + return 1; +} + +#ifdef MAIN +void my_start_tag(xmlconsumer_t*c, char*name, xmlattribute_t*attr) +{ + printf("<%s", name); + for(;attr;attr=attr->next) { + printf(" %s=\"%s\"", attr->name, attr->value); + } + printf(">"); +} +void my_data(xmlconsumer_t*c, char*data, int len) +{ + printf("%s", data); +} +void my_end_tag(xmlconsumer_t*c, char*name) +{ + printf("</%s>", name); +} +int main() +{ + xmlconsumer_t c = {my_start_tag, my_data, my_end_tag, 0}; + + reader_t r; + reader_init_filereader2(&r, "test.xml"); + xml_parse(&r, &c); + r.dealloc(&r); + printf("\n"); +} +#endif diff --git a/lib/xml.h b/lib/xml.h new file mode 100644 index 0000000..206970a --- /dev/null +++ b/lib/xml.h @@ -0,0 +1,42 @@ +/* xml.h + Lightweight and fast xml parser. + + Part of the swftools package. + + Copyright (c) 2010 Matthias Kramm + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef __xml_h__ +#define __xml_h__ + +#include "bitio.h" + +typedef struct _xmlattribute { + const char*name; + const char*value; + struct _xmlattribute*next; +} xmlattribute_t; + +typedef struct _xmlconsumer { + void (*start_tag)(struct _xmlconsumer*, char*name, xmlattribute_t*attributes); + void (*data)(struct _xmlconsumer*, char*data, int len); + void (*end_tag)(struct _xmlconsumer*, char*name); + void*internal; +} xmlconsumer_t; + +int xml_parse(reader_t*reader, xmlconsumer_t*out); + +#endif