X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=pdf2swf%2Fxpdf%2FParser.cc;h=af7c933114a47773f140e42697554c06e45eb4b5;hb=eb361a0f099d689011066b71a9d5e5d9fd94b913;hp=4df53c986caae7dfc98811eed10290957259324d;hpb=50dd339d3d6262763616efe8d7ee415ab19befb9;p=swftools.git diff --git a/pdf2swf/xpdf/Parser.cc b/pdf2swf/xpdf/Parser.cc index 4df53c9..af7c933 100644 --- a/pdf2swf/xpdf/Parser.cc +++ b/pdf2swf/xpdf/Parser.cc @@ -2,15 +2,16 @@ // // Parser.cc // -// Copyright 1996-2002 Glyph & Cog, LLC +// Copyright 1996-2003 Glyph & Cog, LLC // //======================================================================== -#ifdef __GNUC__ +#include + +#ifdef USE_GCC_PRAGMAS #pragma implementation #endif -#include #include #include "Object.h" #include "Array.h" @@ -18,9 +19,7 @@ #include "Parser.h" #include "XRef.h" #include "Error.h" -#ifndef NO_DECRYPTION #include "Decrypt.h" -#endif Parser::Parser(XRef *xrefA, Lexer *lexerA) { xref = xrefA; @@ -36,23 +35,17 @@ Parser::~Parser() { delete lexer; } -#ifndef NO_DECRYPTION Object *Parser::getObj(Object *obj, Guchar *fileKey, int keyLength, int objNum, int objGen) { -#else -Object *Parser::getObj(Object *obj) { -#endif char *key; Stream *str; Object obj2; int num; -#ifndef NO_DECRYPTION Decrypt *decrypt; GString *s; char *p; int i; -#endif // refill buffer after inline image data if (inlineImg == 2) { @@ -68,11 +61,7 @@ Object *Parser::getObj(Object *obj) { shift(); obj->initArray(xref); while (!buf1.isCmd("]") && !buf1.isEOF()) -#ifndef NO_DECRYPTION obj->arrayAdd(getObj(&obj2, fileKey, keyLength, objNum, objGen)); -#else - obj->arrayAdd(getObj(&obj2)); -#endif if (buf1.isEOF()) error(getPos(), "End of file inside array"); shift(); @@ -88,13 +77,11 @@ Object *Parser::getObj(Object *obj) { } else { key = copyString(buf1.getName()); shift(); - if (buf1.isEOF() || buf1.isError()) + if (buf1.isEOF() || buf1.isError()) { + gfree(key); break; -#ifndef NO_DECRYPTION + } obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen)); -#else - obj->dictAdd(key, getObj(&obj2)); -#endif } } if (buf1.isEOF()) @@ -102,12 +89,10 @@ Object *Parser::getObj(Object *obj) { if (buf2.isCmd("stream")) { if ((str = makeStream(obj))) { obj->initStream(str); -#ifndef NO_DECRYPTION if (fileKey) { str->getBaseStream()->doDecryption(fileKey, keyLength, objNum, objGen); } -#endif } else { obj->free(); obj->initError(); @@ -128,7 +113,6 @@ Object *Parser::getObj(Object *obj) { obj->initInt(num); } -#ifndef NO_DECRYPTION // string } else if (buf1.isString() && fileKey) { buf1.copy(obj); @@ -141,7 +125,6 @@ Object *Parser::getObj(Object *obj) { } delete decrypt; shift(); -#endif // simple object } else { @@ -154,6 +137,7 @@ Object *Parser::getObj(Object *obj) { Stream *Parser::makeStream(Object *dict) { Object obj; + BaseStream *baseStr; Stream *str; Guint pos, endPos, length; @@ -173,16 +157,16 @@ Stream *Parser::makeStream(Object *dict) { } // check for length in damaged file - if (xref->getStreamEnd(pos, &endPos)) { + if (xref && xref->getStreamEnd(pos, &endPos)) { length = endPos - pos; } - // make base stream - str = lexer->getStream()->getBaseStream()->makeSubStream(pos, gTrue, - length, dict); - - // get filters - str = str->addFilters(dict); + // in badly damaged PDF files, we can run off the end of the input + // stream immediately after the "stream" token + if (!lexer->getStream()) { + return NULL; + } + baseStr = lexer->getStream()->getBaseStream(); // skip over stream data lexer->setPos(pos + length); @@ -190,17 +174,33 @@ Stream *Parser::makeStream(Object *dict) { // refill token buffers and check for 'endstream' shift(); // kill '>>' shift(); // kill 'stream' - if (buf1.isCmd("endstream")) + if (buf1.isCmd("endstream")) { shift(); - else + } else { error(getPos(), "Missing 'endstream'"); + // kludge for broken PDF files: just add 5k to the length, and + // hope its enough + length += 5000; + } + + // make base stream + str = baseStr->makeSubStream(pos, gTrue, length, dict); + + // get filters + str = str->addFilters(dict); return str; } void Parser::shift() { if (inlineImg > 0) { - ++inlineImg; + if (inlineImg < 2) { + ++inlineImg; + } else { + // in a damaged content stream, if 'ID' shows up in the middle + // of a dictionary, we need to reset + inlineImg = 0; + } } else if (buf2.isCmd("ID")) { lexer->skipChar(); // skip char after 'ID' command inlineImg = 1;