X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=pdf2swf%2Fxpdf%2FParser.cc;h=af7c933114a47773f140e42697554c06e45eb4b5;hb=4d50e9ccf474061088318da8e7a67789fd488a2a;hp=6ad0c5a7816afa4cb55fd565dfa3f6073669af76;hpb=fc554a43712b76d16b41ec77dd311b4a78b1ef6b;p=swftools.git diff --git a/pdf2swf/xpdf/Parser.cc b/pdf2swf/xpdf/Parser.cc index 6ad0c5a..af7c933 100644 --- a/pdf2swf/xpdf/Parser.cc +++ b/pdf2swf/xpdf/Parser.cc @@ -2,11 +2,13 @@ // // Parser.cc // -// Copyright 1996 Derek B. Noonburg +// Copyright 1996-2003 Glyph & Cog, LLC // //======================================================================== -#ifdef __GNUC__ +#include + +#ifdef USE_GCC_PRAGMAS #pragma implementation #endif @@ -17,12 +19,11 @@ #include "Parser.h" #include "XRef.h" #include "Error.h" -#ifndef NO_DECRYPTION #include "Decrypt.h" -#endif -Parser::Parser(Lexer *lexer1) { - lexer = lexer1; +Parser::Parser(XRef *xrefA, Lexer *lexerA) { + xref = xrefA; + lexer = lexerA; inlineImg = 0; lexer->getObj(&buf1); lexer->getObj(&buf2); @@ -34,22 +35,17 @@ Parser::~Parser() { delete lexer; } -#ifndef NO_DECRYPTION Object *Parser::getObj(Object *obj, - Guchar *fileKey, int objNum, int objGen) { -#else -Object *Parser::getObj(Object *obj) { -#endif + Guchar *fileKey, int keyLength, + int objNum, int objGen) { char *key; Stream *str; Object obj2; int num; -#ifndef NO_DECRYPTION Decrypt *decrypt; GString *s; char *p; int i; -#endif // refill buffer after inline image data if (inlineImg == 2) { @@ -63,13 +59,9 @@ Object *Parser::getObj(Object *obj) { // array if (buf1.isCmd("[")) { shift(); - obj->initArray(); + obj->initArray(xref); while (!buf1.isCmd("]") && !buf1.isEOF()) -#ifndef NO_DECRYPTION - obj->arrayAdd(getObj(&obj2, fileKey, objNum, objGen)); -#else - obj->arrayAdd(getObj(&obj2)); -#endif + obj->arrayAdd(getObj(&obj2, fileKey, keyLength, objNum, objGen)); if (buf1.isEOF()) error(getPos(), "End of file inside array"); shift(); @@ -77,7 +69,7 @@ Object *Parser::getObj(Object *obj) { // dictionary or stream } else if (buf1.isCmd("<<")) { shift(); - obj->initDict(); + obj->initDict(xref); while (!buf1.isCmd(">>") && !buf1.isEOF()) { if (!buf1.isName()) { error(getPos(), "Dictionary key must be a name object"); @@ -85,13 +77,11 @@ Object *Parser::getObj(Object *obj) { } else { key = copyString(buf1.getName()); shift(); - if (buf1.isEOF() || buf1.isError()) + if (buf1.isEOF() || buf1.isError()) { + gfree(key); break; -#ifndef NO_DECRYPTION - obj->dictAdd(key, getObj(&obj2, fileKey, objNum, objGen)); -#else - obj->dictAdd(key, getObj(&obj2)); -#endif + } + obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen)); } } if (buf1.isEOF()) @@ -99,11 +89,10 @@ Object *Parser::getObj(Object *obj) { if (buf2.isCmd("stream")) { if ((str = makeStream(obj))) { obj->initStream(str); -#ifndef NO_DECRYPTION if (fileKey) { - str->getBaseStream()->doDecryption(fileKey, objNum, objGen); + str->getBaseStream()->doDecryption(fileKey, keyLength, + objNum, objGen); } -#endif } else { obj->free(); obj->initError(); @@ -124,12 +113,11 @@ Object *Parser::getObj(Object *obj) { obj->initInt(num); } -#ifndef NO_DECRYPTION // string } else if (buf1.isString() && fileKey) { buf1.copy(obj); s = obj->getString(); - decrypt = new Decrypt(fileKey, objNum, objGen); + decrypt = new Decrypt(fileKey, keyLength, objNum, objGen); for (i = 0, p = obj->getString()->getCString(); i < s->getLength(); ++i, ++p) { @@ -137,7 +125,6 @@ Object *Parser::getObj(Object *obj) { } delete decrypt; shift(); -#endif // simple object } else { @@ -150,8 +137,9 @@ Object *Parser::getObj(Object *obj) { Stream *Parser::makeStream(Object *dict) { Object obj; + BaseStream *baseStr; Stream *str; - int pos, endPos, length; + Guint pos, endPos, length; // get stream start position lexer->skipToNextLine(); @@ -160,7 +148,7 @@ Stream *Parser::makeStream(Object *dict) { // get length dict->dictLookup("Length", &obj); if (obj.isInt()) { - length = obj.getInt(); + length = (Guint)obj.getInt(); obj.free(); } else { error(getPos(), "Bad 'Length' attribute in stream"); @@ -169,15 +157,16 @@ Stream *Parser::makeStream(Object *dict) { } // check for length in damaged file - if ((endPos = xref->getStreamEnd(pos)) >= 0) { + if (xref && xref->getStreamEnd(pos, &endPos)) { length = endPos - pos; } - // make base stream - str = lexer->getStream()->getBaseStream()->makeSubStream(pos, length, dict); - - // get filters - str = str->addFilters(dict); + // in badly damaged PDF files, we can run off the end of the input + // stream immediately after the "stream" token + if (!lexer->getStream()) { + return NULL; + } + baseStr = lexer->getStream()->getBaseStream(); // skip over stream data lexer->setPos(pos + length); @@ -185,17 +174,33 @@ Stream *Parser::makeStream(Object *dict) { // refill token buffers and check for 'endstream' shift(); // kill '>>' shift(); // kill 'stream' - if (buf1.isCmd("endstream")) + if (buf1.isCmd("endstream")) { shift(); - else + } else { error(getPos(), "Missing 'endstream'"); + // kludge for broken PDF files: just add 5k to the length, and + // hope its enough + length += 5000; + } + + // make base stream + str = baseStr->makeSubStream(pos, gTrue, length, dict); + + // get filters + str = str->addFilters(dict); return str; } void Parser::shift() { if (inlineImg > 0) { - ++inlineImg; + if (inlineImg < 2) { + ++inlineImg; + } else { + // in a damaged content stream, if 'ID' shows up in the middle + // of a dictionary, we need to reset + inlineImg = 0; + } } else if (buf2.isCmd("ID")) { lexer->skipChar(); // skip char after 'ID' command inlineImg = 1;