X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fxpdf%2FXRef.cc;fp=lib%2Fxpdf%2FXRef.cc;h=b3715412fd55903b87e69d54a2bd8068c1dcc732;hb=3bb2dc06d0393fbd794faca14a5c118e7f191f5d;hp=0000000000000000000000000000000000000000;hpb=1279e7201aaf80eca58ad982ec11de68c85631af;p=swftools.git diff --git a/lib/xpdf/XRef.cc b/lib/xpdf/XRef.cc new file mode 100644 index 0000000..b371541 --- /dev/null +++ b/lib/xpdf/XRef.cc @@ -0,0 +1,888 @@ +//======================================================================== +// +// XRef.cc +// +// Copyright 1996-2003 Glyph & Cog, LLC +// +//======================================================================== + +#include + +#ifdef USE_GCC_PRAGMAS +#pragma implementation +#endif + +#include +#include +#include +#include +#include "gmem.h" +#include "Object.h" +#include "Stream.h" +#include "Lexer.h" +#include "Parser.h" +#include "Dict.h" +#include "Error.h" +#include "ErrorCodes.h" +#include "XRef.h" + +//------------------------------------------------------------------------ + +#define xrefSearchSize 1024 // read this many bytes at end of file + // to look for 'startxref' + +//------------------------------------------------------------------------ +// Permission bits +//------------------------------------------------------------------------ + +#define permPrint (1<<2) +#define permChange (1<<3) +#define permCopy (1<<4) +#define permNotes (1<<5) +#define defPermFlags 0xfffc + +//------------------------------------------------------------------------ +// ObjectStream +//------------------------------------------------------------------------ + +class ObjectStream { +public: + + // Create an object stream, using object number , + // generation 0. + ObjectStream(XRef *xref, int objStrNumA); + + ~ObjectStream(); + + // Return the object number of this object stream. + int getObjStrNum() { return objStrNum; } + + // Get the th object from this stream, which should be + // object number , generation 0. + Object *getObject(int objIdx, int objNum, Object *obj); + +private: + + int objStrNum; // object number of the object stream + int nObjects; // number of objects in the stream + Object *objs; // the objects (length = nObjects) + int *objNums; // the object numbers (length = nObjects) +}; + +ObjectStream::ObjectStream(XRef *xref, int objStrNumA) { + Stream *str; + Parser *parser; + int *offsets; + Object objStr, obj1, obj2; + int first, i; + + objStrNum = objStrNumA; + nObjects = 0; + objs = NULL; + objNums = NULL; + + if (!xref->fetch(objStrNum, 0, &objStr)->isStream()) { + goto err1; + } + + if (!objStr.streamGetDict()->lookup("N", &obj1)->isInt()) { + obj1.free(); + goto err1; + } + nObjects = obj1.getInt(); + obj1.free(); + if (nObjects <= 0) { + goto err1; + } + + if (!objStr.streamGetDict()->lookup("First", &obj1)->isInt()) { + obj1.free(); + goto err1; + } + first = obj1.getInt(); + obj1.free(); + if (first < 0) { + goto err1; + } + + objs = new Object[nObjects]; + objNums = (int *)gmallocn(nObjects, sizeof(int)); + offsets = (int *)gmallocn(nObjects, sizeof(int)); + + // parse the header: object numbers and offsets + objStr.streamReset(); + obj1.initNull(); + str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first); + parser = new Parser(xref, new Lexer(xref, str)); + for (i = 0; i < nObjects; ++i) { + parser->getObj(&obj1); + parser->getObj(&obj2); + if (!obj1.isInt() || !obj2.isInt()) { + obj1.free(); + obj2.free(); + delete parser; + gfree(offsets); + goto err1; + } + objNums[i] = obj1.getInt(); + offsets[i] = obj2.getInt(); + obj1.free(); + obj2.free(); + if (objNums[i] < 0 || offsets[i] < 0 || + (i > 0 && offsets[i] < offsets[i-1])) { + delete parser; + gfree(offsets); + goto err1; + } + } + while (str->getChar() != EOF) ; + delete parser; + + // skip to the first object - this shouldn't be necessary because + // the First key is supposed to be equal to offsets[0], but just in + // case... + for (i = first; i < offsets[0]; ++i) { + objStr.getStream()->getChar(); + } + + // parse the objects + for (i = 0; i < nObjects; ++i) { + obj1.initNull(); + if (i == nObjects - 1) { + str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0); + } else { + str = new EmbedStream(objStr.getStream(), &obj1, gTrue, + offsets[i+1] - offsets[i]); + } + parser = new Parser(xref, new Lexer(xref, str)); + parser->getObj(&objs[i]); + while (str->getChar() != EOF) ; + delete parser; + } + + gfree(offsets); + + err1: + objStr.free(); + return; +} + +ObjectStream::~ObjectStream() { + int i; + + if (objs) { + for (i = 0; i < nObjects; ++i) { + objs[i].free(); + } + delete[] objs; + } + gfree(objNums); +} + +Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) { + if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) { + return obj->initNull(); + } + return objs[objIdx].copy(obj); +} + +//------------------------------------------------------------------------ +// XRef +//------------------------------------------------------------------------ + +XRef::XRef(BaseStream *strA) { + Guint pos; + Object obj; + + ok = gTrue; + errCode = errNone; + size = 0; + entries = NULL; + streamEnds = NULL; + streamEndsLen = 0; + objStr = NULL; + + encrypted = gFalse; + permFlags = defPermFlags; + ownerPasswordOk = gFalse; + + // read the trailer + str = strA; + start = str->getStart(); + pos = getStartXref(); + + // if there was a problem with the 'startxref' position, try to + // reconstruct the xref table + if (pos == 0) { + if (!(ok = constructXRef())) { + errCode = errDamaged; + return; + } + + // read the xref table + } else { + while (readXRef(&pos)) ; + + // if there was a problem with the xref table, + // try to reconstruct it + if (!ok) { + if (!(ok = constructXRef())) { + errCode = errDamaged; + return; + } + } + } + + // get the root dictionary (catalog) object + trailerDict.dictLookupNF("Root", &obj); + if (obj.isRef()) { + rootNum = obj.getRefNum(); + rootGen = obj.getRefGen(); + obj.free(); + } else { + obj.free(); + if (!(ok = constructXRef())) { + errCode = errDamaged; + return; + } + } + + // now set the trailer dictionary's xref pointer so we can fetch + // indirect objects from it + trailerDict.getDict()->setXRef(this); +} + +XRef::~XRef() { + gfree(entries); + trailerDict.free(); + if (streamEnds) { + gfree(streamEnds); + } + if (objStr) { + delete objStr; + } +} + +// Read the 'startxref' position. +Guint XRef::getStartXref() { + char buf[xrefSearchSize+1]; + char *p; + int c, n, i; + + // read last xrefSearchSize bytes + str->setPos(xrefSearchSize, -1); + for (n = 0; n < xrefSearchSize; ++n) { + if ((c = str->getChar()) == EOF) { + break; + } + buf[n] = c; + } + buf[n] = '\0'; + + // find startxref + for (i = n - 9; i >= 0; --i) { + if (!strncmp(&buf[i], "startxref", 9)) { + break; + } + } + if (i < 0) { + return 0; + } + for (p = &buf[i+9]; isspace(*p); ++p) ; + lastXRefPos = strToUnsigned(p); + + return lastXRefPos; +} + +// Read one xref table section. Also reads the associated trailer +// dictionary, and returns the prev pointer (if any). +GBool XRef::readXRef(Guint *pos) { + Parser *parser; + Object obj; + GBool more; + + // start up a parser, parse one token + obj.initNull(); + parser = new Parser(NULL, + new Lexer(NULL, + str->makeSubStream(start + *pos, gFalse, 0, &obj))); + parser->getObj(&obj); + + // parse an old-style xref table + if (obj.isCmd("xref")) { + obj.free(); + more = readXRefTable(parser, pos); + + // parse an xref stream + } else if (obj.isInt()) { + obj.free(); + if (!parser->getObj(&obj)->isInt()) { + goto err1; + } + obj.free(); + if (!parser->getObj(&obj)->isCmd("obj")) { + goto err1; + } + obj.free(); + if (!parser->getObj(&obj)->isStream()) { + goto err1; + } + more = readXRefStream(obj.getStream(), pos); + obj.free(); + + } else { + goto err1; + } + + delete parser; + return more; + + err1: + obj.free(); + delete parser; + ok = gFalse; + return gFalse; +} + +GBool XRef::readXRefTable(Parser *parser, Guint *pos) { + XRefEntry entry; + GBool more; + Object obj, obj2; + Guint pos2; + int first, n, newSize, i; + + while (1) { + parser->getObj(&obj); + if (obj.isCmd("trailer")) { + obj.free(); + break; + } + if (!obj.isInt()) { + goto err1; + } + first = obj.getInt(); + obj.free(); + if (!parser->getObj(&obj)->isInt()) { + goto err1; + } + n = obj.getInt(); + obj.free(); + if (first < 0 || n < 0 || first + n < 0) { + goto err1; + } + if (first + n > size) { + for (newSize = size ? 2 * size : 1024; + first + n > newSize && newSize > 0; + newSize <<= 1) ; + if (newSize < 0) { + goto err1; + } + entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); + for (i = size; i < newSize; ++i) { + entries[i].offset = 0xffffffff; + entries[i].type = xrefEntryFree; + } + size = newSize; + } + for (i = first; i < first + n; ++i) { + if (!parser->getObj(&obj)->isInt()) { + goto err1; + } + entry.offset = (Guint)obj.getInt(); + obj.free(); + if (!parser->getObj(&obj)->isInt()) { + goto err1; + } + entry.gen = obj.getInt(); + obj.free(); + parser->getObj(&obj); + if (obj.isCmd("n")) { + entry.type = xrefEntryUncompressed; + } else if (obj.isCmd("f")) { + entry.type = xrefEntryFree; + } else { + goto err1; + } + obj.free(); + if (entries[i].offset == 0xffffffff) { + entries[i] = entry; + // PDF files of patents from the IBM Intellectual Property + // Network have a bug: the xref table claims to start at 1 + // instead of 0. + if (i == 1 && first == 1 && + entries[1].offset == 0 && entries[1].gen == 65535 && + entries[1].type == xrefEntryFree) { + i = first = 0; + entries[0] = entries[1]; + entries[1].offset = 0xffffffff; + } + } + } + } + + // read the trailer dictionary + if (!parser->getObj(&obj)->isDict()) { + goto err1; + } + + // get the 'Prev' pointer + obj.getDict()->lookupNF("Prev", &obj2); + if (obj2.isInt()) { + *pos = (Guint)obj2.getInt(); + more = gTrue; + } else if (obj2.isRef()) { + // certain buggy PDF generators generate "/Prev NNN 0 R" instead + // of "/Prev NNN" + *pos = (Guint)obj2.getRefNum(); + more = gTrue; + } else { + more = gFalse; + } + obj2.free(); + + // save the first trailer dictionary + if (trailerDict.isNone()) { + obj.copy(&trailerDict); + } + + // check for an 'XRefStm' key + if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) { + pos2 = (Guint)obj2.getInt(); + readXRef(&pos2); + if (!ok) { + obj2.free(); + goto err1; + } + } + obj2.free(); + + obj.free(); + return more; + + err1: + obj.free(); + ok = gFalse; + return gFalse; +} + +GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) { + Dict *dict; + int w[3]; + GBool more; + Object obj, obj2, idx; + int newSize, first, n, i; + + dict = xrefStr->getDict(); + + if (!dict->lookupNF("Size", &obj)->isInt()) { + goto err1; + } + newSize = obj.getInt(); + obj.free(); + if (newSize < 0) { + goto err1; + } + if (newSize > size) { + entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); + for (i = size; i < newSize; ++i) { + entries[i].offset = 0xffffffff; + entries[i].type = xrefEntryFree; + } + size = newSize; + } + + if (!dict->lookupNF("W", &obj)->isArray() || + obj.arrayGetLength() < 3) { + goto err1; + } + for (i = 0; i < 3; ++i) { + if (!obj.arrayGet(i, &obj2)->isInt()) { + obj2.free(); + goto err1; + } + w[i] = obj2.getInt(); + obj2.free(); + if (w[i] < 0 || w[i] > 4) { + goto err1; + } + } + obj.free(); + + xrefStr->reset(); + dict->lookupNF("Index", &idx); + if (idx.isArray()) { + for (i = 0; i+1 < idx.arrayGetLength(); i += 2) { + if (!idx.arrayGet(i, &obj)->isInt()) { + idx.free(); + goto err1; + } + first = obj.getInt(); + obj.free(); + if (!idx.arrayGet(i+1, &obj)->isInt()) { + idx.free(); + goto err1; + } + n = obj.getInt(); + obj.free(); + if (first < 0 || n < 0 || + !readXRefStreamSection(xrefStr, w, first, n)) { + idx.free(); + goto err0; + } + } + } else { + if (!readXRefStreamSection(xrefStr, w, 0, newSize)) { + idx.free(); + goto err0; + } + } + idx.free(); + + dict->lookupNF("Prev", &obj); + if (obj.isInt()) { + *pos = (Guint)obj.getInt(); + more = gTrue; + } else { + more = gFalse; + } + obj.free(); + if (trailerDict.isNone()) { + trailerDict.initDict(dict); + } + + return more; + + err1: + obj.free(); + err0: + ok = gFalse; + return gFalse; +} + +GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) { + Guint offset; + int type, gen, c, newSize, i, j; + + if (first + n < 0) { + return gFalse; + } + if (first + n > size) { + for (newSize = size ? 2 * size : 1024; + first + n > newSize && newSize > 0; + newSize <<= 1) ; + if (newSize < 0) { + return gFalse; + } + entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); + for (i = size; i < newSize; ++i) { + entries[i].offset = 0xffffffff; + entries[i].type = xrefEntryFree; + } + size = newSize; + } + for (i = first; i < first + n; ++i) { + if (w[0] == 0) { + type = 1; + } else { + for (type = 0, j = 0; j < w[0]; ++j) { + if ((c = xrefStr->getChar()) == EOF) { + return gFalse; + } + type = (type << 8) + c; + } + } + for (offset = 0, j = 0; j < w[1]; ++j) { + if ((c = xrefStr->getChar()) == EOF) { + return gFalse; + } + offset = (offset << 8) + c; + } + for (gen = 0, j = 0; j < w[2]; ++j) { + if ((c = xrefStr->getChar()) == EOF) { + return gFalse; + } + gen = (gen << 8) + c; + } + if (entries[i].offset == 0xffffffff) { + switch (type) { + case 0: + entries[i].offset = offset; + entries[i].gen = gen; + entries[i].type = xrefEntryFree; + break; + case 1: + entries[i].offset = offset; + entries[i].gen = gen; + entries[i].type = xrefEntryUncompressed; + break; + case 2: + entries[i].offset = offset; + entries[i].gen = gen; + entries[i].type = xrefEntryCompressed; + break; + default: + return gFalse; + } + } + } + + return gTrue; +} + +// Attempt to construct an xref table for a damaged file. +GBool XRef::constructXRef() { + Parser *parser; + Object newTrailerDict, obj; + char buf[256]; + Guint pos; + int num, gen; + int newSize; + int streamEndsSize; + char *p; + int i; + GBool gotRoot; + + gfree(entries); + size = 0; + entries = NULL; + + error(0, "PDF file is damaged - attempting to reconstruct xref table..."); + gotRoot = gFalse; + streamEndsLen = streamEndsSize = 0; + + str->reset(); + while (1) { + pos = str->getPos(); + if (!str->getLine(buf, 256)) { + break; + } + p = buf; + + // got trailer dictionary + if (!strncmp(p, "trailer", 7)) { + obj.initNull(); + parser = new Parser(NULL, + new Lexer(NULL, + str->makeSubStream(pos + 7, gFalse, 0, &obj))); + parser->getObj(&newTrailerDict); + if (newTrailerDict.isDict()) { + newTrailerDict.dictLookupNF("Root", &obj); + if (obj.isRef()) { + rootNum = obj.getRefNum(); + rootGen = obj.getRefGen(); + if (!trailerDict.isNone()) { + trailerDict.free(); + } + newTrailerDict.copy(&trailerDict); + gotRoot = gTrue; + } + obj.free(); + } + newTrailerDict.free(); + delete parser; + + // look for object + } else if (isdigit(*p)) { + num = atoi(p); + if (num > 0) { + do { + ++p; + } while (*p && isdigit(*p)); + if (isspace(*p)) { + do { + ++p; + } while (*p && isspace(*p)); + if (isdigit(*p)) { + gen = atoi(p); + do { + ++p; + } while (*p && isdigit(*p)); + if (isspace(*p)) { + do { + ++p; + } while (*p && isspace(*p)); + if (!strncmp(p, "obj", 3)) { + if (num >= size) { + newSize = (num + 1 + 255) & ~255; + if (newSize < 0) { + error(-1, "Bad object number"); + return gFalse; + } + entries = (XRefEntry *) + greallocn(entries, newSize, sizeof(XRefEntry)); + for (i = size; i < newSize; ++i) { + entries[i].offset = 0xffffffff; + entries[i].type = xrefEntryFree; + } + size = newSize; + } + if (entries[num].type == xrefEntryFree || + gen >= entries[num].gen) { + entries[num].offset = pos - start; + entries[num].gen = gen; + entries[num].type = xrefEntryUncompressed; + } + } + } + } + } + } + + } else if (!strncmp(p, "endstream", 9)) { + if (streamEndsLen == streamEndsSize) { + streamEndsSize += 64; + streamEnds = (Guint *)greallocn(streamEnds, + streamEndsSize, sizeof(int)); + } + streamEnds[streamEndsLen++] = pos; + } + } + + if (gotRoot) + return gTrue; + + error(-1, "Couldn't find trailer dictionary"); + return gFalse; +} + +void XRef::setEncryption(int permFlagsA, GBool ownerPasswordOkA, + Guchar *fileKeyA, int keyLengthA, int encVersionA) { + int i; + + encrypted = gTrue; + permFlags = permFlagsA; + ownerPasswordOk = ownerPasswordOkA; + if (keyLengthA <= 16) { + keyLength = keyLengthA; + } else { + keyLength = 16; + } + for (i = 0; i < keyLength; ++i) { + fileKey[i] = fileKeyA[i]; + } + encVersion = encVersionA; +} + +GBool XRef::okToPrint(GBool ignoreOwnerPW) { + return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint); +} + +GBool XRef::okToChange(GBool ignoreOwnerPW) { + return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange); +} + +GBool XRef::okToCopy(GBool ignoreOwnerPW) { + return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy); +} + +GBool XRef::okToAddNotes(GBool ignoreOwnerPW) { + return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes); +} + +Object *XRef::fetch(int num, int gen, Object *obj) { + XRefEntry *e; + Parser *parser; + Object obj1, obj2, obj3; + + // check for bogus ref - this can happen in corrupted PDF files + if (num < 0 || num >= size) { + goto err; + } + + e = &entries[num]; + switch (e->type) { + + case xrefEntryUncompressed: + if (e->gen != gen) { + goto err; + } + obj1.initNull(); + parser = new Parser(this, + new Lexer(this, + str->makeSubStream(start + e->offset, gFalse, 0, &obj1))); + parser->getObj(&obj1); + parser->getObj(&obj2); + parser->getObj(&obj3); + if (!obj1.isInt() || obj1.getInt() != num || + !obj2.isInt() || obj2.getInt() != gen || + !obj3.isCmd("obj")) { + obj1.free(); + obj2.free(); + obj3.free(); + delete parser; + goto err; + } + parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL, keyLength, + num, gen); + obj1.free(); + obj2.free(); + obj3.free(); + delete parser; + break; + + case xrefEntryCompressed: + if (gen != 0) { + goto err; + } + if (!objStr || objStr->getObjStrNum() != (int)e->offset) { + if (objStr) { + delete objStr; + } + objStr = new ObjectStream(this, e->offset); + } + objStr->getObject(e->gen, num, obj); + break; + + default: + goto err; + } + + return obj; + + err: + return obj->initNull(); +} + +Object *XRef::getDocInfo(Object *obj) { + return trailerDict.dictLookup("Info", obj); +} + +// Added for the pdftex project. +Object *XRef::getDocInfoNF(Object *obj) { + return trailerDict.dictLookupNF("Info", obj); +} + +GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) { + int a, b, m; + + if (streamEndsLen == 0 || + streamStart > streamEnds[streamEndsLen - 1]) { + return gFalse; + } + + a = -1; + b = streamEndsLen - 1; + // invariant: streamEnds[a] < streamStart <= streamEnds[b] + while (b - a > 1) { + m = (a + b) / 2; + if (streamStart <= streamEnds[m]) { + b = m; + } else { + a = m; + } + } + *streamEnd = streamEnds[b]; + return gTrue; +} + +Guint XRef::strToUnsigned(char *s) { + Guint x; + char *p; + int i; + + x = 0; + for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) { + x = 10 * x + (*p - '0'); + } + return x; +}