1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
29 #include "ErrorCodes.h"
32 //------------------------------------------------------------------------
// Size of the window at the end of the file that is searched for the
// 'startxref' keyword.  getStartXref() uses it both to size its local
// buffer and to position the stream before reading.
34 #define xrefSearchSize 1024 // read this many bytes at end of file
35 // to look for 'startxref'
38 //------------------------------------------------------------------------
40 //------------------------------------------------------------------------
// Permission bits.  Each one is tested against permFlags by the matching
// query: okToPrint(), okToChange(), okToCopy() and okToAddNotes().
42 #define permPrint (1<<2)
43 #define permChange (1<<3)
44 #define permCopy (1<<4)
45 #define permNotes (1<<5)
// Value given to permFlags at the start of checkEncrypted(), before the
// Encrypt dictionary is examined; all four bits above are set in it.
46 #define defPermFlags 0xfffc
49 //------------------------------------------------------------------------
51 //------------------------------------------------------------------------
56 // Create an object stream from object number <objStrNumA>, generation 0.
// The object is fetched through <xref> and parsed by the constructor.
58 ObjectStream(XRef *xref, int objStrNumA);
62 // Return the object number of this object stream.
63 int getObjStrNum() { return objStrNum; }
65 // Get the <objIdx>th object from this stream, which should be
66 // object number <objNum>, generation 0.
// A copy of the stored object is written to <obj>.  If <objIdx> is out
// of range or <objNum> does not match the number recorded for that
// slot, <obj> is initialized to a null object instead.
67 Object *getObject(int objIdx, int objNum, Object *obj);
71 int objStrNum; // object number of the object stream
72 int nObjects; // number of objects in the stream
73 Object *objs; // the objects (length = nObjects)
74 int *objNums; // the object numbers (length = nObjects)
// Build an ObjectStream: fetch object <objStrNumA> (generation 0) through
// <xref>, read the "N" and "First" entries of its stream dictionary,
// parse the header of (object number, offset) pairs, then parse every
// contained object into objs[].
77 ObjectStream::ObjectStream(XRef *xref, int objStrNumA) {
81 Object objStr, obj1, obj2;
84 objStrNum = objStrNumA;
// the referenced object must be a stream
89 if (!xref->fetch(objStrNum, 0, &objStr)->isStream()) {
// "N": number of objects held in the stream; must be an integer
93 if (!objStr.streamGetDict()->lookup("N", &obj1)->isInt()) {
97 nObjects = obj1.getInt();
// "First": must be an integer; it is passed below as the length argument
// of the header sub-stream (presumably the byte length of the header)
103 if (!objStr.streamGetDict()->lookup("First", &obj1)->isInt()) {
107 first = obj1.getInt();
// NOTE(review): nObjects comes from the file.  Confirm it is verified to
// be positive and bounded before these allocations: nObjects * sizeof(int)
// can overflow, and offsets[0] is read unconditionally further down.
110 objs = new Object[nObjects];
111 objNums = (int *)gmalloc(nObjects * sizeof(int));
112 offsets = (int *)gmalloc(nObjects * sizeof(int));
114 // parse the header: object numbers and offsets
115 objStr.streamReset();
117 str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first);
118 parser = new Parser(xref, new Lexer(xref, str));
// each header entry is two integers: the object number, then its offset
119 for (i = 0; i < nObjects; ++i) {
120 parser->getObj(&obj1);
121 parser->getObj(&obj2);
122 if (!obj1.isInt() || !obj2.isInt()) {
129 objNums[i] = obj1.getInt();
130 offsets[i] = obj2.getInt();
// consume whatever remains of the header sub-stream
134 while (str->getChar() != EOF) ;
137 // skip to the first object - this shouldn't be necessary because
138 // the First key is supposed to be equal to offsets[0], but just in case
140 for (i = first; i < offsets[0]; ++i) {
141 objStr.getStream()->getChar();
// parse each object from its own sub-stream
145 for (i = 0; i < nObjects; ++i) {
// the last object gets a sub-stream created with (gFalse, 0); every other
// object gets one created with (gTrue, offsets[i+1] - offsets[i])
// NOTE(review): the offsets are file-controlled and nothing visible here
// checks that they are increasing, so this difference may be negative --
// confirm how EmbedStream treats that.
147 if (i == nObjects - 1) {
148 str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0);
150 str = new EmbedStream(objStr.getStream(), &obj1, gTrue,
151 offsets[i+1] - offsets[i]);
153 parser = new Parser(xref, new Lexer(xref, str));
154 parser->getObj(&objs[i]);
// consume the rest of this object's sub-stream before moving on
155 while (str->getChar() != EOF) ;
// Destructor.  Walks all nObjects slots.
// NOTE(review): presumably frees each objs[i] and then the arrays --
// confirm against the full body.
166 ObjectStream::~ObjectStream() {
170 for (i = 0; i < nObjects; ++i) {
// Copy the object stored at index <objIdx> into <obj>.
// <objNum> is the object number the caller expects at that index.  When
// the index is outside [0, nObjects) or the recorded number differs,
// <obj> is set to a null object instead.  Returns the result of
// initNull() or copy() on the respective path.
178 Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) {
// the range test comes first, so objNums[objIdx] is only read when valid
179 if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) {
180 return obj->initNull();
182 return objs[objIdx].copy(obj);
185 //------------------------------------------------------------------------
187 //------------------------------------------------------------------------
// Build the cross-reference table for a PDF stream.
// The constructor locates 'startxref', reads the xref sections, looks up
// the Root entry of the trailer dictionary, and finally checks
// encryption.  Each failure point falls back to constructXRef(); if that
// also fails, ok is false and errCode is errDamaged.  errCode becomes
// errEncrypted when checkEncrypted() returns true.
// <ownerPassword> and <userPassword> are passed on to checkEncrypted().
189 XRef::XRef(BaseStream *strA, GString *ownerPassword, GString *userPassword) {
203 start = str->getStart();
204 pos = getStartXref();
206 // if there was a problem with the 'startxref' position, try to
207 // reconstruct the xref table
209 if (!(ok = constructXRef())) {
210 errCode = errDamaged;
214 // read the xref table
// each call handles one section and updates pos; the loop ends when
// readXRef() returns false
216 while (readXRef(&pos)) ;
218 // if there was a problem with the xref table,
219 // try to reconstruct it
221 if (!(ok = constructXRef())) {
222 errCode = errDamaged;
228 // get the root dictionary (catalog) object
// dictLookupNF is used here and the result is read as a reference
// (number and generation)
229 trailerDict.dictLookupNF("Root", &obj);
231 rootNum = obj.getRefNum();
232 rootGen = obj.getRefGen();
236 if (!(ok = constructXRef())) {
237 errCode = errDamaged;
242 // now set the trailer dictionary's xref pointer so we can fetch
243 // indirect objects from it
244 trailerDict.getDict()->setXRef(this);
246 // check for encryption
247 #ifndef NO_DECRYPTION
250 if (checkEncrypted(ownerPassword, userPassword)) {
252 errCode = errEncrypted;
268 // Read the 'startxref' position.
// Reads up to xrefSearchSize bytes from the end of the stream into a
// local buffer, searches it backwards for the keyword "startxref", and
// parses the number after it into lastXRefPos.
269 Guint XRef::getStartXref() {
// one extra byte beyond the search window
// NOTE(review): presumably for a terminating NUL, which strncmp and
// isspace below rely on -- confirm
270 char buf[xrefSearchSize+1];
274 // read last xrefSearchSize bytes
275 str->setPos(xrefSearchSize, -1);
// n counts the bytes actually read; reading stops early at EOF
276 for (n = 0; n < xrefSearchSize; ++n) {
277 if ((c = str->getChar()) == EOF) {
// scan backwards; "startxref" is 9 characters, so n - 9 is the last
// index at which a full match can begin
285 for (i = n - 9; i >= 0; --i) {
286 if (!strncmp(&buf[i], "startxref", 9)) {
// skip the whitespace between the keyword and the number
293 for (p = &buf[i+9]; isspace(*p); ++p) ;
294 lastXRefPos = strToUnsigned(p);
299 // Read one xref table section. Also reads the associated trailer
300 // dictionary, and returns the prev pointer (if any).
// <pos> is in/out: on entry it is the offset of the section, relative to
// start; the readers called from here overwrite it with the Prev value.
// The first token decides the format: the command "xref" selects
// readXRefTable(); an integer begins an "<int> <int> obj" header that
// must be followed by a stream, which goes to readXRefStream().
301 GBool XRef::readXRef(Guint *pos) {
306 // start up a parser, parse one token
// this parser is created with a NULL xref
308 parser = new Parser(NULL,
310 str->makeSubStream(start + *pos, gFalse, 0, &obj)));
311 parser->getObj(&obj);
313 // parse an old-style xref table
314 if (obj.isCmd("xref")) {
316 more = readXRefTable(parser, pos);
318 // parse an xref stream
319 } else if (obj.isInt()) {
// second integer of the header
321 if (!parser->getObj(&obj)->isInt()) {
// the "obj" keyword
325 if (!parser->getObj(&obj)->isCmd("obj")) {
// the object itself must be a stream
329 if (!parser->getObj(&obj)->isStream()) {
332 more = readXRefStream(obj.getStream(), pos);
// Parse an old-style xref table from <parser>, which is positioned just
// after the "xref" keyword.
// Subsections are read until the "trailer" command appears, growing
// entries[] as needed.  The trailer dictionary is then read: its Prev
// value is stored in *pos, the first trailer seen is saved in
// trailerDict, and an integer "XRefStm" entry is picked up.
349 GBool XRef::readXRefTable(Parser *parser, Guint *pos) {
354 int first, n, newSize, i;
357 parser->getObj(&obj);
// "trailer" ends the list of subsections
358 if (obj.isCmd("trailer")) {
// first object number covered by this subsection
365 first = obj.getInt();
// the entry count that follows must be an integer
367 if (!parser->getObj(&obj)->isInt()) {
// grow entries[] when the subsection extends past the current size;
// new slots are marked free with the sentinel offset 0xffffffff
// NOTE(review): first (and presumably n) come straight from the file.
// first + n can overflow int, a negative first would index before
// entries[], and newSize * sizeof(XRefEntry) can wrap.  Confirm these
// are rejected before this point.
372 if (first + n > size) {
373 for (newSize = size ? 2 * size : 1024;
376 entries = (XRefEntry *)grealloc(entries, newSize * sizeof(XRefEntry));
377 for (i = size; i < newSize; ++i) {
378 entries[i].offset = 0xffffffff;
379 entries[i].type = xrefEntryFree;
// each entry is: offset (int), generation (int), then "n" or "f"
383 for (i = first; i < first + n; ++i) {
384 if (!parser->getObj(&obj)->isInt()) {
387 entry.offset = (Guint)obj.getInt();
389 if (!parser->getObj(&obj)->isInt()) {
392 entry.gen = obj.getInt();
394 parser->getObj(&obj);
395 if (obj.isCmd("n")) {
396 entry.type = xrefEntryUncompressed;
397 } else if (obj.isCmd("f")) {
398 entry.type = xrefEntryFree;
// the sentinel offset means this slot has not been filled yet
403 if (entries[i].offset == 0xffffffff) {
405 // PDF files of patents from the IBM Intellectual Property
406 // Network have a bug: the xref table claims to start at 1 instead of 0
// detected as a free entry with offset 0 and gen 65535 sitting in slot 1
408 if (i == 1 && first == 1 &&
409 entries[1].offset == 0 && entries[1].gen == 65535 &&
410 entries[1].type == xrefEntryFree) {
// move it to slot 0 and mark slot 1 unfilled again
412 entries[0] = entries[1];
413 entries[1].offset = 0xffffffff;
419 // read the trailer dictionary
420 if (!parser->getObj(&obj)->isDict()) {
424 // get the 'Prev' pointer
425 obj.getDict()->lookupNF("Prev", &obj2);
427 *pos = (Guint)obj2.getInt();
429 } else if (obj2.isRef()) {
430 // certain buggy PDF generators generate "/Prev NNN 0 R" instead of "/Prev NNN"
// in that case the reference's object number is used as the offset
432 *pos = (Guint)obj2.getRefNum();
439 // save the first trailer dictionary
440 if (trailerDict.isNone()) {
441 obj.copy(&trailerDict);
444 // check for an 'XRefStm' key
445 if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) {
446 pos2 = obj2.getInt();
// Parse a cross-reference stream.
// Reads "Size", "W" and "Index" from the stream dictionary, passes each
// indexed range (or the whole table when no range loop runs) to
// readXRefStreamSection(), stores the "Prev" value in *pos, and saves
// the first trailer seen by using the stream dictionary as trailerDict.
463 GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
467 Object obj, obj2, idx;
468 int newSize, first, n, i;
470 dict = xrefStr->getDict();
// "Size" must be an integer; entries[] is grown to at least that size
472 if (!dict->lookupNF("Size", &obj)->isInt()) {
475 newSize = obj.getInt();
// NOTE(review): newSize is file-controlled.  A large value makes
// newSize * sizeof(XRefEntry) wrap and under-allocate -- confirm it is
// bounded.
477 if (newSize > size) {
478 entries = (XRefEntry *)grealloc(entries, newSize * sizeof(XRefEntry));
// new slots are marked free with the sentinel offset 0xffffffff
479 for (i = size; i < newSize; ++i) {
480 entries[i].offset = 0xffffffff;
481 entries[i].type = xrefEntryFree;
// "W" must be an array of at least three integers; they are used as the
// byte widths of the type, offset and gen fields in
// readXRefStreamSection()
486 if (!dict->lookupNF("W", &obj)->isArray() ||
487 obj.arrayGetLength() < 3) {
490 for (i = 0; i < 3; ++i) {
491 if (!obj.arrayGet(i, &obj2)->isInt()) {
// NOTE(review): no range check on the widths is visible here; negative
// or very large values are passed on as-is -- confirm
495 w[i] = obj2.getInt();
// "Index" is consumed as (first, count) pairs, one section per pair
501 dict->lookupNF("Index", &idx);
503 for (i = 0; i+1 < idx.arrayGetLength(); i += 2) {
504 if (!idx.arrayGet(i, &obj)->isInt()) {
508 first = obj.getInt();
510 if (!idx.arrayGet(i+1, &obj)->isInt()) {
516 if (!readXRefStreamSection(xrefStr, w, first, n)) {
// alternative path: a single section covering entries 0..size-1
522 if (!readXRefStreamSection(xrefStr, w, 0, size)) {
// "Prev" gives the position of the previous xref section
529 dict->lookupNF("Prev", &obj);
531 *pos = (Guint)obj.getInt();
// keep only the first trailer dictionary encountered
537 if (trailerDict.isNone()) {
538 trailerDict.initDict(dict);
// Read <n> entries of a cross-reference stream into
// entries[first .. first+n-1].
// Each entry has three big-endian fields of w[0], w[1] and w[2] bytes:
// type, offset and generation.  entries[] is grown as needed.
550 GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
552 int type, gen, c, newSize, i, j;
// NOTE(review): first and n originate in the file.  first + n can
// overflow int, a negative first indexes before entries[], and
// newSize * sizeof(XRefEntry) can wrap -- confirm the caller or the
// surrounding code rejects these.
554 if (first + n > size) {
555 for (newSize = size ? 2 * size : 1024;
558 entries = (XRefEntry *)grealloc(entries, newSize * sizeof(XRefEntry));
// new slots are marked free with the sentinel offset 0xffffffff
559 for (i = size; i < newSize; ++i) {
560 entries[i].offset = 0xffffffff;
561 entries[i].type = xrefEntryFree;
565 for (i = first; i < first + n; ++i) {
// type field: w[0] bytes, most significant first (stays 0 when w[0] == 0)
569 for (type = 0, j = 0; j < w[0]; ++j) {
570 if ((c = xrefStr->getChar()) == EOF) {
573 type = (type << 8) + c;
// offset field: w[1] bytes
576 for (offset = 0, j = 0; j < w[1]; ++j) {
577 if ((c = xrefStr->getChar()) == EOF) {
580 offset = (offset << 8) + c;
// generation field: w[2] bytes
582 for (gen = 0, j = 0; j < w[2]; ++j) {
583 if ((c = xrefStr->getChar()) == EOF) {
586 gen = (gen << 8) + c;
// the entry is stored as free, uncompressed or compressed; offset and gen
// are copied verbatim in all three cases
590 entries[i].offset = offset;
591 entries[i].gen = gen;
592 entries[i].type = xrefEntryFree;
595 entries[i].offset = offset;
596 entries[i].gen = gen;
597 entries[i].type = xrefEntryUncompressed;
600 entries[i].offset = offset;
601 entries[i].gen = gen;
602 entries[i].type = xrefEntryCompressed;
612 // Attempt to construct an xref table for a damaged file.
// Reads the file line by line and recognizes three kinds of line:
//   "trailer"           - parse the dictionary after it and take Root from it
//   "<num> <gen> obj"   - record an uncompressed entry at this position
//   "endstream"         - append the position to streamEnds[]
// Reports "Couldn't find trailer dictionary" on the failure path.
613 GBool XRef::constructXRef() {
615 Object newTrailerDict, obj;
629 error(0, "PDF file is damaged - attempting to reconstruct xref table...");
// start with an empty streamEnds table
631 streamEndsLen = streamEndsSize = 0;
// lines are read into a 256-byte buffer
636 if (!str->getLine(buf, 256)) {
641 // got trailer dictionary
642 if (!strncmp(p, "trailer", 7)) {
// parse starting just past the 7-character keyword
644 parser = new Parser(NULL,
646 str->makeSubStream(start + pos + 7, gFalse, 0, &obj)));
647 parser->getObj(&newTrailerDict);
648 if (newTrailerDict.isDict()) {
649 newTrailerDict.dictLookupNF("Root", &obj);
651 rootNum = obj.getRefNum();
652 rootGen = obj.getRefGen();
// this dictionary is copied into trailerDict; the isNone() test guards
// handling of a trailer that was stored earlier
653 if (!trailerDict.isNone()) {
656 newTrailerDict.copy(&trailerDict);
661 newTrailerDict.free();
// a line starting with a digit may be an "<num> <gen> obj" header: two
// digit runs, each followed by whitespace
665 } else if (isdigit(*p)) {
669 } while (*p && isdigit(*p));
673 } while (*p && isspace(*p));
678 } while (*p && isdigit(*p));
682 } while (*p && isspace(*p));
683 if (!strncmp(p, "obj", 3)) {
// round the table size up to a multiple of 256 entries
// NOTE(review): num is parsed from file text.  For large values
// (num + 1 + 255) overflows int and newSize * sizeof(XRefEntry) can
// wrap -- confirm num is bounded before this point.
685 newSize = (num + 1 + 255) & ~255;
686 entries = (XRefEntry *)
687 grealloc(entries, newSize * sizeof(XRefEntry));
688 for (i = size; i < newSize; ++i) {
689 entries[i].offset = 0xffffffff;
690 entries[i].type = xrefEntryFree;
// keep this definition if the slot is free or its generation is not older
694 if (entries[num].type == xrefEntryFree ||
695 gen >= entries[num].gen) {
// offsets are stored relative to start
696 entries[num].offset = pos - start;
697 entries[num].gen = gen;
698 entries[num].type = xrefEntryUncompressed;
705 } else if (!strncmp(p, "endstream", 9)) {
// grow streamEnds[] in chunks of 64
// NOTE(review): the array holds Guint but is sized with sizeof(int);
// harmless while the two sizes match, but sizeof(Guint) states the intent.
706 if (streamEndsLen == streamEndsSize) {
707 streamEndsSize += 64;
708 streamEnds = (Guint *)grealloc(streamEnds,
709 streamEndsSize * sizeof(int));
711 streamEnds[streamEndsLen++] = pos;
718 error(-1, "Couldn't find trailer dictionary");
// Decryption-enabled build.
// Examines the trailer's "Encrypt" dictionary.  For the "Standard"
// filter it reads V, R, Length, O, U and P plus the first element of the
// trailer "ID" array, then calls Decrypt::makeFileKey() with the supplied
// passwords.  Problems are reported through error().
722 #ifndef NO_DECRYPTION
723 GBool XRef::checkEncrypted(GString *ownerPassword, GString *userPassword) {
724 Object encrypt, filterObj, versionObj, revisionObj, lengthObj;
725 Object ownerKey, userKey, permissions, fileID, fileID1;
// defaults used when the file is not encrypted or the info is unusable
730 encVersion = encRevision = 0;
733 permFlags = defPermFlags;
734 ownerPasswordOk = gFalse;
735 trailerDict.dictLookup("Encrypt", &encrypt);
// the result is held in the local encrypted1; the member flag is only
// set at the end of the function
736 if ((encrypted1 = encrypt.isDict())) {
738 encrypt.dictLookup("Filter", &filterObj);
739 if (filterObj.isName("Standard")) {
740 encrypt.dictLookup("V", &versionObj);
741 encrypt.dictLookup("R", &revisionObj);
742 encrypt.dictLookup("Length", &lengthObj);
743 encrypt.dictLookup("O", &ownerKey);
744 encrypt.dictLookup("U", &userKey);
745 encrypt.dictLookup("P", &permissions);
746 trailerDict.dictLookup("ID", &fileID);
// V, R and P must be integers; O and U must be strings of exactly 32 bytes
747 if (versionObj.isInt() &&
748 revisionObj.isInt() &&
749 ownerKey.isString() && ownerKey.getString()->getLength() == 32 &&
750 userKey.isString() && userKey.getString()->getLength() == 32 &&
751 permissions.isInt() &&
753 encVersion = versionObj.getInt();
754 encRevision = revisionObj.getInt();
// "Length" is divided by 8 to give keyLength
// NOTE(review): the value is file-controlled; confirm keyLength is
// clamped to the capacity of fileKey before it is used.
755 if (lengthObj.isInt()) {
756 keyLength = lengthObj.getInt() / 8;
760 permFlags = permissions.getInt();
// only versions 1-2 with revisions 2-3 are handled
761 if (encVersion >= 1 && encVersion <= 2 &&
762 encRevision >= 2 && encRevision <= 3) {
763 fileID.arrayGet(0, &fileID1);
764 if (fileID1.isString()) {
765 if (Decrypt::makeFileKey(encVersion, encRevision, keyLength,
766 ownerKey.getString(), userKey.getString(),
767 permFlags, fileID1.getString(),
768 ownerPassword, userPassword, fileKey,
// an owner password was supplied but was not accepted
770 if (ownerPassword && !ownerPasswordOk) {
771 error(-1, "Incorrect owner password");
775 error(-1, "Incorrect password");
778 error(-1, "Weird encryption info");
782 error(-1, "Unsupported version/revision (%d/%d) of Standard security handler",
783 encVersion, encRevision);
786 error(-1, "Weird encryption info");
796 error(-1, "Unknown security handler '%s'",
797 filterObj.isName() ? filterObj.getName() : "???");
803 // this flag has to be set *after* we read the O/U/P strings
804 encrypted = encrypted1;
// Build without decryption support.
// The file counts as encrypted whenever the trailer has a non-null
// "Encrypt" entry.  In that case a two-line message is reported and no
// decryption is attempted.  Neither password is used in the code below.
809 GBool XRef::checkEncrypted(GString *ownerPassword, GString *userPassword) {
813 trailerDict.dictLookup("Encrypt", &obj);
814 if ((encrypted = !obj.isNull())) {
815 error(-1, "PDF file is encrypted and this version of the Xpdf tools");
816 error(-1, "was built without decryption support.");
// Printing permission.  In decryption-enabled builds the result is
// nonzero when the owner password was accepted and <ignoreOwnerPW> is
// false, or when the permPrint bit is set in permFlags.
823 GBool XRef::okToPrint(GBool ignoreOwnerPW) {
824 #ifndef NO_DECRYPTION
825 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint);
// Modification permission.  In decryption-enabled builds the result is
// nonzero when the owner password was accepted and <ignoreOwnerPW> is
// false, or when the permChange bit is set in permFlags.
831 GBool XRef::okToChange(GBool ignoreOwnerPW) {
832 #ifndef NO_DECRYPTION
833 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange);
// Copy permission.  In decryption-enabled builds the result is nonzero
// when the owner password was accepted and <ignoreOwnerPW> is false, or
// when the permCopy bit is set in permFlags.
839 GBool XRef::okToCopy(GBool ignoreOwnerPW) {
840 #ifndef NO_DECRYPTION
841 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy);
// Annotation permission.  In decryption-enabled builds the result is
// nonzero when the owner password was accepted and <ignoreOwnerPW> is
// false, or when the permNotes bit is set in permFlags.
847 GBool XRef::okToAddNotes(GBool ignoreOwnerPW) {
848 #ifndef NO_DECRYPTION
849 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes);
// Fetch indirect object <num> <gen> into <obj>.
// Uncompressed entries are parsed from the file at the recorded offset,
// after checking that the text there begins with "<num> <gen> obj".
// Compressed entries are read from an ObjectStream.  The last visible
// statement returns a null object.
855 Object *XRef::fetch(int num, int gen, Object *obj) {
858 Object obj1, obj2, obj3;
860 // check for bogus ref - this can happen in corrupted PDF files
861 if (num < 0 || num >= size) {
868 case xrefEntryUncompressed:
// the entry's offset is relative to start
873 parser = new Parser(this,
875 str->makeSubStream(start + e->offset, gFalse, 0, &obj1)));
// read the three header tokens: object number, generation, "obj"
876 parser->getObj(&obj1);
877 parser->getObj(&obj2);
878 parser->getObj(&obj3);
// any mismatch with the requested number or generation is an error
879 if (!obj1.isInt() || obj1.getInt() != num ||
880 !obj2.isInt() || obj2.getInt() != gen ||
881 !obj3.isCmd("obj")) {
// decryption builds pass the file key only when the file is encrypted
884 #ifndef NO_DECRYPTION
885 parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL, keyLength,
896 case xrefEntryCompressed:
// for compressed entries, offset holds the number of the containing
// object stream and gen the index inside it; objStr keeps the last
// ObjectStream used and is rebuilt only for a different stream
900 if (!objStr || objStr->getObjStrNum() != (int)e->offset) {
904 objStr = new ObjectStream(this, e->offset);
906 objStr->getObject(e->gen, num, obj);
916 return obj->initNull();
// Look up the "Info" entry of the trailer dictionary into <obj> via
// dictLookup() and return that call's result.
919 Object *XRef::getDocInfo(Object *obj) {
920 return trailerDict.dictLookup("Info", obj);
923 // Added for the pdftex project.
// Same lookup as getDocInfo(), but through dictLookupNF().
// NOTE(review): presumably this returns the entry without resolving an
// indirect reference -- confirm.
924 Object *XRef::getDocInfoNF(Object *obj) {
925 return trailerDict.dictLookupNF("Info", obj);
// Find the first position in streamEnds[] that is >= <streamStart> and
// store it in *streamEnd.  streamEnds[] is the table that
// constructXRef() fills with 'endstream' positions.
// NOTE(review): the search relies on streamEnds[] being in ascending
// order, presumably because constructXRef() appends while scanning
// forward -- confirm.
928 GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) {
// nothing to find when the table is empty or every recorded end lies
// before streamStart
931 if (streamEndsLen == 0 ||
932 streamStart > streamEnds[streamEndsLen - 1]) {
937 b = streamEndsLen - 1;
938 // invariant: streamEnds[a] < streamStart <= streamEnds[b]
// the candidate range is narrowed by comparing against index m
941 if (streamStart <= streamEnds[m]) {
947 *streamEnd = streamEnds[b];
// Parse a run of leading decimal digits from <s> into an unsigned value.
// Parsing stops at the first non-digit, at the end of the string, or
// after 10 digits.
// NOTE(review): a 10-digit number can exceed 32 bits; if the accumulator
// is a 32-bit Guint it wraps silently -- confirm that is acceptable.
951 Guint XRef::strToUnsigned(char *s) {
957 for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
958 x = 10 * x + (*p - '0');