X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=pdf2swf%2Fxpdf%2FGfxFont.cc;h=a4581c0ff35a32b68d73ede1c6a8aa77cf517cff;hb=f3bc04e70501213ea1d9d2aa22d5c2fa550dab3f;hp=8dcd8e78d01e3c20ac71fd3a40ef9c350f3c449e;hpb=50dd339d3d6262763616efe8d7ee415ab19befb9;p=swftools.git diff --git a/pdf2swf/xpdf/GfxFont.cc b/pdf2swf/xpdf/GfxFont.cc index 8dcd8e7..a4581c0 100644 --- a/pdf2swf/xpdf/GfxFont.cc +++ b/pdf2swf/xpdf/GfxFont.cc @@ -2,15 +2,16 @@ // // GfxFont.cc // -// Copyright 1996-2002 Glyph & Cog, LLC +// Copyright 1996-2003 Glyph & Cog, LLC // //======================================================================== -#ifdef __GNUC__ +#include + +#ifdef USE_GCC_PRAGMAS #pragma implementation #endif -#include #include #include #include @@ -24,7 +25,9 @@ #include "CharCodeToUnicode.h" #include "FontEncodingTables.h" #include "BuiltinFontTables.h" -#include "FontFile.h" +#include "FoFiType1.h" +#include "FoFiType1C.h" +#include "FoFiTrueType.h" #include "GfxFont.h" //------------------------------------------------------------------------ @@ -34,6 +37,11 @@ struct StdFontMapEntry { char *properName; }; +// Acrobat 4.0 and earlier substituted Base14-compatible fonts without +// providing Widths and a FontDescriptor, so we munge the names into +// the proper Base14 names. This table is from implementation note 44 +// in the PDF 1.4 spec, with some additions based on empirical +// evidence. static StdFontMapEntry stdFontMap[] = { { "Arial", "Helvetica" }, { "Arial,Bold", "Helvetica-Bold" }, @@ -47,8 +55,8 @@ static StdFontMapEntry stdFontMap[] = { { "Arial-ItalicMT", "Helvetica-Oblique" }, { "ArialMT", "Helvetica" }, { "Courier,Bold", "Courier-Bold" }, - { "Courier,Italic", "Courier-Oblique" }, { "Courier,BoldItalic", "Courier-BoldOblique" }, + { "Courier,Italic", "Courier-Oblique" }, { "CourierNew", "Courier" }, { "CourierNew,Bold", "Courier-Bold" }, { "CourierNew,BoldItalic", "Courier-BoldOblique" }, @@ -65,6 +73,9 @@ static StdFontMapEntry stdFontMap[] = { { "Helvetica,Italic", "Helvetica-Oblique" }, { "Helvetica-BoldItalic", "Helvetica-BoldOblique" }, { "Helvetica-Italic", "Helvetica-Oblique" }, + { "Symbol,Bold", "Symbol" }, + { "Symbol,BoldItalic", "Symbol" }, + { "Symbol,Italic", "Symbol" }, { "TimesNewRoman", "Times-Roman" }, { "TimesNewRoman,Bold", "Times-Bold" }, { "TimesNewRoman,BoldItalic", "Times-BoldItalic" }, @@ -79,7 +90,10 @@ static StdFontMapEntry stdFontMap[] = { { "TimesNewRomanPS-BoldMT", "Times-Bold" }, { "TimesNewRomanPS-Italic", "Times-Italic" }, { "TimesNewRomanPS-ItalicMT", "Times-Italic" }, - { "TimesNewRomanPSMT", "Times-Roman" } + { "TimesNewRomanPSMT", "Times-Roman" }, + { "TimesNewRomanPSMT,Bold", "Times-Bold" }, + { "TimesNewRomanPSMT,BoldItalic", "Times-BoldItalic" }, + { "TimesNewRomanPSMT,Italic", "Times-Italic" } }; //------------------------------------------------------------------------ @@ -127,12 +141,16 @@ GfxFont::GfxFont(char *tagA, Ref idA, GString *nameA) { tag = new GString(tagA); id = idA; name = nameA; + origName = nameA; embFontName = NULL; extFontFile = NULL; } GfxFont::~GfxFont() { delete tag; + if (origName && origName != name) { + delete origName; + } if (name) { delete name; } @@ -173,19 +191,19 @@ void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) { // look for embedded font file if (obj1.dictLookupNF("FontFile", &obj2)->isRef()) { - if (type == fontType1) { - embFontID = obj2.getRef(); - } else { + embFontID = obj2.getRef(); + if (type != fontType1) { error(-1, "Mismatch between font type and embedded font file"); + type = fontType1; } } obj2.free(); if (embFontID.num == -1 && obj1.dictLookupNF("FontFile2", &obj2)->isRef()) { - if (type == fontTrueType || type == fontCIDType2) { - embFontID = obj2.getRef(); - } else { + embFontID = obj2.getRef(); + if (type != fontTrueType && type != fontCIDType2) { error(-1, "Mismatch between font type and embedded font file"); + type = type == fontCIDType0 ? fontCIDType2 : fontTrueType; } } obj2.free(); @@ -194,33 +212,29 @@ void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) { if (obj2.fetch(xref, &obj3)->isStream()) { obj3.streamGetDict()->lookup("Subtype", &obj4); if (obj4.isName("Type1")) { - if (type == fontType1) { - embFontID = obj2.getRef(); - } else { + embFontID = obj2.getRef(); + if (type != fontType1) { error(-1, "Mismatch between font type and embedded font file"); + type = fontType1; } } else if (obj4.isName("Type1C")) { - if (type == fontType1) { - type = fontType1C; - embFontID = obj2.getRef(); - } else if (type == fontType1C) { - embFontID = obj2.getRef(); - } else { + embFontID = obj2.getRef(); + if (type != fontType1 && type != fontType1C) { error(-1, "Mismatch between font type and embedded font file"); } + type = fontType1C; } else if (obj4.isName("TrueType")) { - if (type == fontTrueType) { - embFontID = obj2.getRef(); - } else { + embFontID = obj2.getRef(); + if (type != fontTrueType) { error(-1, "Mismatch between font type and embedded font file"); + type = fontTrueType; } } else if (obj4.isName("CIDFontType0C")) { - if (type == fontCIDType0) { - type = fontCIDType0C; - embFontID = obj2.getRef(); - } else { + embFontID = obj2.getRef(); + if (type != fontCIDType0) { error(-1, "Mismatch between font type and embedded font file"); } + type = fontCIDType0C; } else { error(-1, "Unknown embedded font type '%s'", obj4.isName() ? obj4.getName() : "???"); @@ -255,6 +269,10 @@ void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) { if (t != 0) { descent = t; } + // some broken font descriptors specify a positive descent + if (descent > 0) { + descent = -descent; + } } obj2.free(); @@ -273,8 +291,8 @@ void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) { obj1.free(); } -CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits) { - CharCodeToUnicode *ctu; +CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits, + CharCodeToUnicode *ctu) { GString *buf; Object obj1; int c; @@ -290,17 +308,24 @@ CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits) { } obj1.streamClose(); obj1.free(); - ctu = CharCodeToUnicode::parseCMap(buf, nBits); + if (ctu) { + ctu->mergeCMap(buf, nBits); + } else { + ctu = CharCodeToUnicode::parseCMap(buf, nBits); + } delete buf; return ctu; } void GfxFont::findExtFontFile() { + static char *type1Exts[] = { ".pfa", ".pfb", ".ps", "", NULL }; + static char *ttExts[] = { ".ttf", NULL }; + if (name) { if (type == fontType1) { - extFontFile = globalParams->findFontFile(name, ".pfa", ".pfb"); + extFontFile = globalParams->findFontFile(name, type1Exts); } else if (type == fontTrueType) { - extFontFile = globalParams->findFontFile(name, ".ttf", NULL); + extFontFile = globalParams->findFontFile(name, ttExts); } } } @@ -318,7 +343,8 @@ char *GfxFont::readExtFontFile(int *len) { fseek(f, 0, SEEK_SET); buf = (char *)gmalloc(*len); if ((int)fread(buf, 1, *len, f) != *len) { - error(-1, "Error reading external font file '%s'", extFontFile); + error(-1, "Error reading external font file '%s'", + extFontFile->getCString()); } fclose(f); return buf; @@ -369,16 +395,20 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, GfxFontType typeA, Dict *fontDict): GfxFont(tagA, idA, nameA) { + GString *name2; BuiltinFont *builtinFont; char **baseEnc; GBool baseEncFromFontFile; char *buf; int len; - FontFile *fontFile; + FoFiType1 *ffT1; + FoFiType1C *ffT1C; int code, code2; char *charName; GBool missing, hex; Unicode toUnicode[256]; + CharCodeToUnicode *utu, *ctu2; + Unicode uBuf[8]; double mul; int firstChar, lastChar; Gushort w; @@ -388,26 +418,33 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, type = typeA; ctu = NULL; - // Acrobat 4.0 and earlier substituted Base14-compatible fonts - // without providing Widths and a FontDescriptor, so we munge the - // names into the proper Base14 names. (This table is from - // implementation note 44 in the PDF 1.4 spec.) + // do font name substitution for various aliases of the Base 14 font + // names if (name) { + name2 = name->copy(); + i = 0; + while (i < name2->getLength()) { + if (name2->getChar(i) == ' ') { + name2->del(i); + } else { + ++i; + } + } a = 0; b = sizeof(stdFontMap) / sizeof(StdFontMapEntry); - // invariant: stdFontMap[a].altName <= name < stdFontMap[b].altName + // invariant: stdFontMap[a].altName <= name2 < stdFontMap[b].altName while (b - a > 1) { m = (a + b) / 2; - if (name->cmp(stdFontMap[m].altName) >= 0) { + if (name2->cmp(stdFontMap[m].altName) >= 0) { a = m; } else { b = m; } } - if (!name->cmp(stdFontMap[a].altName)) { - delete name; + if (!name2->cmp(stdFontMap[a].altName)) { name = new GString(stdFontMap[a].properName); } + delete name2; } // is it a built-in font? @@ -438,6 +475,17 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, // get info from font descriptor readFontDescriptor(xref, fontDict); + // for non-embedded fonts, don't trust the ascent/descent/bbox + // values from the font descriptor + if (builtinFont && embFontID.num < 0) { + ascent = 0.001 * builtinFont->ascent; + descent = 0.001 * builtinFont->descent; + fontBBox[0] = 0.001 * builtinFont->bbox[0]; + fontBBox[1] = 0.001 * builtinFont->bbox[1]; + fontBBox[2] = 0.001 * builtinFont->bbox[2]; + fontBBox[3] = 0.001 * builtinFont->bbox[3]; + } + // look for an external font file findExtFontFile(); @@ -483,13 +531,14 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, // 2. embedded or external font file // 3. default: // - builtin --> builtin encoding - // - TrueType --> MacRomanEncoding + // - TrueType --> WinAnsiEncoding // - others --> StandardEncoding // and then add a list of differences (if any) from // FontDict.Encoding.Differences. // check FontDict for base encoding hasEncoding = gFalse; + usesMacRomanEnc = gFalse; baseEnc = NULL; baseEncFromFontFile = gFalse; fontDict->lookup("Encoding", &obj1); @@ -497,6 +546,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, obj1.dictLookup("BaseEncoding", &obj2); if (obj2.isName("MacRomanEncoding")) { hasEncoding = gTrue; + usesMacRomanEnc = gTrue; baseEnc = macRomanEncoding; } else if (obj2.isName("MacExpertEncoding")) { hasEncoding = gTrue; @@ -504,13 +554,11 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, } else if (obj2.isName("WinAnsiEncoding")) { hasEncoding = gTrue; baseEnc = winAnsiEncoding; - } else if (obj2.isName("StandardEncoding")) { - hasEncoding = gTrue; - baseEnc = standardEncoding; } obj2.free(); } else if (obj1.isName("MacRomanEncoding")) { hasEncoding = gTrue; + usesMacRomanEnc = gTrue; baseEnc = macRomanEncoding; } else if (obj1.isName("MacExpertEncoding")) { hasEncoding = gTrue; @@ -518,54 +566,64 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, } else if (obj1.isName("WinAnsiEncoding")) { hasEncoding = gTrue; baseEnc = winAnsiEncoding; - } else if (obj1.isName("StandardEncoding")) { - hasEncoding = gTrue; - baseEnc = standardEncoding; } // check embedded or external font file for base encoding // (only for Type 1 fonts - trying to get an encoding out of a // TrueType font is a losing proposition) - fontFile = NULL; + ffT1 = NULL; + ffT1C = NULL; buf = NULL; - if ((type == fontType1 || type == fontType1C) && - (extFontFile || embFontID.num >= 0)) { + if (type == fontType1 && (extFontFile || embFontID.num >= 0)) { if (extFontFile) { - buf = readExtFontFile(&len); + ffT1 = FoFiType1::load(extFontFile->getCString()); } else { buf = readEmbFontFile(xref, &len); + ffT1 = FoFiType1::make(buf, len); } - if (buf) { - if (type == fontType1C && !strncmp(buf, "%!", 2)) { - // various tools (including Adobe's) occasionally embed Type 1 - // fonts but label them Type 1C - type = fontType1; + if (ffT1) { + if (ffT1->getName()) { + if (embFontName) { + delete embFontName; + } + embFontName = new GString(ffT1->getName()); } - if (type == fontType1) { - fontFile = new Type1FontFile(buf, len); - } else { - fontFile = new Type1CFontFile(buf, len); + if (!baseEnc) { + baseEnc = ffT1->getEncoding(); + baseEncFromFontFile = gTrue; } - if (fontFile->getName()) { + } + } else if (type == fontType1C && (extFontFile || embFontID.num >= 0)) { + if (extFontFile) { + ffT1C = FoFiType1C::load(extFontFile->getCString()); + } else { + buf = readEmbFontFile(xref, &len); + ffT1C = FoFiType1C::make(buf, len); + } + if (ffT1C) { + if (ffT1C->getName()) { if (embFontName) { delete embFontName; } - embFontName = new GString(fontFile->getName()); + embFontName = new GString(ffT1C->getName()); } if (!baseEnc) { - baseEnc = fontFile->getEncoding(); + baseEnc = ffT1C->getEncoding(); baseEncFromFontFile = gTrue; } - gfree(buf); } } + if (buf) { + gfree(buf); + } // get default base encoding if (!baseEnc) { - if (builtinFont) { + if (builtinFont && embFontID.num < 0) { baseEnc = builtinFont->defaultBaseEnc; + hasEncoding = gTrue; } else if (type == fontTrueType) { - baseEnc = macRomanEncoding; + baseEnc = winAnsiEncoding; } else { baseEnc = standardEncoding; } @@ -579,6 +637,20 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, } } + // some Type 1C font files have empty encodings, which can break the + // T1C->T1 conversion (since the 'seac' operator depends on having + // the accents in the encoding), so we fill in any gaps from + // StandardEncoding + if (type == fontType1C && (extFontFile || embFontID.num >= 0) && + baseEncFromFontFile) { + for (i = 0; i < 256; ++i) { + if (!enc[i] && standardEncoding[i]) { + enc[i] = standardEncoding[i]; + encFree[i] = gFalse; + } + } + } + // merge differences into encoding if (obj1.isDict()) { obj1.dictLookup("Differences", &obj2); @@ -590,7 +662,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, if (obj3.isInt()) { code = obj3.getInt(); } else if (obj3.isName()) { - if (code < 256) { + if (code >= 0 && code < 256) { if (encFree[code]) { gfree(enc[code]); } @@ -608,82 +680,106 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, obj2.free(); } obj1.free(); - if (fontFile) { - delete fontFile; + if (ffT1) { + delete ffT1; + } + if (ffT1C) { + delete ffT1C; } //----- build the mapping to Unicode ----- - // look for a ToUnicode CMap - if (!(ctu = readToUnicodeCMap(fontDict, 8))) { - - // no ToUnicode CMap, so use the char names + // pass 1: use the name-to-Unicode mapping table + missing = hex = gFalse; + for (code = 0; code < 256; ++code) { + if ((charName = enc[code])) { + if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) && + strcmp(charName, ".notdef")) { + // if it wasn't in the name-to-Unicode table, check for a + // name that looks like 'Axx' or 'xx', where 'A' is any letter + // and 'xx' is two hex digits + if ((strlen(charName) == 3 && + isalpha(charName[0]) && + isxdigit(charName[1]) && isxdigit(charName[2]) && + ((charName[1] >= 'a' && charName[1] <= 'f') || + (charName[1] >= 'A' && charName[1] <= 'F') || + (charName[2] >= 'a' && charName[2] <= 'f') || + (charName[2] >= 'A' && charName[2] <= 'F'))) || + (strlen(charName) == 2 && + isxdigit(charName[0]) && isxdigit(charName[1]) && + ((charName[0] >= 'a' && charName[0] <= 'f') || + (charName[0] >= 'A' && charName[0] <= 'F') || + (charName[1] >= 'a' && charName[1] <= 'f') || + (charName[1] >= 'A' && charName[1] <= 'F')))) { + hex = gTrue; + } + missing = gTrue; + } + } else { + toUnicode[code] = 0; + } + } - // pass 1: use the name-to-Unicode mapping table - missing = hex = gFalse; + // pass 2: try to fill in the missing chars, looking for names of + // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B' + // are any letters, 'xx' is two hex digits, and 'nn' is 2-4 + // decimal digits + if (missing && globalParams->getMapNumericCharNames()) { for (code = 0; code < 256; ++code) { - if ((charName = enc[code])) { - if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) && - strcmp(charName, ".notdef")) { - // if it wasn't in the name-to-Unicode table, check for a - // name that looks like 'Axx' or 'xx', where 'A' is any letter - // and 'xx' is two hex digits - if ((strlen(charName) == 3 && - isalpha(charName[0]) && - isxdigit(charName[1]) && isxdigit(charName[2]) && - ((charName[1] >= 'a' && charName[1] <= 'f') || - (charName[1] >= 'A' && charName[1] <= 'F') || - (charName[2] >= 'a' && charName[2] <= 'f') || - (charName[2] >= 'A' && charName[2] <= 'F'))) || - (strlen(charName) == 2 && - isxdigit(charName[0]) && isxdigit(charName[1]) && - ((charName[0] >= 'a' && charName[0] <= 'f') || - (charName[0] >= 'A' && charName[0] <= 'F') || - (charName[1] >= 'a' && charName[1] <= 'f') || - (charName[1] >= 'A' && charName[1] <= 'F')))) { - hex = gTrue; - } - missing = gTrue; + if ((charName = enc[code]) && !toUnicode[code] && + strcmp(charName, ".notdef")) { + n = strlen(charName); + code2 = -1; + if (hex && n == 3 && isalpha(charName[0]) && + isxdigit(charName[1]) && isxdigit(charName[2])) { + sscanf(charName+1, "%x", &code2); + } else if (hex && n == 2 && + isxdigit(charName[0]) && isxdigit(charName[1])) { + sscanf(charName, "%x", &code2); + } else if (!hex && n >= 2 && n <= 4 && + isdigit(charName[0]) && isdigit(charName[1])) { + code2 = atoi(charName); + } else if (n >= 3 && n <= 5 && + isdigit(charName[1]) && isdigit(charName[2])) { + code2 = atoi(charName+1); + } else if (n >= 4 && n <= 6 && + isdigit(charName[2]) && isdigit(charName[3])) { + code2 = atoi(charName+2); + } + if (code2 >= 0 && code2 <= 0xff) { + toUnicode[code] = (Unicode)code2; } - } else { - toUnicode[code] = 0; } } + } - // pass 2: try to fill in the missing chars, looking for names of - // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B' - // are any letters, 'xx' is two hex digits, and 'nn' is 2-4 - // decimal digits - if (missing && globalParams->getMapNumericCharNames()) { - for (code = 0; code < 256; ++code) { - if ((charName = enc[code]) && !toUnicode[code] && - strcmp(charName, ".notdef")) { - n = strlen(charName); - code2 = -1; - if (hex && n == 3 && isalpha(charName[0]) && - isxdigit(charName[1]) && isxdigit(charName[2])) { - sscanf(charName+1, "%x", &code2); - } else if (hex && n == 2 && - isxdigit(charName[0]) && isxdigit(charName[1])) { - sscanf(charName, "%x", &code2); - } else if (!hex && n >= 2 && n <= 4 && - isdigit(charName[0]) && isdigit(charName[1])) { - code2 = atoi(charName); - } else if (n >= 3 && n <= 5 && - isdigit(charName[1]) && isdigit(charName[2])) { - code2 = atoi(charName+1); - } else if (n >= 4 && n <= 6 && - isdigit(charName[2]) && isdigit(charName[3])) { - code2 = atoi(charName+2); - } - if (code2 >= 0 && code2 <= 0xff) { - toUnicode[code] = (Unicode)code2; - } + // construct the char code -> Unicode mapping object + ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode); + + // merge in a ToUnicode CMap, if there is one -- this overwrites + // existing entries in ctu, i.e., the ToUnicode CMap takes + // precedence, but the other encoding info is allowed to fill in any + // holes + readToUnicodeCMap(fontDict, 8, ctu); + + // look for a Unicode-to-Unicode mapping + if (name && (utu = globalParams->getUnicodeToUnicode(name))) { + for (i = 0; i < 256; ++i) { + toUnicode[i] = 0; + } + ctu2 = CharCodeToUnicode::make8BitToUnicode(toUnicode); + for (i = 0; i < 256; ++i) { + n = ctu->mapToUnicode((CharCode)i, uBuf, 8); + if (n >= 1) { + n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8); + if (n >= 1) { + ctu2->setMapping((CharCode)i, uBuf, n); } } } - - ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode); + utu->decRefCnt(); + delete ctu; + ctu = ctu2; } //----- get the character widths ----- @@ -697,13 +793,22 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, fontDict->lookup("FirstChar", &obj1); firstChar = obj1.isInt() ? obj1.getInt() : 0; obj1.free(); + if (firstChar < 0 || firstChar > 255) { + firstChar = 0; + } fontDict->lookup("LastChar", &obj1); lastChar = obj1.isInt() ? obj1.getInt() : 255; obj1.free(); + if (lastChar < 0 || lastChar > 255) { + lastChar = 255; + } mul = (type == fontType3) ? fontMat[0] : 0.001; fontDict->lookup("Widths", &obj1); if (obj1.isArray()) { flags |= fontFixedWidth; + if (obj1.arrayGetLength() < lastChar - firstChar + 1) { + lastChar = firstChar + obj1.arrayGetLength() - 1; + } for (code = firstChar; code <= lastChar; ++code) { obj1.arrayGet(code - firstChar, &obj2); if (obj2.isNum()) { @@ -797,12 +902,131 @@ CharCodeToUnicode *Gfx8BitFont::getToUnicode() { return ctu; } +Gushort *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) { + Gushort *map; + int cmapPlatform, cmapEncoding; + int unicodeCmap, macRomanCmap, msSymbolCmap, cmap; + GBool useMacRoman, useUnicode; + char *charName; + Unicode u; + int code, i, n; + + map = (Gushort *)gmallocn(256, sizeof(Gushort)); + for (i = 0; i < 256; ++i) { + map[i] = 0; + } + + // To match up with the Adobe-defined behaviour, we choose a cmap + // like this: + // 1. If the PDF font has an encoding: + // 1a. If the PDF font specified MacRomanEncoding and the + // TrueType font has a Macintosh Roman cmap, use it, and + // reverse map the char names through MacRomanEncoding to + // get char codes. + // 1b. If the TrueType font has a Microsoft Unicode cmap or a + // non-Microsoft Unicode cmap, use it, and use the Unicode + // indexes, not the char codes. + // 1c. If the PDF font is symbolic and the TrueType font has a + // Microsoft Symbol cmap, use it, and use char codes + // directly (possibly with an offset of 0xf000). + // 1d. If the TrueType font has a Macintosh Roman cmap, use it, + // as in case 1a. + // 2. If the PDF font does not have an encoding or the PDF font is + // symbolic: + // 2a. If the TrueType font has a Macintosh Roman cmap, use it, + // and use char codes directly (possibly with an offset of + // 0xf000). + // 2b. If the TrueType font has a Microsoft Symbol cmap, use it, + // and use char codes directly (possible with an offset of + // 0xf000). + // 3. If none of these rules apply, use the first cmap and hope for + // the best (this shouldn't happen). + unicodeCmap = macRomanCmap = msSymbolCmap = -1; + for (i = 0; i < ff->getNumCmaps(); ++i) { + cmapPlatform = ff->getCmapPlatform(i); + cmapEncoding = ff->getCmapEncoding(i); + if ((cmapPlatform == 3 && cmapEncoding == 1) || + cmapPlatform == 0) { + unicodeCmap = i; + } else if (cmapPlatform == 1 && cmapEncoding == 0) { + macRomanCmap = i; + } else if (cmapPlatform == 3 && cmapEncoding == 0) { + msSymbolCmap = i; + } + } + cmap = 0; + useMacRoman = gFalse; + useUnicode = gFalse; + if (hasEncoding) { + if (usesMacRomanEnc && macRomanCmap >= 0) { + cmap = macRomanCmap; + useMacRoman = gTrue; + } else if (unicodeCmap >= 0) { + cmap = unicodeCmap; + useUnicode = gTrue; + } else if ((flags & fontSymbolic) && msSymbolCmap >= 0) { + cmap = msSymbolCmap; + } else if ((flags & fontSymbolic) && macRomanCmap >= 0) { + cmap = macRomanCmap; + } else if (macRomanCmap >= 0) { + cmap = macRomanCmap; + useMacRoman = gTrue; + } + } else { + if (macRomanCmap >= 0) { + cmap = macRomanCmap; + } else if (msSymbolCmap >= 0) { + cmap = msSymbolCmap; + } + } + + // reverse map the char names through MacRomanEncoding, then map the + // char codes through the cmap + if (useMacRoman) { + for (i = 0; i < 256; ++i) { + if ((charName = enc[i])) { + if ((code = globalParams->getMacRomanCharCode(charName))) { + map[i] = ff->mapCodeToGID(cmap, code); + } + } + } + + // map Unicode through the cmap + } else if (useUnicode) { + for (i = 0; i < 256; ++i) { + if (((charName = enc[i]) && + (u = globalParams->mapNameToUnicode(charName))) || + (n = ctu->mapToUnicode((CharCode)i, &u, 1))) { + map[i] = ff->mapCodeToGID(cmap, u); + } + } + + // map the char codes through the cmap, possibly with an offset of + // 0xf000 + } else { + for (i = 0; i < 256; ++i) { + if (!(map[i] = ff->mapCodeToGID(cmap, i))) { + map[i] = ff->mapCodeToGID(cmap, 0xf000 + i); + } + } + } + + // try the TrueType 'post' table to handle any unmapped characters + for (i = 0; i < 256; ++i) { + if (!map[i] && (charName = enc[i])) { + map[i] = (Gushort)(int)ff->mapNameToGID(charName); + } + } + + return map; +} + Dict *Gfx8BitFont::getCharProcs() { return charProcs.isDict() ? charProcs.getDict() : (Dict *)NULL; } Object *Gfx8BitFont::getCharProc(int code, Object *proc) { - if (charProcs.isDict()) { + if (enc[code] && charProcs.isDict()) { charProcs.dictLookup(enc[code], proc); } else { proc->initNull(); @@ -818,12 +1042,12 @@ Dict *Gfx8BitFont::getResources() { // GfxCIDFont //------------------------------------------------------------------------ -static int cmpWidthExcep(const void *w1, const void *w2) { +static int CDECL cmpWidthExcep(const void *w1, const void *w2) { return ((GfxFontCIDWidthExcep *)w1)->first - ((GfxFontCIDWidthExcep *)w2)->first; } -static int cmpWidthExcepV(const void *w1, const void *w2) { +static int CDECL cmpWidthExcepV(const void *w1, const void *w2) { return ((GfxFontCIDWidthExcepV *)w1)->first - ((GfxFontCIDWidthExcepV *)w2)->first; } @@ -836,8 +1060,11 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, GString *collection, *cMapName; Object desFontDictObj; Object obj1, obj2, obj3, obj4, obj5, obj6; + CharCodeToUnicode *utu; + CharCode c; + Unicode uBuf[8]; int c1, c2; - int excepsSize, i, j, k; + int excepsSize, i, j, k, n; ascent = 0.95; descent = -0.35; @@ -908,7 +1135,7 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, obj1.free(); // look for a ToUnicode CMap - if (!(ctu = readToUnicodeCMap(fontDict, 16))) { + if (!(ctu = readToUnicodeCMap(fontDict, 16, NULL))) { // the "Adobe-Identity" and "Adobe-UCS" collections don't have // cidToUnicode files @@ -919,12 +1146,30 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, if (!(ctu = globalParams->getCIDToUnicode(collection))) { error(-1, "Unknown character collection '%s'", collection->getCString()); - delete collection; - goto err2; + // fall-through, assuming the Identity mapping -- this appears + // to match Adobe's behavior } } } + // look for a Unicode-to-Unicode mapping + if (name && (utu = globalParams->getUnicodeToUnicode(name))) { + if (ctu) { + for (c = 0; c < ctu->getLength(); ++c) { + n = ctu->mapToUnicode(c, uBuf, 8); + if (n >= 1) { + n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8); + if (n >= 1) { + ctu->setMapping(c, uBuf, n); + } + } + } + utu->decRefCnt(); + } else { + ctu = utu; + } + } + // encoding (i.e., CMap) //~ need to handle a CMap stream here //~ also need to deal with the UseCMap entry in the stream dict @@ -947,17 +1192,17 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, // CIDToGIDMap (for embedded TrueType fonts) if (type == fontCIDType2) { - fontDict->lookup("CIDToGIDMap", &obj1); + desFontDict->lookup("CIDToGIDMap", &obj1); if (obj1.isStream()) { cidToGIDLen = 0; i = 64; - cidToGID = (Gushort *)gmalloc(i * sizeof(Gushort)); + cidToGID = (Gushort *)gmallocn(i, sizeof(Gushort)); obj1.streamReset(); while ((c1 = obj1.streamGetChar()) != EOF && (c2 = obj1.streamGetChar()) != EOF) { if (cidToGIDLen == i) { i *= 2; - cidToGID = (Gushort *)grealloc(cidToGID, i * sizeof(Gushort)); + cidToGID = (Gushort *)greallocn(cidToGID, i, sizeof(Gushort)); } cidToGID[cidToGIDLen++] = (Gushort)((c1 << 8) + c2); } @@ -987,8 +1232,8 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, if (widths.nExceps == excepsSize) { excepsSize += 16; widths.exceps = (GfxFontCIDWidthExcep *) - grealloc(widths.exceps, - excepsSize * sizeof(GfxFontCIDWidthExcep)); + greallocn(widths.exceps, + excepsSize, sizeof(GfxFontCIDWidthExcep)); } widths.exceps[widths.nExceps].first = obj2.getInt(); widths.exceps[widths.nExceps].last = obj3.getInt(); @@ -1003,8 +1248,8 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, if (widths.nExceps + obj3.arrayGetLength() > excepsSize) { excepsSize = (widths.nExceps + obj3.arrayGetLength() + 15) & ~15; widths.exceps = (GfxFontCIDWidthExcep *) - grealloc(widths.exceps, - excepsSize * sizeof(GfxFontCIDWidthExcep)); + greallocn(widths.exceps, + excepsSize, sizeof(GfxFontCIDWidthExcep)); } j = obj2.getInt(); for (k = 0; k < obj3.arrayGetLength(); ++k) { @@ -1036,11 +1281,11 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, if (desFontDict->lookup("DW2", &obj1)->isArray() && obj1.arrayGetLength() == 2) { if (obj1.arrayGet(0, &obj2)->isNum()) { - widths.defVY = obj1.getNum() * 0.001; + widths.defVY = obj2.getNum() * 0.001; } obj2.free(); if (obj1.arrayGet(1, &obj2)->isNum()) { - widths.defHeight = obj1.getNum() * 0.001; + widths.defHeight = obj2.getNum() * 0.001; } obj2.free(); } @@ -1051,8 +1296,8 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, excepsSize = 0; i = 0; while (i + 1 < obj1.arrayGetLength()) { - obj1.arrayGet(0, &obj2); - obj2.arrayGet(0, &obj3); + obj1.arrayGet(i, &obj2); + obj1.arrayGet(i+ 1, &obj3); if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) { if (obj1.arrayGet(i + 2, &obj4)->isNum() && obj1.arrayGet(i + 3, &obj5)->isNum() && @@ -1060,8 +1305,8 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, if (widths.nExcepsV == excepsSize) { excepsSize += 16; widths.excepsV = (GfxFontCIDWidthExcepV *) - grealloc(widths.excepsV, - excepsSize * sizeof(GfxFontCIDWidthExcepV)); + greallocn(widths.excepsV, + excepsSize, sizeof(GfxFontCIDWidthExcepV)); } widths.excepsV[widths.nExcepsV].first = obj2.getInt(); widths.excepsV[widths.nExcepsV].last = obj3.getInt(); @@ -1081,14 +1326,14 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, excepsSize = (widths.nExcepsV + obj3.arrayGetLength() / 3 + 15) & ~15; widths.excepsV = (GfxFontCIDWidthExcepV *) - grealloc(widths.excepsV, - excepsSize * sizeof(GfxFontCIDWidthExcepV)); + greallocn(widths.excepsV, + excepsSize, sizeof(GfxFontCIDWidthExcepV)); } j = obj2.getInt(); - for (k = 0; k < obj3.arrayGetLength(); ++k) { + for (k = 0; k < obj3.arrayGetLength(); k += 3) { if (obj3.arrayGet(k, &obj4)->isNum() && - obj3.arrayGet(k, &obj5)->isNum() && - obj3.arrayGet(k, &obj6)->isNum()) { + obj3.arrayGet(k+1, &obj5)->isNum() && + obj3.arrayGet(k+2, &obj6)->isNum()) { widths.excepsV[widths.nExceps].first = j; widths.excepsV[widths.nExceps].last = j; widths.excepsV[widths.nExceps].height = obj4.getNum() * 0.001; @@ -1225,7 +1470,9 @@ int GfxCIDFont::getWMode() { } CharCodeToUnicode *GfxCIDFont::getToUnicode() { - ctu->incRefCnt(); + if (ctu) { + ctu->incRefCnt(); + } return ctu; } @@ -1237,24 +1484,38 @@ GString *GfxCIDFont::getCollection() { // GfxFontDict //------------------------------------------------------------------------ -GfxFontDict::GfxFontDict(XRef *xref, Dict *fontDict) { +GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict) { int i; Object obj1, obj2; + Ref r; numFonts = fontDict->getLength(); - fonts = (GfxFont **)gmalloc(numFonts * sizeof(GfxFont *)); + fonts = (GfxFont **)gmallocn(numFonts, sizeof(GfxFont *)); for (i = 0; i < numFonts; ++i) { fontDict->getValNF(i, &obj1); obj1.fetch(xref, &obj2); - if (obj1.isRef() && obj2.isDict()) { + if (obj2.isDict()) { + if (obj1.isRef()) { + r = obj1.getRef(); + } else { + // no indirect reference for this font, so invent a unique one + // (legal generation numbers are five digits, so any 6-digit + // number would be safe) + r.num = i; + if (fontDictRef) { + r.gen = 100000 + fontDictRef->num; + } else { + r.gen = 999999; + } + } fonts[i] = GfxFont::makeFont(xref, fontDict->getKey(i), - obj1.getRef(), obj2.getDict()); + r, obj2.getDict()); if (fonts[i] && !fonts[i]->isOk()) { delete fonts[i]; fonts[i] = NULL; } } else { - error(-1, "font resource is not a dictionary reference"); + error(-1, "font resource is not a dictionary"); fonts[i] = NULL; } obj1.free();