1 //========================================================================
3 // CharCodeToUnicode.cc
5 // Copyright 2001-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
21 #include "GlobalParams.h"
22 #include "PSTokenizer.h"
23 #include "CharCodeToUnicode.h"
25 //------------------------------------------------------------------------
// Capacity of the fixed-size Unicode array held by each
// CharCodeToUnicodeString entry.
27 #define maxUnicodeString 8
// One multi-character mapping entry.  Code elsewhere in this file also
// reads and writes members named c and len on these entries; their
// declarations are not visible in this excerpt.
29 struct CharCodeToUnicodeString {
// Unicode values for this entry; room for maxUnicodeString of them.
31 Unicode u[maxUnicodeString];
35 //------------------------------------------------------------------------
// Character-source callback: takes an opaque data pointer and returns an
// int.  parseCMap() and mergeCMap() hand it to parseCMap1() together with
// the address of a char* that points at the CMap text.  The body is not
// visible in this excerpt.
37 static int getCharFromString(void *data) {
// Character-source callback for file input: data is treated as a FILE*
// and the result of fgetc() on it is returned (next byte, or EOF).
51 static int getCharFromFile(void *data) {
52 return fgetc((FILE *)data);
55 //------------------------------------------------------------------------
// Builds a CharCodeToUnicode from the cidToUnicode file named by fileName.
// Lines are fetched with getLine() and each is parsed as one hexadecimal
// value; the resulting object is tagged with a copy of collection.
// What is returned on the failure paths is not visible in this excerpt.
57 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
58 GString *collection) {
61 CharCode size, mapLenA;
64 CharCodeToUnicode *ctu;
// Open the file for reading; a failure is reported through error().
66 if (!(f = fopen(fileName->getCString(), "r"))) {
67 error(-1, "Couldn't open cidToUnicode file '%s'",
68 fileName->getCString());
// Initial allocation of the table: size entries of Unicode.
73 mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
76 while (getLine(buf, sizeof(buf), f)) {
// Table is full: reallocate mapA to hold size entries.  The statement that
// enlarges size is not visible here.
77 if (mapLenA == size) {
79 mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
// A line is accepted only if a hex number can be read from its start.
81 if (sscanf(buf, "%x", &u) == 1) {
// The line number in the message is derived from the entry count so far.
84 error(-1, "Bad line (%d) in cidToUnicode file '%s'",
85 (int)(mapLenA + 1), fileName->getCString());
// Construct the result; the fourth argument (copyMap) is gTrue.
92 ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue,
// Builds a CharCodeToUnicode from a unicodeToUnicode file.  Each line is
// split on whitespace into hexadecimal tokens: the first is the source
// value (u0) and the following ones, up to maxUnicodeString of them, are
// collected into uBuf.  The visible code fills a zero-initialised table
// (mapA) and a growable array of multi-value entries (sMapA), then builds
// the result tagged with a copy of fileName.  The parameter list and the
// failure-path return values are not visible in this excerpt.
98 CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
102 CharCodeToUnicodeString *sMapA;
103 CharCode size, oldSize, len, sMapSizeA, sMapLenA;
107 Unicode uBuf[maxUnicodeString];
108 CharCodeToUnicode *ctu;
// Open the file for reading; a failure is reported through error().
111 if (!(f = fopen(fileName->getCString(), "r"))) {
112 error(-1, "Couldn't open unicodeToUnicode file '%s'",
113 fileName->getCString());
// Allocate the table and clear it so untouched slots read as 0.
118 mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
119 memset(mapA, 0, size * sizeof(Unicode));
// The multi-value entry array starts out empty.
122 sMapSizeA = sMapLenA = 0;
125 while (getLine(buf, sizeof(buf), f)) {
// First token on the line must be a hex number: the source value u0.
127 if (!(tok = strtok(buf, " \t\r\n")) ||
128 sscanf(tok, "%x", &u0) != 1) {
129 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
130 line, fileName->getCString());
// Read the remaining tokens, never more than uBuf can hold.
134 while (n < maxUnicodeString) {
// No further token on this line.
135 if (!(tok = strtok(NULL, " \t\r\n"))) {
// Each remaining token must also parse as hex.
138 if (sscanf(tok, "%x", &uBuf[n]) != 1) {
139 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
140 line, fileName->getCString());
146 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
147 line, fileName->getCString());
// Grow the table and zero only the newly added tail [oldSize, size).
155 mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
156 memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
// Entry array is full: reallocate it to sMapSizeA entries (the statement
// that enlarges sMapSizeA is not visible here).
162 if (sMapLenA == sMapSizeA) {
164 sMapA = (CharCodeToUnicodeString *)
165 greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
// Record the entry: source value, the n parsed values, and their count.
167 sMapA[sMapLenA].c = u0;
168 for (i = 0; i < n; ++i) {
169 sMapA[sMapLenA].u[i] = uBuf[i];
171 sMapA[sMapLenA].len = n;
// Construct the result (copyMap = gTrue), passing the entry array along.
180 ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
181 sMapA, sMapLenA, sMapSizeA);
// Wraps a 256-entry code -> Unicode table in a new CharCodeToUnicode.
// The object is created with no tag, copyMap = gTrue, and no multi-value
// entries (NULL array, length 0, size 0).
186 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
187 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
// Creates an untagged CharCodeToUnicode and populates it by parsing the
// CMap text held in buf.  nBits is forwarded unchanged to parseCMap1().
// The return statement is not visible in this excerpt.
190 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
191 CharCodeToUnicode *ctu;
194 ctu = new CharCodeToUnicode(NULL);
// p points at the text; its address is passed as the callback's data.
195 p = buf->getCString();
196 ctu->parseCMap1(&getCharFromString, &p, nBits);
// Parses the CMap text in buf into this existing object, using the same
// string-backed callback and nBits forwarding as parseCMap().
200 void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
203 p = buf->getCString();
204 parseCMap1(&getCharFromString, &p, nBits);
// Core ToUnicode CMap parser.  Tokens are pulled from a PSTokenizer built
// on getCharFunc/data, and three operators are handled in the visible
// code: "usecmap", "beginbfchar" ... "endbfchar", and
// "beginbfrange" ... "endbfrange".  Mappings are recorded via addMapping().
// The rest of the parameter list and the computation of nDigits are on
// lines not visible in this excerpt.
207 void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
210 char tok1[256], tok2[256], tok3[256];
211 int nDigits, n1, n2, n3;
213 CharCode code1, code2;
218 pst = new PSTokenizer(getCharFunc, data);
// tok1 holds the token that precedes tok2, so when tok2 is an operator its
// operand is still available in tok1.
219 pst->getToken(tok1, sizeof(tok1), &n1);
220 while (pst->getToken(tok2, sizeof(tok2), &n2)) {
// "/Name usecmap": look the named CMap up through globalParams and, if a
// file is found, parse it recursively with the file-backed callback.
221 if (!strcmp(tok2, "usecmap")) {
222 if (tok1[0] == '/') {
// Skip the leading '/' to get the bare name.
223 name = new GString(tok1 + 1);
224 if ((f = globalParams->findToUnicodeFile(name))) {
225 parseCMap1(&getCharFromFile, f, nBits);
228 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
233 pst->getToken(tok1, sizeof(tok1), &n1);
// bfchar block: pairs of <srcCode> <dstString> until "endbfchar".
234 } else if (!strcmp(tok2, "beginbfchar")) {
235 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
236 if (!strcmp(tok1, "endbfchar")) {
// A source code without a following destination token is an error.
239 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
240 !strcmp(tok2, "endbfchar")) {
241 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Source must be exactly nDigits characters wrapped in <>; destination
// must be wrapped in <> (its length is not constrained here).
244 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
245 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
246 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Overwrite the closing '>' with NUL so the hex text can be parsed in place.
249 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
250 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
251 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Pass the destination hex digits (brackets excluded) with offset 0.
254 addMapping(code1, tok2 + 1, n2 - 2, 0);
256 pst->getToken(tok1, sizeof(tok1), &n1);
// bfrange block: triples <lo> <hi> (<dst> | [ <dst> ... ]) until
// "endbfrange".
257 } else if (!strcmp(tok2, "beginbfrange")) {
258 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
259 if (!strcmp(tok1, "endbfrange")) {
// Two more tokens are required and neither may be the block terminator.
262 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
263 !strcmp(tok2, "endbfrange") ||
264 !pst->getToken(tok3, sizeof(tok3), &n3) ||
265 !strcmp(tok3, "endbfrange")) {
266 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Both range endpoints must be exactly nDigits characters wrapped in <>.
269 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
270 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
271 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
274 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
275 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
276 sscanf(tok2 + 1, "%x", &code2) != 1) {
277 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Array form: each <dst> element maps to the successive code code1 + i,
// stopping at "]" or once code1 + i passes code2.
280 if (!strcmp(tok3, "[")) {
282 while (pst->getToken(tok1, sizeof(tok1), &n1) &&
283 code1 + i <= code2) {
284 if (!strcmp(tok1, "]")) {
287 if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
289 addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
291 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Single-string form: every code in [code1, code2] gets tok3's value with
// offset i passed to addMapping().
295 } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
// NOTE(review): this loop stops only once code1 exceeds code2.  If CharCode
// is an unsigned type and code2 equals its maximum value, ++code1 would
// wrap and the condition could never become false -- confirm the type and
// whether nDigits bounds the input enough to rule this out.
297 for (i = 0; code1 <= code2; ++code1, ++i) {
298 addMapping(code1, tok3 + 1, n3 - 2, i);
302 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
305 pst->getToken(tok1, sizeof(tok1), &n1);
// Records a mapping for code from the hex text uStr (n characters).  Two
// storage paths are visible: a single value written to map[code], and a
// multi-value entry appended to sMap built from 4-hex-digit groups.  The
// condition that selects between them, and the final parameter's
// declaration (used below as offset), are not visible in this excerpt.
313 void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
// Grow map so that code is a valid index; the new length is code + 256
// rounded down to a multiple of 256.
320 if (code >= mapLen) {
// NOTE(review): code + 256 can wrap if CharCode is a fixed-width unsigned
// type and code is near its maximum -- confirm callers bound code.
322 mapLen = (code + 256) & ~255;
323 map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
// Walk the newly added slots [oldLen, mapLen); loop body not visible here.
324 for (i = oldLen; i < mapLen; ++i) {
// Single-value path: parse the whole string as one hex number.
329 if (sscanf(uStr, "%x", &u) != 1) {
330 error(-1, "Illegal entry in ToUnicode CMap");
333 map[code] = u + offset;
// Multi-value path: grow sMap in steps of 16 entries when it is full.
335 if (sMapLen >= sMapSize) {
336 sMapSize = sMapSize + 16;
337 sMap = (CharCodeToUnicodeString *)
338 greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
341 sMap[sMapLen].c = code;
// One Unicode value per 4 hex digits.
342 sMap[sMapLen].len = n / 4;
// Parse at most maxUnicodeString groups so u[] is not overrun here.
343 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
// Copy one 4-character group into the scratch buffer for parsing.
344 strncpy(uHex, uStr + j*4, 4);
346 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
347 error(-1, "Illegal entry in ToUnicode CMap");
// The offset is applied to the last value of the entry.
// NOTE(review): the index below uses len - 1 without the maxUnicodeString
// clamp applied in the loop above; unless a line not visible here bounds
// len (and guarantees len >= 1), this can index outside u[] -- confirm.
350 sMap[sMapLen].u[sMap[sMapLen].len - 1] += offset;
// Constructs an object from a tag alone: allocates a map of mapLen entries,
// walks every slot (loop body not visible here), and starts with an empty
// multi-value entry array.  How mapLen is chosen is not visible.
355 CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
360 map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
361 for (i = 0; i < mapLen; ++i) {
365 sMapLen = sMapSize = 0;
// Constructs an object from caller-supplied tables.  The visible code
// allocates a private map of mapLen entries and copies mapA into it, and
// adopts sMapSizeA as the entry-array capacity.
// NOTE(review): presumably the allocate-and-copy happens only when copyMap
// is true; the branch itself is not visible in this excerpt -- confirm.
372 CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
373 CharCode mapLenA, GBool copyMap,
374 CharCodeToUnicodeString *sMapA,
375 int sMapLenA, int sMapSizeA) {
379 map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
380 memcpy(map, mapA, mapLen * sizeof(Unicode));
386 sMapSize = sMapSizeA;
// Destructor.  The only visible step destroys the object's mutex; any
// release of tag, map or sMap happens on lines not visible here.
393 CharCodeToUnicode::~CharCodeToUnicode() {
402 gDestroyMutex(&mutex);
// Adds a reference to this object.  Only the mutex release is visible;
// presumably the counter update and matching lock precede it -- confirm.
406 void CharCodeToUnicode::incRefCnt() {
412 gUnlockMutex(&mutex);
// Drops a reference to this object.  What is done when the count reaches
// zero is not visible in this excerpt.
416 void CharCodeToUnicode::decRefCnt() {
// Decrement first, then remember whether this was the last reference.
422 done = --refCnt == 0;
424 gUnlockMutex(&mutex);
// Returns true when this object has a tag and tag->cmp(tagA) yields 0;
// an object without a tag never matches.
431 GBool CharCodeToUnicode::match(GString *tagA) {
432 return tag && !tag->cmp(tagA);
// Sets the mapping for code c from the array u of len values.  The visible
// code scans sMap for an existing entry for c, grows sMap when it is full,
// and copies no more than maxUnicodeString values.  The single-value case
// and the loop bodies are not visible in this excerpt.
435 void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
// Linear search for an entry already keyed on c.
441 for (i = 0; i < sMapLen; ++i) {
442 if (sMap[i].c == c) {
// No free slot left: reallocate to sMapSize entries (the statement that
// enlarges sMapSize is not visible here).
447 if (sMapLen == sMapSize) {
449 sMap = (CharCodeToUnicodeString *)
450 greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
// Copy loop is clamped to the capacity of the entry's u[] array.
457 for (j = 0; j < len && j < maxUnicodeString; ++j) {
// Looks up code c and writes its Unicode value(s) into u, which has room
// for size values.  The visible part is the multi-value lookup: a linear
// scan of sMap, copying at most min(entry length, size) values.  The
// single-value path and the return values are not visible in this excerpt.
463 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
473 for (i = 0; i < sMapLen; ++i) {
474 if (sMap[i].c == c) {
// Never write more than the caller's buffer can hold.
475 for (j = 0; j < sMap[i].len && j < size; ++j) {
484 //------------------------------------------------------------------------
// Creates a cache backed by an array of size CharCodeToUnicode pointers and
// walks every slot (loop body not visible here; presumably it clears each
// slot -- confirm).  The assignment of size from sizeA is not visible.
486 CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
490 cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *));
491 for (i = 0; i < size; ++i) {
// Destructor: walks the slots and drops the cache's reference on entries.
// The guard around decRefCnt() (original line 500) and the release of the
// array itself are not visible in this excerpt.
496 CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
499 for (i = 0; i < size; ++i) {
501 cache[i]->decRefCnt();
// Looks for a cached object whose tag matches tag.  Return statements and
// the miss path are not visible in this excerpt.
507 CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
508 CharCodeToUnicode *ctu;
// Fast path: the front slot matches, so take an extra reference on it.
511 if (cache[0] && cache[0]->match(tag)) {
512 cache[0]->incRefCnt();
// Otherwise scan the remaining slots.
515 for (i = 1; i < size; ++i) {
516 if (cache[i] && cache[i]->match(tag)) {
// On a hit at slot i, shift slots 0..i-1 one position toward the back,
// which frees slot 0 (presumably for the hit entry -- confirm).
518 for (j = i; j >= 1; --j) {
519 cache[j] = cache[j - 1];
// Inserts ctu into the cache.  The visible steps release the entry in the
// last slot, if any, and shift every remaining entry one slot toward the
// back.  Storing ctu itself happens on lines not visible in this excerpt.
529 void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
// The last slot is about to be overwritten; drop the cache's reference.
532 if (cache[size - 1]) {
533 cache[size - 1]->decRefCnt();
535 for (i = size - 1; i >= 1; --i) {
536 cache[i] = cache[i - 1];