1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
22 //------------------------------------------------------------------------
// Character classification table, indexed by byte value (0x00-0xff).
// Entries set to 1 (white space): 0x00, 0x09 (tab), 0x0a (LF),
// 0x0c (FF), 0x0d (CR), and 0x20 (space).
// Entries set to 2 (delimiters): '%', '(', ')', '/', '<', '>',
// '[', ']', '{', and '}'.
// Every other entry is 0 (an ordinary character).
24 // A '1' in this array means the character is white space. A '1' or
25 // '2' means the character ends a name or command.
26 static char specialChars[256] = {
27 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
28 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
29 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x
31 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x
33 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
34 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x
35 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
37 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // fx
45 //------------------------------------------------------------------------
47 //------------------------------------------------------------------------
// Constructs a lexer that reads from a single stream.
// curStr is initialized from str, then a copy of curStr is added to a
// newly allocated Array (streams) created with xref.
// NOTE(review): other statements of this constructor are not visible
// here (e.g. the declaration of obj) -- confirm before relying on this
// summary as complete.
49 Lexer::Lexer(XRef *xref, Stream *str) {
52 curStr.initStream(str);
53 streams = new Array(xref);
54 streams->add(curStr.copy(&obj));
// Constructs a lexer from an Object.
// When obj is a stream, a new Array is allocated and a copy of obj is
// added to it. The other visible assignment takes the array directly
// from obj->getArray() (presumably the else branch -- confirm).
// If the resulting array is non-empty, the element at index strPtr is
// fetched into curStr.
60 Lexer::Lexer(XRef *xref, Object *obj) {
63 if (obj->isStream()) {
64 streams = new Array(xref);
66 streams->add(obj->copy(&obj2));
68 streams = obj->getArray();
// only fetch a current stream when there is at least one entry
72 if (streams->getLength() > 0) {
73 streams->get(strPtr, &curStr);
// File-scope counter, initialized to zero. Its visible use is as the
// %d argument of the "Illegal characters in hex string" message below.
77 static int illegalChars = 0;
// NOTE(review): the header of the function containing the next two
// lines is not visible here -- confirm which function this is.
80 if (!curStr.isNone()) {
// reports the accumulated illegalChars count (position argument is 0)
88 error(0, "Illegal characters in hex string (%d)", illegalChars);
// Reads the next character from the lexer's input.
// While curStr is not none and reading from it yields EOF, the loop
// moves on: if strPtr is still below the number of entries in streams,
// the entry at strPtr is fetched into curStr and reset.
// NOTE(review): the statements that advance strPtr and return the
// character are not visible here -- confirm.
92 int Lexer::getChar() {
// the read happens inside the loop condition; c keeps the last value read
96 while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
100 if (strPtr < streams->getLength()) {
101 streams->get(strPtr, &curStr);
// reset the newly selected stream before reading from it
102 curStr.streamReset();
// Peeks at the next character of the current stream.
// When curStr is none, the guarded branch is taken (its body is not
// visible here; presumably it returns EOF -- confirm). Otherwise the
// result of curStr.streamLookChar() is returned.
108 int Lexer::lookChar() {
109 if (curStr.isNone()) {
112 return curStr.streamLookChar();
// Reads the next token from the input and stores it in *obj.
// Visible outcomes:
//   - end of input while skipping: returns obj->initEOF()
//   - names: obj->initName(tokBuf)
//   - "<<" and ">>": obj->initCmd(tokBuf)
//   - "true" / "false": obj->initBool(gTrue / gFalse)
//   - any other command word: obj->initCmd(tokBuf)
// Numbers, literal strings and hex strings are also parsed here, but
// the statements that store them in obj are not visible.
// NOTE(review): the final return statement is not visible; presumably
// obj is returned -- confirm.
115 Object *Lexer::getObj(Object *obj) {
118 GBool comment, neg, done;
125 // skip whitespace and comments
128 if ((c = getChar()) == EOF) {
129 return obj->initEOF();
// an end-of-line character is tested separately from other input
132 if (c == '\r' || c == '\n')
// '%' is tested while skipping (presumably starts a comment -- confirm)
134 } else if (c == '%') {
// specialChars[c] == 1 marks white space; any other value is not
// white space
136 } else if (specialChars[c] != 1) {
141 // start reading token
// decimal digits begin a number
145 case '0': case '1': case '2': case '3': case '4':
146 case '5': case '6': case '7': case '8': case '9':
152 } else if (c == '.') {
// accumulate the next decimal digit into the integer value xi
161 xi = xi * 10 + (c - '0');
162 } else if (c == '.') {
179 // ignore minus signs in the middle of numbers to match
181 error(getPos(), "Badly formatted number");
// add the next digit to xf, weighted by the current scale
189 xf = xf + scale * (c - '0');
206 switch (c = getChar()) {
210 // This breaks some PDF files, e.g., ones from Photoshop.
214 error(getPos(), "Unterminated string");
// numParen is decremented here; reaching zero is the tested condition
224 if (--numParen == 0) {
232 switch (c = getChar()) {
// octal digits: each further digit shifts c2 left by three bits and
// adds the digit value
253 case '0': case '1': case '2': case '3':
254 case '4': case '5': case '6': case '7':
257 if (c >= '0' && c <= '7') {
259 c2 = (c2 << 3) + (c - '0');
261 if (c >= '0' && c <= '7') {
263 c2 = (c2 << 3) + (c - '0');
276 error(getPos(), "Unterminated string");
// tokBuf is full: its contents go into the GString s, either by
// creating s from the buffer or by appending to it (the test choosing
// between the two is not visible here)
291 if (n == tokBufSize) {
293 s = new GString(tokBuf, tokBufSize);
295 s->append(tokBuf, tokBufSize);
// the n characters in tokBuf go into s, again by create or append
304 s = new GString(tokBuf, n);
306 s->append(tokBuf, n);
// continue while the next character is neither EOF nor special
// (a non-zero specialChars entry, i.e. white space or delimiter)
314 while ((c = lookChar()) != EOF && !specialChars[c]) {
// classify c2 as a hex digit: 0-9, A-F or a-f
318 if (c2 >= '0' && c2 <= '9') {
320 } else if (c2 >= 'A' && c2 <= 'F') {
322 } else if (c2 >= 'a' && c2 <= 'f') {
// same hex-digit classification again
330 if (c2 >= '0' && c2 <= '9') {
332 } else if (c2 >= 'A' && c2 <= 'F') {
334 } else if (c2 >= 'a' && c2 <= 'f') {
338 //error(getPos(), "Illegal digit in hex char in name");
// n is incremented first; reaching tokBufSize is reported as an error
342 if (++n == tokBufSize) {
343 error(getPos(), "Name token too long");
349 obj->initName(tokBuf);
357 obj->initCmd(tokBuf);
360 // hex string or dict punctuation
// both buffer positions are set to '<' for the "<<" command
367 tokBuf[0] = tokBuf[1] = '<';
369 obj->initCmd(tokBuf);
// end of input inside a hex string is reported as an error
381 } else if (c == EOF) {
382 error(getPos(), "Unterminated hex string");
// only characters that are not white space are examined here
384 } else if (specialChars[c] != 1) {
// classify c as a hex digit: 0-9, A-F or a-f
386 if (c >= '0' && c <= '9')
388 else if (c >= 'A' && c <= 'F')
390 else if (c >= 'a' && c <= 'f')
394 //error(getPos(), "Illegal character <%02x> in hex string", c);
// same buffer-full handling as above: create s or append to it
397 if (n == tokBufSize) {
399 s = new GString(tokBuf, tokBufSize);
401 s->append(tokBuf, tokBufSize);
413 s = new GString(tokBuf, n);
415 s->append(tokBuf, n);
// appends c2 shifted into the upper four bits (presumably the last
// digit of an odd-length hex string -- confirm)
417 s->append((char)(c2 << 4));
// both buffer positions are set to '>' for the ">>" command
427 tokBuf[0] = tokBuf[1] = '>';
429 obj->initCmd(tokBuf);
432 //error(getPos(), "Illegal character '>'");
441 //error(getPos(), "Illegal character '%c'", c);
// continue while the next character is neither EOF nor special
451 while ((c = lookChar()) != EOF && !specialChars[c]) {
453 if (++n == tokBufSize) {
454 error(getPos(), "Command token too long");
// keyword checks: the first-character test runs before strcmp
460 if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
461 obj->initBool(gTrue);
462 } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
463 obj->initBool(gFalse);
// the body of the "null" branch is not visible here
464 } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
// anything else is stored as a command
467 obj->initCmd(tokBuf);
// Advances the input toward the start of the next line.
// The visible tests are for EOF or '\n' on the character read, and for
// a '\n' as the next character seen via lookChar().
// NOTE(review): the loop structure and any '\r' handling are not
// visible here; the lookChar() test presumably handles a "\r\n" pair
// -- confirm.
474 void Lexer::skipToNextLine() {
479 if (c == EOF || c == '\n') {
// peek rather than read, so the next character is consumed only if
// it is '\n'
483 if ((c = lookChar()) == '\n') {
// Returns true when c is a white-space character, i.e. c lies in
// 0..0xff and its specialChars entry equals 1.
// The range check comes first, so negative values such as EOF and
// values above 0xff never index the 256-entry table.
491 GBool Lexer::isSpace(int c) {
492 return c >= 0 && c <= 0xff && specialChars[c] == 1;