1 //========================================================================
5 // Copyright 1996-2002 Glyph & Cog, LLC
7 //========================================================================
10 #pragma implementation
21 //------------------------------------------------------------------------
23 // A '1' in this array means the character is white space. A '1' or
24 // '2' means the character ends a name or command.
//
// The table is indexed by character code; each source row below covers
// sixteen consecutive codes, as labelled by the trailing "// Nx" comment.
// Entries set to 1: 0x00, 0x09, 0x0a, 0x0c, 0x0d and 0x20.
// Entries set to 2: '%' '(' ')' '/' '<' '>' '[' ']' '{' '}'.
// Every other entry is 0.
25 static char specialChars[256] = {
26 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
27 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
28 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
29 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x
30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
31 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x
32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
33 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x
34 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
35 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
37 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // fx
44 //------------------------------------------------------------------------
46 //------------------------------------------------------------------------
// Construct a lexer that reads from a single stream.
// curStr is initialised from str, a new Array is constructed with xref,
// and a copy of curStr is added to that array.
// NOTE(review): only part of this constructor is visible in this view;
// the declaration of obj and any further initialisation are not shown.
48 Lexer::Lexer(XRef *xref, Stream *str) {
51 curStr.initStream(str);
52 streams = new Array(xref);
// The array receives a copy of curStr made through the scratch object obj.
53 streams->add(curStr.copy(&obj));
// Construct a lexer from an Object.
// When obj is a stream, a new Array is constructed with xref and a copy of
// obj is added to it. The other visible path takes the array directly from
// obj via getArray() (presumably the non-stream case; the branch keyword is
// not visible in this view).
// NOTE(review): streams is allocated here on one path and borrowed from obj
// on the other; confirm the cleanup code distinguishes the two cases.
59 Lexer::Lexer(XRef *xref, Object *obj) {
62 if (obj->isStream()) {
63 streams = new Array(xref);
65 streams->add(obj->copy(&obj2));
67 streams = obj->getArray();
// Load the entry at index strPtr into curStr, but only when the array is
// non-empty.
71 if (streams->getLength() > 0) {
72 streams->get(strPtr, &curStr);
// What is done with a non-none curStr is not visible in this view.
78 if (!curStr.isNone()) {
// Read the next character from the current stream.
// While curStr is not none and it yields EOF, the loop body runs; inside it,
// if strPtr is still below the number of entries in streams, the entry at
// strPtr is loaded into curStr.
// NOTE(review): how strPtr is advanced and what is returned are not visible
// in this view; presumably this moves on to the next stream and returns the
// character read, or EOF once all streams are exhausted -- confirm.
87 int Lexer::getChar() {
91 while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
95 if (strPtr < streams->getLength()) {
96 streams->get(strPtr, &curStr);
// Return the result of curStr.streamLookChar() for the current stream.
// The case where curStr is none is handled separately first; its result is
// not visible in this view (presumably EOF -- confirm).
// NOTE(review): no switch to a following stream is visible here, unlike in
// getChar().
103 int Lexer::lookChar() {
104 if (curStr.isNone()) {
107 return curStr.streamLookChar();
// Read the next token from the input and store it in obj.
// Visible outcomes: an EOF object when the input ends while skipping white
// space and comments, a name (initName), a boolean for "true" / "false",
// and a command (initCmd) for "<<", ">>" and other command tokens. Number,
// string and hex-string handling is only partly visible in this view.
// Problems in the input are reported through error() with the current
// position from getPos().
// NOTE(review): many lines of this function are not visible here; the
// comments below describe only the statements that are shown.
110 Object *Lexer::getObj(Object *obj) {
113 GBool comment, neg, done;
120 // skip whitespace and comments
// Reaching EOF while skipping produces an EOF object.
123 if ((c = getChar()) == EOF) {
124 return obj->initEOF();
// A line ending is tested for here; presumably it terminates a comment
// (the action taken is not visible).
127 if (c == '\r' || c == '\n')
// '%' is tested for here; presumably it starts a comment -- confirm.
129 } else if (c == '%') {
// A table value other than 1 means c is not white space.
131 } else if (specialChars[c] != 1) {
136 // start reading token
// Tokens beginning with a decimal digit.
140 case '0': case '1': case '2': case '3': case '4':
141 case '5': case '6': case '7': case '8': case '9':
147 } else if (c == '.') {
// Fold one more decimal digit into xi.
// NOTE(review): no overflow guard is visible around this accumulation;
// confirm the behaviour for very long digit runs.
156 xi = xi * 10 + (c - '0');
157 } else if (c == '.') {
// Add one digit, weighted by scale, to xf (presumably scale is the place
// value of the current fractional digit -- confirm).
177 xf = xf + scale * (c - '0');
194 switch (c = getChar()) {
198 // This breaks some PDF files, e.g., ones from Photoshop.
202 error(getPos(), "Unterminated string");
// Decrement the parenthesis nesting count; reaching zero presumably closes
// the string.
212 if (--numParen == 0) {
220 switch (c = getChar()) {
// A leading octal digit; each further octal digit checked below shifts c2
// left by three bits and adds the digit's value.
241 case '0': case '1': case '2': case '3':
242 case '4': case '5': case '6': case '7':
245 if (c >= '0' && c <= '7') {
247 c2 = (c2 << 3) + (c - '0');
249 if (c >= '0' && c <= '7') {
251 c2 = (c2 << 3) + (c - '0');
264 error(getPos(), "Unterminated string");
// When tokBuf is full its contents go into the GString s: either a new
// GString is created from the buffer or the buffer is appended to s (the
// condition choosing between the two is not visible).
279 if (n == tokBufSize) {
281 s = new GString(tokBuf, tokBufSize);
283 s->append(tokBuf, tokBufSize);
// The same create-or-append pair for the n characters left in tokBuf.
292 s = new GString(tokBuf, n);
294 s->append(tokBuf, n);
// Continue while the next character is neither EOF nor special (a nonzero
// table entry).
302 while ((c = lookChar()) != EOF && !specialChars[c]) {
// Two groups of hex-digit range tests on c2 (0-9, A-F, a-f); the values
// assigned are not visible. A bad digit is reported below.
306 if (c2 >= '0' && c2 <= '9') {
308 } else if (c2 >= 'A' && c2 <= 'F') {
310 } else if (c2 >= 'a' && c2 <= 'f') {
318 if (c2 >= '0' && c2 <= '9') {
320 } else if (c2 >= 'A' && c2 <= 'F') {
322 } else if (c2 >= 'a' && c2 <= 'f') {
325 error(getPos(), "Illegal digit in hex char in name");
// The count n is incremented first; reaching tokBufSize is an error.
329 if (++n == tokBufSize) {
330 error(getPos(), "Name token too long");
336 obj->initName(tokBuf);
344 obj->initCmd(tokBuf);
347 // hex string or dict punctuation
// Two '<' characters are stored in tokBuf and returned as a command.
354 tokBuf[0] = tokBuf[1] = '<';
356 obj->initCmd(tokBuf);
368 } else if (c == EOF) {
369 error(getPos(), "Unterminated hex string");
// A table value other than 1 means c is not white space; it is then tested
// against the hex-digit ranges, and anything else is reported.
371 } else if (specialChars[c] != 1) {
373 if (c >= '0' && c <= '9')
375 else if (c >= 'A' && c <= 'F')
377 else if (c >= 'a' && c <= 'f')
380 error(getPos(), "Illegal character <%02x> in hex string", c);
// Same buffer handling as above: a full tokBuf is moved into s.
382 if (n == tokBufSize) {
384 s = new GString(tokBuf, tokBufSize);
386 s->append(tokBuf, tokBufSize);
398 s = new GString(tokBuf, n);
400 s->append(tokBuf, n);
// Append c2 shifted into the high four bits (presumably a final unpaired
// hex digit -- confirm).
402 s->append((char)(c2 << 4));
// Two '>' characters are stored in tokBuf and returned as a command; the
// error below presumably covers a '>' that is not part of such a pair.
412 tokBuf[0] = tokBuf[1] = '>';
414 obj->initCmd(tokBuf);
416 error(getPos(), "Illegal character '>'");
425 error(getPos(), "Illegal character '%c'", c);
// Continue while the next character is neither EOF nor special (a nonzero
// table entry); reaching tokBufSize is reported as an error.
434 while ((c = lookChar()) != EOF && !specialChars[c]) {
436 if (++n == tokBufSize) {
437 error(getPos(), "Command token too long");
// The first character is compared before calling strcmp, so strcmp only
// runs for tokens starting with 't', 'f' or 'n'. "true" and "false" become
// booleans; any remaining token becomes a command.
443 if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
444 obj->initBool(gTrue);
445 } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
446 obj->initBool(gFalse);
// The object produced for "null" is not visible in this view.
447 } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
450 obj->initCmd(tokBuf);
// Skip input up to the next line.
// Visible logic: EOF or '\n' is tested for on the character in c, and a
// separate lookahead checks whether the next character is '\n'.
// NOTE(review): the loop structure and the handling of '\r' are not visible
// in this view; presumably the lookahead lets a "\r\n" pair count as one
// line ending -- confirm.
458 void Lexer::skipToNextLine() {
463 if (c == EOF || c == '\n') {
467 if ((c = lookChar()) == '\n') {