+++ /dev/null
-/*
-This is a Optical-Character-Recognition program
-Copyright (C) 2000-2007 Joerg Schulenburg
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- see README for EMAIL-address
- */
-
-#include "unicode.h"
-#include <stdio.h>
-
-/* FIXME jb global */
-int warn=0; /* if 1 a message is generated if composition is not defined */
-
-/* Arguments: the character (main), and the modifier (accent, etc). See the
- function if you want to know the modifiers.
- Description: This function intends to be a small helper, to avoid having
- to write switches in functions. It's therefore mainly to accents, and
- specially for the most usual ones. It supports the basic greek
- characters too, which is actually not very helpful.
- Returns: the unicode character corresponding to the composed character.
-
- ToDo:
- - It seems to me, that tables should be more effectiv.
- So we should use tables in future? (js)
- */
-wchar_t compose(wchar_t main, wchar_t modifier) {
-/* supported by now: part of ISO8859-1, basic greek characters */
- if( main == UNKNOWN || main == PICTURE ) return main;
-#ifdef DEBUG
- if(modifier!=UNICODE_NULL && modifier!=SPACE)
- printf(" compose(%c,%d)",(char)main,(int)modifier);
-#endif
- if(main>127 && modifier!=0 && modifier!=SPACE && warn)
- fprintf(stderr,"# Warning compose %04x + %04x>127\n",
- (int)modifier,(int)main);
- switch (modifier) {
- case UNICODE_NULL:
- case SPACE:
- return (wchar_t)main;
-
- case APOSTROPHE: /* do NOT USE this. It's here for compatibility only.
- Use ACUTE_ACCENT instead. */
- fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
-
- case ACUTE_ACCENT: /* acute/cedilla */
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_A_WITH_ACUTE;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_ACUTE;
- case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_ACUTE;
- case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_ACUTE;
- case 'c': return LATIN_SMALL_LETTER_C_WITH_ACUTE;
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_ACUTE;
- case 'e': return LATIN_SMALL_LETTER_E_WITH_ACUTE;
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_ACUTE;
- case 'g': return LATIN_SMALL_LETTER_G_WITH_ACUTE;
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_ACUTE;
- case 'i': return LATIN_SMALL_LETTER_I_WITH_ACUTE;
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_ACUTE;
- case 'l': return LATIN_SMALL_LETTER_L_WITH_ACUTE;
- case 'L': return LATIN_CAPITAL_LETTER_L_WITH_ACUTE;
- case 'n': return LATIN_SMALL_LETTER_N_WITH_ACUTE;
- case 'N': return LATIN_CAPITAL_LETTER_N_WITH_ACUTE;
- case 'o': return LATIN_SMALL_LETTER_O_WITH_ACUTE;
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
- case 'r': return LATIN_SMALL_LETTER_R_WITH_ACUTE;
- case 'R': return LATIN_CAPITAL_LETTER_R_WITH_ACUTE;
- case 's': return LATIN_SMALL_LETTER_S_WITH_ACUTE;
- case 'S': return LATIN_CAPITAL_LETTER_S_WITH_ACUTE;
- case 'u': return LATIN_SMALL_LETTER_U_WITH_ACUTE;
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_ACUTE;
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_ACUTE;
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_ACUTE;
- case 'z': return LATIN_SMALL_LETTER_Z_WITH_ACUTE;
- case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_ACUTE;
- default:
- if(warn)fprintf( stderr, " COMPOSE: ACUTE_ACCENT+%04x not defined\n",(int)main);
- }
- break;
-
- case BREVE: /* caron (latin2) "u"-above-... (small bow) */
- switch (main) {
- /* FIXME write separate heuristics for breve */
- case 'a': return LATIN_SMALL_LETTER_A_WITH_BREVE;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_BREVE;
- case 'e': return LATIN_SMALL_LETTER_E_WITH_BREVE;
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_BREVE;
- case 'g': return LATIN_SMALL_LETTER_G_WITH_BREVE;
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_BREVE;
- case 'i': return LATIN_SMALL_LETTER_I_WITH_BREVE;
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_BREVE;
- case 'o': return LATIN_SMALL_LETTER_O_WITH_BREVE;
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_BREVE;
- case 'u': return LATIN_SMALL_LETTER_U_WITH_BREVE;
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_BREVE;
- default:
- if(warn)fprintf( stderr, " COMPOSE: BREVE+%04x not defined\n",(int)main);
- }
- break;
-
- case CARON: /* caron (latin2) "v"-above-... */
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_A_WITH_CARON;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CARON;
- case 'c': return LATIN_SMALL_LETTER_C_WITH_CARON;
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CARON;
- case 'e': return LATIN_SMALL_LETTER_E_WITH_CARON;
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CARON;
- case 'i': return LATIN_SMALL_LETTER_I_WITH_CARON;
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CARON;
- case 'o': return LATIN_SMALL_LETTER_O_WITH_CARON;
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
- case 's': return LATIN_SMALL_LETTER_S_WITH_CARON;
- case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CARON;
- case 'u': return LATIN_SMALL_LETTER_U_WITH_CARON;
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CARON;
- case 'z': return LATIN_SMALL_LETTER_Z_WITH_CARON;
- case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_CARON;
- default:
- if(warn)fprintf( stderr, " COMPOSE: CARON+%04x not defined\n",(int)main);
- }
- break;
-
- case CEDILLA:
- switch (main) {
- case 'c': return LATIN_SMALL_LETTER_C_WITH_CEDILLA;
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
- default:
- if(warn)fprintf( stderr, " COMPOSE: CEDILLA+%04x not defined\n",(int)main);
- }
- break;
-
- case TILDE:
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_A_WITH_TILDE;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_TILDE;
- case 'i': return LATIN_SMALL_LETTER_I_WITH_TILDE;
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_TILDE;
- case 'n': return LATIN_SMALL_LETTER_N_WITH_TILDE;
- case 'N': return LATIN_CAPITAL_LETTER_N_WITH_TILDE;
- case 'o': return LATIN_SMALL_LETTER_O_WITH_TILDE;
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
- case 'u': return LATIN_SMALL_LETTER_U_WITH_TILDE;
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_TILDE;
- default:
- if(warn)fprintf( stderr, " COMPOSE: TILDE+%04x not defined\n",(int)main);
- }
- break;
-
- case GRAVE_ACCENT:
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_A_WITH_GRAVE;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_GRAVE;
- case 'e': return LATIN_SMALL_LETTER_E_WITH_GRAVE;
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_GRAVE;
- case 'i': return LATIN_SMALL_LETTER_I_WITH_GRAVE;
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_GRAVE;
- case 'n': return LATIN_SMALL_LETTER_N_WITH_GRAVE;
- case 'N': return LATIN_CAPITAL_LETTER_N_WITH_GRAVE;
- case 'o': return LATIN_SMALL_LETTER_O_WITH_GRAVE;
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
- case 'u': return LATIN_SMALL_LETTER_U_WITH_GRAVE;
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_GRAVE;
- default:
- if(warn)fprintf( stderr, " COMPOSE: GRAVE_ACCENT+%04x not defined\n",(int)main);
- }
- break;
-
- case QUOTATION_MARK: /* do NOT USE this. It's here for compatibility only.
- Use DIAERESIS instead. */
- fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
-
- case DIAERESIS:
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_A_WITH_DIAERESIS;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS;
- case 'e': return LATIN_SMALL_LETTER_E_WITH_DIAERESIS;
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS;
- case 'i': return LATIN_SMALL_LETTER_I_WITH_DIAERESIS;
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS;
- case 'o': return LATIN_SMALL_LETTER_O_WITH_DIAERESIS;
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
- case 'u': return LATIN_SMALL_LETTER_U_WITH_DIAERESIS;
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS;
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_DIAERESIS;
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
- default:
- if(warn)fprintf( stderr, " COMPOSE: DIAERESIS+%04x (%c) not defined\n",(int)main,(char)main);
- }
- break;
-
- case CIRCUMFLEX_ACCENT: /* ^ */
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX;
- case 'c': return LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX;
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX;
- case 'e': return LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX;
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX;
- case 'g': return LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX;
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX;
- case 'h': return LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX;
- case 'H': return LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX;
- case 'i': return LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX;
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX;
- case 'j': return LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX;
- case 'J': return LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX;
- case 'o': return LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX;
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
- case '0': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
- case 's': return LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX;
- case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX;
- case 'u': return LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX;
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX;
- case 'w': return LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX;
- case 'W': return LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX;
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX;
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX;
- default:
- if(warn)fprintf( stderr, " COMPOSE: CIRCUMFLEX_ACCENT+%04x not defined\n",(int)main);
- }
- break;
-
- case MACRON: /* a minus sign above the char (latin2) */
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_A_WITH_MACRON;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_MACRON;
- case 'e': return LATIN_SMALL_LETTER_E_WITH_MACRON;
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_MACRON;
- case 'i': return LATIN_SMALL_LETTER_I_WITH_MACRON;
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_MACRON;
- case 'o': return LATIN_SMALL_LETTER_O_WITH_MACRON;
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_MACRON;
- case 'u': return LATIN_SMALL_LETTER_U_WITH_MACRON;
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_MACRON;
- case 'y': return LATIN_SMALL_LETTER_Y_WITH_MACRON;
- case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_MACRON;
- case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_MACRON;
- case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_MACRON;
- case '=': return IDENTICAL_TO;
- case '-': return '=';
- case ' ': return MODIFIER_LETTER_MACRON;
- default:
- if(warn)fprintf( stderr, " COMPOSE: MACRON+%04x not defined\n",(int)main);
- }
- break;
-
- case DOT_ABOVE: /* latin2 */
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE;
- case 'c': return LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE;
- case 'C': return LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE;
- case 'e': return LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE;
- case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE;
- case 'g': return LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE;
- case 'G': return LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE;
- case 'l': return 'i'; /* correct wrong recognition */
- case 'i': return 'i';
- case LATIN_SMALL_LETTER_DOTLESS_I: return 'i';
- case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
- case 'j': return 'j';
- case 'o': return LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE;
- case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE;
- case 'z': return LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE;
- case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE;
- case ',': return ';';
- case '.': return ':';
- default:
- if(warn)fprintf( stderr, " COMPOSE: DOT_ABOVE+%04x not defined\n",(int)main);
- }
- break;
-
- case RING_ABOVE:
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_A_WITH_RING_ABOVE;
- case 'A': return LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE;
- case 'u': return LATIN_SMALL_LETTER_U_WITH_RING_ABOVE;
- case 'U': return LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE;
- default:
- if(warn)fprintf( stderr, " COMPOSE: RING_ABOVE+%04x not defined\n",(int)main);
- }
- break;
-
- case 'e': /* e ligatures: ae, oe. */
- case 'E':
- switch (main) {
- case 'a': return LATIN_SMALL_LETTER_AE;
- case 'A': return LATIN_CAPITAL_LETTER_AE;
- case 'o': return LATIN_SMALL_LIGATURE_OE;
- case 'O': return LATIN_CAPITAL_LIGATURE_OE;
- case '0': return LATIN_CAPITAL_LIGATURE_OE;
- default:
- if(warn)fprintf( stderr, " COMPOSE: %04x+e/E not defined\n",(int)main);
- }
- break;
-
- case 'g': /* greek */
- switch (main) {
- /* missing 0x37A-0x390 */
- /* weird cases: Q -> theta (it resembles a little, doesn't it?)
- V -> psi (what can I do?) */
- case 'A': return GREEK_CAPITAL_LETTER_ALPHA;
- case 'B': return GREEK_CAPITAL_LETTER_BETA;
- case 'G': return GREEK_CAPITAL_LETTER_GAMMA;
- case 'D': return GREEK_CAPITAL_LETTER_DELTA;
- case 'E': return GREEK_CAPITAL_LETTER_EPSILON;
- case 'Z': return GREEK_CAPITAL_LETTER_ZETA;
- case 'H': return GREEK_CAPITAL_LETTER_ETA;
- case 'Q': return GREEK_CAPITAL_LETTER_THETA;
- case 'I': return GREEK_CAPITAL_LETTER_IOTA;
- case 'K': return GREEK_CAPITAL_LETTER_KAPPA;
- case 'L': return GREEK_CAPITAL_LETTER_LAMDA;
- case 'M': return GREEK_CAPITAL_LETTER_MU;
- case 'N': return GREEK_CAPITAL_LETTER_NU;
- case 'X': return GREEK_CAPITAL_LETTER_XI;
- case 'O': return GREEK_CAPITAL_LETTER_OMICRON;
- case 'P': return GREEK_CAPITAL_LETTER_PI;
- case 'R': return GREEK_CAPITAL_LETTER_RHO;
- case 'S': return GREEK_CAPITAL_LETTER_SIGMA;
- case 'T': return GREEK_CAPITAL_LETTER_TAU;
- case 'Y': return GREEK_CAPITAL_LETTER_UPSILON;
- case 'F': return GREEK_CAPITAL_LETTER_PHI;
- case 'C': return GREEK_CAPITAL_LETTER_CHI;
- case 'V': return GREEK_CAPITAL_LETTER_PSI;
- case 'W': return GREEK_CAPITAL_LETTER_OMEGA;
-/*
- case '': return GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA;
- case '': return GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA;
- case '': return GREEK_SMALL_LETTER_ALPHA_WITH_TONOS;
- case '': return GREEK_SMALL_LETTER_EPSILON_WITH_TONOS;
- case '': return GREEK_SMALL_LETTER_ETA_WITH_TONOS;
- case '': return GREEK_SMALL_LETTER_IOTA_WITH_TONOS;
- case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS;
-*/
- case 'a': return GREEK_SMALL_LETTER_ALPHA;
- case 'b': return GREEK_SMALL_LETTER_BETA;
- case 'g': return GREEK_SMALL_LETTER_GAMMA;
- case 'd': return GREEK_SMALL_LETTER_DELTA;
- case 'e': return GREEK_SMALL_LETTER_EPSILON;
- case 'z': return GREEK_SMALL_LETTER_ZETA;
- case 'h': return GREEK_SMALL_LETTER_ETA;
- case 'q': return GREEK_SMALL_LETTER_THETA;
- case 'i': return GREEK_SMALL_LETTER_IOTA;
- case 'k': return GREEK_SMALL_LETTER_KAPPA;
- case 'l': return GREEK_SMALL_LETTER_LAMDA;
- case 'm': return GREEK_SMALL_LETTER_MU;
- case 'n': return GREEK_SMALL_LETTER_NU;
- case 'x': return GREEK_SMALL_LETTER_XI;
- case 'o': return GREEK_SMALL_LETTER_OMICRON;
- case 'p': return GREEK_SMALL_LETTER_PI;
- case 'r': return GREEK_SMALL_LETTER_RHO;
- case '&': return GREEK_SMALL_LETTER_FINAL_SIGMA;
- case 's': return GREEK_SMALL_LETTER_SIGMA;
- case 't': return GREEK_SMALL_LETTER_TAU;
- case 'y': return GREEK_SMALL_LETTER_UPSILON;
- case 'f': return GREEK_SMALL_LETTER_PHI;
- case 'c': return GREEK_SMALL_LETTER_CHI;
- case 'v': return GREEK_SMALL_LETTER_PSI;
- case 'w': return GREEK_SMALL_LETTER_OMEGA;
-/*
- case '': return GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA;
- case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA;
- case '': return GREEK_SMALL_LETTER_OMICRON_WITH_TONOS;
- case '': return GREEK_SMALL_LETTER_UPSILON_WITH_TONOS;
- case '': return GREEK_SMALL_LETTER_OMEGA_WITH_TONOS;
- case '': return GREEK_BETA_SYMBOL;
- case '': return GREEK_THETA_SYMBOL;
- case '': return GREEK_UPSILON_WITH_HOOK_SYMBOL;
- case '': return GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL;
- case '': return GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL;
- case '': return GREEK_PHI_SYMBOL;
- case '': return GREEK_PI_SYMBOL;
-*/
- default:
- if(warn)fprintf( stderr, " COMPOSE: GREEK %04x not defined\n",(int)main);
- }
- break;
-
- default:
- fprintf( stderr, " COMPOSE: modifier %04x not defined\n",(int)modifier);
- }
- return (wchar_t)main;
-}
-
-#define UNDEFINED "~"
-
-/* Arguments: character in Unicode format, type of format to convert to.
- Returns: a string containing the Unicode character converted to the chosen
- format. This string is statically allocated and should not be freed.
- ToDo: better using tables?
- */
-const char *decode(wchar_t c, FORMAT type) {
- /* static char d; --- js: big bug (missing \0) if &d returned */
- /*FIXME jb static*/ static char bbuf[8*32]; /* space for 8 buffers, rotating */
- /*FIXME jb static*/ static char *buf=bbuf; /* used for UTF8 sequences and undefined codes */
- buf+=32; if(buf>=bbuf+8*32) buf=bbuf;
- buf[0]=buf[1]=buf[2]=0;
- switch (type) {
- case ISO8859_1:
- if ( c <= 0xFF ) { /* UNICODE == ISO8859-1 */
- buf[0] = (char)c;
- return buf;
- }
- switch (c) { /* not found in list, but perhaps we can describe it */
- /* todo: add greek. GREEK_SMALL_LETTER_ALPHA = alpha */
-
- /* general puctuation */
- case HYPHEN:
- return (const char *)"-";
- case FIGURE_DASH:
- case EN_DASH:
- return (const char *)"--";
- case EM_DASH:
- return (const char *)"---";
- case LEFT_SINGLE_QUOTATION_MARK:
- return (const char *)"`";
- case RIGHT_SINGLE_QUOTATION_MARK:
- return (const char *)"'";
- case SINGLE_LOW_9_QUOTATION_MARK:
- return (const char *)",";
- case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
- return (const char *)UNDEFINED;
- case LEFT_DOUBLE_QUOTATION_MARK:
- return (const char *)"``";
- case RIGHT_DOUBLE_QUOTATION_MARK:
- return (const char *)"''";
- case DOUBLE_LOW_9_QUOTATION_MARK:
- return (const char *)",,";
- case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
- return (const char *)UNDEFINED;
- case DAGGER:
- return (const char *)"+";
- case DOUBLE_DAGGER:
- return (const char *)"*";
- case BULLET:
- return (const char *)"*";
- case TRIANGULAR_BULLET:
- return (const char *)"*";
- case HYPHENATION_POINT:
- return (const char *)"-";
- case HORIZONTAL_ELLIPSIS:
- return (const char *)"...";
- case PER_MILLE_SIGN:
- return (const char *)"%%"; /* awk! */
- case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
- return (const char *)"<";
- case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
- return (const char *)">";
- case EURO_CURRENCY_SIGN:
- return (const char *)"EUR"; /* change it! */
-
- /* ligatures */
- case LATIN_SMALL_LIGATURE_FF:
- return (const char *)"ff";
- case LATIN_SMALL_LIGATURE_FI:
- return (const char *)"fi";
- case LATIN_SMALL_LIGATURE_FL:
- return (const char *)"fl";
- case LATIN_SMALL_LIGATURE_FFI:
- return (const char *)"ffi";
- case LATIN_SMALL_LIGATURE_FFL:
- return (const char *)"ffl";
- case LATIN_SMALL_LIGATURE_LONG_S_T:
- case LATIN_SMALL_LIGATURE_ST:
- return (const char *)"st";
-
- /* extra */
- case UNKNOWN:
- return (const char *)"_";
- case PICTURE:
- return (const char *)"_"; /* Due to Mobile OCR */
-
- default:
- /* snprintf seems to be no standard, so I use insecure sprintf */
- sprintf(buf,"\\code(%04x)",(unsigned)c);
- return buf; /* UNDEFINED; */
- }
- break;
- case TeX:
- if ( c >= SPACE && c <= TILDE ) { /* ASCII */
- switch (c) {
- case '$':
- return (const char *)"\\$";
- case '&':
- return (const char *)"\\&";
- case '%':
- return (const char *)"\\%";
- case '#':
- return (const char *)"\\#";
- case '_':
- return (const char *)"\\_";
- case '{':
- return (const char *)"\\{";
- case '}':
- return (const char *)"\\}";
- case '\\':
- return (const char *)"$\\backslash$";
- case '~':
- return (const char *)"\\~{}";
- case '^':
- return (const char *)"\\^{}";
- default:
- buf[0] = (char)c;
- return (const char *)buf;
- }
- }
- switch (c) {
- /* ISO8859_1 */
- case NO_BREAK_SPACE:
- return (const char *)"~";
- case INVERTED_EXCLAMATION_MARK:
- return (const char *)"!'";
- case CENT_SIGN:
- return (const char *)"\\textcent"; /* \usepackage{textcomp} */
- case POUND_SIGN:
- return (const char *)"\\pounds";
- case EURO_CURRENCY_SIGN:
- return (const char *)"\\euro"; /* \usepackage{eurosans} */
- case CURRENCY_SIGN:
- return (const char *)"\\textcurrency"; /* \usepackage{textcomp} */
- case YEN_SIGN:
- return (const char *)"\\textyen"; /* \usepackage{textcomp} */
- case BROKEN_BAR:
- return (const char *)"\\textbrokenbar"; /* \usepackage{textcomp} */
- case SECTION_SIGN:
- return (const char *)"\\S";
- case DIAERESIS:
- return (const char *)"\"";
- case COPYRIGHT_SIGN:
- return (const char *)"\\copyright";
- case FEMININE_ORDINAL_INDICATOR:
- return (const char *)"$^{\\underbar{a}}$";
- case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
- return (const char *)"\\flqq{}";
- case NOT_SIGN:
- return (const char *)"$\\lnot$";
- case SOFT_HYPHEN:
- return (const char *)"\\-";
- case REGISTERED_SIGN:
- return (const char *)"\\textregistered";/* \usepackage{textcomp} */
- case MACRON:
- return (const char *)"\\textasciimacron";/* \usepackage{textcomp} */
- case DEGREE_SIGN:
- return (const char *)"$^{o}$";
- case PLUS_MINUS_SIGN:
- return (const char *)"$\\pm$";
- case SUPERSCRIPT_TWO:
- return (const char *)"$^{2}$";
- case SUPERSCRIPT_THREE:
- return (const char *)"$^{3}$";
- case ACUTE_ACCENT:
- return (const char *)"\\( \\prime \\)";
- case MICRO_SIGN:
- return (const char *)"$\\mu$";
- case PILCROW_SIGN:
- return (const char *)"\\P";
- case MIDDLE_DOT:
- return (const char *)"$\\cdot$";
- case CEDILLA:
- return (const char *)"\\,";
- case SUPERSCRIPT_ONE:
- return (const char *)"$^{1}$";
- case MASCULINE_ORDINAL_INDICATOR:
- return (const char *)"$^{\\underbar{o}}$";
- case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
- return (const char *)"\\frqq{}";
- case VULGAR_FRACTION_ONE_QUARTER: /* these fractions are not good*/
- return (const char *)"\\( 1\\over 4 \\)";
- case VULGAR_FRACTION_ONE_HALF:
- return (const char *)"\\( 1\\over 2 \\)";
- case VULGAR_FRACTION_THREE_QUARTERS:
- return (const char *)"\\( 3\\over 4 \\)";
- case INVERTED_QUESTION_MARK:
- return (const char *)"?'";
- case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
- return (const char *)"\\`A";
- case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
- return (const char *)"\\'A";
- case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
- return (const char *)"\\^A";
- case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
- return (const char *)"\\~A";
- case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
- return (const char *)"\\\"A";
- case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
- return (const char *)"\\AA";
- case LATIN_CAPITAL_LETTER_AE:
- return (const char *)"\\AE";
- case LATIN_CAPITAL_LETTER_C_WITH_CARON:
- return (const char *)"\\v{C}";
- case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
- return (const char *)"\\C";
- case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
- return (const char *)"\\`E";
- case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
- return (const char *)"\\'E";
- case LATIN_CAPITAL_LETTER_E_WITH_CARON:
- return (const char *)"\\v{E}";
- case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
- return (const char *)"\\^E";
- case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
- return (const char *)"\\\"E";
- case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
- return (const char *)"\\`I";
- case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
- return (const char *)"\\'I";
- case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
- return (const char *)"\\^I";
- case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
- return (const char *)"\\\"I";
- case LATIN_CAPITAL_LETTER_ETH:
- return (const char *)UNDEFINED;
- case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
- return (const char *)"\\~N";
- case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
- return (const char *)"\\`O";
- case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
- return (const char *)"\\'O";
- case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
- return (const char *)"\\^O";
- case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
- return (const char *)"\\~O";
- case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
- return (const char *)"\\\"O";
- case MULTIPLICATION_SIGN:
- return (const char *)"$\\times$";
- case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
- return (const char *)"\\O";
- case LATIN_CAPITAL_LETTER_S_WITH_CARON:
- return (const char *)"\\v{S}";
- case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
- return (const char *)"\\`U";
- case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
- return (const char *)"\\'U";
- case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
- return (const char *)"\\^U";
- case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
- return (const char *)"\\\"U";
- case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
- return (const char *)"\\'Y";
- case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
- return (const char *)"\\v{Z}";
- case LATIN_CAPITAL_LETTER_THORN:
- return (const char *)UNDEFINED;
- case LATIN_SMALL_LETTER_SHARP_S:
- return (const char *)"\\ss";
- case LATIN_SMALL_LETTER_A_WITH_GRAVE:
- return (const char *)"\\`a";
- case LATIN_SMALL_LETTER_A_WITH_ACUTE:
- return (const char *)"\\'a";
- case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
- return (const char *)"\\^a";
- case LATIN_SMALL_LETTER_A_WITH_TILDE:
- return (const char *)"\\~a";
- case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
- return (const char *)"\\\"a";
- case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
- return (const char *)"\\aa";
- case LATIN_SMALL_LETTER_AE:
- return (const char *)"\\ae";
- case LATIN_SMALL_LETTER_C_WITH_CARON:
- return (const char *)"\\v{c}";
- case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
- return (const char *)"\\c";
- case LATIN_SMALL_LETTER_E_WITH_GRAVE:
- return (const char *)"\\`e";
- case LATIN_SMALL_LETTER_E_WITH_ACUTE:
- return (const char *)"\\'e";
- case LATIN_SMALL_LETTER_E_WITH_CARON:
- return (const char *)"\\v{e}";
- case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
- return (const char *)"\\^e";
- case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
- return (const char *)"\\\"e";
- case LATIN_SMALL_LETTER_I_WITH_GRAVE:
- return (const char *)"\\`i";
- case LATIN_SMALL_LETTER_I_WITH_ACUTE:
- return (const char *)"\\'i";
- case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
- return (const char *)"\\^i";
- case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
- return (const char *)"\\\"i";
- case LATIN_SMALL_LETTER_ETH:
- return (const char *)UNDEFINED;
- case LATIN_SMALL_LETTER_N_WITH_TILDE:
- return (const char *)"\\~n";
- case LATIN_SMALL_LETTER_O_WITH_GRAVE:
- return (const char *)"\\`o";
- case LATIN_SMALL_LETTER_O_WITH_ACUTE:
- return (const char *)"\\'o";
- case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
- return (const char *)"\\^o";
- case LATIN_SMALL_LETTER_O_WITH_TILDE:
- return (const char *)"\\~o";
- case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
- return (const char *)"\\\"o";
- case DIVISION_SIGN:
- return (const char *)"$\\div$";
- case LATIN_SMALL_LETTER_O_WITH_STROKE:
- return (const char *)"\\o";
- case LATIN_SMALL_LETTER_S_WITH_CARON:
- return (const char *)"\\v{s}";
- case LATIN_SMALL_LETTER_U_WITH_GRAVE:
- return (const char *)"\\`u";
- case LATIN_SMALL_LETTER_U_WITH_ACUTE:
- return (const char *)"\\'u";
- case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
- return (const char *)"\\^u";
- case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
- return (const char *)"\\\"u";
- case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
- return (const char *)"\\'y";
- case LATIN_SMALL_LETTER_THORN:
- return (const char *)UNDEFINED;
- case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
- return (const char *)"\\\"y";
- case LATIN_SMALL_LETTER_Z_WITH_CARON:
- return (const char *)"\\v{z}";
-
- /* greek */
- /* some (punctuation, accents, accented capital) greek letters missing*/
- case GREEK_CAPITAL_LETTER_ALPHA:
- return (const char *)"A";
- case GREEK_CAPITAL_LETTER_BETA:
- return (const char *)"B";
- case GREEK_CAPITAL_LETTER_GAMMA:
- return (const char *)"\\( \\Gamma \\)";
- case GREEK_CAPITAL_LETTER_DELTA:
- return (const char *)"\\( \\Delta \\)";
- case GREEK_CAPITAL_LETTER_EPSILON:
- return (const char *)"E";
- case GREEK_CAPITAL_LETTER_ZETA:
- return (const char *)"Z";
- case GREEK_CAPITAL_LETTER_ETA:
- return (const char *)"H";
- case GREEK_CAPITAL_LETTER_THETA:
- return (const char *)"\\( \\Theta \\)";
- case GREEK_CAPITAL_LETTER_IOTA:
- return (const char *)"I";
- case GREEK_CAPITAL_LETTER_KAPPA:
- return (const char *)"K";
- case GREEK_CAPITAL_LETTER_LAMDA:
- return (const char *)"\\( \\Lambda \\)";
- case GREEK_CAPITAL_LETTER_MU:
- return (const char *)"M";
- case GREEK_CAPITAL_LETTER_NU:
- return (const char *)"N";
- case GREEK_CAPITAL_LETTER_XI:
- return (const char *)"\\( \\Xi \\)";
- case GREEK_CAPITAL_LETTER_OMICRON:
- return (const char *)"O";
- case GREEK_CAPITAL_LETTER_PI:
- return (const char *)"\\( \\Pi \\)";
- case GREEK_CAPITAL_LETTER_RHO:
- return (const char *)"P";
- case GREEK_CAPITAL_LETTER_SIGMA:
- return (const char *)"\\( \\Sigma \\)";
- case GREEK_CAPITAL_LETTER_TAU:
- return (const char *)"T";
- case GREEK_CAPITAL_LETTER_UPSILON:
- return (const char *)"\\( \\Upsilon \\)";
- case GREEK_CAPITAL_LETTER_PHI:
- return (const char *)"\\( \\Phi \\)";
- case GREEK_CAPITAL_LETTER_CHI:
- return (const char *)"\\( \\Chi \\)";
- case GREEK_CAPITAL_LETTER_PSI:
- return (const char *)"\\( \\Psi \\)";
- case GREEK_CAPITAL_LETTER_OMEGA:
- return (const char *)"\\( \\Omega \\)";
- case GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA:
- return (const char *)UNDEFINED;
- case GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_ALPHA_WITH_TONOS:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_EPSILON_WITH_TONOS:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_ETA_WITH_TONOS:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_IOTA_WITH_TONOS:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_ALPHA:
- return (const char *)"\\( \\alpha \\)";
- case GREEK_SMALL_LETTER_BETA:
- return (const char *)"\\( \\beta \\)";
- case GREEK_SMALL_LETTER_GAMMA:
- return (const char *)"\\( \\gamma \\)";
- case GREEK_SMALL_LETTER_DELTA:
- return (const char *)"\\( \\delta \\)";
- case GREEK_SMALL_LETTER_EPSILON:
- return (const char *)"\\( \\epsilon \\)";
- case GREEK_SMALL_LETTER_ZETA:
- return (const char *)"\\( \\zeta \\)";
- case GREEK_SMALL_LETTER_ETA:
- return (const char *)"\\( \\eta \\)";
- case GREEK_SMALL_LETTER_THETA:
- return (const char *)"\\( \\theta \\)";
- case GREEK_SMALL_LETTER_IOTA:
- return (const char *)"\\( \\iota \\)";
- case GREEK_SMALL_LETTER_KAPPA:
- return (const char *)"\\( \\kappa \\)";
- case GREEK_SMALL_LETTER_LAMDA:
- return (const char *)"\\( \\lambda \\)";
- case GREEK_SMALL_LETTER_MU:
- return (const char *)"\\( \\mu \\)";
- case GREEK_SMALL_LETTER_NU:
- return (const char *)"\\( \\nu \\)";
- case GREEK_SMALL_LETTER_XI:
- return (const char *)"\\( \\xi \\)";
- case GREEK_SMALL_LETTER_OMICRON:
- return (const char *)"\\( \\omicron \\)";
- case GREEK_SMALL_LETTER_PI:
- return (const char *)"\\( \\pi \\)";
- case GREEK_SMALL_LETTER_RHO:
- return (const char *)"\\( \\rho \\)";
- case GREEK_SMALL_LETTER_FINAL_SIGMA:
- return (const char *)"\\( \\varsigma \\)";
- case GREEK_SMALL_LETTER_SIGMA:
- return (const char *)"\\( \\sigma \\)";
- case GREEK_SMALL_LETTER_TAU:
- return (const char *)"\\( \\tau \\)";
- case GREEK_SMALL_LETTER_UPSILON:
- return (const char *)"\\( \\upsilon \\)";
- case GREEK_SMALL_LETTER_PHI:
- return (const char *)"\\( \\varphi \\)";
- case GREEK_SMALL_LETTER_CHI:
- return (const char *)"\\( \\chi \\)";
- case GREEK_SMALL_LETTER_PSI:
- return (const char *)"\\( \\psi \\)";
- case GREEK_SMALL_LETTER_OMEGA:
- return (const char *)"\\( \\omega \\)";
- case GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_OMICRON_WITH_TONOS:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_UPSILON_WITH_TONOS:
- return (const char *)UNDEFINED;
- case GREEK_SMALL_LETTER_OMEGA_WITH_TONOS:
- return (const char *)UNDEFINED;
- case GREEK_BETA_SYMBOL:
- return (const char *)UNDEFINED;
- case GREEK_THETA_SYMBOL:
- return (const char *)"\\( \\vartheta \\)";
- case GREEK_UPSILON_WITH_HOOK_SYMBOL:
- return (const char *)UNDEFINED;
- case GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL:
- return (const char *)UNDEFINED;
- case GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL:
- return (const char *)UNDEFINED;
- case GREEK_PHI_SYMBOL:
- return (const char *)"\\( \\phi \\)";
- case GREEK_PI_SYMBOL:
- return (const char *)"\\( \\varpi \\)";
- /* and some greek letters missing*/
-
- /* punctuation (partial) */
- case HYPHEN:
- return (const char *)"-";
- case NON_BREAKING_HYPHEN:
- return (const char *)UNDEFINED;
- case FIGURE_DASH:
- case EN_DASH:
- return (const char *)"--";
- case EM_DASH:
- return (const char *)"---";
- case HORIZONTAL_BAR:
- return (const char *)UNDEFINED;
- case LEFT_SINGLE_QUOTATION_MARK:
- return (const char *)"`";
- case RIGHT_SINGLE_QUOTATION_MARK:
- return (const char *)"'";
- case SINGLE_LOW_9_QUOTATION_MARK:
- return (const char *)"\\glq{}";
- case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
- return (const char *)UNDEFINED;
- case LEFT_DOUBLE_QUOTATION_MARK:
- return (const char *)"``";
- case RIGHT_DOUBLE_QUOTATION_MARK:
- return (const char *)"''";
- case DOUBLE_LOW_9_QUOTATION_MARK:
- return (const char *)"\\glqq{}";
- case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
- return (const char *)UNDEFINED;
- case DAGGER:
- return (const char *)"\\dag";
- case DOUBLE_DAGGER:
- return (const char *)"\\ddag";
- case BULLET:
- return (const char *)"$\\bullet$";
- case TRIANGULAR_BULLET:
- return (const char *)"$\\blacktriangleright";
- case HYPHENATION_POINT:
- return (const char *)"\\-";
- case HORIZONTAL_ELLIPSIS:
- return (const char *)"\\ldots";
- case PER_MILLE_SIGN:
- return (const char *)UNDEFINED;
- case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
- return (const char *)"\\flq{}";
- case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
- return (const char *)"\\frq{}";
- /* ligatures */
- case LATIN_SMALL_LIGATURE_FF:
- return (const char *)"ff";
- case LATIN_SMALL_LIGATURE_FI:
- return (const char *)"fi";
- case LATIN_SMALL_LIGATURE_FL:
- return (const char *)"fl";
- case LATIN_SMALL_LIGATURE_FFI:
- return (const char *)"ffi";
- case LATIN_SMALL_LIGATURE_FFL:
- return (const char *)"ffl";
- case LATIN_SMALL_LIGATURE_LONG_S_T:
- case LATIN_SMALL_LIGATURE_ST:
- return (const char *)"st";
- /* reserved */
- case 0:
- return (const char *)"";
- case UNKNOWN:
- return (const char *)"\\_";
- case PICTURE:
- return (const char *)"(PICTURE)";
- default:
- /* snprintf seems to be no standard, so I use insecure sprintf */
- sprintf(buf,"\\symbol{%u}",(unsigned)c);
- return buf; /* UNDEFINED; */
- }
- case HTML:
- if ( c >= SPACE && c <= TILDE ) { /* ASCII */
- switch (c) {
- case '&':
- return (const char *)"&";
- /* semicolon must not be coded */
- case '\'':
- return (const char *)"'";
- case '"':
- return (const char *)""";
- case '<':
- return (const char *)"<";
- case '>':
- return (const char *)">";
- }
- buf[0] = (char)c;
- return buf;
- }
- switch (c) {
- case PICTURE:
- return (const char *)"<!--PICTURE-->";
- case UNKNOWN:
- return (const char *)"_"; /* better use colored symbol? */
- case LINE_FEED:
- return (const char *)"<br />"; /* \n handled somwhere else? */
- case FORM_FEED:
- case CARRIAGE_RETURN:
- return (const char *)"<br />";
- case NO_BREAK_SPACE:
- return (const char *)"<nobr />";
- case INVERTED_EXCLAMATION_MARK:
- return (const char *)"¡";
- case CENT_SIGN:
- return (const char *)"¢";
- case POUND_SIGN:
- return (const char *)"£";
- case CURRENCY_SIGN:
- return (const char *)"¤";
- case YEN_SIGN:
- return (const char *)"¥";
- case BROKEN_BAR:
- return (const char *)"¦";
- case SECTION_SIGN:
- return (const char *)"§";
- case DIAERESIS:
- return (const char *)"¨";
- case COPYRIGHT_SIGN:
- return (const char *)"©";
- case FEMININE_ORDINAL_INDICATOR:
- return (const char *)"ªem;";
- case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
- return (const char *)"«";
- case NOT_SIGN:
- return (const char *)"¬";
- case SOFT_HYPHEN:
- return (const char *)"­";
- case REGISTERED_SIGN:
- return (const char *)"®";
- case MACRON:
- return (const char *)"¯";
- case DEGREE_SIGN:
- return (const char *)"°";
- case PLUS_MINUS_SIGN:
- return (const char *)"±";
- case SUPERSCRIPT_TWO:
- return (const char *)"²";
- case SUPERSCRIPT_THREE:
- return (const char *)"³";
- case ACUTE_ACCENT:
- return (const char *)"´";
- case MICRO_SIGN:
- return (const char *)"µ";
- case PILCROW_SIGN:
- return (const char *)"¶";
- case MIDDLE_DOT:
- return (const char *)"·";
- case CEDILLA:
- return (const char *)"¸";
- case SUPERSCRIPT_ONE:
- return (const char *)"¹";
- case MASCULINE_ORDINAL_INDICATOR:
- return (const char *)"º";
- case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
- return (const char *)"»";
- case VULGAR_FRACTION_ONE_QUARTER:
- return (const char *)"¼";
- case VULGAR_FRACTION_ONE_HALF:
- return (const char *)"½";
- case VULGAR_FRACTION_THREE_QUARTERS:
- return (const char *)"¾";
- case INVERTED_QUESTION_MARK:
- return (const char *)"¿";
- case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
- return (const char *)"À";
- case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
- return (const char *)"Á";
- case LATIN_CAPITAL_LETTER_A_WITH_BREVE:
- return (const char *)"Ă";
- case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
- return (const char *)"Â";
- case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
- return (const char *)"Ã";
- case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
- return (const char *)"Ä";
- case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
- return (const char *)"Å";
- case LATIN_CAPITAL_LETTER_AE:
- return (const char *)"Æ";
- case LATIN_CAPITAL_LETTER_C_WITH_CARON:
- return (const char *)"Č";
- case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
- return (const char *)"Ç";
- case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
- return (const char *)"È";
- case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
- return (const char *)"É";
- case LATIN_CAPITAL_LETTER_E_WITH_CARON:
- return (const char *)"Ě";
- case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
- return (const char *)"Ê";
- case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
- return (const char *)"Ë";
- case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
- return (const char *)"Ì";
- case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
- return (const char *)"Í";
- case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
- return (const char *)"Î";
- case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
- return (const char *)"Ï";
- case LATIN_CAPITAL_LETTER_ETH:
- return (const char *)"Ð";
- case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
- return (const char *)"Ñ";
- case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
- return (const char *)"Ò";
- case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
- return (const char *)"Ó";
- case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
- return (const char *)"Ô";
- case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
- return (const char *)"Õ";
- case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
- return (const char *)"Ö";
- case MULTIPLICATION_SIGN:
- return (const char *)"×";
- case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
- return (const char *)"Ø";
- case LATIN_CAPITAL_LETTER_S_WITH_CARON:
- return (const char *)"Š";
- case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
- return (const char *)"Ù";
- case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
- return (const char *)"Ú";
- case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
- return (const char *)"Û";
- case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
- return (const char *)"Ü";
- case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
- return (const char *)"Ý";
- case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
- return (const char *)"Ž";
- case LATIN_CAPITAL_LETTER_THORN:
- return (const char *)"Þ";
- case LATIN_SMALL_LETTER_SHARP_S:
- return (const char *)"ß";
- case LATIN_SMALL_LETTER_A_WITH_GRAVE:
- return (const char *)"à";
- case LATIN_SMALL_LETTER_A_WITH_ACUTE:
- return (const char *)"á";
- case LATIN_SMALL_LETTER_A_WITH_BREVE:
- return (const char *)"ă";
- case LATIN_SMALL_LETTER_A_WITH_CARON:
- return (const char *)"&acaron;";
- case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
- return (const char *)"â";
- case LATIN_SMALL_LETTER_A_WITH_TILDE:
- return (const char *)"ã";
- case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
- return (const char *)"ä";
- case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
- return (const char *)"å";
- case LATIN_SMALL_LETTER_AE:
- return (const char *)"æ";
- case LATIN_SMALL_LETTER_C_WITH_CARON:
- return (const char *)"č";
- case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
- return (const char *)"ç";
- case LATIN_SMALL_LETTER_E_WITH_GRAVE:
- return (const char *)"è";
- case LATIN_SMALL_LETTER_E_WITH_ACUTE:
- return (const char *)"é";
- case LATIN_SMALL_LETTER_E_WITH_CARON:
- return (const char *)"ě";
- case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
- return (const char *)"ê";
- case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
- return (const char *)"ë";
- case LATIN_SMALL_LETTER_I_WITH_GRAVE:
- return (const char *)"ì";
- case LATIN_SMALL_LETTER_I_WITH_ACUTE:
- return (const char *)"í";
- case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
- return (const char *)"î";
- case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
- return (const char *)"ï";
- case LATIN_SMALL_LETTER_ETH:
- return (const char *)"ð";
- case LATIN_SMALL_LETTER_N_WITH_TILDE:
- return (const char *)"ñ";
- case LATIN_SMALL_LETTER_O_WITH_GRAVE:
- return (const char *)"ò";
- case LATIN_SMALL_LETTER_O_WITH_ACUTE:
- return (const char *)"ó";
- case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
- return (const char *)"ô";
- case LATIN_SMALL_LETTER_O_WITH_TILDE:
- return (const char *)"õ";
- case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
- return (const char *)"ö";
- case DIVISION_SIGN:
- return (const char *)"÷";
- case LATIN_SMALL_LETTER_O_WITH_STROKE:
- return (const char *)"ø";
- case LATIN_SMALL_LETTER_S_WITH_CARON:
- return (const char *)"š";
- case LATIN_SMALL_LETTER_U_WITH_GRAVE:
- return (const char *)"ù";
- case LATIN_SMALL_LETTER_U_WITH_ACUTE:
- return (const char *)"ú";
- case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
- return (const char *)"û";
- case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
- return (const char *)"ü";
- case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
- return (const char *)"ý";
- case LATIN_SMALL_LETTER_THORN:
- return (const char *)"þ";
- case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
- return (const char *)"ÿ";
- case LATIN_SMALL_LETTER_Z_WITH_CARON:
- return (const char *)"ž";
- case EURO_CURRENCY_SIGN:
- return (const char *)"€";
- case 0:
- return (const char *)"";
- default:
- sprintf(buf,"&#%u;",(unsigned)c);
- return buf; /* undefined */
- }
- /* break; unreachable code */
- case XML: /* only 5 &xxx;-ENTITIES ar defined by default */
- if ( c >= SPACE && c <= TILDE ) { /* ASCII */
- switch (c) {
- case '&':
- return (const char *)"&";
- case '\'':
- return (const char *)"'";
- case '"':
- return (const char *)""";
- case '<':
- return (const char *)"<";
- case '>':
- return (const char *)">";
- }
- buf[0] = (char)c;
- return buf;
- }
- switch (c) { /* subject of change! */
- case PICTURE:
- return (const char *)"(PICTURE)";
- case UNKNOWN:
- return (const char *)"_"; /* better use colored symbol? */
- case LINE_FEED: /* \n handled somwhere else? */
- case FORM_FEED:
- case CARRIAGE_RETURN:
- return (const char *)"<br />";
- case NO_BREAK_SPACE:
- return (const char *)"<nobr />";
- case 0:
- return (const char *)"";
- default:
- sprintf(buf,"&#x%03x;",(unsigned)c);
- return buf; /* undefined */
- }
- /* break; unreachable code */
- case SGML:
- switch (c) {
- default:
- sprintf(buf,"&#%u;",(unsigned)c);
- return buf; /* UNDEFINED */
- }
- /* break; unreachable code */
- case ASCII: /* mainly used for debugging */
- if ( c=='\n' || (c>= 0x20 && c <= 0x7F) ) {
- buf[0] = (char)c;
- return buf;
- }
- switch (c) {
- /* extra */
- case UNKNOWN:
- return (const char *)"(?)";
- case PICTURE:
- return (const char *)"(?)";
-
- default:
- /* snprintf seems to be no standard, so I use insecure sprintf */
- if ((unsigned)c>255) sprintf(buf,"(0x%04x)",(unsigned)c);
- else sprintf(buf,"(0x%02x)",(unsigned)c);
- return buf; /* UNDEFINED; */
- }
- /* break; unreachable code */
- default: /* use UTF8 as default, test with xterm -u8 */
- /* extra */
- if ( c == UNKNOWN ) return (const char *)"_";
- if ( c == PICTURE ) return (const char *)"_"; /* Due to Mobile OCR */
- if ( c <= (wchar_t)0x0000007F ) { /* UTF8 == 7bit ASCII */
- buf[0] = (char)c;
- return buf;
- }
- if ( c <= (wchar_t)0x000007FF ) { /* UTF8 == 11bit */
- buf[0] = (char)(0xc0|((c>> 6) & 0x1f)); /* 110xxxxx */
- buf[1] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
- buf[2] = (char)0; /* terminate string */
- return buf;
- }
- /* wchar_t is 16bit for Borland-C !? Jan07 */
- if ( c <= (wchar_t)0x0000FFFF ) { /* UTF8 == 16bit */
- buf[0] = (char)(0xe0|((c>>12) & 0x0f)); /* 1110xxxx */
- buf[1] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
- buf[2] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
- buf[3] = (char)0; /* terminate string */
- return buf;
- }
- if ( c <= (wchar_t)0x001FFFFF ) { /* UTF8 == 21bit */
- buf[0] = (char)(0xf0|((c>>18) & 0x07)); /* 11110xxx */
- buf[1] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
- buf[2] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
- buf[3] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
- buf[4] = (char)0; /* terminate string */
- return buf;
- }
- if ( c <= (wchar_t)0x03FFFFFF ) { /* UTF8 == 26bit */
- buf[0] = (char)(0xf8|((c>>24) & 0x03)); /* 111110xx */
- buf[1] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
- buf[2] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
- buf[3] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
- buf[4] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
- buf[5] = (char)0; /* terminate string */
- return buf;
- }
- if ( c <= (wchar_t)0x7FFFFFFF ) { /* UTF8 == 31bit */
- buf[0] = (char)(0xfc|((c>>30) & 0x01)); /* 1111110x */
- buf[1] = (char)(0x80|((c>>24) & 0x3f)); /* 10xxxxxx */
- buf[2] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
- buf[3] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
- buf[4] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
- buf[5] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
- buf[6] = (char)0; /* terminate string */
- return buf;
- }
- return (const char *)UNDEFINED;
- }
-}