From e633c982297e1728e8373f80d25533776a9b92b0 Mon Sep 17 00:00:00 2001 From: Matthias Kramm Date: Wed, 18 Nov 2009 16:07:18 -0800 Subject: [PATCH] added glyph pairing to align zone detector --- lib/devices/swf.c | 18 +++++++--- lib/modules/swfalignzones.c | 27 +++++++++++--- lib/modules/swftext.c | 83 ++++++++++++++++++++++++++++++++++++++++--- lib/rfxswf.h | 14 ++++++++ src/Makefile.in | 9 +++++ 5 files changed, 138 insertions(+), 13 deletions(-) diff --git a/lib/devices/swf.c b/lib/devices/swf.c index 3277938..a3f57e9 100644 --- a/lib/devices/swf.c +++ b/lib/devices/swf.c @@ -623,6 +623,7 @@ static void chararray_writetotag(chararray_t*_chardata, TAG*tag) int lastx; int lasty; int lastsize; + int lastchar; int charids[128]; int charadvance[128]; int charstorepos; @@ -646,6 +647,7 @@ static void chararray_writetotag(chararray_t*_chardata, TAG*tag) lastx = CHARMIDX; lasty = CHARMIDY; lastsize = -1; + lastchar = -1; if(pass==1) { @@ -670,9 +672,9 @@ static void chararray_writetotag(chararray_t*_chardata, TAG*tag) charatposition_t*chr = &chardata->chr[t]; - if(lastfont != chardata->chr[t].font || - lastx!=chardata->chr[t].x || - lasty!=chardata->chr[t].y || + if(lastfont != chr->font || + lastx!=chr->x || + lasty!=chr->y || !colorcompare(&color, &chardata->chr[t].color) || charstorepos==127 || lastsize != chardata->chr[t].size || @@ -736,6 +738,12 @@ static void chararray_writetotag(chararray_t*_chardata, TAG*tag) lastx = chr->x; lasty = chr->y; lastsize = chr->size; + } else { + assert(lastchar>=0); + if(pass==1 && lastchar!=chr->charid) { + swf_FontUsePair(chr->font, lastchar, chr->charid); + swf_FontUsePair(chr->font, chr->charid, lastchar); + } } if(islast) @@ -745,7 +753,7 @@ static void chararray_writetotag(chararray_t*_chardata, TAG*tag) if(tpos-1) nextx = chardata->chr[t+1].x; if(t==chardata->pos-1 && chardata->next) nextx = chardata->next->chr[0].x; int dx = nextx-chr->x; - + int advance; if(dx>=0 && (dx<(1<<(advancebits-1)) || pass==0)) { advance = dx; @@ -754,8 +762,10 @@ static void chararray_writetotag(chararray_t*_chardata, TAG*tag) advance = 0; lastx=chr->x; } + charids[charstorepos] = chr->charid; charadvance[charstorepos] = advance; + lastchar = chr->charid; charstorepos ++; } chardata = chardata->next; diff --git a/lib/modules/swfalignzones.c b/lib/modules/swfalignzones.c index 1961903..f05cad8 100644 --- a/lib/modules/swfalignzones.c +++ b/lib/modules/swfalignzones.c @@ -40,7 +40,7 @@ static void find_best(float*_row, int width, int*_x1, int*_x2, int min_size, int float max1=-1e20,max2=-1e20; int t; float*row = malloc(sizeof(float)*(width+1)); - int filter_size = 20; + int filter_size = 10; float* filter = malloc(sizeof(float)*(filter_size*2+1)); double var = filter_size/3; for(t=-filter_size;t<=filter_size;t++) { @@ -169,7 +169,7 @@ static ALIGNZONE detect_for_char(SWFFONT * f, int nr, float*row, float*column, S /* find two best x values */ int x1=-1,y1=-1,x2=-1,y2=-1; - int nr_x = 1; + int nr_x = 0; find_best(row, width, &x1, &x2, f->use->smallest_size, char_bbox.xmin - font_bbox.xmin, char_bbox.xmax - font_bbox.xmin, nr_x, @@ -210,9 +210,8 @@ void swf_FontCreateAlignZones(SWFFONT * f) f->alignzones[t].dy = 0xffff;//floatToF16(460.80 / 1024.0); } } else { - int t; SRECT bounds = {0,0,0,0}; - + int t; for(t=0;tnumchars;t++) { SRECT b = f->layout->bounds[t]; negate_y(&b); @@ -231,7 +230,25 @@ void swf_FontCreateAlignZones(SWFFONT * f) for(t=0;t<=height;t++) {column_global[t]/=f->numchars/2;} for(t=0;tnumchars;t++) { - memcpy(column, column_global, sizeof(float)*(height+1)); + //memcpy(column, column_global, sizeof(float)*(height+1)); + + memset(column, 0, sizeof(float)*(height+1)); + int s; + int drawn = 0; + printf("[font %d] pairing %c with ", f->id, f->glyph2ascii[t]); + for(s=0;suse->num_neighbors;s++) { + if(f->use->neighbors[s].char2 == t) { + printf("%c ", f->glyph2ascii[f->use->neighbors[s].char1]); + draw_char(f, f->use->neighbors[s].char1, row, column, bounds); + drawn++; + } + } + printf("\n"); + + for(s=0;s<=height;s++) { + column[t] /= drawn*2; + } + memset(row, 0, sizeof(float)*(width+1)); draw_char(f, t, row, column, bounds); diff --git a/lib/modules/swftext.c b/lib/modules/swftext.c index 194ec72..2ce669a 100644 --- a/lib/modules/swftext.c +++ b/lib/modules/swftext.c @@ -625,6 +625,12 @@ static void font_freeusage(SWFFONT*f) if(f->use->chars) { rfx_free(f->use->chars);f->use->chars = 0; } + if(f->use->neighbors) { + rfx_free(f->use->neighbors);f->use->neighbors = 0; + } + if(f->use->neighbors_hash) { + rfx_free(f->use->neighbors_hash);f->use->neighbors_hash = 0; + } rfx_free(f->use); f->use = 0; } } @@ -851,12 +857,9 @@ int swf_FontInitUsage(SWFFONT * f) fprintf(stderr, "Usage initialized twice"); return -1; } - f->use = (FONTUSAGE*)rfx_alloc(sizeof(FONTUSAGE)); - f->use->is_reduced = 0; + f->use = (FONTUSAGE*)rfx_calloc(sizeof(FONTUSAGE)); f->use->smallest_size = 0xffff; - f->use->used_glyphs = 0; f->use->chars = (int*)rfx_calloc(sizeof(f->use->chars[0]) * f->numchars); - f->use->glyphs_specified = 0; return 0; } @@ -906,6 +909,78 @@ int swf_FontUseAll(SWFFONT* f) return 0; } +static unsigned hash2(int char1, int char2) +{ + unsigned hash = char1^(char2<<8); + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} +static void hashadd(FONTUSAGE*u, int char1, int char2, int nr) +{ + unsigned hash = hash2(char1, char2); + while(1) { + hash = hash%u->neighbors_hash_size; + if(!u->neighbors_hash[hash]) { + u->neighbors_hash[hash] = nr+1; + return; + } + hash++; + } +} +int swf_FontUseGetPair(SWFFONT * f, int char1, int char2) +{ + FONTUSAGE*u = f->use; + if(!u || !u->neighbors_hash_size) + return 0; + unsigned hash = hash2(char1, char2); + while(1) { + hash = hash%u->neighbors_hash_size; + int pos = u->neighbors_hash[hash]; + if(!pos) + return 0; + if(pos && + u->neighbors[pos-1].char1 == char1 && + u->neighbors[pos-1].char2 == char2) { + return pos; + } + hash++; + } + +} +void swf_FontUsePair(SWFFONT * f, int char1, int char2) +{ + if (!f->use) + swf_FontInitUsage(f); + FONTUSAGE*u = f->use; + + if(u->num_neighbors*3 >= u->neighbors_hash_size*2) { + if(u->neighbors_hash) { + free(u->neighbors_hash); + } + u->neighbors_hash_size = u->neighbors_hash_size?u->neighbors_hash_size*2:1024; + u->neighbors_hash = rfx_calloc(u->neighbors_hash_size*sizeof(int)); + int t; + for(t=0;tnum_neighbors;t++) { + hashadd(u, u->neighbors[t].char1, u->neighbors[t].char2, t); + } + } + + if(!swf_FontUseGetPair(f, char1, char2)) { + if(u->num_neighbors == u->neighbors_size) { + u->neighbors_size += 4096; + u->neighbors = rfx_realloc(u->neighbors, sizeof(SWFGLYPHPAIR)*u->neighbors_size); + } + u->neighbors[u->num_neighbors].char1 = char1; + u->neighbors[u->num_neighbors].char2 = char2; + hashadd(u, char1, char2, u->num_neighbors); + u->num_neighbors++; + } else { + // increase? + } +} + int swf_FontUseGlyph(SWFFONT * f, int glyph, U16 size) { if (!f->use) diff --git a/lib/rfxswf.h b/lib/rfxswf.h index 0baf61e..4ca38a2 100644 --- a/lib/rfxswf.h +++ b/lib/rfxswf.h @@ -530,12 +530,24 @@ typedef struct SHAPE * shape; } SWFGLYPH; +typedef struct _SWFGLYPHPAIR +{ + U16 char1; + U16 char2; +} SWFGLYPHPAIR; + typedef struct _FONTUSAGE { int* chars; char is_reduced; int used_glyphs; int glyphs_specified; U16 smallest_size; + + SWFGLYPHPAIR* neighbors; + int num_neighbors; + int neighbors_size; + int* neighbors_hash; + int neighbors_hash_size; } FONTUSAGE; #define FONT_STYLE_BOLD 1 @@ -631,6 +643,8 @@ int swf_FontReduce_swfc(SWFFONT * f); int swf_FontInitUsage(SWFFONT * f); int swf_FontUseGlyph(SWFFONT * f, int glyph, U16 size); +void swf_FontUsePair(SWFFONT * f, int char1, int char2); +int swf_FontUseGetPair(SWFFONT * f, int char1, int char2); int swf_FontUseAll(SWFFONT* f); int swf_FontUseUTF8(SWFFONT * f, U8 * s, U16 size); int swf_FontUse(SWFFONT* f,U8 * s); diff --git a/src/Makefile.in b/src/Makefile.in index 463b8d9..0c1381d 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -56,6 +56,15 @@ parser.$(O): parser.yy.c parser.h ../lib/q.h ../lib/librfxswf$(A): ../lib/modules/swfrender.c ../lib/modules/swfshape.c ../lib/modules/swftext.c ../lib/modules/swffont.c ../lib/modules/swfbits.c ../lib/rfxswf.c ../lib/devices/swf.c ../lib/modules/swfalignzones.c cd ../lib;$(MAKE) librfxswf$(A);cd - +../lib/libpdf$(A): ../lib/pdf/GFXOutputDev.cc + cd ../lib;$(MAKE) libpdf$(A);cd - + +../lib/libgfx$(A): ../lib/devices/*.c + cd ../lib;$(MAKE) libgfx$(A);cd - + +../lib/libgfxswf$(A): ../lib/devices/swf.c + cd ../lib;$(MAKE) libgfxswf$(A);cd - + # TODO: include the following rule only if lex is available parser.yy.c: parser.lex flex -B -s -oparser.yy.c parser.lex -- 1.7.10.4