From 5ba24931ec3861ca5befcaae1502dd9105639b44 Mon Sep 17 00:00:00 2001 From: Matthias Kramm Date: Mon, 26 Oct 2009 14:29:50 -0700 Subject: [PATCH] finetuned space char detection --- lib/pdf/GFXOutputDev.cc | 7 ++++--- lib/pdf/InfoOutputDev.cc | 17 +++++++++++++++++ lib/pdf/InfoOutputDev.h | 1 + lib/q.c | 30 ++++++++++++++++++++++++++++++ lib/q.h | 2 ++ 5 files changed, 54 insertions(+), 3 deletions(-) diff --git a/lib/pdf/GFXOutputDev.cc b/lib/pdf/GFXOutputDev.cc index 2596d25..881c75a 100644 --- a/lib/pdf/GFXOutputDev.cc +++ b/lib/pdf/GFXOutputDev.cc @@ -1457,10 +1457,11 @@ void GFXOutputDev::drawChar(GfxState *state, double x, double y, !last_char_was_space) { double expected_x = last_char_x + current_gfxfont->glyphs[last_char].advance*m.m00; int space = this->current_fontinfo->space_char; - if(m.tx - expected_x >= m.m00*64) { - msg(" There's a %f (%f) pixel gap between char %d and char %d, I'm inserting a space here", + float width = this->current_fontinfo->average_advance; + if(m.tx - expected_x >= m.m00*width*4/10) { + msg(" There's a %f pixel gap between char %d and char %d (expected no more than %f), I'm inserting a space here", m.tx-expected_x, - (m.tx-expected_x)/m.m00, + width*m.m00*4/10, last_char, glyphid); gfxmatrix_t m2 = m; m2.tx = expected_x + (m.tx - expected_x - current_gfxfont->glyphs[space].advance*m.m00)/2; diff --git a/lib/pdf/InfoOutputDev.cc b/lib/pdf/InfoOutputDev.cc index fd9bf99..a142c77 100644 --- a/lib/pdf/InfoOutputDev.cc +++ b/lib/pdf/InfoOutputDev.cc @@ -15,6 +15,7 @@ #endif #include "GfxState.h" #include "../log.h" +#include "../q.h" #include #include @@ -216,6 +217,20 @@ static gfxfont_t* createGfxFont(FontInfo*src) return font; } +static float find_average_glyph_advance(gfxfont_t*f) +{ + if(!f->num_glyphs) + return 0.0; + + float*values = (float*)malloc(sizeof(float)*f->num_glyphs); + int t; + for(t=0;t<f->num_glyphs;t++) { + values[t] = f->glyphs[t].advance; + } + float m = medianf(values, f->num_glyphs); + free(values); + return m; 
+} gfxfont_t* FontInfo::getGfxFont() { @@ -223,6 +238,8 @@ gfxfont_t* FontInfo::getGfxFont() this->gfxfont = createGfxFont(this); this->gfxfont->id = strdup(this->id); this->space_char = findSpace(this->gfxfont); + this->average_advance = find_average_glyph_advance(this->gfxfont); + if(this->space_char>=0) { msg(" Font %s has space char %d (unicode=%d)", this->id, this->space_char, diff --git a/lib/pdf/InfoOutputDev.h b/lib/pdf/InfoOutputDev.h index ead6a44..0934add 100644 --- a/lib/pdf/InfoOutputDev.h +++ b/lib/pdf/InfoOutputDev.h @@ -85,6 +85,7 @@ public: SplashFont*splash_font; char seen; int space_char; + float average_advance; }; extern char*getFontID(GfxFont*font); diff --git a/lib/q.c b/lib/q.c index d70822d..63794e0 100644 --- a/lib/q.c +++ b/lib/q.c @@ -113,6 +113,36 @@ int mem_get(mem_t*m, void*data, int length) return length; } +// ------------------------------- median ------------------------------------- + +float medianf(float*a, int n) +{ + int i,j,l,m; + float x; + int k=n&1?n/2:n/2-1; + l=0; + m=n-1; + while(l