finetuned space char detection
author Matthias Kramm <kramm@quiss.org>
Mon, 26 Oct 2009 21:29:50 +0000 (14:29 -0700)
committer Matthias Kramm <kramm@quiss.org>
Mon, 26 Oct 2009 21:29:50 +0000 (14:29 -0700)
lib/pdf/GFXOutputDev.cc
lib/pdf/InfoOutputDev.cc
lib/pdf/InfoOutputDev.h
lib/q.c
lib/q.h

index 2596d25..881c75a 100644 (file)
@@ -1457,10 +1457,11 @@ void GFXOutputDev::drawChar(GfxState *state, double x, double y,
               !last_char_was_space) {
                double expected_x = last_char_x + current_gfxfont->glyphs[last_char].advance*m.m00;
                int space = this->current_fontinfo->space_char;
               !last_char_was_space) {
                double expected_x = last_char_x + current_gfxfont->glyphs[last_char].advance*m.m00;
                int space = this->current_fontinfo->space_char;
-               if(m.tx - expected_x >= m.m00*64) {
-                   msg("<debug> There's a %f (%f) pixel gap between char %d and char %d, I'm inserting a space here", 
+               float width = this->current_fontinfo->average_advance;
+               if(m.tx - expected_x >= m.m00*width*4/10) {
+                   msg("<debug> There's a %f pixel gap between char %d and char %d (expected no more than %f), I'm inserting a space here", 
                            m.tx-expected_x, 
                            m.tx-expected_x, 
-                           (m.tx-expected_x)/m.m00,
+                           width*m.m00*4/10,
                            last_char, glyphid);
                    gfxmatrix_t m2 = m;
                    m2.tx = expected_x + (m.tx - expected_x - current_gfxfont->glyphs[space].advance*m.m00)/2;
                            last_char, glyphid);
                    gfxmatrix_t m2 = m;
                    m2.tx = expected_x + (m.tx - expected_x - current_gfxfont->glyphs[space].advance*m.m00)/2;
index fd9bf99..a142c77 100644 (file)
@@ -15,6 +15,7 @@
 #endif
 #include "GfxState.h"
 #include "../log.h"
 #endif
 #include "GfxState.h"
 #include "../log.h"
+#include "../q.h"
 #include <math.h>
 #include <assert.h>
 
 #include <math.h>
 #include <assert.h>
 
@@ -216,6 +217,20 @@ static gfxfont_t* createGfxFont(FontInfo*src)
     return font;
 }
 
     return font;
 }
 
+static float find_average_glyph_advance(gfxfont_t*f) // "average" advance over all glyphs of f; despite the name this is the median (via medianf), not the arithmetic mean
+{
+    if(!f->num_glyphs)
+       return 0.0; // font without glyphs: report 0 instead of handing medianf() an empty array
+
+    float*values = (float*)malloc(sizeof(float)*f->num_glyphs); // scratch array, one slot per glyph; NOTE(review): the malloc result is not checked for NULL
+    int t;
+    for(t=0;t<f->num_glyphs;t++) {
+       values[t] = f->glyphs[t].advance; // gather every glyph's advance into the flat array medianf() expects
+    }
+    float m = medianf(values, f->num_glyphs); // medianf() reorders the array it is given; only this scratch copy is affected
+    free(values);
+    return m;
+}
 
 gfxfont_t* FontInfo::getGfxFont()
 {
 
 gfxfont_t* FontInfo::getGfxFont()
 {
@@ -223,6 +238,8 @@ gfxfont_t* FontInfo::getGfxFont()
         this->gfxfont = createGfxFont(this);
         this->gfxfont->id = strdup(this->id);
        this->space_char = findSpace(this->gfxfont);
         this->gfxfont = createGfxFont(this);
         this->gfxfont->id = strdup(this->id);
        this->space_char = findSpace(this->gfxfont);
+       this->average_advance = find_average_glyph_advance(this->gfxfont);
+
        if(this->space_char>=0) {
            msg("<debug> Font %s has space char %d (unicode=%d)", 
                    this->id, this->space_char, 
        if(this->space_char>=0) {
            msg("<debug> Font %s has space char %d (unicode=%d)", 
                    this->id, this->space_char, 
index ead6a44..0934add 100644 (file)
@@ -85,6 +85,7 @@ public:
     SplashFont*splash_font;
     char seen;
     int space_char;
     SplashFont*splash_font;
     char seen;
     int space_char;
+    float average_advance;
 };
 
 extern char*getFontID(GfxFont*font);
 };
 
 extern char*getFontID(GfxFont*font);
diff --git a/lib/q.c b/lib/q.c
index d70822d..63794e0 100644 (file)
--- a/lib/q.c
+++ b/lib/q.c
@@ -113,6 +113,36 @@ int mem_get(mem_t*m, void*data, int length)
     return length;
 }
 
     return length;
 }
 
+// ------------------------------- median -------------------------------------
+
+float medianf(float*a, int n) // returns the median of a[0..n-1]; the elements of a are reordered in place while searching
+{
+    int i,j,l,m;
+    float x;
+    int k=n&1?n/2:n/2-1; // rank to select: the middle index for odd n, the lower of the two middle indices for even n. NOTE(review): n==0 gives k==-1, so the final a[k] reads before the array; callers must pass n>=1
+    l=0; 
+    m=n-1; // [l,m] is the index window that still contains rank k
+    while(l<m) {
+        x=a[k]; // pivot: the value currently sitting at the wanted rank
+        i=l;j=m;
+        do {
+            while(a[i]<x) i++; // skip elements that are already smaller than the pivot
+            while(x<a[j]) j--; // skip elements that are already larger than the pivot
+            if(i<=j) {
+               //swap the out-of-place pair, then step both indices past it
+               float t = a[i];
+               a[i] = a[j];
+               a[j] = t;
+                i++;
+               j--;
+            }
+        } while(i<=j);
+        if(j<k) l=i; // rank k is not in the left part: continue from i
+        if(k<i) m=j; // rank k is not in the right part: continue up to j
+    }
+    return a[k];
+}
+
 // ------------------------------- ringbuffer_t -------------------------------
 
 typedef struct _ringbuffer_internal_t
 // ------------------------------- ringbuffer_t -------------------------------
 
 typedef struct _ringbuffer_internal_t
diff --git a/lib/q.h b/lib/q.h
index e255b4e..f6d597d 100644 (file)
--- a/lib/q.h
+++ b/lib/q.h
@@ -135,6 +135,8 @@ typedef struct _trie {
 char* strdup_n(const char*str, int size);
 char* allocprintf(const char*str, ...);
 
 char* strdup_n(const char*str, int size);
 char* allocprintf(const char*str, ...);
 
+float medianf(float*values, int n); // median of values[0..n-1] (lower middle element for even n); reorders values in place; n must be >= 1
+
 unsigned int crc32_add_byte(unsigned int crc32, unsigned char b);
 unsigned int crc32_add_string(unsigned int crc32, const char*s);
 
 unsigned int crc32_add_byte(unsigned int crc32, unsigned char b);
 unsigned int crc32_add_string(unsigned int crc32, const char*s);