From 727449f065760e1d99735f44638c25678cd8bc52 Mon Sep 17 00:00:00 2001 From: Matthias Kramm Date: Wed, 19 Aug 2009 22:28:17 +0200 Subject: [PATCH] implemented '-s detectspace' functionality --- lib/Makefile.in | 2 +- lib/pdf/GFXOutputDev.cc | 31 +++++++++++++++++++++++++-- lib/pdf/GFXOutputDev.h | 6 ++++++ lib/pdf/InfoOutputDev.cc | 52 +++++++++++++++++++++++++++++++++++++++++++--- lib/pdf/InfoOutputDev.h | 2 ++ lib/pdf/Makefile.in | 10 ++++----- lib/pdf/pdf.cc | 13 +++++++++++- spec/spec_helper.rb | 2 +- src/Makefile.in | 7 ++++--- src/pdf2pdf.1 | 40 +++++++++++++++++++++++++++++++++++ src/pdf2pdf.c | 31 ++++++++++++++++++++------- src/pdf2pdf.doc | 29 ++++++++++++++++++++++++++ 12 files changed, 201 insertions(+), 24 deletions(-) create mode 100644 src/pdf2pdf.1 create mode 100644 src/pdf2pdf.doc diff --git a/lib/Makefile.in b/lib/Makefile.in index e1d2295..630a70f 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -166,7 +166,7 @@ uninstall: clean: rm -f *.o *.obj *.lo *.a *.lib *.la gmon.out - for dir in modules devices swf as3 h.263 gfxpoly;do rm -f $$dir/*.o $$dir/*.obj $$dir/*.lo $$dir/*.a $$dir/*.lib $$dir/*.la $$dir/gmon.out;done + for dir in modules devices swf as3 readers art gocr h.263 gfxpoly;do rm -f $$dir/*.o $$dir/*.obj $$dir/*.lo $$dir/*.a $$dir/*.lib $$dir/*.la $$dir/gmon.out;done cd lame && $(MAKE) clean && cd .. || true cd action && $(MAKE) clean && cd .. cd python && $(MAKE) clean && cd .. 
diff --git a/lib/pdf/GFXOutputDev.cc b/lib/pdf/GFXOutputDev.cc index 2d80cb2..2c90c92 100644 --- a/lib/pdf/GFXOutputDev.cc +++ b/lib/pdf/GFXOutputDev.cc @@ -1416,10 +1416,35 @@ void GFXOutputDev::drawChar(GfxState *state, double x, double y, gfxmatrix_t m = this->current_font_matrix; this->transformXY(state, x-originX, y-originY, &m.tx, &m.ty); //m.tx += originX; m.ty += originY; - - msg(" drawChar(%f,%f,c='%c' (%d), u=%d <%d>) CID=%d render=%d glyphid=%d font=%08x",m.tx,m.ty,(charid&127)>=32?charid:'?', charid, u, uLen, font->isCIDFont(), render, glyphid, current_gfxfont); + msg(" drawChar(%f,%f,c='%c' (%d), u=%d <%d>) CID=%d render=%d glyphid=%d font=%08x",m.tx,m.ty,(charid&127)>=32?charid:'?', charid, u, uLen, font->isCIDFont(), render, glyphid, current_gfxfont); + if((render == RENDER_FILL && !config_drawonlyshapes) || render == RENDER_INVISIBLE) { + int space = this->current_fontinfo->space_char; + if(config_extrafontdata && space>=0 && m.m00 && !m.m01) { + /* space char detection */ + if(last_char_gfxfont == current_gfxfont && + last_char_y == m.ty && + !last_char_was_space) { + double expected_x = last_char_x + current_gfxfont->glyphs[last_char].advance*m.m00; + int space = this->current_fontinfo->space_char; + if(m.tx - expected_x >= m.m00*64) { + msg(" There's a %f (%f) pixel gap between char %d and char %d, I'm inserting a space here", + m.tx-expected_x, + (m.tx-expected_x)/m.m00, + last_char, glyphid); + gfxmatrix_t m2 = m; + m2.tx = expected_x + (m.tx - expected_x - current_gfxfont->glyphs[space].advance*m.m00)/2; + if(m2.tx < expected_x) m2.tx = expected_x; + device->drawchar(device, current_gfxfont, space, &col, &m2); + } + } + last_char_gfxfont = current_gfxfont; + last_char = glyphid; + last_char_x = m.tx; + last_char_y = m.ty; + last_char_was_space = GLYPH_IS_SPACE(¤t_gfxfont->glyphs[glyphid]); + } device->drawchar(device, current_gfxfont, glyphid, &col, &m); } else { msg(" Drawing glyph %d as shape", charid); @@ -1605,6 +1630,8 @@ void 
GFXOutputDev::startPage(int pageNum, GfxState *state, double crop_x1, doubl states[statepos].dashPattern = 0; states[statepos].dashLength = 0; states[statepos].dashStart = 0; + + this->last_char_gfxfont = 0; } diff --git a/lib/pdf/GFXOutputDev.h b/lib/pdf/GFXOutputDev.h index 0b07e4f..a35f70c 100644 --- a/lib/pdf/GFXOutputDev.h +++ b/lib/pdf/GFXOutputDev.h @@ -274,6 +274,12 @@ public: FontInfo*current_fontinfo; gfxmatrix_t current_font_matrix; + gfxfont_t* last_char_gfxfont; + int last_char; + double last_char_x; + double last_char_y; + char last_char_was_space; + /* config */ int config_use_fontconfig; int config_break_on_warning; diff --git a/lib/pdf/InfoOutputDev.cc b/lib/pdf/InfoOutputDev.cc index 75644bc..affad69 100644 --- a/lib/pdf/InfoOutputDev.cc +++ b/lib/pdf/InfoOutputDev.cc @@ -16,8 +16,9 @@ #include "GfxState.h" #include "../log.h" #include +#include -/* there's not yet a way to set this */ +int config_addspace = 1; int config_fontquality = 10; int config_bigchar = 0; @@ -70,6 +71,7 @@ FontInfo::FontInfo(char*id) this->lastx = 0; this->lasty = 0; this->gfxfont = 0; + this->space_char = -1; } FontInfo::~FontInfo() { @@ -92,7 +94,42 @@ FontInfo::~FontInfo() gfxfont_free(this->gfxfont); } -gfxfont_t* createGfxFont(FontInfo*src) +static int findSpace(gfxfont_t*font) +{ + int first_space = -1; + int t; + for(t=0;tnum_glyphs;t++) { + gfxglyph_t*g = &font->glyphs[t]; + if(GLYPH_IS_SPACE(g)) { + if(g->unicode == 32) return t; + if(first_space<0) + first_space = t; + } + } + if(GLYPH_IS_SPACE(&font->glyphs[32])) { + return 32; + } + return first_space; +} + +static int addSpace(gfxfont_t*font) +{ + font->num_glyphs++; + font->glyphs = (gfxglyph_t*)realloc(font->glyphs, sizeof(gfxglyph_t)*font->num_glyphs); + gfxglyph_t*g = &font->glyphs[font->num_glyphs-1]; + memset(g, 0, sizeof(*g)); + g->unicode = 32; + //g->advance = font->ascent; + g->advance = fabs(font->ascent - font->descent)*2 / 3; + if(font->max_unicode > 32) + font->unicode2glyph[32] = 
font->num_glyphs-1; +#if 0 + g->line = gfxline_makerectangle(0, -font->ascent, g->advance, font->descent); +#endif + return font->num_glyphs-1; +} + +static gfxfont_t* createGfxFont(FontInfo*src) { gfxfont_t*font = (gfxfont_t*)malloc(sizeof(gfxfont_t)); memset(font, 0, sizeof(gfxfont_t)); @@ -185,6 +222,15 @@ gfxfont_t* FontInfo::getGfxFont() if(!this->gfxfont) { this->gfxfont = createGfxFont(this); this->gfxfont->id = strdup(this->id); + this->space_char = findSpace(this->gfxfont); + if(this->space_char>=0) { + msg(" Font %s has space char %d (unicode=%d)", + this->id, this->space_char, + this->gfxfont->glyphs[this->space_char].unicode); + } else if(config_addspace) { + this->space_char = addSpace(this->gfxfont); + msg(" Appending space char to font %s, position %d", this->gfxfont->id, this->space_char); + } } return this->gfxfont; } @@ -340,7 +386,7 @@ void InfoOutputDev::drawChar(GfxState *state, double x, double y, g->advance = currentfont->splash_font->last_advance; g->unicode = 0; } - if(uLen && (u[0]>=32 && u[0]unicode || !g->unicode)) { + if(uLen && ((u[0]>=32 && u[0]unicode) || !g->unicode)) { g->unicode = u[0]; } if(currentfont->lastchar>=0 && currentfont->lasty == y) { diff --git a/lib/pdf/InfoOutputDev.h b/lib/pdf/InfoOutputDev.h index 68b036f..04e98f7 100644 --- a/lib/pdf/InfoOutputDev.h +++ b/lib/pdf/InfoOutputDev.h @@ -46,6 +46,7 @@ #include "../gfxfont.h" #define INTERNAL_FONT_SIZE 1024.0 +#define GLYPH_IS_SPACE(g) ((!(g)->line || ((g)->line->type==gfx_moveTo && !(g)->line->next)) && (g)->advance) struct GlyphInfo { @@ -83,6 +84,7 @@ public: int*charid2glyph; SplashFont*splash_font; char seen; + int space_char; }; extern char*getFontID(GfxFont*font); diff --git a/lib/pdf/Makefile.in b/lib/pdf/Makefile.in index a52a36d..ea86f9b 100644 --- a/lib/pdf/Makefile.in +++ b/lib/pdf/Makefile.in @@ -34,17 +34,17 @@ fonts.$(O): fonts.c $(C) fonts.c -o $@ cmyk.$(O): cmyk.cc $(CC) -I ./ -I xpdf cmyk.cc -o $@ -GFXOutputDev.$(O): GFXOutputDev.cc GFXOutputDev.h 
CommonOutputDev.h ../gfxpoly.h +GFXOutputDev.$(O): GFXOutputDev.cc GFXOutputDev.h CommonOutputDev.h InfoOutputDev.h ../gfxpoly.h $(CC) -I ./ -I xpdf GFXOutputDev.cc -o $@ InfoOutputDev.$(O): InfoOutputDev.cc InfoOutputDev.h $(CC) -I ./ -I xpdf InfoOutputDev.cc -o $@ -BitmapOutputDev.$(O): BitmapOutputDev.cc BitmapOutputDev.h CommonOutputDev.h +BitmapOutputDev.$(O): BitmapOutputDev.cc BitmapOutputDev.h CommonOutputDev.h InfoOutputDev.h $(CC) -I ./ -I xpdf BitmapOutputDev.cc -o $@ -FullBitmapOutputDev.$(O): FullBitmapOutputDev.cc FullBitmapOutputDev.h CommonOutputDev.h +FullBitmapOutputDev.$(O): FullBitmapOutputDev.cc FullBitmapOutputDev.h CommonOutputDev.h InfoOutputDev.h $(CC) -I ./ -I xpdf FullBitmapOutputDev.cc -o $@ -DummyOutputDev.$(O): DummyOutputDev.cc DummyOutputDev.h +DummyOutputDev.$(O): DummyOutputDev.cc DummyOutputDev.h InfoOutputDev.h $(CC) -I ./ -I xpdf DummyOutputDev.cc -o $@ -pdf.$(O): pdf.cc GFXOutputDev.h InfoOutputDev.h CommonOutputDev.h BitmapOutputDev.h FullBitmapOutputDev.h +pdf.$(O): pdf.cc GFXOutputDev.h InfoOutputDev.h CommonOutputDev.h BitmapOutputDev.h FullBitmapOutputDev.h InfoOutputDev.h $(CC) -I ./ -I xpdf pdf.cc -o $@ xpdf/UnicodeMap.$(O): xpdf/UnicodeMap.cc diff --git a/lib/pdf/pdf.cc b/lib/pdf/pdf.cc index 3e6a594..5cada51 100644 --- a/lib/pdf/pdf.cc +++ b/lib/pdf/pdf.cc @@ -394,13 +394,24 @@ char* pdf_doc_getinfo(gfxdocument_t*doc, const char*name) } +/* shortcut to InfoOutputDev.cc */ +extern int config_addspace; +extern int config_fontquality; +extern int config_bigchar; + static void pdf_set_parameter(gfxsource_t*src, const char*name, const char*value) { gfxsource_internal_t*i = (gfxsource_internal_t*)src->internal; - parameterlist_t*p = &i->parameters; + msg(" setting parameter %s to \"%s\"", name, value); if(!strncmp(name, "fontdir", strlen("fontdir"))) { addGlobalFontDir(value); + } else if(!strcmp(name, "detectspaces")) { + config_addspace = atoi(value); + } else if(!strcmp(name, "fontquality")) { + config_fontquality = 
atoi(value); + } else if(!strcmp(name, "bigchar")) { + config_bigchar = atoi(value); } else if(!strcmp(name, "pages")) { global_page_range = strdup(value); } else if(!strncmp(name, "font", strlen("font")) && name[4]!='q') { diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index bbfe1a5..1727b0f 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -112,7 +112,7 @@ class DocFile `pdfinfo #{@filename}` =~ /Page size:\s*([0-9]+) x ([0-9]+) pts/ width,height = $1,$2 dpi = (72.0 * 612 / width.to_i).to_i - output = `pdf2swf --flatten -s zoom=#{dpi} -p #{@page} #{@filename} -o #{@swfname} 2>&1` + output = `pdf2swf -f --flatten -s zoom=#{dpi} -p #{@page} #{@filename} -o #{@swfname} 2>&1` #output = `pdf2swf -s zoom=#{dpi} --flatten -p #{@page} #{@filename} -o #{@swfname} 2>&1` raise ConversionFailed.new(output,@swfname) unless File.exists?(@swfname) end diff --git a/src/Makefile.in b/src/Makefile.in index 3304e62..f176f65 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -97,8 +97,8 @@ as3compile$(E): as3compile.$(O) ../lib/librfxswf$(A) ../lib/libbase$(A) jpeg2swf$(E): jpeg2swf.$(O) ../lib/librfxswf$(A) ../lib/libbase$(A) $(L) jpeg2swf.$(O) -o $@ ../lib/librfxswf$(A) ../lib/libbase$(A) $(LIBS) $(STRIP) $@ -swfrender$(E): swfrender.$(O) ../lib/librfxswf$(A) ../lib/libgfx$(A) ../lib/libbase$(A) ../lib/libgfxswf$(A) - $(L) swfrender.$(O) -o $@ ../lib/librfxswf$(A) ../lib/libgfx$(A) ../lib/libgfxswf$(A) ../lib/libbase$(A) $(LIBS) +swfrender$(E): swfrender.$(O) ../lib/libgfxswf$(A) ../lib/librfxswf$(A) ../lib/libgfx$(A) ../lib/libbase$(A) + $(L) swfrender.$(O) -o $@ ../lib/libgfxswf$(A) ../lib/librfxswf$(A) ../lib/libgfx$(A) ../lib/libbase$(A) $(LIBS) $(STRIP) $@ PDF2SWF_OBJ=../lib/libgfxswf$(A) ../lib/librfxswf$(A) ../lib/libpdf$(A) ../lib/libgfx$(A) ../lib/libbase$(A) @@ -137,6 +137,7 @@ doc: perl ../parsedoc.pl swfdump.doc perl ../parsedoc.pl swfc.doc perl ../parsedoc.pl as3compile.doc - #perl ../parsedoc.pl swfbytes.doc + perl ../parsedoc.pl swfbytes.doc + 
perl ../parsedoc.pl pdf2pdf.doc .PHONY: clean doc diff --git a/src/pdf2pdf.1 b/src/pdf2pdf.1 new file mode 100644 index 0000000..a13a721 --- /dev/null +++ b/src/pdf2pdf.1 @@ -0,0 +1,40 @@ +.TH pdf2pdf "1" "August 2009" "pdf2pdf" "swftools" +.SH NAME +pdf2pdf - Runs a pdf through the pdf2swf conversion engine, and writes it back to a pdf. + +.SH Synopsis +.B pdf2pdf + +.SH DESCRIPTION +Runs a pdf through the pdf2swf conversion engine, and writes it +back to a pdf. + +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR + Print short help message and exit +.TP +\fB\-v\fR, \fB\-\-verbose\fR + Be verbose. Use more than one -v for greater effect. +.TP +\fB\-p\fR, \fB\-\-pages\fR \fIpages\fR + Pages to convert +.TP +\fB\-X\fR, \fB\-\-width\fR \fIwidth\fR + Make sure the output pdf is \fIwidth\fR pixels wide +.TP +\fB\-Y\fR, \fB\-\-height\fR \fIheight\fR + Make sure the output pdf is \fIheight\fR pixels high +.TP +\fB\-s\fR, \fB\-\-set\fR \fIparameter>= +.TP +\fB\-o\fR, \fB\-\-output\fR \fIfilename\fR + Write output to file \fIfilename\fR. 
(If not given, the output will go + to a file with the extension .print.pdf) +.TP +\fB\-V\fR, \fB\-\-version\fR + Print version info and exit +.SH AUTHOR + +Matthias Kramm diff --git a/src/pdf2pdf.c b/src/pdf2pdf.c index faa8d1b..1803029 100644 --- a/src/pdf2pdf.c +++ b/src/pdf2pdf.c @@ -125,13 +125,16 @@ int args_callback_option(char*name,char*val) { return 0; } -struct options_t options[] = -{{"o","output"}, - {"q","quiet"}, - {"V","version"}, - {"s","set"}, - {"p","pages"}, - {0,0} +static struct options_t options[] = { +{"h", "help"}, +{"v", "verbose"}, +{"p", "pages"}, +{"X", "width"}, +{"Y", "height"}, +{"s", "set"}, +{"o", "output"}, +{"V", "version"}, +{0,0} }; int args_callback_longoption(char*name,char*val) { @@ -168,8 +171,20 @@ int args_callback_command(char*name, char*val) { return 0; } -void args_callback_usage(char*name) +void args_callback_usage(char *name) { + printf("\n"); + printf("Usage: %s \n", name); + printf("\n"); + printf("-h , --help Print short help message and exit\n"); + printf("-v , --verbose Be verbose. Use more than one -v for greater effect.\n"); + printf("-p , --pages Pages to convert\n"); + printf("-X , --width Make sure the output pdf is pixels wide\n"); + printf("-Y , --height Make sure the output pdf is pixels high\n"); + printf("-s , --set = Set to \n"); + printf("-o , --output Write output to file .\n"); + printf("-V , --version Print version info and exit\n"); + printf("\n"); } int main(int argn, char *argv[]) diff --git a/src/pdf2pdf.doc b/src/pdf2pdf.doc new file mode 100644 index 0000000..78e15d8 --- /dev/null +++ b/src/pdf2pdf.doc @@ -0,0 +1,29 @@ +Usage: %s + +Runs a pdf through the pdf2swf conversion engine, and writes it back to a pdf. + +Runs a pdf through the pdf2swf conversion engine, and writes it +back to a pdf. + +-h, --help + Print short help message and exit +-v, --verbose + Be verbose. Use more than one -v for greater effect. 
+-p, --pages <pages> + Pages to convert +-X, --width <width> + Make sure the output pdf is <width> pixels wide +-Y, --height <height> + Make sure the output pdf is <height> pixels high +-s, --set <parameter>=<value> + Set <parameter> to <value> +-o, --output <filename> + Write output to file <filename>. + Write output to file <filename>. (If not given, the output will go + to a file with the extension .print.pdf) +-V, --version + Print version info and exit + +.SH AUTHOR + +Matthias Kramm -- 1.7.10.4