X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fgocr%2Fbox.c;fp=lib%2Fgocr%2Fbox.c;h=f0cc989f8201aa185cf38a81924ab266305e8160;hb=8154e11e1c06aefe18c16b33f2b12d6de21273a4;hp=0000000000000000000000000000000000000000;hpb=e8fe2f290123fc66181709a8a5263ad9e91c6939;p=swftools.git diff --git a/lib/gocr/box.c b/lib/gocr/box.c new file mode 100644 index 0000000..f0cc989 --- /dev/null +++ b/lib/gocr/box.c @@ -0,0 +1,369 @@ +/* +This is a Optical-Character-Recognition program +Copyright (C) 2000-2007 Joerg Schulenburg + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + see README for EMAIL address + + */ + +#include +#include +#include +#include +/* do we need #include ? conflicts with INFINITY in unicode.h */ +#include "gocr.h" +#include "pgm2asc.h" + +/* for sorting letters by position on the image +/ ToDo: - use function same line like this or include lines.m1 etc. */ +int box_gt(struct box *box1, struct box *box2) { + // box1 after box2 ? + if (box1->line > box2->line) + return 1; + if (box1->line < box2->line) + return 0; + if (box1->x0 > box2->x1) // before + return 1; + if (box1->x1 < box2->x0) // before + return 0; + if (box1->x0 > box2->x0) // before, overlapping! + return 1; + + return 0; +} + +/* --- copy part of pix p into new pix b ---- len=10000 + * Returns: 0 on success, 1 on error. + * naming it as copybox isnt very clever, because it dont have to do with the + * char boxes (struct box) + */ +int copybox (pix * p, int x0, int y0, int dx, int dy, pix * b, int len) { + int x, y; + + /* test boundaries */ + if (b->p == NULL || dx < 0 || dy < 0 || dx * dy > len) { + fprintf(stderr, " error-copybox x=%5d %5d d=%5d %5d\n", x0, y0, dx, dy); + return 1; + } + + b->x = dx; + b->y = dy; + b->bpp = 1; +#ifdef FASTER_INCOMPLETE + for (y = 0; y < dy; y++) + memcpy(&pixel_atp(b, 0, y), &pixel_atp(p, x0, y + y0 ), dx); + // and unmark pixels +#else + for (y = 0; y < dy; y++) + for (x = 0; x < dx; x++) + pixel_atp(b, x, y) = getpixel(p, x + x0, y + y0); +#endif + + return 0; +} + +/* reset table of alternative chars (and free memory) */ +int reset_box_ac(struct box *box){ + int i; + for (i=0; inum_ac; i++) + if (box->tas[i]) { + /* fprintf(stderr,"DBG free_s[%d] %p %s\n",i,box->tas[i],box->tas[i]); */ + free(box->tas[i]); + box->tas[i]=0; /* prevent double freeing */ + } + box->num_ac=0; /* mark as freed */ + return 0; +} + +/* ini or copy a box: get memory for box and initialize the memory */ +struct box *malloc_box (struct box *inibox) { + struct box *buf; + int i; + + buf = (struct box *) malloc(sizeof(struct box)); + if (!buf) + return NULL; + if (inibox) { + memcpy(buf, inibox, sizeof(struct box)); + /* only pointer are copied, we want to copy the contents too */ + for (i=0;inum_ac;i++) { + if (inibox->tas[i]) { + buf->tas[i]=(char *)malloc(strlen(inibox->tas[i])+1); + memcpy(buf->tas[i], inibox->tas[i], strlen(inibox->tas[i])+1); + } + } + } + else { /* ToDo: init it */ + buf->num_ac=0; + buf->num_frames=0; + } + /* fprintf(stderr,"\nDBG ini_box %p",buf); */ + return buf; +} + +/* free memory of box */ +int free_box (struct box *box) { + if (!box) return 0; + /* fprintf(stderr,"DBG free_box %p\n",box); out_x(box); */ + reset_box_ac(box); /* free alternative char table */ + free(box); /* free the box memory */ + return 0; +} + +/* simplify the vectorgraph, + * but what is the best way? + * a) melting two neighbouring vectors with nearly same direction? + * (nearest angle to pi) + * b) melting three neigbours with smallest area? + * ToDo: + * mode = 0 - only lossless + * mode = 1 - reduce one vector, smallest possible loss + * mode = 2 - remove jitter (todo, or somewhere else) + * ToDo: include also loop around (last - first element) + * ToDo: reduce by 10..50% + */ +int reduce_vectors ( struct box *box1, int mode ) { + int i1, i2, nx, ny, mx, my, len, + minlen=1024, /* minlength of to neighbouring vectors */ + besti1=0, /* frame for best reduction */ + besti2=2; /* vector replacing its predecessor */ + double sprod, maxsprod=-1; + if (mode!=1) fprintf(stderr,"ERR not supported yet, ToDo\n"); + for (i2=1,i1=0; i1num_frames; i1++) { /* every frame */ + for (;i2num_frame_vectors[i1]-1; i2++) { /* every vector */ + /* predecessor n */ + nx = box1->frame_vector[i2-0][0] - box1->frame_vector[i2-1][0]; + ny = box1->frame_vector[i2-0][1] - box1->frame_vector[i2-1][1]; + /* successor m */ + mx = box1->frame_vector[i2+1][0] - box1->frame_vector[i2-0][0]; + my = box1->frame_vector[i2+1][1] - box1->frame_vector[i2-0][1]; + /* angle is w = a*b/(|a|*|b|) = 1 means parallel */ + /* normalized: minimize w^2 = (a*b/(|a|*|b|)-1)^2 */ + /* -1=90grd, 0=0grd, -2=180grd */ + sprod = /* fabs */(abs(nx*mx+ny*my)*(nx*mx+ny*my) + /(1.*(nx*nx+ny*ny)*(mx*mx+my*my))-1); + /* we dont include math.h because INFINITY conflicts to unicode,h */ + if (sprod<0) sprod=-sprod; + len = (mx*mx+my*my)*(nx*nx+ny*ny); /* sum lengths^2 */ +// ..c ###c ... .. ... +// .b. len=2+2 #b.. len=2+5 #bc len=1+2 bc len=1+1 b#a len=4+5 +// a.. spr=0 a... spr=1/10 a.. spr=1/4 a. spr=1 ##c spr=9/5 +// + if ( len* sprod* sprod* sprod* sprod + num_frames>0) + for (i2=besti2; i2num_frame_vectors[ box1->num_frames-1 ]-1; i2++) { + box1->frame_vector[i2][0]=box1->frame_vector[i2+1][0]; + box1->frame_vector[i2][1]=box1->frame_vector[i2+1][1]; + } + for (i1=besti1; i1num_frames; i1++) + box1->num_frame_vectors[i1]--; +// fprintf(stderr,"\nDBG_reduce_vectors i= %d nv= %d sprod=%f len2=%d\n# ...", +// besti2,box1->num_frame_vectors[ box1->num_frames-1 ],maxsprod,minlen); +// out_x(box1); + return 0; +} + +/* add the contents of box2 to box1 + * especially add vectors of box2 to box1 + */ +int merge_boxes( struct box *box1, struct box *box2 ) { + int i1, i2, i3, i4; + struct box tmpbox, *bsmaller, *bbigger; /* for mixing and sorting */ + /* DEBUG, use valgrind to check uninitialized memory */ +#if 0 + fprintf(stderr,"\nDBG merge_boxes_input:"); out_x(box1); out_x(box2); +#endif + /* pair distance is to expendable, taking borders is easier */ + if ((box2->x1 - box2->x0)*(box2->y1 - box2->y0) + >(box1->x1 - box1->x0)*(box1->y1 - box1->y0)) { + bbigger=box2; bsmaller=box1; } + else { + bbigger=box1; bsmaller=box2; } + /* ToDo: does not work if a third box is added */ + if (box2->y0>box1->y1 || box2->y1y0 + || box2->x0>box1->x1 || box2->x1x0) { + box1->num_boxes += box2->num_boxes; /* num seperate objects 2=ij */ + } else { + if (box2->num_boxes>box1->num_boxes) box1->num_boxes=box2->num_boxes; + box1->num_subboxes += box2->num_subboxes+1; /* num holes 1=abdepq 2=B */ + } + box1->dots += box2->dots; /* num i-dots */ + if ( box2->x0 < box1->x0 ) box1->x0 = box2->x0; + if ( box2->x1 > box1->x1 ) box1->x1 = box2->x1; + if ( box2->y0 < box1->y0 ) box1->y0 = box2->y0; + if ( box2->y1 > box1->y1 ) box1->y1 = box2->y1; + i1 = i2 = 0; + if (bbigger->num_frames) + i1 = bbigger->num_frame_vectors[ bbigger->num_frames - 1 ]; + if (bsmaller->num_frames) + i2 = bsmaller->num_frame_vectors[ bsmaller->num_frames - 1 ]; + while (i1+i2 > MaxFrameVectors) { + if (i1>i2) { reduce_vectors( bbigger, 1 ); i1--; } + else { reduce_vectors( bsmaller, 1 ); i2--; } + } + /* if i1+i2>MaxFrameVectors simplify the vectorgraph */ + /* if sum num_frames>MaxNumFrames through shortest graph away and warn */ + /* first copy the bigger box */ + memcpy(&tmpbox, bbigger, sizeof(struct box)); + /* attach the smaller box */ + for (i4=i3=0; i3num_frames; i3++) { + if (tmpbox.num_frames>=MaxNumFrames) break; + + for (; i4num_frame_vectors[i3]; i4++) { + memcpy(tmpbox.frame_vector[i1], + bsmaller->frame_vector[i4],2*sizeof(int)); + i1++; + } + tmpbox.num_frame_vectors[ tmpbox.num_frames ] = i1; + tmpbox.frame_vol[ tmpbox.num_frames ] = bsmaller->frame_vol[ i3 ]; + tmpbox.frame_per[ tmpbox.num_frames ] = bsmaller->frame_per[ i3 ]; + tmpbox.num_frames++; + if (tmpbox.num_frames>=MaxNumFrames) { + if (JOB->cfg.verbose) + fprintf(stderr,"\nDBG merge_boxes MaxNumFrames reached"); + break; + } + } + /* copy tmpbox to destination */ + box1->num_frames = tmpbox.num_frames; + memcpy(box1->num_frame_vectors, + tmpbox.num_frame_vectors,sizeof(int)*MaxNumFrames); + memcpy(box1->frame_vol, + tmpbox.frame_vol,sizeof(int)*MaxNumFrames); + memcpy(box1->frame_per, + tmpbox.frame_per,sizeof(int)*MaxNumFrames); + memcpy(box1->frame_vector, + tmpbox.frame_vector,sizeof(int)*2*MaxFrameVectors); +#if 0 + if (JOB->cfg.verbose) + fprintf(stderr,"\nDBG merge_boxes_result:"); out_x(box1); +#endif + return 0; +} + +/* used for division of glued chars + * after a box is splitted into 2, where vectors are copied to both, + * vectors outside the new box are cutted and thrown away, + * later replaced by + * - 1st remove outside vectors with outside neighbours (complete frames?) + * add vector on outside vector with inside neighbours + * care about connections through box between outside vectors + * - 2nd reduce outside crossings (inclusive splitting frames if necessary) + * depending on direction (rotation) of outside connections + * - 3th shift outside vectors to crossing points + * - split add this points, connect only in-out...out-in, + * - cutting can result in more objects + * ToDo: dont connect --1---2--------3----4-- new-y1 (inside above not drawn) + * \ \->>>>-/ / outside + * \----<<<<-----/ old-y1 + * |======| subtractable? + * + * only connect --1---2--------3----4-- new-y1 + * \>>/ \>>>/ old-y1 outside + * + * ToDo: new vol, per + */ +int cut_box( struct box *box1) { + int i1, i2, i3, i4, x, y, lx, ly, dbg=0; + if (JOB->cfg.verbose) dbg=1; // debug level, enlarge to get more output + if (dbg) fprintf(stderr,"\n cut box x= %3d %3d", box1->x0, box1->y0); + /* check if complete frames are outside the box */ + for (i1=0; i1num_frames; i1++){ + if (dbg>2) fprintf(stderr,"\n checking frame %d outside", i1); + i2 = ((i1)?box1->num_frame_vectors[ i1-1 ]:0); // this frame + i3 = box1->num_frame_vectors[ i1 ]; // next frame + for (i4=i2; i4 < i3; i4++) { + x = box1->frame_vector[i4][0]; + y = box1->frame_vector[i4][1]; + /* break, if one vector is lying inside */ + if (x>=box1->x0 && x<=box1->x1 && y>=box1->y0 && y<=box1->y1) break; + } + if (i4==i3) { /* all vectors outside */ + if (dbg>1) fprintf(stderr,"\n remove frame %d",i1); + /* replace all frames i1,i1+1,... by i1+1,i1+2,... */ + /* replace (x,y) pairs first */ + for (i4=i2; i4num_frame_vectors[ box1->num_frames-1 ]-(i3-i2); + i4++) { + box1->frame_vector[i4][0] = box1->frame_vector[i4+i3-i2][0]; + box1->frame_vector[i4][1] = box1->frame_vector[i4+i3-i2][1]; + } + /* replace the num_frame_vectors */ + for (i4=i1; i4num_frames-1; i4++) + box1->num_frame_vectors[ i4 ] = + box1->num_frame_vectors[ i4+1 ]-(i3-i2); + box1->num_frames--; i1--; + } + } + /* remove vectors outside the box */ + i3=0; + for (i1=0; i1num_frames; i1++){ + if (dbg>2) fprintf(stderr,"\n check cutting vectors on frame %d", i1); + x = box1->frame_vector[0][0]; /* last x */ + y = box1->frame_vector[0][1]; /* last y */ + /* ToDo: start inside to get a closed object */ + if (xx0 || x>box1->x1 || yy0 || y>box1->y1) i3=1; + for (i2=0; i2num_frame_vectors[ i1 ]; i2++) { + lx = x; /* last x */ + ly = y; /* last y */ + x = box1->frame_vector[i2][0]; + y = box1->frame_vector[i2][1]; + // fprintf(stderr,"DBG LEV3 i2= %3d xy= %3d %3d",i2,x,y); + /* check if outside */ + if (xx0 || x>box1->x1 || yy0 || y>box1->y1) { + /* replace by nearest point at border, ToDo: better crossingpoint */ + if (i3==0) { /* wrong if it starts outside */ + if (x < box1->x0) x = box1->frame_vector[i2][0] = box1->x0; + if (x > box1->x1) x = box1->frame_vector[i2][0] = box1->x1; + if (y < box1->y0) y = box1->frame_vector[i2][1] = box1->y0; + if (y > box1->y1) y = box1->frame_vector[i2][1] = box1->y1; + } else { + /* remove vector */ + if (dbg>1) fprintf(stderr,"\n remove vector[%d][%d] x= %2d %2d",i1,i2,x-box1->x0,y-box1->y0); + for (i4=i2;i4num_frame_vectors[ box1->num_frames-1 ]-1;i4++) { + box1->frame_vector[i4][0] = box1->frame_vector[i4+1][0]; + box1->frame_vector[i4][1] = box1->frame_vector[i4+1][1]; + } + for (i4=i1; i4num_frames; i4++) + box1->num_frame_vectors[ i4 ]--; + i2--; /* next element is shiftet now, setting back the counter */ + } + i3++; + // fprintf(stderr," outside i3= %d\n",i3); + continue; + } + // fprintf(stderr," inside i3= %d",i3); + if (i3) { /* ToDo: better crossing point last vector and border */ + if (lx < box1->x0) lx = box1->x0; + if (lx > box1->x1) lx = box1->x1; + if (ly < box1->y0) ly = box1->y0; + if (ly > box1->y1) ly = box1->y1; + x = box1->frame_vector[i2][0] = lx; + y = box1->frame_vector[i2][1] = ly; + i3 = 0; + } + // fprintf(stderr," xy= %3d %3d\n",x,y); + } + } + //if (dbg>2) { fprintf(stderr,"\nDBG cut_box_result:"); out_x(box1); } + return 0; +} +