X-Git-Url: http://git.asbjorn.biz/?a=blobdiff_plain;f=lib%2Fpython%2Fgfx.c;h=1d28bc5e64b699260ebc2d771cf16cf88c408f9a;hb=dba45ef979c0f9aaba3a594532006f048d2d5ac0;hp=427b3df5761cd4996eb8fcc3463bb938702b3a83;hpb=9704bfcd1ddcad5250d7d4aeb1d27225f1792b34;p=swftools.git

diff --git a/lib/python/gfx.c b/lib/python/gfx.c
index 427b3df..1d28bc5 100644
--- a/lib/python/gfx.c
+++ b/lib/python/gfx.c
@@ -25,13 +25,18 @@
 #undef HAVE_STAT
 #include "../devices/swf.h"
 #include "../devices/render.h"
+#include "../devices/ocr.h"
 #include "../devices/rescale.h"
 #include "../devices/text.h"
 #include "../pdf/pdf.h"
+#include "../readers/swf.h"
+#include "../readers/image.h"
 #include "../log.h"
 #include "../utf8.h"
 
-gfxsource_t*pdfdriver;
+static gfxsource_t*pdfdriver = 0;
+static gfxsource_t*swfdriver = 0;
+static gfxsource_t*imagedriver = 0;
 
 staticforward PyTypeObject OutputClass;
 staticforward PyTypeObject PageClass;
@@ -133,11 +138,11 @@ static PyObject* output_endpage(PyObject* _self, PyObject* args, PyObject* kwarg
     self->output_device->endpage(self->output_device);
     return PY_NONE;
 }
-PyDoc_STRVAR(output_setParameter_doc, \
-"setParameter(key, value)\n\n"
+PyDoc_STRVAR(output_setparameter_doc, \
+"setparameter(key, value)\n\n"
 "Set a output-device dependent parameter"
 );
-static PyObject* output_setParameter(PyObject* _self, PyObject* args, PyObject* kwargs)
+static PyObject* output_setparameter(PyObject* _self, PyObject* args, PyObject* kwargs)
 {
     OutputObject* self = (OutputObject*)_self;
     static char *kwlist[] = {"key", "value", NULL};
@@ -166,6 +171,27 @@ static PyObject* f_createSWF(PyObject* parent, PyObject* args, PyObject* kwargs)
     return (PyObject*)self;
 }
 
+PyDoc_STRVAR(f_createOCR_doc, \
+"OCR()\n\n"
+"Creates a device which processes documents using OCR (optical\n"
+"character recognition).\n"
+"This is handy for e.g. extracting fulltext from PDF documents\n"
+"which have broken fonts, and where hence the \"PlainText\"\n"
+"device doesn't work.\n"
+);
+static PyObject* f_createOCR(PyObject* parent, PyObject* args, PyObject* kwargs)
+{
+    static char *kwlist[] = {NULL};
+    if (args && !PyArg_ParseTupleAndKeywords(args, kwargs, "", kwlist))
+        return NULL;
+    OutputObject*self = PyObject_New(OutputObject, &OutputClass);
+
+    self->output_device = malloc(sizeof(gfxdevice_t));
+    gfxdevice_ocr_init(self->output_device);
+    return (PyObject*)self;
+}
+
+
 PyDoc_STRVAR(f_createImageList_doc, \
 "ImageList()\n\n"
 "Creates a device which renders documents to bitmaps.\n"
@@ -415,7 +441,7 @@ static PyMethodDef output_methods[] =
     {"save", (PyCFunction)output_save, METH_KEYWORDS, output_save_doc},
     {"startpage", (PyCFunction)output_startpage, METH_KEYWORDS, output_startpage_doc},
     {"endpage", (PyCFunction)output_endpage, METH_KEYWORDS, output_endpage_doc},
-    {"setParameter", (PyCFunction)output_setParameter, METH_KEYWORDS, output_setParameter_doc},
+    {"setparameter", (PyCFunction)output_setparameter, METH_KEYWORDS, output_setparameter_doc},
     {0,0,0,0}
 };
 
@@ -657,14 +683,14 @@ static PyObject* doc_getInfo(PyObject* _self, PyObject* args, PyObject* kwargs)
     return PyString_FromString(s);
 }
 
-PyDoc_STRVAR(doc_setParameter_doc,
-"setParameter(key, value)\n\n"
+PyDoc_STRVAR(doc_setparameter_doc,
+"setparameter(key, value)\n\n"
 "Pass a parameter or setting to the document parser. Unlike\n"
 "the module level setparameter() function, the parameters set\n"
-"using setParameter will only be valid for the object itself\n"
+"using setparameter will only be valid for the object itself\n"
 "during its lifetime.\n"
 );
-static PyObject* doc_setParameter(PyObject* _self, PyObject* args, PyObject* kwargs)
+static PyObject* doc_setparameter(PyObject* _self, PyObject* args, PyObject* kwargs)
 {
     DocObject* self = (DocObject*)_self;
 
@@ -679,34 +705,63 @@ static PyObject* doc_setParameter(PyObject* _self, PyObject* args, PyObject* kwa
 
 PyDoc_STRVAR(f_open_doc,
 "open(type, filename) -> object\n\n"
-"Open a PDF file. The type argument always has to be \"pdf\"\n"
-"It returns a doc object which can be used to process the pdf\n"
-"contents. E.g.\n"
+"Open a PDF, SWF or image file. The type argument should be \"pdf\",\n"
+"\"swf\" or \"image\" accordingly. It returns a doc object which can be\n"
+"used to process the file contents.\n"
+"E.g.\n"
 " doc = open(\"pdf\", \"document.pdf\")\n"
-"If the file is not a PDF file or is encrypted without\n"
+" doc = open(\"swf\", \"flashfile.swf\")\n"
+" doc = open(\"image\", \"image.png\")\n"
+"If the file could not be loaded, or is a encrypted PDF file without\n"
 "a proper password specified, an exception is being raised.\n"
 "If the filename argument contains a '|' char, everything behind\n"
 "the '|' is treated as password used for opening the file.\n"
 "E.g.\n"
 " doc = open(\"pdf\", \"document.pdf|mysecretpassword\")\n"
+".\n"
+"Notice that for image files, the only supported file formats right now\n"
+"are jpeg and png.\n"
 );
 static PyObject* f_open(PyObject* parent, PyObject* args, PyObject* kwargs)
 {
     static char *kwlist[] = {"type", "filename", NULL};
-    char*filename;
-    char*type;
+    char*filename=0;
+    char*type=0;
     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ss", kwlist, &type, &filename)) {
         static char *kwlist2[] = {"filename", NULL};
-        type = "pdf";
+        type = 0;
         PyErr_Clear();
         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist2, &filename))
             return NULL;
     }
 
     DocObject*self = PyObject_New(DocObject, &DocClass);
+
+    if(!type) { //autodetect
+        type = "pdf"; //default
+        int l = strlen(filename);
+        if(l>4) {
+            if(filename[l-4]=='.') {
+                if(strchr("pP", filename[l-3]) && strchr("dD", filename[l-2]) && strchr("fF", filename[l-1]))
+                    type = "pdf";
+                if(strchr("jJ", filename[l-3]) && strchr("pP", filename[l-2]) && strchr("gG", filename[l-1]))
+                    type = "image";
+                if(strchr("pP", filename[l-3]) && strchr("nN", filename[l-2]) && strchr("gG", filename[l-1]))
+                    type = "image";
+                if(strchr("sS", filename[l-3]) && strchr("wW", filename[l-2]) && strchr("fF", filename[l-1]))
+                    type = "swf";
+            } else if(filename[l-5]=='.') {
+                type = "image";
+            }
+        }
+    }
 
     if(!strcmp(type,"pdf"))
         self->doc = pdfdriver->open(pdfdriver,filename);
+    else if(!strcmp(type, "image") || !strcmp(type, "img"))
+        self->doc = imagedriver->open(imagedriver, filename);
+    else if(!strcmp(type, "swf") || !strcmp(type, "SWF"))
+        self->doc = swfdriver->open(swfdriver, filename); /* pass the driver being invoked, as the pdf and image branches do */
     else
         return PY_ERROR("Unknown type %s", type);
 
@@ -723,7 +778,7 @@ static PyMethodDef doc_methods[] =
     /* PDF functions */
     {"getPage", (PyCFunction)doc_getPage, METH_KEYWORDS, doc_getPage_doc},
     {"getInfo", (PyCFunction)doc_getInfo, METH_KEYWORDS, doc_getInfo_doc},
-    {"setParameter", (PyCFunction)doc_setParameter, METH_KEYWORDS, doc_setParameter_doc},
+    {"setparameter", (PyCFunction)doc_setparameter, METH_KEYWORDS, doc_setparameter_doc},
     {0,0,0,0}
 };
 
@@ -807,7 +862,8 @@ PyDoc_STRVAR(doc_doc,
 "A Doc object is used for storing a document (like a PDF).\n"
 "doc.pages contains the number of pages in the document,\n"
 "and doc.filename the name of the file the document was\n"
-"created (loaded) from\n"
+"created (loaded) from. If the document was created from\n"
+"an image file, the number of pages is always 1\n"
 );
 static PyTypeObject DocClass =
 {
@@ -909,12 +965,12 @@ static PyMethodDef pdf2swf_methods[] =
     {"open", (PyCFunction)f_open, METH_KEYWORDS, f_open_doc},
     {"addfont", (PyCFunction)f_addfont, METH_KEYWORDS, f_addfont_doc},
     {"addfontdir", (PyCFunction)f_addfontdir, METH_KEYWORDS, f_addfontdir_doc},
-    {"setoption", (PyCFunction)f_setparameter, METH_KEYWORDS, f_setparameter_doc}, // for backwards-compatibility
     {"setparameter", (PyCFunction)f_setparameter, METH_KEYWORDS, f_setparameter_doc},
     {"verbose", (PyCFunction)f_verbose, METH_KEYWORDS, f_verbose_doc},
 
     /* devices */
     {"SWF", (PyCFunction)f_createSWF, METH_KEYWORDS, f_createSWF_doc},
+    {"OCR", (PyCFunction)f_createOCR, METH_KEYWORDS, f_createOCR_doc},
     {"ImageList", (PyCFunction)f_createImageList, METH_KEYWORDS, f_createImageList_doc},
     {"PlainText", (PyCFunction)f_createPlainText, METH_KEYWORDS, f_createPlainText_doc},
     {"PassThrough", (PyCFunction)f_createPassThrough, METH_KEYWORDS, f_createPassThrough_doc},
@@ -930,7 +986,7 @@ PyDoc_STRVAR(gfx_doc, \
 "The latter functionality is similar to what is offered by swftools'\n"
 "(http://www.swftools.org) pdf2swf utility, however more powerful-\n"
 "You can also create individual SWF files from single pages of the PDF\n"
-"or combine more than one page into a bigger PDF.\n"
+"or mix pages from different PDF files.\n"
 );
 
 void initgfx(void)
@@ -941,6 +997,8 @@ void initgfx(void)
     DocClass.ob_type = &PyType_Type;
 
     pdfdriver = gfxsource_pdf_create();
+    swfdriver = gfxsource_swf_create();
+    imagedriver = gfxsource_image_create();
 
     PyObject*module = Py_InitModule3("gfx", pdf2swf_methods, gfx_doc);
     PyObject*module_dict = PyModule_GetDict(module);