made art_svp_merge public

[swftools.git] / lib / python / gfx.c
diff --git a/lib/python/gfx.c b/lib/python/gfx.c

index 239822d..1d28bc5 100644 (file)
--- a/lib/python/gfx.c
+++ b/lib/python/gfx.c
@@ -25,13 +25,18 @@
  #undef HAVE_STAT
  #include "../devices/swf.h"
  #include "../devices/render.h"
+#include "../devices/ocr.h"
  #include "../devices/rescale.h"
  #include "../devices/text.h"
  #include "../pdf/pdf.h"
+#include "../readers/swf.h"
+#include "../readers/image.h"
  #include "../log.h"
  #include "../utf8.h"
  
-gfxsource_t*pdfdriver;
+static gfxsource_t*pdfdriver = 0;
+static gfxsource_t*swfdriver = 0;
+static gfxsource_t*imagedriver = 0;
  
  staticforward PyTypeObject OutputClass;
  staticforward PyTypeObject PageClass;
@@ -75,7 +80,10 @@ PyDoc_STRVAR(output_save_doc, \
  "Saves the contents of an output device to a file\n"
  "Depending on what the output device is, the contents\n"
  "of the file may be plain text, an image, an SWF file,\n"
-"etc.\n");
+"etc.\n"
+"For the ImageList device, several files (named\n"
+"filename.1.png, filename.2.png etc.) might be created)\n"
+);
  static PyObject* output_save(PyObject* _self, PyObject* args, PyObject* kwargs)
  {
      OutputObject* self = (OutputObject*)_self;
@@ -130,12 +138,26 @@ static PyObject* output_endpage(PyObject* _self, PyObject* args, PyObject* kwarg
      self->output_device->endpage(self->output_device);
      return PY_NONE;
  }
+PyDoc_STRVAR(output_setparameter_doc, \
+"setparameter(key, value)\n\n"
+"Set a output-device dependent parameter"
+);
+/* Python method Output.setparameter(key, value).
+ * Parses two string arguments (positional or keyword) and forwards them to
+ * the setparameter callback of the wrapped output device.
+ * Returns PY_NONE, or NULL (with a Python exception set) if the arguments
+ * cannot be parsed. */
+static PyObject* output_setparameter(PyObject* _self, PyObject* args, PyObject* kwargs)
+{
+    OutputObject* self = (OutputObject*)_self;
+    static char *kwlist[] = {"key", "value", NULL};
+    char*key=0,*value=0;
+    /* Parse unconditionally: skipping the parse when args is NULL would hand
+     * NULL key/value pointers to the device callback. */
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ss", kwlist, &key, &value))
+       return NULL;
+    self->output_device->setparameter(self->output_device, key, value);
+    return PY_NONE;
+}
  PyDoc_STRVAR(f_createSWF_doc, \
  "SWF()\n\n"
  "Creates a device which renders documents to SWF (Flash) files.\n"
  "Depending on the way the document parser behaves (see the poly2bitmap\n"
  "and bitmap parameters), the resulting SWF might use vector operations\n"
-"and Flash Texts to display the document, or just a single bitmap\n"
+"and Flash Texts to display the document, or just a single bitmap.\n"
  );
  static PyObject* f_createSWF(PyObject* parent, PyObject* args, PyObject* kwargs)
  {
@@ -149,13 +171,33 @@ static PyObject* f_createSWF(PyObject* parent, PyObject* args, PyObject* kwargs)
      return (PyObject*)self;
  }
  
+PyDoc_STRVAR(f_createOCR_doc, \
+"OCR()\n\n"
+"Creates a device which processes documents using OCR (optical\n"
+"character recognition).\n"
+"This is handy for e.g. extracting fulltext from PDF documents\n"
+"which have broken fonts, and where hence the \"PlainText\"\n"
+"device doesn't work.\n"
+);
+/* Module function gfx.OCR(): creates an Output object wrapping a freshly
+ * allocated gfxdevice_t that is set up by gfxdevice_ocr_init().
+ * Takes no arguments. Returns the new object, or NULL with a Python
+ * exception set if argument parsing or an allocation fails. */
+static PyObject* f_createOCR(PyObject* parent, PyObject* args, PyObject* kwargs)
+{
+    static char *kwlist[] = {NULL};
+    if (args && !PyArg_ParseTupleAndKeywords(args, kwargs, "", kwlist))
+       return NULL;
+    OutputObject*self = PyObject_New(OutputObject, &OutputClass);
+    if (!self)
+       return NULL; /* PyObject_New has already set MemoryError */
+    
+    self->output_device = malloc(sizeof(gfxdevice_t));
+    if (!self->output_device) {
+       /* Release the raw object memory without running the type's
+        * deallocator, since output_device was never initialized. */
+       PyObject_Del(self);
+       return PyErr_NoMemory();
+    }
+    gfxdevice_ocr_init(self->output_device);
+    return (PyObject*)self;
+}
+
+
  PyDoc_STRVAR(f_createImageList_doc, \
  "ImageList()\n\n"
  "Creates a device which renders documents to bitmaps.\n"
  "Each page that is rendered will create new bitmap.\n"
-"As, right now, the only way to access the bitmaps is\n"
-"by using the save() function on the imagelist, you can\n"
-"currently only retrieve the first bitmap/page.\n"
+"Using save(), you can save the images to a number\n"
+"of files\n"
  );
  static PyObject* f_createImageList(PyObject* parent, PyObject* args, PyObject* kwargs)
  {
@@ -350,7 +392,6 @@ PyDoc_STRVAR(f_createPassThrough_doc, \
  "to page.render().\n"
  "device needs to be a class implementing at least the following functions:\n\n"
  "setparameter(key,value)\n"
-"startpage(width,height)\n"
  "startclip(outline)\n"
  "endclip()\n"
  "stroke(outline, width, color, capstyle, jointstyle, miterLimit)\n"
@@ -360,9 +401,8 @@ PyDoc_STRVAR(f_createPassThrough_doc, \
  "addfont(font)\n"
  "drawchar(font, glyph, color, matrix)\n"
  "drawlink(outline, url)\n"
-"finish()\n\n"
  "If any of these functions are not defined, a error message will be printed,\n"
-"however the rendering process will be continued.\n"
+"however the rendering process will *not* be aborted.\n"
  );
  static PyObject* f_createPassThrough(PyObject* parent, PyObject* args, PyObject* kwargs)
  {
@@ -401,6 +441,7 @@ static PyMethodDef output_methods[] =
      {"save", (PyCFunction)output_save, METH_KEYWORDS, output_save_doc},
      {"startpage", (PyCFunction)output_startpage, METH_KEYWORDS, output_startpage_doc},
      {"endpage", (PyCFunction)output_endpage, METH_KEYWORDS, output_endpage_doc},
+    {"setparameter", (PyCFunction)output_setparameter, METH_KEYWORDS, output_setparameter_doc},
      {0,0,0,0}
  };
  
@@ -497,7 +538,7 @@ static PyObject* page_render(PyObject* _self, PyObject* args, PyObject* kwargs)
  PyDoc_STRVAR(page_asImage_doc, \
  "asImage(width, height)\n\n"
  "Creates a bitmap from a page. The bitmap will be returned as a string\n"
-"containing RGB triplets. The bitmap will have the specified width and\n"
+"containing RGB triplets. The bitmap will be rescaled to the specified width and\n"
  "height. The aspect ratio of width and height doesn't need to be the same\n"
  "as the page.\n"
  );
@@ -589,14 +630,13 @@ static int page_print(PyObject * _self, FILE *fi, int flags)
  
  PyDoc_STRVAR(doc_getPage_doc,
  "getPage(nr)\n\n"
-"\n"
  "Get one page from a document file. The nr parameter specifies\n"
  "which page to retrieve. Counting starts at 1, so the first page\n"
  "can be retrieved by\n"
  "    page = doc.getPage(1)\n"
  ".\n"
  "You can find out how many pages a document contains by querying\n"
-"it's pages field (doc.pages)\n"
+"its pages field (doc.pages)\n"
  );
  static PyObject* doc_getPage(PyObject* _self, PyObject* args, PyObject* kwargs)
  {
@@ -621,7 +661,6 @@ static PyObject* doc_getPage(PyObject* _self, PyObject* args, PyObject* kwargs)
  
  PyDoc_STRVAR(doc_getInfo_doc,
  "getInfo(key)\n\n"
-"\n"
  "Retrieve some information about a document. For PDF files, key\n"
  "can have the following values:\n\n"
  "\"title\", \"subject\", \"keywords\", \"author\", \"creator\", \"producer\",\n"
@@ -644,15 +683,14 @@ static PyObject* doc_getInfo(PyObject* _self, PyObject* args, PyObject* kwargs)
      return PyString_FromString(s);
  }
  
-PyDoc_STRVAR(doc_setParameter_doc,
-"setParameter(key, value)\n\n"
-"\n"
+PyDoc_STRVAR(doc_setparameter_doc,
+"setparameter(key, value)\n\n"
  "Pass a parameter or setting to the document parser. Unlike\n"
-"the module level setoption() function, the parameters set\n"
-"using setParameter will only be valid for the object itself\n"
+"the module level setparameter() function, the parameters set\n"
+"using setparameter will only be valid for the object itself\n"
  "during its lifetime.\n"
  );
-static PyObject* doc_setParameter(PyObject* _self, PyObject* args, PyObject* kwargs)
+static PyObject* doc_setparameter(PyObject* _self, PyObject* args, PyObject* kwargs)
  {
      DocObject* self = (DocObject*)_self;
  
@@ -667,33 +705,63 @@ static PyObject* doc_setParameter(PyObject* _self, PyObject* args, PyObject* kwa
  
  PyDoc_STRVAR(f_open_doc,
  "open(type, filename) -> object\n\n"
-"Open a PDF file. The type argument always has to be \"pdf\"\n"
-"It returns a doc object which can be used to process the pdf\n"
-"contents. E.g.\n"
+"Open a PDF, SWF or image file. The type argument should be \"pdf\",\n"
+"\"swf\" or \"image\" accordingly. It returns a doc object which can be\n"
+"used to process the file contents.\n"
+"E.g.\n"
  "    doc = open(\"pdf\", \"document.pdf\")\n"
-"If the file is not a PDF file or is encrypted without\n"
+"    doc = open(\"swf\", \"flashfile.swf\")\n"
+"    doc = open(\"image\", \"image.png\")\n"
+"If the file could not be loaded, or is a encrypted PDF file without\n"
  "a proper password specified, an exception is being raised.\n"
  "If the filename argument contains a '|' char, everything behind\n"
  "the '|' is treated as password used for opening the file.\n"
  "E.g.\n"
  "    doc = open(\"pdf\", \"document.pdf|mysecretpassword\")\n"
+".\n"
+"Notice that for image files, the only supported file formats right now\n"
+"are jpeg and png.\n"
  );
  static PyObject* f_open(PyObject* parent, PyObject* args, PyObject* kwargs)
  {
      static char *kwlist[] = {"type", "filename", NULL};
-    char*filename;
-    char*type;
+    char*filename=0;
+    char*type=0;
      if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ss", kwlist, &type, &filename)) {
-        type = "pdf";
+       static char *kwlist2[] = {"filename", NULL};
+        type = 0;
         PyErr_Clear();
-       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &filename))
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist2, &filename))
             return NULL;
      }
  
      DocObject*self = PyObject_New(DocObject, &DocClass);
+
+    if(!type) { //autodetect
+       type = "pdf"; //default
+       int l = strlen(filename);
+       if(l>4) {
+           if(filename[l-4]=='.') {
+               if(strchr("pP", filename[l-3]) && strchr("dD", filename[l-2]) && strchr("fF", filename[l-1]))
+                   type = "pdf";
+               if(strchr("jJ", filename[l-3]) && strchr("pP", filename[l-2]) && strchr("gG", filename[l-1]))
+                   type = "image";
+               if(strchr("pP", filename[l-3]) && strchr("nN", filename[l-2]) && strchr("gG", filename[l-1]))
+                   type = "image";
+               if(strchr("sS", filename[l-3]) && strchr("wW", filename[l-2]) && strchr("fF", filename[l-1]))
+                   type = "swf";
+           } else if(filename[l-5]=='.') {
+               type = "image";
+           }
+       }
+    }
     
      if(!strcmp(type,"pdf"))
         self->doc = pdfdriver->open(pdfdriver,filename);
+    else if(!strcmp(type, "image") || !strcmp(type, "img"))  
+       self->doc = imagedriver->open(imagedriver, filename);
+    else if(!strcmp(type, "swf") || !strcmp(type, "SWF"))
+       self->doc = swfdriver->open(imagedriver, filename);
      else
         return PY_ERROR("Unknown type %s", type);
  
@@ -710,7 +778,7 @@ static PyMethodDef doc_methods[] =
      /* PDF functions */
      {"getPage", (PyCFunction)doc_getPage, METH_KEYWORDS, doc_getPage_doc},
      {"getInfo", (PyCFunction)doc_getInfo, METH_KEYWORDS, doc_getInfo_doc},
-    {"setParameter", (PyCFunction)doc_setParameter, METH_KEYWORDS, doc_setParameter_doc},
+    {"setparameter", (PyCFunction)doc_setparameter, METH_KEYWORDS, doc_setparameter_doc},
      {0,0,0,0}
  };
  
@@ -754,7 +822,7 @@ PyDoc_STRVAR(output_doc,
  "object directly (i.e., from a class), however you can\n"
  "use a PassThrough() device to pass things over to Python.\n"
  "Examples for classes implementing the Output class are: \n"
-"ImageList, SWF, PlainText, PassThrough\n"
+"ImageList, SWF, PlainText and PassThrough.\n"
  );
  static PyTypeObject OutputClass =
  {
@@ -794,7 +862,8 @@ PyDoc_STRVAR(doc_doc,
  "A Doc object is used for storing a document (like a PDF).\n"
  "doc.pages contains the number of pages in the document,\n"
  "and doc.filename the name of the file the document was\n"
-"created (loaded) from\n"
+"created (loaded) from. If the document was created from\n"
+"an image file, the number of pages is always 1\n"
  );
  static PyTypeObject DocClass =
  {
@@ -813,19 +882,18 @@ static PyTypeObject DocClass =
  
  //=====================================================================
  
-PyDoc_STRVAR(f_setoption_doc, \
-"setoption(key,value)\n\n"
-"\n"
+PyDoc_STRVAR(f_setparameter_doc, \
+"setparameter(key,value)\n\n"
  "Set a parameter in the gfx module (which might affect the PDF\n"
  "parser or any of the rendering backends). This is a parameter\n"
-"which would usually be passed with the \"-s\" option to pdf2swf\n"
+"which would usually be passed with the \"-s\" option to pdf2swf.\n"
  "For a list of all parameters, see the output of\n"
  "    pdf2swf -s help\n"
  "and\n"
  "    pdf2swf somefile.pdf -s help\n"
  ".\n"
  );
-static PyObject* f_setoption(PyObject* self, PyObject* args, PyObject* kwargs)
+static PyObject* f_setparameter(PyObject* self, PyObject* args, PyObject* kwargs)
  {
      static char *kwlist[] = {"key", "value", NULL};
      char*key=0,*value=0;
@@ -897,11 +965,12 @@ static PyMethodDef pdf2swf_methods[] =
      {"open", (PyCFunction)f_open, METH_KEYWORDS, f_open_doc},
      {"addfont", (PyCFunction)f_addfont, METH_KEYWORDS, f_addfont_doc},
      {"addfontdir", (PyCFunction)f_addfontdir, METH_KEYWORDS, f_addfontdir_doc},
-    {"setoption", (PyCFunction)f_setoption, METH_KEYWORDS, f_setoption_doc},
+    {"setparameter", (PyCFunction)f_setparameter, METH_KEYWORDS, f_setparameter_doc},
      {"verbose", (PyCFunction)f_verbose, METH_KEYWORDS, f_verbose_doc},
  
      /* devices */
      {"SWF", (PyCFunction)f_createSWF, METH_KEYWORDS, f_createSWF_doc},
+    {"OCR", (PyCFunction)f_createOCR, METH_KEYWORDS, f_createOCR_doc},
      {"ImageList", (PyCFunction)f_createImageList, METH_KEYWORDS, f_createImageList_doc},
      {"PlainText", (PyCFunction)f_createPlainText, METH_KEYWORDS, f_createPlainText_doc},
      {"PassThrough", (PyCFunction)f_createPassThrough, METH_KEYWORDS, f_createPassThrough_doc},
@@ -917,7 +986,7 @@ PyDoc_STRVAR(gfx_doc, \
  "The latter functionality is similar to what is offered by swftools'\n" 
  "(http://www.swftools.org) pdf2swf utility, however more powerful-\n" 
  "You can also create individual SWF files from single pages of the PDF\n" 
-"or combine more than one page into a bigger PDF.\n"
+"or mix pages from different PDF files.\n"
  );
  
  void initgfx(void)
@@ -928,6 +997,8 @@ void initgfx(void)
      DocClass.ob_type = &PyType_Type;
  
      pdfdriver = gfxsource_pdf_create();
+    swfdriver = gfxsource_swf_create();
+    imagedriver = gfxsource_image_create();
      
      PyObject*module = Py_InitModule3("gfx", pdf2swf_methods, gfx_doc);
      PyObject*module_dict = PyModule_GetDict(module);