new parameter -s textonly
[swftools.git] / rendertest / testpdfs.py
1 import Image
2 import ImageChops
3 import ImageFilter
4 import sys
5 import os
6 import traceback
7 from athana import getTALstr
8 import random
9 import md5
10
# Paths of all PDF files to test; filled in by add_directory().
filenames = []
# Directories that are searched (recursively) for *.pdf files.
directories = ["pdfs"]

# External command line tools invoked through the shell.
SWFRENDER = "swfrender"
PDFTOPPM = "./pdftoppm"
CONVERT = "convert"
PDF2SWF = "pdf2swf"

# The two renderer command lines whose output is compared for every page.
# A command line starting with "xpdf" is rendered with PDFTOPPM (anything
# after the "xpdf" prefix is passed on as extra arguments); every other
# command line is run as a PDF-to-SWF converter followed by SWFRENDER.
COMPARE = ["xpdf", "xpdf -C"]
# Directory (with trailing slash) receiving the images and HTML reports.
OUTPUTDIR = "results.clip/"

# Alternative configurations:
#COMPARE=["xpdf", PDF2SWF+" -s convertgradients"]
#OUTPUTDIR = "results.pdf2swf/"

#COMPARE=[PDF2SWF, PDF2SWF+" --flatten"]
#OUTPUTDIR = "results.flatten/"

# Running number used to give every generated file a unique name.
counter = 1
29
def randstr():
    """Return a random string of 8 lowercase hexadecimal digits.

    The value is the first 8 characters of the MD5 hex digest of a random
    float. It is only meant for generating throw-away names, not for
    anything security related.
    """
    # hashlib replaces the stand-alone ``md5`` module, which is deprecated
    # since Python 2.5 and no longer exists in Python 3.
    import hashlib
    seed = str(random.random()).encode("ascii")
    return hashlib.md5(seed).hexdigest()[:8]
32
def unlink(file):
    """Delete `file` if possible; a file that cannot be removed is ignored.

    This is a best-effort cleanup helper: a missing file or a permission
    problem (any OSError) is silently skipped. Other exceptions, such as a
    wrong argument type or a KeyboardInterrupt, are no longer swallowed.
    """
    try:
        os.unlink(file)
    except OSError:
        pass
38
def system(command):
    """Run `command` through the shell and report whether it failed.

    If the command contains no ">" its stdout and stderr are redirected to
    /tmp/log.txt, and the contents of that log are printed and returned on
    failure. A command that already contains a redirection is run as is,
    and a generic message is returned on failure.

    Returns:
        None on success, otherwise a non-empty error text. The text is
        never empty, because callers test the result with ``if error:``.
    """
    fallback = "Unknown error in " + command

    if ">" in command:
        # The caller redirects the output itself, so there is no log to show.
        if os.system(command) != 0:
            return fallback
        return None

    # Any non-zero status is a failure. The original mask (0xff00) only
    # looked at the exit code and ignored commands killed by a signal.
    if os.system(command + " > /tmp/log.txt 2>&1") == 0:
        return None

    log = open("/tmp/log.txt", "rb")
    try:
        error = log.read()
    finally:
        log.close()
    # A command may fail without printing anything; an empty string would
    # look like success to the callers, so substitute the generic message.
    error = error or fallback
    print(error)
    return error
48
class ConversionError(Exception):
    """Raised when an external conversion/rendering command reports an error.

    Derives from Exception so that it can be raised and caught like any
    other exception (classes not derived from BaseException cannot be
    raised in Python 3).

    Attributes:
        msg: the error text, as passed to the constructor.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return str(self.msg)
54
class TooComplexError(Exception):
    """Raised when the SWF converter fails with its "supports 65536" error.

    Derives from Exception so that it can be raised and caught like any
    other exception (classes not derived from BaseException cannot be
    raised in Python 3).

    Attributes:
        msg: the error text, as passed to the constructor.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return str(self.msg)
60
class BadMatch(Exception):
    """Signals that two renderings of a page could not be matched.

    PDFPage.compare() records this exception with the rating 65534.0.
    Derives from Exception so that it can be raised and caught like any
    other exception (classes not derived from BaseException cannot be
    raised in Python 3).

    Attributes:
        msg: the error text, as passed to the constructor.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return str(self.msg)
66
def formatException():
    """Describe the exception currently being handled as a multi-line string.

    The first line is "Exception <type>", followed by the exception value
    if it is truthy. After that come two lines per traceback entry: the
    location (file, line number, function) and the source text of the line.
    Must be called while an exception is being handled.
    """
    exc_type, exc_value = sys.exc_info()[:2]
    headline = "Exception " + str(exc_type)
    if exc_value:
        headline += " " + str(exc_value)
    pieces = [headline + "\n"]
    for entry in traceback.extract_tb(sys.exc_info()[2]):
        pieces.append("  File \"%s\", line %d, in %s\n" % (entry[0], entry[1], entry[2]))
        pieces.append("    %s\n" % entry[3])
    return "".join(pieces)
77
78
class PDFPage:
    """One page of a PDF file and the result of comparing two renderings.

    compare() renders the page with both command lines in COMPARE, stores
    the names of the resulting images and a numeric rating (0 means no
    measured difference; larger is worse; 65534.0 and above mark failures).
    generatehtml() then writes the HTML report pages for this page.

    Attributes:
        filename, page: the PDF file and the page number in it.
        width, height: page size as passed to the constructor.
        rating: result of compare(), or None before it ran.
        message: formatted exception text if compare() failed, else None.
        file1, file2, file12: names (relative to OUTPUTDIR) of the two
            renderings and of their difference image.
        html12, htmldiff, htmlmessage: names of the generated HTML pages.
        width1, height1, width2, height2: pixel sizes of the two
            renderings; only set once both images have been loaded.
    """

    def __init__(self, filename, page, width, height):
        self.filename = filename
        self.page = page
        self.rating = None
        self.message = None
        self.htmlmessage = None
        self.file1 = None
        self.file2 = None
        self.file12 = None
        self.html12 = None
        self.htmldiff = None
        self.width,self.height = width,height

    def runtool(self, cmd, filename, page, file):
        """Render page `page` of `filename` into the image `file` using `cmd`.

        A command line starting with "xpdf" is rendered with PDFTOPPM (the
        remainder of `cmd` is passed on as arguments) and converted with
        CONVERT. Any other command line is run as a PDF-to-SWF converter
        whose output is rendered with SWFRENDER.

        Raises:
            TooComplexError: the converter failed with a "supports 65536"
                message.
            ConversionError: any other failure of an external command.
        """
        if cmd.startswith("xpdf"):
            unlink("/tmp/test-%06d.ppm" % page)
            args = cmd[4:]
            error = system(PDFTOPPM + "%s -r 72 -f %d -l %d '%s' /tmp/test" % (args, page, page, filename))
            if error:
                raise ConversionError(error)
            unlink(file)
            error = system(CONVERT + " /tmp/test-%06d.ppm  %s" % (page, file))
            if error:
                raise ConversionError(error)
            unlink("/tmp/test-%06d.ppm" % page)
        else:
            unlink("/tmp/test.swf")
            unlink("svp.ps")
            # Run the command that was requested. The original always ran
            # COMPARE[0] here, so a second, differently parameterized
            # converter command line was never actually used.
            error = system(cmd + " -Q 300 -p%d '%s' -o /tmp/test.swf" % (page, filename))
            #system("mv svp.ps %s.ps" % randstr())
            if error and "supports 65536" in error:
                raise TooComplexError(error)
            if error:
                raise ConversionError(error)
            unlink(file)
            error = system(SWFRENDER + " /tmp/test.swf -o %s" % file)
            if error:
                raise ConversionError(error)
            unlink("/tmp/test.swf")

    def _size_penalty(self):
        """Return the badness caused by differing image sizes.

        Sizes that differ by at most 5 pixels in both directions are not
        penalized. Otherwise every pixel of the non-overlapping area counts
        with a weight of 65536.
        """
        dwidth = abs(self.width1 - self.width2)
        dheight = abs(self.height1 - self.height2)
        # The original tested abs(height1 != height2) > 5, i.e. the absolute
        # value of a boolean, so a height mismatch alone was never penalized.
        if dwidth > 5 or dheight > 5:
            return (65536 * dwidth * max(self.height1, self.height2) +
                    65536 * dheight * max(self.width1, self.width2))
        return 0

    def runtools(self, filename, page, file1, file2, file12):
        """Render the page with both tools and return the difference rating.

        COMPARE[0] is rendered into `file2` and COMPARE[1] into `file1`.
        Both images are cropped to their common size, converted to RGB and
        blurred; their difference image is saved to `file12` as PNG.

        Returns:
            The size penalty plus the weighted sum over the difference
            histogram, divided by three times the common pixel count.
        """
        self.runtool(COMPARE[0], filename, page, file2)
        self.runtool(COMPARE[1], filename, page, file1)

        unlink(file12)

        pic1 = Image.open(file1)
        pic1.load()
        self.width1 = pic1.size[0]
        self.height1 = pic1.size[1]

        pic2 = Image.open(file2)
        pic2.load()
        self.width2 = pic2.size[0]
        self.height2 = pic2.size[1]

        badness = 0.0 + self._size_penalty()

        minx = min(self.width1,self.width2)
        miny = min(self.height1,self.height2)

        pic1 = pic1.crop((0,0,minx,miny))
        pic1 = pic1.convert("RGB")
        pic1 = pic1.filter(ImageFilter.BLUR)
        pic2 = pic2.crop((0,0,minx,miny))
        pic2 = pic2.convert("RGB")
        pic2 = pic2.filter(ImageFilter.BLUR)

        diffimage = ImageChops.difference(pic1,pic2)
        diffimage.save(file12, "PNG")

        # compute quadratical difference
        # The histogram holds 256 counters for each of the three RGB bands.
        # NOTE(review): a difference value i and the value 256-i are both
        # weighted with i*i, and the values 0 and 128 do not count at all --
        # confirm that this is the intended weighting.
        diff = diffimage.histogram()
        for i in range(1,128):
            weight = float(i*i)
            for band in (0, 256, 512):
                badness += (diff[band+i] + diff[band+256-i])*weight

        badness /= (minx*miny)*3

        return badness

    def _failed(self, rating):
        """Record the exception currently being handled as result `rating`."""
        self.rating = rating
        self.message = formatException()
        print(self.message)

    def compare(self):
        """Render and compare the page, storing the outcome in self.rating.

        Three consecutive values of the global counter are used for the
        names of the two renderings and the difference image. Failures do
        not propagate: they are recorded in self.message together with a
        rating of 65534.0 (BadMatch), 65535.0 (ConversionError), 65536.0
        (TooComplexError) or 65537.0 (anything else). Only
        KeyboardInterrupt is passed on, so that the run can be aborted.
        """
        global counter
        try:
            self.file1 = str(counter) + ".png"
            counter = counter + 1
            self.file2 = str(counter) + ".png"
            counter = counter + 1
            self.file12 = str(counter) + ".png"
            counter = counter + 1
            self.rating = self.runtools(self.filename, self.page, OUTPUTDIR + self.file1, OUTPUTDIR + self.file2, OUTPUTDIR + self.file12)
        except BadMatch:
            self._failed(65534.0)
        except ConversionError:
            self._failed(65535.0)
        except TooComplexError:
            self._failed(65536.0)
        except KeyboardInterrupt:
            # Must not be swallowed by the catch-all below, otherwise the
            # user cannot interrupt the test run.
            raise
        except:
            self._failed(65537.0)

    def getsizes(self):
        """Return an HTML snippet with the pixel sizes of both renderings.

        Returns "" if the comparison failed (self.message is set). Sizes
        differing by more than 5 pixels are highlighted in red.
        """
        if self.message:
            return ""
        sizes = (self.width1, self.height1, self.width2, self.height2)
        if abs(self.width1 - self.width2) > 5 or \
           abs(self.height1 - self.height2) > 5:
            return '<font color="red">%dx%d <-> %dx%d</font>' % sizes
        return '%dx%d,%dx%d' % sizes

    def _write_tal(self, name, template):
        """Expand the TAL `template` for this page into OUTPUTDIR + name."""
        fi = open(OUTPUTDIR + name, "wb")
        try:
            fi.write(getTALstr(template, {"self": self}))
        finally:
            # close the file even if expanding the template fails
            fi.close()

    def generatehtml(self):
        """Write the HTML report pages for this page into OUTPUTDIR.

        Always writes a side-by-side page (self.html12) and a difference
        page (self.htmldiff). If self.message is set, a page containing
        that message (self.htmlmessage) is written as well. File names are
        taken from the global counter.
        """
        global counter
        self.html12 = str(counter) + ".html"
        counter = counter + 1
        self.htmldiff = str(counter) + ".html"
        counter = counter + 1

        # NOTE(review): the captions below are fixed text, while file1 is
        # rendered by COMPARE[1] and file2 by COMPARE[0] -- confirm that the
        # captions match the configured tools.
        self._write_tal(self.html12, """
<html><head></head>
<body>
<tal:block tal:replace="python:'File: '+self.filename"/><br>
<tal:block tal:replace="python:'Page: '+str(self.page)"/><br>
<tal:block tal:replace="python:'Rating: '+str(self.rating)"/><br>
<pre tal:condition="python:self.message" tal:replace="python:'Message: '+str(self.message)"/><br>
<hr>
<table cellspacing="0" cellpadding="0">
<tr><td><img tal:attributes="src python:self.file1"/></td><td><img tal:attributes="src python:self.file2"/></td></tr>
<tr><td>pdf2swf Version</td><td>pdftoppm Version</td></tr>
</table>
<hr>
</body>
</html>""")

        self._write_tal(self.htmldiff, """
<html><head></head>
<body>
<tal:block tal:replace="python:'File: '+self.filename"/><br>
<tal:block tal:replace="python:'Page: '+str(self.page)"/><br>
<tal:block tal:replace="python:'Rating: '+str(self.rating)"/><br>
<pre tal:condition="python:self.message" tal:replace="python:'Message: '+str(self.message)"/><br>
<hr>
<img tal:attributes="src python:self.file12"/>
<hr>
</body>
</html>""")

        if self.message:
            self.htmlmessage = str(counter) + ".html"
            counter = counter + 1
            self._write_tal(self.htmlmessage, """
<html><head></head>
<body>
<pre tal:content="raw python:self.message">
</pre>
</body>
</html>""")
254         
255
256
def compare_pages(page1,page2):
    """Comparison function ordering pages by descending rating.

    Returns 1 if page1 has the lower rating, -1 if it has the higher
    rating and 0 otherwise, so that sorting puts the worst pages first.
    """
    lower = page1.rating < page2.rating
    higher = page1.rating > page2.rating
    # booleans count as 0/1, giving 1, -1 or 0
    return int(lower) - int(higher)
264
265
def add_directory(directory):
    """Append all PDF files below `directory` to the global `filenames`.

    Entries whose name ends in ".pdf" (case-insensitive) are added and
    reported on stdout; other entries that are directories are searched
    recursively. If `directory` is not a directory, a message is printed
    and nothing is added.
    """
    global filenames
    if not os.path.isdir(directory):
        print("bad directory: %s" % directory)
        return
    for entry in os.listdir(directory):
        path = os.path.join(directory, entry)
        if entry.lower().endswith(".pdf"):
            filenames.append(path)
            print("+ %s" % path)
        elif os.path.isdir(path):
            add_directory(path)
278
# Main script: compare the first pages of every PDF file found and write
# an HTML report, sorted from worst to best rating, into OUTPUTDIR.
pages = []
try:
    os.mkdir(OUTPUTDIR)
except OSError:
    # typically: the output directory already exists
    pass

for file in filenames:
    print("+ %s" % file)

for directory in directories:
    add_directory(directory)

for filename in filenames:
    try:
        unlink("/tmp/test.txt")
        # Ask pdf2swf for the page list. The file name is quoted (as it is
        # in PDFPage.runtool) so that names containing spaces work.
        error = system(PDF2SWF + " -I '%s' -o /tmp/test.txt" % filename)
        if error:
            raise ConversionError(error)
        fi = open("/tmp/test.txt", "rb")
        try:
            for line in fi.readlines():
                # every line is a list of key=value pairs, of which
                # page, width and height are used
                p = {}
                for param in line.split():
                    if "=" not in param:
                        continue
                    key,value = param.split("=", 1)
                    p[key] = value
                if not p:
                    # blank line
                    continue
                page = int(p["page"])
                width = int(float(p["width"]))
                height = int(float(p["height"]))
                print("%s %d %dx%d" % (filename, page, width, height))
                pdfpage = PDFPage(filename, page, width, height)
                pdfpage.compare()

                # very large pages are left out of the report
                if width < 2000 and height < 2000:
                    pages += [pdfpage]

                # only consider the first 3 pages
                # (the original tested "page > 3" after the comparison and
                # therefore processed four pages)
                if page >= 3:
                    break
        finally:
            fi.close()
    except KeyboardInterrupt:
        break
    except:
        # Catch-all on purpose: whatever goes wrong for this file is
        # recorded as a failed entry in the report.
        pdfpage = PDFPage(filename, -1, -1, -1)
        pdfpage.rating = 65536.0
        pdfpage.message = formatException()
        pages += [pdfpage]

# Worst rating first. The sort is stable, so pages with equal ratings keep
# their original order, exactly as with a comparison function.
pages.sort(key=lambda entry: entry.rating, reverse=True)

position = 1
for page in pages:
    page.generatehtml()
    page.position = position
    position = position + 1

fi = open(OUTPUTDIR + "index.html", "wb")
try:
    fi.write(getTALstr("""<html>
<head></head>
<body>
<table border="1"><tr><th>Position</th><th>Rating</th><th>File</th><th>Size</th><th>Page</th><th>Images</th><th>Diff</th><th>Further Info</th></tr>
<tal:block tal:repeat="page pages">
<tr>
<td tal:content="python:page.position"/>
<td tal:content="python:page.rating"/>
<td tal:content="python:page.filename"/>
<td tal:content="raw python:page.getsizes()"/>
<td tal:content="python:page.page"/>
<td><a tal:attributes="href python:page.html12">Side by Side</a></td>
<td><a tal:attributes="href python:page.htmldiff">Difference</a></td>
<td><a tal:condition="python:page.message" tal:attributes="href python:page.htmlmessage">Error message</a></td>
</tr>
</tal:block>
</table>
</body>
</html>""", {"pages": pages}))
finally:
    fi.close()
353