I am trying to split a huge PDF file into several small PDF files using pyPdf. I tried with this simplified code:
# Split document.pdf into one single-page PDF per page, written as
# document-page<i>.pdf where <i> is the zero-based page index.
from pyPdf import PdfFileWriter, PdfFileReader

# Keep the source file open for the whole loop: the reader still reads from
# this stream after construction (numPages and getPage are called below).
# The context manager guarantees the handle is closed afterwards.
with open("document.pdf", "rb") as source:
    inputpdf = PdfFileReader(source)
    for i in xrange(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        # Closes each output file even if write() raises part-way through.
        with open("document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
but I got the following error message:
Traceback (most recent call last):
  File "./hltShortSummary.py", line 24, in <module>
    for i in xrange(inputpdf.numPages):
  File "/usr/lib/pymodules/python2.7/pyPdf/pdf.py", line 342, in <lambda>
    numPages = property(lambda self: self.getNumPages(), None, None)
  File "/usr/lib/pymodules/python2.7/pyPdf/pdf.py", line 334, in getNumPages
    self._flatten()
  File "/usr/lib/pymodules/python2.7/pyPdf/pdf.py", line 500, in _flatten
    pages = catalog["/Pages"].getObject()
  File "/usr/lib/pymodules/python2.7/pyPdf/generic.py", line 466, in __getitem__
    return dict.__getitem__(self, key).getObject()
  File "/usr/lib/pymodules/python2.7/pyPdf/generic.py", line 165, in getObject
    return self.pdf.getObject(self).getObject()
  File "/usr/lib/pymodules/python2.7/pyPdf/pdf.py", line 549, in getObject
    retval = readObject(self.stream, self)
  File "/usr/lib/pymodules/python2.7/pyPdf/generic.py", line 67, in readObject
    return DictionaryObject.readFromStream(stream, pdf)
  File "/usr/lib/pymodules/python2.7/pyPdf/generic.py", line 517, in readFromStream
    value = readObject(stream, pdf)
  File "/usr/lib/pymodules/python2.7/pyPdf/generic.py", line 58, in readObject
    return ArrayObject.readFromStream(stream, pdf)
  File "/usr/lib/pymodules/python2.7/pyPdf/generic.py", line 153, in readFromStream
    arr.append(readObject(stream, pdf))
  File "/usr/lib/pymodules/python2.7/pyPdf/generic.py", line 87, in readObject
    return NumberObject.readFromStream(stream)
  File "/usr/lib/pymodules/python2.7/pyPdf/generic.py", line 232, in readFromStream
    return NumberObject(name)
ValueError: invalid literal for int() with base 10: ''
Any ideas?