I finally realized that pyPdf can help. I am posting my solution here in case it helps someone else.
(1) string search function
def fnPDF_FindText(xFile, xString):
    """Return the index of the first PDF page whose text matches a pattern.

    Args:
        xFile: Path of the PDF file in which to look.
        xString: The string to look for. It is used as a regular expression
            and matched against the lower-cased, ASCII-only text of each
            page, so it should be written in lower case.

    Returns:
        The zero-based index of the first page on which the pattern is
        found, or -1 if no page matches.
    """
    import re

    import pyPdf

    # Compile once instead of looking the pattern up again for every page.
    pattern = re.compile(xString)

    # The context manager closes the PDF even if text extraction raises.
    with open(xFile, "rb") as pdfStream:
        pdfDoc = pyPdf.PdfFileReader(pdfStream)
        for i in range(pdfDoc.getNumPages()):
            content = pdfDoc.getPage(i).extractText() + "\n"
            # Drop non-ASCII characters and fold case before matching.
            content = content.encode("ascii", "ignore").lower()
            if pattern.search(content) is not None:
                return i
    return -1
(2) function to extract pages of interest
def fnPDF_ExtractPages(xFileNameOriginal, xFileNameOutput, xPageStart, xPageEnd):
    """Copy a range of pages from one PDF file into a new PDF file.

    Args:
        xFileNameOriginal: Path of the PDF to read pages from.
        xFileNameOutput: Path of the PDF to write. It is opened in "wb"
            mode, so an existing file at that path is overwritten.
        xPageStart: Zero-based index of the first page to copy.
        xPageEnd: Index at which copying stops; this page itself is not
            copied (the range is half-open, as with ``range``).
    """
    from pyPdf import PdfFileReader, PdfFileWriter

    output = PdfFileWriter()
    with open(xFileNameOriginal, "rb") as inputStream:
        pdfOne = PdfFileReader(inputStream)
        for i in range(xPageStart, xPageEnd):
            output.addPage(pdfOne.getPage(i))
        # NOTE: the output is written while the input is still open, as in
        # the original code; pyPdf pages appear to keep reading from the
        # source stream until they are written out.
        with open(xFileNameOutput, "wb") as outputStream:
            output.write(outputStream)
I hope this will be useful for someone else
user1043144
source share