Following up on your comment in response to yms, i.e. "but which library can I use to extract images and (more importantly) return them to PDF?"
Here is a simple demo that does three things: 1) Extract the JBIG2 image (in fact, all the images) from the PDF.
2) Convert the JBIG2 image to another format; in my case it's JPEG.
3) Create a new PDF containing the JPEG.
It uses the jbig2-imageio and iText libraries.
In the example below, change the resources and directory path to suit your needs.
To do this, I had to go through several resources, which are listed at the end. Hope this helps.
import com.itextpdf.text.Document; import com.itextpdf.text.Image; import com.itextpdf.text.pdf.PdfPCell; import com.itextpdf.text.pdf.PdfPTable; import com.itextpdf.text.pdf.PdfReader; import com.itextpdf.text.pdf.PdfWriter; import com.itextpdf.text.pdf.parser.*; import com.levigo.jbig2.JBIG2ImageReader; import com.levigo.jbig2.JBIG2ImageReaderSpi; import com.levigo.jbig2.JBIG2ReadParam; import com.levigo.jbig2.io.DefaultInputStreamFactory; import java.awt.image.BufferedImage; import java.io.*; import javax.imageio.ImageIO; import javax.imageio.stream.ImageInputStream; public class JBig2Image { private String filepath; private int imageIndex; public JBig2Image() { this.filepath = "/home/blackadmin/Desktop/pdf/demo18.jbig2"; this.imageIndex = 0; extractImgFromPdf(); convertJBig2ToJpeg(); createPDF(); } private void extractImgFromPdf() { try { /////////// Extract all Images from pdf ///////////////////////// PdfReader reader = new PdfReader("/home/blackadmin/Desktop/pdf/orig.pdf"); PdfReaderContentParser parser = new PdfReaderContentParser(reader); MyImageRenderListener listener = new MyImageRenderListener("/home/blackadmin/Desktop/pdf/demo%s.%s"); for (int i = 1; i <= reader.getNumberOfPages(); i++) { parser.processContent(i, listener); } } catch (IOException ex) { System.out.println(ex); } } private void convertJBig2ToJpeg() { InputStream inputStream = null; try { ///////// Read jbig2 image //////////////////////////////////////// inputStream = new FileInputStream(new File(filepath)); DefaultInputStreamFactory disf = new DefaultInputStreamFactory(); ImageInputStream imageInputStream = disf.getInputStream(inputStream); JBIG2ImageReader imageReader = new JBIG2ImageReader(new JBIG2ImageReaderSpi()); imageReader.setInput(imageInputStream); JBIG2ReadParam param = imageReader.getDefaultReadParam(); BufferedImage bufferedImage = imageReader.read(imageIndex, param); ////////// jbig2 to jpeg /////////////////////////////////////////// ImageIO.write(bufferedImage, "jpeg", 
new File("/home/blackadmin/Desktop/pdf/demo18.jpeg")); } catch (IOException ex) { System.out.println(ex); } finally { try { inputStream.close(); } catch (IOException ex) { System.out.println(ex); } } } public void createPDF() { Document document = new Document(); try { PdfWriter.getInstance(document, new FileOutputStream("/home/blackadmin/Desktop/pdf/output.pdf")); document.open(); PdfPTable table = new PdfPTable(1); //1 column. Image image = Image.getInstance("/home/blackadmin/Desktop/pdf/demo18.jpeg"); image.scaleToFit(800f, 600f); image.scaleAbsolute(800f, 600f); // Give the size of image you want to print on pdf PdfPCell nestedImgCell = new PdfPCell(image); table.addCell(nestedImgCell); document.add(table); document.close(); System.out.println( "======== PDF Created Successfully ========="); } catch (Exception e) { System.out.println(e); } } public static void main(String[] args) throws IOException { new JBig2Image(); } } class MyImageRenderListener implements RenderListener { /** * The new document to which we've added a border rectangle. */ protected String path = ""; /** * Creates a RenderListener that will look for images. 
*/ public MyImageRenderListener(String path) { this.path = path; } /** * @see com.itextpdf.text.pdf.parser.RenderListener#beginTextBlock() */ public void beginTextBlock() { } /** * @see com.itextpdf.text.pdf.parser.RenderListener#endTextBlock() */ public void endTextBlock() { } /** * @see com.itextpdf.text.pdf.parser.RenderListener#renderImage( * com.itextpdf.text.pdf.parser.ImageRenderInfo) */ public void renderImage(ImageRenderInfo renderInfo) { try { String filename; FileOutputStream os; PdfImageObject image = renderInfo.getImage(); if (image == null) { return; } filename = String.format(path, renderInfo.getRef().getNumber(), image.getFileType()); os = new FileOutputStream(filename); os.write(image.getImageAsBytes()); os.flush(); os.close(); } catch (IOException e) { System.out.println(e.getMessage()); } } /** * @see com.itextpdf.text.pdf.parser.RenderListener#renderText( * com.itextpdf.text.pdf.parser.TextRenderInfo) */ public void renderText(TextRenderInfo renderInfo) { } }
References:
1) Extract jbig2 from pdf ( extract images ) ( MyImageRenderListener ).
2) Convert jbig2 ( JBIG2ImageReaderDemo )
Harmeet singh
source share