I'm struggling to quickly and reliably extract the words that are inside rectangles from a BufferedImage.
For example, I have the following page: (edit!) The image is scanned, so it may contain noise, skew and distortion.

How can I extract the following images, without the surrounding rectangle: (edit!) I can use OpenCV or any other library, but I am completely new to advanced image processing methods.
EDIT
I used the method suggested by karlphillip here, and it works decently.
Here is the code:
package ro.ubbcluj.detection;

import java.awt.FlowLayout;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.imageio.ImageIO;
import javax.swing.ImageIcon;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.WindowConstants;

import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.MatOfByte;
import org.opencv.core.MatOfPoint;
import org.opencv.core.Point;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.highgui.Highgui;
import org.opencv.imgproc.Imgproc;

/**
 * Command-line demo that looks for rectangles in a scanned form and shows the
 * result in a Swing window.
 *
 * <p>The pipeline is: load image, convert to grayscale, threshold, find
 * contours, fill the contours, convert to grayscale and threshold again,
 * erode and dilate, then hand the mask to {@code Rectangle.findSquares} and
 * draw the result with {@code Rectangle.drawSquares}.
 */
public class RectangleDetection {

    /** Image used when no path is given on the command line. */
    private static final String DEFAULT_IMAGE_PATH =
            "E:\\Programs\\Eclipse Workspace\\LicentaWorkspace\\OpenCvRectangleDetection\\src\\img\\form3.jpg";

    /** Half-width of the square structuring element; the kernel is (2 * size + 1) pixels wide. */
    private static final int EROSION_SIZE = 5;

    /**
     * Runs the detection pipeline on one image and displays the outcome.
     *
     * @param args optional; {@code args[0]} is the path of the image to process,
     *             otherwise {@link #DEFAULT_IMAGE_PATH} is used
     * @throws IOException if the image cannot be loaded or the result cannot be
     *                     converted for display
     */
    public static void main(String[] args) throws IOException {
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

        String imagePath = (args != null && args.length > 0) ? args[0] : DEFAULT_IMAGE_PATH;
        Mat image = loadImage(imagePath);

        Mat grayscale = convertToGrayscale(image);
        Mat thresholded = tresholdImage(grayscale);
        List<MatOfPoint> contours = findContours(thresholded);

        // Fill every contour, then binarise again so that each filled
        // region becomes a solid blob before the morphology step.
        Mat contoursImage = fillCountours(contours, grayscale);
        Mat grayscaleWithContours = convertToGrayscale(contoursImage);
        Mat thresholdedWithContours = tresholdImage(grayscaleWithContours);
        Mat eroded = erodeAndDilate(thresholdedWithContours);

        List<MatOfPoint> squaresFound = findSquares(eroded);
        Mat squaresDrawn = Rectangle.drawSquares(grayscale, squaresFound);

        displayImage(convertMatToBufferedImage(squaresDrawn));
    }

    /** Delegates square detection to {@code Rectangle.findSquares}. */
    private static List<MatOfPoint> findSquares(Mat eroded) {
        return Rectangle.findSquares(eroded);
    }

    /**
     * Erodes and then dilates {@code input} with the same rectangular
     * structuring element (a morphological opening).
     *
     * @param input image to process; it is not modified
     * @return a new matrix holding the result
     */
    private static Mat erodeAndDilate(Mat input) {
        int kernelSide = 2 * EROSION_SIZE + 1;
        Mat element = Imgproc.getStructuringElement(
                Imgproc.MORPH_RECT, new Size(kernelSide, kernelSide));

        Mat result = new Mat();
        Imgproc.erode(input, result, element);
        Imgproc.dilate(result, result, element);
        return result;
    }

    /**
     * Converts a three-channel image to a single-channel grayscale image
     * using {@code COLOR_BGR2GRAY}.
     *
     * @param input colour image; it is not modified
     * @return a new grayscale matrix
     */
    private static Mat convertToGrayscale(Mat input) {
        Mat grayscale = new Mat();
        Imgproc.cvtColor(input, grayscale, Imgproc.COLOR_BGR2GRAY);
        return grayscale;
    }

    /**
     * Draws every contour, filled, onto a colour copy of {@code image}.
     *
     * @param contours contours to fill
     * @param image    grayscale image used as the background; it is not modified
     * @return a three-channel copy of {@code image} with the contours filled
     *         using the scalar (255, 0, 0)
     */
    private static Mat fillCountours(List<MatOfPoint> contours, Mat image) {
        Mat result = image.clone();
        Imgproc.cvtColor(result, result, Imgproc.COLOR_GRAY2RGB);

        Scalar fillColor = new Scalar(255, 0, 0);
        for (int i = 0; i < contours.size(); i++) {
            // Thickness -1 asks OpenCV to fill the contour instead of outlining it.
            Imgproc.drawContours(result, contours, i, fillColor, -1, 8, new Mat(), 0, new Point());
        }
        return result;
    }

    /**
     * Finds all contours of a binary image with {@code RETR_TREE} and
     * {@code CHAIN_APPROX_NONE}.
     *
     * <p>NOTE(review): the OpenCV 2.4 documentation states that
     * {@code findContours} modifies its input image, so callers should not
     * reuse {@code image} afterwards. Confirm for the OpenCV version in use.
     *
     * @param image binary input image
     * @return the detected contours; the hierarchy is discarded
     */
    private static List<MatOfPoint> findContours(Mat image) {
        List<MatOfPoint> contours = new ArrayList<>();
        Mat hierarchy = new Mat();
        Imgproc.findContours(image, contours, hierarchy, Imgproc.RETR_TREE, Imgproc.CHAIN_APPROX_NONE);
        return contours;
    }

    /**
     * Detects line segments with the probabilistic Hough transform and draws
     * them in green on a colour copy of the input. Not used by {@link #main}.
     *
     * <p>The segments are drawn on the returned image rather than on the
     * matrix holding the Hough output, and the caller's {@code img} is left
     * untouched.
     *
     * @param img single-channel binary image to search
     * @return a three-channel copy of {@code img} with the segments drawn on it
     */
    private static Mat detectLinesHough(Mat img) {
        Mat lines = new Mat();
        int threshold = 80;
        int minLineLength = 10;
        int maxLineGap = 5;
        double rho = 0.4;
        Imgproc.HoughLinesP(img, lines, rho, Math.PI / 180, threshold, minLineLength, maxLineGap);

        Mat result = new Mat();
        Imgproc.cvtColor(img, result, Imgproc.COLOR_GRAY2RGB);

        System.out.println(lines.cols());
        Scalar lineColor = new Scalar(0, 255, 0);
        // Each column of the single-row result is read as one segment (x1, y1, x2, y2).
        for (int x = 0; x < lines.cols(); x++) {
            double[] vec = lines.get(0, x);
            Point start = new Point(vec[0], vec[1]);
            Point end = new Point(vec[2], vec[3]);
            Core.line(result, start, end, lineColor, 3);
        }
        return result;
    }

    /**
     * Converts an OpenCV matrix to a {@link BufferedImage} by encoding it as
     * JPEG in memory and decoding it with {@link ImageIO}.
     *
     * @param mat image to convert
     * @return the decoded image, never {@code null}
     * @throws IOException if the matrix cannot be encoded or the encoded bytes
     *                     cannot be decoded
     */
    static BufferedImage convertMatToBufferedImage(Mat mat) throws IOException {
        MatOfByte encoded = new MatOfByte();
        if (!Highgui.imencode(".jpg", mat, encoded)) {
            throw new IOException("Could not encode the image as JPEG");
        }
        try (InputStream in = new ByteArrayInputStream(encoded.toArray())) {
            BufferedImage decoded = ImageIO.read(in);
            if (decoded == null) {
                // ImageIO.read returns null when no registered reader understands the data.
                throw new IOException("Could not decode the JPEG-encoded image");
            }
            return decoded;
        }
    }

    /**
     * Shows {@code image} in a new window; closing the window exits the JVM.
     *
     * @param image image to display
     */
    static void displayImage(BufferedImage image) {
        JFrame frame = new JFrame();
        frame.getContentPane().setLayout(new FlowLayout());
        frame.getContentPane().add(new JLabel(new ImageIcon(image)));
        frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
        frame.pack();
        frame.setVisible(true);
    }

    /**
     * Applies an inverted binary threshold with a fixed threshold of 225 and
     * a maximum value of 255.
     *
     * @param img grayscale image; it is not modified
     * @return a new binary matrix
     */
    private static Mat tresholdImage(Mat img) {
        Mat result = new Mat();
        Imgproc.threshold(img, result, 225, 255, Imgproc.THRESH_BINARY_INV);
        return result;
    }

    /**
     * Variant of {@link #tresholdImage(Mat)} that adds the
     * {@code THRESH_OTSU} flag. Not used by {@link #main}.
     *
     * @param img grayscale image; it is not modified
     * @return a new binary matrix
     */
    private static Mat tresholdImage2(Mat img) {
        Mat result = new Mat();
        // NOTE(review): with THRESH_OTSU the threshold argument (-1) should be
        // ignored and computed by OpenCV. Confirm against the OpenCV docs.
        Imgproc.threshold(img, result, -1, 255, Imgproc.THRESH_BINARY_INV + Imgproc.THRESH_OTSU);
        return result;
    }

    /**
     * Loads an image from disk.
     *
     * @param path location of the image file
     * @return the loaded image, never empty
     * @throws IOException if the file is missing or cannot be decoded
     */
    private static Mat loadImage(String path) throws IOException {
        Mat image = Highgui.imread(path);
        // imread signals failure with an empty matrix rather than an exception.
        if (image.empty()) {
            throw new IOException("Could not read image: " + path);
        }
        return image;
    }
}
and the Rectangle class:
package ro.ubbcluj.detection; import java.awt.image.BufferedImage; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.opencv.core.Core; import org.opencv.core.Mat; import org.opencv.core.MatOfPoint; import org.opencv.core.MatOfPoint2f; import org.opencv.core.Point; import org.opencv.core.Scalar; import org.opencv.core.Size; import org.opencv.imgproc.Imgproc; public class Rectangle { static List<MatOfPoint> findSquares(Mat input) { Mat pyr = new Mat(); Mat timg = new Mat();
Example result:


... however, it does not work so well for small images: 

Could you suggest some improvements? And how can I make the algorithm faster when I have a batch of images to process?