TesseractNotFound - Pytesser

Question

TesseractNotFound - Pytesser

I am trying to do OCR using pytesser downloaded from HERE .

Here is the pytesser.py code

# Thin wrapper around the `tesseract` command-line program (Python 2 code).

# OpenCV is optional: it is only used by iplimage_to_string().
try:
    import cv2.cv as cv
    OPENCV_AVAILABLE = True
except ImportError:
    OPENCV_AVAILABLE = False

from subprocess import Popen, PIPE
import os

# Name of the executable that is looked up in PATH and launched.
PROG_NAME = 'tesseract'
# Scratch image written by iplimage_to_string() before running tesseract.
TEMP_IMAGE = 'tmp.bmp'
# Output base name handed to tesseract; the result is read from TEMP_FILE + ".txt".
TEMP_FILE = 'tmp'

# All the PSM arguments as variable names (avoids having to know the numbers).
# The value is passed to tesseract via the "-psm" option.
PSM_OSD_ONLY = 0
PSM_SEG_AND_OSD = 1
PSM_SEG_ONLY = 2
PSM_AUTO = 3
PSM_SINGLE_COLUMN = 4
PSM_VERTICAL_ALIGN = 5
PSM_UNIFORM_BLOCK = 6
PSM_SINGLE_LINE = 7
PSM_SINGLE_WORD = 8
PSM_SINGLE_WORD_CIRCLE = 9
PSM_SINGLE_CHAR = 10


class TesseractException(Exception):
    """Raised when tesseract does not return 0."""
    pass


class TesseractNotFound(Exception):
    """Raised when tesseract is not found in the PATH."""
    pass


def check_path():
    """Check that tesseract is in the PATH.

    Returns True if some PATH entry contains a non-directory file named
    PROG_NAME; raises TesseractNotFound otherwise.
    """
    # NOTE(review): the separator is hard-coded to ';' rather than os.pathsep.
    for path in os.environ.get('PATH', '').split(';'):
        # NOTE(review): the file name must equal PROG_NAME exactly, so an
        # executable called 'tesseract.exe' does not match 'tesseract'.
        filepath = os.path.join(path, PROG_NAME)
        if os.path.exists(filepath) and not os.path.isdir(filepath):
            return True
    raise TesseractNotFound


def process_request(input_file, output_file, lang=None, psm=None):
    """Run tesseract on input_file, writing its result under output_file.

    lang, when given, is passed with "-l"; psm, when given, is passed with
    "-psm". Raises TesseractException if the process exits with a non-zero
    return code.
    """
    args = [PROG_NAME, input_file, output_file]  # Create the arguments
    if lang is not None:
        args.append("-l")
        args.append(lang)
    if psm is not None:
        args.append("-psm")
        args.append(str(psm))
    proc = Popen(args, stdout=PIPE, stderr=PIPE)  # Open process
    ret = proc.communicate()  # Run it to completion; ret is (stdout, stderr)
    code = proc.returncode
    if code != 0:
        # Map the return codes this wrapper knows about to a message;
        # anything else is reported with tesseract's stderr output.
        if code == 2:
            raise TesseractException, "File not found"
        if code == -11:
            raise TesseractException, "Language code invalid: "+ret[1]
        else:
            raise TesseractException, ret[1]


def iplimage_to_string(im, lang=None, psm=None):
    """OCR an OpenCV image by saving it to TEMP_IMAGE and running tesseract.

    Returns the recognised text, or -1 (after printing a message) when
    OpenCV could not be imported.
    """
    if not OPENCV_AVAILABLE:
        print "OpenCV not Available"
        return -1
    else:
        cv.SaveImage(TEMP_IMAGE, im)
        txt = image_to_string(TEMP_IMAGE, lang, psm)
        os.remove(TEMP_IMAGE)  # Clean up the scratch image
        return txt


def image_to_string(file,lang=None, psm=None):
    """OCR the image at path `file` and return the text tesseract produced.

    Raises TesseractNotFound if check_path() fails and TesseractException if
    the tesseract process reports an error.
    """
    check_path()  # Check if tesseract is available in the path
    process_request(file, TEMP_FILE, lang, psm)  # Process command
    f = open(TEMP_FILE+".txt","r")  # Open back the file
    txt = f.read()
    # NOTE(review): f is never closed before the file is removed -- likely a
    # problem on Windows; confirm.
    os.remove(TEMP_FILE+".txt")
    return txt


if __name__ == '__main__':
    print image_to_string("image.jpg", "fra", PSM_AUTO)  # Example

The problem is that when I try to execute the sample fragment indicated in the link above, I get the `TesseractNotFound` error:

 >>> import pytesser
 >>> txt = pytesser.image_to_string('C:/output.png')
 Traceback (most recent call last):
   File "<pyshell#1>", line 1, in <module>
     txt = pytesser.image_to_string('C:/output.png')
   File "C:\Python27\lib\site-packages\pytesser.py", line 71, in image_to_string
     check_path() #Check if tesseract available in the path
   File "C:\Python27\lib\site-packages\pytesser.py", line 38, in check_path
     raise TesseractNotFound
 TesseractNotFound
 >>>

My Tesseract-OCR installation is located in C:\Tesseract-OCR

I set TESSDATA_PREFIX=C:\Tesseract-OCR\ and also Path=C:\Tesseract-OCR

I want to know why I get TesseractNotFound even though the correct environment variables are set.

Thanks.

0

python python-2.7 ocr tesseract

md1hunox Apr 30 '13 at 15:19

source share

1 answer

md1hunox · Answer 1 · 2013-04-30T16:42:00+0000

After making the following changes in pytesser.py my problem was solved; there was nothing wrong with the paths I had set.

The changes are as follows:

PROG_NAME = 'tesseract' changed to PROG_NAME = 'tesseract.exe'

In image_to_string(), added a call to f.close() after txt = f.read()

Like this:)

TesseractNotFound - Pytesser

More articles: