Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- # Copyright 2013 Zdenko Podobný
- # Author: Zdenko Podobný
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """
- This is a Python version of the C++ example:
- https://code.google.com/p/tesseract-ocr/wiki/APIExample#example
- It demonstrates how to use the tesseract-ocr 3.02 C API and leptonica to get
- info about image components.
- Tested on openSUSE 13.1 64bit with tesseract 3.03 (r918), leptonica 1.69
- """
- #pylint: disable-msg=C0103, R0903
- import os
- import ctypes
# Demo variables
lang = "eng"
filename = "../phototest.tif"
# Use the parent directory when TESSDATA_PREFIX is unset or empty.
TESSDATA_PREFIX = os.environ.get('TESSDATA_PREFIX') or "../"

# Each group of names below is numbered consecutively from 0 and wrapped in
# ctypes.c_int.  ``range`` is used instead of ``xrange`` so the definitions
# work on both Python 2 and Python 3.
(L_INSERT, L_COPY, L_CLONE, L_COPY_CLONE) = map(ctypes.c_int, range(4))

# Define Page Iterator Levels
(RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL) = \
    map(ctypes.c_int, range(5))

# Define Page Segmentation Modes
(PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN,
 PSM_SINGLE_BLOCK_VERT_TEXT, PSM_SINGLE_BLOCK, PSM_SINGLE_LINE,
 PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT,
 PSM_SPARSE_TEXT_OSD, PSM_COUNT) = map(ctypes.c_int, range(14))
class BOX(ctypes.Structure):
    """ctypes layout of a Leptonica box.

    Four signed 32-bit fields (``x``, ``y``, ``w``, ``h``) followed by an
    unsigned 32-bit ``refcount``.
    """
    _fields_ = [(name, ctypes.c_int32) for name in ("x", "y", "w", "h")] + [
        ("refcount", ctypes.c_uint32),
    ]
libname = "libtesseract.so.3"
leptlib = "liblept.so"


def _load_library(name):
    """Load the shared library *name* with ctypes.

    Prints the failure reason and exits with status 1 when the library
    cannot be loaded.
    """
    try:
        return ctypes.cdll.LoadLibrary(name)
    except OSError as error:
        print("Loading of '%s' failed..." % name)
        print(error)
        raise SystemExit(1)


def _major_minor(version):
    """Return the leading ``major.minor`` of *version* as a tuple of ints.

    ``(0, 0)`` is returned when *version* does not start with two
    dot-separated numbers.
    """
    import re  # local import: only this helper needs it

    found = re.match(r"(\d+)\.(\d+)", version)
    if found is None:
        return (0, 0)
    return (int(found.group(1)), int(found.group(2)))


tesseract = _load_library(libname)
leptonica = _load_library(leptlib)

tesseract.TessVersion.restype = ctypes.c_char_p
tesseract_version = tesseract.TessVersion()
if not isinstance(tesseract_version, str):
    # Python 3: c_char_p results are bytes; convert for parsing and printing.
    tesseract_version = tesseract_version.decode("ascii", "replace")

# We need to check library version because libtesseract.so.3 is symlink
# and can point to other version than 3.02.  The version is compared as a
# (major, minor) tuple: converting a fixed-width slice to float fails for
# strings such as "4.1.1", whose first four characters are "4.1.".
if _major_minor(tesseract_version) < (3, 2):
    print("Found tesseract-ocr library version %s." % tesseract_version)
    print("C-API is present only in version 3.02!")
    raise SystemExit(2)
def _as_bytes(value):
    """Return *value* as a byte string usable as a C ``char *`` argument."""
    # On Python 2 ``str`` already is ``bytes``.  On Python 3 text has to be
    # encoded, otherwise ctypes would pass it as ``wchar_t *``.
    if isinstance(value, bytes):
        return value
    return value.encode("utf-8")


def _declare(func, restype, *argtypes):
    """Set the return type and argument types of a foreign function."""
    func.restype = restype
    func.argtypes = list(argtypes)


# Result type (BOX structure pointer) for leptonica function boxaGetBox
BOX_Ptr_t = ctypes.POINTER(BOX)

# ctypes assumes that every foreign function returns a C int and passes
# Python integers as C int.  Handles (PIX, BOXA, TessBaseAPI, text buffers)
# are pointers, so without explicit prototypes they get truncated on 64-bit
# platforms.  Declare every function that is used below.
_declare(leptonica.pixRead, ctypes.c_void_p, ctypes.c_char_p)
_declare(leptonica.pixGetWidth, ctypes.c_int, ctypes.c_void_p)
_declare(leptonica.pixGetHeight, ctypes.c_int, ctypes.c_void_p)
_declare(leptonica.pixDestroy, None, ctypes.POINTER(ctypes.c_void_p))
_declare(leptonica.boxaGetCount, ctypes.c_int, ctypes.c_void_p)
_declare(leptonica.boxaGetBox, BOX_Ptr_t,
         ctypes.c_void_p, ctypes.c_int, ctypes.c_int)
_declare(leptonica.boxDestroy, None, ctypes.POINTER(BOX_Ptr_t))
_declare(leptonica.boxaDestroy, None, ctypes.POINTER(ctypes.c_void_p))

_declare(tesseract.TessBaseAPICreate, ctypes.c_void_p)
_declare(tesseract.TessBaseAPIDelete, None, ctypes.c_void_p)
_declare(tesseract.TessBaseAPIEnd, None, ctypes.c_void_p)
_declare(tesseract.TessBaseAPIInit3, ctypes.c_int,
         ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p)
_declare(tesseract.TessBaseAPISetPageSegMode, None,
         ctypes.c_void_p, ctypes.c_int)
_declare(tesseract.TessBaseAPISetImage2, None,
         ctypes.c_void_p, ctypes.c_void_p)
_declare(tesseract.TessBaseAPIGetComponentImages, ctypes.c_void_p,
         ctypes.c_void_p, ctypes.c_int, ctypes.c_int,
         ctypes.c_void_p, ctypes.c_void_p)
_declare(tesseract.TessBaseAPISetVariable, ctypes.c_int,
         ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p)
_declare(tesseract.TessBaseAPISetRectangle, None,
         ctypes.c_void_p, ctypes.c_int, ctypes.c_int,
         ctypes.c_int, ctypes.c_int)
# c_void_p (not c_char_p) keeps the raw pointer so that the buffer can be
# handed back to TessDeleteText after its content has been copied.
_declare(tesseract.TessBaseAPIGetUTF8Text, ctypes.c_void_p, ctypes.c_void_p)
_declare(tesseract.TessBaseAPIMeanTextConf, ctypes.c_int, ctypes.c_void_p)
_declare(tesseract.TessDeleteText, None, ctypes.c_void_p)

# Read image with leptonica => create PIX structure and report image size info
pix_image = leptonica.pixRead(_as_bytes(filename))
if not pix_image:
    # pixRead returned NULL: there is nothing to hand over to tesseract.
    print("Could not read image '%s'." % filename)
    raise SystemExit(4)
print("image width: %d" % leptonica.pixGetWidth(pix_image))
print("image height: %d" % leptonica.pixGetHeight(pix_image))

# Create tesseract api
api = tesseract.TessBaseAPICreate()
if not api:
    # TessBaseAPICreate returned NULL: no handle to initialise.
    leptonica.pixDestroy(ctypes.byref(ctypes.c_void_p(pix_image)))
    print("Could not create tesseract api.")
    raise SystemExit(5)
rc = tesseract.TessBaseAPIInit3(api, _as_bytes(TESSDATA_PREFIX),
                                _as_bytes(lang))
if rc:
    tesseract.TessBaseAPIDelete(api)
    leptonica.pixDestroy(ctypes.byref(ctypes.c_void_p(pix_image)))
    print("Could not initialize tesseract.\n")
    raise SystemExit(3)

tesseract.TessBaseAPISetPageSegMode(api, PSM_AUTO_OSD)

# Set PIX structure to tesseract api
tesseract.TessBaseAPISetImage2(api, pix_image)

# Get info(BOXA structure) about lines(RIL_TEXTLINE) from image in api
boxa = tesseract.TessBaseAPIGetComponentImages(api, RIL_TEXTLINE, 1,
                                               None, None)

# Get info about number of items on image (a NULL BOXA counts as empty)
n_items = leptonica.boxaGetCount(boxa) if boxa else 0
print("Found %d textline image components." % n_items)

# Shut up tesseract - there is a lot of unwanted messages for RIL_TEXTLINE
tesseract.TessBaseAPISetVariable(api, b"debug_file", b"/dev/null")

# print info about items
for item in range(n_items):
    # ``box_ptr`` rather than ``BOX``: the latter would shadow the BOX class.
    box_ptr = leptonica.boxaGetBox(boxa, item, L_CLONE)
    if not box_ptr:
        continue
    box = box_ptr.contents
    tesseract.TessBaseAPISetRectangle(api, box.x, box.y, box.w, box.h)
    text_ptr = tesseract.TessBaseAPIGetUTF8Text(api)
    result_text = ctypes.string_at(text_ptr) if text_ptr else b""
    conf = tesseract.TessBaseAPIMeanTextConf(api)
    if text_ptr:
        # The text buffer is allocated by tesseract and must be released.
        tesseract.TessDeleteText(text_ptr)
    if not isinstance(result_text, str):
        # Python 3: convert the UTF-8 bytes to text for printing.
        result_text = result_text.decode("utf-8", "replace")
    print("Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s" %
          (item, box.x, box.y, box.w, box.h, conf, result_text.strip()))
    # Drop the reference obtained with L_CLONE.
    leptonica.boxDestroy(ctypes.byref(box_ptr))

# Release everything that was allocated by the libraries.
leptonica.boxaDestroy(ctypes.byref(ctypes.c_void_p(boxa)))
tesseract.TessBaseAPIEnd(api)
tesseract.TessBaseAPIDelete(api)
leptonica.pixDestroy(ctypes.byref(ctypes.c_void_p(pix_image)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement