# Python version of the GetComponentImages example of the API

#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright 2013 Zdenko Podobný
# Author: Zdenko Podobný
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This is a Python version of the C++ example:
    https://code.google.com/p/tesseract-ocr/wiki/APIExample#example

It demonstrates how to use the tesseract-ocr 3.02 C API and leptonica to get
info about image components.

Tested on openSUSE 13.1 64bit with tesseract 3.03 (r918), leptonica 1.69
"""
#pylint: disable-msg=C0103, R0903

import os
import ctypes

# Demo variables
lang = "eng"
filename = "../phototest.tif"

# Data path later handed to TessBaseAPIInit3; an unset or empty environment
# variable falls back to the parent directory.
TESSDATA_PREFIX = os.environ.get('TESSDATA_PREFIX')
if not TESSDATA_PREFIX:
    TESSDATA_PREFIX = "../"

# The enumerations below are numbered consecutively from 0 and wrapped in
# ctypes.c_int so they can be passed straight to the C functions.
# ``range`` is used instead of ``xrange`` so the definitions work on both
# Python 2 and Python 3.

# Leptonica copy flags
(L_INSERT, L_COPY, L_CLONE, L_COPY_CLONE) = map(ctypes.c_int, range(4))
# Define Page Iterator Levels
(RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL) = \
    map(ctypes.c_int, range(5))
# Define Page Segmentation Modes
(PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN,
 PSM_SINGLE_BLOCK_VERT_TEXT, PSM_SINGLE_BLOCK, PSM_SINGLE_LINE,
 PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT,
 PSM_SPARSE_TEXT_OSD, PSM_COUNT) = map(ctypes.c_int, range(14))

class BOX(ctypes.Structure):
    """ctypes mirror of the leptonica box structure.

    Four signed 32-bit fields (x, y, w, h) are followed by an unsigned
    32-bit refcount.
    """
    # Field names paired positionally with their C types.
    _fields_ = list(zip(
        ("x", "y", "w", "h", "refcount"),
        (ctypes.c_int32,) * 4 + (ctypes.c_uint32,)
    ))

libname = "libtesseract.so.3"
leptlib = "liblept.so"


def _c_str(text):
    """Return ``text`` as a byte string usable as a C ``char *`` argument.

    Byte strings pass through untouched; text strings are UTF-8 encoded.
    """
    if isinstance(text, bytes):
        return text
    return text.encode("utf-8")


def _native_str(raw):
    """Return the byte string ``raw`` as the interpreter's native ``str``."""
    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8", "replace")


def _declare(func, restype, *argtypes):
    """Attach a C prototype (return type and argument types) to ``func``.

    Without an explicit prototype ctypes assumes ``int`` everywhere, which
    truncates pointers on 64-bit platforms.
    """
    func.restype = restype
    func.argtypes = list(argtypes)


try:
    tesseract = ctypes.cdll.LoadLibrary(libname)
except OSError as error:
    print("Loading of '%s failed..." % libname)
    print(error)
    raise SystemExit(1)

try:
    leptonica = ctypes.cdll.LoadLibrary(leptlib)
except OSError as error:
    print("Loading of '%s failed..." % leptlib)
    print(error)
    raise SystemExit(1)

_declare(tesseract.TessVersion, ctypes.c_char_p)
tesseract_version = _native_str(tesseract.TessVersion())[:4]
# We need to check library version because libtesseract.so.3 is symlink
# and can point to other version than 3.02
if float(tesseract_version) < 3.02:
    print("Found tesseract-ocr library version %s." % tesseract_version)
    print("C-API is present only in version 3.02!")
    raise SystemExit(2)

# Result type (BOX structure) for leptonica function boxaGetBox
BOX_Ptr_t = ctypes.POINTER(BOX)
# Opaque handles (PIX*, BOXA*, TessBaseAPI*, char* we must free ourselves)
_ptr = ctypes.c_void_p

# Leptonica prototypes
_declare(leptonica.pixRead, _ptr, ctypes.c_char_p)
_declare(leptonica.pixGetWidth, ctypes.c_int, _ptr)
_declare(leptonica.pixGetHeight, ctypes.c_int, _ptr)
_declare(leptonica.pixDestroy, None, ctypes.POINTER(_ptr))
_declare(leptonica.boxaGetCount, ctypes.c_int, _ptr)
_declare(leptonica.boxaGetBox, BOX_Ptr_t, _ptr, ctypes.c_int, ctypes.c_int)
_declare(leptonica.boxDestroy, None, ctypes.POINTER(BOX_Ptr_t))
_declare(leptonica.boxaDestroy, None, ctypes.POINTER(_ptr))

# Tesseract C-API prototypes
_declare(tesseract.TessBaseAPICreate, _ptr)
_declare(tesseract.TessBaseAPIDelete, None, _ptr)
_declare(tesseract.TessBaseAPIEnd, None, _ptr)
_declare(tesseract.TessBaseAPIInit3, ctypes.c_int,
         _ptr, ctypes.c_char_p, ctypes.c_char_p)
_declare(tesseract.TessBaseAPISetPageSegMode, None, _ptr, ctypes.c_int)
_declare(tesseract.TessBaseAPISetImage2, None, _ptr, _ptr)
_declare(tesseract.TessBaseAPIGetComponentImages, _ptr,
         _ptr, ctypes.c_int, ctypes.c_int, _ptr, _ptr)
_declare(tesseract.TessBaseAPISetVariable, ctypes.c_int,
         _ptr, ctypes.c_char_p, ctypes.c_char_p)
_declare(tesseract.TessBaseAPISetRectangle, None,
         _ptr, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int)
# restype is a raw pointer (not c_char_p) so the buffer can be released
# with TessDeleteText after its content has been copied.
_declare(tesseract.TessBaseAPIGetUTF8Text, _ptr, _ptr)
_declare(tesseract.TessBaseAPIMeanTextConf, ctypes.c_int, _ptr)
_declare(tesseract.TessDeleteText, None, _ptr)

# Read image with leptonica => create PIX structure and report image size info
pix_image = leptonica.pixRead(_c_str(filename))
if not pix_image:
    # pixRead hands back NULL when the image cannot be loaded.
    print("Could not read image '%s'." % filename)
    raise SystemExit(4)
print("image width: %d" % leptonica.pixGetWidth(pix_image))
print("image height: %d" % leptonica.pixGetHeight(pix_image))

# Create tesseract api
api = tesseract.TessBaseAPICreate()
rc = tesseract.TessBaseAPIInit3(api, _c_str(TESSDATA_PREFIX), _c_str(lang))
if rc:
    tesseract.TessBaseAPIDelete(api)
    print("Could not initialize tesseract.\n")
    raise SystemExit(3)
tesseract.TessBaseAPISetPageSegMode(api, PSM_AUTO_OSD)

# Set PIX structure to tesseract api
tesseract.TessBaseAPISetImage2(api, pix_image)
# Get info(BOXA structure) about lines(RIL_TEXTLINE) from image in api
boxa = tesseract.TessBaseAPIGetComponentImages(api, RIL_TEXTLINE, 1,
                                               None, None)
# Get info about number of items on image (a NULL boxa counts as empty)
n_items = leptonica.boxaGetCount(boxa) if boxa else 0
print("Found %d textline image components." % n_items)

# Shut up tesseract - there is a lot of unwanted messages for RIL_TEXTLINE
tesseract.TessBaseAPISetVariable(api, _c_str("debug_file"),
                                 _c_str("/dev/null"))

# print info about items
for item in range(n_items):
    # Named box_ptr (not BOX) so the BOX structure class is not shadowed.
    box_ptr = leptonica.boxaGetBox(boxa, item, L_CLONE)
    if not box_ptr:
        continue
    box = box_ptr.contents
    tesseract.TessBaseAPISetRectangle(api, box.x, box.y, box.w, box.h)
    ocr_result = tesseract.TessBaseAPIGetUTF8Text(api)
    if ocr_result:
        # Copy the text out, then give the C buffer back to tesseract.
        result_text = _native_str(ctypes.string_at(ocr_result))
        tesseract.TessDeleteText(ocr_result)
    else:
        result_text = ""
    conf = tesseract.TessBaseAPIMeanTextConf(api)
    print("Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s" %
          (item, box.x, box.y, box.w, box.h, conf, result_text.strip()))
    # Release the handle obtained from boxaGetBox.
    leptonica.boxDestroy(ctypes.byref(box_ptr))

# Release everything that was allocated on the C side.
if boxa:
    leptonica.boxaDestroy(ctypes.byref(_ptr(boxa)))
tesseract.TessBaseAPIEnd(api)
tesseract.TessBaseAPIDelete(api)
leptonica.pixDestroy(ctypes.byref(_ptr(pix_image)))