zdenop

python version of GetComponentImages example of API

Dec 7th, 2013
668
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3.  
  4. # Copyright 2013 Zdenko Podobný
  5. # Author: Zdenko Podobný
  6. #
  7. # Licensed under the Apache License, Version 2.0 (the "License");
  8. # you may not use this file except in compliance with the License.
  9. # You may obtain a copy of the License at
  10. #
  11. #      http://www.apache.org/licenses/LICENSE-2.0
  12. #
  13. # Unless required by applicable law or agreed to in writing, software
  14. # distributed under the License is distributed on an "AS IS" BASIS,
  15. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. # See the License for the specific language governing permissions and
  17. # limitations under the License.
  18.  
  19. """
  20. This is a Python version of the C++ example:
  21.    https://code.google.com/p/tesseract-ocr/wiki/APIExample#example
  22.  
  23. It demonstrates how to use the tesseract-ocr 3.02 C API and leptonica to
  24. get info about image components.
  25.  
  26. Tested on openSUSE 13.1 64bit with tesseract 3.03 (r918), leptonica 1.69
  27. """
  28. #pylint: disable-msg=C0103, R0903
  29.  
import ctypes
import os
import re
  32.  
  33. # Demo variables
  34. lang = "eng"
  35. filename = "../phototest.tif"
  36.  
  37. TESSDATA_PREFIX = os.environ.get('TESSDATA_PREFIX')
  38. if not TESSDATA_PREFIX:
  39.     TESSDATA_PREFIX = "../"
  40.  
  41. (L_INSERT, L_COPY, L_CLONE, L_COPY_CLONE) = map(ctypes.c_int, xrange(4))
  42. # Define Page Iterator Levels
  43. (RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL) = \
  44.     map(ctypes.c_int, xrange(5))
  45. # Define Page Segmentation Modes
  46. (PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN,
  47.  PSM_SINGLE_BLOCK_VERT_TEXT, PSM_SINGLE_BLOCK, PSM_SINGLE_LINE,
  48.  PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT,
  49.  PSM_SPARSE_TEXT_OSD, PSM_COUNT) = map(ctypes.c_int, xrange(14))
  50.  
  51. class BOX(ctypes.Structure):
  52.     """Leptonica box structure
  53.    """
  54.     _fields_ = [
  55.         ("x", ctypes.c_int32),
  56.         ("y", ctypes.c_int32),
  57.         ("w", ctypes.c_int32),
  58.         ("h", ctypes.c_int32),
  59.         ("refcount", ctypes.c_uint32)
  60.     ]
  61.  
  62. libname = "libtesseract.so.3"
  63. leptlib = "liblept.so"
  64.  
  65. try:
  66.     tesseract = ctypes.cdll.LoadLibrary(libname)
  67. except OSError, error:
  68.     print "Loading of '%s failed..." % libname
  69.     print error
  70.     exit(1)
  71.  
  72. try:
  73.     leptonica = ctypes.cdll.LoadLibrary(leptlib)
  74. except OSError, error:
  75.     print "Loading of '%s failed..." % leptlib
  76.     print error
  77.     exit(1)
  78.  
  79. tesseract.TessVersion.restype = ctypes.c_char_p
  80. tesseract_version = tesseract.TessVersion()[:4]
  81. # We need to check library version because libtesseract.so.3 is symlink
  82. # and can point to other version than 3.02
  83. if float(tesseract_version) < 3.02:
  84.     print "Found tesseract-ocr library version %s." % tesseract_version
  85.     print "C-API is present only in version 3.02!"
  86.     exit(2)
  87.  
  88. # Read image with leptonica => create PIX structure and report image size info
  89. pix_image = leptonica.pixRead(filename)
  90. print "image width:", leptonica.pixGetWidth(pix_image)
  91. print "image height:", leptonica.pixGetHeight(pix_image)
  92.  
  93. # Create tesseract api
  94. api = tesseract.TessBaseAPICreate()
  95. rc = tesseract.TessBaseAPIInit3(api, TESSDATA_PREFIX, lang)
  96. if (rc):
  97.     tesseract.TessBaseAPIDelete(api)
  98.     print("Could not initialize tesseract.\n")
  99.     exit(3)
  100. tesseract.TessBaseAPISetPageSegMode(api, PSM_AUTO_OSD)
  101.  
  102. # Set PIX structure to tesseract api
  103. tesseract.TessBaseAPISetImage2(api, pix_image)
  104. # Get info(BOXA structure) about lines(RIL_TEXTLINE) from image in api
  105. boxa = tesseract.TessBaseAPIGetComponentImages(api, RIL_TEXTLINE, 1,
  106.                                                None, None)
  107. # Get info about number of items on image
  108. n_items = leptonica.boxaGetCount(boxa)
  109. print "Found %d textline image components." % n_items
  110.  
  111. # Set up result type (BOX structure) for leptonica function boxaGetBox
  112. BOX_Ptr_t = ctypes.POINTER(BOX)
  113. leptonica.boxaGetBox.restype = BOX_Ptr_t
  114.  
  115. # Shut up tesseract - there is a lot of unwanted messages for RIL_TEXTLINE
  116. tesseract.TessBaseAPISetVariable(api, "debug_file", "/dev/null")
  117.  
  118. # print info about items
  119. for item in range(0, n_items):
  120.     BOX = leptonica.boxaGetBox(boxa, item, L_CLONE)
  121.     box = BOX.contents
  122.     tesseract.TessBaseAPISetRectangle(api, box.x, box.y, box.w, box.h)
  123.     ocr_result = tesseract.TessBaseAPIGetUTF8Text(api)
  124.     result_text = ctypes.string_at(ocr_result)
  125.     conf = tesseract.TessBaseAPIMeanTextConf(api)
  126.     print "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s" % \
  127.         (item, box.x, box.y, box.w, box.h, conf, result_text.strip())
RAW Paste Data