zdenop

Simple example of python wrapper for tesseract and leptonica

Dec 25th, 2020 (edited)
1,500
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.44 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3.  
  4. """Simple example of python wrapper for tesseract and leptonica
  5.   based on:
  6.   https://sk-spell.sk.cx/building-minimalistic-tesseract
  7.   https://github.com/zdenop/SimpleTesseractPythonWrapper
  8. """
  9.  
  10. import ctypes
  11. import locale
  12. import os
  13. import platform
  14. from ctypes.util import find_library
  15.  
  16. import cffi
  17. from PIL import Image, ImageDraw, ImageFont
  18.  
  19. ffi = cffi.FFI()
  20. ffi.cdef(
  21.     """
  22. typedef signed char             l_int8;
  23. typedef unsigned char           l_uint8;
  24. typedef short                   l_int16;
  25. typedef unsigned short          l_uint16;
  26. typedef int                     l_int32;
  27. typedef unsigned int            l_uint32;
  28. typedef float                   l_float32;
  29. typedef double                  l_float64;
  30. typedef long long               l_int64;
  31. typedef unsigned long long      l_uint64;
  32. typedef int l_ok; /*!< return type 0 if OK, 1 on error */
  33.  
  34.  
  35. struct Pix;
  36. typedef struct Pix PIX;
  37. typedef enum lept_img_format {
  38.    IFF_UNKNOWN        = 0,
  39.    IFF_BMP            = 1,
  40.    IFF_JFIF_JPEG      = 2,
  41.    IFF_PNG            = 3,
  42.    IFF_TIFF           = 4,
  43.    IFF_TIFF_PACKBITS  = 5,
  44.    IFF_TIFF_RLE       = 6,
  45.    IFF_TIFF_G3        = 7,
  46.    IFF_TIFF_G4        = 8,
  47.    IFF_TIFF_LZW       = 9,
  48.    IFF_TIFF_ZIP       = 10,
  49.    IFF_PNM            = 11,
  50.    IFF_PS             = 12,
  51.    IFF_GIF            = 13,
  52.    IFF_JP2            = 14,
  53.    IFF_WEBP           = 15,
  54.    IFF_LPDF           = 16,
  55.    IFF_TIFF_JPEG      = 17,
  56.    IFF_DEFAULT        = 18,
  57.    IFF_SPIX           = 19
  58. };
  59.  
  60. typedef enum newsev {
  61.    L_SEVERITY_EXTERNAL = 0,   /* Get the severity from the environment   */
  62.    L_SEVERITY_ALL      = 1,   /* Lowest severity: print all messages     */
  63.    L_SEVERITY_DEBUG    = 2,   /* Print debugging and higher messages     */
  64.    L_SEVERITY_INFO     = 3,   /* Print informational and higher messages */
  65.    L_SEVERITY_WARNING  = 4,   /* Print warning and higher messages       */
  66.    L_SEVERITY_ERROR    = 5,   /* Print error and higher messages         */
  67.    L_SEVERITY_NONE     = 6    /* Highest severity: print no messages     */
  68. };
  69.  
  70. char * getLeptonicaVersion (  );
  71. PIX * pixRead ( const char *filename );
  72. PIX * pixCreate ( int width, int height, int depth );
  73. PIX * pixEndianByteSwapNew(PIX  *pixs);
  74. l_int32 pixSetData ( PIX *pix, l_uint32 *data );
  75. l_ok pixSetPixel ( PIX *pix, l_int32 x, l_int32 y, l_uint32 val );
  76. l_ok pixWrite ( const char *fname, PIX *pix, l_int32 format );
  77. l_int32 pixFindSkew ( PIX *pixs, l_float32 *pangle, l_float32 *pconf );
  78. PIX * pixDeskew ( PIX *pixs, l_int32 redsearch );
  79. void pixDestroy ( PIX **ppix );
  80. l_ok pixGetResolution ( const PIX *pix, l_int32 *pxres, l_int32 *pyres );
  81. l_ok pixSetResolution ( PIX *pix, l_int32 xres, l_int32 yres );
  82. l_int32 pixGetWidth ( const PIX *pix );
  83. l_int32 setMsgSeverity ( l_int32 newsev );
  84.  
  85. typedef struct TessBaseAPI TessBaseAPI;
  86. typedef struct ETEXT_DESC ETEXT_DESC;
  87. typedef struct TessPageIterator TessPageIterator;
  88. typedef struct TessResultIterator TessResultIterator;
  89. typedef int BOOL;
  90.  
  91. typedef enum TessOcrEngineMode  {
  92.    OEM_TESSERACT_ONLY          = 0,
  93.    OEM_LSTM_ONLY               = 1,
  94.    OEM_TESSERACT_LSTM_COMBINED = 2,
  95.    OEM_DEFAULT                 = 3} TessOcrEngineMode;
  96.  
  97. typedef enum TessPageSegMode {
  98.    PSM_OSD_ONLY               =  0,
  99.    PSM_AUTO_OSD               =  1,
  100.    PSM_AUTO_ONLY              =  2,
  101.    PSM_AUTO                   =  3,
  102.    PSM_SINGLE_COLUMN          =  4,
  103.    PSM_SINGLE_BLOCK_VERT_TEXT =  5,
  104.    PSM_SINGLE_BLOCK           =  6,
  105.    PSM_SINGLE_LINE            =  7,
  106.    PSM_SINGLE_WORD            =  8,
  107.    PSM_CIRCLE_WORD            =  9,
  108.    PSM_SINGLE_CHAR            = 10,
  109.    PSM_SPARSE_TEXT            = 11,
  110.    PSM_SPARSE_TEXT_OSD        = 12,
  111.    PSM_COUNT                  = 13} TessPageSegMode;
  112.  
  113. typedef enum TessPageIteratorLevel {
  114.     RIL_BLOCK    = 0,
  115.     RIL_PARA     = 1,
  116.     RIL_TEXTLINE = 2,
  117.     RIL_WORD     = 3,
  118.     RIL_SYMBOL    = 4} TessPageIteratorLevel;    
  119.  
  120. TessPageIterator* TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
  121. TessPageIterator* TessResultIteratorGetPageIterator(TessResultIterator* handle);
  122.  
  123. BOOL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level);
  124. BOOL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level,
  125.                                 int* left, int* top, int* right, int* bottom);
  126.  
  127. const char* TessVersion();
  128.  
  129. TessBaseAPI* TessBaseAPICreate();
  130. int    TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language);
  131. int    TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem);
  132. void   TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode);
  133. void   TessBaseAPISetImage(TessBaseAPI* handle,
  134.                           const unsigned char* imagedata, int width, int height,
  135.                           int bytes_per_pixel, int bytes_per_line);
  136. void   TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix);
  137.  
  138. BOOL   TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value);
  139. BOOL   TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, char** best_script_name,
  140.                                                            int* best_orientation_deg, float* script_confidence,
  141.                                                            float* orientation_confidence);
  142. int TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor);
  143. TessResultIterator* TessBaseAPIGetIterator(TessBaseAPI* handle);
  144. BOOL   TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level);
  145. char*  TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level);
  146. float  TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level);
  147. char*  TessBaseAPIGetUTF8Text(TessBaseAPI* handle);
  148. const char*  TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
  149.                                                              BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
  150.                                                              BOOL* is_smallcaps, int* pointsize, int* font_id);
  151. void   TessBaseAPIEnd(TessBaseAPI* handle);
  152. void   TessBaseAPIDelete(TessBaseAPI* handle);
  153. """
  154. )
  155.  
  156.  
  157. def get_abs_path_of_library(library):
  158.     """Get absolute path of library."""
  159.     abs_path = None
  160.     lib_name = find_library(library)
  161.     if os.path.exists(lib_name):
  162.         abs_path = os.path.abspath(lib_name)
  163.         return abs_path
  164.     libdl = ctypes.CDLL(lib_name)
  165.     if not libdl:
  166.         return abs_path  # None
  167.     try:
  168.         dlinfo = libdl.dlinfos
  169.     except AttributeError as err:
  170.         # Workaroung for linux
  171.         abs_path = str(err).split(":")[0]
  172.     return abs_path
  173.  
  174.  
  175. def pil2PIX32(im, leptonica):
  176.     """Convert PIL to leptonica PIX."""
  177.     # At the moment we handle everything as RGBA image
  178.     if im.mode != "RGBA":
  179.         im = im.convert("RGBA")
  180.     depth = 32
  181.     width, height = im.size
  182.     data = im.tobytes("raw", "RGBA")
  183.     pixs = leptonica.pixCreate(width, height, depth)
  184.     leptonica.pixSetData(pixs, ffi.from_buffer("l_uint32[]", data))
  185.  
  186.     try:
  187.         resolutionX = im.info["resolution"][0]
  188.         resolutionY = im.info["resolution"][1]
  189.         leptonica.pixSetResolution(pixs, resolutionX, resolutionY)
  190.     except KeyError:
  191.         pass
  192.     try:
  193.         resolutionX = im.info["dpi"][0]
  194.         resolutionY = im.info["dpi"][1]
  195.         leptonica.pixSetResolution(pixs, resolutionX, resolutionY)
  196.     except KeyError:
  197.         pass
  198.  
  199.     return leptonica.pixEndianByteSwapNew(pixs)
  200.  
  201.  
  202. def img_lepto_to_pil(pix):
  203.     """Convert leptonica pix to PIL
  204.       Source: https://stackoverflow.com/questions/55195932/typeerror-initializer-for-ctype-unsigned-int-must-be-a-cdata-pointer-not-b/57776268#57776268
  205.    """
  206.     cdata_ptr = ffi.new("l_uint8**")
  207.     size_ptr = ffi.new("size_t*")
  208.     leptonica.pixWriteMem(cdata_ptr, size_ptr, pix, IFF_TIFF)
  209.     cdata = cdata_ptr[0]
  210.     size = size_ptr[0]
  211.  
  212.     tiff_bytes = bytes(ffi.buffer(cdata, size))
  213.     with BytesIO(tiff_bytes) as bytesio:
  214.         pilimage = PIL.Image.open(bytesio).copy()
  215.         return pilimag
  216.  
  217.  
  218. def main():
  219.     """Main loop."""
  220.     # Settings
  221.     tess_libname = r"F:/win64_msvc_min/bin/tesseract50.dll"
  222.     lept_libname = r"F:/win64_msvc_min/bin/leptonica-1.81.0.dll"
  223.     filename = "line.ppm"
  224.     lang = "eng"
  225.  
  226.     tessdata = os.environ.get("TESSDATA_PREFIX")
  227.     if not tessdata:
  228.         # Use project tessdata
  229.         tessdata = os.path.join(os.getcwd(), "tessdata")
  230.         os.environ["TESSDATA_PREFIX"] = tessdata
  231.  
  232.     # Load libraries in ABI mode
  233.     if not os.path.dirname(tess_libname) in os.environ.get("PATH"):
  234.         os.environ["PATH"] = os.environ.get("PATH") + ';' + os.path.dirname(tess_libname)
  235.     if os.path.exists(tess_libname):
  236.         tesseract = ffi.dlopen(tess_libname)
  237.     else:
  238.         print(f"'{tess_libname}' does not exists!")
  239.     tesseract_version = ffi.string(tesseract.TessVersion())
  240.     print("Tesseract-ocr version", tesseract_version.decode("utf-8"))
  241.  
  242.     if os.path.exists(lept_libname):
  243.         leptonica = ffi.dlopen(lept_libname)
  244.     else:
  245.         print(f"'{lept_libname}' does not exists!")
  246.     leptonica_version = ffi.string(leptonica.getLeptonicaVersion())
  247.     print(leptonica_version.decode("utf-8"))
  248.     api = None
  249.  
  250.     # Read image to pix
  251.     im = Image.open(filename)
  252.     pix = pil2PIX32(im, leptonica)
  253.  
  254.     # Turn off leptonica warnings
  255.     leptonica.setMsgSeverity(leptonica.L_SEVERITY_EXTERNAL)
  256.  
  257.     # Create tesseract API
  258.     if api:
  259.         tesseract.TessBaseAPIEnd(api)
  260.         tesseract.TessBaseAPIDelete(api)
  261.     api = tesseract.TessBaseAPICreate()
  262.  
  263.     oem = tesseract.OEM_DEFAULT
  264.     tesseract.TessBaseAPIInit2(api, tessdata.encode(), lang.encode(), oem)
  265.     tesseract.TessBaseAPISetPageSegMode(api, tesseract.PSM_AUTO)
  266.  
  267.     tesseract.TessBaseAPISetImage2(api, pix)
  268.     # recognize is needed to get result iterator
  269.     tesseract.TessBaseAPIRecognize(api, ffi.NULL)
  270.  
  271.     utf8_text = ffi.string(tesseract.TessBaseAPIGetUTF8Text(api)).decode("utf-8")
  272.     print(utf8_text)
  273.  
  274.     # Delete api and pix
  275.     if api:
  276.         tesseract.TessBaseAPIEnd(api)
  277.         tesseract.TessBaseAPIDelete(api)
  278.  
  279.     result = ffi.new("PIX**")
  280.     result[0] = pix
  281.     leptonica.pixDestroy(result)
  282.     del pix
  283.     del result
  284.     api = None
  285.  
  286.  
# Run the OCR example only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
  289.  
Add Comment
Please, Sign In to add comment