Advertisement
Guest User

Tesseract.au3

a guest
Jun 15th, 2012
3,047
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
AutoIt 14.08 KB | None | 0 0
  1. #include-once
  2. #Include <Array.au3>
  3. #Include <File.au3>
  4. #include <GDIPlus.au3>
  5. #include <ScreenCapture.au3>
  6. #include <WinAPI.au3>
  7. #include <ScrollBarConstants.au3>
  8. #include <WindowsConstants.au3>
  9. #Include <GuiComboBox.au3>
  10. #Include <GuiListBox.au3>
  11. #Region Header
  12.  
  13. #EndRegion Header
  14. #Region Global Variables and Constants
  ; Cache for the most recent OCR result.  Starts out as an empty string and is
  ; replaced by a Scripting.Dictionary object the first time a capture function runs.
  Global $last_capture = ""

  ; Folder (with trailing backslash) in which temporary image / text files are created.
  ; Alternative used during development: "c:\"
  Global $tesseract_temp_path = @TempDir & "\"

  ; Full path of the Tesseract executable below the Program Files folder.
  Global $tesseract_Program_file = @ProgramFilesDir & "\tesseract-OCR\tesseract.exe"
  19. #EndRegion Global Variables and Constants
  20. #Region Core functions
  ; #FUNCTION# ;===============================================================================
  ;
  ; Name...........:  _TesseractTempPathSet()
  ; Description ...:  Sets the folder in which the Tesseract functions store their temporary files.
  ;                       You must have read and write access to this location.
  ;                       The default location is the current user's temporary folder (@TempDir).
  ; Syntax.........:  _TesseractTempPathSet($temp_path)
  ; Parameters ....:  $temp_path  - The folder to use for temporary file storage.  It must already
  ;                                   exist.  A trailing backslash is appended when missing.
  ;                                   The path should not contain any spaces (see "Remarks" below).
  ; Return values .:  On Success  - Returns 1.
  ;                   On Failure  - Returns 0 and sets @error to 1 ($temp_path is not an existing
  ;                                   folder).  The previously configured folder is kept.
  ; Author ........:  seangriffin
  ; Modified.......:
  ; Remarks .......:  The version of Tesseract this UDF was written for is reported not to support
  ;                   paths with spaces.
  ; Related .......:
  ; Link ..........:
  ; Example .......:  No
  ;
  ; ;==========================================================================================
  Func _TesseractTempPathSet($temp_path)

      ; Only accept an existing directory; an empty string or a file name yields no "D" attribute.
      If Not StringInStr(FileGetAttrib($temp_path), "D") Then Return SetError(1, 0, 0)

      ; Keep the same "ends with a backslash" shape as the default value of the global.
      If StringRight($temp_path, 1) <> "\" Then $temp_path &= "\"

      $tesseract_temp_path = $temp_path

      Return 1
  EndFunc
  46.  
  ; #FUNCTION# ;===============================================================================
  ;
  ; Name...........:  _TesseractScreenCapture()
  ; Description ...:  Captures text from the screen.
  ; Syntax.........:  _TesseractScreenCapture($get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $left_indent = 0, $top_indent = 0, $right_indent = 0, $bottom_indent = 0, $show_capture = 0)
  ; Parameters ....:  $get_last_capture   - Retrieve the text of the last capture, rather than
  ;                                           performing another capture.  Useful if the text on
  ;                                           the screen hasn't changed since the last capture.
  ;                                           0 = do not retrieve the last capture (default)
  ;                                           1 = retrieve the last capture (a new capture is made
  ;                                               when nothing has been cached yet)
  ;                   $delimiter          - Optional: Selects the type of the return value.
  ;                                           "" (default) = the recognised text is returned as one string.
  ;                                           Any other value = an array is returned with one element per
  ;                                           line of recognised text.  The value itself is not used to
  ;                                           split the text.
  ;                   $cleanup            - Optional: Remove invalid text recognised (array results only)
  ;                                           0 = do not remove invalid text
  ;                                           1 = strip ".", "'" and "," plus surrounding whitespace from
  ;                                               every item, then drop blank and duplicate items (default)
  ;                   $scale              - Optional: The scaling factor of the screenshot prior to text recognition.
  ;                                           Increase this number to improve accuracy.
  ;                                           The default is 2.
  ;                   $left_indent        - A number of pixels to indent the capture from the
  ;                                           left of the screen.
  ;                   $top_indent         - A number of pixels to indent the capture from the
  ;                                           top of the screen.
  ;                   $right_indent       - A number of pixels to indent the capture from the
  ;                                           right of the screen.
  ;                   $bottom_indent      - A number of pixels to indent the capture from the
  ;                                           bottom of the screen.
  ;                   $show_capture       - Display screenshot and text captures
  ;                                           (for debugging purposes).
  ;                                           0 = do not display the screenshot taken (default)
  ;                                           1 = display the screenshot and the recognised text,
  ;                                               then continue once the text display is closed
  ; Return values .:  Without $delimiter  - Returns the recognised text as a string ("" if nothing was read).
  ;                   With $delimiter     - Returns an array of the recognised lines.
  ;                                           $cleanup = 1: 0-based array of unique, non-blank items; a
  ;                                           single-element array holding "" when no item remains.
  ;                                           $cleanup = 0: element 0 is an empty placeholder and the
  ;                                           recognised lines start at index 1.
  ; Author ........:  seangriffin
  ; Modified.......:
  ; Remarks .......:  Use the default values for first time use.  If the text recognition accuracy is low,
  ;                   I suggest setting $show_capture to 1 and rerunning.  If the screenshot
  ;                   includes borders or erroneous pixels that may interfere with
  ;                   the text recognition process, then use $left_indent, $top_indent, $right_indent and
  ;                   $bottom_indent to adjust the portion of the screen being captured, to
  ;                   exclude these non-textual elements.
  ;                   If text accuracy is still low, increase the $scale parameter.  In general, the higher
  ;                   the scale the clearer the font and the more accurate the text recognition.
  ;                   The executable named by $tesseract_Program_file is used when that file exists;
  ;                   otherwise "<Program Files>\tesseract\tesseract.exe" is run.
  ; Related .......:
  ; Link ..........:
  ; Example .......:  No
  ;
  ; ;==========================================================================================
  Func _TesseractScreenCapture($get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $left_indent = 0, $top_indent = 0, $right_indent = 0, $bottom_indent = 0, $show_capture = 0)

      Local $aLines, $final_ocr[1]

      ; Create the cache dictionary on first use.  IsObj() is used because comparing an
      ; object with "" is not a reliable "not created yet" test.
      If Not IsObj($last_capture) Then
          $last_capture = ObjCreate("Scripting.Dictionary")
      EndIf
      Local $can_cache = IsObj($last_capture)

      ; If the last capture is requested, and one exists, return it without capturing again.
      If $get_last_capture = 1 And $can_cache Then
          If $last_capture.Exists(0) Then
              Local $cached = $last_capture.Item(0)
              If IsArray($cached) Then Return $cached
              If $cached <> "" Then Return $cached
          EndIf
      EndIf

      ; Tesseract writes its result to "<output base>.txt", so the output base is the
      ; temporary image name without its ".tif" extension.
      Local $capture_filename = _TempFile($tesseract_temp_path, "~", ".tif")
      Local $ocr_filename = StringTrimRight($capture_filename, 4)
      Local $ocr_filename_and_ext = $ocr_filename & ".txt"

      CaptureToTIFF("", "", "", $capture_filename, $scale, $left_indent, $top_indent, $right_indent, $bottom_indent)

      ; Prefer the configured executable; fall back to the location this function used previously.
      Local $tesseract_exe = $tesseract_Program_file
      If Not FileExists($tesseract_exe) Then $tesseract_exe = @ProgramFilesDir & "\tesseract\tesseract.exe"

      ; Quote both arguments so that a temporary folder containing spaces is passed intact.
      ShellExecuteWait($tesseract_exe, '"' & $capture_filename & '" "' & $ocr_filename & '"')

      ; If no delimiter is specified, then return a string
      If StringCompare($delimiter, "") = 0 Then

          $final_ocr = FileRead($ocr_filename_and_ext)
      Else

          ; _FileReadToArray() stores the line count in element 0; drop it before appending
          ; the lines behind the placeholder element of $final_ocr.
          If _FileReadToArray($ocr_filename_and_ext, $aLines) Then
              _ArrayDelete($aLines, 0)
              If IsArray($aLines) Then _ArrayConcatenate($final_ocr, $aLines)
          EndIf
      EndIf

      ; If the captures are to be displayed
      If $show_capture = 1 Then

          Local $hPreviewGui = GUICreate("Tesseract Screen Capture.  Note: image displayed is not to scale", 640, 480, 0, 0, $WS_SIZEBOX + $WS_SYSMENU)

          GUISetBkColor(0xE0FFFF)

          ; The preview COM object may be unavailable; skip the image rather than abort the script.
          Local $oPreview = ObjCreate("Preview.Preview.1")
          If IsObj($oPreview) Then
              GUICtrlCreateObj($oPreview, 0, 0, 640, 480)
              $oPreview.ShowFile($capture_filename, 1)
          EndIf

          GUISetState()

          If IsArray($final_ocr) Then

              _ArrayDisplay($final_ocr, "Tesseract Text Capture")
          Else

              MsgBox(0, "Tesseract Text Capture", $final_ocr)
          EndIf

          GUIDelete($hPreviewGui)
      EndIf

      ; Removes both the temporary image and the text file Tesseract produced.
      FileDelete($ocr_filename & ".*")

      ; Cleanup
      If IsArray($final_ocr) And $cleanup = 1 Then

          ; Build a fresh list instead of deleting from the array while scanning it, so
          ; that no element index goes stale.
          Local $item, $is_duplicate, $kept_count = 0
          Local $kept[UBound($final_ocr)]

          ; Element 0 of $final_ocr is the empty placeholder, so start at 1.
          For $i = 1 To UBound($final_ocr) - 1

              ; Remove erroneous characters
              $item = StringReplace($final_ocr[$i], ".", "")
              $item = StringReplace($item, "'", "")
              $item = StringReplace($item, ",", "")
              $item = StringStripWS($item, 3)

              ; Remove blank items
              If StringCompare($item, "") = 0 Then ContinueLoop

              ; Remove duplicate items (case-insensitive; the first occurrence is kept)
              $is_duplicate = False
              For $j = 0 To $kept_count - 1
                  If StringCompare($kept[$j], $item) = 0 Then
                      $is_duplicate = True
                      ExitLoop
                  EndIf
              Next
              If $is_duplicate Then ContinueLoop

              $kept[$kept_count] = $item
              $kept_count += 1
          Next

          If $kept_count = 0 Then
              ; Nothing survived: hand back a single empty element.
              ReDim $kept[1]
              $kept[0] = ""
          Else
              ReDim $kept[$kept_count]
          EndIf

          $final_ocr = $kept
      EndIf

      ; Store a copy of the capture.  Always overwrite, so the cache really holds the
      ; most recent capture.
      If $can_cache Then $last_capture.Item(0) = $final_ocr

      Return $final_ocr
  EndFunc
  204.  
  ;; #FUNCTION# ;===============================================================================
  ;
  ; Name...........:  CaptureToTIFF()
  ; Description ...:  Captures an image of the screen, a window or a control, and saves it to a TIFF file.
  ; Syntax.........:  CaptureToTIFF($win_title = "", $win_text = "", $ctrl_id = "", $sOutImage = "", $scale = 1, $left_indent = 0, $top_indent = 0, $right_indent = 0, $bottom_indent = 0)
  ; Parameters ....:  $win_title      - The title of the window to capture an image of.
  ;                                       The whole desktop is captured if neither $win_title nor
  ;                                       $ctrl_id is provided.
  ;                   $win_text       - Optional: The text of the window to capture an image of.
  ;                   $ctrl_id        - Optional: The ID of the control to capture an image of.
  ;                                       An image of the window will be captured if one isn't provided.
  ;                   $sOutImage      - The filename to store the image in.  The image encoder is
  ;                                       selected from the filename's extension.
  ;                   $scale          - Optional: The scaling factor of the capture.
  ;                   $left_indent    - A number of pixels to indent the screen capture from the
  ;                                       left of the window or control.
  ;                   $top_indent     - A number of pixels to indent the screen capture from the
  ;                                       top of the window or control.
  ;                   $right_indent   - A number of pixels to indent the screen capture from the
  ;                                       right of the window or control.
  ;                   $bottom_indent  - A number of pixels to indent the screen capture from the
  ;                                       bottom of the window or control.
  ; Return values .:  On Success  - Returns 1.
  ;                   On Failure  - Returns 0 and sets @error:
  ;                                   1 = the position / size of the window or control could not be read
  ;                                   2 = the indents leave nothing to capture
  ;                                   3 = the screen capture failed
  ;                                   4 = the image could not be saved
  ; Author ........:  seangriffin
  ; Modified.......:
  ; Remarks .......:  When a window or control is captured, its window is activated first.
  ; Related .......:
  ; Link ..........:
  ; Example .......:  No
  ;
  ; ;==========================================================================================
  Func CaptureToTIFF($win_title = "", $win_text = "", $ctrl_id = "", $sOutImage = "", $scale = 1, $left_indent = 0, $top_indent = 0, $right_indent = 0, $bottom_indent = 0)

      ; Default target: the whole desktop ($pos = x, y, width, height).
      Local $hTarget = ""
      Local $pos[4] = [0, 0, @DesktopWidth, @DesktopHeight]

      ; Upper-cased text after the last "." of the file name; used to look up the encoder.
      Local $Ext = StringUpper(StringMid($sOutImage, StringInStr($sOutImage, ".", 0, -1) + 1))
      Local $giTIFColorDepth = 24
      Local $giTIFCompression = $GDIP_EVTCOMPRESSIONNONE

      If StringCompare($ctrl_id, "") <> 0 Then

          ; Capturing a control
          $hTarget = ControlGetHandle($win_title, $win_text, $ctrl_id)
          $pos = ControlGetPos($win_title, $win_text, $ctrl_id)
      ElseIf StringCompare($win_title, "") <> 0 Then

          ; Capturing a window
          $hTarget = WinGetHandle($win_title, $win_text)
          $pos = WinGetPos($win_title, $win_text)
      EndIf

      ; ControlGetPos / WinGetPos do not return an array when the target is not found;
      ; subscripting that value would abort the script.
      If Not IsArray($pos) Then Return SetError(1, 0, 0)

      ; Capture rectangle, relative to the target.  The size always comes from the target
      ; itself ($pos), so a control capture is sized by the control and not by its window.
      Local $tar_leftx = $left_indent
      Local $tar_lefty = $top_indent
      Local $tar_rightx = $pos[2] - $right_indent
      Local $tar_righty = $pos[3] - $bottom_indent

      Local $out_width = ($tar_rightx - $tar_leftx) * $scale
      Local $out_height = ($tar_righty - $tar_lefty) * $scale
      If $out_width <= 0 Or $out_height <= 0 Then Return SetError(2, 0, 0)

      ; Capture an image of the window / control, or of the screen region
      Local $hBitmap2
      If IsHWnd($hTarget) Then

          WinActivate($win_title, $win_text)
          $hBitmap2 = _ScreenCapture_CaptureWnd("", $hTarget, $tar_leftx, $tar_lefty, $tar_rightx, $tar_righty, False)
      Else

          $hBitmap2 = _ScreenCapture_Capture("", $tar_leftx, $tar_lefty, $tar_rightx, $tar_righty, False)
      EndIf
      If Not $hBitmap2 Then Return SetError(3, 0, 0)

      _GDIPlus_Startup()

      ; Convert the capture to a GDI+ bitmap
      Local $hImage2 = _GDIPlus_BitmapCreateFromHBITMAP($hBitmap2)

      ; Create the (scaled) destination bitmap, compatible with the desktop's device context
      Local $hDesktop = _WinAPI_GetDesktopWindow()
      Local $hDC = _WinAPI_GetDC($hDesktop)
      Local $hBMP = _WinAPI_CreateCompatibleBitmap($hDC, $out_width, $out_height)
      _WinAPI_ReleaseDC($hDesktop, $hDC)

      ; Draw the capture onto the destination, stretched to the scaled size
      Local $hImage1 = _GDIPlus_BitmapCreateFromHBITMAP($hBMP)
      Local $hGraphic = _GDIPlus_ImageGetGraphicsContext($hImage1)
      _GDIPlus_GraphicsDrawImageRect($hGraphic, $hImage2, 0, 0, $out_width, $out_height)
      Local $CLSID = _GDIPlus_EncodersGetCLSID($Ext)

      ; Set TIFF parameters
      Local $tParams = _GDIPlus_ParamInit(2)
      Local $tData = DllStructCreate("int ColorDepth;int Compression")
      DllStructSetData($tData, "ColorDepth", $giTIFColorDepth)
      DllStructSetData($tData, "Compression", $giTIFCompression)
      _GDIPlus_ParamAdd($tParams, $GDIP_EPGCOLORDEPTH, 1, $GDIP_EPTLONG, DllStructGetPtr($tData, "ColorDepth"))
      _GDIPlus_ParamAdd($tParams, $GDIP_EPGCOMPRESSION, 1, $GDIP_EPTLONG, DllStructGetPtr($tData, "Compression"))
      Local $pParams = 0
      If IsDllStruct($tParams) Then $pParams = DllStructGetPtr($tParams)

      ; Save the image
      Local $saved = _GDIPlus_ImageSaveToFileEx($hImage1, $sOutImage, $CLSID, $pParams)

      ; Cleanup: release the graphics context before the image it was created from
      _GDIPlus_GraphicsDispose($hGraphic)
      _GDIPlus_ImageDispose($hImage1)
      _GDIPlus_ImageDispose($hImage2)
      _WinAPI_DeleteObject($hBMP)
      _WinAPI_DeleteObject($hBitmap2)
      _GDIPlus_Shutdown()

      If Not $saved Then Return SetError(4, 0, 0)

      Return 1
  EndFunc
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement