Guest User

Untitled

a guest
Jan 23rd, 2018
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.17 KB | None | 0 0
  // Recognise the digits in one image with tessnet2 and print every word together
  // with its confidence value.
  // NOTE(review): the pasted literals read @"C:picmytext.jpg" and @"C:tessdata"; the
  // path separators appear to have been stripped by the paste. The directory layout
  // restored below is an assumption — confirm against the real locations.
  string path = @"C:\pic\mytext.jpg";

  // Both the bitmap and the OCR engine hold native resources, so release them
  // deterministically instead of waiting for finalization.
  using (Bitmap image = new Bitmap(path))
  using (Tesseract ocr = new Tesseract())
  {
      ocr.SetVariable("tessedit_char_whitelist", "0123456789"); // If digit only
      ocr.Init(@"C:\tessdata", "eng", false); // To use correct tessdata

      // Rectangle.Empty is passed as the region argument of DoOCR.
      List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty);
      foreach (tessnet2.Word word in result)
      {
          Console.WriteLine("{0} : {1}", word.Confidence, word.Text);
      }
  }
  9.  
  /// <summary>
  /// Runs Tesseract OCR over a bitmap and returns the text of the processed page.
  /// </summary>
  /// <param name="imgsource">Bitmap to recognise; it is converted to a Pix first.</param>
  /// <returns>The text reported by the processed page.</returns>
  public static string GetText(Bitmap imgsource)
  {
      // A fresh English engine is created from ./tessdata on every call; the engine,
      // the converted image and the page are all disposed before the method returns.
      using (var ocrEngine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
      using (var pix = PixConverter.ToPix(imgsource))
      using (var recognisedPage = ocrEngine.Process(pix))
      {
          return recognisedPage.GetText();
      }
  }
  26.  
  // Pause the thread for one second in case the service is running too fast.
  Thread.Sleep(millisecondsTimeout: 1000);

  // The first dash-separated group of the GUID string is used as a file-name suffix.
  Guid tempGuid = ToSeqGuid();
  string newFileName = tempGuid.ToString().Split('-')[0];

  // The original concatenated "\pdf2png\" by hand, which is not a valid C# literal
  // (unrecognised escape "\p", and "\"" escapes the closing quote). Path.Combine
  // builds <appPath>\pdf2png\<name>-<suffix>.png with the correct separators.
  string outputFileName = Path.Combine(
      appPath,
      "pdf2png",
      fileNameithoutExtension + "-" + newFileName + ".png");

  // Write the image the extractor is currently positioned on as a PNG file.
  extractor.SaveCurrentImageToFile(outputFileName, ImageFormat.Png);
  // Create text file here using Tesseract: the tesseract command line is run through
  // cmd.exe once for every file currently in the pdf2png directory.
  foreach (var file in Directory.GetFiles(Path.Combine(appPath, "pdf2png")))
  {
      try
      {
          var pngFileName = Path.GetFileNameWithoutExtension(file);

          // Output path handed to tesseract: the image's base name inside png2text.
          // (Path.Combine replaces the hand-built "\png2text\" literal, which does
          // not compile as a regular C# string.)
          var ocrOutputPath = Path.Combine(appPath, "png2text", pngFileName);

          // /C makes cmd.exe close automatically when the command completes. Both
          // paths are quoted so that locations containing spaces stay single arguments.
          string strParam = "/C tesseract \"" + file + "\" \"" + ocrOutputPath + "\"";

          var theProcess = new ProcessStartInfo("cmd.exe", strParam)
          {
              CreateNoWindow = true,
              UseShellExecute = false,
              RedirectStandardOutput = true,
              RedirectStandardError = true,
              WindowStyle = ProcessWindowStyle.Minimized
          };

          // Release the Process component once this iteration is finished with it.
          using (var myCmdProcess = new Process())
          {
              myCmdProcess.StartInfo = theProcess;
              // NOTE(review): EnableRaisingEvents is not set on this instance, so per
              // the Process documentation the Exited handler is not raised — confirm
              // whether that is intended before enabling it.
              myCmdProcess.Exited += myCmdProcess_Exited;
              myCmdProcess.Start();

              // Both streams are redirected; drain them asynchronously so the child
              // cannot block on a full pipe while we wait for it to exit.
              myCmdProcess.BeginOutputReadLine();
              myCmdProcess.BeginErrorReadLine();

              Process.EnterDebugMode();
              try
              {
                  // Give the command up to 60 seconds to finish.
                  myCmdProcess.WaitForExit(60000);
                  myCmdProcess.Refresh();
              }
              finally
              {
                  // Always undo EnterDebugMode, even when waiting throws.
                  Process.LeaveDebugMode();
              }

              // Extra one-second grace period before checking the process state.
              Thread.Sleep(millisecondsTimeout: 1000);

              // Original author's note: waiting alone was sufficient on Windows 7 but
              // not on Windows 8, hence the explicit check and kill below.
              if (!myCmdProcess.HasExited)
              {
                  try
                  {
                      // Still running after the timeout: kill the process.
                      myCmdProcess.Kill();

                      MessageBox.Show(" Process of cmd.exe exited ( Killed ) successfully " +
                                      Environment.NewLine);
                  }
                  catch (System.ComponentModel.Win32Exception ex)
                  {
                      MessageBox.Show(
                          " Exception: System.ComponentModel.Win32Exception " +
                          ex.ErrorCode + Environment.NewLine);
                  }
                  catch (NotSupportedException notSupporEx)
                  {
                      MessageBox.Show(" Exception: NotSupportedException " +
                                      notSupporEx.Message +
                                      Environment.NewLine);
                  }
                  catch (InvalidOperationException invalidOperation)
                  {
                      MessageBox.Show(
                          " Exception: InvalidOperationException " +
                          invalidOperation.Message + Environment.NewLine);

                      // Collect whatever text files exist under png2text.
                      foreach (var textFile in Directory.GetFiles(
                          Path.Combine(appPath, "png2text"), "*.txt", SearchOption.AllDirectories))
                      {
                          loggingInfo += textFile +
                                         " In Reading Text from generated text file by Tesseract " +
                                         Environment.NewLine;
                          strBldr.Append(File.ReadAllText(textFile));
                      }

                      // Delete text file after reading text here.
                      // NOTE(review): this also empties pdf2png while the enclosing
                      // foreach is still walking the array returned by GetFiles, so
                      // any remaining entries would refer to deleted files — confirm.
                      Directory.GetFiles(Path.Combine(appPath, "pdf2png")).ToList().ForEach(File.Delete);
                      Directory.GetFiles(Path.Combine(appPath, "png2text")).ToList().ForEach(File.Delete);
                  }
              }
          }
      }
      catch (Exception exception)
      {
          // Report the failure for this file and continue with the next one.
          MessageBox.Show(
              " Cought Exception in Generating image do{...}while{...} function " +
              Environment.NewLine + exception.Message + Environment.NewLine);
      }
  }
  // Delete png image here. Path.Combine replaces the hand-written "\pdf2png" and
  // "\png2text" literals, which are not valid escape sequences in a regular C# string.
  Directory.GetFiles(Path.Combine(appPath, "pdf2png")).ToList().ForEach(File.Delete);
  Thread.Sleep(millisecondsTimeout: 1000);

  // Read text from text file here: every *.txt under png2text (subdirectories
  // included) is logged and appended to the accumulated text.
  foreach (var textFile in Directory.GetFiles(
      Path.Combine(appPath, "png2text"), "*.txt", SearchOption.AllDirectories))
  {
      loggingInfo += textFile +
                     " In Reading Text from generated text file by Tesseract " +
                     Environment.NewLine;
      strBldr.Append(File.ReadAllText(textFile));
  }

  // Delete text file after reading text here.
  // NOTE(review): only files directly inside png2text are deleted, while the read
  // above also descends into subdirectories — confirm that no .txt files can live
  // in subdirectories, otherwise they would be appended again on the next pass.
  Directory.GetFiles(Path.Combine(appPath, "png2text")).ToList().ForEach(File.Delete);
  179. } while (extractor.GetNextImage()); // Advance image enumeration...
Add Comment
Please, Sign In to add comment