Advertisement
Guest User

Untitled

a guest
Feb 22nd, 2019
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.78 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "metadata": {
  5. "ExecuteTime": {
  6. "end_time": "2019-02-20T09:27:15.757201Z",
  7. "start_time": "2019-02-20T09:27:15.753197Z"
  8. }
  9. },
  10. "cell_type": "markdown",
  11. "source": "# Tesseract Basic Example"
  12. },
  13. {
  14. "metadata": {
  15. "ExecuteTime": {
  16. "end_time": "2019-02-20T09:10:45.913853Z",
  17. "start_time": "2019-02-20T09:10:42.459611Z"
  18. },
  19. "trusted": true
  20. },
  21. "cell_type": "code",
  22. "source": "try:\n from PIL import Image\nexcept ImportError:\n import Image\nimport pytesseract\n\n# If you don't have tesseract executable in your PATH, include the following:\npytesseract.pytesseract.tesseract_cmd = r'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'\n# Example tesseract_cmd = r'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'\n\n# Simple image to string\nprint(pytesseract.image_to_string(Image.open('test.png')))\n\n# # French text image to string\n# print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra'))\n\n# Get bounding box estimates\nprint(pytesseract.image_to_boxes(Image.open('test.png')))\n\n# Get verbose data including boxes, confidences, line and page numbers\nprint(pytesseract.image_to_data(Image.open('test.png')))\n\n# Get information about orientation and script detection\n# print(pytesseract.image_to_osd(Image.open('test.png')))\n\n# In order to bypass the internal image conversions, just use relative or absolute image path\n# NOTE: If you don't use supported images, tesseract will return error\nprint(pytesseract.image_to_string('test.png'))\n\n# get a searchable PDF\n# pdf = pytesseract.image_to_pdf_or_hocr('test.png', extension='pdf')\n\n# get HOCR output\n# hocr = pytesseract.image_to_pdf_or_hocr('test.png', extension='hocr')",
  23. "execution_count": 4,
  24. "outputs": [
  25. {
  26. "name": "stdout",
  27. "output_type": "stream",
  28. "text": "QGphJT\nQ 2 35 59 107 0\nG 68 52 113 120 0\np 131 34 168 100 0\nh 191 49 237 120 0\nJ 254 49 270 112 0\nT 270 49 316 112 0\nlevel\tpage_num\tblock_num\tpar_num\tline_num\tword_num\tleft\ttop\twidth\theight\tconf\ttext\n1\t1\t0\t0\t0\t0\t0\t0\t316\t159\t-1\t\n2\t1\t1\t0\t0\t0\t2\t39\t314\t86\t-1\t\n3\t1\t1\t1\t0\t0\t2\t39\t314\t86\t-1\t\n4\t1\t1\t1\t1\t0\t2\t39\t314\t86\t-1\t\n5\t1\t1\t1\t1\t1\t2\t39\t314\t86\t21\tQGphJT\nQGphJ\nHello\n"
  29. }
  30. ]
  31. },
  32. {
  33. "metadata": {
  34. "ExecuteTime": {
  35. "end_time": "2019-02-20T11:04:51.259163Z",
  36. "start_time": "2019-02-20T11:04:51.255161Z"
  37. }
  38. },
  39. "cell_type": "markdown",
  40. "source": "# Python Example 1"
  41. },
  42. {
  43. "metadata": {
  44. "trusted": true
  45. },
  46. "cell_type": "code",
  47. "source": "",
  48. "execution_count": null,
  49. "outputs": []
  50. }
  51. ],
  52. "metadata": {
  53. "gist": {
  54. "id": "",
  55. "data": {
  56. "description": "MyNotebook.ipynb",
  57. "public": true
  58. }
  59. },
  60. "kernelspec": {
  61. "name": "python3",
  62. "display_name": "Python 3",
  63. "language": "python"
  64. },
  65. "language_info": {
  66. "name": "python",
  67. "version": "3.6.8",
  68. "mimetype": "text/x-python",
  69. "codemirror_mode": {
  70. "name": "ipython",
  71. "version": 3
  72. },
  73. "pygments_lexer": "ipython3",
  74. "nbconvert_exporter": "python",
  75. "file_extension": ".py"
  76. },
  77. "notify_time": "5",
  78. "toc": {
  79. "nav_menu": {
  80. "height": "114px",
  81. "width": "276px"
  82. },
  83. "number_sections": true,
  84. "sideBar": true,
  85. "skip_h1_title": false,
  86. "base_numbering": "1",
  87. "title_cell": "Tesseract Basic Example",
  88. "title_sidebar": "Python Example",
  89. "toc_cell": false,
  90. "toc_position": {
  91. "height": "calc(100% - 180px)",
  92. "left": "10px",
  93. "top": "150px",
  94. "width": "209.188px"
  95. },
  96. "toc_section_display": true,
  97. "toc_window_display": true
  98. },
  99. "varInspector": {
  100. "window_display": false,
  101. "cols": {
  102. "lenName": 16,
  103. "lenType": 16,
  104. "lenVar": 40
  105. },
  106. "kernels_config": {
  107. "python": {
  108. "library": "var_list.py",
  109. "delete_cmd_prefix": "del ",
  110. "delete_cmd_postfix": "",
  111. "varRefreshCmd": "print(var_dic_list())"
  112. },
  113. "r": {
  114. "library": "var_list.r",
  115. "delete_cmd_prefix": "rm(",
  116. "delete_cmd_postfix": ") ",
  117. "varRefreshCmd": "cat(var_dic_list()) "
  118. }
  119. },
  120. "types_to_exclude": [
  121. "module",
  122. "function",
  123. "builtin_function_or_method",
  124. "instance",
  125. "_Feature"
  126. ]
  127. },
  128. "widgets": {
  129. "application/vnd.jupyter.widget-state+json": {
  130. "state": {},
  131. "version_major": 2,
  132. "version_minor": 0
  133. }
  134. }
  135. },
  136. "nbformat": 4,
  137. "nbformat_minor": 2
  138. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement