Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "metadata": {
- "ExecuteTime": {
- "end_time": "2019-02-20T09:27:15.757201Z",
- "start_time": "2019-02-20T09:27:15.753197Z"
- }
- },
- "cell_type": "markdown",
- "source": "# Tesseract Basic Example"
- },
- {
- "metadata": {
- "ExecuteTime": {
- "end_time": "2019-02-20T09:10:45.913853Z",
- "start_time": "2019-02-20T09:10:42.459611Z"
- },
- "trusted": true
- },
- "cell_type": "code",
- "source": "try:\n from PIL import Image\nexcept ImportError:\n import Image\nimport pytesseract\n\n# If you don't have tesseract executable in your PATH, include the following:\npytesseract.pytesseract.tesseract_cmd = r'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'\n# Example tesseract_cmd = r'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'\n\n# Simple image to string\nprint(pytesseract.image_to_string(Image.open('test.png')))\n\n# # French text image to string\n# print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra'))\n\n# Get bounding box estimates\nprint(pytesseract.image_to_boxes(Image.open('test.png')))\n\n# Get verbose data including boxes, confidences, line and page numbers\nprint(pytesseract.image_to_data(Image.open('test.png')))\n\n# Get information about orientation and script detection\n# print(pytesseract.image_to_osd(Image.open('test.png')))\n\n# In order to bypass the internal image conversions, just use relative or absolute image path\n# NOTE: If you don't use supported images, tesseract will return error\nprint(pytesseract.image_to_string('test.png'))\n\n# get a searchable PDF\n# pdf = pytesseract.image_to_pdf_or_hocr('test.png', extension='pdf')\n\n# get HOCR output\n# hocr = pytesseract.image_to_pdf_or_hocr('test.png', extension='hocr')",
- "execution_count": 4,
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": "QGphJT\nQ 2 35 59 107 0\nG 68 52 113 120 0\np 131 34 168 100 0\nh 191 49 237 120 0\nJ 254 49 270 112 0\nT 270 49 316 112 0\nlevel\tpage_num\tblock_num\tpar_num\tline_num\tword_num\tleft\ttop\twidth\theight\tconf\ttext\n1\t1\t0\t0\t0\t0\t0\t0\t316\t159\t-1\t\n2\t1\t1\t0\t0\t0\t2\t39\t314\t86\t-1\t\n3\t1\t1\t1\t0\t0\t2\t39\t314\t86\t-1\t\n4\t1\t1\t1\t1\t0\t2\t39\t314\t86\t-1\t\n5\t1\t1\t1\t1\t1\t2\t39\t314\t86\t21\tQGphJT\nQGphJ\nHello\n"
- }
- ]
- },
- {
- "metadata": {
- "ExecuteTime": {
- "end_time": "2019-02-20T11:04:51.259163Z",
- "start_time": "2019-02-20T11:04:51.255161Z"
- }
- },
- "cell_type": "markdown",
- "source": "# Python Example 1"
- },
- {
- "metadata": {
- "trusted": true
- },
- "cell_type": "code",
- "source": "",
- "execution_count": null,
- "outputs": []
- }
- ],
- "metadata": {
- "gist": {
- "id": "",
- "data": {
- "description": "MyNotebook.ipynb",
- "public": true
- }
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3",
- "language": "python"
- },
- "language_info": {
- "name": "python",
- "version": "3.6.8",
- "mimetype": "text/x-python",
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "pygments_lexer": "ipython3",
- "nbconvert_exporter": "python",
- "file_extension": ".py"
- },
- "notify_time": "5",
- "toc": {
- "nav_menu": {
- "height": "114px",
- "width": "276px"
- },
- "number_sections": true,
- "sideBar": true,
- "skip_h1_title": false,
- "base_numbering": "1",
- "title_cell": "Tesseract Basic Example",
- "title_sidebar": "Python Example",
- "toc_cell": false,
- "toc_position": {
- "height": "calc(100% - 180px)",
- "left": "10px",
- "top": "150px",
- "width": "209.188px"
- },
- "toc_section_display": true,
- "toc_window_display": true
- },
- "varInspector": {
- "window_display": false,
- "cols": {
- "lenName": 16,
- "lenType": 16,
- "lenVar": 40
- },
- "kernels_config": {
- "python": {
- "library": "var_list.py",
- "delete_cmd_prefix": "del ",
- "delete_cmd_postfix": "",
- "varRefreshCmd": "print(var_dic_list())"
- },
- "r": {
- "library": "var_list.r",
- "delete_cmd_prefix": "rm(",
- "delete_cmd_postfix": ") ",
- "varRefreshCmd": "cat(var_dic_list()) "
- }
- },
- "types_to_exclude": [
- "module",
- "function",
- "builtin_function_or_method",
- "instance",
- "_Feature"
- ]
- },
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "state": {},
- "version_major": 2,
- "version_minor": 0
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement