Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import argparse
import os

import cv2
import numpy as np
import pytesseract
from PIL import Image
# Path of the preprocessed image: written by ProcessImage() and read back for OCR.
filename = "image_processed.png"
def ProcessImage():
    """Preprocess ``image_raw.png`` for OCR and print the recognised text.

    The function performs these steps in order:

    1. Read ``image_raw.png`` and convert it to grayscale.
    2. Binarise it with an Otsu threshold, then apply a median blur and a
       Gaussian blur (both 3x3).
    3. Write the result to the module-level ``filename``.
    4. Dump the pixel values of the region ``image[50:150, 20:120]`` to
       ``output.txt``, one image row per line, values separated by spaces.
    5. Run OCR on the written file via ``Extract()`` and print the text
       wrapped in single quotes.

    Raises:
        FileNotFoundError: if ``image_raw.png`` cannot be read by OpenCV.
    """
    image_path = "image_raw.png"
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise FileNotFoundError(
            "could not read image: {!r}".format(image_path)
        )

    # Convert to grayscale.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    thresh = True
    blur = True

    # Check to see if we should apply thresholding to preprocess the image.
    if thresh:
        # NOTE: with THRESH_OTSU set, OpenCV picks the threshold itself and
        # the 150 passed here is not used as the cut-off.
        image = cv2.threshold(
            image, 150, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )[1]

    # Check to see if blurring should be done to remove noise.
    if blur:
        image = cv2.medianBlur(image, 3)
        image = cv2.GaussianBlur(image, (3, 3), 0)

    # Write the preprocessed image to disk so OCR can be applied to it.
    cv2.imwrite(filename, image)

    # Dump a fixed region of the processed image as text for inspection.
    image_part = image[50:150, 20:120]
    with open("output.txt", "w") as out:
        out.writelines(
            " ".join(str(x) for x in row) + "\n" for row in image_part
        )

    # OCR the file written above; Extract() configures the Tesseract path.
    text = Extract()
    print("'" + text + "'")
def Extract():
    """Run Tesseract OCR on the preprocessed image and return its text.

    Opens the image stored at the module-level ``filename`` and passes it to
    ``pytesseract.image_to_string``.

    Returns:
        The text produced by ``pytesseract.image_to_string``.
    """
    # Hard-coded location of the Tesseract executable used by pytesseract.
    pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files (x86)/Tesseract-OCR/tesseract'
    # Use a context manager so the image file handle is released after OCR.
    with Image.open(filename) as processed:
        return pytesseract.image_to_string(processed)
# Run the whole preprocess-and-OCR pipeline each time this file is executed.
# Earlier experiments, left disabled in the original: only calling
# ProcessImage() when `filename` did not exist yet, printing the result of
# Extract(), and splitting the recognised text into words on whitespace.
ProcessImage()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement