# Untitled

import os
import argparse
import numpy as np
import pytesseract
import cv2
from PIL import Image

# Path of the preprocessed image: ProcessImage() writes it, and the OCR step reads it back.
filename = "image_processed.png"

def ProcessImage(image_path="image_raw.png", thresh=True, blur=True):
	"""Preprocess an image for OCR, dump a pixel sample, and print the OCR text.

	Steps:
	  1. Read ``image_path`` with OpenCV and convert it to grayscale.
	  2. Optionally binarize it, then optionally smooth it (median blur
	     followed by Gaussian blur, both with a 3x3 kernel).
	  3. Write the result to the module-level ``filename``.
	  4. Write the pixel values of the region ``image[50:150, 20:120]`` to
	     ``output.txt``: one image row per line, values separated by spaces.
	  5. Run OCR on the written file via ``Extract()`` and print the text
	     wrapped in single quotes.

	Args:
		image_path: Path of the image to read.
		thresh: When true, apply binary thresholding before blurring.
		blur: When true, apply the median and Gaussian blurs.

	Raises:
		OSError: If OpenCV could not read ``image_path``.
	"""
	image = cv2.imread(image_path)
	# cv2.imread signals failure by returning None instead of raising; fail
	# here with a clear message rather than deep inside cvtColor.
	if image is None:
		raise OSError("Could not read image: {}".format(image_path))
	image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

	if thresh:
		# NOTE: with THRESH_OTSU set, OpenCV computes the threshold itself,
		# so the 150 passed here is not the value actually applied.
		image = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
	if blur:
		image = cv2.medianBlur(image, 3)
		image = cv2.GaussianBlur(image, (3, 3), 0)

	# Persist the preprocessed image; the OCR step below reads it from disk.
	cv2.imwrite(filename, image)

	# Dump a fixed sample region as text for manual inspection. The slice is
	# simply shorter (possibly empty) when the image is smaller than the region.
	image_part = image[50:150, 20:120]
	with open("output.txt", "w") as out:
		out.writelines(
			' '.join(str(x) for x in line) + "\n" for line in image_part
		)

	# Extract() configures the Tesseract path and OCRs ``filename``.
	text = Extract()
	print("'" + text + "'")

def Extract():
	"""Run Tesseract OCR on the image stored at the module-level ``filename``.

	Returns:
		The text that pytesseract recognized in the image.
	"""
	# Tell pytesseract where the Tesseract executable lives before calling it.
	pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files (x86)/Tesseract-OCR/tesseract'
	processed = Image.open(filename)
	return pytesseract.image_to_string(processed)

# Disabled guard: it would run the pipeline only when the processed image
# does not exist yet.
# if (not os.path.exists(filename)):
# 	ProcessImage()

# Run the preprocessing + OCR pipeline unconditionally when this module is loaded.
ProcessImage()


# t = Extract()
# print(t)


# word = ""
# for l in text:
# 	if l.isspace():
# 		print(word)
# 		word = ""
# 	else:
# 		word += l
# print (word)