Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pylab as pl
- import numpy as np
- from PIL import Image, ImageEnhance, ImageColor
- from scipy import ndimage, misc
- import os, time, subprocess, sys
- import cv2.cv as cv
- import cv2
- import math
# Directory whose files are run through the OCR pipeline below.
pd = 'C:/Users/UserBob/Desktop/RANDOM/delete_this/test_photos'


def _preprocess(path, tmp):
    """Load ``path`` as greyscale, shrink and enhance it, and return a uint8 array.

    The enhanced image is saved to ``tmp`` and read back with ``pl.imread``;
    only the first channel of the re-read image is returned.
    """
    o_img = Image.open(path).convert('L')
    img = Image.new("RGBA", o_img.size)
    img.paste(o_img)
    # Floor division keeps the thumbnail bounds integral on Python 2 and 3.
    size = img.size[0] // 3, img.size[1] // 3
    img.thumbnail(size, Image.ANTIALIAS)
    img = ImageEnhance.Contrast(img).enhance(4)  # 20
    img = ImageEnhance.Brightness(img).enhance(1.5)  # 2
    img = ImageEnhance.Sharpness(img).enhance(2)
    img.save(tmp)
    return pl.imread(tmp)[:, :, 0].astype(np.uint8)


def _isolate_text(img):
    """Return an inverted boolean mask of the pixels selected inside the largest blob.

    ``img`` is the uint8 array from ``_preprocess``; the ndimage binary
    operations treat its non-zero pixels as foreground.
    """
    # img = ndimage.gaussian_filter(img, 2)
    # Erode then dilate by the same amount to drop small foreground regions.
    img2 = ndimage.binary_erosion(img, iterations=30)  # 40
    img3 = ndimage.binary_dilation(img2, iterations=30)  # 40
    labels, _ = ndimage.label(img3)
    counts = np.bincount(labels.ravel())
    counts[0] = 0  # label 0 is the background; never pick it as "largest"
    img4 = labels == np.argmax(counts)
    img5 = ndimage.binary_fill_holes(img4)
    # NOTE(review): img is uint8, so ``~img`` is a bitwise complement rather
    # than a logical NOT; ANDed with the boolean mask only the low bit
    # survives. Left unchanged -- confirm whether a threshold was intended.
    result = ~img & img5
    # Small erode/dilate pass to remove isolated specks from the mask.
    result = ndimage.binary_erosion(result, iterations=3)  # 3
    result = ndimage.binary_dilation(result, iterations=3)  # 3
    # ``~`` inverts a boolean array; unary minus on booleans is rejected by
    # current NumPy releases.
    return ~result


def _run_tesseract(tmp):
    """Run tesseract on ``tmp`` and return the lines of ``out.txt`` as bytes.

    Raises RuntimeError if tesseract exits with a non-zero status, so a stale
    ``out.txt`` from a previous image is never reported as this image's text.
    """
    # Argument list avoids relying on platform-specific string parsing.
    p = subprocess.Popen(['tesseract', str(tmp), 'out', '-psm', '6'],
                         stdout=subprocess.PIPE)
    # Wait for tesseract to finish (and drain its stdout) before reading the
    # output file; the file is incomplete or missing until the process exits.
    p.communicate()
    if p.returncode != 0:
        raise RuntimeError('tesseract exited with status %d' % p.returncode)
    with open('out.txt', 'rb') as f:
        return f.readlines()


for image in os.listdir(pd):
    fp = os.path.join(pd, image)
    # print('Opening: '+str(fp))
    try:
        if os.path.isfile(fp):
            tmp = 'tmp.JPEG'
            img = _preprocess(fp, tmp)
            result = _isolate_text(img)
            # Overwrite the temporary file with the cleaned mask for OCR.
            misc.imsave(tmp, result)
            text = _run_tesseract(tmp)
            print(len(text),str(text))
    except Exception as e:
        # Best effort: report the failure and move on to the next file.
        import traceback
        print(traceback.format_exc())
        print(e)
        continue
# NOTE(review): the original indentation was lost; 'Done' is assumed to be
# printed once after all files have been processed -- confirm.
print('Done')
- #result = cv2.imdecode(np.frombuffer(result, np.uint8), 1)
- #to binary
- #result = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
- #(thresh, result) = cv2.threshold(result, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
- # (conts, _) = cv2.findContours(result, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
- #cv2.imwrite(tmp,conts)
- #cv2.imshow('result',result)
- #blur
- # kernel = np.ones((30,30),np.float32)/180
- # result = cv2.filter2D(result,-1,kernel)
- # skew detection
- # result = cv2.imread(tmp,0)
- # edges = cv2.Canny(result, 50, 150)
- # cv2.imwrite(tmp,result)
- # minLineLength = 30
- # maxLineGap = 1
- # lines = cv2.HoughLinesP(result,1,np.pi/2,2,minLineLength,maxLineGap)
- # angle = 0
- # try:
- # print(edges)
- # print(lines)
- # for x1,y1,x2,y2 in lines[0]:
- # print(y2,y1,x2,x1,math.atan2(y2 - y1, x2 - x1))
- # angle += math.atan2(y2 - y1, x2 - x1)
- # cv2.line(result,(x1,y1),(x2,y2),(0,255,0),2)
- # cv2.imwrite(tmp,result)
- # print(angle)
- # time.sleep(2)
- # except Exception, e:
- # print(e)
- # pass
- # buffer=tesseract.pixRead(tmp)
- # #buffer = open(tmp,'rb').read()
- # h,w = result.shape
- # chan = 1
- # checkImg = cv.CreateImageHeader((w,h), cv.IPL_DEPTH_8U, chan)
- # cv.SetData(checkImg, result.tostring(), result.dtype.itemsize * chan * (w))
- # tesseract.SetCvImage(result, api)
- # text = api.GetUTF8Text()
- # p = subprocess.Popen('tesseract '+str(tmp)+' out -psm 6 ' , stdout=subprocess.PIPE)
- # with open('out.txt','rb') as f:
- # text = f.readlines()
- # print(len(text),str(text))
- # except Exception, e:
- # import traceback
- # print(traceback.format_exc())
- # print(e)
- # continue
- # print('Done')
Advertisement
Add Comment
Please, Sign In to add comment