Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Tue Aug 17 12:40:23 2021
- @author: Sajid
- """
- from skimage.io import imread, imshow
- from skimage.filters import gaussian, threshold_otsu
- from skimage.feature import canny
- from skimage.transform import probabilistic_hough_line, rotate
- #testing
- import numpy as np
- import os
- import cv2
- import math
- import matplotlib.pyplot as plt
- import torch
- from torch import nn
- from torch import optim
- import torch.nn.functional as F
- from torchvision import datasets, transforms, models
- from collections import OrderedDict
- from PIL import Image
- import pandas as pd
- import seaborn as sns
- import math
- import cv2
- import numpy as np
def process_image(image):
    """Load an image file and convert it to a normalized tensor for a PyTorch model.

    Args:
        image: Path or file object accepted by ``PIL.Image.open``.

    Returns:
        The result of resizing the image to 64x64, converting it with
        ``ToTensor`` and normalizing each of the three channels with
        mean 0.5 and standard deviation 0.5.
    """
    transformation = transforms.Compose([
        transforms.Resize([64, 64]),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    # Normalize is configured for exactly three channels, so grayscale,
    # palette or RGBA files must be converted first.  The context manager
    # closes the underlying file; convert() returns an independent,
    # fully loaded copy that stays valid afterwards.
    with Image.open(image) as img:
        rgb = img.convert("RGB")
    return transformation(rgb)
def deskew(image):
    """Estimate the dominant skew angle of a grayscale image, in degrees.

    The image is binarized with Otsu's threshold, blurred, run through
    Canny edge detection and a probabilistic Hough transform.  The angle of
    every detected segment is histogrammed into 180 bins and the left edge
    of the fullest bin is taken as the skew.

    Args:
        image: Grayscale image array accepted by ``threshold_otsu``.

    Returns:
        The estimated angle in degrees.  Values beyond +/-45 degrees are
        folded back by 90 degrees so that "sideways" detections map onto
        the nearest upright orientation.
    """
    # Thresholding first removes extraneous noise before edge detection.
    binary = image > threshold_otsu(image)
    edge_map = canny(gaussian(binary, 3))

    # Each segment is ((x1, y1), (x2, y2)); vertical segments (dx == 0)
    # are assigned slope 0 to avoid a division by zero.
    segments = probabilistic_hough_line(edge_map)
    slopes = []
    for (x1, y1), (x2, y2) in segments:
        dx = x2 - x1
        slopes.append((y2 - y1) / dx if dx else 0)

    # slope = tan(theta), so arctan recovers the segment's angle.
    angles_deg = [np.degrees(np.arctan(slope)) for slope in slopes]

    # The most populated bin gives the most common angle.
    counts, bin_edges = np.histogram(angles_deg, bins=180)
    dominant = bin_edges[np.argmax(counts)]

    # Correct for 'sideways' alignments.
    if dominant > 45:
        return -(90 - dominant)
    if dominant < -45:
        return 90 - abs(dominant)
    return dominant
def deskew2(img, angle):
    """Rotate a single-channel image by ``angle`` degrees and crop it to its content.

    The image is shown in an OpenCV window named ``'input'``, inverted,
    rotated about the centre of the minimum-area rectangle enclosing its
    non-zero pixels, and cropped to that rectangle plus a 10 pixel margin.

    Args:
        img: 2-D image array (``img.shape`` must unpack to rows, cols).
        angle: Rotation angle in degrees passed to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated and cropped image.  It is still colour-inverted
        relative to ``img``; the caller has to invert it back if needed.
    """
    cv2.imshow('input', img)

    # Invert into a fresh array instead of overwriting the caller's image.
    inverted = cv2.bitwise_not(img)

    # Minimum bounding box of the (now non-zero) foreground pixels.
    non_zero_pixels = cv2.findNonZero(inverted)
    center, wh, theta = cv2.minAreaRect(non_zero_pixels)

    root_mat = cv2.getRotationMatrix2D(center, angle, 1)
    rows, cols = inverted.shape
    rotated = cv2.warpAffine(inverted, root_mat, (cols, rows), flags=cv2.INTER_CUBIC)

    # Border removal: crop size is the box size plus a 10 px margin.
    # np.int0 was removed in NumPy 2.0; int() truncates the same way.
    sizex = int(wh[0]) + 10
    sizey = int(wh[1]) + 10
    print(theta)
    # NOTE(review): the angle range reported by minAreaRect differs between
    # OpenCV releases, so this threshold may always be true on newer
    # versions -- confirm against the installed OpenCV.
    if theta > -45:
        sizex, sizey = sizey, sizex
    return cv2.getRectSubPix(rotated, (sizey, sizex), center)
def sortit(allImages):
    """Group bounding boxes into text lines by vertical overlap.

    Each entry is compared with the first box ("leader") of every line
    found so far.  If their vertical extents intersect, the entry joins that
    line; otherwise it starts a new line and becomes its leader.

    Args:
        allImages: Sequence of ``((x, y, w, h), image)`` pairs.

    Returns:
        A list of lines, each a list of the original entries, in order of
        first appearance.
    """
    leader_spans = []  # (top, bottom) of the first box of each line
    lines = []
    for entry in allImages:
        (_, y, _, h), _ = entry
        top, bottom = y, y + h
        for index, (leader_top, leader_bottom) in enumerate(leader_spans):
            # Full interval-intersection test: also catches a box lying
            # strictly inside the leader's extent, which a test that only
            # checks the leader's top and bottom edges would miss.
            if top <= leader_bottom and bottom >= leader_top:
                lines[index].append(entry)
                break
        else:
            leader_spans.append((top, bottom))
            lines.append([entry])
    return lines
def wordSegment(img, kernelSize=25, sigma=11, theta=7, minArea=0):
    """Split an image into word crops grouped by text line.

    The image is filtered with the kernel from ``createKernel``, binarized
    with Otsu's threshold and inverted; each contour of the result whose
    area is at least ``minArea`` yields one bounding box and crop.

    Args:
        img: Image array to segment (indexed as ``img[y, x]``).
        kernelSize: Side length of the filter kernel.
        sigma: Kernel parameter forwarded to ``createKernel``.
        theta: Kernel parameter forwarded to ``createKernel``.
        minArea: Contours with a smaller area are ignored.

    Returns:
        The result of ``sortit`` applied to the list of
        ``((x, y, w, h), crop)`` pairs.
    """
    smoothing_kernel = createKernel(kernelSize, sigma, theta)
    filtered = cv2.filter2D(img, -1, smoothing_kernel, borderType=cv2.BORDER_REPLICATE)
    filtered = filtered.astype(np.uint8)
    binary = cv2.threshold(filtered, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    binary = 255 - binary

    # OpenCV 3.x returns (image, contours, hierarchy), other versions return
    # (contours, hierarchy); the contours are second-to-last in both cases.
    if cv2.__version__.startswith('3.'):
        retrieval_mode = cv2.RETR_EXTERNAL
    else:
        retrieval_mode = cv2.RETR_LIST
    contours = cv2.findContours(binary, retrieval_mode, cv2.CHAIN_APPROX_SIMPLE)[-2]

    words = []
    for contour in contours:
        if cv2.contourArea(contour) < minArea:
            continue
        box = cv2.boundingRect(contour)  # (x, y, w, h)
        print(box)
        x, y, w, h = box
        words.append((box, img[y:y + h, x:x + w]))
    return sortit(words)
def createKernel(kernelSize, sigma, theta):
    """Build a normalized anisotropic filter kernel.

    Args:
        kernelSize: Side length of the square kernel; must be odd.
        sigma: Scale used along the first (row) axis.
        theta: Multiplier applied to ``sigma`` to get the scale along the
            second (column) axis.

    Returns:
        A ``kernelSize`` x ``kernelSize`` float array whose entries sum to 1.

    Raises:
        AssertionError: If ``kernelSize`` is even.
    """
    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type is kept for compatibility.
    if not kernelSize % 2:
        raise AssertionError("kernelSize must be odd")
    halfSize = kernelSize // 2

    sigmaX = sigma
    sigmaY = sigma * theta

    # Offsets from the kernel centre; x varies along rows, y along columns.
    offsets = np.arange(kernelSize, dtype=np.float64) - halfSize
    x = offsets[:, np.newaxis]
    y = offsets[np.newaxis, :]

    expTerm = np.exp(-x**2 / (2 * sigmaX) - y**2 / (2 * sigmaY))
    xTerm = (x**2 - sigmaX**2) / (2 * math.pi * sigmaX**5 * sigmaY)
    yTerm = (y**2 - sigmaY**2) / (2 * math.pi * sigmaY**5 * sigmaX)

    kernel = (xTerm + yTerm) * expTerm
    return kernel / np.sum(kernel)
# --- Script: binarize, deskew, denoise and segment one sample page ---------
image_path = r"F:\Thesis Files\Bangla OCR Dataset\Dataset\Dataset\1\1_3.jpg"
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than deep inside OpenCV.
if img is None:
    raise FileNotFoundError("Could not read image: %s" % image_path)

img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
cv2.imshow('binary image', img)

# Estimate the skew angle, then rotate and crop; deskew2 returns an inverted
# image, so invert it back before further processing.
angel = deskew(img)
img = deskew2(img, angel)
cv2.bitwise_not(img, img)
cv2.imshow("Skew Corrected", img)

img = cv2.fastNlMeansDenoising(img, img, 50.0, 7, 21)

# The minimum word area scales with the image size.
ho, wo = img.shape
area = ho * wo
ara = wordSegment(img, 25, 11, 7, area / 5000)
# NOTE(review): presumably reversed because the lines come back
# bottom-to-top -- confirm against the contour order of the installed OpenCV.
ara.reverse()
cv2.imshow('input image', img)

# Order the words of every line left to right by the x coordinate of the box.
ara = [sorted(line, key=lambda entry: entry[0][0]) for line in ara]

# One window per word, named by line index and position within the line.
for i, line in enumerate(ara):
    for j, (_, crop) in enumerate(line):
        cv2.imshow('Crop %d%d' % (i, j), crop)

cv2.waitKey(0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement