Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Mon Jan 18 19:17:12 2021
- @author: Sajid
- """
- # -*- coding: utf-8 -*-
- """
- Created on Mon Jan 18 18:33:48 2021
- @author: Sajid
- """
- from skimage.io import imread, imshow
- from skimage.filters import gaussian, threshold_otsu
- from skimage.feature import canny
- from skimage.transform import probabilistic_hough_line, rotate
- #testing
- import numpy as np
- import os
- import cv2
- import math
- import matplotlib.pyplot as plt
- import torch
- from torch import nn
- from torch import optim
- import torch.nn.functional as F
- from torchvision import datasets, transforms, models
- from collections import OrderedDict
- from PIL import Image
- import pandas as pd
- import seaborn as sns
- # define the CNN architecture
class Net(nn.Module):
    """Five-block CNN classifier for 64x64 RGB character images (50 classes)."""

    def __init__(self):
        super(Net, self).__init__()
        # Each conv is followed by the shared 2x2 max-pool in forward(),
        # halving the spatial size: 64 -> 32 -> 16 -> 8 -> 4 -> 2.
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv4 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv5 = nn.Conv2d(128, 256, 3, padding=1)
        # Shared pooling layer reused after every conv.
        self.pool = nn.MaxPool2d(2, 2)
        # Classifier head: 256 channels * 2 * 2 spatial -> 50 class scores.
        self.fc1 = nn.Linear(256 * 2 * 2, 2048)
        self.fc2 = nn.Linear(2048, 512)
        # Dropout (p=0.2) applied between the fully-connected layers.
        self.dropout = nn.Dropout(0.2)
        self.fc3 = nn.Linear(512, 50)

    def forward(self, x):
        """Return raw (un-softmaxed) class scores for a batch of images."""
        # Convolution -> ReLU -> pool, five times.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = self.pool(F.relu(conv(x)))
        # Flatten the (256, 2, 2) feature maps for the classifier head.
        x = x.view(-1, 256 * 2 * 2)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        # NOTE(review): fc2 intentionally has no ReLU here even though the
        # original comment claimed one; kept as-is because the saved weights
        # ('model_scratch.pt') were trained with this exact graph.
        x = self.dropout(self.fc2(x))
        return self.fc3(x)
# True when a CUDA device is available; predict() consults this flag to decide
# whether prediction indices must be copied back to the CPU first.
train_on_gpu = torch.cuda.is_available()

# The 50 Bengali character labels, index-aligned with the network's 50 outputs.
classes = ['অ', 'আ', 'ই', 'ঈ', 'উ', 'ঊ', 'ঋ', 'এ', 'ঐ', 'ও', 'ঔ', 'ক', 'খ',
           'গ', 'ঘ', 'ঙ', 'চ', 'ছ', 'জ', 'ঝ', 'ঞ', 'ট', 'ঠ', 'ড', 'ঢ', 'ণ',
           'ত', 'থ', 'দ', 'ধ', 'ন', 'প', 'ফ', 'ব', 'ভ', 'ম', 'য', 'র', 'ল',
           'শ', 'ষ', 'স', 'হ', 'ড়', 'ঢ়', 'য়', 'ৎ', '৹', ':', '৺']

# Build the network and restore the trained weights, forcing CPU placement.
model_scratch = Net()
model_scratch.load_state_dict(
    torch.load('model_scratch.pt', map_location=torch.device('cpu')))
def process_image(image):
    """Scale and normalize an image file for the PyTorch model.

    Parameters
    ----------
    image : str or file-like
        Path to (or open handle of) the image file.

    Returns
    -------
    torch.Tensor
        A (3, 64, 64) float tensor resized to 64x64 and normalized
        channel-wise with mean/std 0.5 (values in [-1, 1]).
    """
    transformation = transforms.Compose([transforms.Resize([64, 64]),
                                         #transforms.Grayscale(num_output_channels=1),
                                         transforms.ToTensor(),
                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                         ])
    # Fix: Image.open is lazy and the original never closed the file handle;
    # the context manager releases it once the transform has read the pixels.
    with Image.open(image) as img:
        return transformation(img)
def predict(image_path, model):
    """Classify a single image file with a trained model.

    Parameters
    ----------
    image_path : str
        Path of the image to classify.
    model : nn.Module
        Trained network producing one score per entry in ``classes``.

    Returns
    -------
    str
        The predicted character label from ``classes``.
    """
    model.to('cpu')
    # Build a batch of one sample for the forward pass.
    batch = process_image(image_path).unsqueeze_(0)
    model.eval()
    with torch.no_grad():
        output = model.forward(batch)
    # The highest-scoring output unit is the predicted class index.
    _, idx_tensor = torch.max(output, 1)
    if train_on_gpu:
        idx = np.squeeze(idx_tensor.cpu().numpy())
    else:
        idx = np.squeeze(idx_tensor.numpy())
    return classes[idx]
def deskew(image):
    """Estimate the dominant skew angle of a text image, in degrees.

    The angle is taken from the most common slope among probabilistic Hough
    line segments detected in the image's edges, then folded into the range
    (-45, 45] so 'sideways' detections map to small corrections.

    Parameters
    ----------
    image : ndarray
        Grayscale image.

    Returns
    -------
    float
        Estimated rotation angle in degrees (0 when no segments are found).
    """
    # Otsu threshold to get rid of extraneous background noise.
    thresh = threshold_otsu(image)
    normalize = image > thresh
    # Gaussian blur before edge detection to suppress spurious edges.
    blur = gaussian(normalize, 3)
    edges = canny(blur)
    # Each detected segment has the form ((x1, y1), (x2, y2)).
    hough_lines = probabilistic_hough_line(edges)
    if not hough_lines:
        # Robustness fix: with no segments, np.histogram below would be fed
        # an empty sample and fail; report "no skew" instead.
        return 0
    # Slope of each segment (0 for vertical segments to avoid div-by-zero).
    slopes = [(y2 - y1) / (x2 - x1) if (x2 - x1) else 0
              for (x1, y1), (x2, y2) in hough_lines]
    # slope = tan(theta); convert each angle to degrees for rotate().
    deg_angles = [np.degrees(np.arctan(s)) for s in slopes]
    # The most populated angle bucket wins.
    counts, bin_edges = np.histogram(deg_angles, bins=180)
    rotation_number = bin_edges[np.argmax(counts)]
    # Correct for 'sideways' alignments.
    if rotation_number > 45:
        rotation_number = -(90 - rotation_number)
    elif rotation_number < -45:
        rotation_number = 90 - abs(rotation_number)
    return rotation_number
def deskew2(img, angle):
    """Rotate *img* by *angle* degrees and crop to its content's bounding box.

    Parameters
    ----------
    img : ndarray
        2-D grayscale image; modified in place (colors are inverted).
    angle : float
        Rotation angle in degrees, e.g. from deskew().

    Returns
    -------
    ndarray
        The rotated image cropped to the minimum-area box plus a 10px margin.
    """
    cv2.imshow('input', img)
    # Invert so the text pixels become non-zero for findNonZero().
    cv2.bitwise_not(img, img)
    # Minimum-area rotated rectangle around all content pixels.
    non_zero_pixels = cv2.findNonZero(img)
    center, wh, theta = cv2.minAreaRect(non_zero_pixels)
    root_mat = cv2.getRotationMatrix2D(center, angle, 1)
    rows, cols = img.shape
    rotated = cv2.warpAffine(img, root_mat, (cols, rows), flags=cv2.INTER_CUBIC)
    # Crop size with a 10px margin. Fix: np.int0 was removed in NumPy 2.0;
    # int() performs the same truncation toward zero.
    sizex = int(wh[0]) + 10
    sizey = int(wh[1]) + 10
    print(theta)
    # minAreaRect can report the box rotated by 90 degrees; swap the sides.
    if theta > -45:
        sizex, sizey = sizey, sizex
    return cv2.getRectSubPix(rotated, (sizey, sizex), center)
- #def breakword(img):
def detectlines(img):
    """Segment a binarized page image into per-line crops.

    Writes several debug images to disk and opens debug windows as a side
    effect, then returns a list of grayscale sub-images, one per detected
    text line, in contour-discovery order.
    """
    # NOTE(review): an all-zero 5x5 kernel is passed to erode — np.ones was
    # presumably intended; kept as-is to preserve the original behavior.
    kernel = np.zeros((5, 5), np.uint8)
    img = cv2.erode(img, kernel, iterations=1)
    cv2.imshow('thinned', img)
    cv2.imwrite("noyse.jpg", img)
    thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    # Dilate with a rectangle sized from the image so the characters of one
    # line merge into a single blob.
    ho, wo = img.shape
    ho = math.floor(ho / 5)
    # NOTE(review): wo is derived from the already-reduced ho rather than the
    # image width — confirm this was intended.
    wo = math.floor(ho / 10)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ho, wo))
    morph = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, kernel)
    # External contours of the merged blobs are the candidate lines.
    cntrs = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values on OpenCV 4.x, 3 values on 3.x.
    cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
    print('yo')
    print(len(cntrs))
    result = img.copy()
    ara = []
    for contour in cntrs:
        x, y, w, h = cv2.boundingRect(contour)
        # Outline the detected line on the debug image and crop it out.
        cv2.rectangle(result, (x, y), (x + w, y + h), (0, 0, 255), 2)
        ara.append(img[y:y + h, x:x + w].copy())
    print(len(ara))
    # Persist the intermediate stages for inspection.
    cv2.imwrite("text_above_lines_threshold.png", thresh)
    cv2.imwrite("text_above_lines_morph.png", morph)
    cv2.imwrite("text_above_lines_lines.jpg", result)
    cv2.imshow("THRESH", thresh)
    cv2.imshow("MORPH", morph)
    cv2.imshow("RESULT", result)
    return ara
# ---- Script entry: load, binarize, deskew, and segment a page image ----
img = cv2.imread(r"D:\Bangla OCR Dataset\Dataset\Dataset\1\1_3.jpg", cv2.IMREAD_GRAYSCALE)
cv2.imshow('input image', img)
print(img.shape)

# Adaptive Gaussian threshold -> binary image.
img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
cv2.imshow('binary image', img)

# Estimate the skew angle, then rotate/crop to correct it.
angle = deskew(img)
img = deskew2(img, angle)
cv2.bitwise_not(img, img)
cv2.imshow("Skew Corrected", img)

# Denoise before segmentation.
img = cv2.fastNlMeansDenoising(img, img, 50.0, 7, 21)
cv2.imshow('noiseless image1', img)

# Split the page into line images (processed bottom-to-top below).
ara = detectlines(img)
sz = len(ara)
print('yoyo')
print(sz)

sliced = []
for i in range(0, sz):
    line_img = ara[sz - i - 1]
    cv2.imshow('Crop %d' % (i,), line_img)
    cv2.imwrite("Crp%d.png" % (i,), line_img)
    img2 = line_img
    thresh2 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    print(img2.shape)
    he, we = img2.shape
    he = math.ceil(he / 5.0)
    we = math.floor(we / 10.0)
    print("ksdljjjjj")
    print(line_img.shape)
    # Dilate so the characters of one word merge into a single blob.
    kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (he, we))
    morph2 = cv2.morphologyEx(thresh2, cv2.MORPH_DILATE, kernel2)
    cntrs2 = cv2.findContours(morph2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values on OpenCV 4.x, 3 values on 3.x.
    cntrs2 = cntrs2[0] if len(cntrs2) == 2 else cntrs2[1]
    result2 = img2.copy()
    temp = []
    for c2 in cntrs2:
        x, y, w, h = cv2.boundingRect(c2)
        cv2.rectangle(result2, (x, y), (x + w, y + h), (0, 0, 255), 2)
        temp.append(img2[y:y + h, x:x + w].copy())
    cv2.imwrite("temporary%d.png" % (i), result2)
    sliced.append(temp)
    print(len(temp))

# Re-binarize each word crop and expand to 3 channels for display/saving.
for i in range(0, sz):
    sz2 = len(sliced[sz - i - 1])
    for j in range(0, sz2):
        word = cv2.inRange(sliced[sz - i - 1][sz2 - j - 1], 221, 255)
        word = cv2.merge([word, word, word])
        sliced[sz - i - 1][sz2 - j - 1] = word
        cv2.imshow('sliced6%d%d' % (sz - i - 1, j), word)
        cv2.imwrite("sli9%d%d.png" % (sz - i - 1, j), word)
        print('yeah')
        print(word.shape)

cv2.waitKey(0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement