Wordbreaking11 (sajid006, Oct 14th, 2021)
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 10 21:55:15 2021

@author: Sajid
"""

from skimage.io import imread, imshow
from skimage.filters import gaussian, threshold_otsu
from skimage.feature import canny
from skimage.transform import probabilistic_hough_line, rotate

import os
import math

import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models

from collections import OrderedDict
from PIL import Image

# define the CNN architecture
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        # convolutional layer (sees a 64x64x3 image tensor)
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        # convolutional layer (sees a 32x32x16 tensor)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        # convolutional layer (sees a 16x16x32 tensor)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        # convolutional layer (sees an 8x8x64 tensor)
        self.conv4 = nn.Conv2d(64, 128, 3, padding=1)
        # convolutional layer (sees a 4x4x128 tensor)
        self.conv5 = nn.Conv2d(128, 256, 3, padding=1)

        # max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # linear layer (256 * 2 * 2 -> 2048)
        self.fc1 = nn.Linear(256 * 2 * 2, 2048)
        # linear layer (2048 -> 512)
        self.fc2 = nn.Linear(2048, 512)
        # linear layer (512 -> 50 classes)
        self.fc3 = nn.Linear(512, 50)
        # dropout layer (p=0.2)
        self.dropout = nn.Dropout(0.2)
        #self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # sequence of convolutional and max pooling layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = self.pool(F.relu(self.conv5(x)))
        # flatten the feature maps
        x = x.view(-1, 256 * 2 * 2)

        # dropout, then 1st hidden layer with ReLU activation
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        # dropout, then 2nd hidden layer
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.dropout(x)
        # final classification layer (raw scores for 50 classes)
        x = self.fc3(x)
        return x
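
# Note (added): each 3x3 convolution with padding=1 keeps the spatial size and
# each 2x2 max pool halves it, so a 64x64 input shrinks as
# 64 -> 32 -> 16 -> 8 -> 4 -> 2 across the five stages. After conv5 the tensor
# is 256 x 2 x 2, which is why fc1 expects 256 * 2 * 2 = 1024 input features,
# and the network ends in 50 outputs, one per character class:
#   Net()(torch.zeros(1, 3, 64, 64)).shape == torch.Size([1, 50])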

train_on_gpu = torch.cuda.is_available()

# the 50 Bangla character classes the model predicts
classes = ['অ', 'আ', 'ই', 'ঈ', 'উ', 'ঊ', 'ঋ', 'এ', 'ঐ', 'ও', 'ঔ', 'ক', 'খ', 'গ', 'ঘ', 'ঙ', 'চ', 'ছ', 'জ', 'ঝ', 'ঞ', 'ট',
           'ঠ', 'ড', 'ঢ', 'ণ', 'ত', 'থ', 'দ', 'ধ', 'ন', 'প', 'ফ', 'ব', 'ভ', 'ম', 'য', 'র', 'ল', 'শ', 'ষ', 'স', 'হ',
           'ড়', 'ঢ়', 'য়', 'ৎ', '৹', ':', '৺']

# rebuild the network and load the trained weights on CPU
model_scratch = Net()
model_scratch.load_state_dict(torch.load('model_scratch.pt', map_location=torch.device('cpu')))


def process_image(image):
    '''Resizes and normalizes a grayscale image array into a tensor for the PyTorch model.'''

    # the input is a single-channel (grayscale) NumPy array; replicate it into
    # three channels to match the 3-channel input the network expects
    img = cv2.merge([image, image, image])
    img = transforms.ToPILImage()(img)
    transformation = transforms.Compose([transforms.Resize([64, 64]),
                                         transforms.ToTensor(),
                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                         ])
    return transformation(img)
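
# Note (added): ToTensor() scales pixel values to [0, 1] and the Normalize
# above (mean 0.5, std 0.5 per channel) maps them to [-1, 1]; the returned
# tensor has shape (3, 64, 64).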

def predict(image, model):
    '''Runs a single grayscale crop (NumPy array) through the model and returns the predicted class.'''

    model.to('cpu')
    image = process_image(image)
    image = image.unsqueeze_(0)   # add a batch dimension

    model.eval()
    with torch.no_grad():
        output = model.forward(image)

    # convert the raw output scores to a predicted class index
    _, preds_tensor = torch.max(output, 1)
    preds = np.squeeze(preds_tensor.numpy()) if not train_on_gpu else np.squeeze(preds_tensor.cpu().numpy())
    return classes[preds]
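
# Example (added sketch; 'sample_char.jpg' is a hypothetical file, and
# model_scratch is the checkpointed network loaded above):
#
#   sample = cv2.imread('sample_char.jpg', cv2.IMREAD_GRAYSCALE)
#   print(predict(sample, model_scratch))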


# load a word image in grayscale and show it
img = cv2.imread(r"F:\Thesis Files\Bangla OCR Dataset\Dataset\Dataset\1\Words\1_2\1_2_2_1.JPG", cv2.IMREAD_GRAYSCALE)
cv2.imshow('input', img)

# cut the word into 10 progressively wider crops, each starting at the left
# edge and extending to (i + 1) * floor(width / 10) columns
he, we = img.shape
vec = []
vag = math.floor(we / 10)
for i in range(0, 10):
    cur = (i + 1) * vag
    currImg = img[0:he, 0:cur]
    vec.append(currImg)

# show each crop and print its predicted character
for i in range(0, 10):
    cv2.imshow('Crop %d' % i, vec[i])
    print(predict(vec[i], model_scratch))

cv2.waitKey(0)
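# Optional cleanup (added, not in the original paste): close the OpenCV
# windows once a key has been pressed.
cv2.destroyAllWindows()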