# combined3

# -*- coding: utf-8 -*-
"""
Created on Mon Jan 18 19:17:12 2021

@author: Sajid
"""

# -*- coding: utf-8 -*-
"""
Created on Mon Jan 18 18:33:48 2021

@author: Sajid

"""
from skimage.io import imread, imshow
from skimage.filters import gaussian, threshold_otsu
from skimage.feature import canny
from skimage.transform import probabilistic_hough_line, rotate

#testing
import numpy as np
import os
import cv2
import math
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models


from collections import OrderedDict
from PIL import Image

import pandas as pd
import seaborn as sns


def process_image(image):
    """Load an image file and turn it into a normalized tensor for a PyTorch model.

    The image is resized to 64x64, converted to a tensor, and each of its
    three channels is normalized with mean 0.5 and standard deviation 0.5.

    Args:
        image: Anything accepted by ``PIL.Image.open`` (for example a file path).

    Returns:
        The tensor produced by the transform pipeline.
    """
    transformation = transforms.Compose([
        transforms.Resize([64, 64]),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Image.open keeps the underlying file open until the image is closed, so
    # run the whole pipeline inside the context manager to release the handle.
    with Image.open(image) as img:
        # Normalize is configured for exactly three channels; converting to RGB
        # leaves RGB inputs unchanged and makes grayscale/palette/RGBA inputs
        # usable instead of failing on a channel-count mismatch.
        return transformation(img.convert("RGB"))


def deskew(image):
    """Estimate the dominant line angle of ``image`` in degrees.

    The image is binarized with Otsu's threshold, blurred, run through Canny
    edge detection, and line segments are extracted with the probabilistic
    Hough transform. The angle of every segment is histogrammed into 180 bins
    and the left edge of the most populated bin is taken as the result, folded
    so that near-vertical results are expressed relative to the other axis.

    Args:
        image: Image array accepted by ``threshold_otsu``.

    Returns:
        The selected angle in degrees, after the +/-45 degree folding below.
    """
    # Binarize first so faint noise does not produce edges.
    binary = image > threshold_otsu(image)

    # Blur with sigma 3, then extract edges.
    edge_map = canny(gaussian(binary, 3))

    # Each segment comes back as ((x1, y1), (x2, y2)).
    segments = probabilistic_hough_line(edge_map)

    # slope = tan(theta), so arctan recovers each segment's angle; segments
    # with no horizontal extent are given a slope of 0.
    angles_deg = []
    for (x1, y1), (x2, y2) in segments:
        run = x2 - x1
        slope = (y2 - y1) / run if run else 0
        angles_deg.append(np.degrees(np.arctan(slope)))

    # Pick the most common angle (left edge of the fullest histogram bin).
    counts, bin_edges = np.histogram(angles_deg, bins=180)
    rotation_number = bin_edges[np.argmax(counts)]

    # Correct for 'sideways' alignments.
    if rotation_number > 45:
        return -(90 - rotation_number)
    if rotation_number < -45:
        return 90 - abs(rotation_number)
    return rotation_number


def deskew2(img, angle):
    """Rotate an image by ``angle`` degrees and crop it around its content.

    The image is shown in an 'input' window, colour-inverted, rotated about
    the centre of the minimum-area rectangle enclosing its non-zero pixels,
    and cropped to that rectangle's size plus a 10 pixel margin.

    Args:
        img: Two-dimensional (single-channel) image array; it is not modified.
        angle: Rotation angle in degrees passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated and cropped image. It is still colour-inverted relative to
        ``img``; the caller is expected to invert it back if needed.
    """
    cv2.imshow('input', img)

    # Invert into a fresh array rather than in place, so the caller's image
    # is not silently modified.
    inverted = cv2.bitwise_not(img)

    # Minimum-area bounding box around everything that is non-zero after
    # the inversion.
    non_zero_pixels = cv2.findNonZero(inverted)
    center, wh, theta = cv2.minAreaRect(non_zero_pixels)

    root_mat = cv2.getRotationMatrix2D(center, angle, 1)
    rows, cols = inverted.shape
    rotated = cv2.warpAffine(inverted, root_mat, (cols, rows), flags=cv2.INTER_CUBIC)

    # Border removal: crop to the bounding box plus a 10 pixel margin.
    # np.int0 was removed in NumPy 2.0; int() truncates the same way and
    # yields plain Python ints for the patch size.
    sizex = int(wh[0]) + 10
    sizey = int(wh[1]) + 10
    print(theta)
    if theta > -45:
        sizex, sizey = sizey, sizex
    return cv2.getRectSubPix(rotated, (sizey, sizex), center)

#def breakword(img):
def detectlines(img, blank_threshold=50):
    """Cut a grayscale page image into horizontal strips at blank rows.

    A row counts as blank when it has fewer than ``blank_threshold`` pixels
    whose value is not 255. The first row of every run of blank rows is a
    boundary, and each pair of consecutive boundaries yields one strip.

    Side effects: shows the eroded image in a 'thinned' window, writes it to
    ``noyse.jpg`` in the working directory, and prints the boundary rows and
    the number of strips.

    Args:
        img: Two-dimensional (single-channel) image array.
        blank_threshold: Maximum number of non-white pixels (exclusive) for a
            row to be treated as blank. Defaults to 50.

    Returns:
        A list of image arrays, one per strip, in top-to-bottom order.
    """
    # NOTE(review): the structuring element is all zeros; np.ones may have
    # been intended. Left as-is because changing it alters the output --
    # confirm before touching.
    kernel = np.zeros((5, 5), np.uint8)
    img = cv2.erode(img, kernel, iterations=1)
    cv2.imshow('thinned', img)
    cv2.imwrite("noyse.jpg", img)

    ho, wo = img.shape

    # Count the non-white pixels of every row in one vectorized pass instead
    # of visiting each pixel from Python.
    dark_per_row = np.count_nonzero(img != 255, axis=1)
    is_blank = dark_per_row < blank_threshold

    # A boundary is a blank row that is not preceded by another blank row.
    follows_blank = np.concatenate(([False], is_blank[:-1]))
    b = np.flatnonzero(is_blank & ~follows_blank).tolist()
    print(b)

    ara = []
    for c, d in zip(b, b[1:]):
        # The outline is drawn on the working image before cropping, so it is
        # part of the returned strips (unchanged from the original behaviour).
        cv2.rectangle(img, (0, c), (wo, d), (0, 0, 255), 2)
        ara.append(img[c:d, 0:wo].copy())

    print(len(ara))
    return ara


# Demo pipeline: load a page, binarize, deskew, denoise, split into line
# strips and display every intermediate result.
image_path = r"D:\Bangla OCR Dataset\Dataset\Dataset\1\1_3.jpg"
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image: %s" % image_path)
cv2.imshow('input image', img)

print(img.shape)
img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
cv2.imshow('binary image', img)

# Estimate the skew angle, rotate/crop, then undo the colour inversion that
# deskew2 leaves on its result.
angel = deskew(img)
img = deskew2(img, angel)
cv2.bitwise_not(img, img)
cv2.imshow("Skew Corrected", img)

img = cv2.fastNlMeansDenoising(img, img, 50.0, 7, 21)
cv2.imshow('noiseless image1', img)

# Split the cleaned page into line strips and show each one.
ara = detectlines(img)
for i, crop in enumerate(ara):
    cv2.imshow('Crop %d' % (i,), crop)
cv2.waitKey(0)