# Venkata C - Coursera Project

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import zipfile

from PIL import Image
import pytesseract
import cv2 as cv
import numpy as np

# Load the Haar-cascade frontal-face classifier once, at import time.
# img2faces() reuses this module-level instance for every image it scans.
face_cascade = cv.CascadeClassifier('readonly/haarcascade_frontalface_default.xml')
from PIL import ImageDraw, ImageFont
import os

def img2txt(image):
    """Run OCR on an OpenCV image and return the recognised text.

    Args:
        image: Colour image as a NumPy array in OpenCV's BGR channel order
            (the form ``proczip`` obtains from ``cv.imdecode``).

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the
        grayscale version of ``image``.
    """
    grayimg = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # Hand the grayscale pixels to PIL in memory.  The previous version
    # round-tripped through a "temp<pid>.png" file in the working directory,
    # which leaked the file whenever OCR raised, left the lazily opened PIL
    # file handle unclosed, and shared its filename with getface().  PNG is
    # lossless, so the pixels Tesseract sees are the same.
    return pytesseract.image_to_string(Image.fromarray(grayimg))


def img2faces(image):
    """Detect frontal faces in an OpenCV BGR image.

    The image is converted to grayscale and passed to the module-level
    ``face_cascade`` classifier.

    Args:
        image: Colour image array in BGR channel order.

    Returns:
        Whatever ``face_cascade.detectMultiScale`` returns; callers in this
        file treat each element as an ``(x, y, w, h)`` box.
    """
    # Detection parameters, collected in one place for readability.
    detector_options = {
        "scaleFactor": 1.3,
        "minNeighbors": 5,
        "minSize": (40, 40),
        "flags": cv.CASCADE_SCALE_IMAGE,
    }
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    return face_cascade.detectMultiScale(gray, **detector_options)

class ImgEntry:
    """Record kept for one processed image.

    Attributes:
        txt: Text associated with the image (``proczip`` stores the OCR
            output of ``img2txt`` here).
        faces: Face boxes for the image (``proczip`` stores the result of
            ``img2faces`` here).
        img: The image itself, as handed to the constructor.
    """

    def __init__(self, txt, faces, img):
        # Plain value holder: store the three fields exactly as given.
        self.txt, self.faces, self.img = txt, faces, img

def proczip(zipfl):
    """Index every decodable image inside a zip archive.

    Each member is decoded with OpenCV, OCR'd with ``img2txt`` and scanned
    for faces with ``img2faces``.

    Args:
        zipfl: Path (or file-like object) accepted by ``zipfile.ZipFile``.

    Returns:
        dict mapping each member name to an ``ImgEntry`` holding its OCR
        text, detected faces and decoded BGR image.  Members that are not
        decodable images (directory entries, empty files, non-image data)
        are skipped instead of aborting the whole run.
    """
    ret = {}
    with zipfile.ZipFile(zipfl, "r") as z:
        for name in z.namelist():
            print("Processing " + name)
            filedata = z.read(name)
            # Directory entries and empty members yield no bytes, and
            # imdecode rejects an empty buffer, so only decode real data.
            img = None
            if filedata:
                img = cv.imdecode(
                    np.frombuffer(filedata, np.uint8), cv.IMREAD_COLOR
                )
            # imdecode signals undecodable data by returning None; passing
            # that on to cvtColor would crash.
            if img is None:
                print("Skipping " + name + " (not a decodable image)")
                continue
            ret[name] = ImgEntry(img2txt(img), img2faces(img), img)
    return ret

def lookup(imgdir, srchtxt):
    """Find every indexed image whose text contains ``srchtxt``.

    Args:
        imgdir: Mapping of file name to an entry exposing ``txt``, ``img``
            and ``faces`` attributes (as built by ``proczip``).
        srchtxt: Substring to search for (case-sensitive ``in`` test).

    Returns:
        List of ``(file name, image, faces)`` tuples, in the mapping's
        iteration order, for the entries whose ``txt`` contains ``srchtxt``.
    """
    return [
        (filename, entry.img, entry.faces)
        for filename, entry in imgdir.items()
        if srchtxt in entry.txt
    ]


def draw_text(txt, x, y, img, d, f):
    """Draw ``txt`` in black at ``(x, y)`` using drawing context ``d``.

    Args:
        txt: String to render.
        x, y: Position passed to ``d.text`` as the ``(x, y)`` tuple.
        img: Unused; kept so existing call sites keep working.
        d: Object with a ``text`` method (an ``ImageDraw`` in this file).
        f: Font passed through as the ``font`` keyword.
    """
    position = (x, y)
    d.text(position, txt, font=f, fill="black")


def getface(img, face):
    """Crop one detected face out of an OpenCV image as a PIL image.

    Args:
        img: Image array indexed as ``[row, column, channel]``.  Assumed to
            be 3-channel BGR, which is what ``proczip`` decodes.
        face: ``(x, y, w, h)`` box; ``x``/``y`` is the top-left corner and
            ``w``/``h`` the extent in pixels.

    Returns:
        ``PIL.Image.Image`` in RGB mode containing the cropped region.
    """
    x, y, w, h = face
    faceimg = img[y:y+h, x:x+w, :]
    # Build the PIL image in memory.  The previous version wrote the crop to
    # "temp<pid>.png", hid every removal error behind a bare ``except``,
    # shared that filename with img2txt(), and never deleted the last file it
    # wrote.  OpenCV stores channels as BGR while PIL expects RGB, so swap
    # them explicitly (the PNG round-trip used to do this implicitly).
    return Image.fromarray(cv.cvtColor(faceimg, cv.COLOR_BGR2RGB))

def showlst(lst):
    """Render search results as a single contact-sheet image.

    For every result a heading line is drawn, followed either by a note that
    no faces were found or by rows of face thumbnails (five per row) on a
    black band.

    Args:
        lst: Sequence of ``(file name, image, faces)`` tuples, as returned
            by ``lookup``.  ``faces`` only needs ``len()`` and iteration.

    Returns:
        A PIL image.  With no results it is a sheet holding a single
        "no results" line; otherwise it is the used part of the sheet with a
        white margin added on the left and right.
    """
    # Layout constants, in pixels.
    line_h = 35
    cell_w = 200
    cell_h = 200
    margin = 20
    cells_per_row = 5

    width = cells_per_row * cell_w

    # Upper bound for the working canvas height; the sheet is trimmed to the
    # height actually used before it is returned.
    height = line_h if len(lst) == 0 else 0
    for _, _, found in lst:
        height += line_h  # heading line for this file
        if len(found) == 0:
            height += line_h
        else:
            height += (len(found) // cells_per_row + 1) * cell_h + 2 * margin

    canvas = Image.new("RGB", (width, height))
    pen = ImageDraw.Draw(canvas)
    pen.rectangle(((0, 0), (width, height)), fill="white")

    font = ImageFont.truetype("readonly/fanwood-webfont.ttf", size=30)

    if len(lst) == 0:
        draw_text("No results were found in any of the files.", 0, 0, canvas, pen, font)
        return canvas

    top = 0  # y coordinate of the next free line on the canvas
    for fname, picture, found in lst:
        draw_text("Results found in file " + fname, 0, top, canvas, pen, font)
        top += line_h

        # len() rather than truthiness: `found` may be a NumPy array.
        if len(found) == 0:
            draw_text("But there were no faces in that file.", 0, top, canvas, pen, font)
            top += line_h
            continue

        top += margin
        for idx, box in enumerate(found):
            row, col = divmod(idx, cells_per_row)
            left = col * cell_w
            upper = top + row * cell_h
            if col == 0:
                # First cell of a row: paint the black band behind the row.
                pen.rectangle(((left, upper), (left + width, upper + cell_h)), fill="black")
            thumb = getface(picture, box)
            thumb.thumbnail((cell_w, cell_h))
            canvas.paste(thumb, (left, upper))

        # Advance past every (possibly partial) row used, plus the gap.
        rows_used = (len(found) + cells_per_row - 1) // cells_per_row
        top += rows_used * cell_h + margin

    # Trim to the used height and add white margins on the left and right.
    framed_w = width + 2 * margin
    framed = Image.new("RGB", (framed_w, top))
    ImageDraw.Draw(framed).rectangle(((0, 0), (framed_w, top)), fill="white")
    framed.paste(canvas.crop((0, 0, width, top)), (margin, 0))

    return framed


# In[2]:


# Index every image in the main archive: OCR text plus detected faces,
# keyed by member name.
imgs = proczip("readonly/images.zip")


# In[3]:


# Build the same kind of index for the second archive.
smimgs = proczip("readonly/small_img.zip")


# In[4]:


# Contact sheet of the faces in every file whose OCR text contains 'Mark'.
# NOTE(review): `display` is not imported in this file; presumably it is
# supplied by the notebook (IPython) environment -- confirm before running
# this as a plain script.
Mark = showlst(lookup(imgs, 'Mark'))
display(Mark)


# In[5]:


# Same search against the second index, for 'Chris'.
Chris = showlst(lookup(smimgs, 'Chris'))
display(Chris)


# In[6]:


# A search term with no matches exercises the "no results" sheet.
display(showlst(lookup(imgs, "wontfind")))


# In[ ]:


# Download as html skips the output of the very last step in the notebook, for some reason.
# So, put in a dummy last step.