Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # coding: utf-8
- # In[1]:
- import zipfile
- from PIL import Image
- import pytesseract
- import cv2 as cv
- import numpy as np
# Load the Haar-cascade frontal-face classifier once at module level;
# img2faces() uses this shared instance for every detection call.
face_cascade = cv.CascadeClassifier('readonly/haarcascade_frontalface_default.xml')
- from PIL import ImageDraw, ImageFont
- import os
def img2txt(image):
    """Run OCR on an OpenCV image and return the recognised text.

    Args:
        image: Colour image array in OpenCV's BGR channel order (it is
            converted with ``cv.COLOR_BGR2GRAY`` before recognition).

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    grayimg = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # Convert to a PIL image in memory.  The previous implementation wrote a
    # pid-named temporary PNG into the working directory, which leaked the
    # file whenever OCR raised and deleted it while PIL still held it open.
    return pytesseract.image_to_string(Image.fromarray(grayimg))
def img2faces(image):
    """Detect frontal faces in an OpenCV BGR image.

    The image is converted to grayscale and passed to the module-level
    ``face_cascade`` classifier.

    Returns:
        Whatever ``detectMultiScale`` returns: the detected bounding boxes.
    """
    detect_opts = {
        "scaleFactor": 1.3,
        "minNeighbors": 5,
        "minSize": (40, 40),
        "flags": cv.CASCADE_SCALE_IMAGE,
    }
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    return face_cascade.detectMultiScale(gray, **detect_opts)
class ImgEntry:
    """Record of everything extracted from one image.

    Attributes are stored exactly as given:
        txt:   text associated with the image
        faces: face bounding boxes found in the image
        img:   the image itself
    """

    def __init__(self, txt, faces, img):
        self.txt, self.faces, self.img = txt, faces, img
def proczip(zipfl):
    """Index every image stored in a zip archive.

    Each member is decoded with OpenCV, run through OCR (``img2txt``) and
    face detection (``img2faces``), and stored as an ``ImgEntry``.

    Args:
        zipfl: Path (or file-like object) accepted by ``zipfile.ZipFile``.

    Returns:
        dict mapping archive member name -> ``ImgEntry``.  Directory entries
        and members that cannot be decoded as images are skipped.
    """
    ret = {}
    with zipfile.ZipFile(zipfl, "r") as z:
        for name in z.namelist():
            # Directory entries carry no image data.
            if name.endswith("/"):
                continue
            print("Processing " + name)
            filedata = z.read(name)
            # An empty buffer makes imdecode fail, and a non-image payload
            # makes it return None; either would crash the later cvtColor.
            img = None
            if filedata:
                img = cv.imdecode(np.frombuffer(filedata, np.uint8), 1)
            if img is None:
                print("Skipping " + name + " (not a decodable image)")
                continue
            ret[name] = ImgEntry(img2txt(img), img2faces(img), img)
    return ret
def lookup(imgdir, srchtxt):
    """Find the indexed images whose text contains ``srchtxt``.

    Args:
        imgdir: Mapping of file name -> entry exposing ``txt``, ``img`` and
            ``faces`` attributes.
        srchtxt: Substring to search for (case-sensitive ``in`` test).

    Returns:
        List of ``(file name, image, faces)`` tuples in the mapping's
        iteration order.
    """
    return [
        (name, entry.img, entry.faces)
        for name, entry in imgdir.items()
        if srchtxt in entry.txt
    ]
def draw_text(txt, x, y, img, d, f):
    """Write ``txt`` in black at ``(x, y)`` using draw context ``d`` and font ``f``.

    ``img`` is accepted for call-site symmetry but is not used here.
    """
    position = (x, y)
    d.text(position, txt, font=f, fill="black")
def getface(img, face):
    """Crop one detected face out of an image and return it as a PIL image.

    Args:
        img: Three-channel OpenCV image array in BGR channel order.
        face: ``(x, y, w, h)`` bounding box of the face within ``img``.

    Returns:
        A ``PIL.Image.Image`` holding the cropped region in RGB order.
    """
    x, y, w, h = face
    faceimg = img[y:y+h, x:x+w, :]
    # Convert in memory: swap BGR -> RGB and wrap the pixels in a PIL image.
    # The previous implementation wrote a pid-named temporary PNG that was
    # never removed and hid filesystem errors behind a bare ``except``.
    return Image.fromarray(cv.cvtColor(faceimg, cv.COLOR_BGR2RGB))
def showlst(lst):
    """Render search results as a contact sheet.

    Args:
        lst: List of ``(file name, image, faces)`` tuples as returned by
            ``lookup``.

    Returns:
        A PIL image.  For each file it shows a heading line followed either
        by rows of face thumbnails (five per row, on a black band) or by a
        note that no faces were found.  An empty ``lst`` yields a sheet that
        carries only a "no results" message.
    """
    txth = 35
    imgwidth = 200
    imgheight = 200
    offs = 20
    per_row = 5
    sheet_width = per_row * imgwidth

    # First pass: upper bound on the height needed for the working canvas.
    sheet_height = txth if len(lst) == 0 else 0
    for _name, _picture, found in lst:
        sheet_height += txth  # heading line for this file
        if len(found) == 0:
            sheet_height += txth  # "no faces" line
        else:
            rows = int(len(found) / per_row) + 1
            sheet_height += rows * imgheight + 2 * offs

    contact_sheet = Image.new("RGB", (sheet_width, sheet_height), "white")
    d = ImageDraw.Draw(contact_sheet)
    f = ImageFont.truetype("readonly/fanwood-webfont.ttf", size=30)

    left = 0
    y = 0
    if len(lst) == 0:
        draw_text("No results were found in any of the files.", left, y, contact_sheet, d, f)
        return contact_sheet

    # Second pass: draw headings and thumbnails, tracking the vertical cursor.
    for fl, img, faces in lst:
        draw_text("Results found in file " + fl, left, y, contact_sheet, d, f)
        y += txth
        if len(faces) == 0:
            draw_text("But there were no faces in that file.", left, y, contact_sheet, d, f)
            y += txth
            continue

        y += offs
        for idx, box in enumerate(faces):
            col = idx % per_row
            if col == 0:
                # Start of a thumbnail row: lay down the black background band.
                d.rectangle(((left, y), (left + sheet_width, y + imgheight)), fill="black")
            thumb = getface(img, box)
            thumb.thumbnail((imgwidth, imgheight))
            contact_sheet.paste(thumb, (col * imgwidth, y))
            if col == per_row - 1:
                # Row is full; move down to the next one.
                y = y + imgheight
        if len(faces) % per_row != 0:
            # Last row was only partially filled; still advance past it.
            y = y + imgheight
        y += offs

    # Trim the canvas to the height actually used and add side margins.
    new_sheet = Image.new("RGB", (sheet_width + 2 * offs, y), "white")
    cropped_sheet = contact_sheet.crop((0, 0, sheet_width, y))
    new_sheet.paste(cropped_sheet, (offs, 0))
    return new_sheet
# In[2]:
# Build the index (OCR text + detected faces) for the full image archive.
imgs = proczip("readonly/images.zip")
# In[3]:
# Build the same kind of index for the small image archive.
smimgs = proczip("readonly/small_img.zip")
# In[4]:
# Show the faces from every file in the full archive whose text contains 'Mark'.
Mark = showlst(lookup(imgs, 'Mark'))
display(Mark)
# In[5]:
# Show the faces from every file in the small archive whose text contains 'Chris'.
Chris = showlst(lookup(smimgs, 'Chris'))
display(Chris)
# In[6]:
# Search for a term passed as "wontfind" and display whatever sheet results.
display(showlst(lookup(imgs, "wontfind")))
# In[ ]:
# Download as html skips the output of the very last step in the notebook, for some reason.
# So, put in a dummy last step.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement