- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "<a id='1'></a>\n",
- "# 1. Import packages"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Using TensorFlow backend.\n"
- ]
- }
- ],
- "source": [
- "from keras.models import Sequential, Model\n",
- "from keras.layers import *\n",
- "from keras.layers.advanced_activations import LeakyReLU\n",
- "from keras.activations import relu\n",
- "from keras.initializers import RandomNormal\n",
- "from keras.applications import *\n",
- "import keras.backend as K\n",
- "from tensorflow.contrib.distributions import Beta\n",
- "import tensorflow as tf\n",
- "from keras.optimizers import Adam"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "from image_augmentation import random_transform\n",
- "from image_augmentation import random_warp\n",
- "from utils import get_image_paths, load_images, stack_images\n",
- "from pixel_shuffler import PixelShuffler"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import time\n",
- "import numpy as np\n",
- "from PIL import Image\n",
- "import cv2\n",
- "import glob\n",
- "from random import randint, shuffle\n",
- "from IPython.display import clear_output\n",
- "from IPython.display import display\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "<a id='2'></a>\n",
- "# 2. Install requirements\n",
- "\n",
- "## ========== CAUTION ========== \n",
- "\n",
- "If you are running this jupyter on local machine. Please read [this blog](http://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/) before running the following cells which pip install packages."
- ]
- },
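- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The blog post above recommends installing into the interpreter that runs the current kernel rather than whatever `pip` happens to be on PATH. A minimal sketch of that pattern (commented out like the install cells below):"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Install into this kernel's interpreter, per the linked post.\n",
- "#import sys\n",
- "#!{sys.executable} -m pip install face_recognition moviepy"
- ]
- },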
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# https://github.com/ageitgey/face_recognition\n",
- "#!pip install face_recognition"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "#!pip install moviepy"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "<a id='4'></a>\n",
- "# 4. Config\n",
- "\n",
- "mixup paper: https://arxiv.org/abs/1710.09412\n",
- "\n",
- "Default training data directories: `./faceA/` and `./faceB/`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "K.set_learning_phase(1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "channel_axis=-1\n",
- "channel_first = False"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "IMAGE_SHAPE = (64, 64, 3)\n",
- "nc_in = 3 # number of input channels of generators\n",
- "nc_D_inp = 6 # number of input channels of discriminators\n",
- "\n",
- "use_perceptual_loss = False\n",
- "use_lsgan = True\n",
- "use_instancenorm = False\n",
- "use_mixup = True\n",
- "mixup_alpha = 0.2 # 0.2\n",
- "\n",
- "batchSize = 32\n",
- "lrD = 1e-4 # Discriminator learning rate\n",
- "lrG = 1e-4 # Generator learning rate\n",
- "\n",
- "# Path of training images\n",
- "img_dirA = './faceA/*.*'\n",
- "img_dirB = './faceB/*.*'"
- ]
- },
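- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "`use_mixup` refers to the mixup paper linked above: training pairs are blended with a weight drawn from Beta(`mixup_alpha`, `mixup_alpha`). The actual loss lives in `model_GAN_v2`; the cell below is only a minimal numpy sketch of the idea, and `mixup_pair` is a hypothetical helper, not part of this repo."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Minimal mixup sketch (hypothetical helper, for illustration only):\n",
- "# draw lam ~ Beta(alpha, alpha) and blend a real/fake pair and their labels.\n",
- "def mixup_pair(x_real, x_fake, alpha=mixup_alpha):\n",
- " lam = np.random.beta(alpha, alpha) # mostly near 0 or 1 for small alpha\n",
- " mixed_x = lam * x_real + (1 - lam) * x_fake\n",
- " mixed_y = lam # blended label: 1 = real, 0 = fake\n",
- " return mixed_x, mixed_y"
- ]
- },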
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "<a id='5'></a>\n",
- "# 5. Define models"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "from model_GAN_v2 import *"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "collapsed": true,
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "encoder = Encoder()\n",
- "decoder_A = Decoder_ps()\n",
- "decoder_B = Decoder_ps()\n",
- "\n",
- "x = Input(shape=IMAGE_SHAPE)\n",
- "\n",
- "netGA = Model(x, decoder_A(encoder(x)))\n",
- "netGB = Model(x, decoder_B(encoder(x)))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "netDA = Discriminator(nc_D_inp)\n",
- "netDB = Discriminator(nc_D_inp)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "<a id='6'></a>\n",
- "# 6. Load Models"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "model loaded.\n"
- ]
- }
- ],
- "source": [
- "try:\n",
- " encoder.load_weights(\"models/encoder.h5\")\n",
- " decoder_A.load_weights(\"models/decoder_A.h5\")\n",
- " decoder_B.load_weights(\"models/decoder_B.h5\")\n",
- " #netDA.load_weights(\"models/netDA.h5\") \n",
- " #netDB.load_weights(\"models/netDB.h5\") \n",
- " print (\"model loaded.\")\n",
- "except:\n",
- " print (\"Weights file not found.\")\n",
- " pass"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "<a id='7'></a>\n",
- "# 7. Define Inputs/Outputs Variables\n",
- "\n",
- " distorted_A: A (batch_size, 64, 64, 3) tensor, input of generator_A (netGA).\n",
- " distorted_B: A (batch_size, 64, 64, 3) tensor, input of generator_B (netGB).\n",
- " fake_A: (batch_size, 64, 64, 3) tensor, output of generator_A (netGA).\n",
- " fake_B: (batch_size, 64, 64, 3) tensor, output of generator_B (netGB).\n",
- " mask_A: (batch_size, 64, 64, 1) tensor, mask output of generator_A (netGA).\n",
- " mask_B: (batch_size, 64, 64, 1) tensor, mask output of generator_B (netGB).\n",
- " path_A: A function that takes distorted_A as input and outputs fake_A.\n",
- " path_B: A function that takes distorted_B as input and outputs fake_B.\n",
- " path_mask_A: A function that takes distorted_A as input and outputs mask_A.\n",
- " path_mask_B: A function that takes distorted_B as input and outputs mask_B.\n",
- " path_abgr_A: A function that takes distorted_A as input and outputs concat([mask_A, fake_A]).\n",
- " path_abgr_B: A function that takes distorted_B as input and outputs concat([mask_B, fake_B]).\n",
- " real_A: A (batch_size, 64, 64, 3) tensor, target images for generator_A given input distorted_A.\n",
- " real_B: A (batch_size, 64, 64, 3) tensor, target images for generator_B given input distorted_B."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def cycle_variables(netG):\n",
- " distorted_input = netG.inputs[0]\n",
- " fake_output = netG.outputs[0]\n",
- " alpha = Lambda(lambda x: x[:,:,:, :1])(fake_output)\n",
- " rgb = Lambda(lambda x: x[:,:,:, 1:])(fake_output)\n",
- " \n",
- " masked_fake_output = alpha * rgb + (1-alpha) * distorted_input \n",
- "\n",
- " fn_generate = K.function([distorted_input], [masked_fake_output])\n",
- " fn_mask = K.function([distorted_input], [concatenate([alpha, alpha, alpha])])\n",
- " fn_abgr = K.function([distorted_input], [concatenate([alpha, rgb])])\n",
- " return distorted_input, fake_output, alpha, fn_generate, fn_mask, fn_abgr"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "distorted_A, fake_A, mask_A, path_A, path_mask_A, path_abgr_A = cycle_variables(netGA)\n",
- "distorted_B, fake_B, mask_B, path_B, path_mask_B, path_abgr_B = cycle_variables(netGB)\n",
- "real_A = Input(shape=IMAGE_SHAPE)\n",
- "real_B = Input(shape=IMAGE_SHAPE)"
- ]
- },
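- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The `path_*` functions are plain `K.function`s: each takes a list containing one input batch and returns a list containing one output batch. The cell below is a small smoke test on an all-zero batch (a placeholder, not real data) just to confirm the output shapes."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Shape check (illustration only): run a zero batch through each path of netGA.\n",
- "dummy = np.zeros((1,) + IMAGE_SHAPE, dtype=np.float32)\n",
- "out_rgb = path_A([dummy])[0] # masked fake_A, (1, 64, 64, 3)\n",
- "out_mask = path_mask_A([dummy])[0] # alpha mask tiled to 3 channels, (1, 64, 64, 3)\n",
- "out_abgr = path_abgr_A([dummy])[0] # concat([alpha, rgb]), (1, 64, 64, 4)\n",
- "print(out_rgb.shape, out_mask.shape, out_abgr.shape)"
- ]
- },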
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "<a id='12'></a>\n",
- "# 12. Make video clips\n",
- "\n",
- "Given a video as input, the following cells will detect face for each frame using dlib's cnn model. And use trained GAN model to transform detected face into target face. Then output a video with swapped faces."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "collapsed": true,
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "# Download ffmpeg if need, which is required by moviepy.\n",
- "\n",
- "#import imageio\n",
- "#imageio.plugins.ffmpeg.download()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import face_recognition\n",
- "from moviepy.editor import VideoFileClip"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "whom2whom = \"BtoA\" # default trainsforming faceB to faceA\n",
- "\n",
- "if whom2whom is \"AtoB\":\n",
- " path_func = path_abgr_B\n",
- "elif whom2whom is \"BtoA\":\n",
- " path_func = path_abgr_A\n",
- "else:\n",
- " print (\"whom2whom should be either AtoB or BtoA\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "<a id='13'></a>\n",
- "# 13. Make video clips w/o face alignment\n",
- "\n",
- "### Default transform: face B to face A"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "use_smoothed_mask = True\n",
- "use_smoothed_bbox = True\n",
- "\n",
- "def is_higher_than_480p(x):\n",
- " return (x.shape[1] >= 858 and x.shape[0] >= 480)\n",
- "\n",
- "def is_higher_than_720p(x):\n",
- " return (x.shape[1] >= 1280 and x.shape[0] >= 720)\n",
- "\n",
- "def is_higher_than_1080p(x):\n",
- " return (x.shape[1] >= 1920 and x.shape[0] >= 1080)\n",
- "\n",
- "def calibrate_coord(faces, video_scaling_factor):\n",
- " for i, (x0, y1, x1, y0) in enumerate(faces):\n",
- " faces[i] = (x0*video_scaling_factor, y1*video_scaling_factor, \n",
- " x1*video_scaling_factor, y0*video_scaling_factor)\n",
- " return faces\n",
- "\n",
- "def get_faces_bbox(image, model=\"hog\"): \n",
- " if is_higher_than_1080p(image):\n",
- " video_scaling_factor = 4 + video_scaling_offset\n",
- " resized_image = cv2.resize(image, \n",
- " (image.shape[1]//video_scaling_factor, image.shape[0]//video_scaling_factor))\n",
- " faces = face_recognition.face_locations(resized_image, model=model)\n",
- " faces = calibrate_coord(faces, video_scaling_factor)\n",
- " elif is_higher_than_720p(image):\n",
- " video_scaling_factor = 3 + video_scaling_offset\n",
- " resized_image = cv2.resize(image, \n",
- " (image.shape[1]//video_scaling_factor, image.shape[0]//video_scaling_factor))\n",
- " faces = face_recognition.face_locations(resized_image, model=model)\n",
- " faces = calibrate_coord(faces, video_scaling_factor) \n",
- " elif is_higher_than_480p(image):\n",
- " video_scaling_factor = 2 + video_scaling_offset\n",
- " resized_image = cv2.resize(image, \n",
- " (image.shape[1]//video_scaling_factor, image.shape[0]//video_scaling_factor))\n",
- " faces = face_recognition.face_locations(resized_image, model=model)\n",
- " faces = calibrate_coord(faces, video_scaling_factor)\n",
- " else:\n",
- " faces = face_recognition.face_locations(image, model=model)\n",
- " return faces\n",
- "\n",
- "def get_smoothed_coord(x0, x1, y0, y1):\n",
- " global prev_x0, prev_x1, prev_y0, prev_y1\n",
- " x0 = int(0.65*prev_x0 + 0.35*x0)\n",
- " x1 = int(0.65*prev_x1 + 0.35*x1)\n",
- " y1 = int(0.65*prev_y1 + 0.35*y1)\n",
- " y0 = int(0.65*prev_y0 + 0.35*y0)\n",
- " return x0, x1, y0, y1 \n",
- " \n",
- "def set_global_coord(x0, x1, y0, y1):\n",
- " global prev_x0, prev_x1, prev_y0, prev_y1\n",
- " prev_x0 = x0\n",
- " prev_x1 = x1\n",
- " prev_y1 = y1\n",
- " prev_y0 = y0\n",
- " \n",
- "def generate_face(ae_input, path_abgr, roi_size):\n",
- " result = np.squeeze(np.array([path_abgr([[ae_input]])]))\n",
- " result_a = result[:,:,0] * 255\n",
- " result_bgr = np.clip( (result[:,:,1:] + 1) * 255 / 2, 0, 255 )\n",
- " result_a = cv2.GaussianBlur(result_a ,(7,7),6)\n",
- " result_a = np.expand_dims(result_a, axis=2)\n",
- " result = (result_a/255 * result_bgr + (1 - result_a/255) * ((ae_input + 1) * 255 / 2)).astype('uint8')\n",
- " result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)\n",
- " result = cv2.resize(result, (roi_size[1],roi_size[0]))\n",
- " result_a = np.expand_dims(cv2.resize(result_a, (roi_size[1],roi_size[0])), axis=2)\n",
- " return result, result_a\n",
- "\n",
- "def get_init_mask_map(image):\n",
- " return np.zeros_like(image)\n",
- "\n",
- "def get_init_comb_img(input_img):\n",
- " comb_img = np.zeros([input_img.shape[0], input_img.shape[1]*2,input_img.shape[2]])\n",
- " comb_img[:, :input_img.shape[1], :] = input_img\n",
- " comb_img[:, input_img.shape[1]:, :] = input_img\n",
- " return comb_img \n",
- "\n",
- "def get_init_triple_img(input_img, no_face=False):\n",
- " if no_face:\n",
- " triple_img = np.zeros([input_img.shape[0], input_img.shape[1]*3,input_img.shape[2]])\n",
- " triple_img[:, :input_img.shape[1], :] = input_img\n",
- " triple_img[:, input_img.shape[1]:input_img.shape[1]*2, :] = input_img \n",
- " triple_img[:, input_img.shape[1]*2:, :] = (input_img * .15).astype('uint8') \n",
- " return triple_img\n",
- " else:\n",
- " triple_img = np.zeros([input_img.shape[0], input_img.shape[1]*3,input_img.shape[2]])\n",
- " return triple_img\n",
- "\n",
- "def get_mask(roi_image, h, w):\n",
- " mask = np.zeros_like(roi_image)\n",
- " mask[h//15:-h//15,w//15:-w//15,:] = 255\n",
- " mask = cv2.GaussianBlur(mask,(15,15),10)\n",
- " return mask\n",
- "\n",
- "def process_video(input_img): \n",
- " # modify this line to reduce input size\n",
- " #input_img = input_img[:, input_img.shape[1]//3:2*input_img.shape[1]//3,:] \n",
- " image = input_img\n",
- " faces = get_faces_bbox(image, model=\"hog\")\n",
- " \n",
- " if len(faces) == 0:\n",
- " comb_img = get_init_comb_img(input_img)\n",
- " triple_img = get_init_triple_img(input_img, no_face=True)\n",
- " \n",
- " mask_map = get_init_mask_map(image)\n",
- " comb_img = get_init_comb_img(input_img)\n",
- " global prev_x0, prev_x1, prev_y0, prev_y1\n",
- " global frames \n",
- " for (x0, y1, x1, y0) in faces: \n",
- " # smoothing bounding box\n",
- " if use_smoothed_bbox:\n",
- " if frames != 0:\n",
- " x0, x1, y0, y1 = get_smoothed_coord(x0, x1, y0, y1)\n",
- " set_global_coord(x0, x1, y0, y1)\n",
- " else:\n",
- " set_global_coord(x0, x1, y0, y1)\n",
- " frames += 1\n",
- " h = x1 - x0\n",
- " w = y1 - y0\n",
- " \n",
- " cv2_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)\n",
- " roi_image = cv2_img[x0+h//15:x1-h//15,y0+w//15:y1-w//15,:]\n",
- " roi_size = roi_image.shape \n",
- " \n",
- " ae_input = cv2.resize(roi_image, (64,64))/255. * 2 - 1 \n",
- " result, result_a = generate_face(ae_input, path_abgr_A, roi_size)\n",
- " mask_map[x0+h//15:x1-h//15, y0+w//15:y1-w//15,:] = result_a\n",
- " mask_map = np.clip(mask_map + .15 * input_img, 0, 255 ) \n",
- " \n",
- " if use_smoothed_mask:\n",
- " mask = get_mask(roi_image, h, w)\n",
- " roi_rgb = cv2.cvtColor(roi_image, cv2.COLOR_BGR2RGB)\n",
- " smoothed_result = mask/255 * result + (1-mask/255) * roi_rgb\n",
- " comb_img[x0+h//15:x1-h//15, input_img.shape[1]+y0+w//15:input_img.shape[1]+y1-w//15,:] = smoothed_result\n",
- " else:\n",
- " comb_img[x0+h//15:x1-h//15, input_img.shape[1]+y0+w//15:input_img.shape[1]+y1-w//15,:] = result\n",
- " \n",
- " triple_img = get_init_triple_img(input_img)\n",
- " triple_img[:, :input_img.shape[1]*2, :] = comb_img\n",
- " triple_img[:, input_img.shape[1]*2:, :] = mask_map\n",
- " \n",
- " # ========== Change rthe following line to ==========\n",
- " return comb_img[:, input_img.shape[1]:, :] # return only result image\n",
- " # return comb_img # return input and result image combined as one\n",
- " #return triple_img #return input,result and mask heatmap image combined as one"
- ]
- },
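- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Before rendering a full clip, it can save time to try `process_video` on a single frame. The sketch below reuses the `INPUT_VIDEO.mp4` placeholder filename from the next cell and initializes the same globals that cell sets."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Single-frame check of the face-swap pipeline (illustration only).\n",
- "prev_x0 = prev_x1 = prev_y0 = prev_y1 = 0\n",
- "frames = 0\n",
- "video_scaling_offset = 0\n",
- "test_frame = VideoFileClip(\"INPUT_VIDEO.mp4\").get_frame(1.0) # RGB frame at t = 1 s\n",
- "plt.imshow(process_video(test_frame).astype('uint8'))\n",
- "plt.axis('off')"
- ]
- },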
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[MoviePy] >>>> Building video OUTPUT_VIDEO.mp4\n",
- "[MoviePy] Writing video OUTPUT_VIDEO.mp4\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|████████████████████████████████████████████████████████████████████████████████| 341/341 [00:46<00:00, 7.12it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[MoviePy] Done.\n",
- "[MoviePy] >>>> Video ready: OUTPUT_VIDEO.mp4 \n",
- "\n",
- "Wall time: 46.9 s\n"
- ]
- }
- ],
- "source": [
- "# Variables for smoothing bounding box\n",
- "global prev_x0, prev_x1, prev_y0, prev_y1\n",
- "global frames\n",
- "prev_x0 = prev_x1 = prev_y0 = prev_y1 = 0\n",
- "frames = 0\n",
- "video_scaling_offset = 0 # Increase by 1 if OOM happens.\n",
- "\n",
- "output = 'OUTPUT_VIDEO.mp4'\n",
- "clip1 = VideoFileClip(\"INPUT_VIDEO.mp4\")\n",
- "clip = clip1.fl_image(process_video)#.subclip(11, 13) #NOTE: this function expects color images!!\n",
- "%time clip.write_videofile(output, audio=False)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### gc.collect() sometimes solves memory error"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "8409"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import gc\n",
- "gc.collect()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }