Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # imports and basic notebook setup
- from cStringIO import StringIO
- import numpy as np
- import scipy.ndimage as nd
- import PIL.Image
- import sys
- from IPython.display import clear_output, Image, display
- from google.protobuf import text_format
- import caffe
- # If your GPU supports CUDA and Caffe was built with CUDA support,
- # uncomment the following to run Caffe operations on the GPU.
- # caffe.set_mode_gpu()
- # caffe.set_device(0) # select GPU device if multiple devices exist
def saveimg(a, fname, fmt='jpeg'):
    """Clip an image array to [0, 255], convert to uint8 and write it to disk.

    :param a: array-like image data, H x W x C, any numeric dtype
    :param fname: output file path
    :param fmt: PIL format name (default 'jpeg')
    """
    a = np.uint8(np.clip(a, 0, 255))
    # Fix: the original allocated an unused StringIO buffer (`f = StringIO()`),
    # a leftover from the notebook's in-browser display path; PIL writes
    # straight to the file, so the buffer is gone.
    PIL.Image.fromarray(a).save(fname, fmt)
- # a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
    """Convert an (H, W, RGB) image into Caffe's (BGR, H, W) float32 layout
    and subtract the network's stored dataset mean."""
    chw = np.rollaxis(img, 2)   # (H, W, C) -> (C, H, W)
    bgr = chw[::-1]             # reverse channel axis: RGB -> BGR
    return np.float32(bgr) - net.transformer.mean['data']
def deprocess(net, img):
    """Inverse of preprocess: add the dataset mean back and return the image
    in (H, W, RGB) layout."""
    restored = img + net.transformer.mean['data']  # still (BGR, H, W)
    return np.dstack(restored[::-1])               # BGR -> RGB, then stack to (H, W, C)
def objective_L2(dst):
    """Default DeepDream objective: grow the L2 norm of the blob's activations.

    The gradient of 0.5 * ||data||^2 with respect to data is data itself,
    so the blob's activations are copied straight into its diff.
    """
    np.copyto(dst.diff, dst.data)
def make_step(net, step_size=1.5, end='inception_4c/output',
              jitter=32, clip=True, objective=objective_L2):
    '''Basic gradient ascent step.

    Updates the image held in the net's 'data' blob in place:
      1. apply a random spatial jitter shift,
      2. forward to blob `end` and let `objective` fill its diff,
      3. backprop to the input and take a mean-normalized ascent step,
      4. undo the jitter and, if `clip`, clamp pixels to the valid range.

    :param net: caffe net; its 'data' blob holds the working image
    :param step_size: scale of the normalized ascent step
    :param end: name of the blob whose activations are being maximized
    :param jitter: max absolute pixel shift (drawn uniformly per axis)
    :param clip: clamp so deprocessed pixel values stay within [0, 255]
    :param objective: callable(dst_blob) that writes the gradient into dst.diff
    '''
    src = net.blobs['data'] # input image is stored in Net's 'data' blob
    dst = net.blobs[end]

    # random shift regularizes the ascent and reduces grid artifacts
    ox, oy = np.random.randint(-jitter, jitter+1, 2)
    src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2) # apply jitter shift

    net.forward(end=end)
    objective(dst)  # specify the optimization objective
    net.backward(start=end)
    g = src.diff[0]
    # apply normalized ascent step to the input image
    # (dividing by the mean |gradient| makes step_size scale-independent)
    src.data[:] += step_size/np.abs(g).mean() * g

    src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2) # unshift image

    if clip:
        # data is mean-subtracted, so valid pixels lie in [-mean, 255-mean]
        bias = net.transformer.mean['data']
        src.data[:] = np.clip(src.data, -bias, 255-bias)
def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4,
              end='inception_4c/output', clip=True, **step_params):
    '''Run the multi-octave DeepDream loop and return the dreamed image.

    Builds a pyramid of `octave_n` downscaled copies of `base_img`, then,
    from coarsest to finest, runs `iter_n` gradient-ascent steps per octave,
    carrying the hallucinated "detail" (dreamed minus base) up the pyramid.

    :param net: caffe net whose 'data' blob is reshaped per octave
    :param base_img: input image as an (H, W, RGB) array
    :param iter_n: gradient ascent steps per octave
    :param octave_n: number of pyramid levels
    :param octave_scale: downscale factor between consecutive octaves
    :param end: blob name passed through to make_step
    :param clip: passed through to make_step
    :param step_params: extra keyword args forwarded to make_step
    :returns: dreamed image in (H, W, RGB) layout
    '''
    # prepare base images for all octaves (index 0 = full resolution)
    octaves = [preprocess(net, base_img)]
    for i in xrange(octave_n-1):
        octaves.append(nd.zoom(octaves[-1], (1, 1.0/octave_scale,1.0/octave_scale), order=1))

    src = net.blobs['data']
    detail = np.zeros_like(octaves[-1]) # allocate image for network-produced details
    # iterate coarsest -> finest
    for octave, octave_base in enumerate(octaves[::-1]):
        h, w = octave_base.shape[-2:]
        if octave > 0:
            # upscale details from the previous octave
            h1, w1 = detail.shape[-2:]
            detail = nd.zoom(detail, (1, 1.0*h/h1,1.0*w/w1), order=1)

        src.reshape(1,3,h,w) # resize the network's input image size
        src.data[0] = octave_base+detail
        for i in xrange(iter_n):
            make_step(net, end=end, clip=clip, **step_params)

            # visualization / progress output
            vis = deprocess(net, src.data[0])
            if not clip: # adjust image contrast if clipping is disabled
                vis = vis*(255.0/np.percentile(vis, 99.98))
            #showarray(vis)
            print octave, i, end, vis.shape
            clear_output(wait=True)

        # extract details produced on the current octave
        detail = src.data[0]-octave_base
    # returning the resulting image
    return deprocess(net, src.data[0])
def resize_image(data, sz=(256, 256)):
    """
    Resize image. Please use this resize logic for best results instead of the
    caffe, since it was used to generate training dataset
    :param str data:
        The image data
    :param sz tuple:
        The resized image dimensions
    :returns bytearray:
        A byte array with the resized image
    """
    raw = str(data)
    image = PIL.Image.open(StringIO(raw))
    # the model was trained on RGB JPEGs, so normalize the mode first
    if image.mode != "RGB":
        image = image.convert('RGB')
    resized = image.resize(sz, resample=PIL.Image.BILINEAR)
    # round-trip through an in-memory JPEG, exactly as the training
    # pipeline did, and hand back the encoded bytes
    buf = StringIO()
    resized.save(buf, format='JPEG')
    buf.seek(0)
    return bytearray(buf.read())
def make_nsfw_input(filename, net):
    """Load an image file and preprocess it the same way classify_nsfw.py does.

    Resizes to 256x256 via a JPEG round-trip, center-crops to the net's input
    size, then applies the open_nsfw transformer: channels first, dataset mean
    subtracted, values rescaled to [0, 255], channels swapped RGB -> BGR.

    :param filename: path of the image to load
    :param net: caffe net whose 'data' blob shape defines the crop size
    :returns: float array of shape (1, 3, h, w) ready for net.forward
    """
    image_data = open(filename).read()
    img_data_rs = resize_image(image_data, sz=(256, 256))
    caffe_image = caffe.io.load_image(StringIO(img_data_rs))

    # center-crop the 256x256 image to the network's (h, w) input
    H, W, _ = caffe_image.shape
    _, _, h, w = net.blobs['data'].data.shape
    h_off = max((H - h) / 2, 0)
    w_off = max((W - w) / 2, 0)
    input_cropped = caffe_image[h_off:h_off + h, w_off:w_off + w, :]

    caffe_transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    caffe_transformer.set_transpose('data', (2, 0, 1))  # move image channels to outermost
    caffe_transformer.set_mean('data', np.array([104, 117, 123]))  # subtract the dataset-mean value in each channel
    caffe_transformer.set_raw_scale('data', 255)  # rescale from [0, 1] to [0, 255]
    caffe_transformer.set_channel_swap('data', (2, 1, 0))  # swap channels from RGB to BGR

    transformed_image = caffe_transformer.preprocess('data', input_cropped)
    # prepend a batch dimension: (3, h, w) -> (1, 3, h, w)
    transformed_image.shape = (1,) + transformed_image.shape
    return transformed_image
def main(argv):
    """Entry point.

    Usage (argv is sys.argv):
      script.py input.jpg                      -> print the NSFW score only
      script.py input.jpg output.jpg           -> plain DeepDream on layer_name
      script.py input.jpg output.jpg guide.jpg -> guided DeepDream toward the
                                                  guide image's features
    """
    input_name = argv[1]
    input_img = np.float32(PIL.Image.open(input_name))

    model_path = '../nsfw_model/' # substitute your path here
    net_fn = model_path + 'deploy.prototxt'
    param_fn = model_path + 'resnet_50_1by2_nsfw.caffemodel'

    # Patching model to be able to compute gradients.
    # Note that you can also manually add "force_backward: true" line to "deploy.prototxt".
    model = caffe.io.caffe_pb2.NetParameter()
    text_format.Merge(open(net_fn).read(), model)
    model.force_backward = True
    open('tmp.prototxt', 'w').write(str(model))

    net = caffe.Classifier('tmp.prototxt', param_fn,
                           mean = np.float32([104.0, 117.0, 123.0]), # ImageNet mean, training set dependent
                           channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB

    # layer whose activations will be maximized by the dream
    #layer_name = 'conv_stage2_block3_branch2a'
    layer_name = 'conv_stage3_block0_proj_shortcut' #conv_stage1_block3_branch2a'

    # compute input according to classify_nsfw.py
    transformed_image = make_nsfw_input(input_name, net)

    if (len(argv) == 2):
        # score-only mode: classify and exit without dreaming
        print "Computing NSFW score"
        input_name = net.inputs[0]
        all_outputs = net.forward_all(blobs=['prob'], **{input_name: transformed_image})
        outputs = all_outputs['prob'][0].astype(float)
        # index 1 of 'prob' is the NSFW class probability (0 is SFW)
        print "score: ", outputs[1]
        return

    output_name = argv[2]
    if len(argv) > 3:
        # guided mode: capture the guide image's activations at layer_name
        guide_name = argv[3]
        transformed_guide = make_nsfw_input(guide_name, net)
        input_name = net.inputs[0]
        all_outputs = net.forward_all(blobs=['prob', layer_name], **{input_name: transformed_guide})
        guide_features = all_outputs[layer_name].copy();
        print "guide score: ", all_outputs['prob'][0].astype(float)[1]

        def objective_guide(dst):
            # steer the dream toward the guide: for each spatial position,
            # use as gradient the best-matching guide feature vector
            x = dst.data[0].copy()
            y = guide_features
            ch = x.shape[0]
            x = x.reshape(ch,-1)
            y = y.reshape(ch,-1)
            A = x.T.dot(y) # compute the matrix of dot-products with guide features
            dst.diff[0].reshape(ch,-1)[:] = y[:,A.argmax(1)] # select ones that match best

        output_img=deepdream(net, input_img, 10, octave_n=6, end=layer_name, objective=objective_guide)
        print "FIN DU REVE"
    else:
        # unguided mode: plain L2 objective on layer_name
        #img2=deepdream(net, img, 10, 6, end='conv_stage3_block0_proj_shortcut') #conv_stage3_block1_branch2b') #eltwise_stage3_block1')
        output_img=deepdream(net, input_img, 10, 8, end=layer_name)

    saveimg(output_img, output_name)

if __name__ == '__main__':
    main(sys.argv)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement