Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # imports and basic notebook setup
- from cStringIO import StringIO
- import numpy as np
- import scipy.ndimage as nd
- import PIL.Image
- import sys
- from IPython.display import clear_output, Image, display
- from google.protobuf import text_format
- import caffe
- # If your GPU supports CUDA and Caffe was built with CUDA support,
- # uncomment the following to run Caffe operations on the GPU.
- # caffe.set_mode_gpu()
- # caffe.set_device(0) # select GPU device if multiple devices exist
def saveimg(a, fname, fmt='jpeg'):
    """Clip an image array to [0, 255], convert to uint8 and write it to disk.

    :param a: array-like image data, H x W x C, any numeric dtype
    :param fname: output file path
    :param fmt: PIL format name (default 'jpeg')
    """
    a = np.uint8(np.clip(a, 0, 255))
    # Fix: the original allocated an unused StringIO buffer (`f = StringIO()`),
    # a leftover from the notebook's in-browser display path; PIL writes
    # straight to the file, so the buffer is gone.
    PIL.Image.fromarray(a).save(fname, fmt)
- # a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
    """Convert an (H, W, RGB) image into Caffe's (BGR, H, W) float32 layout
    and subtract the network's stored dataset mean."""
    chw = np.rollaxis(img, 2)   # (H, W, C) -> (C, H, W)
    bgr = chw[::-1]             # reverse channel axis: RGB -> BGR
    return np.float32(bgr) - net.transformer.mean['data']
def deprocess(net, img):
    """Inverse of preprocess: add the dataset mean back and return the image
    in (H, W, RGB) layout."""
    restored = img + net.transformer.mean['data']  # still (BGR, H, W)
    return np.dstack(restored[::-1])               # BGR -> RGB, then stack to (H, W, C)
def objective_L2(dst):
    """Default DeepDream objective: grow the L2 norm of the blob's activations.

    The gradient of 0.5 * ||data||^2 with respect to data is data itself,
    so the blob's activations are copied straight into its diff.
    """
    np.copyto(dst.diff, dst.data)
def make_step(net, step_size=1.5, end='inception_4c/output',
              jitter=32, clip=True, objective=objective_L2):
    '''Basic gradient ascent step.

    Updates the image held in the net's 'data' blob in place:
      1. apply a random spatial jitter shift,
      2. forward to blob `end` and let `objective` fill its diff,
      3. backprop to the input and take a mean-normalized ascent step,
      4. undo the jitter and, if `clip`, clamp pixels to the valid range.

    :param net: caffe net; its 'data' blob holds the working image
    :param step_size: scale of the normalized ascent step
    :param end: name of the blob whose activations are being maximized
    :param jitter: max absolute pixel shift (drawn uniformly per axis)
    :param clip: clamp so deprocessed pixel values stay within [0, 255]
    :param objective: callable(dst_blob) that writes the gradient into dst.diff
    '''
    src = net.blobs['data'] # input image is stored in Net's 'data' blob
    dst = net.blobs[end]

    # random shift regularizes the ascent and reduces grid artifacts
    ox, oy = np.random.randint(-jitter, jitter+1, 2)
    src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2) # apply jitter shift

    net.forward(end=end)
    objective(dst)  # specify the optimization objective
    net.backward(start=end)
    g = src.diff[0]
    # apply normalized ascent step to the input image
    # (dividing by the mean |gradient| makes step_size scale-independent)
    src.data[:] += step_size/np.abs(g).mean() * g

    src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2) # unshift image

    if clip:
        # data is mean-subtracted, so valid pixels lie in [-mean, 255-mean]
        bias = net.transformer.mean['data']
        src.data[:] = np.clip(src.data, -bias, 255-bias)
def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4,
              end='inception_4c/output', clip=True, **step_params):
    '''Run the multi-octave DeepDream loop and return the dreamed image.

    Builds a pyramid of `octave_n` downscaled copies of `base_img`, then,
    from coarsest to finest, runs `iter_n` gradient-ascent steps per octave,
    carrying the hallucinated "detail" (dreamed minus base) up the pyramid.

    :param net: caffe net whose 'data' blob is reshaped per octave
    :param base_img: input image as an (H, W, RGB) array
    :param iter_n: gradient ascent steps per octave
    :param octave_n: number of pyramid levels
    :param octave_scale: downscale factor between consecutive octaves
    :param end: blob name passed through to make_step
    :param clip: passed through to make_step
    :param step_params: extra keyword args forwarded to make_step
    :returns: dreamed image in (H, W, RGB) layout
    '''
    # prepare base images for all octaves (index 0 = full resolution)
    octaves = [preprocess(net, base_img)]
    for i in xrange(octave_n-1):
        octaves.append(nd.zoom(octaves[-1], (1, 1.0/octave_scale,1.0/octave_scale), order=1))

    src = net.blobs['data']
    detail = np.zeros_like(octaves[-1]) # allocate image for network-produced details
    # iterate coarsest -> finest
    for octave, octave_base in enumerate(octaves[::-1]):
        h, w = octave_base.shape[-2:]
        if octave > 0:
            # upscale details from the previous octave
            h1, w1 = detail.shape[-2:]
            detail = nd.zoom(detail, (1, 1.0*h/h1,1.0*w/w1), order=1)

        src.reshape(1,3,h,w) # resize the network's input image size
        src.data[0] = octave_base+detail
        for i in xrange(iter_n):
            make_step(net, end=end, clip=clip, **step_params)

            # visualization / progress output
            vis = deprocess(net, src.data[0])
            if not clip: # adjust image contrast if clipping is disabled
                vis = vis*(255.0/np.percentile(vis, 99.98))
            #showarray(vis)
            print octave, i, end, vis.shape
            clear_output(wait=True)

        # extract details produced on the current octave
        detail = src.data[0]-octave_base
    # returning the resulting image
    return deprocess(net, src.data[0])
def resize_image(data, sz=(256, 256)):
    """
    Resize image. Please use this resize logic for best results instead of the
    caffe, since it was used to generate training dataset
    :param str data:
        The image data
    :param sz tuple:
        The resized image dimensions
    :returns bytearray:
        A byte array with the resized image
    """
    raw = str(data)
    image = PIL.Image.open(StringIO(raw))
    # the model was trained on RGB JPEGs, so normalize the mode first
    if image.mode != "RGB":
        image = image.convert('RGB')
    resized = image.resize(sz, resample=PIL.Image.BILINEAR)
    # round-trip through an in-memory JPEG, exactly as the training
    # pipeline did, and hand back the encoded bytes
    buf = StringIO()
    resized.save(buf, format='JPEG')
    buf.seek(0)
    return bytearray(buf.read())
def make_nsfw_input(filename, net):
    """Load an image file and preprocess it the same way classify_nsfw.py does.

    Resizes to 256x256 via a JPEG round-trip, center-crops to the net's input
    size, then applies the open_nsfw transformer: channels first, dataset mean
    subtracted, values rescaled to [0, 255], channels swapped RGB -> BGR.

    :param filename: path of the image to load
    :param net: caffe net whose 'data' blob shape defines the crop size
    :returns: float array of shape (1, 3, h, w) ready for net.forward
    """
    image_data = open(filename).read()
    img_data_rs = resize_image(image_data, sz=(256, 256))
    caffe_image = caffe.io.load_image(StringIO(img_data_rs))

    # center-crop the 256x256 image to the network's (h, w) input
    H, W, _ = caffe_image.shape
    _, _, h, w = net.blobs['data'].data.shape
    h_off = max((H - h) / 2, 0)
    w_off = max((W - w) / 2, 0)
    input_cropped = caffe_image[h_off:h_off + h, w_off:w_off + w, :]

    caffe_transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    caffe_transformer.set_transpose('data', (2, 0, 1))  # move image channels to outermost
    caffe_transformer.set_mean('data', np.array([104, 117, 123]))  # subtract the dataset-mean value in each channel
    caffe_transformer.set_raw_scale('data', 255)  # rescale from [0, 1] to [0, 255]
    caffe_transformer.set_channel_swap('data', (2, 1, 0))  # swap channels from RGB to BGR

    transformed_image = caffe_transformer.preprocess('data', input_cropped)
    # prepend a batch dimension: (3, h, w) -> (1, 3, h, w)
    transformed_image.shape = (1,) + transformed_image.shape
    return transformed_image
def main(argv):
    """Entry point.

    Usage (argv is sys.argv):
      script.py input.jpg                      -> print the NSFW score only
      script.py input.jpg output.jpg           -> plain DeepDream on layer_name
      script.py input.jpg output.jpg guide.jpg -> guided DeepDream toward the
                                                  guide image's features
    """
    input_name = argv[1]
    input_img = np.float32(PIL.Image.open(input_name))

    model_path = '../nsfw_model/' # substitute your path here
    net_fn = model_path + 'deploy.prototxt'
    param_fn = model_path + 'resnet_50_1by2_nsfw.caffemodel'

    # Patching model to be able to compute gradients.
    # Note that you can also manually add "force_backward: true" line to "deploy.prototxt".
    model = caffe.io.caffe_pb2.NetParameter()
    text_format.Merge(open(net_fn).read(), model)
    model.force_backward = True
    open('tmp.prototxt', 'w').write(str(model))

    net = caffe.Classifier('tmp.prototxt', param_fn,
                           mean = np.float32([104.0, 117.0, 123.0]), # ImageNet mean, training set dependent
                           channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB

    # layer whose activations will be maximized by the dream
    #layer_name = 'conv_stage2_block3_branch2a'
    layer_name = 'conv_stage3_block0_proj_shortcut' #conv_stage1_block3_branch2a'

    # compute input according to classify_nsfw.py
    transformed_image = make_nsfw_input(input_name, net)

    if (len(argv) == 2):
        # score-only mode: classify and exit without dreaming
        print "Computing NSFW score"
        input_name = net.inputs[0]
        all_outputs = net.forward_all(blobs=['prob'], **{input_name: transformed_image})
        outputs = all_outputs['prob'][0].astype(float)
        # index 1 of 'prob' is the NSFW class probability (0 is SFW)
        print "score: ", outputs[1]
        return

    output_name = argv[2]
    if len(argv) > 3:
        # guided mode: capture the guide image's activations at layer_name
        guide_name = argv[3]
        transformed_guide = make_nsfw_input(guide_name, net)
        input_name = net.inputs[0]
        all_outputs = net.forward_all(blobs=['prob', layer_name], **{input_name: transformed_guide})
        guide_features = all_outputs[layer_name].copy();
        print "guide score: ", all_outputs['prob'][0].astype(float)[1]

        def objective_guide(dst):
            # steer the dream toward the guide: for each spatial position,
            # use as gradient the best-matching guide feature vector
            x = dst.data[0].copy()
            y = guide_features
            ch = x.shape[0]
            x = x.reshape(ch,-1)
            y = y.reshape(ch,-1)
            A = x.T.dot(y) # compute the matrix of dot-products with guide features
            dst.diff[0].reshape(ch,-1)[:] = y[:,A.argmax(1)] # select ones that match best

        output_img=deepdream(net, input_img, 10, octave_n=6, end=layer_name, objective=objective_guide)
        print "FIN DU REVE"
    else:
        # unguided mode: plain L2 objective on layer_name
        #img2=deepdream(net, img, 10, 6, end='conv_stage3_block0_proj_shortcut') #conv_stage3_block1_branch2b') #eltwise_stage3_block1')
        output_img=deepdream(net, input_img, 10, 8, end=layer_name)

    saveimg(output_img, output_name)

if __name__ == '__main__':
    main(sys.argv)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement