# Script

import csv
import os
import random
# import paramiko
# import pandas as pd
# import scipy as sp
# import pipes
# import subprocess
from itertools import groupby
from shutil import copyfile
# from sklearn.cross_validation import train_test_split


def read_images(filename=None, images_root="/home/synaps/images", max_images=5000):
    """
    Reading list of images from file
    :param filename: name of csv file, that contains path to files; every row
        is expected to have four columns: plate, link, microsegment, prob
    :param images_root: local directory in which the images are looked up
    :param max_images: reading stops once more than this number of images is
        collected, so the result holds at most max_images + 1 elements
    :return: list with elements following format : [image_name, image_path, microsegment, prob]
    """
    import sys

    # csv module needs a binary file on Python 2 and a text file opened
    # with newline='' on Python 3
    if sys.version_info[0] < 3:
        csv_file = open(filename, 'rb')
    else:
        csv_file = open(filename, 'r', newline='')

    images_list = []
    with csv_file:
        for row in csv.reader(csv_file):
            if len(images_list) > max_images:
                break
            # blank or malformed rows can not be unpacked, skip them
            if len(row) != 4:
                continue
            plate, link, microsegment, prob = row
            link_parts = link.split('/')
            image_name = link_parts[-1]
            # image is stored under its last directory and its file name
            image_path = '/'.join(link_parts[-2:])
            if ".jpg" not in image_name:
                continue
            if os.path.isfile("{}/{}".format(images_root, image_path)):
                print("Image exists and added")
                images_list.append([image_name, image_path, microsegment, prob])
    return images_list


def file_exists(path):
    """
    Check over SSH whether an image file exists on the remote host
    :param path: path of the image relative to /home/synaps/images/
    :return: True if the remote `[ -f ... ]` test printed OK, False otherwise
    """
    # module-level import of paramiko is commented out, so import it here
    import paramiko
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote

    local_path = "/home/synaps/images/{}".format(path)
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): host and credentials are hard-coded in the source;
        # consider moving them to configuration
        client.connect("81.211.114.194", username="aleksandr", password="P@ssw0rd", port=8822)
        # path is quoted, so spaces or shell metacharacters can not break the command
        _, stdout, _ = client.exec_command("[ -f {} ] && echo OK".format(quote(local_path)))
        # bytes literal works both for Python 2 str and Python 3 bytes output
        return b"OK" in stdout.read()
    finally:
        # connection is closed even if connect or the command fails
        client.close()


def select_images(img_list, limit=5000, min_group_size=5):
    """
    Select images which have enough volume in population
    :param img_list: list of images to select; element [2] of every image is
        the key images are grouped by
    :param limit: no further group is added once the result holds at least
        this number of images (groups are always added as a whole)
    :param min_group_size: minimal number of images in a group to be selected
    :return: list of selected images
    """
    # Group over the whole list, keeping groups in order of first appearance.
    # Adjacent-only grouping would split a key that is not stored contiguously.
    groups = {}
    keys_in_order = []
    for img in img_list:
        key = img[2]
        if key not in groups:
            groups[key] = []
            keys_in_order.append(key)
        groups[key].append(img)

    res = []
    for key in keys_in_order:
        if len(res) >= limit:
            break
        members = groups[key]
        if len(members) >= min_group_size:
            res.extend(members)
    return res


def save_images(data, y, folder_name, dataset_root="./datasets/alexander/data",
                storage_root="/home/synaps/images"):
    """
    Write label file for the images and copy the images to dataset folder
    :param data: list of image paths relative to storage_root
    :param y: list of labels, one per image in data
    :param folder_name: name of the dataset part; used both as name of the
        folder with images and as name of the label file (<folder_name>.txt)
    :param dataset_root: directory in which the folder and the label file are created
    :param storage_root: directory the images are copied from
    :return: None
    """
    folder_path = "{}/{}/".format(dataset_root, folder_name)
    # copyfile does not create missing directories
    if not os.path.isdir(folder_path):
        os.makedirs(folder_path)

    # every line of the label file is "<image file name> <label>"
    with open("{}/{}.txt".format(dataset_root, folder_name), "w") as labels_file:
        for img, label in zip(data, y):
            labels_file.write("{} {}\n".format(img.split('/')[-1], label))

    for img in data:
        img_path = "{}/{}".format(storage_root, img)
        print(img_path)
        # images are stored flat in the dataset folder under their file name
        copyfile(img_path, folder_path + img_path.split('/')[-1])

def cross_validation(data, test_size=0.25):
    """
    Randomly split images into learn and validate parts and save them
    with save_images as "train" and "val"
    :param data: list of images; element [1] of every image is passed on as
        image path and element [2] as its label
    :param test_size: fraction of images that goes to the validate part
    :return: None
    """
    # shuffle a copy, so the list of the caller keeps its order
    shuffled = list(data)
    random.shuffle(shuffled)

    # first m images form the learn part, the rest the validate part
    m = int((1 - test_size) * len(shuffled))
    X = [car[1] for car in shuffled]
    y = [car[2] for car in shuffled]

    save_images(data=X[:m], y=y[:m], folder_name="train")
    save_images(data=X[m:], y=y[m:], folder_name="val")


# Read the list of images, keep those that pass select_images and
# split them into train and val parts.
images_list = read_images(filename='Microsegments_data.csv')
print(len(images_list))
selected_imgs = select_images(images_list)
print(len(selected_imgs))
if selected_imgs:
    print(selected_imgs[0])
    cross_validation(selected_imgs)
else:
    # indexing an empty selection would raise IndexError
    print("No images selected, nothing to split")

# !scripts/create_net.sh utkin
# !scripts/make_net_mean.sh utkin
# !$CAFFE_ROOT/build/tools/caffe train --solver=models/bvlc_reference_caffenet_utkin/solver.prototxt