# final data code

"""mask """

# import the necessary packages
import cv2
import os, os.path
import numpy as np
# Debug info: report the OpenCV version in use.
print ("OpenCV version: " + cv2.__version__)

# Image directory and the file extensions that are accepted as images.
imageDir = "/home/kfir/data" #specify your path here
valid_image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] #specify your valid extensions here
# Lower-case the extensions so they compare equal to the lower-cased file suffix.
valid_image_extensions = [ext.lower() for ext in valid_image_extensions]

# Collect the full path of every directory entry whose extension is accepted,
# then order the paths alphabetically.
image_path_list = []
for file in os.listdir(imageDir):
    extension = os.path.splitext(file)[1]
    if extension.lower() in valid_image_extensions:
        image_path_list.append(os.path.join(imageDir, file))
image_path_list.sort()

# Lower and upper boundaries of the wanted colour in HSV coordinates, and the
# 5x5 kernel used for the morphological filtering. None of them depends on the
# image, so they are built once instead of on every iteration.
lower = np.array([90,85,0])
upper = np.array([120,255,255])
kernel = np.ones((5,5), np.uint8)

# Loop through image_path_list: mask each image by colour, box the contours
# that pass the size/position filter and show the result until a key is
# pressed (escape stops the loop).
for imagePath in image_path_list:
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread signals an unreadable file by returning None, not by raising
        print("Could not read image, skipping: " + imagePath)
        continue

    # blur the frame and convert the blurred frame to HSV color space
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    # NOTE(review): cv2.imread returns BGR data while the conversion code is
    # RGB2HSV; the hue bounds above were presumably tuned with this swap in
    # place, so it is left unchanged -- confirm before "correcting" it.
    hsv = cv2.cvtColor(blurred, cv2.COLOR_RGB2HSV)

    #construct a mask for the desired color range, then perform an opening
    #followed by a closing to remove any small noise left
    mask = cv2.inRange(hsv, lower, upper)
#    mask = cv2.erode(mask, None, iterations = 2) #erosion
#    mask = cv2.dilate(mask, None, iterations = 2)   #dilation
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)#opening
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)#closing
    # masked copy of the image, taken before any rectangle is drawn on it
    res = cv2.bitwise_and(image,image, mask = mask)

    # find contours in the mask and initialize the current
    # (x, y) center of the objects
    # ([-2] picks the contour list whether findContours returns 2 or 3 values)
    cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
    center = None

    for cnt in cnts:
        x,y,w,h = cv2.boundingRect(cnt)#bounding rectangle pixel locations
        M = cv2.moments(cnt)#the contour moments
        if M["m00"] == 0:
            # zero area: the center of mass is undefined (would divide by zero)
            continue
        center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))#x and y center of mass

        # only proceed if the rectangle is within the accepted size range and
        # its center row lies between 480 and 600 (original note: only look
        # in the bottom third part of the image, overlook the car)
        if (15 < w < 60 and 25 < h < 80) and 480 < center[1] < 600:
#            print("printing %s",imagePath)
            # draw the box on the image
            cv2.rectangle(image,(x,y),(x+w,y+h),(0,255,0),2)
            #annotate the bounding box to a new file
#            up_x = x
#            up_y = y
#            down_x = x + w
#            down_y = y + h
#            with open(imagePath +".txt","a+")  as dataFile:
#                dataFile.write(" ".join([" cone",str(up_x),str(up_y),str(down_x),str(down_y)]))

    # show the resulting image together with the mask and the masked image
    cv2.imshow('image',image)
    cv2.imshow('mask',mask)
    cv2.imshow('res',res)
    #0 = wait indefinitely
    key = cv2.waitKey(0)

    # close the windows both before the next image and before leaving the loop
    cv2.destroyAllWindows()
    #exit when escape key is pressed
    if key == 27: # escape
        break

xml_files = []
# get the text files
def get_txt_files(path):
    """Append the full path of every ``.txt`` entry of ``path`` to ``xml_files``.

    Only the immediate entries of ``path`` are inspected and the extension
    comparison is case-sensitive. Nothing is returned; the module-level
    ``xml_files`` list is extended in place.
    """
    xml_files.extend(
        os.path.join(path, entry)
        for entry in os.listdir(path)
        if os.path.splitext(entry)[1] == ".txt"
    )

# Collect the .txt annotation files of the image directory, ordered by path.
get_txt_files(imageDir)
xml_files.sort()
#the xml format
def create_xml(text,name):
    """Build the lines of a PASCAL VOC style annotation holding one "cone" box.

    Parameters
    ----------
    text : readable text stream
        Its content is split on single spaces and fields 2 to 5 are taken as
        xmin, ymin, xmax and ymax, e.g. ``" cone 10 20 30 40"`` (field 0 is
        the empty string in front of the leading space, field 1 the label).
    name : str
        Value written inside the ``<filename>`` element.

    Returns
    -------
    list of str
        The 32 newline-terminated lines of the annotation. Image size
        (1280x720x3) and the source/owner entries are fixed values.

    Raises
    ------
    ValueError
        If the text has fewer than the six space-separated fields needed.
    """
    # Read and split once instead of re-splitting for every coordinate.
    fields = text.read().split(' ')
    if len(fields) < 6:
        raise ValueError(
            "annotation for %r needs 6 space-separated fields, found %d"
            % (name, len(fields))
        )
    # Strip each coordinate so that a trailing newline in the text file does
    # not end up inside the <ymax> element.
    x_min, y_min, x_max, y_max = (field.strip() for field in fields[2:6])
    file_name = name
    object_name = "cone"
    return [
        '<annotation>\n',
        '\t<folder>VOC2007</folder>\n',
        '\t<filename>'+file_name+'</filename>\n',
        '\t<source>\n',
        '\t\t<database>The VOC2007 Database</database>\n',
        '\t\t<annotation>PASCAL VOC2007</annotation>\n',
        '\t\t<image>flickr</image>\n',
        '\t\t<flickrid>192073981</flickrid>\n',
        '\t</source>\n',
        '\t<owner>\n',
        '\t\t<flickrid>tobeng</flickrid>\n',
        '\t\t<name>kfir_yotam</name>\n',
        '\t</owner>\n',
        '\t<size>\n',
        '\t\t<width>1280</width>\n',
        '\t\t<height>720</height>\n',
        '\t\t<depth>3</depth>\n',
        '\t</size>\n',
        '\t<segmented>0</segmented>\n',
        '\t<object>\n',
        '\t\t<name>'+object_name+'</name>\n',
        '\t\t<pose>unspecified</pose>\n',
        '\t\t<truncated>0</truncated>\n',
        '\t\t<difficult>0</difficult>\n',
        '\t\t<bndbox>\n',
        '\t\t\t<xmin>'+x_min+'</xmin>\n',
        '\t\t\t<ymin>'+y_min+'</ymin>\n',
        '\t\t\t<xmax>'+x_max+'</xmax>\n',
        '\t\t\t<ymax>'+y_max+'</ymax>\n',
        '\t\t</bndbox>\n',
        '\t</object>\n',
        '</annotation>\n',
    ]

#write a list of XML to file
def write_to_file(xmls,file_name):
    """Write the strings in ``xmls`` to ``file_name``, one after another.

    An existing file is overwritten. No separators are added, so every string
    is expected to carry its own line ending. The file is closed even when a
    write fails.
    """
    with open(file_name,'w') as f:
        f.writelines(xmls)

def convert(file_name, text):
    """Turn one annotation text stream into an ``.xml`` file.

    ``text`` is handed to ``create_xml`` together with ``file_name``, which
    becomes the ``<filename>`` entry. The result is written to
    ``<file_name up to its first '.'>.xml``, relative to the current working
    directory.
    """
    # Use the parameter itself; do not rely on a module-level variable that
    # happens to hold the same value.
    xmls = create_xml(text,file_name)
    write_to_file(xmls,file_name.split('.')[0]+'.xml')
#loop through .txt files and create .xml
for txt_path in xml_files:
    # base name of the text file without its last extension
    name = os.path.splitext(os.path.basename(txt_path))[0]
    # the context manager closes every annotation file after it is converted
    with open(txt_path,'r') as text:
        convert(name, text)


""" lexicograph """
import os
import shutil

itemPath = []
dirPath = []
#Directory where your crawler saves the images
rootDir = "/home/kfir/data"
#Directory where you want to organize your images
destDir = "/home/kfir/data/adapt"

#find all the jpg files and save the path
def openFolder(rootDir):
    """Recursively record the ``.jpg`` files and sub-directories below ``rootDir``.

    Every sub-directory path is appended to the module-level ``dirPath`` list
    and every file whose extension is exactly ``.jpg`` to ``itemPath``.
    Nothing is returned.
    """
    for listItem in os.listdir(rootDir):
        path = os.path.join(rootDir,listItem)
        if os.path.isdir(path):
            dirPath.append(path)
            openFolder(path)
        # compare the real extension: a bare "jpg" suffix test would also
        # accept names such as "notjpg"
        elif os.path.isfile(path) and os.path.splitext(path)[1] == ".jpg":
            itemPath.append(path)


if __name__ == "__main__":
    openFolder(rootDir)
    # shutil.move needs the destination directory to exist
    os.makedirs(destDir, exist_ok=True)
    dest_abs = os.path.abspath(destDir)
    # destDir may lie inside rootDir; openFolder then also returns the images
    # an earlier run already organized. Leave those where they are, and sort
    # the rest so the numbering does not depend on directory listing order.
    files = sorted(
        item for item in itemPath
        if os.path.dirname(os.path.abspath(item)) != dest_abs
    )
    #Give each image a unique and consistent name
    counter = 0
    for item in files:
        target = os.path.join(destDir, str(counter).zfill(6)+".jpg")
        # never overwrite an image that already owns this serial number
        while os.path.exists(target):
            counter += 1
            target = os.path.join(destDir, str(counter).zfill(6)+".jpg")
        shutil.move(item, target)
        counter += 1


""" create training and test sets """
# split the annotated images into a training set and a test set
# creates "train", "trainval", "test", "cone_train" and "cone_test" .txt files with the corresponding image serial numbers
import cv2
import os, os.path
import random
import sys

im_path = "/home/kfir/data"
xml_path =  "/home/kfir/data/Annotations"

# Names (extension removed) of the .xml annotation files found in xml_path.
xml_files = []
for file in os.listdir(xml_path):
    stem, extension = os.path.splitext(file)
    if extension == ".xml":
        xml_files.append(stem)

# Names (extension removed) of the .jpg images found in im_path.
images = []
for file in os.listdir(im_path):
    stem, extension = os.path.splitext(file)
    if extension == ".jpg":
        images.append(stem)

# Keep both name lists in alphabetical order.
xml_files.sort()
images.sort()

#i = -1
#j = -1
#counter=0
#while i+1 < len(xml_files):
#    j += 1
#    i += 1
#    while xml_files[i] != images[j]:
##        os.remove(im_path+"/"+images[j]+".jpg")
#        print(images[j])
#        counter+=1
#        j += 1
#print("counter",counter)


random.shuffle(xml_files)#randomizing the images
# Split the data: the first 80% (rounded) for training, the rest for testing.
# The two slices share the same cut index, so they cannot overlap and either
# one may be empty without any further check.
n_train = round(0.8*len(xml_files))
#sort the file names
train_data = sorted(xml_files[:n_train])
test_data = sorted(xml_files[n_train:])

#create the train and test txt files
# (the context managers close the files even if a write fails)
with open("train.txt","w+") as train, \
        open("trainval.txt","w+") as trainval, \
        open("cone_train.txt","w+") as cone_train:
    for entry in train_data:
        train.write("%s\n" % entry)
        trainval.write("%s\n" % entry)
        cone_train.write("%s 1\n" % entry)

with open("test.txt","w+") as test, \
        open("cone_test.txt","w+") as cone_test:
    for entry in test_data:
        test.write("%s\n" % entry)
        cone_test.write("%s 1\n" % entry)