# create_name_list.py

import os
from PIL import Image
import translate_xml
import tensorflow as tf
import cv2

# Directory that holds the dataset images.
imagepath = './datasets/VOCdevkit/VOC2007/JPEGImages'
# Alternative dataset location, kept for switching by hand:
# imagepath = './ssd_model/VOCdevkit/VOC2007/JPEGImages'
# Annotation directory, obtained by replacing 'JPEGImages' with 'Annotations'
# in imagepath.
xmlpath = imagepath.replace('JPEGImages', 'Annotations')
# Label map file; its raw text is loaded below into label_map_string.
PATH_TO_LABELS = './pb/pascal_label_map.pbtxt'

# The image directory is listed once, at import time; run() iterates this list.
files = os.listdir(imagepath)
# Presumably a PIL image mode -> depth lookup; it is not referenced anywhere
# else in the visible part of this file.
# NOTE(review): 'L' maps to 0 here -- confirm that is intended.
mode_to_depth = {'L': 0, 'P': 1, 'RGB': 3, 'RGBA': 4, 'CMYK': 4, 'I;16': 2, 'I;16L': 2, 'I;16B': 2}

# Haar cascade used by run() for face detection. The path is relative, so it
# is resolved against the current working directory.
cascPath = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascPath)

# Read the whole label map as plain text; run() checks object names against
# this string with a substring test.
with tf.gfile.GFile(PATH_TO_LABELS, 'r') as fid:
    label_map_string = fid.read()


def _detect_faces(image_file):
    """Return the face rectangles the module-level Haar cascade finds.

    Args:
        image_file: Path of the image to inspect.

    Returns:
        Whatever ``faceCascade.detectMultiScale`` returns for the grayscale
        version of the image (an empty sequence when nothing is detected).

    Raises:
        Exception: If OpenCV cannot decode ``image_file``.
    """
    bgr = cv2.imread(image_file)
    if bgr is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, so fail here with a message that names the real problem.
        raise Exception('image read Error')
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    return faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.05,
        minNeighbors=1,
        minSize=(30, 30),
        # The cascade flag, not the unrelated imread flag IMREAD_GRAYSCALE.
        flags=cv2.CASCADE_SCALE_IMAGE,
    )


def _check_annotation(annotation, name, width, height):
    """Validate one parsed annotation against the image it belongs to.

    Args:
        annotation: The ``'annotation'`` mapping produced by
            ``translate_xml.xml_file_to_dicts``.
        name: Image file name including the ``.jpg`` extension.
        width: Image width in pixels as reported by PIL.
        height: Image height in pixels as reported by PIL.

    Raises:
        Exception: With a short reason when the annotation's file name, size,
            objects, bounding boxes or object names fail validation.
    """
    stem = os.path.splitext(name)[0]
    # The annotation may record the file name with or without its extension.
    if annotation['filename'] not in (name, stem):
        raise Exception('xml file Error')

    size = annotation['size']
    if str(width) != size['width'] \
            or str(height) != size['height'] \
            or '3' != size['depth']:
        raise Exception('object size Error')

    if 'object' not in annotation:
        raise Exception('none objects Error')

    objects = annotation['object']
    # Index-based access is kept on purpose: the container type returned by
    # translate_xml is not visible from this file.
    for index in range(len(objects)):
        entry = objects[index]
        # Check that the keys exist before reading them, so a missing key is
        # reported with the intended message.
        if 'bndbox' not in entry:
            raise Exception('bndbox Error')
        if 'name' not in entry:
            raise Exception('name Error')
        xmax = int(entry['bndbox']['xmax'])
        ymax = int(entry['bndbox']['ymax'])
        if xmax >= width or ymax >= height or xmax == 0 or ymax == 0:
            raise Exception('object width or height Error')
        # Substring test against the raw label map text.
        if entry['name'] not in label_map_string:
            raise Exception('name Error')


def run():
    """Validate the dataset, write the training list and delete bad samples.

    Every ``.jpg`` entry of the module-level ``files`` list is accepted only
    if a face is detected in it and its annotation passes
    ``_check_annotation``. The extension-less names of accepted images are
    written, sorted and one per line, to ``ImageSets/Main/train.txt``.
    Rejected images are deleted together with their annotation file.

    Directories and entries without a ``.jpg`` extension are skipped: their
    annotation path cannot be derived, so they can never validate.

    Raises:
        RuntimeError: If the Haar cascade failed to load. Without this check
            every detection call would fail and every image in the dataset
            would be queued for deletion.
    """
    if faceCascade.empty():
        raise RuntimeError('Failed to load cascade classifier: ' + cascPath)

    accepted = []
    rejected = []
    for name in files:
        image_file = os.path.join(imagepath, name)
        if os.path.isdir(image_file) or not name.endswith('.jpg'):
            continue
        stem = os.path.splitext(name)[0]
        try:
            # len() rather than truthiness: the result may be a numpy array.
            if not len(_detect_faces(image_file)):
                raise Exception("No Face Detected !")

            with Image.open(image_file) as image:
                width, height = image.size

            dicts = translate_xml.xml_file_to_dicts(
                os.path.join(xmlpath, stem + '.xml'))
            _check_annotation(dicts['annotation'], name, width, height)
        except Exception as ex:
            # Per-file boundary: any failure (decode, parse, validation)
            # marks the sample as bad and is reported, as before.
            rejected.append(name)
            print(str(ex))
        else:
            accepted.append(stem)

    accepted.sort()
    # Opened only after validation, so an interrupted run does not leave a
    # truncated list behind. One name per line, with no leading blank line.
    # Alternative: './ssd_model/VOCdevkit/VOC2007/ImageSets/Main/train.txt'
    with open('./datasets/VOCdevkit/VOC2007/ImageSets/Main/train.txt', 'w') as fo:
        fo.writelines(stem + '\n' for stem in accepted)

    for name in rejected:
        try:
            os.remove(os.path.join(imagepath, name))
            os.remove(os.path.join(xmlpath, os.path.splitext(name)[0] + '.xml'))
        except OSError as ex:
            print(str(ex))


if __name__ == '__main__':
    run()