Advertisement
Guest User

Untitled

a guest
Sep 26th, 2016
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.10 KB | None | 0 0
  1. # Two arguments are required.
  2. # Pre-requisites: 1. Format the dataset as explained by deboc
  3. # 2. create the Imageset text file using 'ls Annotations/ -m | sed s/\\s/\\n/g | sed s/.xml//g | sed s/,//g > ImageSets/train.txt'
  4.  
  5. # 1st argument = ImageSet file name. Example: train.txt
  6. # 2nd argument = path to the custom dataset. Example: ~/py-faster-rcnn/data/VOC_devkit/
  7.  
  8. # Usage instructions
  9. # $ python discard_shuffle_data.py [arg1] [arg2]
  10.  
  11. import numpy as np
  12. import random, sys, os
  13. import cv2
  14. import xml.etree.ElementTree as ET
  15.  
  16. ext=['.png','.jpg','.jpeg']
  17. discard_list=[]
  18.  
  19. def get_image_path_from_index(index):
  20. """
  21. Construct an image path from the image file name form annotation path to be given to cv function for bbox verification
  22. """
  23. for item in ext:
  24. image_path = os.path.join(data_path, 'data', 'Images', index + item)
  25. if os.path.exists(image_path):
  26. break
  27. assert os.path.exists(image_path), \
  28. 'Path does not exist: {}'.format(image_path)
  29. return image_path
  30.  
  31. def get_annotation_path_from_index(index):
  32. "returns annotation path"
  33. annotation_path=os.path.join(data_path, 'data', 'Annotations',index + '.xml')
  34. assert os.path.exists(annotation_path), \
  35. 'Path does not exist: {}'.format(annotation_path)
  36. return annotation_path
  37.  
  38.  
  39. def get_image_size(image_path):
  40. "output image height and width of input image"
  41. img = cv2.imread(image_path,0)
  42. height, width = img.shape[:2]
  43.  
  44. def get_bbox_size():
  45. "will return bbox size which will be used to compare to image_size heigth and width"
  46. "xml parsed here"
  47.  
  48.  
  49. imageset_filename=sys.argv[1]
  50. data_path=sys.argv[2]
  51.  
  52. imageset_file_path = os.path.join(data_path, 'data', 'ImageSets', imageset_filename)
  53. assert os.path.exists(imageset_file_path), \
  54. 'Path does not exist: {}'.format(imageset_file_path)
  55.  
  56.  
  57.  
  58. lines = open(imageset_file_path).readlines()
  59. for line in lines:
  60. image_index=line.strip() #should not contain newline character
  61.  
  62. image_path=get_image_path_from_index(image_index)
  63. annotation_path=get_annotation_path_from_index(image_index)
  64. tree=ET.parse(annotation_path)
  65.  
  66. img_max_width = int(tree.find('.//width').text)
  67. img_max_height= int(tree.find('.//height').text)
  68.  
  69. objs=tree.findall('object')
  70.  
  71. for obj in objs:
  72. bbox = obj.find('bndbox')
  73. x1 = int(bbox.find('xmin').text) - 1
  74. y1 = int(bbox.find('ymin').text) - 1
  75. x2 = int(bbox.find('xmax').text) - 1
  76. y2 = int(bbox.find('ymax').text) - 1
  77.  
  78. box=[x1,y1,x2,y2]
  79. #print box
  80.  
  81. if x1 < -1 or y1 < -1 or (x2-x1) > img_max_width or (y2-y1) > img_max_height:
  82. # Conditional loop for checking the error in bbox from dataset
  83. discard_list.append(image_index)
  84. print "Discarded image index : ",image_index
  85. break
  86.  
  87. print "No. of discarded indexes : ",len(discard_list)
  88.  
  89. random.shuffle(lines) # Shuffles the lines in the list "lines"
  90. text_file = open(imageset_file_path, "w")
  91.  
  92. for line in lines: # Check if line belongs to discard_list. If yes, that image index is not written
  93. if line.strip() not in discard_list:
  94. text_file.write(line)
  95. text_file.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement