Guest User

Untitled

a guest
May 26th, 2018
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.73 KB | None | 0 0
  1. import argparse
  2. import sys
  3. import os
  4. import json
  5. import logging
  6. import requests
  7. from PIL import Image
  8.  
  9. ################### INSTALLATION NOTE #######################
  10. ##############################################################
  11.  
  12. ## pip install requests
  13. ## pip install pillow
  14.  
  15. ###############################################################
  16. ###############################################################
  17.  
  18.  
  19. #enable info logging.
  20. logging.getLogger().setLevel(logging.INFO)
  21.  
  22. def maybe_download(image_url, image_dir):
  23. """Download the image if not already exist, return the location path"""
  24. fileName = image_url.split("/")[-1]
  25. filePath = os.path.join(image_dir, fileName)
  26. if (os.path.exists(filePath)):
  27. return filePath
  28.  
  29. #else download the image
  30. try:
  31. response = requests.get(image_url)
  32. if response.status_code == 200:
  33. with open(filePath, 'wb') as f:
  34. f.write(response.content)
  35. return filePath
  36. else:
  37. raise ValueError, "Not a 200 response"
  38. except Exception as e:
  39. logging.exception("Failed to download image at " + image_url + " \n" + str(e) + "\nignoring....")
  40. raise e
  41.  
  42.  
  43. def get_xml_for_bbx(bbx_label, bbx_data, width, height):
  44.  
  45. # We store the left top and right bottom as point '0' and point '1'
  46. xmin = int(bbx_data['points'][0]['x']*width)
  47. ymin = int(bbx_data['points'][0]['y']*height)
  48. xmax = int(bbx_data['points'][1]['x']*width)
  49. ymax = int(bbx_data['points'][1]['y']*height)
  50.  
  51. xml = "<object>\n"
  52. xml = xml + "\t<name>" + bbx_label + "</name>\n"
  53. xml = xml + "\t<pose>Unspecified</pose>\n"
  54. xml = xml + "\t<truncated>Unspecified</truncated>\n"
  55. xml = xml + "\t<difficult>Unspecified</difficult>\n"
  56. xml = xml + "\t<occluded>Unspecified</occluded>\n"
  57. xml = xml + "\t<bndbox>\n"
  58. xml = xml + "\t\t<xmin>" + str(xmin) + "</xmin>\n"
  59. xml = xml + "\t\t<xmax>" + str(xmax) + "</xmax>\n"
  60. xml = xml + "\t\t<ymin>" + str(ymin) + "</ymin>\n"
  61. xml = xml + "\t\t<ymax>" + str(ymax) + "</ymax>\n"
  62. xml = xml + "\t</bndbox>\n"
  63. xml = xml + "</object>\n"
  64. return xml
  65.  
  66.  
  67. def convert_to_PascalVOC(dataturks_labeled_item, image_dir, xml_out_dir):
  68.  
  69. """Convert a dataturks labeled item to pascalVOCXML string.
  70. Args:
  71. dataturks_labeled_item: JSON of one labeled image from dataturks.
  72. image_dir: Path to directory to downloaded images (or a directory already having the images downloaded).
  73. xml_out_dir: Path to the dir where the xml needs to be written.
  74. Returns:
  75. None.
  76. Raises:
  77. None.
  78. """
  79. try:
  80. data = json.loads(dataturks_labeled_item)
  81. width = data['annotation'][0]['imageWidth']
  82. height = data['annotation'][0]['imageHeight']
  83. image_url = data['content']
  84.  
  85. filePath = maybe_download(image_url, image_dir)
  86.  
  87. with Image.open(filePath) as img:
  88. width, height = img.size
  89.  
  90. fileName = filePath.split("/")[-1]
  91. image_dir_folder_Name = image_dir.split("/")[-1]
  92.  
  93.  
  94. xml = "<annotation>\n<folder>" + image_dir_folder_Name + "</folder>\n"
  95. xml = xml + "<filename>" + fileName +"</filename>\n"
  96. xml = xml + "<path>" + filePath +"</path>\n"
  97. xml = xml + "<source>\n\t<database>Unknown</database>\n</source>\n"
  98. xml = xml + "<size>\n"
  99. xml = xml + "\t<width>" + str(width) + "</width>\n"
  100. xml = xml + "\t<height>" + str(height) + "</height>\n"
  101. xml = xml + "\t<depth>Unspecified</depth>\n"
  102. xml = xml + "</size>\n"
  103. xml = xml + "<segmented>Unspecified</segmented>\n"
  104.  
  105. for bbx in data['annotation']:
  106. bbx_labels = bbx['label']
  107. #handle both list of labels or a single label.
  108. if not isinstance(bbx_labels, list):
  109. bbx_labels = [bbx_labels]
  110.  
  111. for bbx_label in bbx_labels:
  112. xml = xml + get_xml_for_bbx(bbx_label, bbx, width, height)
  113.  
  114. xml = xml + "</annotation>"
  115.  
  116. #output to a file.
  117. xmlFilePath = os.path.join(xml_out_dir, fileName + ".xml")
  118. with open(xmlFilePath, 'w') as f:
  119. f.write(xml)
  120. return True
  121. except Exception as e:
  122. logging.exception("Unable to process item " + dataturks_labeled_item + "\n" + "error = " + str(e))
  123. return False
  124.  
  125. def main():
  126. #make sure everything is setup.
  127. if (not os.path.isdir(image_download_dir)):
  128. logging.exception("Please specify a valid directory path to download images, " + image_download_dir + " doesn't exist")
  129. return
  130. if (not os.path.isdir(pascal_voc_xml_dir)):
  131. logging.exception("Please specify a valid directory path to write Pascal VOC xml files, " + pascal_voc_xml_dir + " doesn't exist")
  132. return
  133. if (not os.path.exists(dataturks_JSON_FilePath)):
  134. logging.exception(
  135. "Please specify a valid path to dataturks JSON output file, " + dataturks_JSON_FilePath + " doesn't exist")
  136. return
  137.  
  138. lines = []
  139. with open(dataturks_JSON_FilePath, 'r') as f:
  140. lines = f.readlines()
  141.  
  142. if (not lines or len(lines) == 0):
  143. logging.exception(
  144. "Please specify a valid path to dataturks JSON output file, " + dataturks_JSON_FilePath + " is empty")
  145. return
  146.  
  147. count = 0;
  148. success = 0
  149. for line in lines:
  150. status = convert_to_PascalVOC(line, image_download_dir, pascal_voc_xml_dir)
  151. if (status):
  152. success = success + 1
  153.  
  154. count+=1;
  155. if (count % 10 == 0):
  156. logging.info(str(count) + " items done ...")
  157.  
  158. logging.info("Completed: " + str(success) + " items done, " + str(len(lines) - success) + " items ignored due to errors")
  159.  
  160.  
  161. def create_arg_parser():
  162. """"Creates and returns the ArgumentParser object."""
  163.  
  164. parser = argparse.ArgumentParser(description='Converts Dataturks output JSON file for Image bounding box to Pascal VOC format.')
  165. parser.add_argument('dataturks_JSON_FilePath',
  166. help='Path to the JSON file downloaded from Dataturks.')
  167. parser.add_argument('image_download_dir',
  168. help='Path to the directory where images will be dowloaded (if not already found in the directory).')
  169. parser.add_argument('pascal_voc_xml_dir',
  170. help='Path to the directory where Pascal VOC XML files will be stored.')
  171. return parser
  172.  
  173. if __name__ == '__main__':
  174. arg_parser = create_arg_parser()
  175. parsed_args = arg_parser.parse_args(sys.argv[1:])
  176. global dataturks_JSON_FilePath
  177. global image_download_dir
  178. global pascal_voc_xml_dir
  179.  
  180. #setup global paths needed accross the script.
  181. dataturks_JSON_FilePath = parsed_args.dataturks_JSON_FilePath
  182. image_download_dir = parsed_args.image_download_dir
  183. pascal_voc_xml_dir = parsed_args.pascal_voc_xml_dir
  184. main()
Add Comment
Please, Sign In to add comment