Advertisement
Guest User

Untitled

a guest
Feb 11th, 2016
54
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.58 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import os
  4. import re
  5. import sys
  6. import xml.etree.ElementTree as ElementTree
  7.  
  8.  
  9. class XMLScrubber(object):
  10. ALPHANUMERIC_REGEX = '^[a-zA-Z0-9]*$'
  11. MASKED_CHARACTER = 'X'
  12.  
  13. @staticmethod
  14. def process(input_file_path, out_dir_path, element_xpaths):
  15. print 'Scrubbing request file from {0}'.format(input_file_path)
  16.  
  17. html_response_tree = ElementTree.parse(input_file_path)
  18. for element_xpath in element_xpaths:
  19. pii_element = html_response_tree.find(element_xpath)
  20. masked_chars = [re.sub(XMLScrubber.ALPHANUMERIC_REGEX, XMLScrubber.MASKED_CHARACTER, i) for i in
  21. pii_element.text]
  22. pii_element.text = ''.join(masked_chars)
  23.  
  24. out_file_path = '{0}/{1}'.format(out_dir_path, os.path.basename(input_file_path))
  25.  
  26. print('Outputting scrubbed file to {0}'.format(out_file_path))
  27. html_response_tree.write(out_file_path)
  28.  
  29.  
  30. # TODO: need to pass XPath IDs as command line arguments
  31. XPATHS = [".//*[@id=\"Initial_PROJECTED_TO_STATED_RATIO\"]"]
  32.  
  33. file_extension = sys.argv[1]
  34. input_directory = sys.argv[2]
  35. output_directory = sys.argv[3]
  36.  
  37. print('File extension: {0}'.format(file_extension))
  38. print('Input directory: {0}'.format(input_directory))
  39. print('Output directory: {0}'.format(output_directory))
  40.  
  41. for file_path in os.listdir(input_directory):
  42. if file_path.endswith('.{0}'.format(file_extension)):
  43. full_file_path = "{0}/{1}".format(input_directory, file_path)
  44. print "Processing {0}".format(full_file_path)
  45. XMLScrubber.process(full_file_path, output_directory, XPATHS)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement