Guest User

Untitled

a guest
May 23rd, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.21 KB | None | 0 0
  1. Code:
  2. # stored as a text file, but here for clarity
  3.  
  4. list_of_lines = """
  5. <RECORD>
  6. <TITLE>MISS</TITLE>
  7. <NAME>ELIZABETH</NAME>
  8. <SURNAME>II</SURNAME>
  9. <ADDRESS1>1 BUCKINGHAM PALACE</ADDRESS1>
  10. <ADDRESS2>LONDON</ADDRESS2>
  11. <ADDRESS3>GREATER LONDON</ADDRESS3>
  12. <POST_CODE>W1 11A</POST_CODE>
  13. <CASE_NUM>Q1QQ1234</CASE_NUM>
  14. <ID>32145698</ID>
  15. <LAST_UPDATE_DATE>2016-12-12</LAST_UPDATE_DATE>
  16. </RECORD>
  17. <RECORD>
  18. <TITLE>MR</TITLE>
  19. <NAME>PRINCE</NAME>
  20. <SURNAME>PHILLIP</SURNAME>
  21. <ADDRESS1>1 BUCKINGHAM PALACE</ADDRESS1>
  22. <ADDRESS2>LONDON</ADDRESS2>
  23. <ADDRESS3>GREATER LONDON</ADDRESS3>
  24. <POST_CODE>W1 11A</POST_CODE>
  25. <CASE_NUM>K5KK4321</CASE_NUM>
  26. <ID>56987412</ID>
  27. <LAST_UPDATE_DATE>2017-01-16</LAST_UPDATE_DATE>
  28. </RECORD>
  29. <RECORD>
  30. """
  31.  
  32. class recordManager:
  33.  
  34. def __init__(self):
  35. self.r_location = "list_of_lines.txt"
  36.  
  37. def record_splitter(self, beg):
  38.  
  39. re_beg_spl = re.compile(".*<RECORD>")
  40. re_end_spl = re.compile(".*(<\/RECORD>)")
  41.  
  42. end = None
  43.  
  44. for count, line in enumerate( open(self.r_location) ):
  45. if count > beg:
  46. if re_end_spl.match(line):
  47. end = count
  48.  
  49. if not re_end_spl.match(line):
  50. if re_beg_spl.match(line):
  51. beg = count
  52. else:
  53. break
  54.  
  55. recordManager.dic_factory(self, beg, end)
  56.  
  57.  
  58. def dic_factory(self, beg, end):
  59.  
  60. re_casenum = re.compile(".*<CASE_NUM>(.*)<\/CASE_NUM>")
  61. re_tag_val = re.compile(".*<(\w*)>(.*)<.*")
  62.  
  63. id_ = None
  64. tags = []
  65. vals = []
  66.  
  67. for count, line in enumerate( open(self.r_location) ):
  68.  
  69. if beg < count < end:
  70. if re_casenum.match(line):
  71. m = re_casenum.match(line)
  72. id_ = m.group(1)
  73.  
  74. if re_tag_val.match(line):
  75. m = re_tag_val.match(line)
  76. tags.append( m.group(1) )
  77. vals.append( m.group(2) )
  78.  
  79. beg = end +1
  80. print {id_ : dict(zip(tags, vals)) }
  81. # {32145698 : {'POST_CODE': 'W1 11A', 'SURNAME': 'II', 'NAME': 'ELIZABETH', 'TITLE': 'MISS', 'ADDRESS1': '1 BUCKINGHAM PALACE', 'ADDRESS2': 'LONDON', 'ADDRESS3': 'GREATER LONDON', 'RECORD_TYPE': '1', 'CASE_NUM': 'Q1QQ1234', 'LAST_UPDATE_DATE': '2016-12-12', 'ID': '32145698'}}
  82.  
  83. self.record_splitter(beg)
  84.  
  85.  
  86. if __name__ == '__main__':
  87. inst_fol = record_manager(file)
  88. recordManager.record_splitter(inst_folder, 0)
  89.  
  90. if __name__ == '__main__':
  91. inst_fol = record_manager(file)
  92. beg, end = recordManager.record_splitter(inst_folder, 0)
  93.  
  94. def __init__(self):
  95. self.r_location = "list_of_lines.txt"
  96. self.beg = 0
  97.  
  98. for count, line in enumerate( open(self.r_location) ):
  99. if count > self.beg:
  100. if re_end_spl.match(line):
  101. end = count
  102.  
  103. if not re_end_spl.match(line):
  104. if re_beg_spl.match(line):
  105. self.beg = count
  106. else:
  107. break
  108.  
  109. recordManager.dic_factory(self, self.beg, end)
Add Comment
Please, Sign In to add comment