Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Code:
# Sample input data. Per the original note below, this content actually lives
# in a text file and is inlined here only for clarity; recordManager reads it
# from "list_of_lines.txt".
# Layout: one tag per line, each record wrapped in <RECORD> ... </RECORD>.
# The sample ends with an opening <RECORD> that has no closing tag.
- # stored as a text file, but here for clarity
- list_of_lines = """
- <RECORD>
- <TITLE>MISS</TITLE>
- <NAME>ELIZABETH</NAME>
- <SURNAME>II</SURNAME>
- <ADDRESS1>1 BUCKINGHAM PALACE</ADDRESS1>
- <ADDRESS2>LONDON</ADDRESS2>
- <ADDRESS3>GREATER LONDON</ADDRESS3>
- <POST_CODE>W1 11A</POST_CODE>
- <CASE_NUM>Q1QQ1234</CASE_NUM>
- <ID>32145698</ID>
- <LAST_UPDATE_DATE>2016-12-12</LAST_UPDATE_DATE>
- </RECORD>
- <RECORD>
- <TITLE>MR</TITLE>
- <NAME>PRINCE</NAME>
- <SURNAME>PHILLIP</SURNAME>
- <ADDRESS1>1 BUCKINGHAM PALACE</ADDRESS1>
- <ADDRESS2>LONDON</ADDRESS2>
- <ADDRESS3>GREATER LONDON</ADDRESS3>
- <POST_CODE>W1 11A</POST_CODE>
- <CASE_NUM>K5KK4321</CASE_NUM>
- <ID>56987412</ID>
- <LAST_UPDATE_DATE>2017-01-16</LAST_UPDATE_DATE>
- </RECORD>
- <RECORD>
- """
class recordManager:
    """Print the ``<RECORD>`` blocks of a tag-per-line text file as dicts.

    Every complete record is printed as ``{case_num: {tag: value, ...}}``,
    where ``case_num`` is the text of the record's ``<CASE_NUM>`` element
    (``None`` when the record has none).

    NOTE(review): the example output that accompanied the original code
    showed a record keyed by its ``<ID>`` value, whereas the code has always
    keyed on ``<CASE_NUM>``. The code's behaviour is kept -- confirm which
    key is actually wanted.
    """

    def __init__(self):
        # Path of the text file that holds the records.
        self.r_location = "list_of_lines.txt"

    def record_splitter(self, beg):
        """Print every complete record found after line index *beg*.

        Lines whose zero-based index is ``<= beg`` are not inspected; the
        line at *beg* is taken to be the record's opening tag. The file is
        read once, front to back. An opening ``<RECORD>`` that is never
        closed (such as a trailing one at the end of the file) is ignored
        instead of raising.

        Args:
            beg: zero-based line index to start scanning after.
        """
        for record in self._iter_records(beg):
            # Parenthesised form prints identically on Python 2 and 3.
            print(record)

    def dic_factory(self, beg, end):
        """Print the record between lines *beg* and *end*, then carry on.

        The lines strictly between the two indices are parsed into one
        record and printed; scanning then continues after line ``end + 1``
        via :meth:`record_splitter`, so calling this still processes the
        remainder of the file.

        Args:
            beg: zero-based index of the record's opening line (exclusive).
            end: zero-based index of the record's closing line (exclusive),
                or ``None`` when no closing tag was found, in which case
                nothing is printed.
        """
        if end is None:
            return
        body = []
        with open(self.r_location) as handle:
            for count, line in enumerate(handle):
                if count >= end:
                    break
                if count > beg:
                    body.append(line)
        print(self._parse_fields(body))
        self.record_splitter(end + 1)

    def _iter_records(self, beg):
        """Yield one ``{case_num: fields}`` dict per complete record after *beg*."""
        import re  # local import: the file has no import block to extend

        re_beg_spl = re.compile(r".*<RECORD>")
        re_end_spl = re.compile(r".*(</RECORD>)")
        body = []
        with open(self.r_location) as handle:
            for count, line in enumerate(handle):
                if count <= beg:
                    continue
                if re_end_spl.match(line):
                    yield self._parse_fields(body)
                    body = []
                    # The line right after a closing tag is assumed to be
                    # the next opening tag and is skipped, as before.
                    beg = count + 1
                elif re_beg_spl.match(line):
                    # A fresh opening tag discards anything collected so
                    # far: a record starts at the last <RECORD> seen.
                    body = []
                else:
                    body.append(line)

    @staticmethod
    def _parse_fields(lines):
        """Turn the tag lines of one record into ``{case_num: {tag: value}}``."""
        import re  # local import: the file has no import block to extend

        re_casenum = re.compile(r".*<CASE_NUM>(.*)</CASE_NUM>")
        re_tag_val = re.compile(r".*<(\w*)>(.*)<.*")
        id_ = None
        fields = {}
        for line in lines:
            case_match = re_casenum.match(line)
            if case_match:
                id_ = case_match.group(1)
            tag_match = re_tag_val.match(line)
            if tag_match:
                # A repeated tag keeps its last value.
                fields[tag_match.group(1)] = tag_match.group(2)
        return {id_: fields}
if __name__ == '__main__':
    # Script entry point: process the whole record file from its first line.
    # The original had two guards making the same call; they are merged here.
    # recordManager takes no constructor arguments, and record_splitter
    # prints its results and returns nothing, so there is no value to unpack.
    inst_fol = recordManager()
    inst_fol.record_splitter(0)
def __init__(self):
    """Locate the first record in the file and hand it to ``dic_factory``.

    NOTE(review): this looks like an alternative draft of
    ``recordManager.__init__`` that performs the scan at construction time
    -- confirm it is meant to replace the constructor before using it.

    Sets ``self.r_location`` and ``self.beg``. ``self.beg`` ends up as the
    index of the last ``<RECORD>`` line seen before the first ``</RECORD>``
    line. If no closing tag exists, nothing is dispatched.
    """
    import re  # local import: the file has no import block to extend

    # Same patterns that record_splitter defines under these names.
    re_beg_spl = re.compile(r".*<RECORD>")
    re_end_spl = re.compile(r".*(</RECORD>)")

    self.r_location = "list_of_lines.txt"
    self.beg = 0
    end = None
    with open(self.r_location) as handle:
        for count, line in enumerate(handle):
            if count <= self.beg:
                continue
            if re_end_spl.match(line):
                end = count
                break
            if re_beg_spl.match(line):
                self.beg = count
    # Only dispatch when a closing tag was actually found.
    if end is not None:
        recordManager.dic_factory(self, self.beg, end)
Add Comment
Please sign in to add a comment.