Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- [u'Intake Received Date:',
- u'9/11/2012',
- u'Intake ID:',
- u'CA00325127',
- u'Allegation Category:',
- u'Infection Control',
- u'Investigation Finding:',
- u'Substantiated',
- u'Intake Received Date:',
- u'5/14/2012',
- u'Intake ID:',
- u'CA00310421',
- u'Allegation Category:',
- u'Quality of Care/Treatment',
- u'Investigation Finding:',
- u'Substantiated',
- u'Intake Received Date:',
- u'8/15/2011',
- u'Intake ID:',
- u'CA00279396',
- u'Allegation Category:',
- u'Quality of Care/Treatment',
- u'Sub Categories:',
- u'Screening',
- u'Investigation Finding:',
- u'Unsubstantiated',]
- 'Intake Received Date', 'Intake ID', 'Allegation Category', 'Sub Categories', 'Investigation Finding'
- '9/11/2012', 'CA00325127', 'Infection Control', '', 'Substantiated'
- '5/14/2012', 'CA00310421', 'Quality of Care/Treatment', '', 'Substantiated'
- '8/15/2011', 'CA00279396', 'Quality of Care/Treatment', 'Screening', 'Unsubstantiated'
# Split the flat label/value list into one chunk per complaint.  Items that
# match 'Intake Received Date' serve as delimiters: groupby() yields them as
# their own (truthy-keyed) runs, which are skipped here.
compgroup = [
    list(chunk)
    for matched, chunk in groupby(
        complist, key=lambda item: re.search(r'Intake Received Date', item))
    if not matched
]

# The delimiter label was dropped above, so put it back at the front of
# every chunk to restore the label/value pairing.
for chunk in compgroup:
    chunk.insert(0, u'Intake Received Date')

# Build one dict per complaint by pairing each label with the value that
# follows it (zipping a single iterator with itself consumes two items per
# pair).
dic = []
for chunk in compgroup:
    pairs = iter(chunk)
    dic.append(dict(zip(pairs, pairs)))
from itertools import groupby

# Flat list of alternating label/value strings covering several complaints
# back to back.  Optional fields (here 'Sub Categories:') are present only
# for the complaints that have them.
data = [
    u'Intake Received Date:',
    u'9/11/2012',
    u'Intake ID:',
    u'CA00325127',
    u'Allegation Category:',
    u'Infection Control',
    u'Investigation Finding:',
    u'Substantiated',
    u'Intake Received Date:',
    u'5/14/2012',
    u'Intake ID:',
    u'CA00310421',
    u'Allegation Category:',
    u'Quality of Care/Treatment',
    u'Investigation Finding:',
    u'Substantiated',
    u'Intake Received Date:',
    u'8/15/2011',
    u'Intake ID:',
    u'CA00279396',
    u'Allegation Category:',
    u'Quality of Care/Treatment',
    u'Sub Categories:',
    u'Screening',
    u'Investigation Finding:',
    u'Unsubstantiated',
]

# Output column order.  The labels keep their trailing colon so they match
# the keys found in `data`.
headers = [
    'Intake Received Date:',
    'Intake ID:',
    'Allegation Category:',
    'Sub Categories:',
    'Investigation Finding:',
]

# Label that opens every complaint record.
sep = 'Intake Received Date:'

# groupby() cuts `data` into alternating runs of "is the separator" and
# "is not the separator".  Each non-separator run is one complaint minus its
# leading label, so the label is prepended again to keep label/value pairs
# aligned.
compgroup = []
for is_sep, chunk in groupby(data, key=lambda item: item == sep):
    if not is_sep:
        compgroup.append([sep] + list(chunk))

# Header row, shown without the trailing colons.  The single-argument call
# form of print works on both Python 2 and Python 3.
print(', '.join(h.rstrip(':') for h in headers))

# One comma-separated row per complaint; a field the complaint does not
# have is rendered as '*'.
rows = []
for record in compgroup:
    pairs = iter(record)
    # Zipping one iterator with itself consumes two items per pair,
    # yielding (label, value) tuples.
    fields = dict(zip(pairs, pairs))
    rows.append(', '.join(fields.get(h, '*') for h in headers))
    print(rows[-1])
- Intake Received Date, Intake ID, Allegation Category, Sub Categories, Investigation Finding
- 9/11/2012, CA00325127, Infection Control, *, Substantiated
- 5/14/2012, CA00310421, Quality of Care/Treatment, *, Substantiated
- 8/15/2011, CA00279396, Quality of Care/Treatment, Screening, Unsubstantiated
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement