Advertisement
Guest User

Untitled

a guest
Apr 16th, 2014
46
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.73 KB | None | 0 0
  1. [u'Intake Received Date:',
  2. u'9/11/2012',
  3. u'Intake ID:',
  4. u'CA00325127',
  5. u'Allegation Category:',
  6. u'Infection Control',
  7. u'Investigation Finding:',
  8. u'Substantiated',
  9. u'Intake Received Date:',
  10. u'5/14/2012',
  11. u'Intake ID:',
  12. u'CA00310421',
  13. u'Allegation Category:',
  14. u'Quality of Care/Treatment',
  15. u'Investigation Finding:',
  16. u'Substantiated',
  17. u'Intake Received Date:',
  18. u'8/15/2011',
  19. u'Intake ID:',
  20. u'CA00279396',
  21. u'Allegation Category:',
  22. u'Quality of Care/Treatment',
  23. u'Sub Categories:',
  24. u'Screening',
  25. u'Investigation Finding:',
  26. u'Unsubstantiated',]
  27.  
  28. 'Intake Received Date', 'Intake ID', 'Allegation Category', 'Sub Categories', 'Investigation Finding'
  29. '9/11/2012', 'CA00325127', 'Infection Control', '', 'Substantiated'
  30. '5/14/2012', 'CA00310421', 'Quality of Care/Treatment', '', 'Substantiated'
  31. '8/15/2011', 'CA00279396', 'Quality of Care/Treatment', 'Screening', 'Unsubstantiated'
  32.  
  33. compgroup = []
  34. for k, g in groupby(complist, key=lambda x:re.search(r'Intake Received Date', x)):
  35. if not k:
  36. compgroup.append(list(g))
  37.  
  38.  
  39. #Intake Received Date was removed, so insert it back to beginning of each list:
  40. for c in compgroup:
  41. c.insert(0, u'Intake Received Date')
  42.  
  43.  
  44. #Create list of dicts to map the preceding titles to their respective data element:
  45. dic = []
  46. for c in compgroup:
  47. dic.append(dict(zip(*[iter(c)]*2)))
  48.  
  49. data=[u'Intake Received Date:',
  50. u'9/11/2012',
  51. u'Intake ID:',
  52. u'CA00325127',
  53. u'Allegation Category:',
  54. u'Infection Control',
  55. u'Investigation Finding:',
  56. u'Substantiated',
  57. u'Intake Received Date:',
  58. u'5/14/2012',
  59. u'Intake ID:',
  60. u'CA00310421',
  61. u'Allegation Category:',
  62. u'Quality of Care/Treatment',
  63. u'Investigation Finding:',
  64. u'Substantiated',
  65. u'Intake Received Date:',
  66. u'8/15/2011',
  67. u'Intake ID:',
  68. u'CA00279396',
  69. u'Allegation Category:',
  70. u'Quality of Care/Treatment',
  71. u'Sub Categories:',
  72. u'Screening',
  73. u'Investigation Finding:',
  74. u'Unsubstantiated',]
  75.  
  76. from itertools import groupby
  77.  
  78. headers=['Intake Received Date:', 'Intake ID:', 'Allegation Category:', 'Sub Categories:', 'Investigation Finding:']
  79. sep='Intake Received Date:'
  80. compgroup = []
  81. for k, g in groupby(data, key=lambda x: x==sep):
  82. if not k:
  83. compgroup.append([sep]+list(g))
  84.  
  85. print ', '.join(e[0:-1] for e in headers)
  86.  
  87. for di in [dict(zip(*[iter(c)]*2)) for c in compgroup]:
  88. line=[]
  89. for h in headers:
  90. try:
  91. line.append(di[h])
  92. except KeyError:
  93. line.append('*')
  94. print ', '.join(line)
  95.  
  96. Intake Received Date, Intake ID, Allegation Category, Sub Categories, Investigation Finding
  97. 9/11/2012, CA00325127, Infection Control, *, Substantiated
  98. 5/14/2012, CA00310421, Quality of Care/Treatment, *, Substantiated
  99. 8/15/2011, CA00279396, Quality of Care/Treatment, Screening, Unsubstantiated
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement