Advertisement
Guest User

Untitled

a guest
Feb 11th, 2016
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.55 KB | None | 0 0
  1. import csv
  2.  
  3. class ThisDialect(csv.excel):
  4. lineterminator='\r'
  5.  
  6.  
  7. jobnames = list(csv.DictReader(open("job-name.csv", 'U'), dialect=ThisDialect()))[1:]
  8. paircounts = list(csv.DictReader(open("pair-count.csv", 'U'), dialect=ThisDialect()))[1:]
  9.  
  10. code_to_name = {}
  11. for j in jobnames:
  12. code_to_name[j['code']] = j['occupation']
  13. # if "Nurse" in j['occupation']:
  14. # print j['occupation'], j['code']
  15. # 1/0
  16.  
  17. # Sexes: 1 is male, 2 is female
  18. married_counts = {}
  19. t1 = 0
  20. t2 = 0
  21. for p in paircounts:
  22. if p['occ_sp'] in ('0', ''):
  23. continue
  24. k = (p['sex_sp'], p['occ_sp'])
  25. if k[1] == '0':
  26. continue
  27. if p['sex_sp'] == p['sex']:
  28. continue
  29. married_counts[k] = married_counts.get(k, 0) + float(p['total'])
  30. if p['sex_sp'] == '1':
  31. t1 += float(p['total'])
  32. if p['sex_sp'] == '2':
  33. t2 += float(p['total'])
  34.  
  35. print "Most commonly-married female professions:"
  36. most_common_female = []
  37. for (sex, code), count in married_counts.items():
  38. if sex != '2':
  39. continue
  40. most_common_female.append((count, code))
  41. most_common_female.sort(reverse=True)
  42. for count, code in most_common_female[:10]:
  43. this_perc = 100.0 * int(count) / t2
  44. print "%s: %.1f%%" % (code_to_name[code], this_perc)
  45. print
  46.  
  47. # investigate_code = '10' # CEOs
  48. investigate_code = '1010' # Programmers
  49. # vestigate_code = '3255' # Registered Nurses
  50. # investigate_code = '4220' # Janitors
  51. # investigate_code = '5700' # Secretaries
  52.  
  53. MARRYING_SEX = '1'
  54. MARRIED_SEX = '2'
  55. assert MARRIED_SEX != MARRYING_SEX, "sorry the script needs to be updated to look at same-sex marriages"
  56. tx = (t2 if MARRIED_SEX == '2' else t1)
  57.  
  58. by_count = []
  59. total = 0
  60. for p in paircounts:
  61. if p['occ_sp'] in ('0', ''):
  62. continue
  63. if p['sex'] != MARRYING_SEX or p['sex_sp'] != MARRIED_SEX:
  64. continue
  65. if p['occ'] == investigate_code:
  66. by_count.append((float(p['total']), p['occ_sp']))
  67. total += by_count[-1][0]
  68. by_count.sort(reverse=True)
  69.  
  70. seen = set()
  71. normalized = []
  72. over_expected = []
  73. for count, code in by_count:
  74. this_perc = 100.0 * int(count) / total
  75. global_perc = married_counts[(MARRIED_SEX, code)] / tx * 100
  76. seen.add(code)
  77. over_expected.append(((count - total * global_perc / 100) / total, count, code))
  78. normalized.append((this_perc / global_perc, count, code))
  79.  
  80. print "Most common for %s to marry by count:" % code_to_name[investigate_code]
  81. for count, code in by_count[:5]:
  82. this_perc = 100.0 * int(count) / total
  83. global_perc = married_counts[(MARRIED_SEX, code)] / tx * 100
  84. print "%s: %d, %.1f%%, %.1f%%" % (code_to_name[code], int(count), this_perc, global_perc)
  85. print
  86.  
  87. print "Most common for %s to marry, normalized vs the population:" % code_to_name[investigate_code]
  88. normalized.sort(reverse=True)
  89. for mult, count, code in normalized[:5]:
  90. this_perc = 100.0 * int(count) / total
  91. global_perc = married_counts[(MARRIED_SEX, code)] / t2 * 100
  92. print "%s: %.1fx (%.1f%% vs %.1f%%; %d found)" % (code_to_name[code], mult, this_perc, global_perc, count)
  93. print
  94.  
  95. print "Most common for %s to marry, over expected:" % code_to_name[investigate_code]
  96. over_expected.sort(reverse=True)
  97. for over, count, code in over_expected[:5]:
  98. this_perc = 100.0 * int(count) / total
  99. global_perc = married_counts[(MARRIED_SEX, code)] / t2 * 100
  100. print "%s: %.1f%% more than expected (%.1f%% vs %.1f%%)" % (code_to_name[code], 100 * over, this_perc, global_perc)
  101. print
  102.  
  103. '''
  104. for sex, code in married_counts:
  105. if sex != MARRIED_SEX:
  106. continue
  107. if code not in seen:
  108. print "didn't see:", code_to_name[code], married_counts[(MARRIED_SEX, code)] / t2 * 100
  109. '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement