Advertisement
Guest User

Untitled

a guest
Jan 17th, 2020
115
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.70 KB | None | 0 0
import csv
from collections import Counter
from enum import IntEnum

import matplotlib.pyplot as plt
import numpy as np
  5.  
  6.  
  7. class Column(IntEnum):
  8. RECORD_ID = 0
  9. AGENCY_CODE = 1
  10. AGENCY_NAME = 2
  11. AGENCY_TYPE = 3
  12. CITY = 4
  13. STATE = 5
  14. YEAR = 6
  15. MONTH = 7
  16. INCIDENT = 8
  17. CRIME_TYPE = 9
  18. CRIME_SOLVED = 10
  19. VICTIM_SEX = 11
  20. VICTIM_AGE = 12
  21. VICTIM_RACE = 13
  22. VICTIM_ETHNICITY = 14
  23. PERPETRATOR_SEX = 15
  24. PERPETRATOR_AGE = 16
  25. PERPETRATOR_RACE = 17
  26. PERPETRATOR_ETHNICITY = 18
  27. RELATIONSHIP = 19
  28. WEAPON = 20
  29. VICTIM_COUNT = 21
  30. PERPETRATOR_COUNT = 22
  31. RECORD_SOURCE = 23
  32.  
  33.  
  34. ROW_COUNT = 638455
  35. data = []
  36.  
  37.  
  38. def get_column(column):
  39. col = []
  40.  
  41. for i in range(ROW_COUNT):
  42. r = data[i][column]
  43.  
  44. if r != ' ' and r != '0':
  45. col.append(r)
  46.  
  47. return col
  48.  
  49.  
  50. def get_column_freq(col):
  51. m = dict()
  52. for entry in col:
  53. if entry == 0:
  54. continue
  55.  
  56. if entry in m:
  57. m[entry] += 1
  58. else:
  59. m[entry] = 1
  60.  
  61. return m
  62.  
  63.  
  64. def state_bar_graph():
  65. states = get_column_freq(get_column(Column.STATE))
  66. keys = states.keys()
  67. vals = states.values()
  68.  
  69. pos = np.arange(len(keys))
  70. plt.barh(pos, vals)
  71. plt.yticks(pos, keys)
  72. plt.xlabel('# homicides')
  73. plt.title('Homicides by state 1980 - 2014')
  74. plt.show()
  75.  
  76.  
  77. def year_bar_graph():
  78. years = get_column_freq(get_column(Column.YEAR))
  79. keys = years.keys()
  80. vals = years.values()
  81.  
  82. pos = np.arange(len(keys))
  83. plt.barh(pos, vals)
  84. plt.yticks(pos, keys)
  85. plt.xlabel('# homicides')
  86. plt.title('Homicides in USA 1980 - 2014')
  87. plt.show()
  88.  
  89.  
  90. def weapon_bar_graph():
  91. weapons = get_column_freq(get_column(Column.WEAPON))
  92. keys = weapons.keys()
  93. vals = weapons.values()
  94.  
  95. pos = np.arange(len(keys))
  96. plt.barh(pos, vals)
  97. plt.yticks(pos, keys)
  98. plt.xlabel('# homicides')
  99. plt.title('Homicides by weapon in USA 1980 - 2014')
  100. plt.show()
  101.  
  102.  
  103. def victim_gender_bar_graph():
  104. genders = get_column_freq(get_column(Column.VICTIM_SEX))
  105. keys = genders.keys()
  106. vals = genders.values()
  107.  
  108. pos = np.arange(len(keys))
  109. plt.bar(pos, vals)
  110. plt.xticks(pos, keys)
  111. plt.ylabel('# homicides')
  112. plt.title('Homicides by victim gender in USA 1980 - 2014')
  113. plt.show()
  114.  
  115.  
  116. def perpetrator_race_bar_graph():
  117. races = get_column_freq(get_column(Column.PERPETRATOR_RACE))
  118. keys = races.keys()
  119. vals = races.values()
  120.  
  121. pos = np.arange(len(keys))
  122. plt.bar(pos, vals)
  123. plt.xticks(pos, keys)
  124. plt.ylabel('# homicides')
  125. plt.title('Homicides by perpetrator race in USA 1980 - 2014')
  126. plt.show()
  127.  
  128.  
  129. def victim_age_histogram():
  130. ages = get_column(Column.VICTIM_AGE)
  131. ages = list(map(int, ages))
  132. ages.sort()
  133. plt.hist(ages, bins=100)
  134. plt.xlim(0, 100)
  135. plt.xlabel('age')
  136. plt.ylabel('# homicides')
  137. plt.title('Homicides by victim age in USA 1980 - 2014')
  138. plt.show()
  139.  
  140.  
  141. def perpetrator_age_histogram():
  142. ages = get_column(Column.PERPETRATOR_AGE)
  143. ages = list(map(int, ages))
  144. ages.sort()
  145. plt.hist(ages, bins=100)
  146. plt.xlim(0, 100)
  147. plt.xlabel('age')
  148. plt.ylabel('# homicides')
  149. plt.title('Homicides by perpetrator age in USA 1980 - 2014')
  150. plt.show()
  151.  
  152.  
  153. def siblings_and_weapon_bar_graph():
  154. m = dict()
  155. s = set(['Son', 'Daughter'])
  156. for row in data:
  157. weapon = row[Column.WEAPON]
  158. if row[Column.RELATIONSHIP] in s:
  159. if weapon in m:
  160. m[weapon] += 1
  161. else:
  162. m[weapon] = 1
  163.  
  164. keys = m.keys()
  165. vals = m.values()
  166.  
  167. pos = np.arange(len(keys))
  168. plt.barh(pos, vals)
  169. plt.yticks(pos, keys)
  170. plt.xlabel('# homicides')
  171. plt.title('Homicides with weapons between siblings 1980 - 2014')
  172. plt.show()
  173.  
  174.  
  175. def victim_perp_race_bar_graph():
  176. same = 0
  177. diff = 0
  178. for row in data:
  179. if row[Column.VICTIM_RACE] == row[Column.PERPETRATOR_ETHNICITY]:
  180. same += 1
  181. else:
  182. diff += 1
  183.  
  184. print(same)
  185. print(diff)
  186.  
  187. plt.bar([0, 1], [same, diff])
  188. plt.xticks([0, 1], ['Same race', 'Different race'])
  189. plt.title('Homicides by victim and perpetrator race 1980 - 2014')
  190. plt.show()
  191.  
  192.  
  193. with open('database.csv') as csv_file:
  194. csv_reader = csv.reader(csv_file, delimiter=',')
  195. line_count = 0
  196.  
  197. for row in csv_reader:
  198. if line_count == 0:
  199. columns = row
  200. data = [[0 for x in range(len(columns))] for y in range(ROW_COUNT)]
  201. line_count += 1
  202. else:
  203. data[line_count - 1] = row
  204. line_count += 1
  205.  
  206. victim_perp_race_bar_graph()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement