Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import numpy as np
- import matplotlib.pyplot as plt
- from enum import IntEnum
class Column(IntEnum):
    """Zero-based positions of the fields within one row of ``data``.

    Members are plain integers, so they can be used directly as list
    indices, e.g. ``row[Column.WEAPON]``.
    """

    # Record and reporting agency
    RECORD_ID = 0
    AGENCY_CODE = 1
    AGENCY_NAME = 2
    AGENCY_TYPE = 3

    # Place and time
    CITY = 4
    STATE = 5
    YEAR = 6
    MONTH = 7

    # Incident
    INCIDENT = 8
    CRIME_TYPE = 9
    CRIME_SOLVED = 10

    # Victim
    VICTIM_SEX = 11
    VICTIM_AGE = 12
    VICTIM_RACE = 13
    VICTIM_ETHNICITY = 14

    # Perpetrator
    PERPETRATOR_SEX = 15
    PERPETRATOR_AGE = 16
    PERPETRATOR_RACE = 17
    PERPETRATOR_ETHNICITY = 18

    # Circumstances
    RELATIONSHIP = 19
    WEAPON = 20
    VICTIM_COUNT = 21
    PERPETRATOR_COUNT = 22
    RECORD_SOURCE = 23
# Number of rows the in-memory table is sized to; the loader pre-allocates
# this many rows and get_column() walks exactly this many.
ROW_COUNT = 638_455

# Row table shared by every function below; filled in by the CSV loader.
data = list()
def get_column(column):
    """Return the non-blank values of one column of the ``data`` table.

    Args:
        column: Index of the field to extract from each row (a ``Column``
            member or a plain int).

    Returns:
        A list with that field from every row of ``data``, in row order,
        leaving out entries equal to the strings ``' '`` or ``'0'``.
    """
    # Walk the rows that are actually present instead of indexing
    # range(ROW_COUNT): a table shorter than the hard-coded row count
    # (e.g. before the CSV has been loaded) no longer raises IndexError.
    return [row[column] for row in data if row[column] not in (' ', '0')]
def get_column_freq(col):
    """Count how many times each value occurs in ``col``.

    Entries that compare equal to ``0`` are skipped. The returned dict maps
    each remaining value to its number of occurrences, with keys in order
    of first appearance.
    """
    counts = dict()
    for value in col:
        if value != 0:
            counts[value] = counts.get(value, 0) + 1
    return counts
def state_bar_graph():
    """Show a horizontal bar chart of the number of rows per state.

    Values are taken from the STATE column of the module-level ``data``
    table through ``get_column`` and ``get_column_freq``. Blocks until the
    plot window is closed.
    """
    states = get_column_freq(get_column(Column.STATE))
    # Dict views are not sequences; matplotlib expects array-like input,
    # so materialise them as lists before plotting.
    labels = list(states.keys())
    counts = list(states.values())
    pos = np.arange(len(labels))
    plt.barh(pos, counts)
    plt.yticks(pos, labels)
    plt.xlabel('# homicides')
    plt.title('Homicides by state 1980 - 2014')
    plt.show()
def year_bar_graph():
    """Show a horizontal bar chart of the number of rows per year.

    Values are taken from the YEAR column of the module-level ``data``
    table through ``get_column`` and ``get_column_freq``; bars appear in
    the order the years are first encountered. Blocks until the plot
    window is closed.
    """
    years = get_column_freq(get_column(Column.YEAR))
    # Dict views are not sequences; matplotlib expects array-like input,
    # so materialise them as lists before plotting.
    labels = list(years.keys())
    counts = list(years.values())
    pos = np.arange(len(labels))
    plt.barh(pos, counts)
    plt.yticks(pos, labels)
    plt.xlabel('# homicides')
    plt.title('Homicides in USA 1980 - 2014')
    plt.show()
def weapon_bar_graph():
    """Show a horizontal bar chart of the number of rows per weapon.

    Values are taken from the WEAPON column of the module-level ``data``
    table through ``get_column`` and ``get_column_freq``. Blocks until the
    plot window is closed.
    """
    weapons = get_column_freq(get_column(Column.WEAPON))
    # Dict views are not sequences; matplotlib expects array-like input,
    # so materialise them as lists before plotting.
    labels = list(weapons.keys())
    counts = list(weapons.values())
    pos = np.arange(len(labels))
    plt.barh(pos, counts)
    plt.yticks(pos, labels)
    plt.xlabel('# homicides')
    plt.title('Homicides by weapon in USA 1980 - 2014')
    plt.show()
def victim_gender_bar_graph():
    """Show a vertical bar chart of the number of rows per victim sex.

    Values are taken from the VICTIM_SEX column of the module-level
    ``data`` table through ``get_column`` and ``get_column_freq``. Blocks
    until the plot window is closed.
    """
    genders = get_column_freq(get_column(Column.VICTIM_SEX))
    # Dict views are not sequences; matplotlib expects array-like input,
    # so materialise them as lists before plotting.
    labels = list(genders.keys())
    counts = list(genders.values())
    pos = np.arange(len(labels))
    plt.bar(pos, counts)
    plt.xticks(pos, labels)
    plt.ylabel('# homicides')
    plt.title('Homicides by victim gender in USA 1980 - 2014')
    plt.show()
def perpetrator_race_bar_graph():
    """Show a vertical bar chart of the number of rows per perpetrator race.

    Values are taken from the PERPETRATOR_RACE column of the module-level
    ``data`` table through ``get_column`` and ``get_column_freq``. Blocks
    until the plot window is closed.
    """
    races = get_column_freq(get_column(Column.PERPETRATOR_RACE))
    # Dict views are not sequences; matplotlib expects array-like input,
    # so materialise them as lists before plotting.
    labels = list(races.keys())
    counts = list(races.values())
    pos = np.arange(len(labels))
    plt.bar(pos, counts)
    plt.xticks(pos, labels)
    plt.ylabel('# homicides')
    plt.title('Homicides by perpetrator race in USA 1980 - 2014')
    plt.show()
def victim_age_histogram():
    """Show a histogram of victim ages over the range 0-100.

    Ages come from the VICTIM_AGE column via ``get_column`` (which drops
    ``' '`` and ``'0'`` entries) and are converted to ``int``.

    Raises:
        ValueError: If a remaining entry cannot be parsed as an integer.
    """
    ages = [int(age) for age in get_column(Column.VICTIM_AGE)]
    # Pin the 100 bins to the displayed 0-100 window. Without ``range`` the
    # bins span min..max of the data, so any value outside the window
    # stretches every bin and leaves only a few coarse bars visible.
    plt.hist(ages, bins=100, range=(0, 100))
    plt.xlim(0, 100)
    plt.xlabel('age')
    plt.ylabel('# homicides')
    plt.title('Homicides by victim age in USA 1980 - 2014')
    plt.show()
def perpetrator_age_histogram():
    """Show a histogram of perpetrator ages over the range 0-100.

    Ages come from the PERPETRATOR_AGE column via ``get_column`` (which
    drops ``' '`` and ``'0'`` entries) and are converted to ``int``.

    Raises:
        ValueError: If a remaining entry cannot be parsed as an integer.
    """
    ages = [int(age) for age in get_column(Column.PERPETRATOR_AGE)]
    # Pin the 100 bins to the displayed 0-100 window. Without ``range`` the
    # bins span min..max of the data, so any value outside the window
    # stretches every bin and leaves only a few coarse bars visible.
    plt.hist(ages, bins=100, range=(0, 100))
    plt.xlim(0, 100)
    plt.xlabel('age')
    plt.ylabel('# homicides')
    plt.title('Homicides by perpetrator age in USA 1980 - 2014')
    plt.show()
def siblings_and_weapon_bar_graph():
    """Show a horizontal bar chart of weapon counts for selected relationships.

    Only rows of the module-level ``data`` table whose RELATIONSHIP field
    is ``'Son'`` or ``'Daughter'`` are counted; bars are labelled with the
    WEAPON field. Blocks until the plot window is closed.
    """
    # NOTE(review): the function name and chart title say "siblings", but
    # the filter selects 'Son'/'Daughter'. Confirm which was intended and
    # align the filter or the title accordingly.
    relationships = {'Son', 'Daughter'}
    weapon_counts = {}
    for row in data:
        if row[Column.RELATIONSHIP] in relationships:
            weapon = row[Column.WEAPON]
            weapon_counts[weapon] = weapon_counts.get(weapon, 0) + 1
    # Dict views are not sequences; matplotlib expects array-like input,
    # so materialise them as lists before plotting.
    labels = list(weapon_counts.keys())
    counts = list(weapon_counts.values())
    pos = np.arange(len(labels))
    plt.barh(pos, counts)
    plt.yticks(pos, labels)
    plt.xlabel('# homicides')
    plt.title('Homicides with weapons between siblings 1980 - 2014')
    plt.show()
def victim_perp_race_bar_graph():
    """Show how many rows have matching victim and perpetrator race.

    Walks every row of the module-level ``data`` table, prints the number
    of rows where the two race fields are equal and the number where they
    differ, then shows both as a two-bar chart. Blocks until the plot
    window is closed.
    """
    same = 0
    diff = 0
    for row in data:
        # Compare race with race. The previous code compared the victim's
        # race against the perpetrator's *ethnicity* column, which is a
        # different field and made the "same race" count meaningless.
        if row[Column.VICTIM_RACE] == row[Column.PERPETRATOR_RACE]:
            same += 1
        else:
            diff += 1
    print(same)
    print(diff)
    plt.bar([0, 1], [same, diff])
    plt.xticks([0, 1], ['Same race', 'Different race'])
    plt.title('Homicides by victim and perpetrator race 1980 - 2014')
    plt.show()
# Load database.csv into the module-level ``data`` table, then draw a chart.
# newline='' is what the csv module requires so that quoted fields
# containing line breaks are parsed correctly.
with open('database.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    header = next(csv_reader, None)
    # An empty file leaves ``data`` untouched, as before.
    if header is not None:
        columns = header
        # Keep every data row; a file with more than ROW_COUNT rows no
        # longer fails with IndexError on a fixed-size table.
        data = list(csv_reader)
        # Index-based readers walk range(ROW_COUNT), so keep the table at
        # least that long by padding with all-zero placeholder rows.
        shortfall = ROW_COUNT - len(data)
        if shortfall > 0:
            data.extend([0] * len(columns) for _ in range(shortfall))

victim_perp_race_bar_graph()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement