Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from collections import defaultdict

# Every usable MON12PRTS answer, regardless of the respondent's age.
all_values = []
# Respondent age -> list of the MON12PRTS answers given at that age.
age_value_map = defaultdict(list)

with open('2015_2017_MaleData.dat') as f:
    # Iterate the file object directly so records are streamed one at a
    # time instead of loading the whole file with readlines().
    for line in f:
        # Drop only the line terminator; spaces are significant in this
        # fixed-width layout and must be preserved.
        record = line.rstrip('\r\n')

        # note: columns in data sheet definition
        # are 1-based, hence the -1 in indices
        if len(record) <= 156:
            # A blank or truncated record has no column 157; treat it
            # like N/A data instead of crashing on the index/int() below.
            continue

        # data sheet defines AGE_A(13-14)
        # as the age of the respondent
        age = int(record[12:14])

        # data sheet defines MON12PRTS(157)
        # as "Number of female sex partners in last 12 months"
        number_str = record[156]
        if number_str == ' ':
            # ignore N/A data
            continue

        number = int(number_str)
        all_values.append(number)
        age_value_map[age].append(number)
def calc_median(values):
    """Return the median of *values*.

    The input is not modified; a sorted copy is used internally.

    For an odd number of values the middle element is returned unchanged.
    For an even number of values the arithmetic mean of the two middle
    elements is returned (always a float in that case), rather than just
    the upper one of the two.

    Raises:
        IndexError: if *values* is empty. The exception type matches what
            the previous implementation raised implicitly, so existing
            callers keep working; only the message is new.
    """
    ordered = sorted(values)
    count = len(ordered)
    if count == 0:
        raise IndexError("calc_median() requires at least one value")

    middle = count // 2
    if count % 2 == 1:
        return ordered[middle]
    # Even count: the median lies halfway between the two central values.
    return (ordered[middle - 1] + ordered[middle]) / 2
def age_group_values(age_value_map, youngest, oldest):
    """Gather the values of every age from *youngest* to *oldest* inclusive.

    Ages are visited in ascending order and each age's values are appended
    in their stored order. Ages that have no entry in *age_value_map*
    contribute nothing, and the mapping itself is never modified.

    Returns a new list (empty when no age in the range has an entry).
    """
    collected = []
    for current_age in range(youngest, oldest + 1):
        # .get() never triggers a defaultdict's factory, so looking up a
        # missing age does not insert an empty entry into the mapping.
        collected += age_value_map.get(current_age, ())
    return collected
# Report the median over all respondents, then one median per age bracket.
# Each label states the same inclusive age range that is passed to
# age_group_values(), so the printed text matches the data it describes.
print("Overall median: {}".format(calc_median(all_values)))

group_18_24 = age_group_values(age_value_map, 18, 24)
print("Median of group 18-24: {}".format(calc_median(group_18_24)))

group_25_34 = age_group_values(age_value_map, 25, 34)
print("Median of group 25-34: {}".format(calc_median(group_25_34)))

group_35_44 = age_group_values(age_value_map, 35, 44)
print("Median of group 35-44: {}".format(calc_median(group_35_44)))

group_45_54 = age_group_values(age_value_map, 45, 54)
print("Median of group 45-54: {}".format(calc_median(group_45_54)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement