FEgor04

csv_connect.py

Aug 22nd, 2019
218
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.10 KB | None | 0 0
  1. import age_analyzer as analyzer
  2. import pandas as pd
  3. from math import floor
  4. import statistics as st
  5. import numpy as np
  6.  
  7. def fill_vk_age(input_csv, output_csv):
  8.     data = pd.read_csv(input_csv)
  9.     for i in range(0, data.__len__()):
  10.         target = data['ID'][i]
  11.         if analyzer.get_age(target) != -1:
  12.             data["VK Age"][i] = analyzer.get_age(target)
  13.         else:
  14.             data["VK Age"][i] = "IS NOT SPECIFIED"
  15.     df = pd.DataFrame(data)
  16.     df.to_csv(output_csv)
  17.  
  18. def fill_friends_age(input_csv, output_csv):
  19.     data = pd.read_csv(input_csv)
  20.     for i in range(0, data.__len__()):
  21.         if analyzer.is_profile_closed(data["ID"][i]):
  22.             print(f"{data['ID'][i]} is closed")
  23.             pass
  24.         else:
  25.             ages = analyzer.get_friends_ages(data["ID"][i])
  26.             try:
  27.                 data["Mean"][i] = floor(st.mean(ages))
  28.             except:
  29.                 data["Mean"][i] = "PROFILE CLOSED"
  30.  
  31.             try:
  32.                 data["Mode"][i] = floor(st.mode(ages))
  33.             except:
  34.                 data["Mode"][i] = "PROFILE CLOSED"
  35.  
  36.             try:
  37.                 data["Harmonic Mean"][i] = floor(st.harmonic_mean(ages))
  38.             except:
  39.                 data["Harmonic Mean"][i] = "PROFILE CLOSED"
  40.  
  41.             try:
  42.                 data["Median"][i] = floor(st.median(ages))
  43.             except:
  44.                 data["Median"][i] = "PROFILE CLOSED"
  45.  
  46.     df = pd.DataFrame(data)
  47.     print(df)
  48.     df.to_csv(output_csv, index=False)
  49.  
  50. def people_who_specified_age(data):
  51.     count  = 0
  52.     for i in range(0, data.__len__()):
  53.         if data["VK Age"][i] != "IS NOT SPECIFIED":
  54.             count += 1
  55.     return count
  56.  
  57. def people_whose_vk_age_is_equal_to_real_age(data):
  58.     count = 0
  59.     for i in range(0, data.__len__()):
  60.         try:
  61.             if int(data["VK Age"][i]) == int(data["Real Age"][i]):
  62.                 count += 1
  63.         except:
  64.             pass
  65.     return count
  66.  
  67. def fill_error_list(data):
  68.     columns = ['Mean', 'HMean', 'Median', 'Mode']
  69.     error_list_dict = {
  70.         'Mean': [],
  71.         'HMean': [],
  72.         'Median': [],
  73.         'Mode': []
  74.     }
  75.     # print(type(columns))
  76.     # print(columns)
  77.     # print(error_list_data)
  78.     for i in range(0, data.__len__()):
  79.         if data["Mean"][i] != "PROFILE CLOSED" and data["Mode"][i] != "PROFILE CLOSED" and data["Median"][i] != "PROFILE CLOSED" and data["Harmonic Mean"][i] != "PROFILE CLOSED":
  80.             mean_error = abs(int(data["Mean"][i]) - int(data["Real Age"][i]))
  81.             hmean_error = abs(int(data["Harmonic Mean"][i]) - int(data["Real Age"][i]))
  82.             median_error = abs(int(data["Median"][i]) - int(data["Real Age"][i]))
  83.             mode_error = abs(int(data["Mode"][i]) - int(data["Real Age"][i]))
  84.             error_list_dict["Mean"].append(mean_error)
  85.             error_list_dict["HMean"].append(hmean_error)
  86.             error_list_dict["Median"].append(median_error)
  87.             error_list_dict["Mode"].append(mode_error)
  88.         else:
  89.             pass
  90.     error_list_data = pd.DataFrame(data=error_list_dict)
  91.     return error_list_data
  92.  
  93. def fill_accuracy(data):
  94.     print(f"Data type: {type(data)}", end="\n\n")
  95.     error_data = fill_error_list(data)
  96.     mean_row = error_data["Mean"].value_counts()
  97.     hmean_row = error_data["HMean"].value_counts()
  98.     median_row = error_data["Median"].value_counts()
  99.     mode_row = error_data["Mode"].value_counts()
  100.  
  101.     error_level_list = [0]
  102.     accuracy_mean_list = [mean_row[0].item()]
  103.     accurracy_hmean_list = [hmean_row[0].item()]
  104.     accuracy_mode_list = [mode_row[0].item()]
  105.     accuracy_median_list = [median_row[0].item()]
  106.     for i in range(1, data.__len__()):
  107.         error_level_list.append(i)
  108.         accuracy_mean_list.append(mean_row[i].item() + accuracy_mean_list[i-1])
  109.         accuracy_mode_list.append(mean_row[i].item() + accuracy_mode_list[i-1])
  110.         accuracy_median_list.append(mean_row[i].item() + accuracy_median_list[i-1])
  111.         accurracy_hmean_list.append(mean_row[i].item() + accurracy_hmean_list[i-1])
  112.     accuracy_dict = {
  113.         "ErrorLevel": error_level_list,
  114.         "Mean": accuracy_mean_list,
  115.         "HMean": accurracy_hmean_list,
  116.         "Mode": accuracy_mode_list,
  117.         "Median": accuracy_median_list
  118.     }
  119.     return pd.DataFrame(data=accuracy_dict)
  120.  
  121. def analyze(input_file):
  122.     data = pd.read_csv(input_file)
  123.     specified_age = people_who_specified_age(data)
  124.     people_with_true_age = people_whose_vk_age_is_equal_to_real_age(data)
  125.     print(f"Number of people, who specified their age: {specified_age} ({round( (specified_age / data.__len__() * 100), 2 )} %)")
  126.     print(f"Number of people, whose vk age is equal to real age: {people_with_true_age} ({round( (people_with_true_age / data.__len__() * 100), 2)} %)")
  127.     print("-------------------------------------------------------------------------------------")
  128.     # print(data)
  129.     # error_list_data = fill_error_list(data)
  130.     # print(error_list_data["Mean"]
  131.     # print(error_list_data["Mean"].value_counts()[2])
  132.     accuracy_data = fill_accuracy(data)
  133.     print(accuracy_data)
  134.     # print(data)
Advertisement
Add Comment
Please, Sign In to add comment