Advertisement
Guest User

Untitled

a guest
Aug 25th, 2016
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.77 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Mon Aug 22 00:01:16 2016
  4.  
  5. @author: benedictusyoga
  6. """
  7.  
  8. import pandas
  9. import numpy
  10.  
  11. data = pandas.read_csv("../gapminder.csv", low_memory = False)
  12.  
  13. print(len(data)) #number of observations (rows)
  14. print(len(data.columns)) # number of variables (columns)
  15.  
  16. # gapminder columns country,incomeperperson,alcconsumption,armedforcesrate,breastcancerper100th,co2emissions,femaleemployrate,hivrate,internetuserate,lifeexpectancy,oilperperson,polityscore,relectricperperson,suicideper100th,employrate,urbanrate
  17.  
  18. def dist_freq(aux_data, ranges):
  19. cut_data = pandas.cut(aux_data, ranges)
  20. count = cut_data.value_counts(sort = False, dropna = False)
  21. percent = cut_data.value_counts(sort = False, dropna = False, normalize = True)
  22.  
  23. print("Category\t\tFrequency\tPercentage")
  24. max_idx = len(count)
  25. for i in range(max_idx):
  26. if i == max_idx - 1:
  27. print("Missing Data", "\t\t", count[max_idx-1], "\t\t", percent[max_idx-1]*100, "%")
  28. else:
  29. print("(",ranges[i], "-",ranges[i+1],")", "\t", count[i], "\t\t", percent[i]*100, "%")
  30.  
  31. print("Income per person in countries")
  32. aux_incomeperperson = pandas.to_numeric(data["incomeperperson"], errors = "coerce")
  33. ranges_incomeperperson = [0, 2000, 5000, 10000, 20000, 35000, 50000, numpy.Inf]
  34. dist_freq(aux_incomeperperson, ranges_incomeperperson)
  35.  
  36.  
  37. print("Internet use rate in countries")
  38. aux_internetuserate = pandas.to_numeric(data["internetuserate"], errors = "coerce")
  39. ranges_internetuserate = [0, 5, 15, 45, 90, numpy.Inf]
  40. dist_freq(aux_internetuserate, ranges_internetuserate)
  41.  
  42. print("Urban rate in countries")
  43. aux_urbanrate = pandas.to_numeric(data["urbanrate"], errors = "coerce")
  44. ranges_urbanrate = [0, 5, 15, 45, 90, numpy.Inf]
  45. dist_freq(aux_urbanrate, ranges_urbanrate)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement