Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 22 00:01:16 2016
@author: benedictusyoga
"""
- import pandas
- import numpy
# Load the Gapminder data set from the parent directory.
data = pandas.read_csv("../gapminder.csv", low_memory=False)

# Report the size of the table: observations (rows) first, then
# variables (columns).
print(data.shape[0])
print(data.shape[1])

# Columns available in gapminder.csv:
#   country, incomeperperson, alcconsumption, armedforcesrate,
#   breastcancerper100th, co2emissions, femaleemployrate, hivrate,
#   internetuserate, lifeexpectancy, oilperperson, polityscore,
#   relectricperperson, suicideper100th, employrate, urbanrate
def dist_freq(aux_data, ranges):
    """Print a frequency table of ``aux_data`` binned by ``ranges``.

    The values are binned with ``pandas.cut`` using its default settings,
    so every bin is open on the left and closed on the right; a value
    equal to ``ranges[0]`` therefore does not fall into the first bin.

    Args:
        aux_data: One-dimensional numeric data (for example a pandas
            Series). NaN marks a missing observation.
        ranges: Sequence of increasing bin edges; each pair of
            consecutive edges delimits one bin.

    Output:
        A header line, one row per bin, and a final "Missing Data" row
        counting every value that was not placed in any bin (NaN or
        outside the edges). Percentages are relative to the total number
        of observations. Nothing is returned.
    """
    # Wrapping in a Series lets plain lists/arrays be passed as well.
    binned = pandas.Series(pandas.cut(aux_data, ranges))
    total = len(binned)

    # Count bins and missing values separately. Relying on the NaN row of
    # value_counts(dropna=False) being last is wrong when nothing is
    # missing: that row does not exist and the last real bin would be
    # reported as "Missing Data".
    per_bin = binned.value_counts(sort=False, dropna=True).tolist()
    missing = int(binned.isna().sum())

    def share(n):
        # Avoid a division by zero for empty input.
        return n / total * 100 if total else 0.0

    print("Category\t\tFrequency\tPercentage")
    for lower, upper, n in zip(ranges[:-1], ranges[1:], per_bin):
        print("(", lower, "-", upper, ")", "\t", n, "\t\t", share(n), "%")
    print("Missing Data", "\t\t", missing, "\t\t", share(missing), "%")
# Frequency tables for three Gapminder variables. Each column is coerced
# to numeric first (errors="coerce" turns unparseable entries into NaN)
# and then tabulated with dist_freq. The open-ended last bin uses
# numpy.inf; the numpy.Inf alias was removed in NumPy 2.0.

print("Income per person in countries")
aux_incomeperperson = pandas.to_numeric(data["incomeperperson"], errors="coerce")
ranges_incomeperperson = [0, 2000, 5000, 10000, 20000, 35000, 50000, numpy.inf]
dist_freq(aux_incomeperperson, ranges_incomeperperson)

print("Internet use rate in countries")
aux_internetuserate = pandas.to_numeric(data["internetuserate"], errors="coerce")
ranges_internetuserate = [0, 5, 15, 45, 90, numpy.inf]
dist_freq(aux_internetuserate, ranges_internetuserate)

print("Urban rate in countries")
aux_urbanrate = pandas.to_numeric(data["urbanrate"], errors="coerce")
ranges_urbanrate = [0, 5, 15, 45, 90, numpy.inf]
dist_freq(aux_urbanrate, ranges_urbanrate)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement