Advertisement
Guest User

SEGURA ESSE PYTHON

a guest
Apr 24th, 2018
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.62 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from mlxtend.frequent_patterns import apriori
  5. from mlxtend.preprocessing import TransactionEncoder
  6. from mlxtend.frequent_patterns import association_rules
  7.  
  8. df = pd.read_csv("grupo11.csv", skipinitialspace=True)
  9. df.drop_duplicates(keep='first', inplace=True)
  10.  
  11.  
  12. def print_analytical_baseado_parametro(coluna):
  13. print(df[[coluna, "Analytical Skills 1", "Analytical Skills 2", "Analytical Skills 3"]].groupby(coluna).mean())
  14.  
  15.  
  16. def print_domain_baseado_parametro(coluna):
  17. print(df[[coluna, "Domain Skills 1", "Domain Skills 2", "Domain Test 3", "Domain Test 4"]].groupby(coluna).mean())
  18.  
  19.  
  20. def print_quantitative_baseado_parametro(coluna):
  21. print(df[[coluna, "Quantitative Ability 1", "Quantitative Ability 2", "Quantitative Ability 3",
  22. "Quantitative Ability 4"]].groupby(coluna).mean())
  23.  
  24. def print_english_baseado_parametro(coluna):
  25. print(df[[coluna, "English 1", "English 2", "English 3", "English 4"]].groupby(coluna).mean())
  26.  
  27.  
  28. # Limpando dados faltantes
  29. df = df.apply(lambda x: x.replace('MD', np.nan))
  30.  
  31. df = df[df["Degree of study"] != "Z"]
  32. df = df[df["Degree of study"] != "W"]
  33.  
  34. #df = df[df["Year of Birth"] != "Y3"]
  35.  
  36. # Colocando a media nos valores inexistents
  37. df = df.apply(pd.to_numeric, errors='ignore')
  38. df['Quantitative Ability 1'].fillna(df['Quantitative Ability 1'].mean(), inplace=True)
  39. df['Domain Skills 1'].fillna(df['Domain Skills 1'].mean(), inplace=True)
  40. df['Analytical Skills 1'].fillna(df['Analytical Skills 1'].mean(), inplace=True)
  41. df["Performance"].fillna(df["Performance"].mode()[0], inplace=True)
  42.  
  43. # Tirando as colunas que não fazem diferença
  44. df = df.drop(['Name', 'Candidate ID', 'Year of Completion of college'], axis=1)
  45. df['Performance'] = pd.Categorical(df['Performance'], categories=['LP', 'MP', 'BP'], ordered=True)
  46.  
  47. print(df['Degree of study'].value_counts())
  48. # print(df[["Degree of study", "Specialization in study"]].groupby("Specialization in study").count())
  49.  
  50. df[["Gender", "English 1"]].groupby("Gender").boxplot(subplots=False, vert=False)
  51.  
  52. print(df['Degree of study'].value_counts())
  53. print_analytical_baseado_parametro("Degree of study")
  54. print_domain_baseado_parametro("Degree of study")
  55. print_quantitative_baseado_parametro("Degree of study")
  56. print_english_baseado_parametro("Degree of study")
  57.  
  58. # df.hist('Analytical Skills 1', by='Degree of study', color='darkred')
  59. #df[["Degree of study", "English 1", "English 2", "English 3", "English 4"]].groupby(["Degree of study"]).mean().plot.bar(alpha=0.8)
  60.  
  61. plt.show()
  62.  
  63.  
  64. # print(df['Performance'].value_counts())
  65. # print(df.describe())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement