Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from mlxtend.frequent_patterns import apriori
- from mlxtend.preprocessing import TransactionEncoder
- from mlxtend.frequent_patterns import association_rules
# Load the dataset and discard exact duplicate rows, keeping the first
# occurrence of each (the surviving rows keep their original index labels).
df = pd.read_csv("grupo11.csv", skipinitialspace=True).drop_duplicates(keep='first')
def print_analytical_baseado_parametro(coluna):
    """Print the mean of each Analytical Skills column per group of ``coluna``.

    Reads the module-level ``df``; ``coluna`` is the label of the column
    used as the grouping key. Nothing is returned.
    """
    colunas_analytical = [
        coluna,
        "Analytical Skills 1",
        "Analytical Skills 2",
        "Analytical Skills 3",
    ]
    medias = df[colunas_analytical].groupby(coluna).mean()
    print(medias)
def print_domain_baseado_parametro(coluna):
    """Print the mean of each Domain score column per group of ``coluna``.

    Reads the module-level ``df``; ``coluna`` is the label of the column
    used as the grouping key. Nothing is returned.
    """
    colunas_domain = [
        coluna,
        "Domain Skills 1",
        "Domain Skills 2",
        "Domain Test 3",
        "Domain Test 4",
    ]
    medias = df[colunas_domain].groupby(coluna).mean()
    print(medias)
def print_quantitative_baseado_parametro(coluna):
    """Print the mean of each Quantitative Ability column per group of ``coluna``.

    Reads the module-level ``df``; ``coluna`` is the label of the column
    used as the grouping key. Nothing is returned.
    """
    colunas_quantitative = [
        coluna,
        "Quantitative Ability 1",
        "Quantitative Ability 2",
        "Quantitative Ability 3",
        "Quantitative Ability 4",
    ]
    medias = df[colunas_quantitative].groupby(coluna).mean()
    print(medias)
def print_english_baseado_parametro(coluna):
    """Print the mean of each English score column per group of ``coluna``.

    Reads the module-level ``df``; ``coluna`` is the label of the column
    used as the grouping key. Nothing is returned.
    """
    colunas_english = [
        coluna,
        "English 1",
        "English 2",
        "English 3",
        "English 4",
    ]
    medias = df[colunas_english].groupby(coluna).mean()
    print(medias)
def _to_numeric_if_possible(serie):
    """Return ``serie`` converted to a numeric dtype, or unchanged if parsing fails.

    Explicit replacement for ``pd.to_numeric(..., errors='ignore')``, which is
    deprecated in recent pandas releases.
    """
    try:
        return pd.to_numeric(serie)
    except (ValueError, TypeError):
        return serie


# Cleaning missing data: turn the 'MD' marker into NaN so pandas treats it
# as a missing value.
df = df.replace('MD', np.nan)

# Discard rows whose degree of study is 'Z' or 'W'.
df = df[~df["Degree of study"].isin(["Z", "W"])]
#df = df[df["Year of Birth"] != "Y3"]

# Filling the missing values with the column mean.
# Columns that parse as numbers become numeric; the others are left as-is.
df = df.apply(_to_numeric_if_possible)

# Assign the filled column back instead of calling
# ``df[col].fillna(..., inplace=True)``: the in-place call acts on a temporary
# object and is not guaranteed to update ``df`` (it never does under pandas
# Copy-on-Write).
for coluna_media in ('Quantitative Ability 1', 'Domain Skills 1', 'Analytical Skills 1'):
    df[coluna_media] = df[coluna_media].fillna(df[coluna_media].mean())

# Performance is a label, so its gaps are filled with the most frequent value.
df["Performance"] = df["Performance"].fillna(df["Performance"].mode()[0])

# Dropping the columns that make no difference to the analysis.
df = df.drop(columns=['Name', 'Candidate ID', 'Year of Completion of college'])

# Ordered categorical (LP < MP < BP); any value outside these categories
# becomes NaN.
df['Performance'] = pd.Categorical(df['Performance'], categories=['LP', 'MP', 'BP'], ordered=True)

print(df['Degree of study'].value_counts())
# print(df[["Degree of study", "Specialization in study"]].groupby("Specialization in study").count())

# Horizontal box plots of "English 1", one per gender, drawn on a single axes.
df[["Gender", "English 1"]].groupby("Gender").boxplot(subplots=False, vert=False)

print(df['Degree of study'].value_counts())

# Mean score of every skill family, grouped by degree of study.
print_analytical_baseado_parametro("Degree of study")
print_domain_baseado_parametro("Degree of study")
print_quantitative_baseado_parametro("Degree of study")
print_english_baseado_parametro("Degree of study")

# df.hist('Analytical Skills 1', by='Degree of study', color='darkred')
#df[["Degree of study", "English 1", "English 2", "English 3", "English 4"]].groupby(["Degree of study"]).mean().plot.bar(alpha=0.8)
plt.show()
# print(df['Performance'].value_counts())
# print(df.describe())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement