Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def scale_df(df, axis=0):
    """Standardize a DataFrame to zero mean and unit standard deviation.

    Args:
        df: pandas DataFrame of numeric values.
        axis: Axis along which the mean and standard deviation are computed.
            ``0`` / ``"index"`` standardizes each column, ``1`` /
            ``"columns"`` standardizes each row.

    Returns:
        A new DataFrame with the same shape and labels as ``df``. A row or
        column with zero standard deviation comes out as NaN (0 / 0).
    """
    # mean()/std() reduce along `axis`, so the resulting Series is labelled by
    # the *other* axis. The plain `-` and `/` operators always align a Series
    # with the columns, which is only right for axis=0; broadcasting
    # explicitly makes axis=1 work as well.
    broadcast_axis = 1 if axis in (0, "index") else 0
    centered = df.sub(df.mean(axis=axis), axis=broadcast_axis)
    return centered.div(df.std(axis=axis), axis=broadcast_axis)
def plot_hmap(df, ix=None, cmap='bwr'):
    """Draw a DataFrame as a heatmap and display it with ``plt.show()``.

    Args:
        df: DataFrame whose values are drawn; its index supplies the y-axis
            tick labels.
        ix: Positional row selector handed to ``df.iloc``; it decides which
            rows are drawn and in what order. ``None`` draws every row in
            its existing order.
        cmap: Colormap passed to ``plt.imshow``.
    """
    if ix is None:
        ix = np.arange(df.shape[0])
    shown = df.iloc[ix, :]
    plt.imshow(shown, cmap=cmap)
    plt.colorbar(fraction=0.03)
    # Positions and labels both come from the rows actually drawn, so a
    # selector that picks only some of the rows still gets matching ticks.
    plt.yticks(np.arange(shown.shape[0]), shown.index)
    plt.xticks(np.arange(shown.shape[1]))
    plt.grid(False)
    plt.show()
def scale_and_plot(df, ix=None, n_clusters=4):
    """Standardize each row of ``df``, clip outliers, and plot a heatmap.

    Args:
        df: DataFrame of numeric values.
        ix: Positional row order for the heatmap. When ``None``, rows are
            ordered by the label agglomerative clustering assigns to them.
        n_clusters: Number of clusters used to derive the default row order.
    """
    # Transposing twice standardizes every row rather than every column.
    row_scaled = scale_df(df.T).T
    if ix is None:
        # A trick to make better heatmaps: sorting by cluster label puts the
        # rows of one cluster next to each other.
        ix = AC(n_clusters=n_clusters).fit(row_scaled).labels_.argsort()
    # `.values` replaces DataFrame.as_matrix(), which pandas 1.0 removed.
    values = row_scaled.values
    # Clip symmetrically at the smaller of the two extremes so the colour
    # scale is centred on zero.
    cap = np.min([np.max(values), np.abs(np.min(values))])
    row_scaled = np.clip(row_scaled, -1 * cap, cap)
    plot_hmap(row_scaled, ix=ix)
def normalize(df):
    """Min-max scale every column of ``df`` into the range [0, 1].

    Args:
        df: pandas DataFrame of numeric columns. It is not modified.

    Returns:
        A new DataFrame with the same shape and labels, where each column is
        ``(value - column_min) / (column_max - column_min)``. A constant
        column maps to 0.0 everywhere.
    """
    col_min = df.min()
    col_range = df.max() - col_min
    # A constant column has zero range; dividing by 1 instead turns the
    # would-be 0 / 0 = NaN into 0.0.
    col_range = col_range.replace(0, 1)
    return df.sub(col_min, axis="columns").div(col_range, axis="columns")
from sklearn.cluster import AgglomerativeClustering as AC

# NOTE(review): `df` is not defined in this section; it is presumably loaded
# earlier with a datetime-like index (`df.index.dayofweek` is read below).
# Confirm against the loading code.

# Each table counts the 'ID' entries per (row label, column label) pair;
# pairs that never occur are filled with 0 instead of NaN.
dayofweek_by_location = df.pivot_table(
    values='ID',
    index='Location.Description',
    columns=df.index.dayofweek,
    aggfunc=np.size,
).fillna(0)
dayofweek_by_type = df.pivot_table(
    values='ID',
    index='Primary.Type',
    columns=df.index.dayofweek,
    aggfunc=np.size,
).fillna(0)
location_by_type = df.pivot_table(
    values='ID',
    index='Location.Description',
    columns='Primary.Type',
    aggfunc=np.size,
).fillna(0)

# Heatmap of the row-standardized 'Primary.Type' x day-of-week counts.
plt.figure(figsize=(17, 17))
scale_and_plot(dayofweek_by_type)
Add Comment
Please, Sign In to add comment