Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Wed Dec 11 20:53:36 2019
- @author: nicol
- """
- import spotipy
- import spotipy.util as util
- from spotipy.oauth2 import SpotifyClientCredentials
- from sklearn.manifold import TSNE
- import matplotlib.pyplot as plt
- from sklearn.cluster import DBSCAN
- import pandas as pd
- import numpy as np
- from mpl_toolkits.mplot3d import Axes3D
# Helper to draw nicely styled radar ("spider") charts.
def radar_chart(r, theta):
    """Draw a closed radar chart of the values ``r`` at category labels ``theta``.

    Parameters
    ----------
    r : sequence of float
        One value per category; the polygon is closed by repeating r[0].
    theta : sequence of str
        Category labels, one per spoke (len(theta) == len(r)).

    Returns
    -------
    (fig, ax) : the created matplotlib figure and polar axes.
    """
    # One spoke per category, evenly spaced around the circle.
    angles = np.linspace(0, 2 * np.pi, len(theta), endpoint=False)
    # Close the polygon by appending the first point at the end.
    r_closed = np.concatenate((r, [r[0]]))
    angles_closed = np.concatenate((angles, angles[[0]]))
    fig = plt.figure()
    ax = fig.add_subplot(111, polar=True)
    ax.plot(angles_closed, r_closed, 'o-', linewidth=2)
    ax.fill(angles_closed, r_closed, alpha=0.25)
    # Bug fix: the grid labels must be paired with the N original angles,
    # not the N+1 "closed" angles (length mismatch raises in modern matplotlib).
    ax.set_thetagrids(angles * 180 / np.pi, theta)
    ax.grid(True)
    return fig, ax
# Fetch the data from Spotify: True to (re)download, False to skip.
# We already have the features file, so no need to redo it.
if False:
    # SECURITY: hard-coded API credentials — these should be revoked and
    # loaded from environment variables / a config file instead.
    cid = 'ea8db4c94aa744588a4dd0621d45e05e'
    secret = '26b39cad02aa416c86db8fc2307118b8'
    username = "dour"
    client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
    # Get read access to your library.
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    auth = spotipy.oauth2.SpotifyClientCredentials(client_id=cid, client_secret=secret)
    token = auth.get_access_token()
    spot = spotipy.Spotify(auth=token)
    playlist = []
    audio_features = []
    artist = []
    # Page through the playlist 100 tracks at a time (up to 900 tracks).
    # NOTE(review): the first argument of user_playlist_tracks is a *user* id;
    # passing the client id (cid) here looks wrong but spotipy ignores it — confirm.
    for i in range(9):
        p = (spot.user_playlist_tracks(cid, 'spotify:playlist:2tKVxEhCfeW275FzQqOWg0',
                                       fields='items', limit=100, offset=i * 100))['items']
        for j in range(len(p)):
            audio_features.append(spot.audio_features(p[j]['track']['id']))
            artist.append([p[j]['track']['name'], p[j]['track']['popularity']])
    # Bug fix: list.append returns None, so the original
    # "['artist'].append(...)" always set columns to None.
    columns = ['artist'] + list(audio_features[0][0].keys())
    # audio_features() returns a one-element list per track; unwrap to raw values.
    for i in range(len(audio_features)):
        audio_features[i] = list(audio_features[i][0].values())
    audio = pd.DataFrame(audio_features,
                         columns=['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                                  'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                                  'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms',
                                  'time_signature'])

# In the end the interesting features are:
# danceability, energy, loudness, mode, liveness, valence, tempo.
features = ['danceability', 'energy', 'loudness', 'mode', 'liveness', 'valence', 'tempo']
# NOTE(review): `audio` is only defined inside the dead `if False:` branch above —
# this line assumes it already exists (interactive session / saved file); confirm.
audio_f = audio[features]
a = audio_f.values
# t-SNE on the data to make it render better in 2D.
a_red = TSNE(n_components=2).fit_transform(a)

# Clustering with DBSCAN, but done here on the already dimensionality-reduced
# data => it would be better to cluster on `a` rather than `a_red`, but that
# does not work well enough.
clustering = DBSCAN(eps=3, min_samples=8).fit(a_red)

# 2D scatter plot of the t-SNE embedding, colored by DBSCAN cluster.
core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)
core_samples_mask[clustering.core_sample_indices_] = True
labels = clustering.labels_
unique_labels = set(labels)
# One distinct color per cluster, spread over the Spectral colormap.
colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]
fig = plt.figure()
ax = fig.add_subplot(111)
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise (DBSCAN label -1).
        col = [0, 0, 0, 1]
    class_member_mask = (labels == k)
    # Plot only the core samples of this cluster.
    xy = a_red[class_member_mask & core_samples_mask]
    ax.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
            markeredgecolor='k', markersize=14)
fig.show()

# Bug fix: copy() so that adding the 'cluster' column does not silently
# mutate audio_f through the alias (and avoids SettingWithCopyWarning).
audio_c = audio_f.copy()
audio_c['cluster'] = labels

# Plot of the distributions of each audio feature for each cluster.
#for i in range(len(features)):
#    fig, a = plt.subplots(2, 4)
#    a = a.ravel()
#    b = enumerate(a)
#    bins = np.linspace(min(audio_c[features[i]]), max(audio_c[features[i]]), 10)
#    for idx, ax in b:
#        l = audio_c[features[i]].where(audio_c['cluster'] == idx)
#        ax.hist(l[~np.isnan(l)], bins=bins)
#        ax.set_xlim(min(audio_c[features[i]]), max(audio_c[features[i]]))
#
## Radar chart of all the features for each cluster, to see the
## differences between clusters.
#for lab in unique_labels:
#    l = audio_c.where(audio_c['cluster'] == lab)
#    l = l.dropna()
#    l = l.drop('cluster', axis=1)
#    r = np.array(np.mean(l.values, axis=0))
#    r = (r - np.min(audio_c.values[:, :7], axis=0)) / (np.max(audio_c.values[:, :7], axis=0) - np.min(audio_c.values[:, :7], axis=0))
#    theta = l.columns
#    radar_chart(r, theta)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement