Advertisement
Guest User

Untitled

a guest
Dec 14th, 2019
3,510
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.50 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Wed Dec 11 20:53:36 2019
  4.  
  5. @author: nicol
  6. """
  7.  
  8. import spotipy
  9. import spotipy.util as util
  10. from spotipy.oauth2 import SpotifyClientCredentials
  11. from sklearn.manifold import TSNE
  12. import matplotlib.pyplot as plt
  13. from sklearn.cluster import DBSCAN
  14. import pandas as pd
  15. import numpy as np
  16. from mpl_toolkits.mplot3d import Axes3D
  17.  
  18. # Fonction pour faire des "radar charts stylees"
  19. def radar_chart(r,theta):
  20. angles = np.linspace(0,2*np.pi,len(theta),endpoint=False)
  21. r = np.concatenate((r,[r[0]]))
  22. angles = np.concatenate((angles,angles[[0]]))
  23. fig = plt.figure()
  24. ax = fig.add_subplot(111, polar=True)
  25. ax.plot(angles, r, 'o-', linewidth=2)
  26. ax.fill(angles, r, alpha=0.25)
  27. ax.set_thetagrids(angles * 180/np.pi, theta)
  28. ax.grid(True)
  29.  
  30. # Recup du data sur spotify, True si il faut le faire, False sinon
  31. # Nous on a déjà le fichier des features donc pas besoin de recommencer
  32. if False:
  33. cid = 'ea8db4c94aa744588a4dd0621d45e05e'
  34. secret = '26b39cad02aa416c86db8fc2307118b8'
  35. username = "dour"
  36. client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
  37. sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)# Get read access to your library
  38. auth = spotipy.oauth2.SpotifyClientCredentials(client_id = cid, client_secret=secret)
  39. token = auth.get_access_token()
  40. spot = spotipy.Spotify(auth=token)
  41. playlist = []
  42. audio_features = []
  43. artist = []
  44. for i in range(9):
  45. p = (spot.user_playlist_tracks(cid, 'spotify:playlist:2tKVxEhCfeW275FzQqOWg0', fields='items', limit=100,offset=i*100))['items']
  46. for j in range(len(p)):
  47. audio_features.append(spot.audio_features(p[j]['track']['id']))
  48. artist.append([p[j]['track']['name'],p[j]['track']['popularity']])
  49.  
  50. columns = ['artist'].append(list(audio_features[0][0].keys()))
  51. for i in range(len(audio_features)):
  52. audio_features[i] = list(audio_features[i][0].values())
  53.  
  54. audio = pd.DataFrame(audio_features,columns=['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
  55. 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
  56. 'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms',
  57. 'time_signature'])
  58.  
  59.  
  60. # Au final sont intéressant : danceability, energy, loudness, mode, liveness, valence, tempo
  61. features = ['danceability','energy','loudness','mode','liveness','valence','tempo']
  62. audio_f = audio[features]
  63. a = audio_f.values
  64. # TSNE sur le data pour le rendre mieux en 3D
  65. a_red = TSNE(n_components=2).fit_transform(a)
  66. # Clustering en utilisant DBSCAN mais ici fait sur le data en dimensionnalité déjà réduite
  67. # => mieux de le faire sur a et pas a_red, mais marche pas assez
  68. clustering = DBSCAN(eps=3,min_samples=8).fit(a_red)
  69.  
  70. # Plot 3D du TSNE
  71. core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)
  72. core_samples_mask[clustering.core_sample_indices_] = True
  73.  
  74. labels = clustering.labels_
  75. unique_labels = set(labels)
  76.  
  77.  
  78. colors = [plt.cm.Spectral(each)
  79. for each in np.linspace(0, 1, len(unique_labels))]
  80.  
  81. fig = plt.figure()
  82. ax = fig.add_subplot(111)
  83.  
  84. for k, col in zip(unique_labels, colors):
  85. if k == -1:
  86. # Black used for noise.
  87. col = [0, 0, 0, 1]
  88.  
  89. class_member_mask = (labels == k)
  90.  
  91. xy = a_red[class_member_mask & core_samples_mask]
  92. ax.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
  93. markeredgecolor='k', markersize=14)
  94. fig.show()
  95.  
  96.  
  97.  
  98. audio_c = audio_f
  99. audio_c['cluster'] = labels
  100. # plot des distributions de chaque donnée audio pour chaque cluster
  101. #for i in range(len(features)):
  102. # fig,a = plt.subplots(2,4)
  103. # a = a.ravel()
  104. # b = enumerate(a)
  105. # bins = np.linspace(min(audio_c[features[i]]), max(audio_c[features[i]]),10)
  106. # for idx,ax in b:
  107. # l = audio_c[features[i]].where(audio_c['cluster']==idx)
  108. # ax.hist(l[~np.isnan(l)],bins=bins)
  109. # ax.set_xlim(min(audio_c[features[i]]), max(audio_c[features[i]]))
  110. #
  111. ## radar chart pour chaque cluster de toutes les données pour voir les
  112. ## différences entre chaque cluster
  113. #for lab in unique_labels:
  114. # l = audio_c.where(audio_c['cluster'] == lab)
  115. # l = l.dropna()
  116. # l = l.drop('cluster',axis=1)
  117. # r=np.array(np.mean(l.values,axis=0))
  118. # r = (r-np.min(audio_c.values[:,:7],axis=0))/(np.max(audio_c.values[:,:7],axis=0)-np.min(audio_c.values[:,:7],axis=0))
  119. # theta=l.columns
  120. # radar_chart(r,theta)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement