tuomasvaltanen

Untitled

Apr 21st, 2021 (edited)
674
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # workshop 3, 21.4.2021
  2.  
  3. import numpy as np
  4. import pandas as pd
  5. import seaborn as sns
  6. import matplotlib.pyplot as plt
  7. from scipy import stats
  8. import ast
  9. from sklearn.preprocessing import MultiLabelBinarizer
  10.  
  11. # load the data
  12. starbucks = pd.read_csv('starbucks_promotion.csv')
  13.  
  14. # the channels-column is in string format
  15. # force it to be an actual list instead so we can split it later
  16. starbucks['channels'] = starbucks['channels'].apply(ast.literal_eval)
  17.  
  18. # just a copy for inspection in variable explorer
  19. starbucks_copy = starbucks.copy()
  20.  
  21. # scikit-learn => multilabelbinarizer can split the channels in to separate columns
  22. mlb = MultiLabelBinarizer()
  23. expandedLabelData = mlb.fit_transform(starbucks["channels"].tolist())
  24. labelClasses = mlb.classes_
  25.  
  26. # Create a pandas.DataFrame from our output
  27. expandedLabels = pd.DataFrame(expandedLabelData, columns=labelClasses)
  28.  
  29. # combine new columns with original data
  30. starbucks = pd.concat([starbucks, expandedLabels], axis=1)
  31.  
  32. # remove unneeded channels
  33. starbucks.drop('channels', axis=1, inplace=True)
  34. starbucks.drop('id', axis=1, inplace=True)
  35. starbucks.drop('Unnamed: 0', axis=1, inplace=True)
  36.  
  37. # column names for inspecting
  38. columns = starbucks.columns
  39.  
  40. # basic correlation matrix
  41. # not enough data for this though...
  42. correlations = starbucks.corr()
  43.  
  44. # basic pairplot
  45. plt.clf()
  46. sns.pairplot(starbucks, hue='offer_type')
  47. plt.figure()
  48.  
  49. # NEW FILE
  50.  
  51. # video game sales test data, csv file in MS Teams channel
  52.  
  53. import numpy as np
  54. import pandas as pd
  55. import seaborn as sns
  56. import matplotlib.pyplot as plt
  57. from scipy import stats
  58.  
  59. games = pd.read_csv('videogamesales.csv')
  60.  
  61. fallout4 = games[games['Name'] == 'Fallout 4']
  62.  
  63. # need for speed most wanted has been release 12 times on different platforms
  64. needforspeed = games[games['Name'] == 'Need for Speed: Most Wanted']
  65.  
  66. # TODO
  67. # one interesting thing: what are the rankings if we combine
  68. # the sales of each game in all platforms
  69.  
  70. correlations = games.corr()
  71.  
  72. # what kind of games sell well in each region
  73.  
  74. # if use you use [[ ]] instead of [] around NA_Sales
  75. # you will get a DataFrame instead of Series
  76. # reset index will take Genre out of index to a column
  77. NA_popular_genre = games.groupby('Genre')[['NA_Sales']].sum().reset_index()
  78. JP_popular_genre = games.groupby('Genre')[['JP_Sales']].sum().reset_index()
  79.  
  80.  
  81. # NA Sales, most popular genres
  82. plt.clf()
  83. plt.figure(figsize=(15,10))
  84. sns.barplot(x='Genre', y='NA_Sales', data=NA_popular_genre)
  85. plt.xticks(rotation=45)
  86. plt.show()
  87.  
  88.  
  89. # JP Sales, most popular genres
  90. plt.clf()
  91. plt.figure(figsize=(15,10))
  92. sns.barplot(x='Genre', y='JP_Sales', data=JP_popular_genre)
  93. plt.xticks(rotation=45)
  94. plt.show()
  95.  
  96. # which publisher has released the most games
  97.  
  98. # this would be interesting if only franchises/one game is counted
  99. # even if some game has been released on multiple platforms
  100.  
  101. most_games_publishers = games['Publisher'].value_counts()
  102.  
  103. # when was the last game released by THQ? it should be 2011-2013
  104. # last game, Company of Heroes 2 in 2013 (PC)
  105. thq = games[games['Publisher'] == 'THQ']
  106.  
  107. # TODO
  108. # does platform, good critic and user rating mean good sales?
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound or popup ads; we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×