Advertisement
Guest User

Untitled

a guest
Feb 24th, 2020
226
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.61 KB | None | 0 0
  1. import pandas as pd
  2. from tabulate import tabulate
  3. import pandas as pd
  4. import matplotlib.pyplot as plt
  5. import seaborn as sns
  6. import numpy as np
  7.  
  8. # Standard plotly imports
  9. import chart_studio.plotly as py
  10. import plotly.graph_objs as go
  11. from plotly.offline import iplot, init_notebook_mode
  12. # Using plotly + cufflinks in offline mode
  13. import cufflinks
  14. data=pd.read_csv(r"Data/combineFinal.csv",encoding="unicode_escape")
  15. def processTypeOfSofa(answer):
  16. if answer==1:
  17. return(data[data.loc[:,"TYPE"]=="Sectional sofa"])
  18. elif answer==2:
  19. return(data[data.loc[:,"TYPE"]=="Sleeper sofa"])
  20. elif answer==3:
  21. return(data[data.loc[:,"TYPE"]=="Loveseat"])
  22. elif answer==4:
  23. return(data[data.loc[:,"TYPE"]=="Chaise"])
  24. elif answer==5:
  25. return(data[data.loc[:,"TYPE"]=="Sofa Bed"])
  26. elif answer==6:
  27. return(data[data.loc[:,"TYPE"]=="Sleeper sofa"])
  28. elif answer==7:
  29. return(data[data.loc[:,"TYPE"]=="Recliner"])
  30. elif answer==8:
  31. return(data[data.loc[:,"TYPE"]=="Sofa"])
  32. else:
  33. return("Invalid input")
  34. def processSource(choiceOfSource):
  35. if(choiceOfSource==1):
  36. return(data[data.loc[:,"Website"]=="Craigslist"])
  37. elif(choiceOfSource==2):
  38. return(data[data.loc[:,"Website"]=="Ikea"])
  39. elif(choiceOfSource==3):
  40. return(data[data.loc[:,"Website"]=="Ebay"])
  41.  
  42. def giveStatistics(dataFrame):
  43. #df = pd.dataframe(dataFrame)
  44.  
  45. print("")
  46. print('The lowest price available is: ' + str((dataFrame.Price.min())))
  47. print('The highest price available is: ' + str((dataFrame.Price.max())))
  48. print('The average price available is: ' + str((dataFrame.Price.mean())))
  49.  
  50. def dataframe_Table(dataframe):
  51.  
  52. #df1 = dataframe['Name', 'Price', 'TYPE','Website']
  53. print('trial')
  54. cols=list(dataframe.columns)
  55. cols.remove("Product URL")
  56. cols.remove("REVIEW")
  57. pdtabulate = lambda dataframe:tabulate(dataframe[cols], headers = 'keys', tablefmt = 'psql',showindex=False)
  58. print(pdtabulate(dataframe))
  59.  
  60. def statistics_graph(dataframe):
  61.  
  62. cufflinks.go_offline(connected=True)
  63. init_notebook_mode(connected=True)
  64. # Read data from file 'filename.csv'
  65. # (in the same directory that your python process is based)
  66. # Control delimiters, rows, column names with read_csv (see later)
  67.  
  68. print(data)
  69. print(data.TYPE)
  70.  
  71. type(data.Price[0])
  72. data['Price'] = data['Price'].astype(str)
  73. data['Price'] = data.Price.apply(lambda x: x.strip('$')) # removing $ sign
  74. data['Price'] = data.Price.apply(lambda x: x.replace(",", "")) # removing , sign
  75. print(data.Price)
  76.  
  77. data['TYPE'] = data['TYPE'].astype(str)
  78. # data['TYPE'] = [x[7:] for x in data.TYPE]
  79.  
  80. data['Price'] = data['Price'].astype(float, errors='ignore')
  81. data['Price'] = pd.to_numeric(data['Price'], errors='coerce')
  82.  
  83.  
  84.  
  85. ax = sns.countplot(x="Website", data=data)
  86.  
  87. # plot data
  88. fig, ax = plt.subplots(figsize=(15, 7))
  89. # use unstack()
  90. data.groupby(['Website', 'TYPE']).count()['Price'].unstack().plot(ax=ax)
  91.  
  92.  
  93.  
  94. sns.set(
  95. rc={'figure.figsize': (18, 6)},
  96. style="white"
  97. )
  98. sns.violinplot(
  99. x='Website',
  100. y='Price',
  101. hue='TYPE',
  102. data=data
  103. )
  104. sns.despine()
  105. import plotly.express as px
  106. fig = px.scatter(
  107. data_frame=data,
  108. x="TYPE",
  109. y="Price",
  110. animation_group="TYPE",
  111. size="Price",
  112. color="Website",
  113. hover_name="TYPE",
  114. facet_col="Website",
  115. size_max=45
  116. )
  117. fig.show()
  118.  
  119. filtered_class = data[data['Website'] != 'AVANTGARDE']
  120. chart = sns.lmplot("TYPE", "Price", data=filtered_class, hue="TYPE", fit_reg=False, col='Website', col_wrap=2)
  121. for ax in chart.axes.flat:
  122. for label in ax.get_yticklabels():
  123. label.set_rotation(0)
  124.  
  125. data.sort_values(by=['Price'], inplace=True)
  126. g = sns.FacetGrid(data, col="Website", col_wrap=3, height=4)
  127. g = (g.map(plt.hist, "Price", bins=np.arange(0, 2300, 500)))
  128.  
  129. g1 = sns.FacetGrid(data, col='TYPE', col_wrap=3, height=4)
  130. g1 = (g1.map(plt.hist, "Price", bins=np.arange(0, 2300,
  131. 500))) # From the graph, it seems like companies chose not to categorize sofas either explicitly or in the name. It may be to increase visibility of certain sofas which can be suggested to users irrespective of the filter they applied.
  132.  
  133. data_pivot = data.pivot(columns='Website', values='TYPE').iplot(
  134. kind='box',
  135. yTitle='Website',
  136. title='Type of sofas by website')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement