Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- from tabulate import tabulate
- import pandas as pd
- import matplotlib.pyplot as plt
- import seaborn as sns
- import numpy as np
- # Standard plotly imports
- import chart_studio.plotly as py
- import plotly.graph_objs as go
- from plotly.offline import iplot, init_notebook_mode
- # Using plotly + cufflinks in offline mode
- import cufflinks
# Listing table used by every function below; loaded once at import time from
# a path relative to the current working directory.
data = pd.read_csv(
    r"Data/combineFinal.csv",
    encoding="unicode_escape",
)
def processTypeOfSofa(answer):
    """Return the rows of the module-level ``data`` frame for one sofa type.

    ``answer`` is a menu number from 1 to 8. It is compared with ``==``
    against each option in order, and the rows whose ``TYPE`` column equals
    the matching label are returned.

    Any value that matches no option yields the string ``"Invalid input"``
    rather than a DataFrame, so callers must check the result type.
    """
    # Menu number -> label looked up in the "TYPE" column.
    # NOTE(review): options 2 and 6 both select "Sleeper sofa"; option 6 may
    # have been meant for another type -- confirm against the menu shown to
    # the user before changing it.
    menu = (
        (1, "Sectional sofa"),
        (2, "Sleeper sofa"),
        (3, "Loveseat"),
        (4, "Chaise"),
        (5, "Sofa Bed"),
        (6, "Sleeper sofa"),
        (7, "Recliner"),
        (8, "Sofa"),
    )
    for option, sofa_type in menu:
        if answer == option:
            type_column = data.loc[:, "TYPE"]
            return data[type_column == sofa_type]
    return "Invalid input"
def processSource(choiceOfSource):
    """Return the rows of the module-level ``data`` frame for one website.

    ``choiceOfSource`` is a menu number: 1 selects "Craigslist", 2 selects
    "Ikea" and 3 selects "Ebay", matched against the ``Website`` column.
    Any other value returns ``None``.
    """
    websites = ("Craigslist", "Ikea", "Ebay")
    for option, website in enumerate(websites, start=1):
        if choiceOfSource == option:
            site_column = data.loc[:, "Website"]
            return data[site_column == website]
    # No option matched: same result as falling off the end of the function.
    return None
def giveStatistics(dataFrame):
    """Print the lowest, highest and average of ``dataFrame``'s ``Price`` column.

    A blank line is printed first, then one line per statistic. Nothing is
    returned.
    """
    print("")
    prices = dataFrame.Price
    # Each aggregate is evaluated immediately before its line is printed.
    report = (
        ('The lowest price available is: ', prices.min),
        ('The highest price available is: ', prices.max),
        ('The average price available is: ', prices.mean),
    )
    for prefix, aggregate in report:
        print(prefix + str(aggregate()))
def dataframe_Table(dataframe):
    """Print ``dataframe`` as a psql-style table without its URL and review columns.

    The "Product URL" and "REVIEW" columns are left out of the printed table.
    Both must be present: ``list.remove`` raises ``ValueError`` otherwise.
    The row index is not shown.
    """
    print('trial')
    visible_columns = list(dataframe.columns)
    for hidden in ("Product URL", "REVIEW"):
        visible_columns.remove(hidden)
    rendered = tabulate(
        dataframe[visible_columns],
        headers='keys',
        tablefmt='psql',
        showindex=False,
    )
    print(rendered)
def statistics_graph(dataframe):
    """Clean the module-level ``data`` frame and draw the exploratory charts.

    NOTE(review): ``dataframe`` is kept for interface compatibility but is
    never read. Every step works on the module-level ``data`` frame and
    modifies it in place: ``Price`` is converted to float (unparseable
    values become NaN), ``TYPE`` is converted to str, and the rows are
    sorted by ``Price``. Confirm whether callers expect the frame they pass
    to be plotted instead.

    Charts drawn, in order: listing count per website, listing count per
    website and type, price violin plot per website and type, an interactive
    plotly scatter, a per-website seaborn scatter grid, price histograms per
    website and per type, and a cufflinks box plot.

    Returns:
        None. The function only prints and plots.
    """
    import plotly.express as px

    cufflinks.go_offline(connected=True)
    init_notebook_mode(connected=True)

    print(data)
    print(data.TYPE)

    # Strip the currency symbol and thousands separators while the column is
    # still text, then convert it to numbers in one pass.
    price_text = data['Price'].astype(str)
    data['Price'] = price_text.str.strip('$').str.replace(',', '', regex=False)
    print(data.Price)
    data['TYPE'] = data['TYPE'].astype(str)
    # errors='coerce' turns anything unparseable into NaN; the trailing
    # astype keeps the column float even when every value is a whole number.
    data['Price'] = pd.to_numeric(data['Price'], errors='coerce').astype(float)

    # Number of listings per website.
    sns.countplot(x="Website", data=data)

    # Number of priced listings per website, one line per sofa type.
    fig, ax = plt.subplots(figsize=(15, 7))
    data.groupby(['Website', 'TYPE']).count()['Price'].unstack().plot(ax=ax)

    sns.set(
        rc={'figure.figsize': (18, 6)},
        style="white"
    )
    # The figsize set above only applies to figures created afterwards, so
    # open a new one; otherwise the violins are drawn over the line chart.
    plt.figure()
    sns.violinplot(
        x='Website',
        y='Price',
        hue='TYPE',
        data=data
    )
    sns.despine()

    # Rows whose price could not be parsed are NaN after the coercion above;
    # they are dropped here because they cannot be used as marker sizes.
    priced = data.dropna(subset=['Price'])
    fig = px.scatter(
        data_frame=priced,
        x="TYPE",
        y="Price",
        animation_group="TYPE",
        size="Price",
        color="Website",
        hover_name="TYPE",
        facet_col="Website",
        size_max=45
    )
    fig.show()

    filtered_class = data[data['Website'] != 'AVANTGARDE']
    # x and y are passed by keyword: seaborn 0.12+ no longer accepts them
    # positionally.
    chart = sns.lmplot(
        x="TYPE",
        y="Price",
        data=filtered_class,
        hue="TYPE",
        fit_reg=False,
        col='Website',
        col_wrap=2
    )
    for facet_ax in chart.axes.flat:
        for label in facet_ax.get_yticklabels():
            label.set_rotation(0)

    data.sort_values(by=['Price'], inplace=True)
    price_bins = np.arange(0, 2300, 500)
    site_grid = sns.FacetGrid(data, col="Website", col_wrap=3, height=4)
    site_grid.map(plt.hist, "Price", bins=price_bins)
    # From the graph, it seems like companies chose not to categorize sofas
    # either explicitly or in the name. It may be to increase visibility of
    # certain sofas which can be suggested to users irrespective of the
    # filter they applied.
    type_grid = sns.FacetGrid(data, col='TYPE', col_wrap=3, height=4)
    type_grid.map(plt.hist, "Price", bins=price_bins)

    data.pivot(columns='Website', values='TYPE').iplot(
        kind='box',
        yTitle='Website',
        title='Type of sofas by website')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement