Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # data analysis workshop 1, 13.4.2021
- # example, car sales
- import numpy as np
- import pandas as pd
- import seaborn as sns
- import matplotlib.pyplot as plt
- from scipy import stats
def percentage_difference(row):
    """Return the fraction of the present price that is lost at resale.

    Args:
        row: A mapping-like record (for example one DataFrame row) that
            provides 'Selling_Price' and 'Present_Price' entries.

    Returns:
        ``1 - selling / present`` expressed with two decimals, where the
        price ratio itself is rounded to two decimals first.

    Note:
        A 'Present_Price' of zero is not special-cased; the division is
        performed as-is.
    """
    selling = row['Selling_Price']
    present = row['Present_Price']
    ratio = round(selling / present, 2)
    # Round once more after subtracting: in binary floating point
    # 1 - 0.56 evaluates to 0.43999999999999995 rather than 0.44.
    return round(1 - ratio, 2)
# Load the car-sales data; the car name column is not used in this analysis.
cars = pd.read_csv('car data.csv')
cars = cars.drop(columns='Car_Name')

# Per-row share of the present price that is lost at resale.
cars['Price_Difference'] = cars.apply(percentage_difference, axis=1)

# Reorder the columns so Price_Difference sits next to the two prices,
# then discard 'Owner', which is not analysed below.
column_names = ['Year', 'Selling_Price', 'Present_Price', 'Price_Difference', 'Kms_Driven', 'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner']
cars = cars.reindex(columns=column_names)
cars = cars.drop(columns='Owner')

# Subset matching the variable name: diesel cars with automatic transmission.
# NOTE(review): the 'Automatic' label is taken from the comment further down;
# confirm it matches the values stored in the Transmission column.
automatic_diesels = cars[
    (cars['Fuel_Type'] == 'Diesel') & (cars['Transmission'] == 'Automatic')
]

# the amount of extra costs (price difference) is affected by car year and kms driven
# Correlate numeric columns only: Fuel_Type, Seller_Type and Transmission
# hold strings, and newer pandas versions raise when asked to correlate them.
correlations = cars.select_dtypes(include='number').corr()

# pairplot builds its own figure, so no clf()/figure() calls are needed here.
sns.pairplot(cars)

# Automatic transmission cars tend to be more expensive
sns.pairplot(cars, hue='Transmission')

# Also, the Diesel fuel type seems to be more valuable in used cars
sns.pairplot(cars, hue='Fuel_Type')

# boxplot draws on the current axes, so give it a fresh figure of its own.
plt.figure()
sns.boxplot(x='Transmission', y='Present_Price', data=cars, hue='Fuel_Type')

# Display every figure when the file is run as a plain script.
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement