Advertisement
Guest User

proton interview

a guest
Sep 21st, 2019
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.31 KB | None | 0 0
  1. # read file
  2. originaldata = pd.read_csv("./amazon_co-ecommerce_sample.csv")
  3. print(originaldata.shape)
  4. originaldata
  5. #importing data analysis packages
  6. import numpy as np
  7. import pandas as pd
  8. import random as rnd
  9. #importing data visualization packages
  10. import seaborn as sns
  11. import matplotlib.pyplot as plt
  12. %matplotlib inline
  13.  
  14. originaldata.dropna(how='all', axis='columns')
  15.  
  16. #getting rid of unnamed columns with Nan
  17. originaldata.columns
  18. originaldata.columns.str.match('Unnamed')
  19. originaldata.loc[:, ~originaldata.columns.str.match('Unnamed')]
  20.  
  21. originaldata.isnull().sum()
  22.  
  23. originaldata = pd.read_csv('amazon_co-ecommerce_sample.csv', index_col=0)
  24. originaldata.drop(originaldata.columns[originaldata.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
  25. originaldata.dropna(how='all', axis='columns')
  26.  
  27. #imputation and interpolation
  28. originaldata.isnull().sum()
  29.  
  30. from sklearn.preprocessing import Imputer
  31. imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
  32. imp.fit(originaldata)
  33. originaldata= imp.transform(originaldata)
  34.  
  35. nullFeatures = [feature for feature, numNull in originaldata.isnull().sum().iteritems() if numNull > 0]
  36. nullFeatures
  37.  
  38. for n in nullFeatures[2:]:
  39. originaldata[n] = originaldata[n].interpolate()
  40. originaldata.isnull().sum()
  41.  
  42. pp.ProfileReport(originaldata)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement