Advertisement
Guest User

Untitled

a guest
Feb 24th, 2020
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.43 KB | None | 0 0
  1. import pandas as pd
  2. pd.set_option('expand_frame_repr', False)
  3. pd.set_option('display.max_columns', 30)
  4. pd.set_option('display.max_columns', 500)
  5. data = pd.read_csv(r"combine.csv", encoding = "unicode_escape")
  6. #print(data)
  7. Craiglist_data = data[data['Website'] == "Craigslist"]
  8.  
  9. data['TYPE'].fillna('Sofa', inplace=True)
  10. data = data[data['Name'].notna()]
  11. print(data)
  12.  
  13. data.loc[data['Name'].str.contains("eclin"),'TYPE'] = 'Recliner'
  14. data.loc[data['Name'].str.contains("leeper", "Bed"),'TYPE'] = 'Sleeper sofa'
  15. data.loc[data['Name'].str.contains("Bed"),'TYPE'] = 'Sofa Bed'
  16. data.loc[data['Name'].str.contains("Love"),'TYPE'] = 'Loveseat'
  17. data.loc[data['Name'].str.contains("ection"),'TYPE'] = 'Sectional sofa'
  18. data.loc[data['Name'].str.contains("futon"),'TYPE'] = 'Futon'
  19. data.loc[data['TYPE'].str.contains(" Sofa", "'Type: Sofa'"), 'TYPE'] = 'Sofa'
  20. data.loc[data['TYPE'].str.contains(" Sleeper sofa"), 'TYPE'] = 'Sleeper sofa'
  21. data.loc[data['TYPE'].str.contains("'Type: Sofa Bed"), 'TYPE'] = 'Sofa Bed'
  22. data.loc[data['TYPE'].str.contains("Indoor/outdoor sofa"), 'TYPE'] = 'Sofa'
  23. data.loc[data['TYPE'].str.contains("Sectional"), 'TYPE'] = 'Sectional sofa'
  24. data.loc[data['TYPE'].str.contains("'Type: Chaise'"), 'TYPE'] = 'Chaise'
  25.  
  26. data.loc[data['TYPE'].str.contains("'Type: 3 2 1 Combination',"), 'TYPE'] = 'Futon' #JH
  27. data.loc[data['TYPE'].str.contains("'Room: Living Room',","'Room: Any Room',"), 'TYPE'] = 'Sofa' #JH
  28. data.loc[data['TYPE'].str.contains("4-seat sofa, in/outdoor,"), 'TYPE'] = 'Sofa' #JH
  29. data.loc[data['TYPE'].str.contains("Modular corner sofa, 3-seat","'Type: Corner',",), 'TYPE'] = 'Sofa' #JH
  30.  
  31.  
  32. data.loc[data['TYPE'].str.contains("Unbranded", "Type"),'TYPE'] = 'Sofa'
  33.  
  34. #print(data)
  35.  
  36.  
  37. data[data['TYPE'] == 'Recliner']
  38. data[data['TYPE'] == 'Sleeper sofa']
  39. data[data['TYPE'] == 'Loveseat']
  40. data[data['TYPE'] == 'Sectional sofa']
  41. pd.set_option('display.max_rows', None)
  42.  
  43.  
  44. data['TYPE'].nunique()
  45.  
  46. data.TYPE.unique()
  47.  
  48. type(data.Price[0])
  49. data['Price'] = data['Price'].astype(str)
  50. data['Price'] = data.Price.apply(lambda x: x.strip('$')) #removing $ sign
  51. data['Price'] = data.Price.apply(lambda x: x.replace(",", "")) #removing , sign
  52.  
  53.  
  54. #data['TYPE'] = data['TYPE'].astype(str)
  55. #data['TYPE'] = [x[7:] for x in data.TYPE]
  56.  
  57. data['Price'] = data['Price'].astype(float, errors = 'ignore')
  58. data['Price'] = pd.to_numeric(data['Price'],errors='coerce')
  59.  
  60.  
  61.  
  62. data.to_csv(r'combineFinal.csv',encoding='UTF-8', index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement