Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Normalise furniture listings from ``combine.csv`` into ``combineFinal.csv``.

The script fills in a furniture TYPE for every listing (from keywords in the
listing Name and from raw scraped TYPE strings) and converts the Price column
from text such as ``"$1,200.50"`` to a float.
"""

import pandas as pd

# Literal, case-sensitive substrings looked up in the listing Name.  Rules are
# applied top to bottom, so a later rule overrides an earlier one on the same
# row (e.g. a Name containing both "leeper" and "Bed" ends up as 'Sofa Bed').
NAME_RULES = (
    (("eclin",), "Recliner"),
    (("leeper",), "Sleeper sofa"),
    (("Bed",), "Sofa Bed"),
    (("Love",), "Loveseat"),
    (("ection",), "Sectional sofa"),
    (("futon",), "Futon"),
)

# Literal, case-sensitive substrings looked up in the TYPE column itself.  Each
# rule sees the result of the rules before it.  A rule fires when ANY of its
# substrings is present.
TYPE_RULES = (
    # Must run before the generic " Sofa" rule: every "'Type: Sofa Bed" string
    # also contains " Sofa" and would otherwise be flattened to plain 'Sofa'.
    (("'Type: Sofa Bed",), "Sofa Bed"),
    ((" Sofa", "'Type: Sofa'"), "Sofa"),
    ((" Sleeper sofa",), "Sleeper sofa"),
    (("Indoor/outdoor sofa",), "Sofa"),
    (("Sectional",), "Sectional sofa"),
    (("'Type: Chaise'",), "Chaise"),
    (("'Type: 3 2 1 Combination',",), "Futon"),  # JH
    (("'Room: Living Room',", "'Room: Any Room',"), "Sofa"),  # JH
    (("4-seat sofa, in/outdoor,",), "Sofa"),  # JH
    (("Modular corner sofa, 3-seat", "'Type: Corner',"), "Sofa"),  # JH
    # Catch-all: anything still carrying a raw "Type" fragment (or marked
    # "Unbranded") after the specific rules above falls back to 'Sofa'.
    (("Unbranded", "Type"), "Sofa"),
)


def _contains_any(values, needles):
    """Return a boolean mask of the entries containing at least one needle.

    Needles are matched as literal, case-sensitive substrings.  Missing or
    non-string entries never match.
    """
    mask = pd.Series(False, index=values.index)
    for needle in needles:
        # Pass the alternatives one at a time: the second positional argument
        # of ``str.contains`` is ``case``, not another pattern.
        mask |= values.str.contains(needle, regex=False, na=False)
    return mask


def prepare_rows(data):
    """Return a copy of *data* with TYPE defaulted to 'Sofa' and unnamed rows dropped.

    Args:
        data: DataFrame with at least ``Name`` and ``TYPE`` columns.

    Returns:
        A new DataFrame; *data* itself is left untouched.
    """
    result = data.copy()
    # Plain assignment instead of ``fillna(inplace=True)`` on a column, which
    # is a chained assignment and does nothing under pandas Copy-on-Write.
    result["TYPE"] = result["TYPE"].fillna("Sofa")
    # ``.copy()`` so later ``.loc`` writes target a real frame, not a view.
    return result[result["Name"].notna()].copy()


def assign_types(data):
    """Return a copy of *data* whose TYPE column holds normalised categories.

    ``NAME_RULES`` are applied first (keywords in ``Name``), then
    ``TYPE_RULES`` (raw strings already present in ``TYPE``).

    Args:
        data: DataFrame with string ``Name`` and ``TYPE`` columns, as produced
            by :func:`prepare_rows`.

    Returns:
        A new DataFrame; *data* itself is left untouched.
    """
    result = data.copy()
    for needles, label in NAME_RULES:
        result.loc[_contains_any(result["Name"], needles), "TYPE"] = label
    for needles, label in TYPE_RULES:
        result.loc[_contains_any(result["TYPE"], needles), "TYPE"] = label
    return result


def clean_prices(data):
    """Return a copy of *data* with ``Price`` converted to numbers.

    Leading/trailing ``$`` signs and thousands separators are removed; values
    that still cannot be parsed become NaN.

    Args:
        data: DataFrame with a ``Price`` column.

    Returns:
        A new DataFrame; *data* itself is left untouched.
    """
    result = data.copy()
    text = result["Price"].astype(str)
    text = text.str.strip("$").str.replace(",", "", regex=False)
    result["Price"] = pd.to_numeric(text, errors="coerce")
    return result


def main(source=r"combine.csv", destination=r"combineFinal.csv"):
    """Read *source*, normalise TYPE and Price, and write *destination*.

    The rows that survive the Name filter are printed before the TYPE rules
    run.
    """
    pd.set_option("display.expand_frame_repr", False)
    pd.set_option("display.max_columns", 500)

    data = pd.read_csv(source, encoding="unicode_escape")
    data = prepare_rows(data)
    print(data)

    data = assign_types(data)
    data = clean_prices(data)
    data.to_csv(destination, encoding="UTF-8", index=False)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement