Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- from sklearn.linear_model import LinearRegression
# Load the training table into `df` and the test table into `df1`.
# index_col=False stops pandas from using the first CSV column as the index.
df, df1 = (
    pd.read_csv(csv_name, index_col=False)
    for csv_name in ("train.csv", "test.csv")
)
def handle_non_numerical_data(df):
    """Replace every non-numeric column of *df* with integer category codes.

    Columns whose dtype is ``int64`` or ``float64`` are left untouched. In
    every other column each distinct value is given an integer code, numbered
    from 0 in order of first appearance, and the column is overwritten with
    those codes.

    Codes follow first-appearance order rather than ``set`` iteration order:
    iterating a set of strings depends on the interpreter's hash seed, which
    would give the same data different codes on different runs.

    Args:
        df: pandas DataFrame to encode. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    for column in df.columns.values:
        dtype = df[column].dtype
        if dtype != np.int64 and dtype != np.float64:
            codes = {}
            # len(codes) is evaluated before setdefault inserts, so an unseen
            # value receives the next free code and a seen value keeps its
            # existing one. Building and applying the mapping in one pass
            # over the column also guarantees every looked-up value is a key.
            df[column] = [
                codes.setdefault(value, len(codes)) for value in df[column]
            ]
    return df
def _ordinal(*levels):
    """Return a dict mapping each level name to its 1-based position."""
    return {level: rank for rank, level in enumerate(levels, start=1)}


# Level sequences shared by several of the tables below.
_QUALITY_LEVELS = ("Ex", "Gd", "TA", "Fa", "Po")
_QUALITY_LEVELS_WITH_NA = _QUALITY_LEVELS + ("NA",)
_BSMT_FINISH_LEVELS = ("GLQ", "ALQ", "BLQ", "Rec", "LwQ", "Unf", "NA")

# Hand-written ordinal codes, one table per categorical feature. Each table
# numbers its levels from 1 in the order listed.
# NOTE(review): pandas.read_csv parses the text "NA" as a missing value by
# default, so the "NA" keys may never match loaded data -- confirm.
HouseStyle = _ordinal(
    "1Story", "1.5Fin", "1.5Unf", "2Story", "2.5Fin", "2.5Unf", "SFoyer", "SLvl"
)
# Disabled experiment kept from the original script:
#   print(HouseStyle)
#   df['HouseStyle'].update( df['HouseStyle'].map(HouseStyle) )
#   print(df['HouseStyle'])
LotShape = _ordinal("Reg", "IR1", "IR2", "IR3")
Utilities = _ordinal("AllPub", "NoSewr", "NoSeWa", "ELO")
LandSlope = _ordinal("Gtl", "Mod", "Sev")
ExterQual = _ordinal(*_QUALITY_LEVELS)
ExterCond = _ordinal(*_QUALITY_LEVELS)
BsmtQual = _ordinal(*_QUALITY_LEVELS_WITH_NA)
BsmtCond = _ordinal(*_QUALITY_LEVELS_WITH_NA)
BsmtExposure = _ordinal("Gd", "Av", "Mn", "No", "NA")
BsmtFinType1 = _ordinal(*_BSMT_FINISH_LEVELS)
BsmtFinType2 = _ordinal(*_BSMT_FINISH_LEVELS)
# NOTE(review): "HeatinQC" may be a misspelling of the heating-quality column
# name -- confirm against the train.csv header before using it as a key.
HeatinQC = _ordinal(*_QUALITY_LEVELS)
Electrical = _ordinal("SBrkr", "FuseA", "FuseF", "FuseP", "Mix")
KitchenQual = _ordinal(*_QUALITY_LEVELS)
Functional = _ordinal(
    "Typ", "Min1", "Min2", "Mod", "Maj1", "Maj2", "Sev", "Sal"
)
FireplaceQu = _ordinal(*_QUALITY_LEVELS_WITH_NA)
GarageFinish = _ordinal("Fin", "Rfn", "Unf", "NA")
GarageQual = _ordinal(*_QUALITY_LEVELS_WITH_NA)
GarageCond = _ordinal(*_QUALITY_LEVELS_WITH_NA)
PavedDrive = _ordinal("Y", "P", "N")
PoolQC = _ordinal("Ex", "Gd", "TA", "Fa", "NA")
Fence = _ordinal("GdPrv", "MnPrv", "GdWo", "MnWw", "NA")

# Feature name -> ordinal table, in the same key order as the original literal.
depfeatures = {
    'HouseStyle': HouseStyle,
    'Fence': Fence,
    'PoolQC': PoolQC,
    'PavedDrive': PavedDrive,
    'GarageCond': GarageCond,
    'GarageQual': GarageQual,
    'GarageFinish': GarageFinish,
    'FireplaceQu': FireplaceQu,
    'Functional': Functional,
    'KitchenQual': KitchenQual,
    'Electrical': Electrical,
    'BsmtFinType2': BsmtFinType2,
    'BsmtFinType1': BsmtFinType1,
    'BsmtExposure': BsmtExposure,
    'BsmtCond': BsmtCond,
    'BsmtQual': BsmtQual,
    'ExterCond': ExterCond,
    'ExterQual': ExterQual,
    'LandSlope': LandSlope,
    'Utilities': Utilities,
    'LotShape': LotShape,
    'HeatinQC': HeatinQC,
}
# --- Encode features, fit a linear regression, write the submission file ---

# Keep the test identifiers for the submission and take the target from the
# training table before the feature tables are combined.
test_ID = df1['Id'].values
y = df['SalePrice'].values

# Encode the training and test features in ONE call. Encoding the two frames
# separately lets the same category receive different integer codes in each,
# so the fitted coefficients would be applied to mismatched codes.
train_features = df.drop(columns=['Id', 'SalePrice'])
# Selecting by the training column list drops 'Id' and fails loudly with a
# KeyError if the test table lacks a feature the model is trained on.
test_features = df1[train_features.columns]
n_train = len(train_features)
features = pd.concat([train_features, test_features], ignore_index=True)
features = handle_non_numerical_data(features)
# Every column is numeric after encoding; remaining gaps are filled with 0.
features = features.fillna(0)

X = features.iloc[:n_train].values
x = features.iloc[n_train:].values

# `normalize` is omitted: scikit-learn 1.2+ rejects it, and rescaling the
# features does not change ordinary least-squares predictions.
# No sample weights are passed: the random weights used previously made the
# fit arbitrary and different on every run.
model = LinearRegression(fit_intercept=True, copy_X=True, n_jobs=1)
model.fit(X, y)
y_pred = model.predict(x)

submission = pd.DataFrame({
    "Id": test_ID,
    "SalePrice": y_pred,
})
submission.to_csv('houseprice.csv', index=False)
Add Comment
Please, Sign In to add comment