Guest User

Untitled

a guest
Oct 20th, 2017
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.03 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. from sklearn.linear_model import LinearRegression
  4.  
  5.  
  6. df=pd.read_csv("train.csv",index_col=False)
  7. df1=pd.read_csv("test.csv",index_col=False)
  8.  
  9. def handle_non_numerical_data(df):
  10. columns = df.columns.values
  11. for column in columns:
  12. text_digit_vals = {}
  13. def convert_to_int(val):
  14. return text_digit_vals[val]
  15.  
  16. if df[column].dtype != np.int64 and df[column].dtype != np.float64:
  17. column_contents = df[column].values.tolist()
  18. unique_elements = set(column_contents)
  19. x = 0
  20. for unique in unique_elements:
  21. if unique not in text_digit_vals:
  22. text_digit_vals[unique] = x
  23. x+=1
  24.  
  25. df[column] = list(map(convert_to_int, df[column]))
  26.  
  27. return df
  28.  
  29.  
  30.  
  31.  
  32. HouseStyle={"1Story":1,
  33. "1.5Fin":2,
  34. "1.5Unf":3,
  35. "2Story":4,
  36. "2.5Fin":5,
  37. "2.5Unf":6,
  38. "SFoyer":7,
  39. "SLvl":8}
  40.  
  41. #print(HouseStyle)
  42. #df['HouseStyle'].update( df['HouseStyle'].map(HouseStyle) )
  43.  
  44.  
  45. #print(df['HouseStyle'])
  46. LotShape={"Reg":1,
  47. "IR1":2,
  48. "IR2":3,
  49. "IR3":4}
  50.  
  51. Utilities={"AllPub":1,
  52. "NoSewr":2,
  53. "NoSeWa":3,
  54. "ELO":4}
  55. LandSlope={"Gtl":1,
  56. "Mod":2,
  57. "Sev":3}
  58. ExterQual={"Ex":1,
  59. "Gd":2,
  60. "TA":3,
  61. "Fa":4,
  62. "Po":5}
  63. ExterCond={"Ex":1,
  64. "Gd":2,
  65. "TA":3,
  66. "Fa":4,
  67. "Po":5}
  68. BsmtQual={"Ex":1,
  69. "Gd":2,
  70. "TA":3,
  71. "Fa":4,
  72. "Po":5,
  73. "NA":6}
  74. BsmtCond={"Ex":1,
  75. "Gd":2,
  76. "TA":3,
  77. "Fa":4,
  78. "Po":5,
  79. "NA":6}
  80. BsmtExposure={
  81. "Gd":1,
  82. "Av":2,
  83. "Mn":3,
  84. "No":4,
  85. "NA":5}
  86. BsmtFinType1={"GLQ":1,
  87. "ALQ":2,
  88. "BLQ":3,
  89. "Rec":4,
  90. "LwQ":5,
  91. "Unf":6,
  92. "NA":7}
  93. BsmtFinType2={"GLQ":1,
  94. "ALQ":2,
  95. "BLQ":3,
  96. "Rec":4,
  97. "LwQ":5,
  98. "Unf":6,
  99. "NA":7}
  100. HeatinQC={"Ex":1,
  101. "Gd":2,
  102. "TA":3,
  103. "Fa":4,
  104. "Po":5}
  105. Electrical={"SBrkr":1,
  106. "FuseA":2,
  107. "FuseF":3,
  108. "FuseP":4,
  109. "Mix":5}
  110. KitchenQual={"Ex":1,
  111. "Gd":2,
  112. "TA":3,
  113. "Fa":4,
  114. "Po":5}
  115. Functional={"Typ":1,
  116. "Min1":2,
  117. "Min2":3,
  118. "Mod":4,
  119. "Maj1":5,
  120. "Maj2":6,
  121. "Sev":7,
  122. "Sal":8}
  123. FireplaceQu={"Ex":1,
  124. "Gd":2,
  125. "TA":3,
  126. "Fa":4,
  127. "Po":5,
  128. "NA":6}
  129.  
  130. GarageFinish={"Fin":1,
  131. "Rfn":2,
  132. "Unf":3,
  133. "NA":4}
  134.  
  135. GarageQual={"Ex":1,
  136. "Gd":2,
  137. "TA":3,
  138. "Fa":4,
  139. "Po":5,
  140. "NA":6}
  141. GarageCond={"Ex":1,
  142. "Gd":2,
  143. "TA":3,
  144. "Fa":4,
  145. "Po":5,
  146. "NA":6}
  147. PavedDrive={"Y":1,
  148. "P":2,
  149. "N":3}
  150. PoolQC={"Ex":1,
  151. "Gd":2,
  152. "TA":3,
  153. "Fa":4,
  154. "NA":5}
  155. Fence={"GdPrv":1,
  156. "MnPrv":2,
  157. "GdWo":3,
  158. "MnWw":4,
  159. "NA":5}
  160.  
  161.  
  162. depfeatures={'HouseStyle':HouseStyle,'Fence':Fence,'PoolQC':PoolQC,'PavedDrive':PavedDrive,'GarageCond':GarageCond,
  163. 'GarageQual':GarageQual,'GarageFinish':GarageFinish,'FireplaceQu':FireplaceQu,'Functional':Functional
  164. ,'KitchenQual':KitchenQual,'Electrical':Electrical,'BsmtFinType2':BsmtFinType2,'BsmtFinType1':BsmtFinType1,
  165. 'BsmtExposure':BsmtExposure,'BsmtCond':BsmtCond,'BsmtQual':BsmtQual,'ExterCond':ExterCond,'ExterQual':ExterQual,
  166. 'LandSlope':LandSlope,'Utilities':Utilities,'LotShape':LotShape,'HeatinQC':HeatinQC}
  167.  
  168.  
  169.  
  170. df=handle_non_numerical_data(df)
  171. df1=handle_non_numerical_data(df1)
  172.  
  173. df = df.replace(np.nan, 0, regex=True)
  174. df1 = df1.replace(np.nan, 0, regex=True)
  175.  
  176.  
  177. test_ID=df1['Id'].values
  178. del df1['Id']
  179. del df['Id']
  180. y=df['SalePrice'].values
  181. x=df1.values
  182. X=df.loc[:, df.columns != 'SalePrice'].values
  183. w=np.random.rand(len(y))
  184. model=LinearRegression(fit_intercept=True, normalize=True, copy_X=True, n_jobs=1)
  185.  
  186. model.fit(X,y,w)
  187. y_pred=model.predict(x)
  188.  
  189. submission = pd.DataFrame({
  190. "Id": test_ID,
  191. "SalePrice": y_pred
  192. })
  193. submission.to_csv('houseprice.csv', index=False)
Add Comment
Please, Sign In to add comment