Advertisement
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# One-hot encode each categorical column: the column is removed and its int8
# indicator columns are appended at the right-hand side of the frame.
# NOTE(review): this needs `df`, `pd` and `np` to exist already; in this paste
# the statements sit above the imports and the SQL load -- confirm placement.
for column in ('IssueCode', 'SolutionCode', 'Col1', 'Col2', 'Col3'):
    df = pd.concat(
        [
            # `columns=` instead of a positional axis, which pandas 2.0 rejects.
            df.drop(columns=column),
            # The prefix keeps its trailing underscore so generated names stay
            # as before; `dtype` builds int8 directly, with no second cast.
            pd.get_dummies(df[column], prefix=column + '_', dtype=np.int8),
        ],
        axis=1,
    )
- from sklearn.model_selection import cross_val_predict
- import pymssql
- import pandas as pd
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import mean_squared_error
- from sklearn.metrics import r2_score
- import datetime
- import random
- from sklearn.ensemble import RandomForestRegressor
# Raise the pandas display limits for rows, columns and output width.
for option_name, option_value in (
    ('display.max_rows', 5000),
    ('display.max_columns', 5000),
    ('display.width', 10000),
):
    pd.set_option(option_name, option_value)

# Script-level settings; nothing in the visible part of the script reads them.
TaskTime = 900
RunTime = 120
sFolder = "/mnt/c/temp/"
def Lead0(value):
    """Return ``value`` as a string, prefixed with "0" when it is below 10."""
    text = str(value)
    if value < 10:
        return "0" + text
    return text
# Timestamp of this run as 14 digits: YYYYMMDDHHMMSS (naive local time).
dNow = datetime.datetime.now()
# strftime zero-pads each field, so no manual padding helper is needed.
sNow = dNow.strftime("%Y%m%d%H%M%S")
print(sNow)
# Load the rows of MyTable whose MyDate lies between 1 Jul 2018 and 30 Jun 2019
# into the DataFrame `df`.
conn = pymssql.connect(server="MyServer", database="MyDB", port="1433", user="***", password="*****")
try:
    df = pd.read_sql("SELECT * FROM MyTable where MyDate between '1 jul 2018' and '30 jun 2019'", conn)
finally:
    # Release the connection even when the query raises.
    conn.close()
# NOTE: one-hot encoding the whole frame in one call, `df = pd.get_dummies(df)`,
# was tried at this point and raised a MemoryError, so it stays disabled.
# NOTE(review): `X` and `y` are not defined anywhere in the visible script --
# confirm where they are built.
mdl = RandomForestRegressor(n_estimators=500)
# The original author reports that this call fails with a string-to-float
# conversion error.
y_pred = cross_val_predict(estimator=mdl, X=X, y=y, cv=5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement