Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def load_dataset(split="trn_set", limit=None, ignore_categorical=False):
    """Read one data split from the database and return features and target.

    The table named by ``split`` is joined on ``index`` against ``Online``
    and the four ``features_group_N`` tables; a ``month`` column extracted
    from ``Online.dt`` is added to the selection.

    Args:
        split: Name of the table to select rows from. It is interpolated
            directly into the SQL text, so it must come from a trusted
            source.
        limit: Maximum number of rows to fetch. Any falsy value (``None``
            or ``0``) means no ``LIMIT`` clause is appended.
        ignore_categorical: Accepted for interface compatibility; this
            function does not currently read it.

    Returns:
        A tuple ``(X, Y)``: ``X`` is the query result without the
        identifier/target columns listed below, ``Y`` is the ``event2``
        column.

    Note:
        Relies on the module-level names ``pd`` and ``engine``.
    """
    query_template = """
    SELECT o.*, f1.*, f2.*, f3.*, f4.*,
    EXTRACT(MONTH FROM o.dt) AS month
    FROM %s AS t
    JOIN Online AS o
    ON t.index = o.index
    JOIN features_group_1 AS f1
    ON t.index = f1.index
    JOIN features_group_2 AS f2
    ON t.index = f2.index
    JOIN features_group_3 AS f3
    ON t.index = f3.index
    JOIN features_group_4 AS f4
    ON t.index = f4.index
    """
    limit_clause = (" LIMIT %i" % limit) if limit else ""
    query = (query_template % split) + limit_clause

    # Collapse the multi-line literal into a single line before sending it.
    for whitespace in ("\n", "\t"):
        query = query.replace(whitespace, " ")

    frame = pd.read_sql_query(query, engine)

    # A missing event1 value is replaced with 0.
    frame.event1 = frame.event1.fillna(0)

    target = frame.event2
    # Identifier columns and the target itself are removed from the features.
    non_feature_columns = [
        "index",
        "event2",
        "dt",
        "day",
        "session",
        "visitor",
        "custno",
    ]
    features = frame.drop(non_feature_columns, axis=1)
    return features, target
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement