Advertisement
Guest User

Untitled

a guest
Apr 19th, 2019
135
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.77 KB | None | 0 0
  1. def load_dataset(split="trn_set", limit=None, ignore_categorical=False):
  2. sql = """
  3. SELECT o.*, f1.*, f2.*, f3.*, f4.*,
  4. EXTRACT(MONTH FROM o.dt) AS month
  5. FROM %s AS t
  6. JOIN Online AS o
  7. ON t.index = o.index
  8. JOIN features_group_1 AS f1
  9. ON t.index = f1.index
  10. JOIN features_group_2 AS f2
  11. ON t.index = f2.index
  12. JOIN features_group_3 AS f3
  13. ON t.index = f3.index
  14. JOIN features_group_4 AS f4
  15. ON t.index = f4.index
  16. """%split
  17. if limit:
  18. sql += " LIMIT %i"%limit
  19.  
  20. df = pd.read_sql_query(sql.replace('\n', " ").replace("\t", " "), engine)
  21. df.event1 = df.event1.fillna(0)
  22. X = df.drop(["index", "event2", "dt", "day", "session", "visitor", "custno"], axis=1)
  23. Y = df.event2
  24. return X, Y
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement