Advertisement
Guest User

Untitled

a guest
Nov 12th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.88 KB | None | 0 0
  1. def synthetic_data(size = 1000,n_stages = 4,n_features = 2,noise = False,size_noise = 0.1):
  2. data = pd.DataFrame(np.arange((int(size/n_stages))*n_stages), columns=['id'])
  3. data['stages'] = [0]*(int(size/n_stages))*n_stages
  4. start = 0
  5. feature_cols = []
  6. for stage in range(n_stages):
  7. ways = []
  8. for _ in range(np.random.randint(2,n_stages+1)):
  9. ways.append(np.random.randint(1,np.random.randint(2,n_stages+1)))
  10. ways = pd.unique(ways)
  11. if len(ways) == 1:
  12. if ways[0] == 1:
  13. ways = np.insert(ways,[1],[2])
  14. else:
  15. ways = np.insert(ways,[0],[1])
  16. data.loc[start:start+int(size/n_stages),'stages'] = data.loc[start:start+int(size/n_stages),'stages'].apply(lambda x:ways)
  17.  
  18. start += int(size/n_stages)
  19.  
  20. data = data.explode('stages')
  21. data['stages'] = data['stages'].astype(str)
  22. data['from'] = data.groupby(['id'])['stages'].shift(1)
  23. edges = data.loc[:, ['from', "stages"]].drop_duplicates().dropna().values
  24. edges = [list(map(str, edge)) for edge in edges]
  25. i = 0
  26. for stage in data['stages'].unique():
  27. transitions = list(filter(lambda x: stage in x[0], edges))
  28. if len(transitions) > 1:
  29. stage_from = stage
  30. stages_to = [transition[1] for transition in transitions]
  31.  
  32. train_mask = data['from'] == stage_from
  33. for feat,stage_to in enumerate(stages_to):
  34. mask = (data["stages"] == stage_to) & (data['from'] == stage_from)
  35. data.loc[mask,'X_{}'.format(i)] = (feat + 1) + np.random.random() * 1.5
  36. feature_cols.append("X_{}".format(i))
  37. i+=1
  38.  
  39. data[data.columns[3:]] = data[data.columns[3:]].fillna(0)
  40. return feature_cols,data
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement