daily pastebin goal
49%
SHARE
TWEET

Untitled

a guest Jan 12th, 2019 105 in 20 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import pandas as pd #version 0.23.0
  2. import numpy as np  #version 1.14.3
  3.  
  4. def dummy_prep(data, method=None):
  5.     varlist = data.columns[(data.dtypes == 'category').values]
  6.     if not method:
  7.         return pd.get_dummies(data.loc[:,data.dtypes == 'category'])
  8.     if method == 'drop_first':
  9.         return pd.get_dummies(data.loc[:,data.dtypes == 'category'], drop_first=True)
  10.     if method == 'deviation':
  11.         dummies = pd.get_dummies(data.loc[:,data.dtypes == 'category'])
  12.         dummylist = {i:[x for x in dummies.columns if i in x] for i in varlist}
  13.         for var in dummylist:
  14.             dropout = dummylist[var][0]
  15.             keepers = dummylist[var][1:]
  16.             dummies.loc[dummies[dropout]==1, keepers] = -1
  17.             del dummies[dropout]
  18.         return dummies
  19.    
  20. test1 = pd.DataFrame()
  21. test1['cat2'] = pd.Categorical(np.random.randint(low=0, high=2, size=100))
  22. test1['cat3'] = pd.Categorical(np.random.randint(low=0, high=3, size=100))
  23. test1['cat4'] = pd.Categorical(np.random.randint(low=0, high=4, size=100))
  24. print(test1.groupby('cat4').cat3.count())
  25. print(test1.head())
  26. dummy_prep(test1[['cat4','cat3','cat2']], method='deviation').head()
  27.  
  28. ####################### OUTPUT#########################
  29.  
  30. # https://imgur.com/a/o8mSqDC
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top