furas

pandas - convert Series to columns

Jul 9th, 2020
26
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import pandas as pd
  2.  
  3. def test1():
  4.    
  5.     df = pd.DataFrame({
  6.         'col1': [1, 2, 3],
  7.         'colX': [
  8.             pd.Series([1, 2, 3, None, 0, '']),
  9.             pd.Series([4, 5, 6, None, 0, '']),
  10.             pd.Series([7, 8, 9, None, 0, '']),
  11.         ]        
  12.     })
  13.    
  14.     print('\n--- info ---\n')
  15.     df.info()
  16.     print('\n--- head ---\n')
  17.     print(df.head())
  18.     print('\n--- series dtype ---\n')
  19.     print(df['colX'][0].dtype)
  20.     print('---')
  21.    
  22.     df = df.astype({
  23.         "col1":  float,
  24.         #"colX":  float,
  25.     })
  26.  
  27.     print('\n--- info ---\n')
  28.     df.info()
  29.     print('\n--- head ---\n')
  30.     print(df.head())
  31.     print('\n--- series dtype ---\n')
  32.     print(df['colX'][0].dtype)
  33.     print('---')
  34.  
  35.     print(round( df.isnull().sum(axis=0)/len(df)*100, 2 ))
  36.    
  37.     print(round( 100 - df.count()/len(df)*100, 2 ))
  38.    
  39. def test2():
  40.  
  41.     df = pd.DataFrame({
  42.         'col1': [1, 2, 3],
  43.         'colX': [
  44.             pd.Series([1, 2, 3, None, 0, '']),
  45.             pd.Series([4, 5, 6, None, 0, '']),
  46.             pd.Series([7, 8, 9, None, 0, '']),
  47.         ]      
  48.     })
  49.  
  50.     # split to columns (and convert to float at the same time)
  51.     #df['X_0'] = df['colX'].str[0]
  52.     #df['X_1'] = df['colX'].str[1]
  53.     #df['X_2'] = df['colX'].str[2]
  54.     #df['X_3'] = df['colX'].str[3]
  55.     df['X_0'] = df['colX'].apply(lambda x:x[0])#.astype(float)
  56.     df['X_1'] = df['colX'].apply(lambda x:x[1])#.astype(float)
  57.     df['X_2'] = df['colX'].apply(lambda x:x[2])#.astype(float)
  58.     df['X_3'] = df['colX'].apply(lambda x:x[3])#.astype(float)
  59.     df['X_4'] = df['colX'].apply(lambda x:x[4])#.astype(float)
  60.     df['X_5'] = df['colX'].apply(lambda x:x[5])#.astype(float)
  61.     # remove column with series
  62.     df.drop(columns='colX', inplace=True)
  63.  
  64.     #df.info()
  65.     #print(df.head())
  66.  
  67.     # convert other columns (no need to convert `X_`)
  68.     df = df.astype({
  69.         "col1":  float,
  70.         #"X_0":  float,
  71.         #"X_1":  float,
  72.         #"X_2":  float,
  73.         #"X_3":  float,
  74.         #"X_4":  float,
  75.         #"X_5":  float,
  76.     })
  77.  
  78.     #df.info()
  79.     #print(df.head())
  80.  
  81.     print(round( df.isnull().sum(axis=0)/len(df)*100, 2 ))
  82.    
  83.     print(round( 100 - df.count()/len(df)*100, 2 ))
  84.    
  85. def test3():
  86.  
  87.     import pandas as pd
  88.  
  89.     df = pd.DataFrame({
  90.         'col1': [1, 2, 3],
  91.         'colX': [
  92.             pd.Series([1, 2, 3, None, 0, '']),
  93.             pd.Series([4, 5, 6, None, 0, '']),
  94.             pd.Series([7, 8, 9, None, 0, '']),
  95.         ]      
  96.     })
  97.  
  98.     #print(df['colX'][0].dtype)
  99.     #print(df.head())
  100.  
  101.     #print(round((df.isnull().sum(axis=0)/len(df.index))*100, 2))
  102.  
  103.     # convert to float values in Series
  104.     #df['colX'] = df['colX'].apply(lambda x: x.astype(float))  
  105.  
  106.     # convert Series to columns with prefix `X_` (need to apply pd.Series even if there are series)
  107.     df = df.assign( **(df['colX'].apply(pd.Series).add_prefix('X_')) )
  108.  
  109.     # remove column with series
  110.     df.drop(columns='colX', inplace=True)
  111.  
  112.     #df.info()
  113.     #print(df.head())
  114.  
  115.     # convert other columns (no need to convert `X_`)
  116.     df = df.astype({
  117.         "col1":  float,
  118.         #"X_0":  float,
  119.         #"X_1":  float,
  120.         #"X_2":  float,
  121.         #"X_3":  float,
  122.         #"X_4":  float,
  123.         #"X_5":  float,
  124.     })
  125.  
  126.     #df.info()
  127.     #print(df.head())
  128.  
  129.     print(round( df.isnull().sum(axis=0)/len(df)*100, 2 ))
  130.    
  131.     print(round( 100 - df.count()/len(df)*100, 2 ))
  132.  
  133. test1()
  134. test2()
  135. test3()
RAW Paste Data