m8_

groupby and map values

m8_
Sep 11th, 2019
320
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.48 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3.  
  4. # some data
  5. data3 = [["Alex","Tampa","A23","1","Ax","Red"],
  6.          ["Alex","Tampa","A23","1","Ay","Blue"],
  7.          ["Alex","Tampa","B43","1","Bx","Green"],
  8.          ["Alex","Tampa","B43","1","By","White"],
  9.          ["Alex","Tampa","C55","1","Cx","Red"],
  10.          ["Alex","Tampa","C55","1","Cy","White"],
  11.          ["Alex","Tampa","C55","2","Cx","Purple"],
  12.          ["Alex","Tampa","C55","2","Cy","Black"],
  13.          ["Tim","San Diego","A23","1","Ax","Green"],
  14.          ["Tim","San Diego","A23","1","Ay","Black"],
  15.          ["Tim","San Diego","B43","1","Bx","Yellow"],
  16.          ["Tim","San Diego","B43","1","By","Black"],
  17.          ["Tim","San Diego","C55","1","Cx","Pink"],
  18.          ["Tim","San Diego","C55","1","Cy","Orange"],
  19.          ["Tim","San Diego","A23","2","Ax","Green"],
  20.          ["Tim","San Diego","A23","2","Ay","Red"],
  21.          ["Tim","San Diego","B43","2","Bx",""],
  22.          ["Tim","San Diego","B43","2","By",""],        
  23.          ["Mark","Houston","A23","1","Ax","Purple"],
  24.          ["Mark","Houston","A23","1","Ay","Yellow"],
  25.          ["Mark","Houston","B43","1","Bx","Gray"],
  26.          ["Mark","Houston","B43","1","By","White"],
  27.          ["Mark","Houston","C55","1","Cx",""],
  28.          ["Mark","Houston","C55","1","Cy",""],        
  29.          ["Anthony","Seattle","A23","","Ax","Orange"],
  30.          ["Anthony","Seattle","A23","","Ay","Black"],
  31.          ["Anthony","Seattle","B43","","Bx","Red"],
  32.          ["Anthony","Seattle","B43","","By","Black"],
  33.          ["Anthony","Seattle","C55","","Cx","Blue"],
  34.          ["Anthony","Seattle","C55","","Cy","Pink"]]
  35.  
  36. # create dataframe
  37. df3 = pd.DataFrame(data3,columns=[
  38.         "Name","City","Domain","Sequence","Group","Value"])
  39.  
  40. # add Compared and Type columns
  41. df3["Compared Group"] = ""
  42. df3["Compared Value"] = ""
  43. df3["Type"] = ""
  44.  
  45. # replace nulls with np.NaN
  46. df3.replace(r"^s*$", np.nan, regex=True, inplace = True)
  47.  
  48. # fillna for missing Sequence
  49. df3.fillna({"Sequence":"N/A","Value":"NULL"},inplace=True)
  50.  
  51. # map groups with dictionary
  52. group_dict = {"Ax":"Ay","Bx":"By","Cx":"Cy"}
  53.  
  54. # groupby
  55. grouped = df3.groupby(["Name","Sequence","Domain","Group"], group_keys=False)
  56.  
  57. # print groups
  58. for group_name, df_group in grouped:
  59.     print(group_name)
  60.  
  61.  
  62. # expected result
  63. result = [["Alex","Tampa","A23","1","Ax","Red","Ay","Blue"],
  64.           ["Alex","Tampa","B43","1","Bx","Green","By","White"],
  65.           ["Alex","Tampa","C55","1","Cx","Red","Cy","White"],
  66.           ["Alex","Tampa","C55","2","Cx","Purple","Cy","Black"],
  67.          ["Tim","San Diego","A23","1","Ax","Green","Ay","Black"],
  68.          ["Tim","San Diego","A23","2","Ax","Green","Ay","Red"],
  69.          ["Tim","San Diego","B43","1","Bx","Yellow","By","Black"],
  70.          ["Tim","San Diego","B43","2","Bx","NULL","By","NULL"],        
  71.          ["Tim","San Diego","C55","1","Cx","Pink","Cy","Orange"],        
  72.          ["Mark","Houston","A23","1","Ax","Purple","Ay","Yellow"],
  73.          ["Mark","Houston","B43","1","Bx","Gray","By","White"],
  74.          ["Mark","Houston","C55","1","Cx","NULL","Cy","NULL"],
  75.          ["Anthony","Seattle","A23","","Ax","Orange","Ay","Black"],
  76.          ["Anthony","Seattle","B43","","Bx","Red","By","Black"],
  77.          ["Anthony","Seattle","C55","","Cx","Blue","Cy","Pink"]]
  78.  
  79. result_df = pd.DataFrame(result,columns=[
  80.         "Name","City","Domain","Sequence","Group",
  81.         "Value","Compared Group","Compared Value"])
  82.  
  83.  
  84. df3.to_csv("df3.csv", index = False)
  85. result_df.to_csv("result_df.csv", index = False)
Advertisement
Add Comment
Please, Sign In to add comment