Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd
import numpy as np

# Sample script: builds a long-format table with one row per
# (Name, City, Domain, Sequence, Group) carrying a Value, normalises blank
# cells, groups the rows, and writes both the prepared table and a
# hand-written expected result to CSV.

# Pattern for cells that are empty or contain only whitespace.
BLANK_PATTERN = r"^\s*$"

# some data
data3 = [
    ["Alex", "Tampa", "A23", "1", "Ax", "Red"],
    ["Alex", "Tampa", "A23", "1", "Ay", "Blue"],
    ["Alex", "Tampa", "B43", "1", "Bx", "Green"],
    ["Alex", "Tampa", "B43", "1", "By", "White"],
    ["Alex", "Tampa", "C55", "1", "Cx", "Red"],
    ["Alex", "Tampa", "C55", "1", "Cy", "White"],
    ["Alex", "Tampa", "C55", "2", "Cx", "Purple"],
    ["Alex", "Tampa", "C55", "2", "Cy", "Black"],
    ["Tim", "San Diego", "A23", "1", "Ax", "Green"],
    ["Tim", "San Diego", "A23", "1", "Ay", "Black"],
    ["Tim", "San Diego", "B43", "1", "Bx", "Yellow"],
    ["Tim", "San Diego", "B43", "1", "By", "Black"],
    ["Tim", "San Diego", "C55", "1", "Cx", "Pink"],
    ["Tim", "San Diego", "C55", "1", "Cy", "Orange"],
    ["Tim", "San Diego", "A23", "2", "Ax", "Green"],
    ["Tim", "San Diego", "A23", "2", "Ay", "Red"],
    ["Tim", "San Diego", "B43", "2", "Bx", ""],
    ["Tim", "San Diego", "B43", "2", "By", ""],
    ["Mark", "Houston", "A23", "1", "Ax", "Purple"],
    ["Mark", "Houston", "A23", "1", "Ay", "Yellow"],
    ["Mark", "Houston", "B43", "1", "Bx", "Gray"],
    ["Mark", "Houston", "B43", "1", "By", "White"],
    ["Mark", "Houston", "C55", "1", "Cx", ""],
    ["Mark", "Houston", "C55", "1", "Cy", ""],
    ["Anthony", "Seattle", "A23", "", "Ax", "Orange"],
    ["Anthony", "Seattle", "A23", "", "Ay", "Black"],
    ["Anthony", "Seattle", "B43", "", "Bx", "Red"],
    ["Anthony", "Seattle", "B43", "", "By", "Black"],
    ["Anthony", "Seattle", "C55", "", "Cx", "Blue"],
    ["Anthony", "Seattle", "C55", "", "Cy", "Pink"],
]

# create dataframe
df3 = pd.DataFrame(
    data3,
    columns=["Name", "City", "Domain", "Sequence", "Group", "Value"],
)

# add Compared and Type columns
df3["Compared Group"] = ""
df3["Compared Value"] = ""
df3["Type"] = ""

# Replace blank (empty or whitespace-only) cells with NaN. The replacement
# runs over the whole frame, so the three placeholder columns added above
# become NaN as well; only Sequence and Value are refilled below.
df3 = df3.replace(BLANK_PATTERN, np.nan, regex=True)

# Fill the missing Sequence / Value cells with explicit markers.
df3 = df3.fillna({"Sequence": "N/A", "Value": "NULL"})

# map groups with dictionary (each "x" group to its "y" counterpart)
group_dict = {"Ax": "Ay", "Bx": "By", "Cx": "Cy"}

# groupby
grouped = df3.groupby(
    ["Name", "Sequence", "Domain", "Group"], group_keys=False
)

# print groups (only the key is shown; the per-group frame is not needed)
for group_name, _ in grouped:
    print(group_name)

# expected result: one row per (Name, Domain, Sequence) with the "x" group
# and value followed by the matching "y" group and value.
# NOTE(review): Anthony's Sequence is "" here, while df3 holds "N/A" after
# the fillna above - confirm which one the expected output should use.
result = [
    ["Alex", "Tampa", "A23", "1", "Ax", "Red", "Ay", "Blue"],
    ["Alex", "Tampa", "B43", "1", "Bx", "Green", "By", "White"],
    ["Alex", "Tampa", "C55", "1", "Cx", "Red", "Cy", "White"],
    ["Alex", "Tampa", "C55", "2", "Cx", "Purple", "Cy", "Black"],
    ["Tim", "San Diego", "A23", "1", "Ax", "Green", "Ay", "Black"],
    ["Tim", "San Diego", "A23", "2", "Ax", "Green", "Ay", "Red"],
    ["Tim", "San Diego", "B43", "1", "Bx", "Yellow", "By", "Black"],
    ["Tim", "San Diego", "B43", "2", "Bx", "NULL", "By", "NULL"],
    ["Tim", "San Diego", "C55", "1", "Cx", "Pink", "Cy", "Orange"],
    ["Mark", "Houston", "A23", "1", "Ax", "Purple", "Ay", "Yellow"],
    ["Mark", "Houston", "B43", "1", "Bx", "Gray", "By", "White"],
    ["Mark", "Houston", "C55", "1", "Cx", "NULL", "Cy", "NULL"],
    ["Anthony", "Seattle", "A23", "", "Ax", "Orange", "Ay", "Black"],
    ["Anthony", "Seattle", "B43", "", "Bx", "Red", "By", "Black"],
    ["Anthony", "Seattle", "C55", "", "Cx", "Blue", "Cy", "Pink"],
]
result_df = pd.DataFrame(
    result,
    columns=[
        "Name", "City", "Domain", "Sequence", "Group",
        "Value", "Compared Group", "Compared Value",
    ],
)

# Write both tables to the current working directory without the index.
df3.to_csv("df3.csv", index=False)
result_df.to_csv("result_df.csv", index=False)
Advertisement
Add Comment
Please, Sign In to add comment