Guest User

Untitled

a guest
Jun 8th, 2023
200
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.13 KB | None | 0 0
  1. import io
  2. from collections import defaultdict
  3.  
  4. import pandas as pd
  5.  
  6.  
  7. def main():
  8.     df = get_df()
  9.  
  10.     # make 'ID' the index
  11.     df = df.set_index("ID")
  12.  
  13.     # find the roots
  14.     roots = df[df["ParentID"] == 0].index
  15.     # interesting nodes to find the hierarchy for
  16.     interesting = set(df[df["Filter"].notna()].index)
  17.     # dict to store the id -> hierarchy mapping
  18.     hierarchy = {}
  19.  
  20.     # build a directed graph in adjacency list of parent -> children
  21.     graph = defaultdict(list)
  22.     for node, parent in zip(df.index, df["ParentID"]):
  23.         graph[parent].append(node)
  24.  
  25.     # run dfs on each root
  26.     for root in roots:
  27.         stack = [root]
  28.         dfs(graph, stack, interesting, hierarchy)
  29.  
  30.     # populate the hierarchy column
  31.     df["Hierarchy"] = ""
  32.     for node, path in hierarchy.items():
  33.         df.loc[node, "Hierarchy"] = "/".join(df.loc[path, "Text"])
  34.  
  35.     # make 'ID' a column again
  36.     df = df.reset_index()
  37.  
  38.     # now we're done!
  39.     print(df)
  40.  
  41.  
  42. # this is a recursive dfs code with additional logic to store the hierarchy
  43. # of interesting nodes
  44. def dfs(graph, stack, interesting, hierarchy):
  45.     node = stack[-1]
  46.     for child in graph[node]:
  47.         stack.append(child)
  48.         if child in interesting:
  49.             hierarchy[child] = stack[::-1]
  50.         dfs(graph, stack, interesting, hierarchy)
  51.     stack.pop()
  52.  
  53.  
  54. def get_df():
  55.     file = io.StringIO(
  56.         """
  57.       ID  ParentID  Filter Text
  58.       98        97     NaN   AA
  59.       99        98     NaN   BB
  60.      100        99     NaN   CC
  61.      107       100       1   DD
  62.     9999      1231     NaN   EE
  63.    10000      1334     NaN   FF
  64.    10001       850       2   GG
  65.      850       230     NaN   HH
  66.      230       121     NaN   II
  67.      121        96     NaN   JJ
  68.       96         0     NaN   KK
  69.       97         0     NaN   LL
  70.    """
  71.     )
  72.  
  73.     df = pd.read_csv(
  74.         file,
  75.         sep=r"\s+",
  76.         dtype={
  77.             "ID": "int64",
  78.             "ParentID": "int64",
  79.             "Filter": "Int64",
  80.             "Text": "string",
  81.         },
  82.     )
  83.  
  84.     return df
  85.  
  86.  
  87. if __name__ == "__main__":
  88.     main()
  89.  
Advertisement
Add Comment
Please, Sign In to add comment