Untitled

import io
from collections import defaultdict

import pandas as pd


def main():
    df = get_df()

    # make 'ID' the index
    df = df.set_index("ID")

    # find the roots
    roots = df[df["ParentID"] == 0].index
    # interesting nodes to find the hierarchy for
    interesting = set(df[df["Filter"].notna()].index)
    # dict to store the id -> hierarchy mapping
    hierarchy = {}

    # build a directed graph in adjacency list of parent -> children
    graph = defaultdict(list)
    for node, parent in zip(df.index, df["ParentID"]):
        graph[parent].append(node)

    # run dfs on each root
    for root in roots:
        stack = [root]
        dfs(graph, stack, interesting, hierarchy)

    # populate the hierarchy column
    df["Hierarchy"] = ""
    for node, path in hierarchy.items():
        df.loc[node, "Hierarchy"] = "/".join(df.loc[path, "Text"])

    # make 'ID' a column again
    df = df.reset_index()

    # now we're done!
    print(df)


# this is a recursive dfs code with additional logic to store the hierarchy
# of interesting nodes
def dfs(graph, stack, interesting, hierarchy):
    node = stack[-1]
    for child in graph[node]:
        stack.append(child)
        if child in interesting:
            hierarchy[child] = stack[::-1]
        dfs(graph, stack, interesting, hierarchy)
    stack.pop()


def get_df():
    file = io.StringIO(
        """
       ID  ParentID  Filter Text
       98        97     NaN   AA
       99        98     NaN   BB
      100        99     NaN   CC
      107       100       1   DD
     9999      1231     NaN   EE
    10000      1334     NaN   FF
    10001       850       2   GG
      850       230     NaN   HH
      230       121     NaN   II
      121        96     NaN   JJ
       96         0     NaN   KK
       97         0     NaN   LL
    """
    )

    df = pd.read_csv(
        file,
        sep=r"\s+",
        dtype={
            "ID": "int64",
            "ParentID": "int64",
            "Filter": "Int64",
            "Text": "string",
        },
    )

    return df


if __name__ == "__main__":
    main()