# Untitled

import csv
import sys

from util import Node, StackFrontier, QueueFrontier

# Maps lowercased names to a set of corresponding person_ids
# (load_data lowercases the key and person_id_for_name lowercases the query,
# so lookups are case-insensitive)
names = {}

# Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
# (ids and birth are stored exactly as read from people.csv, i.e. as strings)
people = {}

# Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
# (ids and year are stored exactly as read from movies.csv, i.e. as strings)
movies = {}


def load_data(directory):
    """
    Load data from CSV files into memory.

    Reads `people.csv`, `movies.csv` and `stars.csv` from `directory` and
    fills the module-level `names`, `people` and `movies` dictionaries in
    place. Nothing is returned.

    A row of `stars.csv` that refers to a person or a movie that was not
    loaded is skipped as a whole, so the `people` and `movies` tables never
    end up pointing at ids that do not exist in the other table.
    """
    # Load people
    with open(f"{directory}/people.csv", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set()
            }
            # Index by lowercased name; several people may share one name.
            names.setdefault(row["name"].lower(), set()).add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set()
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            person_id = row.get("person_id")
            movie_id = row.get("movie_id")
            person = people.get(person_id)
            movie = movies.get(movie_id)
            # Resolve both sides before mutating either one: linking a person
            # to a movie that is missing from `movies` would leave a dangling
            # id that breaks later lookups of that movie's cast.
            if person is None or movie is None:
                continue
            person["movies"].add(movie_id)
            movie["stars"].add(person_id)


def main():
    """
    Command-line entry point.

    Accepts an optional data directory argument (defaulting to "large"),
    loads the CSV data, prompts for two names, and prints the chain of
    co-starring movies that links them, or "Not connected." when
    shortest_path returns None.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) > 1:
        sys.exit("Usage: python degrees.py [directory]")
    directory = cli_args[0] if cli_args else "large"

    # Load data from files into memory
    print("Loading data...")
    load_data(directory)
    print("Data loaded.")

    # Ask for the two endpoints one after the other, stopping at the first
    # name that cannot be resolved.
    endpoints = []
    for _ in range(2):
        resolved = person_id_for_name(input("Name: "))
        if resolved is None:
            sys.exit("Person not found.")
        endpoints.append(resolved)
    source, target = endpoints

    path = shortest_path(source, target)
    if path is None:
        print("Not connected.")
        return

    print(f"{len(path)} degrees of separation.")

    # Walk the path, remembering the previous person so each step can be
    # printed as "<previous> and <current> starred in <movie>".
    previous_id = source
    for step, (movie_id, current_id) in enumerate(path, start=1):
        person1 = people[previous_id]["name"]
        person2 = people[current_id]["name"]
        movie = movies[movie_id]["title"]
        print(f"{step}: {person1} and {person2} starred in {movie}")
        previous_id = current_id


def shortest_path(source, target):
    """
    Returns the shortest list of (movie_id, person_id) pairs
    that connect the source to the target.

    Each pair means "via movie_id you reach person_id"; the first pair is
    reached from `source` and the last pair's person_id is `target`.

    If source and target are the same person, returns an empty list.
    If no possible path, returns None.

    The search is a breadth-first search over the co-star graph, so the
    first time the target is reached the path to it has the fewest possible
    steps. Every person is enqueued at most once, which guarantees
    termination without any artificial limit on the path length.
    """
    if source == target:
        # Zero degrees of separation: nothing to traverse.
        return []

    # came_from[person_id] = (movie_id, parent_person_id) records how each
    # person was first reached. The source has no predecessor. Membership in
    # this dict doubles as the "already discovered" check.
    came_from = {source: None}

    frontier = QueueFrontier()
    frontier.add(Node(source, None, None))

    while not frontier.empty():
        node = frontier.remove()
        for movie_id, person_id in neighbors_for_person(node.state):
            if person_id in came_from:
                # Already discovered by an equally short or shorter route
                # (this also filters out the person themself).
                continue
            came_from[person_id] = (movie_id, node.state)

            if person_id == target:
                # Test on discovery rather than on removal: the path is
                # already known to be shortest, so stop immediately.
                path = []
                current = target
                while came_from[current] is not None:
                    via_movie, parent = came_from[current]
                    path.append((via_movie, current))
                    current = parent
                path.reverse()  # built target -> source; callers want source -> target
                return path

            frontier.add(Node(person_id, node.state, movie_id))

    # Frontier exhausted without ever reaching the target.
    return None


def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.

    The lookup is case-insensitive. When exactly one person matches, their
    id is returned directly. When several match, the candidates are listed
    and the user is asked to type the intended id; anything that is not one
    of the listed ids yields None. Returns None when nobody matches.
    """
    candidates = list(names.get(name.lower(), set()))

    if len(candidates) == 0:
        return None
    if len(candidates) == 1:
        return candidates[0]

    # Ambiguous name: show every match and let the user pick one.
    print(f"Which '{name}'?")
    for candidate in candidates:
        record = people[candidate]
        print(f"ID: {candidate}, Name: {record['name']}, Birth: {record['birth']}")

    try:
        chosen = input("Intended Person ID: ")
    except ValueError:
        return None
    return chosen if chosen in candidates else None


def neighbors_for_person(person_id):
    """
    Returns (movie_id, person_id) pairs for people
    who starred with a given person.

    Every star of every movie the person appeared in is included, so the
    result also contains pairs for the person themself.
    """
    return {
        (movie_id, co_star_id)
        for movie_id in people[person_id]["movies"]
        for co_star_id in movies[movie_id]["stars"]
    }


# Run the interactive search only when executed as a script, not on import.
if __name__ == "__main__":
    main()
    # Debug aid: uncomment to dump the three loaded tables.
    # print(names, people, movies, sep='\n\n\n')

# sample data used:
'''
movies.csv
id,title,year
112384,"Apollo 13",1995
104257,"A Few Good Men",1992
109830,"Forrest Gump",1994
93779,"The Princess Bride",1987
95953,"Rain Man",1988

people.csv
id,name,birth
102,"Kevin Bacon",1958
129,"Tom Cruise",1962
144,"Cary Elwes",1962
158,"Tom Hanks",1956
1597,"Mandy Patinkin",1952
163,"Dustin Hoffman",1937
1697,"Chris Sarandon",1942
193,"Demi Moore",1962
197,"Jack Nicholson",1937
200,"Bill Paxton",1955
398,"Sally Field",1946
420,"Valeria Golino",1965
596520,"Gerald R. Molen",1935
641,"Gary Sinise",1955
705,"Robin Wright",1966
914612,"Emma Watson",1990

stars.csv
person_id,movie_id
102,104257
102,112384
129,104257
129,95953
144,93779
158,109830
158,112384
1597,93779
163,95953
1697,93779
193,104257
197,104257
200,112384
398,109830
420,95953
596520,95953
641,109830
641,112384
705,109830
705,93779
'''