Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import sys
- from util import Node, StackFrontier, QueueFrontier
# In-memory indexes filled in by load_data():
#   names  -- lower-cased person name -> set of person_ids with that name
#   people -- person_id -> dict of: name, birth, movies (a set of movie_ids)
#   movies -- movie_id -> dict of: title, year, stars (a set of person_ids)
names, people, movies = {}, {}, {}
def load_data(directory):
    """
    Load data from CSV files into memory.

    Reads people.csv, movies.csv and stars.csv from `directory` and fills
    the module-level `names`, `people` and `movies` dictionaries.

    A row of stars.csv is applied only when both its person_id and its
    movie_id are known; otherwise the row is skipped as a whole, so
    `people` never ends up pointing at a movie that is missing from
    `movies` (or the other way round).
    """
    # Load people
    with open(f"{directory}/people.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set(),
            }
            # Several people can share a name, so each name maps to a set.
            names.setdefault(row["name"].lower(), set()).add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set(),
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            # Resolve both ends before touching either record: a star row
            # that names an unknown person or movie is ignored entirely
            # instead of leaving a one-sided link behind.
            person = people.get(row.get("person_id"))
            movie = movies.get(row.get("movie_id"))
            if person is None or movie is None:
                continue
            person["movies"].add(row["movie_id"])
            movie["stars"].add(row["person_id"])
def main():
    """
    Command-line entry point.

    Loads the data set from the directory given as the optional first
    argument (default "large"), asks for two names on standard input and
    prints the chain of movies returned by shortest_path for them.
    """
    argc = len(sys.argv)
    if argc > 2:
        sys.exit("Usage: python degrees.py [directory]")
    directory = "large" if argc != 2 else sys.argv[1]

    # Load data from files into memory
    print("Loading data...")
    load_data(directory)
    print("Data loaded.")

    source = person_id_for_name(input("Name: "))
    if source is None:
        sys.exit("Person not found.")

    target = person_id_for_name(input("Name: "))
    if target is None:
        sys.exit("Person not found.")

    path = shortest_path(source, target)
    if path is None:
        print("Not connected.")
        return

    degrees = len(path)
    print(f"{degrees} degrees of separation.")

    # Prefix the source so every consecutive pair of entries is one hop.
    hops = [(None, source)] + path
    for step, (previous, current) in enumerate(zip(hops, hops[1:]), start=1):
        person1 = people[previous[1]]["name"]
        person2 = people[current[1]]["name"]
        movie = movies[current[0]]["title"]
        print(f"{step}: {person1} and {person2} starred in {movie}")
def shortest_path(source, target):
    """
    Returns the shortest list of (movie_id, person_id) pairs
    that connect the source to the target.
    If no possible path, returns None.

    `source` and `target` are person_ids. When they are the same person
    the result is an empty list (zero degrees of separation).

    The search expands people level by level (breadth-first, assuming
    QueueFrontier hands nodes back in insertion order), so the first time
    the target is reached the path to it uses the fewest hops. There is no
    cap on path length: None is returned only once every reachable person
    has been examined.
    """
    if source == target:
        return []

    # came_from[person_id] = (movie_id, previous_person_id) records how each
    # person was first reached. It doubles as the explored set, so every
    # person is enqueued at most once and the search always terminates.
    came_from = {source: None}

    frontier = QueueFrontier()
    frontier.add(Node(source, None, None))

    while not frontier.empty():
        current = frontier.remove()
        for movie_id, person_id in neighbors_for_person(current.state):
            if person_id in came_from:
                continue
            came_from[person_id] = (movie_id, current.state)

            if person_id == target:
                # Follow the back-pointers from the target to the source,
                # then flip the result into source-to-target order.
                path = []
                step = target
                while came_from[step] is not None:
                    via_movie, previous = came_from[step]
                    path.append((via_movie, step))
                    step = previous
                path.reverse()
                return path

            frontier.add(Node(person_id, current.state, movie_id))

    return None
def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.

    The lookup is case-insensitive. Returns None when nobody has that
    name. When several people share it, the candidates are printed and
    the user is asked to type one of the listed ids; any other answer
    yields None.
    """
    candidates = list(names.get(name.lower(), set()))
    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0]

    # More than one match: list them and let the user choose.
    print(f"Which '{name}'?")
    for candidate in candidates:
        record = people[candidate]
        full_name = record["name"]
        birth = record["birth"]
        print(f"ID: {candidate}, Name: {full_name}, Birth: {birth}")
    try:
        chosen = input("Intended Person ID: ")
        if chosen in candidates:
            return chosen
    except ValueError:
        pass
    return None
def neighbors_for_person(person_id):
    """
    Returns (movie_id, person_id) pairs for people
    who starred with a given person.

    Every star of every movie the person appears in is included, which
    means the person themselves shows up once per movie as well.
    """
    return {
        (movie_id, co_star)
        for movie_id in people[person_id]["movies"]
        for co_star in movies[movie_id]["stars"]
    }
# Start the interactive prompt only when this file is executed as a script.
if __name__ == "__main__":
    main()
- #print(names, people, movies, sep='\n\n\n')
- # sample data used:
- '''
- movies.csv
- id,title,year
- 112384,"Apollo 13",1995
- 104257,"A Few Good Men",1992
- 109830,"Forrest Gump",1994
- 93779,"The Princess Bride",1987
- 95953,"Rain Man",1988
- people.csv
- id,name,birth
- 102,"Kevin Bacon",1958
- 129,"Tom Cruise",1962
- 144,"Cary Elwes",1962
- 158,"Tom Hanks",1956
- 1597,"Mandy Patinkin",1952
- 163,"Dustin Hoffman",1937
- 1697,"Chris Sarandon",1942
- 193,"Demi Moore",1962
- 197,"Jack Nicholson",1937
- 200,"Bill Paxton",1955
- 398,"Sally Field",1946
- 420,"Valeria Golino",1965
- 596520,"Gerald R. Molen",1935
- 641,"Gary Sinise",1955
- 705,"Robin Wright",1966
- 914612,"Emma Watson",1990
- stars.csv
- person_id,movie_id
- 102,104257
- 102,112384
- 129,104257
- 129,95953
- 144,93779
- 158,109830
- 158,112384
- 1597,93779
- 163,95953
- 1697,93779
- 193,104257
- 197,104257
- 200,112384
- 398,109830
- 420,95953
- 596520,95953
- 641,109830
- 641,112384
- 705,109830
- 705,93779
- '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement