Advertisement
Guest User

Untitled

a guest
Apr 22nd, 2025
195
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.57 KB | None | 0 0
  1. import csv
  2. import sys
  3.  
  4. from util import Node, StackFrontier, QueueFrontier
  5.  
  6. # Maps names to a set of corresponding person_ids
  7. names = {}
  8.  
  9. # Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
  10. people = {}
  11.  
  12. # Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
  13. movies = {}
  14.  
  15.  
  16. def load_data(directory):
  17.     """
  18.    Load data from CSV files into memory.
  19.    """
  20.     # Load people
  21.     with open(f"{directory}/people.csv", encoding="utf-8") as f:
  22.         reader = csv.DictReader(f)
  23.         for row in reader:
  24.             people[row["id"]] = {
  25.                 "name": row["name"],
  26.                 "birth": row["birth"],
  27.                 "movies": set()
  28.             }
  29.             if row["name"].lower() not in names:
  30.                 names[row["name"].lower()] = {row["id"]}
  31.             else:
  32.                 names[row["name"].lower()].add(row["id"])
  33.  
  34.     # Load movies
  35.     with open(f"{directory}/movies.csv", encoding="utf-8") as f:
  36.         reader = csv.DictReader(f)
  37.         for row in reader:
  38.             movies[row["id"]] = {
  39.                 "title": row["title"],
  40.                 "year": row["year"],
  41.                 "stars": set()
  42.             }
  43.  
  44.     # Load stars
  45.     with open(f"{directory}/stars.csv", encoding="utf-8") as f:
  46.         reader = csv.DictReader(f)
  47.         for row in reader:
  48.             try:
  49.                 people[row["person_id"]]["movies"].add(row["movie_id"])
  50.                 movies[row["movie_id"]]["stars"].add(row["person_id"])
  51.             except KeyError:
  52.                 pass
  53.  
  54.  
  55. def main():
  56.     if len(sys.argv) > 2:
  57.         sys.exit("Usage: python degrees.py [directory]")
  58.     directory = sys.argv[1] if len(sys.argv) == 2 else "large"
  59.  
  60.     # Load data from files into memory
  61.     print("Loading data...")
  62.     load_data(directory)
  63.     print("Data loaded.")
  64.  
  65.     source = person_id_for_name(input("Name: "))
  66.     if source is None:
  67.         sys.exit("Person not found.")
  68.     target = person_id_for_name(input("Name: "))
  69.     if target is None:
  70.         sys.exit("Person not found.")
  71.  
  72.     path = shortest_path(source, target)
  73.  
  74.     if path is None:
  75.         print("Not connected.")
  76.     else:
  77.         degrees = len(path)
  78.         print(f"{degrees} degrees of separation.")
  79.         path = [(None, source)] + path
  80.         for i in range(degrees):
  81.             person1 = people[path[i][1]]["name"]
  82.             person2 = people[path[i + 1][1]]["name"]
  83.             movie = movies[path[i + 1][0]]["title"]
  84.             print(f"{i + 1}: {person1} and {person2} starred in {movie}")
  85.  
  86.  
  87. def shortest_path(source, target):
  88.     """
  89.    Returns the shortest list of (movie_id, person_id) pairs
  90.    that connect the source to the target.
  91.  
  92.    If no possible path, returns None.
  93.    """
  94.     if source == target: # return empty list if source and target is same as degree will be 0
  95.         return []
  96.     visited = []
  97.     q = QueueFrontier()
  98.     q.add(Node(source, None, None))
  99.     flag = False
  100.  
  101.     # 3, 2, 4, 7, 5, 1, 9, 8...
  102.     a, b, l, f = 0, 0, [], True # tracking node count till 7 for 6 edges(connections) degree theory
  103.     while not q.empty(): # Adds nodes to visited list till you find the target
  104.         r = q.remove()
  105.         visited.append(r)
  106.         n = neighbors_for_person(r.state)
  107.         l.append(len(n))
  108.         if f: # runs only once as an initialization code
  109.             b = len(n)
  110.             l = []
  111.             f = False
  112.         if b == 0:
  113.             a += 1
  114.             b = sum(l)
  115.             l = []
  116.         if a > 6: # complying with the theory, assumes no connection if degree of seperation more than 6
  117.             return None
  118.         for i in n:
  119.             if i not in visited and i[1] != r.state:
  120.                 b -= 1
  121.                 q.add(Node(i[1], r.state, i[0]))
  122.                 if i[1] == target:
  123.                     b = 0
  124.                     flag = True
  125.                     break
  126.         if flag:
  127.             break
  128.    
  129.     # print(source, target)
  130.     # for i in visited:
  131.     #     print(i.state, i.parent, i.action)
  132.     t = visited.pop()
  133.     visited.pop(0)
  134.     if len(visited) == 0:
  135.         return [(t.action, t.state)]
  136.     i = 0
  137.     v = [t]
  138.     while True:
  139.         if t.parent == visited[i].state:
  140.             v.append(visited[i])
  141.             if visited[i].parent == source:
  142.                 break
  143.             else:
  144.                 t = visited[i]
  145.         i += 1
  146.  
  147.     return [(i.action, i.state) for i in reversed(v)] # returns in correct order
  148.  
  149.  
  150. def person_id_for_name(name):
  151.     """
  152.    Returns the IMDB id for a person's name,
  153.    resolving ambiguities as needed.
  154.    """
  155.     person_ids = list(names.get(name.lower(), set()))
  156.     if len(person_ids) == 0:
  157.         return None
  158.     elif len(person_ids) > 1:
  159.         print(f"Which '{name}'?")
  160.         for person_id in person_ids:
  161.             person = people[person_id]
  162.             name = person["name"]
  163.             birth = person["birth"]
  164.             print(f"ID: {person_id}, Name: {name}, Birth: {birth}")
  165.         try:
  166.             person_id = input("Intended Person ID: ")
  167.             if person_id in person_ids:
  168.                 return person_id
  169.         except ValueError:
  170.             pass
  171.         return None
  172.     else:
  173.         return person_ids[0]
  174.  
  175.  
  176. def neighbors_for_person(person_id):
  177.     """
  178.    Returns (movie_id, person_id) pairs for people
  179.    who starred with a given person.
  180.    """
  181.     movie_ids = people[person_id]["movies"]
  182.     neighbors = set()
  183.     for movie_id in movie_ids:
  184.         for person_id in movies[movie_id]["stars"]:
  185.             neighbors.add((movie_id, person_id))
  186.     return neighbors
  187.  
  188.  
  189. if __name__ == "__main__":
  190.     main()
  191.     #print(names, people, movies, sep='\n\n\n')
  192.    
  193. # sample data used:
  194. '''
  195. movies.csv
  196. id,title,year
  197. 112384,"Apollo 13",1995
  198. 104257,"A Few Good Men",1992
  199. 109830,"Forrest Gump",1994
  200. 93779,"The Princess Bride",1987
  201. 95953,"Rain Man",1988
  202.  
  203. people.csv
  204. id,name,birth
  205. 102,"Kevin Bacon",1958
  206. 129,"Tom Cruise",1962
  207. 144,"Cary Elwes",1962
  208. 158,"Tom Hanks",1956
  209. 1597,"Mandy Patinkin",1952
  210. 163,"Dustin Hoffman",1937
  211. 1697,"Chris Sarandon",1942
  212. 193,"Demi Moore",1962
  213. 197,"Jack Nicholson",1937
  214. 200,"Bill Paxton",1955
  215. 398,"Sally Field",1946
  216. 420,"Valeria Golino",1965
  217. 596520,"Gerald R. Molen",1935
  218. 641,"Gary Sinise",1955
  219. 705,"Robin Wright",1966
  220. 914612,"Emma Watson",1990
  221.  
  222. stars.csv
  223. person_id,movie_id
  224. 102,104257
  225. 102,112384
  226. 129,104257
  227. 129,95953
  228. 144,93779
  229. 158,109830
  230. 158,112384
  231. 1597,93779
  232. 163,95953
  233. 1697,93779
  234. 193,104257
  235. 197,104257
  236. 200,112384
  237. 398,109830
  238. 420,95953
  239. 596520,95953
  240. 641,109830
  241. 641,112384
  242. 705,109830
  243. 705,93779
  244. '''
  245.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement