# Triple Gold Star

# Only A Little Lucky

# The Feeling Lucky question (from the regular homework) assumed it was enough
# to find the best-ranked page for a given query. For most queries, though, we
# don't just want the best page (according to the page ranking algorithm); we
# want a list of many pages that match the query, ordered from the one most
# likely to be useful to the one least likely.

# Your goal for this question is to define a procedure, ordered_search(index,
# ranks, keyword), that takes the same inputs as lucky_search from Question 5,
# but returns an ordered list of all the URLs that match the query.

# To order the pages, use the quicksort algorithm, invented by Sir Tony Hoare
# in 1959. Quicksort provides a way to sort any list of data, using an
# expected number of comparisons that scales as n log n, where n is the number
# of elements in the list.

# The idea of quicksort is quite simple:

# If the list has zero or one elements, it is already sorted.

# Otherwise, pick a pivot element and split the list into two partitions: one
# contains all the elements equal to or lower than the value of the pivot
# element, and the other contains all the elements that are greater than the
# pivot element. Recursively sort each of the sub-lists, and then return the
# result of concatenating the sorted left sub-list, the pivot element, and the
# sorted right sub-list.

# For simplicity, use the first element in the list as your pivot element
# (this is not usually a good choice, since it means that if the input list is
# already nearly sorted, the actual work will be much worse than expected).

def ordered_search(index, ranks, keyword):
    # Look up every URL that matches the keyword, then order them by rank.
    # lookup returns None for a keyword with no matches, and quicksort
    # passes None through unchanged.
    possible_results = lookup(index, keyword)
    return quicksort(possible_results, ranks)

def quicksort(pages, ranks):
    # Sort pages from highest rank to lowest, using the first page as pivot.
    if not pages or len(pages) <= 1:
        return pages
    else:
        pivot = ranks[pages[0]]
        worse = []   # pages ranked at or below the pivot
        better = []  # pages ranked above the pivot
        for page in pages[1:]:
            if ranks[page] <= pivot:
                worse.append(page)
            else:
                better.append(page)
        return quicksort(better, ranks) + [pages[0]] + quicksort(worse, ranks)
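# As a quick sanity check of quicksort, here is a hypothetical toy example
# (the page names and rank values below are made up for illustration and are
# not part of the crawled data that follows):

toy_ranks = {'a': 0.5, 'b': 2.0, 'c': 1.0}
print(quicksort(['a', 'b', 'c'], toy_ranks))
#>>> ['b', 'c', 'a']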

cache = {
    'http://udacity.com/cs101x/urank/index.html': """<html>
<body>
<h1>Dave's Cooking Algorithms</h1>
<p>
Here are my favorite recipes:
<ul>
<li> <a href="http://udacity.com/cs101x/urank/hummus.html">Hummus Recipe</a>
<li> <a href="http://udacity.com/cs101x/urank/arsenic.html">World's Best Hummus</a>
<li> <a href="http://udacity.com/cs101x/urank/kathleen.html">Kathleen's Hummus Recipe</a>
</ul>

For more expert opinions, check out the
<a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a>
and <a href="http://udacity.com/cs101x/urank/zinc.html">Zinc Chef</a>.
</body>
</html>
""",
    'http://udacity.com/cs101x/urank/zinc.html': """<html>

<body>
<h1>The Zinc Chef</h1>
<p>
I learned everything I know from
<a href="http://udacity.com/cs101x/urank/nickel.html">the Nickel Chef</a>.
</p>
<p>
For great hummus, try
<a href="http://udacity.com/cs101x/urank/kathleen.html">this recipe</a>.
</body>
</html>
""",
    'http://udacity.com/cs101x/urank/nickel.html': """<html>

<body>
<h1>The Nickel Chef</h1>
<p>
This is the
<a href="http://udacity.com/cs101x/urank/kathleen.html">
best Hummus recipe!
</a>
</body>
</html>
""",
    'http://udacity.com/cs101x/urank/kathleen.html': """<html>

<body>
<h1>
Kathleen's Hummus Recipe
</h1>
<ol>
<li> Open a can of garbanzo beans.
<li> Crush them in a blender.
<li> Add 3 tablespoons of tahini sauce.
<li> Squeeze in one lemon.
<li> Add salt, pepper, and buttercream frosting to taste.
</ol>
</body>
</html>
""",
    'http://udacity.com/cs101x/urank/arsenic.html': """<html>

<body>
<h1>
The Arsenic Chef's World Famous Hummus Recipe
</h1>
<ol>
<li> Kidnap the <a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a>.
<li> Force her to make hummus for you.
</ol>
</body>
</html>
""",
    'http://udacity.com/cs101x/urank/hummus.html': """<html>

<body>
<h1>
Hummus Recipe
</h1>
<ol>
<li> Go to the store and buy a container of hummus.
<li> Open it.
</ol>
</body>
</html>
""",
}

def get_page(url):
    # Simulated web fetch: pages come from the cache above.
    if url in cache:
        return cache[url]
    return ""

def get_next_target(page):
    # Find the next '<a href=' link in page; return its URL and end position.
    start_link = page.find('<a href=')
    if start_link == -1:
        return None, 0
    start_quote = page.find('"', start_link)
    end_quote = page.find('"', start_quote + 1)
    url = page[start_quote + 1:end_quote]
    return url, end_quote

def get_all_links(page):
    links = []
    while True:
        url, endpos = get_next_target(page)
        if url:
            links.append(url)
            page = page[endpos:]
        else:
            break
    return links

def union(a, b):
    # Append to a each element of b that is not already in a.
    for e in b:
        if e not in a:
            a.append(e)

def add_page_to_index(index, url, content):
    words = content.split()
    for word in words:
        add_to_index(index, word, url)

def add_to_index(index, keyword, url):
    if keyword in index:
        index[keyword].append(url)
    else:
        index[keyword] = [url]

def lookup(index, keyword):
    if keyword in index:
        return index[keyword]
    else:
        return None

def crawl_web(seed):  # returns index, graph of outlinks
    tocrawl = [seed]
    crawled = []
    graph = {}  # <url>, [list of pages it links to]
    index = {}
    while tocrawl:
        page = tocrawl.pop()
        if page not in crawled:
            content = get_page(page)
            add_page_to_index(index, page, content)
            outlinks = get_all_links(content)
            graph[page] = outlinks
            union(tocrawl, outlinks)
            crawled.append(page)
    return index, graph

def compute_ranks(graph):
    d = 0.8  # damping factor
    numloops = 10
    ranks = {}
    npages = len(graph)
    for page in graph:
        ranks[page] = 1.0 / npages
    for i in range(0, numloops):
        newranks = {}
        for page in graph:
            # rank(page) = (1 - d) / npages
            #            + d * sum(rank(node) / outlinks(node)),
            # summed over every node that links to page
            newrank = (1 - d) / npages
            for node in graph:
                if page in graph[node]:
                    newrank = newrank + d * (ranks[node] / len(graph[node]))
            newranks[page] = newrank
        ranks = newranks
    return ranks

# Here are some examples showing what ordered_search should do:

# Observe that the result list is sorted so the highest-ranking site is at the
# beginning of the list.

# Note: the intent of this question is for students to write their own sorting
# code, not to use the built-in sort procedure.

index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
ranks = compute_ranks(graph)

print(ordered_search(index, ranks, 'Hummus'))
#>>> ['http://udacity.com/cs101x/urank/kathleen.html',
#     'http://udacity.com/cs101x/urank/nickel.html',
#     'http://udacity.com/cs101x/urank/arsenic.html',
#     'http://udacity.com/cs101x/urank/hummus.html',
#     'http://udacity.com/cs101x/urank/index.html']

print(ordered_search(index, ranks, 'the'))
#>>> ['http://udacity.com/cs101x/urank/nickel.html',
#     'http://udacity.com/cs101x/urank/arsenic.html',
#     'http://udacity.com/cs101x/urank/hummus.html',
#     'http://udacity.com/cs101x/urank/index.html']

print(ordered_search(index, ranks, 'babaganoush'))
#>>> None