Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- import getpass
- import mysql.connector
class WordNotFoundError(Exception):
    """Error carrying a title for which no page row was found."""

    def __init__(self, word):
        # Keep the offending title available to handlers as ``.word``.
        self.word = word

    def __str__(self):
        # join() insists on str operands, exactly like ``+`` between strings.
        return "".join((self.word, " was not found."))
class PageIdNotFoundError(Exception):
    """Error carrying a page id for which no page row was found.

    Attributes:
        pid: the page id that could not be resolved.
    """

    def __init__(self, pid):
        self.pid = pid

    def __str__(self):
        # The method was previously spelled ``__str`` and so never took part
        # in str(); it also concatenated ``pid`` with a string, which raises
        # TypeError for integer ids.  Formatting accepts any pid type.
        return "{} was not found.".format(self.pid)
class LinkNotFoundError(Exception):
    """Error whose string form is the message it was constructed with."""

    def __init__(self, msg):
        # Stored verbatim; __str__ hands it back unchanged.
        self.msg = msg

    def __str__(self):
        message = self.msg
        return message
def get_pageid(c, w):
    """Look up the page id of the namespace-0 page titled *w*.

    Args:
        c: cursor object; only ``execute`` and ``fetchone`` are used.
        w: page title, passed to the query as a bound parameter.

    Returns:
        The first column of the matching row.

    Raises:
        WordNotFoundError: the query produced no row.
    """
    query = "SELECT page_id FROM page WHERE page_namespace=0 AND page_title=%s"
    c.execute(query, (w,))
    row = c.fetchone()
    if row is not None:
        return row[0]
    raise WordNotFoundError(w)
def get_title(c, pid):
    """Look up the title of the page whose id is *pid*.

    Args:
        c: cursor object; only ``execute`` and ``fetchone`` are used.
        pid: page id, passed to the query as a bound parameter.

    Returns:
        The first column of the matching row, decoded as UTF-8 (the value
        must therefore be bytes-like).

    Raises:
        PageIdNotFoundError: the query produced no row.
    """
    query = "SELECT page_title FROM page WHERE page_id=%s"
    c.execute(query, (pid,))
    row = c.fetchone()
    if row is not None:
        raw_title = row[0]
        return raw_title.decode("utf-8")
    raise PageIdNotFoundError(pid)
def get_linkfrom(c, w):
    """List the ids of namespace-0 pages that link to the title *w*.

    Args:
        c: cursor object; only ``execute`` and ``fetchall`` are used.
        w: link target title, passed to the query as a bound parameter.

    Returns:
        A list with the first column of every fetched row, in fetch order;
        an empty list when nothing was fetched.
    """
    query = "SELECT pl_from FROM pagelinks WHERE pl_from_namespace=0 AND pl_namespace=0 AND pl_title=%s"
    c.execute(query, (w,))
    rows = c.fetchall()
    if not rows:
        return []
    return [row[0] for row in rows]
def wp_hops(c, w_from, w_to):
    """Find a chain of page links leading from *w_from* to *w_to*.

    The search is breadth-first and runs backwards: it starts at *w_to* and,
    level by level, asks which pages link to the titles found so far, until
    one of those linking pages is *w_from*.  Because levels are explored in
    order, the returned chain uses the fewest links among the pages visited.

    Args:
        c: cursor object handed to get_pageid / get_linkfrom / get_title.
        w_from: title of the page the chain starts at.
        w_to: title of the page the chain ends at.

    Returns:
        A list of titles beginning with *w_from* and ending with *w_to*,
        each page linking to the next one.

    Raises:
        WordNotFoundError: *w_from* or *w_to* is not a known page title.
        LinkNotFoundError: every page that can reach *w_to* was explored
            without meeting *w_from* (previously this case looped forever).
    """
    # w_to is resolved first, so a bad w_to is reported before a bad w_from.
    w_to_pid = get_pageid(c, w_to)
    target = get_pageid(c, w_from)

    title_list = [w_to]   # current BFS level (titles)
    links = {}            # title -> next title on the way towards w_to
    pids = {w_to_pid}     # page ids already queued; seeded so w_to is never re-expanded
    n_link = 0            # depth of the current level, shown in the progress trace

    # An empty level means the backwards search is exhausted.
    while title_list:
        next_title_list = []
        for title in title_list:
            print(n_link, title)  # progress trace: depth and title being expanded
            linkfrom = get_linkfrom(c, title)
            if target in linkfrom:
                # Walk the recorded successors from this title up to w_to.
                result = [w_from, title]
                t = title
                while t != w_to:
                    t = links[t]
                    result.append(t)
                return result
            for lf in linkfrom:
                if lf in pids:
                    continue
                # Mark before the lookup so an id without a page row is not
                # queried again every time it shows up.
                pids.add(lf)
                try:
                    t = get_title(c, lf)
                except PageIdNotFoundError:
                    # Link row pointing at a page that has no title row: skip it.
                    continue
                links[t] = title
                next_title_list.append(t)
        title_list = next_title_list
        n_link += 1

    raise LinkNotFoundError(
        "No link path from {} to {} was found.".format(w_from, w_to)
    )
if __name__ == "__main__":
    # Command line:
    #   <script> USER FROM TO            -> password is prompted for
    #   <script> USER PASSWORD FROM TO   -> password taken from argv
    #     (NOTE: a password on the command line is visible to other users
    #      in the process list; prefer the prompting form.)
    if len(sys.argv) == 4:
        user = sys.argv[1]
        pw = getpass.getpass()
        w_from = sys.argv[2]
        w_to = sys.argv[3]
    elif len(sys.argv) == 5:
        user = sys.argv[1]
        pw = sys.argv[2]
        w_from = sys.argv[3]
        w_to = sys.argv[4]
    else:
        # Previously fell through and crashed with NameError on ``user``.
        sys.exit("usage: {} USER [PASSWORD] FROM TO".format(sys.argv[0]))

    conn = mysql.connector.Connect(user=user, password=pw, db="jawiki", charset="utf8")
    try:
        c = conn.cursor()
        try:
            print(wp_hops(c, w_from, w_to))
        except (WordNotFoundError, LinkNotFoundError) as err:
            # Expected lookup failures: report the message instead of a traceback.
            sys.exit(str(err))
        finally:
            c.close()
    finally:
        # Release the connection even when the search raises.
        conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement