Advertisement
cmiN

oaf

May 2nd, 2012
272
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 12.58 KB | None | 0 0
  1. #! /usr/bin/env python
  2. # Oracle Answer Finder
  3. # 02.05.2012 cmiN
  4.  
  5.  
  6. from Tkinter import *
  7. from urllib2 import build_opener, HTTPError, URLError
  8. from urlparse import urlparse
  9. from threading import Thread, Event
  10. from socket import setdefaulttimeout, timeout
  11.  
  12.  
# constants
SMAX = 8 # maximum number of results: Engine.find gives up once its result offset reaches this
DIFF = 512 # how many chars to skip until the answer: Engine.check rejects positions farther than this from the anchor
TOUT = 2 # timeout in seconds, installed process-wide with setdefaulttimeout in Engine.__init__
  17.  
  18.  
  19. class GUI(Frame):
  20.  
  21.     def __init__(self, master=None, margin=10):
  22.         Frame.__init__(self, master)
  23.         self.app = Engine() # main app
  24.         self.thread = None # Thread object used for parallel processing
  25.         self.master.title("OAF - cmiN")
  26.         self.grid(padx=margin, pady=margin)
  27.         self.widgets()
  28.         self.mainloop()
  29.  
  30.     def callback(self, event):
  31.         """What happens after you press Enter in Question's entry."""
  32.         # get and edit question
  33.         qon = self.app.edit_qon(self.qEntry.get())
  34.         if self.thread: # if it was initialized before
  35.             # check question
  36.             if qon == self.app.qon[0]: # same question (no reset)
  37.                 if self.thread.is_alive(): # let it finish
  38.                     return
  39.                 else:
  40.                     self.app.data = "" # empty buffer
  41.                     self.app.dataLen = 0
  42.             else: # another
  43.                 if self.thread.is_alive():
  44.                     self.app.stop()
  45.                     self.thread.join()
  46.                 self.app.reset() # reset everything
  47.         # set question
  48.         self.app.set_qon(qon)
  49.         # process data
  50.         self.thread = Thread(target=self.app.process)
  51.         self.thread.start() # no return
  52.  
  53.     def widgets(self):
  54.         # variables
  55.         self.statusVar = StringVar(value="Ready.") # passed to Thread class for live updating
  56.         #self.ansVar = StringVar() # same thing, but for answer
  57.         self.app.statusVar = self.statusVar
  58.         #self.app.ansVar = self.ansVar
  59.         # label-entry pairs
  60.         self.qLabel = Label(self, text="Question:")
  61.         self.qEntry = Entry(self, width=100)
  62.         self.aLabel = Label(self, text="Answer:")
  63.         #self.aEntry = Entry(self, width=100, textvariable=self.ansVar)
  64.         self.qLabel.grid(row=0, column=0, sticky="w")
  65.         self.qEntry.grid(row=1, column=0)
  66.         self.aLabel.grid(row=2, column=0, sticky="w")
  67.         #self.aEntry.grid(row=3, column=0)
  68.         # status widget
  69.         self.sLabel1 = Label(self, text="Status:")
  70.         self.sLabel2 = Label(self, width=86, textvariable=self.statusVar,
  71.                              relief="sunken", bd=2, anchor="c")
  72.         self.sLabel1.grid(row=4, column=0, sticky="w")
  73.         self.sLabel2.grid(row=5, column=0, sticky="w")
  74.         # text instead entry for answer
  75.         self.ansText = Text(self, width=75, height=10)
  76.         self.ansText.grid(row=3, column=0)
  77.         self.app.ansText = self.ansText # make it available to the engine
  78.         # behavior
  79.         self.qEntry.bind("<Return>", self.callback)
  80.         self.qEntry.bind("<KP_Enter>", self.callback)
  81.  
  82.  
  83. class Engine:
  84.  
  85.     def __init__(self):
  86.         setdefaulttimeout(TOUT)
  87.         self.opener = build_opener()
  88.         self.opener.addheaders = [("User-agent", "Mozilla/5.0")]
  89.         self.qon = ["", ""] # question
  90.         self.data = "" # data contain the question [and answer]
  91.         self.dataLen = 0 # data length
  92.         self.start = 0 # google first result
  93.         self.first = "url?q=" # from
  94.         self.second = "&amp" # to
  95.         self.seen = set() # evidence of visited links
  96.         self.statusVar = None # through this set status
  97.         self.ansVar = None # through this set answer if available
  98.         self.ansText = None # almost same shit
  99.         self.__stop = Event() # stop the process
  100.         self.upEvent = Event() # solve the deadlock
  101.         self.upEvent.set() # means it's safe (.wait will wait until .set is called or .is_set() is True)
  102.  
  103.     def update(self, status, answer):
  104.         """Here is a fucking deadlock, when the same function is called simultaneously."""
  105.         self.upEvent.wait() # wait to be setted
  106.         self.upEvent.clear() # make it busy
  107.         # do your ugly things
  108.         if hasattr(self.statusVar, "set") and hasattr(self.statusVar, "get"):
  109.             self.statusVar.set(status)
  110.             self.statusVar.get() # just to make sure the update finished
  111.         if hasattr(self.ansVar, "set") and hasattr(self.ansVar, "get"):
  112.             self.ansVar.set(answer)
  113.             self.ansVar.get()
  114.         if hasattr(self.ansText, "insert") and hasattr(self.ansText, "delete"):
  115.             self.ansText.delete(0.0, END)
  116.             self.ansText.insert(0.0, answer)
  117.         self.upEvent.set() # make it available
  118.  
  119.     def stop(self):
  120.         self.update("Stopping...", "")
  121.         self.__stop.set()
  122.  
  123.     def stopped(self):
  124.         return self.__stop.is_set()
  125.  
  126.     def reset(self):
  127.         self.__stop.clear()
  128.         self.start = 0
  129.         self.seen = set()
  130.         #self.update("Ready.", "")
  131.  
  132.     def edit_qon(self, qon):
  133.         return qon.strip().split("\n")[0] # idiot proof
  134.  
  135.     def set_qon(self, qon):
  136.         """Set question."""
  137.         self.qon[0] = qon
  138.         self.qon[1] = '"' + self.qon[0].replace(" ", "+") + '"'
  139.  
  140.     def find(self):
  141.         """Find links corresponding to query using google.
  142.  
  143.        Returns:
  144.            0 -> match, link extracted, data updated
  145.            1 -> no match for exact string
  146.            2 -> url already visited or invalid url
  147.            3 -> banned
  148.            4 -> maximum number of results exceeded
  149.        """
  150.         if self.start >= SMAX:
  151.             return 4
  152.         link = "http://www.google.com/search?q={}&start={}".format(self.qon[1], self.start)
  153.         try:
  154.             fobj = self.opener.open(link)
  155.         except HTTPError:
  156.             self.update("Google banned you.", "")
  157.             return 3
  158.         except timeout:
  159.             self.update("Timed out or Google banned you.", "")
  160.             return 3
  161.         else:
  162.             data = fobj.read() # google's source
  163.             fobj.close()
  164.         # find a relevant closest position to the link
  165.         index1 = data.find(self.first)
  166.         if index1 == -1: # no results in page or modified pattern
  167.             return 1 # invalid source
  168.         self.start += 1 # now do the increment
  169.         index1 += len(self.first)
  170.         index2 = data.find(self.second, index1)
  171.         url = data[index1:index2]
  172.         # edit url
  173.         newurl = ""
  174.         i = 0
  175.         length = len(url)
  176.         while i < length:
  177.             if url[i] == "%":
  178.                 char = chr(int(url[i + 1] + url[i + 2], 16))
  179.                 i += 2
  180.             else:
  181.                 char = url[i]
  182.             newurl += char
  183.             i += 1
  184.         url = newurl
  185.         # process it
  186.         if url in self.seen: # link already visited
  187.             return 2
  188.         self.seen.add(url)
  189.         upo = urlparse(url)
  190.         self.update("Looking in %s..." % upo.netloc, "")
  191.         try:
  192.             fobj = self.opener.open(url)
  193.         except URLError:
  194.             self.update("Invalid link.", "")
  195.             return 2
  196.         except timeout:
  197.             self.update("Timed out.", "")
  198.             return 3
  199.         else:
  200.             self.data = fobj.read()
  201.             self.dataLen = len(self.data)
  202.             fobj.close()
  203.         return 0 # all fine
  204.  
  205.     def check(self, old, index):
  206.         if index < 0 or index >= self.dataLen:
  207.             return False # invalid index
  208.         if abs(old - index) > DIFF:
  209.             return False # too far
  210.         return True # ok
  211.  
  212.     def get_star(self, index):
  213.         """Find the line with (*).
  214.  
  215.        Returns:
  216.            str -> good answer
  217.            False -> invalid answer or couldn't find
  218.        """
  219.         ansNr = 1 # default answers (for multiple ones)
  220.         chunk = "(Choose "
  221.         firstTag = self.data.find("<", index)
  222.         chunkIndex = self.data.find(chunk, index, firstTag)
  223.         if chunkIndex != -1:
  224.             chunkIndex += len(chunk)
  225.             number = ""
  226.             while self.data[chunkIndex] != ")":
  227.                 number += self.data[chunkIndex]
  228.                 chunkIndex += 1
  229.             number = number.strip().lower()
  230.             if number == "two":
  231.                 ansNr = 2
  232.             elif number == "three":
  233.                 ansNr = 3
  234.             elif number == "four":
  235.                 ansNr = 4 # i don't think this actually exists :)
  236.         star = "(*)"
  237.         last = index
  238.         ans = ""
  239.         while ansNr:
  240.             index = self.data.find(star, last) - 1
  241.             last = index + 1 + len(star)
  242.             if index < 0:
  243.                 return False # invalid answer type
  244.             # ok now we're good
  245.             old = index
  246.             while True:
  247.                 tag = True
  248.                 while self.data[index] != ">":
  249.                     if tag and self.data[index].isspace():
  250.                         index -= 1
  251.                         continue
  252.                     if tag:
  253.                         ans = "\n" + ans
  254.                         tag = False # found alphanumeric
  255.                     ans = self.data[index] + ans
  256.                     index -= 1
  257.                     if not self.check(old, index):
  258.                         return False
  259.                 ans = ans.strip()
  260.                 if tag and len(ans) > 0:
  261.                     break
  262.                 while self.data[index] != "<":
  263.                     index -= 1
  264.                 index -= 1
  265.             ansNr -= 1
  266.             ans = "\n" + ans
  267.         return ans.strip()
  268.  
  269.     def get_single(self, index):
  270.         """Line with single answer.
  271.  
  272.        Returns:
  273.            str -> good answer
  274.            False -> invalid answer or couldn't find
  275.        """
  276.         new = True # first answer (if multiple)
  277.         ans = ""
  278.         smooth = 1 # tag number difference
  279.         while True:
  280.             old = index
  281.             # go to the first tag
  282.             while self.data[index] != "<":
  283.                 index += 1
  284.                 if not self.check(old, index):
  285.                     return False
  286.             # no skip all of them
  287.             nrTag = 0
  288.             while self.data[index] == "<":
  289.                 while self.data[index] != ">":
  290.                     index += 1
  291.                 index += 1
  292.                 nrTag += 1 # add tag
  293.                 if not self.check(old, index):
  294.                     return False
  295.             if new:
  296.                 new = False # not new anymore
  297.                 model = nrTag
  298.             if abs(nrTag - model) > smooth:
  299.                 break # no incoming answers to this question
  300.             while self.data[index] != "<":
  301.                 ans += self.data[index]
  302.                 index += 1
  303.                 if not self.check(old, index):
  304.                     return False
  305.             # add separator (don't worry, we will strip it at the end)
  306.             ans = ans.strip() + "\n\n"
  307.         ans = ans.strip()
  308.         if ans == "":
  309.             return False # empty one
  310.         return ans
  311.  
  312.     def process(self):
  313.         """Try to find answers to quiz questions by searching them on google.
  314.  
  315.        Format string, search it on google, locate first %d results,
  316.        then search among them for patterns (question<>...<>answer<> or question<>answer...(*)<>).
  317.  
  318.        Returns:
  319.            str -> answer found (or false positive)
  320.              1 -> invalid/inexistent question (or google invalid source pattern)
  321.              2 -> answer not found (or different search pattern)
  322.              3 -> stopped
  323.              4 -> banned
  324.        """ % SMAX
  325.         ret = 0 # virtual return
  326.         while True:
  327.             if self.stopped():
  328.                 return 3 # stopped
  329.             if ret == 1:
  330.                 self.update("Invalid question.", "")
  331.                 return 1 # invalid question
  332.             elif ret == 2: # already seen or invalid
  333.                 ret = self.find()
  334.                 continue
  335.             elif ret == 3:
  336.                 return 4 # timed out or banned (too many queries)
  337.             elif ret == 4:
  338.                 self.update("Nothing found.", "")
  339.                 return 2 # not found
  340.             index = self.data.find(self.qon[0])
  341.             if index >= 0:
  342.                 #index += len(self.qon[0]) # conflict with star (need some data from question)
  343.                 ans = self.get_star(index)
  344.                 if not ans:
  345.                     ans = self.get_single(index)
  346.                 if ans:
  347.                     self.update("Answer found!", ans)
  348.                     return ans # string ok
  349.             ret = self.find()
  350.  
  351.  
if __name__ == "__main__":
    # GUI.__init__ enters the Tk main loop itself, so this call only returns
    # once that loop ends.
    GUI(Tk())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement