Guest User

RegexCrosswordSolver.py

a guest
Jan 31st, 2016
135
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.07 KB | None | 0 0
  1. from selenium import webdriver
  2. from bs4 import BeautifulSoup
  3. from bs4.element import Tag
  4. import itertools
  5. import re
  6.  
  7. # Python solver for RegexCrossword
  8. # https://regexcrossword.com
  9. class Solver:
  10.    
  11.     # Default constructor
  12.     def __init__(self, puzzleURL):
  13.    
  14.         # Valid character set for string testing against puzzle regex
  15.         self.characterSet = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
  16.  
  17.         # Create a browser instance and scrape the puzzle from the given URL
  18.         browser = webdriver.PhantomJS()
  19.         browser.get(puzzleURL)
  20.         pageSoup = BeautifulSoup(browser.page_source, "html.parser")
  21.        
  22.         # Find our vertical and horizontal clues and store them in lists
  23.         vertClueSoup = pageSoup.find_all(re.compile('th'), class_ = "clue")
  24.         horizClueSoup = pageSoup.find_all(re.compile('div'), class_ = "clue")
  25.  
  26.         # Parse strings for horiz and vertical
  27.         self.horizClues = [ tag.contents[0] for soup in horizClueSoup for tag in soup.contents if type(tag) == Tag and len(tag.contents) > 0 ]
  28.         self.vertClues = [ tag.contents[0] for soup in vertClueSoup for tag in soup.contents if type(tag) == Tag and len(tag.contents) > 0 ]
  29.  
  30.     # Method to solve an entire puzzle
  31.     def solve(self):
  32.    
  33.         # For every square in the grid
  34.         for y, vc in enumerate(self.vertClues):
  35.             for x, hc in enumerate(self.horizClues):
  36.                 print(self.getVal(x, y))
  37.  
  38.     # Method to solve the character for a x,y location
  39.     def getVal(self, x, y):
  40.        
  41.         # Create a new generator for all possible string combinations at length
  42.         verticalPerms = itertools.permutations(list(self.characterSet), len(self.vertClues))
  43.         horizontalPerms = itertools.permutations(list(self.characterSet), len(self.horizClues))
  44.        
  45.         # Compile the regex for this square
  46.         hre = re.compile('^' + self.horizClues[x] + '$')
  47.         vre = re.compile('^' + self.vertClues[y] + '$')
  48.  
  49.         # Make a copy of the generators so we can iterate over them multiple times
  50.         hp, hp_b = itertools.tee(horizontalPerms)
  51.         vp, _ = itertools.tee(verticalPerms)    
  52.                
  53.         # For every possible combination of strings in vertical and horizontal perms
  54.         for vstr in vp:
  55.             hp, hp_b = itertools.tee(hp)
  56.             for hstr in hp_b:
  57.            
  58.                 # If we find an intersection of strings that match the regex, return the hstr[x] for that string as the answer
  59.                 if (list(hstr)[x] == list(vstr)[y] and hre.match("".join(list(hstr))) and vre.match("".join(list(vstr)))):
  60.                     retVal = "Solution (" + str(x) + "," + str(y) + ") = " + str(list(hstr)[x])
  61.                     return retVal
  62.                    
  63.         # This should never happen for a valid puzzle, no intersection found
  64.         return "Solution (" + str(x) + "," + str(y) + ") Not found"
  65.  
  66. # Create a solver and solve
  67. s = Solver('https://regexcrossword.com/challenges/beginner/puzzles/1')
  68. s.solve()
Advertisement
Add Comment
Please, Sign In to add comment