Fecka

Untitled

Dec 28th, 2017
# Builds "sentences": combinations of dictionary words that together use up the
# fixed pool of letters defined in __main__, searching in parallel across CPU cores.
from datetime import datetime
import multiprocessing, sys, json, threading, math

def filterLists():
    # Keep only the dictionary words that can be spelled from the available letters.
    with open('lists/words_alpha.txt') as alphaListFile:
        words = alphaListFile.readlines()

    words = [x.strip() for x in words]

    neededWords = []

    for word in words:
        # Quick pre-check that every character appears in the pool at all,
        # then let buildWord verify the letter counts.
        wordNeeded = True
        for char in word:
            if letters.find(char.upper()) == -1:
                wordNeeded = False
                break
        if wordNeeded and buildWord(letters, word) is not False:
            neededWords.append(word)

    print('Found ' + str(len(neededWords)) + ' words.')

    with open('lists/filteredWords.json', 'w+') as filteredWordsJSON:
        json.dump(neededWords, filteredWordsJSON)

    with open('lists/filteredWords', 'w+') as filteredWordsFile:
        for word in neededWords:
            filteredWordsFile.write(word + '\n')
    return

def buildWord(availableLetters, word):
    # Try to spell 'word' from 'availableLetters', consuming each used letter.
    # Returns the leftover letters, or False if a needed letter is missing.
    for wordChar in word:
        wordChar = wordChar.upper()
        if availableLetters.find(wordChar) == -1:
            return False
        else:
            availableLetters = availableLetters.replace(wordChar, '', 1)
    return availableLetters

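# A small illustration of buildWord (added here for clarity; the calls and the
# letter pool below are hypothetical, not part of the original paste):
#   buildWord('AMFLOID', 'mad')  -> 'FLOI'   (used letters are removed from the pool)
#   buildWord('AMFLOID', 'doom') -> False    (only one 'O' is available)
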
def buildSentence(sentences, words, availableLetters, wordIndex):
    # Depth-first search: append filteredWords[wordIndex] if it still fits the
    # remaining letters, then recurse over every word for the next slot.
    if words is None:
        words = []
    else:
        words = list(words)

    sys.stdout.write('\r                                          ')
    sys.stdout.write('\rWorking with: ' + ' '.join(words))
    sys.stdout.flush()

    word = filteredWords[wordIndex]

    availableLetters = buildWord(availableLetters, word)
    if availableLetters is not False:
        words.append(word)
    else:
        return

    if len(words) > maxWords:
        return
    elif len(availableLetters) == 0:
        # Every letter in the pool was used: record the finished sentence.
        rawSentenceFile.write(' '.join(words) + '\n')
        rawSentenceFile.flush()
        sentences.append(' '.join(words))
        return

    for i in range(len(filteredWords)):
        buildSentence(sentences, words, availableLetters, i)

def createSentenceList(wordRange, send_end):
    # Worker entry point: build sentences for one slice of starting-word
    # indices and send the results back through the pipe.
    sentences = []

    for i in range(wordRange[0], wordRange[1]):
        logger.increment()
        buildSentence(sentences, None, letters, i)

    send_end.send(sentences)
    send_end.close()

def processData():
    sentences = []
    jobs = []
    pipes = []

    # Split the starting-word indices into one slice per CPU core; the last
    # worker absorbs any remainder.
    listLength = int(math.ceil(len(filteredWords) / float(maxWords)))
    section = int(listLength / multiprocessing.cpu_count())

    for i in range(1, multiprocessing.cpu_count() + 1):
        recv_end, send_end = multiprocessing.Pipe(False)
        startRange = (i - 1) * section
        endRange = section * i
        if i == multiprocessing.cpu_count():
            endRange = listLength
        process = multiprocessing.Process(target=createSentenceList, args=((startRange, endRange), send_end))
        jobs.append(process)
        pipes.append(recv_end)
        print('Starting process #' + str(i) + ' for words from: ' + str(startRange) + ' to: ' + str(endRange))
        process.start()

    logger.log()

    # Drain the pipes before joining so a worker never blocks on a full pipe.
    for pipe in pipes:
        sentences.extend(pipe.recv())

    for job in jobs:
        job.join()

    return sentences

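# Rough sense of the split above (illustrative numbers only): with 1,200
# starting indices and 4 CPU cores, each worker gets a 300-index slice,
# i.e. (0, 300), (300, 600), (600, 900) and (900, 1200); the last slice is
# stretched to absorb any remainder when the division is not exact.
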
class Logger(object):
    # Shared progress counter plus a once-per-second status line with a
    # rolling words/second average and a rough time estimate.
    def __init__(self, initval=0, wordCount=0):
        self.avgList = []
        self.val = multiprocessing.Value('i', initval)
        self.wordCount = int(math.ceil(wordCount / float(maxWords)))
        self.lock = multiprocessing.Lock()
        self.e_stop = threading.Event()

    def increment(self):
        with self.lock:
            self.val.value += 1

    def reset(self):
        with self.lock:
            self.val.value = 0

    def getValue(self):
        with self.lock:
            return self.val.value

    def getWordCount(self):
        return self.wordCount

    def addValToAvg(self, val):
        # Keep a bounded window so the average tracks recent throughput.
        if len(self.avgList) >= 10000:
            self.avgList.pop(0)
        self.avgList.append(val)

    def reduceWordCount(self, val):
        self.wordCount = self.wordCount - val

    def calcAverage(self):
        if len(self.avgList) == 0:
            return 0
        return sum(self.avgList) / float(len(self.avgList))

    def getEstimate(self):
        # Minutes remaining at the current average rate.
        avg = self.calcAverage()
        if avg == 0:
            return 0
        return int((self.getWordCount() / avg) / 60)

    def log(self):
        self.addValToAvg(self.getValue())
        self.reduceWordCount(self.getValue())
        sys.stdout.write('\r\033[K                                           ')
        sys.stdout.write('Processing around ' + '%.2f' % self.calcAverage() + ' words/second. ')
        sys.stdout.write('Words remaining: ' + str(self.getWordCount()) + ' words. ')
        if self.getEstimate() == 0:
            sys.stdout.write('Estimated time remaining: probably an eternity.')
        else:
            sys.stdout.write('Estimated time remaining: ' + str(self.getEstimate()) + ' minutes.')
        sys.stdout.flush()
        self.reset()
        if not self.e_stop.is_set():
            # Re-arm the timer so the status line refreshes every second.
            t = threading.Timer(1, self.log)
            t.daemon = True
            t.start()

    def stop(self):
        self.e_stop.set()

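# Example of the estimate arithmetic in getEstimate (made-up numbers): at an
# average of 2,500 words/second with 3,000,000 starting indices remaining, the
# estimate is 3000000 / 2500 / 60 = 20 minutes.
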
if __name__ == '__main__':
    beginTime = datetime.now()
    print('Began processing at: ' + beginTime.strftime('%Y-%m-%d %H:%M:%S'))

    # The pool of letters every sentence must use up, and the sentence length cap.
    letters = 'AMFLOIDYAMEULROLALCALHLT'
    maxWords = 6

    rawSentenceFile = open('lists/rawSentences', 'w+')

    filterLists()
    filteredWords = json.load(open('lists/filteredWords.json'))

    logger = Logger(0, len(filteredWords))

    sentenceList = processData()
    with open('lists/sentences.json', 'w+') as sentencesFile:
        json.dump(sentenceList, sentencesFile)

    rawSentenceFile.close()
    logger.stop()

    endTime = datetime.now()
    elapsedTime = endTime - beginTime
    elapsedMinSec = divmod(elapsedTime.total_seconds(), 60)
    print('\rDone processing data at:\033[K ' + endTime.strftime('%Y-%m-%d %H:%M:%S'))
    print('Created a total of ' + str(len(sentenceList)) + ' sentences.')
    print('It took ' + str(int(elapsedMinSec[0])) + ' minutes ' + str(int(elapsedMinSec[1])) + ' seconds.')