Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from datetime import datetime
- import multiprocessing, sys, json, threading, math
def filterLists():
    """Read the master word list, keep only the words that can be spelled
    from the global `letters` pool, and persist the survivors both as JSON
    ('lists/filteredWords.json') and as a plain newline list
    ('lists/filteredWords').
    """
    with open('lists/words_alpha.txt') as alphaListFile:
        words = [line.strip() for line in alphaListFile]
    # buildWord() already returns False when a character is missing from the
    # pool (or appears more times than available), so the old separate
    # per-character membership pre-check was redundant and has been dropped.
    neededWords = [word for word in words if buildWord(letters, word) != False]
    print('Found ' + str(len(neededWords)) + ' words.')
    with open('lists/filteredWords.json', 'w+') as filteredWordsJSON:
        json.dump(neededWords, filteredWordsJSON)
    with open('lists/filteredWords', 'w+') as filteredWordsFile:
        for word in neededWords:
            filteredWordsFile.write(word + '\n')
    # The explicit close() that used to sit inside the `with` block was
    # removed: the context manager already closes the file on exit.
    return
def buildWord(availableLetters, word):
    """Attempt to spell `word` (case-insensitive) from the letter pool.

    Each matched character consumes one occurrence from the pool. Returns
    the leftover pool string on success, or False when some character of
    `word` is not available often enough.
    """
    pool = availableLetters
    for ch in word:
        ch = ch.upper()
        pos = pool.find(ch)
        if pos == -1:
            # letter missing (or already used up) -> word cannot be built
            return False
        # consume exactly one occurrence of the matched letter
        pool = pool[:pos] + pool[pos + 1:]
    return pool
def buildSentence(sentences, words, availableLetters, wordIndex):
    """Depth-first search: try to append filteredWords[wordIndex] to the
    partial sentence `words`; record a sentence in `sentences` (and in the
    global rawSentenceFile) whenever the letter pool is consumed exactly.

    Relies on module globals: filteredWords, maxWords, rawSentenceFile.
    """
    # Work on a private copy so sibling recursive calls never see our appends.
    current = [] if words is None else list(words)
    sys.stdout.write('\r ')
    sys.stdout.write('\rWorking with: ' + ' '.join(current))
    sys.stdout.flush()
    candidate = filteredWords[wordIndex]
    remaining = buildWord(availableLetters, candidate)
    if remaining == False:
        # candidate cannot be spelled from what is left -> dead end
        return
    current.append(candidate)
    if len(current) > maxWords:
        # sentence grew past the cap without exhausting the pool
        return
    if len(remaining) == 0:
        # every letter used: this is a complete sentence
        sentence = ' '.join(current)
        rawSentenceFile.write(sentence + '\n')
        rawSentenceFile.flush()
        sentences.append(sentence)
        return
    # Letters remain: recurse over every possible next word.
    for nextIndex in range(len(filteredWords)):
        buildSentence(sentences, current, remaining, nextIndex)
def createSentenceList(wordRange, send_end):
    """Worker-process entry point.

    Builds all sentences whose first word index lies in
    [wordRange[0], wordRange[1]) and sends the collected list back to the
    parent through the `send_end` pipe connection, then closes it.
    """
    start, stop = wordRange
    collected = []
    for startIndex in range(start, stop):
        logger.increment()  # shared progress counter read by the parent
        buildSentence(collected, None, letters, startIndex)
    send_end.send(collected)
    send_end.close()
def processData():
    """Fan the sentence search out across one worker process per CPU and
    return the combined list of sentences gathered from all workers.
    """
    sentences = []
    jobs = []
    pipes = []
    # NOTE(review): dividing the word count by maxWords means only the first
    # ceil(len/maxWords) words are ever used as sentence starters. Logger
    # uses the same formula, so it looks deliberate — but worth confirming.
    listLength = int(math.ceil(len(filteredWords) / float(maxWords)))
    cpuCount = multiprocessing.cpu_count()
    # BUG FIX: the section size was hard-coded to listLength/4 regardless of
    # the actual CPU count, and the old end bound `section*i - 1` combined
    # with range()'s exclusive stop silently skipped the last index of every
    # section. Partition [0, listLength) contiguously instead.
    section = listLength // cpuCount
    for i in range(1, cpuCount + 1):
        recv_end, send_end = multiprocessing.Pipe(False)
        startRange = (i - 1) * section
        # last worker absorbs the remainder left by integer division
        endRange = listLength if i == cpuCount else i * section
        process = multiprocessing.Process(
            target=createSentenceList, args=((startRange, endRange), send_end))
        jobs.append(process)
        pipes.append(recv_end)
        # print() with a single pre-concatenated string works on both
        # Python 2 and Python 3 (the original py2 print statement did not).
        print('Starting process #' + str(i) + ' for words from: '
              + str(startRange) + ' to: ' + str(endRange))
        process.start()
    logger.log()
    # Drain every pipe BEFORE join(): a child blocks in send() for large
    # payloads, so joining first could deadlock.
    for pipe in pipes:
        sentences.extend(pipe.recv())
    for job in jobs:
        job.join()
    return sentences
class Logger(object):
    """Cross-process progress counter plus a self-rescheduling console reporter.

    The counter lives in a multiprocessing.Value so worker processes that
    inherit this object can increment it; log() prints a throughput /
    time-remaining line and re-arms itself once a second on a daemon Timer
    until stop() is called.
    """

    def __init__(self, initval=0, wordCount=0):
        self.avgList = []                               # sliding window (<= 10000) of per-second samples
        self.val = multiprocessing.Value('i', initval)  # shared words-processed-this-second counter
        # mirrors processData(): only ceil(wordCount/maxWords) words are starters
        self.wordCount = int(math.ceil(wordCount / float(maxWords)))
        self.lock = multiprocessing.Lock()
        self.e_stop = threading.Event()                 # set by stop() to halt rescheduling

    def increment(self):
        with self.lock:
            self.val.value += 1

    def reset(self):
        with self.lock:
            self.val.value = 0

    def getValue(self):
        with self.lock:
            return self.val.value

    def getWordCount(self):
        return self.wordCount

    def addValToAvg(self, val):
        # bound the window before appending so it never exceeds 10000 entries
        if len(self.avgList) >= 10000:
            self.avgList.pop(0)
        self.avgList.append(val)

    def reduceWordCount(self, val):
        self.wordCount -= val

    def calcAverage(self):
        samples = self.avgList
        return sum(samples) / float(len(samples)) if samples else 0

    def getEstimate(self):
        # estimated minutes remaining; 0 doubles as "no data yet"
        avg = self.calcAverage()
        return 0 if avg == 0 else int((self.getWordCount() / avg) / 60)

    def log(self):
        self.addValToAvg(self.getValue())
        self.reduceWordCount(self.getValue())
        sys.stdout.write('\r\033[K ')
        sys.stdout.write('Processing around ' + '%.2f' % self.calcAverage() + ' words/second. ')
        sys.stdout.write('Words remaining: ' + str(self.getWordCount()) + ' words. ')
        estimate = self.getEstimate()
        if estimate == 0:
            sys.stdout.write('Estimated time remaining: probably an eternity.')
        else:
            sys.stdout.write('Estimated time remaining: ' + str(estimate) + ' minutes.')
        sys.stdout.flush()
        self.reset()
        if not self.e_stop.is_set():
            # daemon Timer: dies with the main process, fires log() in ~1s
            timer = threading.Timer(1, self.log)
            timer.daemon = True
            timer.start()

    def stop(self):
        self.e_stop.set()
if __name__ == '__main__':
    # Driver: filter the dictionary against the letter pool, then search for
    # full-pool "sentences" across all CPUs and persist the results.
    beginTime = datetime.now()
    # BUG FIX: the original Python-2 `print 'x'` statements are syntax errors
    # on Python 3; a single pre-concatenated string inside print() behaves
    # identically on both interpreters.
    print('Began processing at: ' + beginTime.strftime('%Y-%m-%d %H:%M:%S'))
    letters = 'AMFLOIDYAMEULROLALCALHLT'  # the anagram letter pool
    maxWords = 6                          # max words per sentence
    # Kept as a module global (not `with`) because child processes inherit
    # and write to it from buildSentence().
    rawSentenceFile = open('lists/rawSentences', 'w+')
    filterLists()
    # BUG FIX: json.load(open(...)) leaked the file handle; use a context
    # manager so it is closed deterministically.
    with open('lists/filteredWords.json') as filteredWordsJSONFile:
        filteredWords = json.load(filteredWordsJSONFile)
    logger = Logger(0, len(filteredWords))
    sentenceList = processData()
    with open('lists/sentences.json', 'w+') as sentencesFile:
        json.dump(sentenceList, sentencesFile)
    rawSentenceFile.close()
    logger.stop()
    endTime = datetime.now()
    elapsedTime = endTime - beginTime
    elapsedMinSec = divmod(elapsedTime.total_seconds(), 60)
    print('\rDone processing data at:\033[K ' + endTime.strftime('%Y-%m-%d %H:%M:%S'))
    print('Created a total of ' + str(len(sentenceList)) + ' sentences.')
    print('It took ' + str(int(elapsedMinSec[0])) + ' minutes ' + str(int(elapsedMinSec[1])) + ' seconds.')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement