Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import random
- import re
- from collections import Counter
# Module-level flag, initialised to 0 (meaning "nothing matched yet").
flag = 0

# Module-level registry of the alterations (corrections) recorded so far.
# Starts out empty.
lista_alteracoes = dict()
def tp3():
    """Read words from standard input and record a suggested correction for each.

    Lines are read one at a time until the sentinel ``FIMFIM`` (or end of
    input) is reached; the sentinel itself is not treated as a word.
    Every word that is not yet a key of the module-level ``lista_alteracoes``
    is passed to ``corrector`` and stored as
    ``lista_alteracoes[word] = (correction, occurrences)``.

    Returns:
        None. Results are written into ``lista_alteracoes``.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ler_linha = raw_input
    except NameError:
        ler_linha = input

    lista_palavras = []
    while True:
        try:
            pal = ler_linha()
        except EOFError:
            # Input ended without the sentinel: process what was read so far.
            break
        if pal == "FIMFIM":
            break
        lista_palavras.append(pal)

    for elem in lista_palavras:
        if elem in lista_alteracoes:
            # TODO (from the original notes): propose the word as it is
            # and print it.
            continue
        # Otherwise look up the correction and propose the alteration.
        correcao = corrector(elem)
        # NOTE(review): the original left `ocorrencias` undefined; its comment
        # says the occurrences come from the dictionary, so the corpus count
        # of the correction is used here -- confirm this is the intended value.
        ocorrencias = WORDS[correcao]
        # The original appended to an undefined `dici_alteracoes`; the
        # module-level dict `lista_alteracoes` is used as the store instead.
        lista_alteracoes[elem] = (correcao, ocorrencias)
def words(text):
    """Return every run of word characters in `text`, lower-cased, in order."""
    lowered = text.lower()
    return re.findall(r'\w+', lowered)
# Word-frequency table built from the corpus file 'big.txt'.
# The file is opened in a `with` block so the handle is closed as soon as the
# text has been read (the original left it open).
# TODO (translated from the original note): change this -- the file is
# downloaded, and the way it is read should be changed.
with open('big.txt') as _corpus_file:
    WORDS = Counter(words(_corpus_file.read()))
def P(word, N=sum(WORDS.values())):
    """Probability of `word`.

    Args:
        word: The word to look up in ``WORDS``.
        N: Total number of word occurrences. The default is the sum of all
            counts in ``WORDS``, evaluated once when this function is defined.

    Returns:
        The count of `word` divided by `N` as a float, or 0.0 when `N` is 0.
    """
    # TODO (translated from the original note): change this -- should come
    # from a text file, read and captured into a dictionary????
    if not N:
        # Empty corpus: no word has any probability mass; avoid dividing by 0.
        return 0.0
    # float() forces true division on Python 2 as well, where int / int would
    # truncate every probability to 0.
    return WORDS[word] / float(N)
def corrector(palavra):
    """Return the most probable spelling correction for `palavra`.

    Picks, among the words returned by ``candidatos(palavra)``, the one with
    the highest probability according to ``P``.
    """
    # The keyword accepted by max() is `key`; the original `chave=P` raised
    # a TypeError on every call.
    return max(candidatos(palavra), key=P)
def candidatos(palavra):
    """Return the candidate corrections for `palavra`.

    Tries, in order, and returns the first non-empty result:
    the word itself if known, known words one edit away, known words two
    edits away. Falls back to a list containing only `palavra`.
    """
    exact = known([palavra])
    if exact:
        return exact

    one_edit = known(edits1(palavra))
    if one_edit:
        return one_edit

    two_edits = known(edits2(palavra))
    if two_edits:
        return two_edits

    return [palavra]
def known(words):
    """Return the set of items from `words` that are present in WORDS."""
    found = set()
    for candidate in words:
        if candidate in WORDS:
            found.add(candidate)
    return found
def edits1(word):
    """Return the set of all strings one edit away from `word`.

    An edit is a single-character deletion, a transposition of two adjacent
    characters, a replacement of one character by a lowercase ASCII letter,
    or an insertion of a lowercase ASCII letter at any position.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    results = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]

        # Insertion of each letter at this position.
        for ch in alphabet:
            results.add(head + ch + tail)

        if not tail:
            # Nothing after the cut: no delete / transpose / replace here.
            continue

        # Deletion of the character right after the cut.
        results.add(head + tail[1:])

        # Transposition of the two characters right after the cut.
        if len(tail) > 1:
            results.add(head + tail[1] + tail[0] + tail[2:])

        # Replacement of the character right after the cut.
        for ch in alphabet:
            results.add(head + ch + tail[1:])
    return results
def edits2(word):
    """Return a generator over all strings two edits away from `word`.

    Each string one edit away from `word` is itself expanded with `edits1`;
    duplicates are not removed.
    """
    # Computed up front, as in a generator expression whose outermost
    # iterable is evaluated when the expression is created.
    first_pass = edits1(word)
    return (twice for once in first_pass for twice in edits1(once))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement