Advertisement
Guest User

Untitled

a guest
Jun 15th, 2019
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.89 KB | None | 0 0
  1. # Primeira limpeza no texto
  2. def clean_text(text):
  3. text = text.lower()
  4. text = re.sub(r"i'm", "i am", text)
  5. text = re.sub(r"he's", "he is", text)
  6. text = re.sub(r"she's", "she is", text)
  7. text = re.sub(r"that's", "that is", text)
  8. text = re.sub(r"what's", "what is", text)
  9. text = re.sub(r"where's", "where is", text)
  10. text = re.sub(r"'ll", " will", text)
  11. text = re.sub(r"'ve", " have", text)
  12. text = re.sub(r"'re", " are", text)
  13. text = re.sub(r"'d", " would", text)
  14. text = re.sub(r"won't", "will not", text)
  15. text = re.sub(r"can't", "cannot", text)
  16. text = re.sub(r"[-()"#/@;:<>{}+=-|.?,]", "", text)
  17. return text
  18.  
  19. # Limpando as perguntas
  20. clean_questions = []
  21. for question in questions:
  22. clean_questions.append(clean_text(question))
  23.  
  24. # Limpando as respostas
  25. clean_answers = []
  26. for answer in answers:
  27. clean_answers.append(clean_text(answer))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement