Advertisement
Guest User

Untitled

a guest
Nov 20th, 2017
183
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.78 KB | None | 0 0
  1. fin = open('text.txt')
  2. #fin = open('kaztext.txt', encoding='utf-8')
  3. text = fin.read()
  4. text = text.lower()
  5. t = text.split()
  6. #print(t)
  7. def onlywords(s):
  8. taza = []
  9. abc = 'qwertyuiopasdfghjklzxcvbnmйцукенгшщзхъфывапролджэячсмитьбюәіңғүұқөһ'
  10. ABC = abc.upper()
  11. letters = abc + ABC
  12. for i in s:
  13. word = ''
  14. for e in i:
  15. if e in letters:
  16. word = word + e
  17. if word != '':
  18. taza.append(word)
  19. return taza
  20. tex = onlywords(t)
  21. print(tex)
  22. print("Total number of words in the book: ",len(tex))
  23. print("Total number of different words: ",len(set(tex)))
  24. #####################################################################################
  25. soz = dict()
  26. for i in tex:
  27. #print(i, tex.count(i))
  28. soz[i] = tex.count(i)
  29.  
  30. num = []
  31. ite = 0
  32. for i in sorted(soz.values(), reverse=True):
  33. ite = ite + 1
  34. if ite > 10:
  35. break
  36. else:
  37. num.append(i)
  38.  
  39. print(num)
  40.  
  41. tenword = []
  42. for i in range (len(num)):
  43. for j in tex:
  44. if num[i] == tex.count(j):
  45. #print(j)
  46. if j not in tenword:
  47. tenword.append(j)
  48.  
  49. #print(tenword)
  50.  
  51. print("10 most frequently used words in the book: ")
  52. print("================================================")
  53. for i in range (len(num)):
  54. print(tenword[i], '-', num[i])
  55. print("================================================")
  56.  
  57. #print(tex)
  58.  
  59. fin1 = open("words.txt")
  60. #fin1 = open("kzwords.txt", encoding='utf-8')
  61. allw = fin1.read()
  62.  
  63. print("Words that in the book but not in the word list: ")
  64. print("================================================")
  65. for i in tex:
  66. if i not in allw:
  67. print(i)
  68. print("================================================")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement