Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the whole book, normalise it to lower case and split it on
# whitespace into raw tokens (punctuation is still attached at this point).
# NOTE: for the Kazakh corpus, open 'kaztext.txt' with encoding='utf-8'.
with open('text.txt') as fin:  # context manager guarantees the file is closed
    text = fin.read()
text = text.lower()
t = text.split()
# Characters accepted as part of a word: Latin, Russian and Kazakh Cyrillic
# letters.  'ё' is included explicitly -- it is a real letter of both
# Cyrillic alphabets and dropping it would mangle words such as "ёлка".
_WORD_LETTERS_LOWER = (
    'qwertyuiopasdfghjklzxcvbnm'
    'йцукенгшщзхъфывапролджэячсмитьбюё'
    'әіңғүұқөһ'
)
# Built once (both cases) as a frozenset so each membership test is O(1).
_WORD_LETTERS = frozenset(_WORD_LETTERS_LOWER + _WORD_LETTERS_LOWER.upper())


def onlywords(s):
    """Return the tokens of *s* with every non-letter character removed.

    Args:
        s: Iterable of strings (e.g. the result of ``str.split()``).

    Returns:
        A new list with one cleaned word per input token, in the original
        order.  Only Latin and Russian/Kazakh Cyrillic letters are kept;
        digits, punctuation and any other characters are dropped.  Tokens
        that contain no accepted letter at all are omitted from the result.
    """
    taza = []
    for token in s:
        word = ''.join(ch for ch in token if ch in _WORD_LETTERS)
        if word:  # skip tokens that were pure punctuation / digits
            taza.append(word)
    return taza
from collections import Counter

# Reduce the raw tokens to letters-only words.
tex = onlywords(t)
print(tex)

# Count every word in a single pass; the previous approach called
# tex.count() once per token, which is quadratic in the size of the book.
soz = Counter(tex)
print("Total number of words in the book: ",len(tex))
print("Total number of different words: ",len(soz))

# most_common() orders by descending count and keeps words with equal counts
# in first-seen order, so the ranking matches reading order on ties.
top_ten = soz.most_common(10)
tenword = [word for word, _ in top_ten]
num = [count for _, count in top_ten]
print(num)

print("10 most frequently used words in the book: ")
print("================================================")
for word, count in top_ten:
    print(word, '-', count)
print("================================================")
# Load the reference word list as a set of whole words.  Testing against the
# raw file contents would be a substring check (e.g. "he" would be "found"
# inside "the"), so the list is split into entries first.  Entries are
# lower-cased because the book text was lower-cased before tokenising.
# NOTE: for the Kazakh word list, open "kzwords.txt" with encoding='utf-8'.
with open("words.txt") as fin1:
    allw = set(fin1.read().lower().split())

print("Words that in the book but not in the word list: ")
print("================================================")
for word in tex:
    if word not in allw:
        print(word)
print("================================================")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement