Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def isLegal(chr):
    """Return True if *chr* is a character that may appear inside a word.

    Legal characters are the ASCII letters ``A``-``Z`` and ``a``-``z`` plus
    the apostrophe (``'``), so contractions such as ``don't`` stay in one
    piece. Everything else (digits, whitespace, punctuation, non-ASCII
    letters) is treated as a word separator by the caller.

    The parameter keeps its original name ``chr`` (which shadows the builtin
    inside this function only) so that keyword callers keep working.
    """
    return ('A' <= chr <= 'Z') or ('a' <= chr <= 'z') or chr == "'"
def splitToWords(text):
    """Split *text* into a list of words.

    A word is a maximal run of characters accepted by ``isLegal`` (ASCII
    letters and apostrophes). Leading apostrophes are stripped from each
    run, so an opening quote such as ``'Hello`` yields ``Hello``.

    Runs that consist only of apostrophes become empty after stripping and
    are dropped instead of being returned as empty "words".

    Args:
        text: The string to split.

    Returns:
        A new list of the words in the order they appear in *text*.
    """
    word_list = []
    current = []  # characters of the word being built

    def flush():
        # Finish the current run: strip opening quotes and keep it only if
        # something is left.
        word = "".join(current).lstrip("'")
        if word:
            word_list.append(word)
        current.clear()

    for chr in text:
        if isLegal(chr):
            current.append(chr)
        elif current:
            # An illegal character terminates the word in progress.
            flush()

    # The last word is still pending when the text ends on a legal character.
    if current:
        flush()
    return word_list
def filter(word_list):
    """Remove empty words and words containing an uppercase letter.

    The list is rebuilt in a single pass rather than calling ``remove`` while
    iterating over it. Removing during iteration shifts the remaining items
    left, so the element after each removed word is skipped.

    The list is modified in place and also returned, so callers may either
    rely on the mutation or use the return value.

    Note: the function keeps its original name, which shadows the builtin
    ``filter`` for the rest of the module.

    Args:
        word_list: List of word strings; updated in place.

    Returns:
        The same list object, now holding only the non-empty words that
        contain no uppercase character, in their original order.
    """
    # Slice assignment keeps the identity of the caller's list.
    word_list[:] = [
        word
        for word in word_list
        if word and not any(letter.isupper() for letter in word)
    ]
    return word_list
def main(filename=None):
    """Print the sorted lowercase-only words of a text file and their count.

    The file is read in full, split with ``splitToWords``, passed through
    ``filter``, sorted, and printed one word per line, followed by the total
    number of words printed.

    Args:
        filename: Path of the text file to read. When omitted, the original
            hard-coded location of ``alice.txt`` is used.
    """
    if filename is None:
        filename = "/home/zulkifl/Desktop/ahamed/New Folder/h_7_starting/alice.txt"

    # The context manager guarantees the file is closed after reading.
    with open(filename, 'r') as source:
        text = source.read()

    word_list = splitToWords(text)

    # NOTE(review): the original filters three times "for accuracy"; the
    # repetition is kept so the result does not get worse if ``filter`` needs
    # several passes. One call is enough once ``filter`` removes every
    # match in a single pass.
    for _ in range(3):
        word_list = filter(word_list)

    word_list = sorted(word_list)
    for word in word_list:
        print(word)
    print(len(word_list))
# Run the word listing only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement