Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import re
def convert_text(s):
    """Lowercase *s* and blank out everything that is not an ASCII letter or digit.

    Each character outside ``a-z``, ``A-Z`` and ``0-9`` is replaced by a
    single space (not deleted), so the result has the same length as the
    lowercased input.
    """
    lowered = s.lower()
    non_alnum = re.compile("[^a-zA-Z0-9]")
    return non_alnum.sub(" ", lowered)
def read_texts(dir_path="./Downloads/9sem_data/beatles/"):
    """Read every entry of a directory and return the converted texts.

    Each name returned by ``os.listdir(dir_path)`` is opened as a latin-1
    text file, its lines are joined with a single space, and the result is
    passed through ``convert_text``.

    Args:
        dir_path: Directory to read. A trailing path separator is optional.

    Returns:
        A list with one converted string per directory entry, in the order
        ``os.listdir`` yields them.

    Raises:
        OSError: If the directory cannot be listed or an entry cannot be
            opened as a file (for example, it is a subdirectory).
    """
    txt_list = []
    for name in os.listdir(dir_path):
        # os.path.join copes with a present or missing trailing separator.
        path = os.path.join(dir_path, name)
        # The context manager closes the handle even if reading fails.
        with open(path, 'r', encoding='latin1') as fin:
            # Lines keep their newline; joining with " " matches the
            # original spacing before conversion.
            txt = " ".join(fin.readlines())
        txt_list.append(convert_text(txt))
    return txt_list
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement