Advertisement
Guest User

Untitled

a guest
Apr 6th, 2017
267
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.63 KB | None | 0 0
  1. import os
  2. import re
  3.  
  4. def convert_text(s):
  5.     # Removes all characters from string except letters and digits and convert letters to lowercase
  6.     return re.sub("[^a-zA-Z0-9]", " ", s.lower())
  7.  
  8. def read_texts(dir_path="./Downloads/9sem_data/beatles/"):
  9.     # Reads all files from directory
  10.     if dir_path[-1] != os.path.sep:
  11.         dir_path = dir_path + os.path.sep
  12.     txt_list = []
  13.     for file in os.listdir(dir_path):
  14.         file = dir_path + file
  15.         fin = open(file, 'r', encoding='latin1')
  16.         txt = " ".join(fin.readlines())
  17.         txt = convert_text(txt)
  18.         txt_list.append(txt)
  19.     return txt_list
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement