Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
def load_data(dir):
    """Read every file in a directory and return each as a one-line string.

    Each entry is read as raw bytes, decoded as UTF-8 with undecodable
    bytes silently dropped, split into lines, and re-joined with single
    spaces so that one file becomes one string.

    Args:
        dir: Path of the directory whose entries are read. The name
            shadows the ``dir`` builtin; it is kept so existing keyword
            callers keep working.

    Returns:
        A list with one string per directory entry, in the order
        ``os.listdir`` yields them (that order is arbitrary).

    Raises:
        OSError: If the directory cannot be listed or an entry cannot be
            opened as a regular file (for example, a subdirectory).
    """
    texts = []
    for name in os.listdir(dir):
        # Binary mode + errors="ignore": tolerate bytes that are not valid
        # UTF-8 instead of failing on the first undecodable byte.
        with open(os.path.join(dir, name), 'rb') as f:
            lines = f.read().decode('utf-8', errors='ignore').splitlines()
        texts.append(' '.join(lines))
    return texts
# Root directory of the corpus; the ``ham`` and ``spam`` subdirectories
# below it are read by the two statements that follow.
BASE_DATA_DIR = 'enron1'

# Pair every message text with its class label so both sets can be merged
# into a single list of (text, label) tuples.
ham = [(text, 'ham') for text in load_data(BASE_DATA_DIR + '/ham')]
spam = [(text, 'spam') for text in load_data(BASE_DATA_DIR + '/spam')]

# NOTE: ``all`` shadows the builtin of the same name from here on; the
# name is kept because code outside this excerpt may reference it.
all = ham + spam
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement