dictionary

# Build the bag-of-words (document-term count) matrix.
from sklearn.feature_extraction.text import CountVectorizer

# Vectorizer configured to drop scikit-learn's built-in English stop words.
count_vect = CountVectorizer(stop_words="english")

# fit_transform() takes the documents in `corpus` (a list of strings, expected
# to be defined earlier in the file) and does two things in a single call:
#   1. it "fits the model", i.e. it builds the vocabulary from the documents;
#   2. it transforms the documents into a matrix of token counts.
Z = count_vect.fit_transform(corpus)