# Untitled

from sklearn.feature_extraction.text import CountVectorizer

# Minimal bag-of-words demo: learn a vocabulary from a tiny corpus, then
# encode the same corpus as a document-term count matrix.

# Corpus: each string is one document.
text = ["this is test doc", "this is another test doc"]

# Create the vectorizer with its default settings.
vector = CountVectorizer()

# Tokenize the documents and build the vocabulary.
vector.fit(text)

# Print the learned vocabulary (per the scikit-learn docs, a mapping of
# term -> feature/column index). This must reference `vector`, the object
# fitted above; the previous name `vectorizer` was never defined and raised
# NameError.
print(vector.vocabulary_)

# Encode the documents using the fitted vocabulary.
X_Train = vector.transform(text)

# Print a summary of the encoded result: its shape (documents x vocabulary
# terms) and its concrete type (a SciPy sparse container per the
# scikit-learn docs).
print(X_Train.shape)
print(type(X_Train))