Not a member of Pastebin yet?
Sign up:
it unlocks many cool features!
from sklearn.feature_extraction.text import CountVectorizer

# Minimal bag-of-words demo: fit a CountVectorizer on a tiny corpus, print
# the learned vocabulary, then print the shape and type of the encoded corpus.

# Corpus: each string is treated as one document.
text = ["this is test doc", "this is another test doc"]

# Create the vectorizer with its default settings.
vector = CountVectorizer()

# Tokenize the documents and build the vocabulary.
vector.fit(text)

# Print the learned vocabulary. The fitted object is `vector`; the earlier
# reference to an undefined name `vectorizer` raised NameError here.
print(vector.vocabulary_)

# Encode the same documents against the learned vocabulary.
X_Train = vector.transform(text)

# Summarize the encoded result: its dimensions and its container type
# (per the scikit-learn docs, a sparse document-term matrix).
print(X_Train.shape)
print(type(X_Train))
Add Comment
Please sign in to add a comment.