Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer


def _iter_comments(reader):
    """Yield the ``comment`` column of every chunk, one document at a time.

    Missing values are replaced with an empty string rather than dropped:
    the vectorizer rejects NaN documents, and keeping one document per CSV
    row keeps the rows of the resulting matrix aligned with the input file.
    """
    for chunk in reader:
        yield from chunk["comment"].fillna("").astype(str)


# Read the compressed CSV lazily: with ``chunksize`` set, ``read_csv`` returns
# an iterator of DataFrames instead of loading the whole file into memory.
chunks = pd.read_csv(
    "data.csv.bz2",
    chunksize=1000000,
    nrows=120000000,
)
print(type(chunks))  # a pandas TextFileReader, not a DataFrame

# ``fit_transform`` accepts any iterable of raw documents, so the comments are
# streamed chunk by chunk; only the sparse count matrix is held in memory.
count_vectorizer = CountVectorizer()
X_train_counts = count_vectorizer.fit_transform(_iter_comments(chunks))

# Re-weight the raw term counts into TF-IDF features.
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement