Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Train, cross-validate and export a TF-IDF + logistic-regression text
# classifier.
#
# Relies on names defined earlier in the file: `data` (presumably a
# two-column pandas DataFrame of text and label -- confirm against the
# loading code), `np`, and the scikit-learn classes/functions used below.
# Side effects: prints per-metric CV statistics and writes
# 'suicide_log_reg.pkl' and 'tfidf_vectorizer.pkl' to the working directory.
import joblib
from sklearn.pipeline import make_pipeline

data.columns = ['text', 'label']
data = data.dropna()  # drop rows with a missing text or label
X = data['text']
y = data['label']

# Creating a TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=5000)  # You can adjust max_features as needed

# Initialize the model you want to use
model = LogisticRegression()  # Change this to your preferred model

# Define the metrics you want to track during cross-validation
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score, average='weighted'),
    'recall': make_scorer(recall_score, average='weighted'),
    'f1': make_scorer(f1_score, average='weighted'),
}

# Define the number of folds for cross-validation
k_folds = 10  # You can change this as needed

# Initialize the StratifiedKFold cross-validator
stratified_kfold = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Perform k-fold cross-validation on the raw text through a pipeline so the
# vectorizer is re-fitted on each training fold only. Fitting TF-IDF on the
# full corpus up front would leak validation-fold vocabulary/IDF statistics
# into training and inflate the reported scores.
cv_pipeline = make_pipeline(tfidf_vectorizer, model)
results = cross_validate(cv_pipeline, X, y, cv=stratified_kfold, scoring=scoring)

# Display the cross-validation results (includes fit/score timings as well
# as the test_* metrics returned by cross_validate)
for metric, values in results.items():
    print(f"{metric.capitalize()} (Mean): {np.mean(values):.4f}")
    print(f"{metric.capitalize()} (Std): {np.std(values):.4f}")

# Exporting the model
# cross_validate only fits clones of the estimator, so `tfidf_vectorizer` and
# `model` are still unfitted here. Fit both on the full dataset before
# saving; otherwise the exported files would hold untrained objects.
X_tfidf = tfidf_vectorizer.fit_transform(X)
model.fit(X_tfidf, y)

# Save the trained model to a .pkl file
joblib.dump(model, 'suicide_log_reg.pkl')

# Save the fitted tfidf_vectorizer to a .pkl file
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.pkl')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement