Advertisement
Hasan1026

Untitled

Sep 16th, 2023
26
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.48 KB | None | 0 0
  1. data.columns = ['text', 'label']
  2. data= data.dropna()
  3. X = data['text']
  4. y = data['label']
  5.  
  6. # Creating a TF-IDF vectorizer
  7. tfidf_vectorizer = TfidfVectorizer(max_features=5000) # You can adjust max_features as needed
  8.  
  9. # Initialize the model you want to use
  10. model = LogisticRegression() # Change this to your preferred model
  11.  
  12. # Define the metrics you want to track during cross-validation
  13. scoring = {
  14. 'accuracy': make_scorer(accuracy_score),
  15. 'precision': make_scorer(precision_score, average='weighted'),
  16. 'recall': make_scorer(recall_score, average='weighted'),
  17. 'f1': make_scorer(f1_score, average='weighted')
  18. }
  19.  
  20. # Define the number of folds for cross-validation
  21. k_folds = 10 # You can change this as needed
  22.  
  23. # Initialize the StratifiedKFold cross-validator
  24. stratified_kfold = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)
  25.  
  26. # Perform k-fold cross-validation
  27. results = cross_validate(model, tfidf_vectorizer.fit_transform(X), y, cv=stratified_kfold, scoring=scoring)
  28.  
  29. # Display the cross-validation results
  30. for metric, values in results.items():
  31. print(f"{metric.capitalize()} (Mean): {np.mean(values):.4f}")
  32. print(f"{metric.capitalize()} (Std): {np.std(values):.4f}")
  33.  
  34. # Exporting the model
  35. import joblib
  36.  
  37. # Assuming 'model' is your trained machine learning model
  38. # Save the model to a .pkl file
  39. joblib.dump(model, 'suicide_log_reg.pkl')
  40.  
  41. # Save the tfidf_vectorizer to a .pkl file
  42. joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.pkl')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement