Advertisement
Piotr_Laskowski

important

May 28th, 2025
698
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.94 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.ensemble import RandomForestClassifier
  5. from sklearn.preprocessing import StandardScaler
  6.  
  7. # Convert Potability into category
  8. #data['Potability'] = data['Potability'].astype('category')
  9.  
  10.  
  11.  
  12. # Handle missing values with median
  13. for column in data.columns:
  14.     if data[column].isnull().any():
  15.         data[column].fillna(data[column].median(), inplace=True)
  16.  
  17. # Separate features and target
  18. X = data.drop('Potability', axis=1)
  19. y = data['Potability']
  20.  
  21. # Scale the features
  22. scaler = StandardScaler()
  23. X_scaled = scaler.fit_transform(X)
  24.  
  25. # Train a Random Forest model
  26. rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
  27. rf_model.fit(X_scaled, y)
  28.  
  29. # Get feature importance
  30. feature_importance = pd.DataFrame({
  31.     'Feature': X.columns,
  32.     'Importance': rf_model.feature_importances_
  33. }).sort_values('Importance', ascending=False)
  34.  
  35. print("Feature Importance for Predicting Water Potability:")
  36. print(feature_importance)
  37.  
  38. # Calculate and print model accuracy
  39. from sklearn.metrics import accuracy_score, classification_report
  40.  
  41. # Split the data into training and testing sets
  42. X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
  43.  
  44. # Train the model on training data
  45. rf_model.fit(X_train, y_train)
  46.  
  47. # Make predictions on test data
  48. y_pred = rf_model.predict(X_test)
  49.  
  50. print("\nModel Performance:")
  51. print("\nAccuracy Score:", accuracy_score(y_test, y_pred))
  52. print("\nDetailed Classification Report:")
  53. print(classification_report(y_test, y_pred))
  54.  
  55. # Create feature importance plot
  56. import matplotlib.pyplot as plt
  57. import seaborn as sns
  58.  
  59. plt.figure(figsize=(10, 6))
  60. sns.barplot(x='Importance', y='Feature', data=feature_importance)
  61. plt.title('Feature Importance for Water Potability Prediction')
  62. plt.xlabel('Importance Score')
  63. plt.ylabel('Features')
  64. plt.tight_layout()
  65. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement