Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Water potability analysis script.
#
# Steps: fill missing values with column medians, standardize the features,
# fit a random forest to rank feature importance, measure accuracy on an
# 80/20 hold-out split, and plot the importances as a horizontal bar chart.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# NOTE(review): `data` is never defined in this snippet. It is presumably a
# pandas DataFrame with a 'Potability' column loaded before this point (e.g.
# in an earlier notebook cell) -- confirm and add the loading step.

# Handle missing values with median.
# The filled column is assigned back instead of calling
# `data[column].fillna(..., inplace=True)`: the inplace form operates on an
# intermediate object (chained assignment), which emits a FutureWarning on
# pandas 2.x and leaves `data` unchanged once Copy-on-Write is enabled.
for column in data.columns:
    if data[column].isnull().any():
        data[column] = data[column].fillna(data[column].median())

# Separate features and target
X = data.drop('Potability', axis=1)
y = data['Potability']

# Scale the features
# NOTE(review): the medians above and this scaler are both computed from all
# rows, i.e. before the train/test split below, so statistics of the test rows
# influence the preprocessing. Consider fitting them on the training split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train a Random Forest model on the full data set (used only for the
# feature-importance ranking below)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_scaled, y)

# Get feature importance, most important feature first.
# Column names come from X because X_scaled is a plain array without labels.
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Importance': rf_model.feature_importances_
}).sort_values('Importance', ascending=False)

print("Feature Importance for Predicting Water Potability:")
print(feature_importance)

# Calculate and print model accuracy

# Split the data into training and testing sets (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the model on training data.
# This refits the same estimator object; the importances printed above were
# already copied into `feature_importance`, so they are not affected.
rf_model.fit(X_train, y_train)

# Make predictions on test data
y_pred = rf_model.predict(X_test)

print("\nModel Performance:")
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))
print("\nDetailed Classification Report:")
print(classification_report(y_test, y_pred))

# Create feature importance plot (one horizontal bar per feature, in the
# sorted order of `feature_importance`)
plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feature_importance)
plt.title('Feature Importance for Water Potability Prediction')
plt.xlabel('Importance Score')
plt.ylabel('Features')
plt.tight_layout()
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement