# ML Exp 6

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Feature columns, in the order the scaler and the tree receive them.
FEATURE_COLUMNS = ['Gender', 'Age', 'EstimatedSalary']
# Display names for the target classes, lowest class value first.
# Assumes 'Purchased' is coded 0/1 -- TODO confirm against the CSV.
CLASS_LABELS = ['Not Purchased', 'Purchased']

# Load the dataset; the path is relative to the current working directory.
data = pd.read_csv("Social_Network_Ads.csv")

# Swap the Gender labels for integer codes. LabelEncoder numbers the labels
# in sorted order, so presumably Female=0 and Male=1 (verify the CSV holds
# exactly those two labels).
le = LabelEncoder()
le.fit(data['Gender'])
data['Gender'] = le.transform(data['Gender'])

# Feature matrix and target vector.
X = data[FEATURE_COLUMNS]
y = data['Purchased']

# Hold out 30% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Decision tree that chooses splits by entropy.
id3 = DecisionTreeClassifier(criterion='entropy', random_state=42).fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = id3.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nConfusion Matrix:\n", matrix)
print("\nClassification Report:\n", report)

# Draw the fitted tree.
plt.figure(figsize=(20, 12))
plot_tree(
    id3,
    feature_names=FEATURE_COLUMNS,
    class_names=CLASS_LABELS,
    filled=True,
    fontsize=15,
)
plt.show()

# Classify one hand-written sample. It must go through the same fitted scaler
# as the training data before it reaches the tree.
new_sample = pd.DataFrame([[1, 30, 100000]], columns=FEATURE_COLUMNS)
new_sample_scaled = scaler.transform(new_sample)
prediction = id3.predict(new_sample_scaled)

if prediction[0] == 1:
    predicted_class = 'Purchased'
else:
    predicted_class = 'Not Purchased'
print(f"The predicted purchase status for the new sample is {predicted_class}")

# Link: https://jumpshare.com/s/qM5yLBS964qeCHauD2d2