Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Q.2: Write a Python script to implement a Random Forest classifier.

Use a standard Python class to implement the algorithm, choosing your own
dataset.

Data set characteristics:
    Number of instances: 1797
    Number of attributes: 64
    Attribute information: 8x8 image of integer pixels in the range 0..16.
    Missing attribute values: None

The data set contains images of hand-written digits: 10 classes where each
class refers to a digit.
"""
- # using standard class of python
- from collections import Counter
- import numpy as np
- from sklearn.tree import DecisionTreeClassifier
def bootstrap_sample(X, y, rng=None):
    """Draw a bootstrap resample of a labelled dataset.

    Row indices are drawn uniformly *with replacement*, as many times as
    there are rows in ``X``. The same indices are applied to ``y`` so every
    sampled row keeps its own label.

    Args:
        X: Array-like of samples, indexed along its first axis.
        y: Array-like of labels with one entry per row of ``X``.
        rng: Optional random source exposing ``choice`` (for example a
            ``numpy.random.Generator`` or ``numpy.random.RandomState``).
            When ``None`` (the default) NumPy's global random state is
            used, so ``np.random.seed`` still controls the result.

    Returns:
        A ``(X_sample, y_sample)`` tuple of NumPy arrays.

    Raises:
        ValueError: If ``X`` and ``y`` disagree on the number of samples.
    """
    # Coerce so that plain lists work: they have no ``.shape`` and cannot be
    # indexed with an index array.
    X = np.asarray(X)
    y = np.asarray(y)

    n_samples = X.shape[0]
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X and y have inconsistent sample counts: "
            f"{n_samples} != {y.shape[0]}"
        )

    chooser = np.random if rng is None else rng
    idxs = chooser.choice(n_samples, n_samples, replace=True)
    return X[idxs], y[idxs]
def most_common_label(y):
    """Return the label that occurs most often in ``y``.

    Args:
        y: Iterable of hashable labels.

    Returns:
        The most frequent label. When several labels share the highest
        count, the one encountered first in ``y`` wins (the ordering rule
        of ``Counter.most_common``).

    Raises:
        IndexError: If ``y`` contains no labels.
    """
    counts = Counter(y)
    if not counts:
        # Same exception type the bare ``[0]`` lookup would raise, but with
        # a message that names the actual problem.
        raise IndexError("most_common_label() requires at least one label")
    label, _ = counts.most_common(1)[0]
    return label
class RandomForest:
    """Bagged ensemble of scikit-learn decision trees with majority voting.

    Each tree is a ``DecisionTreeClassifier`` trained on its own bootstrap
    resample of the training data. A prediction is the label that most of
    the trees vote for.

    Args:
        n_trees: Number of trees to train.
        min_samples_split: Forwarded to every tree as ``min_samples_split``.
        max_depth: Forwarded to every tree as ``max_depth``.
        n_feats: Forwarded to every tree as ``max_features``.
    """

    def __init__(self, n_trees=10, min_samples_split=2, max_depth=100, n_feats=None):
        self.n_trees = n_trees
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_feats = n_feats
        # Fitted trees; empty until fit() has been called.
        self.trees = []

    def fit(self, X, y):
        """Train ``n_trees`` trees, each on a fresh bootstrap sample.

        Any previously fitted trees are discarded first.

        Args:
            X: Training samples, one row per sample.
            y: Training labels, one per row of ``X``.

        Returns:
            ``self``, so calls can be chained.
        """
        self.trees = []
        for _ in range(self.n_trees):
            tree = DecisionTreeClassifier(
                min_samples_split=self.min_samples_split,
                max_depth=self.max_depth,
                max_features=self.n_feats,
            )
            X_samp, y_samp = bootstrap_sample(X, y)
            tree.fit(X_samp, y_samp)
            self.trees.append(tree)
        return self

    def predict(self, X):
        """Predict a label for every row of ``X`` by majority vote.

        Args:
            X: Samples to classify, one row per sample.

        Returns:
            NumPy array holding one predicted label per row of ``X``.

        Raises:
            ValueError: If the forest holds no fitted trees.
        """
        if not self.trees:
            # Without this guard the failure surfaces inside np.swapaxes as
            # an obscure axis error on an empty array.
            raise ValueError(
                "RandomForest has no fitted trees; call fit() before predict()."
            )
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        # One row per tree -> one row per sample, so each row holds the
        # votes cast for a single sample.
        tree_preds = np.swapaxes(tree_preds, 0, 1)
        y_pred = [most_common_label(votes) for votes in tree_preds]
        return np.array(y_pred)
- # Importing relevant libraries
- from sklearn import datasets
- from sklearn.model_selection import train_test_split
- #function for accuracy
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels with the same shape as
            ``y_true``.

    Returns:
        The share of positions where both inputs agree, between 0.0 and
        1.0, or NaN when the inputs are empty.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Coerce first: comparing two plain lists with ``==`` yields a single
    # bool rather than an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )
    if y_true.size == 0:
        # Accuracy over zero samples is undefined.
        return float("nan")
    return np.mean(y_true == y_pred)
# Seed NumPy's global RNG so the printed accuracy is the same on every run.
# bootstrap_sample draws from np.random, and scikit-learn estimators created
# without a random_state fall back to it as well; the split below is already
# pinned with random_state=1234.
np.random.seed(1234)

# Load the scikit-learn hand-written digits dataset.
data = datasets.load_digits()
X = data.data
y = data.target

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

# Train a small forest and report its accuracy on the held-out samples.
clf = RandomForest(n_trees=3, max_depth=10)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
acc = accuracy(y_test, y_pred)
print("Accuracy:", acc)
Advertisement
Add Comment
Please, Sign In to add comment