CJamie

randomforestimpl

Mar 23rd, 2022
823
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.38 KB | None | 0 0
  1.  
"""Q.2: Write a Python script that implements a Random Forest classifier.

Use a standard Python class to implement the algorithm, choosing your own dataset.

Data set characteristics:
    Number of instances: 1797
    Number of attributes: 64
    Attribute information: 8x8 image of integer pixels in the range 0..16.
    Missing attribute values: None

The data set contains images of hand-written digits: 10 classes, where each
class refers to a digit.
"""
  15.  
  16. # using standard class of python
  17. from collections import Counter
  18.  
  19. import numpy as np
  20.  
  21. from sklearn.tree import DecisionTreeClassifier
  22.  
  23.  
  24. def bootstrap_sample(X, y):
  25.     n_samples = X.shape[0]
  26.     idxs = np.random.choice(n_samples, n_samples, replace=True)
  27.     return X[idxs], y[idxs]
  28.  
  29.  
  30. def most_common_label(y):
  31.     counter = Counter(y)
  32.     most_common = counter.most_common(1)[0][0]
  33.     return most_common
  34.  
  35.  
  36. class RandomForest:
  37.     def __init__(self, n_trees=10, min_samples_split=2, max_depth=100, n_feats=None):
  38.         self.n_trees = n_trees
  39.         self.min_samples_split = min_samples_split
  40.         self.max_depth = max_depth
  41.         self.n_feats = n_feats
  42.         self.trees = []
  43.  
  44.     def fit(self, X, y):
  45.         self.trees = []
  46.         for _ in range(self.n_trees):
  47.             tree =  DecisionTreeClassifier(min_samples_split=self.min_samples_split,max_depth=self.max_depth,max_features=self.n_feats,)
  48.             X_samp, y_samp = bootstrap_sample(X, y)
  49.             tree.fit(X_samp, y_samp)
  50.             self.trees.append(tree)
  51.  
  52.     def predict(self, X):
  53.         tree_preds = np.array([tree.predict(X) for tree in self.trees])
  54.         tree_preds = np.swapaxes(tree_preds, 0, 1)
  55.         y_pred = [most_common_label(tree_pred) for tree_pred in tree_preds]
  56.         return np.array(y_pred)
  57.  
  58. # Importing relevant libraries
  59. from sklearn import datasets
  60. from sklearn.model_selection import train_test_split
  61.  
  62. #function for accuracy  
  63. def accuracy(y_true, y_pred):
  64.     accuracy = np.sum(y_true == y_pred) / len(y_true)
  65.     return accuracy
  66.  
  67. #loading dataset
  68. data = datasets.load_digits()
  69. X = data.data
  70. y = data.target
  71.  
  72. #splitting dataset into training and testing data  
  73. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)
  74.  
  75. clf = RandomForest(n_trees=3, max_depth=10)
  76.  
  77. clf.fit(X_train, y_train)
  78. y_pred = clf.predict(X_test)
  79. acc = accuracy(y_test, y_pred)
  80.  
  81. print("Accuracy:", acc)
  82.  
  83.  
Advertisement
Add Comment
Please, Sign In to add comment