Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from csv import reader
from random import randrange
from random import seed
def load_csv(filename):
    """Load a CSV file into a list of rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list containing one list of string fields per non-empty CSV
        row, in file order. Empty rows (blank lines) are skipped.
    """
    # newline='' hands newline handling to the csv module, so line breaks
    # embedded inside quoted fields are returned exactly as written.
    with open(filename, 'r', newline='') as file:
        return [row for row in reader(file) if row]
def str_column_to_float(dataset, column):
    """Convert one column of every row from string to float, in place.

    Args:
        dataset: List of rows (mutable sequences); modified in place.
        column: Index of the column to convert.

    Raises:
        ValueError: If a value, once stripped of surrounding whitespace,
            is not a valid float literal.
    """
    for row in dataset:
        row[column] = float(row[column].strip())
def str_column_to_int(dataset, column):
    """Replace the values of one column with integer codes, in place.

    Each distinct value found in the column is assigned an integer code
    starting at 0. Codes are assigned in sorted order of the values so
    the mapping is the same on every run; iterating a set of strings
    directly would give an order that varies between interpreter runs.

    Args:
        dataset: List of rows (mutable sequences); modified in place.
        column: Index of the column to encode.

    Returns:
        A dict mapping each original value to its integer code.
    """
    class_values = [row[column] for row in dataset]
    try:
        ordered = sorted(set(class_values))
    except TypeError:
        # Values that cannot be ordered against each other: fall back to
        # first-appearance order, which is still deterministic.
        ordered = list(dict.fromkeys(class_values))
    lookup = {value: code for code, value in enumerate(ordered)}
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup
def cross_validation_split(dataset, n_folds):
    """Split a dataset into ``n_folds`` randomly drawn folds.

    Every fold holds ``len(dataset) // n_folds`` rows drawn without
    replacement using ``randrange``. Rows left over when the dataset
    size is not a multiple of ``n_folds`` are not placed in any fold.
    The input list itself is not modified; the folds reference the same
    row objects as ``dataset``.

    Args:
        dataset: List of rows.
        n_folds: Number of folds to produce.

    Returns:
        A list of ``n_folds`` lists of rows.

    Raises:
        ZeroDivisionError: If ``n_folds`` is 0.
    """
    remaining = list(dataset)
    fold_size = len(dataset) // n_folds
    # One randrange call per drawn row, in the same order as a plain
    # nested loop, so seeded runs stay reproducible.
    return [
        [remaining.pop(randrange(len(remaining))) for _ in range(fold_size)]
        for _ in range(n_folds)
    ]
def accuracy_metric(actual, predicted):
    """Compute classification accuracy as a percentage.

    Args:
        actual: Sequence of true labels.
        predicted: Sequence of predicted labels; compared position by
            position against ``actual``.

    Returns:
        The percentage (0.0 to 100.0) of positions where the two labels
        are equal, or 0.0 when ``actual`` is empty.

    Raises:
        IndexError: If ``predicted`` is shorter than ``actual``.
    """
    total = len(actual)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    correct = sum(
        1 for i, label in enumerate(actual) if label == predicted[i]
    )
    return correct / float(total) * 100.0
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
    """Evaluate an algorithm with k-fold cross-validation.

    The dataset is split with ``cross_validation_split``. Each fold in
    turn is used as the test set while the rows of all other folds form
    the training set.

    Args:
        dataset: List of rows; the last element of each row is the label.
        algorithm: Callable invoked as ``algorithm(train, test, *args)``
            that returns one prediction per test row.
        n_folds: Number of folds.
        *args: Extra positional arguments forwarded to ``algorithm``.

    Returns:
        A list with one accuracy percentage per fold.
    """
    folds = cross_validation_split(dataset, n_folds)
    scores = []
    for held_out, fold in enumerate(folds):
        # Flatten every fold except the held-out one in a single pass,
        # selecting by position rather than by list equality.
        train_set = [
            row
            for index, other in enumerate(folds)
            if index != held_out
            for row in other
        ]
        test_set = []
        for row in fold:
            row_copy = list(row)
            # Blank out the label on the copy handed to the algorithm.
            row_copy[-1] = None
            test_set.append(row_copy)
        predicted = algorithm(train_set, test_set, *args)
        actual = [row[-1] for row in fold]
        scores.append(accuracy_metric(actual, predicted))
    return scores
def predict(row, weights):
    """Make a binary prediction for one row using the given weights.

    Args:
        row: Sequence of feature values followed by one trailing element
            (the label slot), which is ignored.
        weights: Sequence whose first element is the bias and whose
            remaining elements are the per-feature weights.

    Returns:
        1.0 if the activation (bias plus weighted feature sum) is greater
        than or equal to 0.0, otherwise 0.0.

    Raises:
        IndexError: If ``weights`` has fewer entries than the row needs.
    """
    activation = weights[0]
    # row[:-1] drops the trailing label slot; feature i pairs with
    # weights[i + 1] because weights[0] is the bias.
    for i, feature in enumerate(row[:-1]):
        activation += weights[i + 1] * feature
    return 1.0 if activation >= 0.0 else 0.0
def train_weights(train, l_rate, n_epoch):
    """Estimate perceptron weights using stochastic gradient descent.

    Weights start at 0.0 and are updated after every training row, for
    ``n_epoch`` passes over ``train``.

    Args:
        train: Non-empty list of rows; the last element of each row is
            the numeric label and the preceding elements are features.
        l_rate: Learning rate applied to every update.
        n_epoch: Number of passes over the training rows.

    Returns:
        A list of weights: the bias first, then one weight per feature.

    Raises:
        IndexError: If ``train`` is empty.
    """
    weights = [0.0] * len(train[0])
    for _ in range(n_epoch):
        for row in train:
            error = row[-1] - predict(row, weights)
            # The scaled error is shared by the bias and every feature
            # weight update for this row.
            step = l_rate * error
            weights[0] = weights[0] + step
            for i, feature in enumerate(row[:-1], start=1):
                weights[i] = weights[i] + step * feature
    return weights
def perceptron(train, test, l_rate, n_epoch):
    """Run the perceptron algorithm trained by stochastic gradient descent.

    Args:
        train: List of training rows passed to ``train_weights``.
        test: List of rows to predict.
        l_rate: Learning rate passed to ``train_weights``.
        n_epoch: Number of training epochs passed to ``train_weights``.

    Returns:
        A list with one prediction (from ``predict``) per row of ``test``.
    """
    weights = train_weights(train, l_rate, n_epoch)
    return [predict(row, weights) for row in test]
# Test the perceptron algorithm on the diabetes dataset.
seed(1)

# Load and prepare the data.
filename = 'diabetes.csv'
dataset = load_csv(filename)
# Every column except the last one is converted from string to float.
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)
# Convert the string class labels in the last column to integers.
str_column_to_int(dataset, len(dataset[0])-1)

# Evaluate the algorithm.
n_folds = 5
l_rate = 0.01
n_epoch = 1000
scores = evaluate_algorithm(dataset, perceptron, n_folds, l_rate, n_epoch)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))
Add Comment
Please, Sign In to add comment