Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- import sys
- import os
- import itertools
- import random
- import math
# Model parameters shared by the functions in this module.
# `bias` starts as a small Gaussian draw (mean 0, standard deviation 0.01).
# `weight` stays None until main() or boom() assigns a list of weights to it.
bias = random.gauss(mu=0, sigma=0.01)
weight = None
def chunk(arr, size):
    """Split *arr* into consecutive slices of at most *size* items.

    Every slice holds exactly *size* items except possibly the last one,
    which holds whatever remains.  Returns a list of the slices.
    """
    starts = range(0, len(arr), size)
    return [arr[start:start + size] for start in starts]
def feedforward(inputVector):
    """Return the linear unit's output for *inputVector*.

    Computes the dot product of the input with the module-level ``weight``
    list and adds the module-level ``bias``.  ``zip`` pairs the two
    sequences, so surplus elements of the longer one are ignored.
    """
    weighted_sum = sum(value * w for value, w in zip(inputVector, weight))
    return weighted_sum + bias
def load_files(path):
    """Return the text of every regular file directly inside *path*.

    Files are read in sorted filename order so the result does not depend
    on the arbitrary order in which ``os.listdir`` reports entries.
    Sub-directories and other non-file entries are skipped instead of
    being handed to ``open`` (which would raise).

    Args:
        path: Directory to read.

    Returns:
        A list with one string per file, ordered by filename.

    Raises:
        OSError: If *path* cannot be listed or a file cannot be read.
    """
    contents = []
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        if not os.path.isfile(full_path):
            # Only plain files hold review text; ignore nested directories.
            continue
        with open(full_path) as handle:
            contents.append(handle.read())
    return contents
def make_alphabet(reviews):
    """Assign each distinct word a dense integer index.

    Words are the whitespace-separated tokens of each review.  Indices
    start at 0 and follow the order in which words are first encountered
    across *reviews*.  Returns the ``{word: index}`` dictionary.
    """
    word_to_index = {}
    for text in reviews:
        for token in text.split():
            # The next free index always equals the number of words stored.
            word_to_index.setdefault(token, len(word_to_index))
    return word_to_index
def vectorize_review(review, alphabet):
    """Convert *review* into a vector of relative word frequencies.

    Args:
        review: Text whose whitespace-separated tokens are counted.
        alphabet: Mapping from word to its position in the vector.

    Returns:
        A list of ``len(alphabet)`` floats.  Entry ``alphabet[word]`` is
        the number of occurrences of ``word`` divided by the total number
        of tokens in the review.  Tokens missing from *alphabet* have no
        slot and are not counted, but still contribute to the total.  A
        review with no tokens yields an all-zero vector.
    """
    tokens = review.split()
    if not tokens:
        # Nothing to count; avoid dividing by zero below.
        return [0.0] * len(alphabet)

    counts = [0] * len(alphabet)
    for token in tokens:
        position = alphabet.get(token)
        if position is not None:
            counts[position] += 1

    total = len(tokens)
    return [count / total for count in counts]
def backprop(input_vector, output_vector):
    """Compute the gradient contribution of one training example.

    The error is the ``feedforward`` prediction for *input_vector* minus
    the expected value *output_vector*.  Returns ``(gradient_b,
    gradient_w)``: the error itself for the bias, and the error scaled by
    each input component for the weights.
    """
    error = feedforward(input_vector) - output_vector
    weight_gradient = [error * component for component in input_vector]
    return error, weight_gradient
def sgd(mini_batch, learning_rate):
    """Apply one gradient-descent step computed from *mini_batch*.

    The per-example gradients returned by ``backprop`` are summed over the
    batch, then the module-level ``weight`` and ``bias`` are moved against
    the summed gradient, scaled by ``learning_rate / len(mini_batch)``.

    Args:
        mini_batch: Sequence of ``(input_vector, expected_output)`` pairs.
        learning_rate: Step size applied to the batch-averaged gradient.

    An empty batch carries no gradient information, so it leaves the
    parameters untouched instead of dividing by zero.
    """
    global bias
    global weight

    batch_size = len(mini_batch)
    if batch_size == 0:
        return

    # Same scale factor for every parameter; compute it once.
    step = learning_rate / batch_size

    gradient_b = 0
    gradient_w = [0] * len(weight)
    for review, sentiment in mini_batch:
        delta_gradient_b, delta_gradient_w = backprop(review, sentiment)
        gradient_b += delta_gradient_b
        gradient_w = [
            total + delta
            for total, delta in zip(gradient_w, delta_gradient_w)
        ]

    weight = [w - step * nw for w, nw in zip(weight, gradient_w)]
    bias -= step * gradient_b
def train_epoch(epoch, training_data, learning_rate):
    """Run one pass over *training_data*, one SGD step per mini-batch.

    *training_data* is an iterable of mini-batches; each is handed to
    ``sgd`` together with *learning_rate*.  *epoch* is accepted but not
    used by the body.
    """
    for batch in training_data:
        sgd(batch, learning_rate)
def train(
        training_data,
        num_epochs,
        learning_rate,
        chunk_size,
        test_data=None):
    """Train the model for *num_epochs* passes over *training_data*.

    Args:
        training_data: Sequence of ``(input_vector, expected_output)``
            pairs.
        num_epochs: Number of full passes to run.
        learning_rate: Step size forwarded to each SGD update.
        chunk_size: Number of examples per mini-batch.
        test_data: Accepted for interface compatibility; not used by the
            body.
    """
    # The split into mini-batches is the same for every epoch, so build it
    # once instead of re-slicing the data on each pass.
    mini_batches = chunk(training_data, chunk_size)
    for epoch in range(num_epochs):
        train_epoch(epoch, mini_batches, learning_rate)
def main():
    """Train the linear model on two directories of review files.

    The first command-line argument names the directory of positive
    reviews (labelled 1) and the second the directory of negative reviews
    (labelled -1); further arguments are ignored.  Builds the word
    alphabet from both sets, initialises the module-level ``weight`` with
    small Gaussian values, trains for 10 epochs with learning rate 3 and
    mini-batches of one example, then prints the learned weights.

    Exits with a usage message when fewer than two directories are given.
    """
    global weight

    if len(sys.argv) < 3:
        # Fail with a readable message rather than an unpacking error.
        sys.exit(
            "usage: {} POSITIVE_DIR NEGATIVE_DIR".format(sys.argv[0]))

    positive, negative = (load_files(directory) for directory in sys.argv[1:3])
    alphabet = make_alphabet(itertools.chain(positive, negative))
    weight = [random.gauss(0, 0.01) for _ in range(len(alphabet))]

    train_data = ([(vectorize_review(r, alphabet), 1) for r in positive] +
                  [(vectorize_review(r, alphabet), -1) for r in negative])
    random.shuffle(train_data)

    num_epochs, lr = 10, 3
    batch_size = 1
    train(train_data, num_epochs, lr, batch_size)
    print(weight)
def synthetic_data():
    """Generate one random ``(features, target)`` training pair.

    The features are ten values, each ``random.random() * 0.5``.  The
    target is the linear function ``10 * f[0] + 3 * f[2] + 1`` of them.
    """
    features = []
    for _ in range(10):
        features.append(random.random() * 0.5)
    target = features[0] * 10 + features[2] * 3 + 1
    return features, target
def boom():
    """Fit the model to synthetic data and print the learned parameters.

    Initialises the module-level ``weight`` with ten small Gaussian
    values, draws 10000 samples from ``synthetic_data``, trains for 100
    epochs with learning rate 0.1 and mini-batches of 100, then prints
    ``weight`` and ``bias``.
    """
    global weight

    weight = [random.gauss(0, 0.01) for _ in range(10)]
    samples = [synthetic_data() for _ in range(10000)]

    epochs, rate = 100, 0.1
    train(samples, epochs, rate, 100)
    print(weight, bias)
# Script entry point: run the review-training pipeline when executed directly.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement