#!/usr/bin/python3
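"""A single-neuron linear model trained with mini-batch SGD under squared loss.

Intended use (inferred from the argv handling in main(); the exact dataset
layout is an assumption): pass two directories, each holding one plain-text
review per file, and the script fits a weight vector over a bag-of-words
vocabulary:

    python3 this_script.py <positive_reviews_dir> <negative_reviews_dir>

boom() is a self-contained sanity check on synthetic linear data.
"""
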
import sys
import os
import itertools
import random

# Global model parameters for a single linear unit: prediction = weight . x + bias.
# weight is initialised in main()/boom() once the input dimension is known.
weight = None
bias = random.gauss(0, 0.01)


def chunk(arr, size):
    """Split arr into consecutive mini-batches of at most `size` items."""
    return [arr[i:i + size] for i in range(0, len(arr), size)]
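
# Example: chunk([1, 2, 3, 4, 5], 2) -> [[1, 2], [3, 4], [5]]
# (the last mini-batch may be smaller than `size`).

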
def feedforward(input_vector):
    """Linear model output: dot(weight, input_vector) + bias."""
    return sum(x * w for x, w in zip(input_vector, weight)) + bias


def load_files(path):
    """Read every file in directory `path` and return their contents as a list."""
    def read_file(file_path):
        with open(file_path) as file:
            return file.read()

    return [read_file(os.path.join(path, f)) for f in os.listdir(path)]


def make_alphabet(reviews):
    """Map every distinct word across all reviews to a unique index."""
    alphabet = {}
    for review in reviews:
        for word in review.split():
            if word not in alphabet:
                alphabet[word] = len(alphabet)

    return alphabet


def vectorize_review(review, alphabet):
    """Turn a review into normalised term frequencies over the alphabet."""
    vector = [0] * len(alphabet)

    n = 0
    for word in review.split():
        vector[alphabet[word]] += 1
        n += 1

    return [x / n for x in vector]


def backprop(input_vector, target):
    """Gradient of the squared loss 0.5 * (prediction - target)**2."""
    delta = feedforward(input_vector) - target

    gradient_b = delta
    gradient_w = [delta * x for x in input_vector]

    return (gradient_b, gradient_w)
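
# Why these gradients: with prediction yhat = weight . x + bias and loss
# L = 0.5 * (yhat - target)**2, we get dL/dbias = (yhat - target) = delta
# and dL/dweight_i = delta * x_i, which is exactly what backprop() returns.

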
def sgd(mini_batch, learning_rate):
    """One SGD step: average the gradients over the mini-batch and apply them."""
    global bias
    global weight
    gradient_b = 0
    gradient_w = [0] * len(weight)

    for review, sentiment in mini_batch:
        delta_gradient_b, delta_gradient_w = backprop(review, sentiment)
        gradient_b += delta_gradient_b
        gradient_w = [nw + dnw for nw, dnw in zip(gradient_w, delta_gradient_w)]

    weight = [w - (learning_rate / len(mini_batch)) * nw
              for w, nw in zip(weight, gradient_w)]
    bias -= (learning_rate / len(mini_batch)) * gradient_b


def train_epoch(training_data, learning_rate):
    for mini_batch in training_data:
        sgd(mini_batch, learning_rate)


def train(training_data, num_epochs, learning_rate, chunk_size, test_data=None):
    # test_data is accepted for a future evaluation pass but is unused here.
    for _ in range(num_epochs):
        train_epoch(chunk(training_data, chunk_size), learning_rate)
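
# Note: the data is re-chunked each epoch but in the same order, so every
# epoch visits identical mini-batches; re-shuffling per epoch is a common
# refinement that this script does not apply.

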
def main():
    global weight
    positive, negative = (load_files(directory) for directory in sys.argv[1:3])
    alphabet = make_alphabet(itertools.chain(positive, negative))

    weight = [random.gauss(0, 0.01) for _ in range(len(alphabet))]

    # Label positive reviews +1 and negative reviews -1.
    train_data = ([(vectorize_review(r, alphabet), 1) for r in positive] +
                  [(vectorize_review(r, alphabet), -1) for r in negative])

    random.shuffle(train_data)

    num_epochs, lr = 10, 3
    BATCH_SIZE = 1

    train(train_data, num_epochs, lr, BATCH_SIZE)
    print(weight)


def synthetic_data():
    """One sample from a noiseless linear target: y = 10*x[0] + 3*x[2] + 1."""
    x = [random.random() * 0.5 for _ in range(10)]
    return (x, x[0] * 10 + x[2] * 3 + 1)
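
# Because the target is exactly linear in x, SGD should drive weight towards
# [10, 0, 3, 0, ..., 0] and bias towards 1; boom() prints both so the fit can
# be checked by eye.

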
def boom():
    """Sanity check: fit the model on synthetic linear data and print the fit."""
    global weight
    weight = [random.gauss(0, 0.01) for _ in range(10)]

    train_data = [synthetic_data() for _ in range(10000)]

    num_epochs, lr = 100, 0.1

    train(train_data, num_epochs, lr, 100)
    print(weight, bias)


if __name__ == "__main__":
    main()
    # boom()  # alternative entry point: synthetic-data sanity check