# Untitled

import numpy as np
from athnlp.readers.brown_pos_corpus import BrownPosTag

# Load the corpus; its dictionary supplies the feature-space size (x_dict)
# and the number of labels (y_dict) used below.
corpus = BrownPosTag()

# One zero-initialised weight vector per label, each with one entry per
# x_dict item. Every vector is a separate array so that in-place updates
# to one class never touch another.
all_class_weights = [
    np.zeros(feature_count)
    for feature_count in [len(corpus.dictionary.x_dict)] * len(corpus.dictionary.y_dict)
]

def predict(phi, all_weights) -> int:
    """Return the index of the highest-scoring class for a feature vector.

    Args:
        phi: Feature vector for a single token.
        all_weights: Sequence of per-class weight vectors, each the same
            length as ``phi``.

    Returns:
        The position in ``all_weights`` whose dot product with ``phi`` is
        largest, as a built-in ``int``. Ties resolve to the lowest index,
        because ``np.argmax`` returns the first maximum.

    Raises:
        ValueError: If ``all_weights`` is empty (raised by ``np.argmax``).
    """
    scores = [np.dot(weights, phi) for weights in all_weights]
    # np.argmax yields a NumPy integer scalar; convert it so the declared
    # ``int`` return type actually holds for callers.
    return int(np.argmax(scores))

def generate_features(token_idx) -> np.ndarray:
    """Build the one-hot feature vector for a token index.

    Args:
        token_idx: Index of the token; used directly as the position of
            the single non-zero entry.

    Returns:
        A float array with one entry per item in the module-level
        ``corpus.dictionary.x_dict``, all zeros except a 1 at
        ``token_idx``.
    """
    feature_vector = np.zeros(len(corpus.dictionary.x_dict))  # [0, 0, 0, 0 ...]
    feature_vector[token_idx] = 1                                 # [0, 0, 1, 0 ...]
    return feature_vector

def accuracy(correct: int, total: int) -> float:
    """Return ``correct / total`` as a percentage rounded to two decimals.

    Args:
        correct: Number of correct predictions.
        total: Number of predictions made.

    Returns:
        The percentage of correct predictions, or ``0.0`` when ``total``
        is zero.
    """
    # With nothing to score there is no meaningful ratio; report 0.0
    # instead of failing with ZeroDivisionError.
    if total == 0:
        return 0.0
    return round(100 * correct / total, 2)

num_epochs = 3
for epoch in range(num_epochs):
    # Present the training sentences in a fresh random order every epoch.
    np.random.shuffle(corpus.train)

    # Training pass: predict each token's label and correct the weights
    # whenever the prediction is wrong.
    for sentence in corpus.train:
        for token, true_label in zip(sentence.x, sentence.y):
            phi = generate_features(token)
            predicted_label = predict(phi, all_class_weights)  # index of the predicted class

            # A correct prediction leaves the model untouched.
            if predicted_label == true_label:
                continue

            # Mistake: move the true class towards these features and the
            # wrongly predicted class away from them.
            all_class_weights[true_label] += phi
            all_class_weights[predicted_label] -= phi

    # Evaluation pass: score the current weights on the dev set.
    correct_tokens = 0
    total_tokens = 0
    for sentence in corpus.dev:
        for token, true_label in zip(sentence.x, sentence.y):
            phi = generate_features(token)
            predicted_label = predict(phi, all_class_weights)
            total_tokens += 1
            if predicted_label == true_label:
                correct_tokens += 1

    print("Accuracy (%): ", accuracy(correct_tokens, total_tokens))