Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from athnlp.readers.brown_pos_corpus import BrownPosTag
# Load the Brown POS-tagging corpus through the project reader.
corpus = BrownPosTag()

# One zero-initialised weight vector per entry of `y_dict`, each as long as
# `x_dict`. The training loop indexes this list by label and each vector by
# feature position.
all_class_weights = [
    np.zeros(len(corpus.dictionary.x_dict))
    for _ in range(len(corpus.dictionary.y_dict))
]
def predict(phi, all_weights) -> int:
    """Return the index of the highest-scoring class for a feature vector.

    Each class is scored as the dot product of its weight vector with
    ``phi``; the index of the largest score is returned.

    Args:
        phi: Feature vector of the item to classify.
        all_weights: Sequence of per-class weight vectors, each compatible
            with ``phi`` under ``np.dot``.

    Returns:
        The position in ``all_weights`` of the best-scoring class, as a
        plain Python ``int``. Ties resolve to the lowest index.

    Raises:
        ValueError: If ``all_weights`` is empty (raised by ``np.argmax``).
    """
    scores = [np.dot(weights, phi) for weights in all_weights]
    # np.argmax yields a numpy integer; convert so the declared return type
    # (int) is actually honoured.
    return int(np.argmax(scores))
def generate_features(token_idx, vocab_size=None) -> np.ndarray:
    """Build a one-hot feature vector with a 1 at position ``token_idx``.

    Args:
        token_idx: Position to set to 1 in the returned vector.
        vocab_size: Length of the returned vector. When omitted, it defaults
            to ``len(corpus.dictionary.x_dict)`` from the module-level corpus.

    Returns:
        A 1-D float array of zeros with a single 1 at ``token_idx``.

    Raises:
        IndexError: If ``token_idx`` is out of bounds for the vector.
    """
    if vocab_size is None:
        # Fall back to the size of the module-level corpus dictionary.
        vocab_size = len(corpus.dictionary.x_dict)
    feature_vector = np.zeros(vocab_size)  # [0, 0, 0, 0 ...]
    feature_vector[token_idx] = 1  # [0, 0, 1, 0 ...]
    return feature_vector
def accuracy(correct, total):
    """Return ``correct / total`` as a percentage rounded to two decimals.

    Args:
        correct: Number of correct predictions.
        total: Number of predictions made.

    Returns:
        The percentage of correct predictions, or ``0.0`` when ``total`` is
        zero (nothing was evaluated).
    """
    if total == 0:
        # An empty evaluation set has no meaningful ratio; report 0.0
        # instead of raising ZeroDivisionError.
        return 0.0
    return round(100 * correct / total, 2)
# Perceptron training: make `num_epochs` passes over the training sentences,
# adjusting the per-class weights on every misclassified token, then score
# the current weights on the dev set.
num_epochs = 3
for epoch in range(num_epochs):
    # Reorder the training sentences in place at the start of every epoch.
    np.random.shuffle(corpus.train)

    # Training pass: one prediction (and possibly one update) per token.
    for train_sentence in corpus.train:
        for token, gold_label in zip(train_sentence.x, train_sentence.y):
            phi = generate_features(token)
            guess = predict(phi, all_class_weights)  # predicted label index
            if guess == gold_label:
                continue
            # Misprediction: add the features to the true class's weights
            # and subtract them from the wrongly predicted class's weights.
            all_class_weights[gold_label] += phi
            all_class_weights[guess] -= phi

    # Evaluation pass over the dev set.
    # NOTE(review): indentation was lost in the paste; this assumes scoring
    # runs once per epoch rather than once after all epochs - confirm.
    correct_tokens, total_tokens = 0, 0
    for dev_sentence in corpus.dev:
        for token, gold_label in zip(dev_sentence.x, dev_sentence.y):
            total_tokens += 1
            guess = predict(generate_features(token), all_class_weights)
            if guess == gold_label:
                correct_tokens += 1
    print("Accuracy (%): ", accuracy(correct_tokens, total_tokens))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement