Advertisement
Guest User

Untitled

a guest
Aug 14th, 2019
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.44 KB | None | 0 0
  1. import fasttext
  2.  
  def load_data(path):
      """Read a UTF-8 text file and split every line on tab characters.

      Args:
          path: Path of the tab-separated text file to read.

      Returns:
          A list with one entry per non-blank line; each entry is the list of
          tab-separated fields of that line, without the line terminator.
      """
      # The context manager guarantees the handle is closed, even on error.
      with open(path, "r", encoding="utf-8") as file:
          return [
              # Drop the line terminator so the last field is clean.
              line.rstrip("\r\n").split("\t")
              for line in file
              # Blank lines carry no fields; skipping them keeps callers that
              # index into the row from failing on an empty record.
              if line.strip()
          ]
  7.  
  def save_data(path, data):
      """Write an iterable of strings to a UTF-8 file, one string per line.

      Args:
          path: Destination file; it is created or overwritten.
          data: Iterable of strings. The items are joined with a newline, so
              no newline is written after the last item.
      """
      with open(path, "w", encoding="utf-8") as f:
          f.write("\n".join(data))
  11.  
  def train():
      """Train a supervised fastText classifier and save it to disk.

      The training data is read from ``fasttext.train`` and the fitted model
      is written to ``model.bin``.

      Returns:
          The model object returned by ``fasttext.train_supervised``.
      """
      training_parameters = {
          "input": "fasttext.train",
          "epoch": 60,
          "lr": 0.01,
          "wordNgrams": 1,
          "verbose": 2,
          "minCount": 1,
          "loss": "ns",
          "lrUpdateRate": 100,
          "thread": 1,
          "ws": 5,
          "dim": 100,
      }
      model = fasttext.train_supervised(**training_parameters)
      model.save_model("model.bin")
      return model
  18.  
  def test(model):
      """Evaluate ``model`` on ``fasttext.test`` and print the metrics.

      Prints recall, precision, F1 score and the number of evaluated examples.

      Args:
          model: A trained model exposing ``test(path)``, which returns the
              tuple ``(number_of_examples, precision, recall)``.
      """
      # fastText returns (N, precision, recall) in that order; unpacking it as
      # (N, recall, precision) would print each metric under the wrong name.
      nexamples, precision, recall = model.test('fasttext.test')

      # F1 is the harmonic mean of precision and recall. It is defined as 0
      # when both are 0, which also avoids a ZeroDivisionError.
      denominator = precision + recall
      f1_score = 2 * ((precision * recall) / denominator) if denominator else 0.0

      print (f'recall: {recall}' )
      print (f'precision: {precision}')
      print (f'f1 score: {f1_score}')
      print (f'number of examples: {nexamples}')
  26.  
  def transform(input_file, output_file):
      """Convert a tab-separated dataset into fastText's supervised format.

      Each input row is expected to hold the text in its first column and the
      label in its second column. Every output line has the form
      ``__label__<label>\\t<text>``, e.g. ``__label__other<TAB>have a nice day``.

      Args:
          input_file: Path of the tab-separated source file.
          output_file: Path of the fastText-formatted file to write.
      """
      # load data
      rows = load_data(input_file)
      # transform it into fasttext format __label__other have a nice day
      data = [
          # Strip the label: if it is the last column it may still carry the
          # line terminator, which would split the record over two lines.
          f"__label__{row[1].strip()}\t{row[0]}"
          for row in rows
          # Rows without a label column (e.g. blank lines) cannot be converted.
          if len(row) >= 2
      ]
      # and save the data
      save_data(output_file, data)
  34.  
  if __name__ == "__main__":
      # Convert the GermEval 2018 training and test sets to fastText format.
      transform("data/germeval2018.training.txt", "fasttext.train")
      transform("data/germeval2018.test.txt", "fasttext.test")

      # train the model
      model = train()
      # evaluate it on the converted test set
      test(model)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement