SHARE
TWEET

Untitled

a guest Aug 14th, 2019 54 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import fasttext
  2.  
  3. def load_data(path):
  4.     file = open(path, "r",encoding="utf-8")
  5.     data = file.readlines()
  6.     return [line.split("\t") for line in data]    
  7.  
  8. def save_data(path,data):
  9.     with open(path, 'w',encoding="utf-8") as f:        
  10.         f.write("\n".join(data))
  11.  
  12. def train():
  13.     traning_parameters = {'input': 'fasttext.train', 'epoch': 60, 'lr': 0.01, 'wordNgrams': 1, 'verbose': 2, 'minCount': 1, 'loss': "ns",
  14.                         'lrUpdateRate': 100, 'thread': 1, 'ws':5, 'dim': 100}  
  15.     model = fasttext.train_supervised(**traning_parameters)
  16.     model.save_model("model.bin")            
  17.     return model
  18.  
  19. def test(model):
  20.     f1_score = lambda precision, recall: 2 * ((precision * recall) / (precision + recall))
  21.     nexamples, recall, precision = model.test('fasttext.test')    
  22.     print (f'recall: {recall}' )
  23.     print (f'precision: {precision}')
  24.     print (f'f1 score: {f1_score(precision,recall)}')
  25.     print (f'number of examples: {nexamples}')
  26.  
  27. def transform(input_file, output_file):
  28.     # load data
  29.     data = load_data(input_file)
  30.     # transform it into fasttext format __label__other have a nice day
  31.     data = [f"__label__{line[1]}\t{line[0]}" for line in data]
  32.     # and save the data
  33.     save_data(output_file,data)
  34.  
  35. if __name__ == "__main__":
  36.     transform("data/germeval2018.training.txt","fasttext.train")
  37.     transform("data/germeval2018.test.txt","fasttext.test")
  38.  
  39.     # train the model
  40.     model = train()
  41.     test(model)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top