Guest User

Untitled

a guest
Dec 11th, 2018
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.26 KB | None | 0 0
  1. from sklearn_crfsuite import scorers,metrics
  2. from sklearn.metrics import make_scorer
  3. from sklearn.model_selection import cross_validate,train_test_split
  4. import sklearn_crfsuite
  5. def doc2features(doc, i):
  6. word = doc[i][0]
  7. postag = doc[i][1]
  8. # Features from current word
  9. features={
  10. 'word.word': word,
  11. 'word.isspace':word.isspace(),
  12. 'postag':postag,
  13. 'word.isdigit()': word.isdigit()
  14. }
  15. if i > 0:
  16. prevword = doc[i-1][0]
  17. postag1 = doc[i-1][1]
  18. features['word.prevword'] = prevword
  19. features['word.previsspace']=prevword.isspace()
  20. features['word.prepostag'] = postag1
  21. features['word.prevwordisdigit'] = prevword.isdigit()
  22. else:
  23. features['BOS'] = True # Special "Beginning of Sequence" tag
  24. # Features from next word
  25. if i < len(doc)-1:
  26. nextword = doc[i+1][0]
  27. postag1 = doc[i+1][1]
  28. features['word.nextword'] = nextword
  29. features['word.nextisspace']=nextword.isspace()
  30. features['word.nextpostag'] = postag1
  31. features['word.nextwordisdigit'] = nextword.isdigit()
  32. else:
  33. features['EOS'] = True # Special "End of Sequence" tag
  34. return features
  35.  
  36. def extract_features(doc):
  37. return [doc2features(doc, i) for i in range(len(doc))]
  38.  
  39. def get_labels(doc):
  40. return [tag for (token,postag,tag) in doc]
  41.  
  42.  
  43. X_data = [extract_features(doc) for doc in datatofile] # เอา คำ แยกออกมา
  44. y_data = [get_labels(doc) for doc in datatofile] # เอา tag แยกออกมา
  45.  
  46. X, X_test, y, y_test = train_test_split(X_data, y_data, test_size=0.1) # แบ่ง 0.1 หรือ 10%
  47. crf = sklearn_crfsuite.CRF(
  48. algorithm='lbfgs',
  49. c1=0.1,
  50. c2=0.1,
  51. max_iterations=500,
  52. all_possible_transitions=True,
  53. model_filename=file_name+"-pos.model0" # ตั้งชื่อโมเดล
  54. )
  55. crf.fit(X, y); # train
  56.  
  57. labels = list(crf.classes_)
  58. labels.remove('O')
  59. y_pred = crf.predict(X_test)
  60. e=metrics.flat_f1_score(y_test, y_pred,
  61. average='weighted', labels=labels)
  62. print(e) # โชว์ประสิทธิภาพ
  63. sorted_labels = sorted(
  64. labels,
  65. key=lambda name: (name[1:], name[0])
  66. )
  67. print(metrics.flat_classification_report(
  68. y_test, y_pred, labels=sorted_labels, digits=3
  69. ))
Add Comment
Please, Sign In to add comment