Guest User

Untitled

a guest
Jul 15th, 2018
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.42 KB | None | 0 0
  1. func main() {
  2. data, err := ioutil.ReadFile("reddit_product.jsonl")
  3. if err != nil {
  4. panic(err)
  5. }
  6. train, test := Split(ReadProdigy(data))
  7.  
  8. // Here, we're training a new model named PRODUCT with the training portion
  9. // of our annotated data.
  10. //
  11. // Depending on your hardware, this should take around 1 - 3 minutes.
  12. model := prose.ModelFromData("PRODUCT", prose.UsingEntities(train))
  13.  
  14. // Now, Let's test our model:
  15. correct := 0.0
  16. for _, entry := range test {
  17. // Create a document without segmentation, which isn't required for NER.
  18. doc, err := prose.NewDocument(
  19. entry.Text,
  20. prose.WithSegmentation(false),
  21. prose.UsingModel(model))
  22.  
  23. if err != nil {
  24. panic(err)
  25. }
  26. ents := doc.Entities()
  27.  
  28. if entry.Answer != "accept" && len(ents) == 0 {
  29. // If we rejected this entity during annotation, prose shouldn't
  30. // have labeled it.
  31. correct++
  32. } else {
  33. // Otherwise, we need to verify that we found the correct entities.
  34. expected := []string{}
  35. for _, span := range entry.Spans {
  36. expected = append(expected, entry.Text[span.Start:span.End])
  37. }
  38. if reflect.DeepEqual(expected, ents) {
  39. correct++
  40. }
  41. }
  42. }
  43. fmt.Printf("Correct (%%): %f\n", correct / float64(len(test)))
  44. }
Add Comment
Please, Sign In to add comment