Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- func main() {
- data, err := ioutil.ReadFile("reddit_product.jsonl")
- if err != nil {
- panic(err)
- }
- train, test := Split(ReadProdigy(data))
- // Here, we're training a new model named PRODUCT with the training portion
- // of our annotated data.
- //
- // Depending on your hardware, this should take around 1 - 3 minutes.
- model := prose.ModelFromData("PRODUCT", prose.UsingEntities(train))
- // Now, Let's test our model:
- correct := 0.0
- for _, entry := range test {
- // Create a document without segmentation, which isn't required for NER.
- doc, err := prose.NewDocument(
- entry.Text,
- prose.WithSegmentation(false),
- prose.UsingModel(model))
- if err != nil {
- panic(err)
- }
- ents := doc.Entities()
- if entry.Answer != "accept" && len(ents) == 0 {
- // If we rejected this entity during annotation, prose shouldn't
- // have labeled it.
- correct++
- } else {
- // Otherwise, we need to verify that we found the correct entities.
- expected := []string{}
- for _, span := range entry.Spans {
- expected = append(expected, entry.Text[span.Start:span.End])
- }
- if reflect.DeepEqual(expected, ents) {
- correct++
- }
- }
- }
- fmt.Printf("Correct (%%): %f\n", correct / float64(len(test)))
- }
Add Comment
Please, Sign In to add comment