Untitled

// main trains a prose NER model labeled PRODUCT on the training portion of the
// annotations read from reddit_product.jsonl, evaluates it on the held-out
// test portion, and prints the percentage of test entries it got right.
//
// It panics if the data file cannot be read or if a test document cannot be
// created.
func main() {
    data, err := ioutil.ReadFile("reddit_product.jsonl")
    if err != nil {
        panic(err)
    }
    train, test := Split(ReadProdigy(data))

    // Without test entries the accuracy below would be 0/0 (NaN), so bail out
    // before spending minutes on training.
    if len(test) == 0 {
        fmt.Println("no test entries to evaluate")
        return
    }

    // Here, we're training a new model named PRODUCT with the training portion
    // of our annotated data.
    //
    // Depending on your hardware, this should take around 1 - 3 minutes.
    model := prose.ModelFromData("PRODUCT", prose.UsingEntities(train))

    // Now, let's test our model:
    correct := 0.0
    for _, entry := range test {
        // Create a document without segmentation, which isn't required for NER.
        doc, err := prose.NewDocument(
            entry.Text,
            prose.WithSegmentation(false),
            prose.UsingModel(model))
        if err != nil {
            panic(err)
        }
        ents := doc.Entities()

        if entry.Answer != "accept" {
            // If we rejected this entity during annotation, prose shouldn't
            // have labeled it. A rejected entry's spans are never treated as
            // the expected answer.
            if len(ents) == 0 {
                correct++
            }
            continue
        }

        // Otherwise, we need to verify that we found the correct entities.
        expected := make([]string, 0, len(entry.Spans))
        for _, span := range entry.Spans {
            expected = append(expected, entry.Text[span.Start:span.End])
        }

        // Compare entity text against the annotated span text. The entities
        // themselves are not strings, so comparing them directly to expected
        // with reflect.DeepEqual would always report a mismatch.
        // NOTE(review): the comparison is order-sensitive; this assumes spans
        // and entities are both listed in document order — confirm.
        found := make([]string, 0, len(ents))
        for _, ent := range ents {
            found = append(found, ent.Text)
        }
        if reflect.DeepEqual(expected, found) {
            correct++
        }
    }

    // Scale the fraction to match the "(%)" label.
    fmt.Printf("Correct (%%): %f\n", 100*correct/float64(len(test)))
}