// Untitled

using edu.stanford.nlp.pipeline;
using java.util;
using Services.NLPService.Configuration;
using Microsoft.ApplicationInsights;
using Microsoft.Extensions.Options;
using System;
using System.Collections.Concurrent;
using System.IO;
using Microsoft.Extensions.Logging;

namespace XApp.Services.NLPService.StanfordNLP
{
    /// <summary>
    /// Creates and caches <see cref="StanfordCoreNLP"/> pipelines, one per distinct annotator list.
    /// </summary>
    /// <remarks>
    /// Pipelines are stored in a <see cref="ConcurrentDictionary{TKey, TValue}"/> of
    /// <see cref="Lazy{T}"/> values, so each annotator list is loaded at most once.
    /// Loading temporarily switches the process-wide current directory to the model
    /// directory; because that state is shared by the whole process, loads are
    /// serialized through a static lock and the directory is always restored.
    /// Note that <see cref="Lazy{T}"/> in its default mode caches an exception thrown
    /// by its factory, so a failed load for a given annotator list is rethrown on
    /// later requests for the same list.
    /// </remarks>
    public class AnnotationFactory : IDisposable
    {
        public const string AnnotatorPosKey = "pos";
        public const string AnnotatorLemmaKey = "lemma";
        public const string AnnotatorRegexnerKey = "regexner";
        // Was "parse"; "depparse" is used instead.
        public const string AnnotatorParseKey = "depparse";

        // Annotators every pipeline starts with. The trailing separator is kept so that
        // cache keys and telemetry metric names stay exactly as they have always been.
        private const string BasicAnnotatorKeywords = "tokenize, cleanxml, ssplit, ";

        // Guards the "switch current directory -> load models -> switch back" sequence.
        // Static because the current directory is process-wide, not per instance.
        private static readonly object s_currentDirectoryGate = new object();

        private readonly NLPParserConfig _nlpParserConfig;
        private readonly ConcurrentDictionary<string, Lazy<StanfordCoreNLP>> _stanfordCoreNLPConInstances;
        private readonly TelemetryClient _telemetryClient;
        private readonly ILogger _logger;

        /// <summary>
        /// Stores the dependencies and eagerly loads the basic pipeline
        /// (tokenize, cleanxml, ssplit).
        /// </summary>
        /// <param name="nlpParserConfig">Parser options; its <c>Value</c> is read once here.</param>
        /// <param name="telemetryClient">Receives the model-load and dispose metrics.</param>
        /// <param name="logger">Receives the load/dispose progress messages.</param>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        public AnnotationFactory(
            IOptions<NLPParserConfig> nlpParserConfig,
            TelemetryClient telemetryClient,
            ILogger<AnnotationFactory> logger)
        {
            if (nlpParserConfig == null)
            {
                throw new ArgumentNullException(nameof(nlpParserConfig));
            }

            if (telemetryClient == null)
            {
                throw new ArgumentNullException(nameof(telemetryClient));
            }

            if (logger == null)
            {
                throw new ArgumentNullException(nameof(logger));
            }

            _logger = logger;
            _logger.LogWarning($"* StanfordNLP Annotation Factory initializing started at {DateTime.Now.ToLongTimeString()}");

            _nlpParserConfig = nlpParserConfig.Value;
            _stanfordCoreNLPConInstances = new ConcurrentDictionary<string, Lazy<StanfordCoreNLP>>();
            _telemetryClient = telemetryClient;

            // Warm up the basic instance so the first real request does not pay for it.
            GetAnnotator();
        }

        /// <summary>
        /// Returns the cached pipeline for the given annotators, loading it on first use.
        /// The basic annotators (tokenize, cleanxml, ssplit) are always included; when
        /// <paramref name="annotatorKeywordParams"/> is <c>null</c> or empty only those are used.
        /// </summary>
        /// <param name="annotatorKeywordParams">
        /// Additional annotator names (for example <see cref="AnnotatorPosKey"/>), appended
        /// in the order given. The joined list is also the cache key, so the same names in
        /// a different order produce a separate pipeline.
        /// </param>
        /// <returns>The pipeline configured with the requested annotators.</returns>
        public StanfordCoreNLP GetAnnotator(params string[] annotatorKeywordParams)
        {
            // An explicit null argument would otherwise make string.Join throw.
            string[] extraKeywords = annotatorKeywordParams ?? new string[0];
            string annotatorKeywords = BasicAnnotatorKeywords + string.Join(", ", extraKeywords);

            return _stanfordCoreNLPConInstances
                .GetOrAdd(annotatorKeywords, key => new Lazy<StanfordCoreNLP>(() => LoadPipeline(key)))
                .Value;
        }

        /// <summary>
        /// Builds a new pipeline for <paramref name="annotatorKeywords"/> and records how long it took.
        /// </summary>
        /// <param name="annotatorKeywords">Comma-separated annotator list passed to CoreNLP.</param>
        /// <returns>The newly constructed pipeline.</returns>
        private StanfordCoreNLP LoadPipeline(string annotatorKeywords)
        {
            lock (s_currentDirectoryGate)
            {
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                _logger.LogWarning($"* NLP Model loading started at {DateTime.Now.ToLongTimeString()}");

                // Read the current directory inside the lock so a concurrent load cannot be
                // observed half-way through its own directory switch.
                string originalDir = Environment.CurrentDirectory;
                string modelDataDir = Path.GetFullPath(Path.Combine(originalDir, "wwwroot", "stanfordnlp-models"));

                StanfordCoreNLP pipeline;

                // The pipeline is constructed with the model directory as current directory,
                // presumably so relative resources such as the "custom-ner/..." mapping
                // files resolve against it.
                Directory.SetCurrentDirectory(modelDataDir);
                try
                {
                    Properties props = GetAnnotatorSettings();
                    props.setProperty("annotators", annotatorKeywords);

                    pipeline = new StanfordCoreNLP(props);
                }
                finally
                {
                    // Restore even when loading fails; otherwise every later relative path
                    // in the process would resolve against the model directory.
                    Directory.SetCurrentDirectory(originalDir);
                }

                stopwatch.Stop();

                // TotalSeconds, not Seconds: Seconds is only the 0-59 component of the
                // elapsed time and under-reports any load longer than a minute.
                double elapsedSeconds = stopwatch.Elapsed.TotalSeconds;
                _telemetryClient.TrackMetric($"* StanfordNLP models loaded with: {annotatorKeywords}", elapsedSeconds);
                _logger.LogWarning($"* NLP Models loaded with \"{annotatorKeywords}\" ! - {elapsedSeconds:F1} seconds. Ended on: {DateTime.Now.ToLongTimeString()}");

                return pipeline;
            }
        }

        /// <summary>
        /// Builds the CoreNLP properties shared by every pipeline (everything except the
        /// "annotators" list, which the caller sets).
        /// </summary>
        /// <returns>A new <see cref="Properties"/> instance with the ssplit, ner and regexner settings.</returns>
        private Properties GetAnnotatorSettings()
        {
            //TODO:Refactor > Cleaner way to set properties
            //Properties propsX = edu.stanford.nlp.util.PropertiesUtils.asProperties(
            //    //"annotators", "tokenize,ssplit,pos,depparse",
            //    "depparse.model", edu.stanford.nlp.parser.nndep.DependencyParser.DEFAULT_MODEL
            //);

            var props = new Properties();

            //---- ssplit
            //Split sentences at and only at newlines.
            //StanfordCoreNLP will treat the input as already tokenized and one sentence per line,
            //only separating words on whitespace.
            //An unset config value counts as false. The literal is written out directly so the
            //result does not depend on culture-sensitive casing.
            bool eolOnly = _nlpParserConfig.SentenceSplitEndOfLineOnly.GetValueOrDefault();
            props.setProperty("ssplit.eolonly", eolOnly ? "true" : "false");
            //props.setProperty("ssplit.newLineIsSentenceBreak", "always");

            //---- pos annotator settings
            //props.setProperty("pos.model", "edu/stanford/nlp/models/pos-tagger/wsj-0-18-caseless-left3words-distsim.tagger");
            //props.setProperty("pos.model", "edu/stanford/nlp/models/pos-tagger/english-bidirectional/english-bidirectional-distsim.tagger");
            //default:
            //props.setProperty("pos.model", "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");

            //---- depparse
            //depparse.model >  edu/stanford/nlp/models/parser/nndep/english_UD.gz

            //---- parse annotator settings,
            //Whether to print verbose messages while parsing.
            //props.setProperty("parse.debug", "true"); //>default false

            //!!Number of threads to use for parsing.
            //props.setProperty("parse.nthreads", "2"); //> default=1

            //default model
            //props.setProperty("parse.model", "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
            //There is a much faster and more memory efficient parser available in the shift reduce parser.
            //It takes quite a while to load, and the download is much larger, which is the main reason it is not the default.
            //props.setProperty("parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz");
            //If set to a positive number, the annotator parses only sentences of length at most this number (in terms of number of tokens).
            //props.setProperty("parse.maxlen", "25");
            //at the output
            //props.setProperty("parse.keepPunct", "false"); //>punct(think-2, .-9)

            //Generate dependency representations of the sentence, stored under the three Dependencies annotations mentioned in the introduction.
            //parse.buildgraphs > true


            //---- ner annotator settings
            props.setProperty("ner.useSUTime", "false");
            props.setProperty("ner.applyNumericClassifiers", "false");

            //---- regexner annotator settings
            //!!!! Because of en-phrase-noun.txt, new StanfordCoreNLP() is slow (3+ minutes).
            props.setProperty("regexner.mapping", "custom-ner/en-phrase-noun.txt;custom-ner/en-phrase-verb.txt;custom-ner/en-phrase-adjective.txt;custom-ner/en-phrase-adverb.txt");
            //props.setProperty("regexner.mapping", "custom-ner/en-phrase-verb.txt;custom-ner/en-phrase-adjective.txt;custom-ner/en-phrase-adverb.txt");
            //props.setProperty("regexner.mapping", "custom-ner/en-phrase-adverb.txt");

            //The alternative below also tries to load the standard NER definitions together with regexner;
            //with the property above only regexner is loaded.
            // _pipeline.addAnnotator(new TokensRegexNERAnnotator("custom-ner/en-phrase-noun.txt;custom-ner/en-phrase-verb.txt;custom-ner/en-phrase-adjective.txt;custom-ner/en-phrase-adverb.txt", true));
            //props.setProperty("regexner.posmatchtype", "MATCH_ALL_TOKENS");
            props.setProperty("regexner.ignorecase", "true");
            props.setProperty("regexner.verbose", "true");
            //A "normalized" column was added to every line of the custom NER files,
            //e.g.: assert (hers|herself|him|himself|hiss...)	VerbPhrase  assert_ones
            //To tell whether an entry came from WordNet or the wiki: assert_ones:wiki
            props.setProperty("regexner.mapping.header", "pattern,ner,normalized");
            //props.setProperty("regexner.mapping.header", "pattern,ner,normalized, overwrite, priority,group");
            props.setProperty("regexner.mapping.field.normalized", "edu.stanford.nlp.ling.CoreAnnotations$NormalizedNamedEntityTagAnnotation");

            return props;
        }

        /// <summary>
        /// Drops all cached pipelines and records the disposal in telemetry and the log.
        /// </summary>
        public void Dispose()
        {
            // Only the cache references are released; no per-pipeline cleanup is performed.
            _stanfordCoreNLPConInstances.Clear();

            _telemetryClient.TrackMetric("*** StanfordParser3-Dispose", 0);
            _logger.LogWarning($"*** StanfordCoreParser disposed at {DateTime.UtcNow}.");
        }
    }
}