Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package authorReco;
- import java.util.*;
- import authorEval.*;
- import langModel.*;
- /**
- * Class AuthorRecognizer1: a first author recognition system that recognizes
- * the author of a sentence by using the language models read from a configuration system.
- * (no unknown author can be detected)
- *
- * @author N. Hernandez and S. Quiniou (2017)
- *
- */
- public class AuthorRecognizer1 extends AuthorRecognizerAbstractClass {
- /**
- * Map of LanguageModels associated with each author (each author can be
- * associated with several language models).
- * The keys to the first map are the names of the authors (e.g., "zola"), the keys
- * to the second map are the names associated with each file containing a language model
- * (e.g., "zola-bigrams"), and the values of the second map are the LanguageModel objects
- * built from the file path given in the AuthorConfigurationFile attribute.
- */
- protected Map<String, Map<String, LanguageModelInterface>> authorLangModelsMap;
- /**
- * Constructor.
- *
- * @param configFile the file path of the configuration file containing the information
- * on the language models (author, name and file path).
- * @param vocabFile the file path of the file containing the common vocabulary
- * for all the language models used in the recognition system.
- * @param authorFile the file path of the file containing
- * the names of the authors recognized by the system.
- */
- public AuthorRecognizer1(String configFile, String vocabFile, String authorFile) {
- //TODO
- // On charge les fichiers nécesaires à l'éxecution du programme
- this.authorLangModelsMap = new HashMap<String, Map<String, LanguageModelInterface>>();
- this.loadAuthorFile(authorFile);
- this.loadVocabularyFile(vocabFile);
- this.loadAuthorConfigurationFile(configFile);
- // On récupère les auteurs
- Set<String> authors = this.configLangModels.getAuthors();
- for(String author : authors) {
- System.out.println("\n< "+author+" >");
- NgramCounts ngramcounts = new NgramCounts();
- String[] path = new String[1];
- this.configLangModels.getNgramCountPath(author).toArray(path);
- String[] name = new String[1];
- this.configLangModels.getNgramCountNames(author).toArray(name);
- //Pour chaque ngram de chaque auteur
- for(int i = 0; i < name.length; i++) {
- System.out.println("n-gram path : "+path[i]);
- HashMap<String, LanguageModelInterface> modLangageNgram = new HashMap<String, LanguageModelInterface>();
- LanguageModelInterface modLangage = new LaplaceLanguageModel();
- ngramcounts.readNgramCountsFile(path[i]);
- modLangage.setNgramCounts(ngramcounts, this.vocabularyLM);
- modLangageNgram.put(name[i],modLangage);
- authorLangModelsMap.put(author, modLangageNgram);
- }
- }
- }
- /**
- * Method recognizing and returning the author of the given sentence
- * (the unknown author can also be picked up).
- *
- * @param sentence the sentence whose author is to recognize.
- * @return the author of the sentence as recognized by the recognition system.
- */
- public String recognizeAuthorSentence(String sentence) {
- Double proba = 0.0;
- String recognizeAuthor = "";
- for(Map.Entry<String, Map<String, LanguageModelInterface>> authorLangModel : authorLangModelsMap.entrySet()) {
- Map<String, LanguageModelInterface> ngramNamelangModel = authorLangModel.getValue();
- for(Map.Entry<String, LanguageModelInterface> modeleLangage : ngramNamelangModel.entrySet()) {
- if(modeleLangage.getValue().getSentenceProb(sentence) > proba) {
- proba = modeleLangage.getValue().getSentenceProb(sentence);
- recognizeAuthor = authorLangModel.getKey();
- }
- }
- }
- System.out.println(recognizeAuthor+" : \t"+proba);
- return recognizeAuthor;
- }
- /**
- * Main method.
- *
- * @param args arguments of the main method.
- */
- public static void main(String[] args) {
- //initialization of the recognition system
- AuthorRecognizer1 reco = new AuthorRecognizer1("lm/small_author_corpus/fichConfig_bigram_1000sentences.txt","lm/small_author_corpus/corpus_20000.vocab","data/small_author_corpus/validation/authors.txt");
- //computation of the hypothesis author file
- reco.recognizeFileLanguage("data/small_author_corpus/validation/sentences_100sentences.txt", "data/small_author_corpus/validation/authors_hyp1.txt");
- //computation of the performance of the recognition system
- System.out.println(RecognizerPerformance.evaluate("data/small_author_corpus/validation/authors_100sentences_ref.txt", "data/small_author_corpus/validation/authors_hyp1.txt"));
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement