Advertisement
Guest User

Untitled

a guest
Dec 11th, 2019
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.55 KB | None | 0 0
  1. package authorReco;
  2.  
  3.  
  4. import java.util.*;
  5.  
  6. import authorEval.*;
  7. import langModel.*;
  8.  
  9.  
  10. /**
  11. * Class AuthorRecognizer1: a first author recognition system that recognizes
  12. * the author of a sentence by using the language models read from a configuration system.
  13. * (no unknown author can be detected)
  14. *
  15. * @author N. Hernandez and S. Quiniou (2017)
  16. *
  17. */
  18. public class AuthorRecognizer1 extends AuthorRecognizerAbstractClass {
  19. /**
  20. * Map of LanguageModels associated with each author (each author can be
  21. * associated with several language models).
  22. * The keys to the first map are the names of the authors (e.g., "zola"), the keys
  23. * to the second map are the names associated with each file containing a language model
  24. * (e.g., "zola-bigrams"), and the values of the second map are the LanguageModel objects
  25. * built from the file path given in the AuthorConfigurationFile attribute.
  26. */
  27. protected Map<String, Map<String, LanguageModelInterface>> authorLangModelsMap;
  28.  
  29.  
  30.  
  31. /**
  32. * Constructor.
  33. *
  34. * @param configFile the file path of the configuration file containing the information
  35. * on the language models (author, name and file path).
  36. * @param vocabFile the file path of the file containing the common vocabulary
  37. * for all the language models used in the recognition system.
  38. * @param authorFile the file path of the file containing
  39. * the names of the authors recognized by the system.
  40. */
  41. public AuthorRecognizer1(String configFile, String vocabFile, String authorFile) {
  42. //TODO
  43. // On charge les fichiers nécesaires à l'éxecution du programme
  44. this.authorLangModelsMap = new HashMap<String, Map<String, LanguageModelInterface>>();
  45. this.loadAuthorFile(authorFile);
  46. this.loadVocabularyFile(vocabFile);
  47. this.loadAuthorConfigurationFile(configFile);
  48.  
  49. // On récupère les auteurs
  50. Set<String> authors = this.configLangModels.getAuthors();
  51.  
  52. for(String author : authors) {
  53. System.out.println("\n< "+author+" >");
  54.  
  55. NgramCounts ngramcounts = new NgramCounts();
  56.  
  57. String[] path = new String[1];
  58. this.configLangModels.getNgramCountPath(author).toArray(path);
  59. String[] name = new String[1];
  60. this.configLangModels.getNgramCountNames(author).toArray(name);
  61.  
  62. //Pour chaque ngram de chaque auteur
  63. for(int i = 0; i < name.length; i++) {
  64. System.out.println("n-gram path : "+path[i]);
  65.  
  66. HashMap<String, LanguageModelInterface> modLangageNgram = new HashMap<String, LanguageModelInterface>();
  67. LanguageModelInterface modLangage = new LaplaceLanguageModel();
  68.  
  69. ngramcounts.readNgramCountsFile(path[i]);
  70. modLangage.setNgramCounts(ngramcounts, this.vocabularyLM);
  71. modLangageNgram.put(name[i],modLangage);
  72. authorLangModelsMap.put(author, modLangageNgram);
  73. }
  74. }
  75. }
  76.  
  77.  
  78.  
  79. /**
  80. * Method recognizing and returning the author of the given sentence
  81. * (the unknown author can also be picked up).
  82. *
  83. * @param sentence the sentence whose author is to recognize.
  84. * @return the author of the sentence as recognized by the recognition system.
  85. */
  86. public String recognizeAuthorSentence(String sentence) {
  87. Double proba = 0.0;
  88. String recognizeAuthor = "";
  89.  
  90. for(Map.Entry<String, Map<String, LanguageModelInterface>> authorLangModel : authorLangModelsMap.entrySet()) {
  91. Map<String, LanguageModelInterface> ngramNamelangModel = authorLangModel.getValue();
  92. for(Map.Entry<String, LanguageModelInterface> modeleLangage : ngramNamelangModel.entrySet()) {
  93. if(modeleLangage.getValue().getSentenceProb(sentence) > proba) {
  94. proba = modeleLangage.getValue().getSentenceProb(sentence);
  95. recognizeAuthor = authorLangModel.getKey();
  96. }
  97. }
  98. }
  99. System.out.println(recognizeAuthor+" : \t"+proba);
  100. return recognizeAuthor;
  101. }
  102.  
  103.  
  104.  
  105. /**
  106. * Main method.
  107. *
  108. * @param args arguments of the main method.
  109. */
  110. public static void main(String[] args) {
  111. //initialization of the recognition system
  112. AuthorRecognizer1 reco = new AuthorRecognizer1("lm/small_author_corpus/fichConfig_bigram_1000sentences.txt","lm/small_author_corpus/corpus_20000.vocab","data/small_author_corpus/validation/authors.txt");
  113.  
  114. //computation of the hypothesis author file
  115. reco.recognizeFileLanguage("data/small_author_corpus/validation/sentences_100sentences.txt", "data/small_author_corpus/validation/authors_hyp1.txt");
  116.  
  117. //computation of the performance of the recognition system
  118. System.out.println(RecognizerPerformance.evaluate("data/small_author_corpus/validation/authors_100sentences_ref.txt", "data/small_author_corpus/validation/authors_hyp1.txt"));
  119.  
  120. }
  121. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement