Advertisement
alvations

configuration.class

Nov 26th, 2014
406
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 39.00 KB | None | 0 0
  1.  
  2. package it.uniroma1.lcl.jlt;
  3. import com.google.common.collect.Multimap;
  4. import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsDB;
  5. import it.uniroma1.lcl.jlt.util.Collections;
  6. import it.uniroma1.lcl.jlt.util.Language;
  7. import it.uniroma1.lcl.jlt.util.Pair;
  8. import it.uniroma1.lcl.jlt.util.Strings;
  9. import it.uniroma1.lcl.jlt.web.google.GoogleSearchMethod;
  10. import it.uniroma1.lcl.jlt.wiki.data.WikiVersions;
  11. import it.uniroma1.lcl.jlt.wordnet.WordNetVersion;
  12. import it.uniroma1.lcl.jlt.wordnet.data.WordNetWeightType;
  13. import java.io.File;
  14. import java.util.ArrayList;
  15. import java.util.Collection;
  16. import java.util.HashMap;
  17. import java.util.HashSet;
  18. import java.util.Iterator;
  19. import java.util.List;
  20. import java.util.Map;
  21. import java.util.Set;
  22. import org.apache.commons.configuration.ConfigurationException;
  23. import org.apache.commons.configuration.PropertiesConfiguration;
  24. import org.apache.commons.logging.Log;
  25. import org.apache.commons.logging.LogFactory;
  26.  
  27. public class Configuration
  28. {
  29. private PropertiesConfiguration config = null;
  30. private static final Log log = LogFactory.getLog(Configuration.class);
  31. private static Configuration instance = null;
  32. private static String CONFIG_DIR = "config/";
  33. public static String CONFIG_FILE = "jlt.properties";
  34. private Multimap<Language, String> categoryMap = null;
  35. private Multimap<Language, String> listMap = null;
  36. private Multimap<Language, String> glossaryMap = null;
  37. private Multimap<Language, String> redirectionMap = null;
  38. private Multimap<Language, String> infoboxImageFieldMap = null;
  39. private Multimap<Language, String> infoboxLabelMap = null;
  40. private Multimap<Language, String> disambiguationMap = null;
  41. private Multimap<Language, String> shortDisambiguationMap = null;
  42. private Multimap<Language, String> mosesIniMap;
  43.  
  44. private Configuration()
  45. {
  46. File configFile = new File(CONFIG_DIR, CONFIG_FILE);
  47.  
  48. boolean bDone = false;
  49. if (configFile.exists())
  50. {
  51. log.info("Loading " + CONFIG_FILE + " FROM " + configFile.getAbsolutePath());
  52. try
  53. {
  54. this.config = new PropertiesConfiguration(configFile);
  55. bDone = true;
  56. }
  57. catch (ConfigurationException ce)
  58. {
  59. ce.printStackTrace();
  60. }
  61. }
  62. if (!bDone)
  63. {
  64. log.info("JLT starts with empty configuration");
  65. this.config = new PropertiesConfiguration();
  66. }
  67. }
  68.  
  69. public static Configuration getInstance()
  70. {
  71. if (instance == null) {
  72. instance = new Configuration();
  73. }
  74. return instance;
  75. }
  76.  
  77. public void setConfigurationSubDirectory(String configurationSubdir)
  78. {
  79. setConfigurationFile(
  80. new File(CONFIG_DIR + File.separator + configurationSubdir,
  81. CONFIG_FILE));
  82. }
  83.  
  84. public void setConfigurationFile(File configurationFile)
  85. {
  86. log.info("Changing configuration properties to " + configurationFile);
  87. try
  88. {
  89. this.config = new PropertiesConfiguration(configurationFile);
  90. this.config.setBasePath(
  91. configurationFile.getParentFile().getAbsolutePath());
  92. }
  93. catch (ConfigurationException ce)
  94. {
  95. ce.printStackTrace();
  96. log.info("Setting JLT to an empty configuration");
  97. this.config = new PropertiesConfiguration();
  98. }
  99. }
  100.  
  101. public String getWordNetData(WordNetVersion wnv)
  102. {
  103. switch (wnv)
  104. {
  105. case WN_16:
  106. return this.config.getString("wordnet.wordnetData1.6");
  107. case WN_171:
  108. return this.config.getString("wordnet.wordnetData1.7.1");
  109. case WN_20:
  110. return this.config.getString("wordnet.wordnetData2.0");
  111. case WN_21:
  112. return this.config.getString("wordnet.wordnetData2.1");
  113. case WN_30:
  114. return this.config.getString("wordnet.wordnetData3.0");
  115. }
  116. throw new RuntimeException("Invalid WordNet version: " + wnv);
  117. }
  118.  
  119. public WordNetWeightType getWordNetWeightType()
  120. {
  121. return WordNetWeightType.valueOf(this.config.getString("wordnet.weightType"));
  122. }
  123.  
  124. public String getWordNetWeightFile(WordNetWeightType wType)
  125. {
  126. switch (wType)
  127. {
  128. case FREQUENCY:
  129. case GLOSS_OVERLAP:
  130. return this.config.getString("wordnet.weightPrefix") + "_" + wType.name().toLowerCase() + ".txt";
  131. }
  132. throw new RuntimeException("Invalid WordNet weight type: " + wType);
  133. }
  134.  
  135. public int getMaxFilesPerDir()
  136. {
  137. return this.config.getInt("wiki.dump.maxFilesPerDir");
  138. }
  139.  
  140. public int getDumpStartDir()
  141. {
  142. return this.config.getInt("wiki.dump.startDir");
  143. }
  144.  
  145. public int getDumpEndDir()
  146. {
  147. return this.config.getInt("wiki.dump.endDir");
  148. }
  149.  
  150. public String getDocDir()
  151. {
  152. return this.config.getString("wiki.dump.docDir");
  153. }
  154.  
  155. public String getWikipediaVersionFile()
  156. {
  157. return this.config.getString("wiki.dump.versionFile");
  158. }
  159.  
  160. public String getWikipediaXMLDump()
  161. {
  162. return getWikipediaXMLDump(getIndexFactoryLanguage());
  163. }
  164.  
  165. public String getWikipediaXMLDump(Language language)
  166. {
  167. WikiVersions wikiVersions = WikiVersions.getInstance();
  168. String version = wikiVersions.getVersion(language);
  169.  
  170. String base = this.config.getString("wiki.dump.dir");
  171. if (base == null) {
  172. throw new RuntimeException("Parameter 'wiki.dump.dir' not specified in the config file");
  173. }
  174. String wikiName = language.toString().toLowerCase() + "wiki";
  175. String xml = base + File.separator +
  176. wikiName + File.separator +
  177. wikiName + "-" + version + "-pages-articles.xml";
  178. return xml;
  179. }
  180.  
  181. public String getMosesIniFile(Language lang)
  182. {
  183. if (this.mosesIniMap == null)
  184. {
  185. List<String> paramlist = new ArrayList();
  186. for (Object elem : this.config.getList("moses.iniFiles")) {
  187. paramlist.add(elem.toString());
  188. }
  189. String parameters = Strings.join(paramlist, ",");
  190. this.mosesIniMap = Strings.parseLanguageParameterValues(parameters);
  191. if (this.mosesIniMap == null) {
  192. throw new RuntimeException("Invalid language specified in field moses.iniFiles");
  193. }
  194. }
  195. return (String)Collections.getFirst(this.mosesIniMap.get(lang));
  196. }
  197.  
  198. public String getMosesBin()
  199. {
  200. return this.config.getString("moses.bin");
  201. }
  202.  
  203. public String getRedirectionLabel()
  204. {
  205. return getRedirectionLabel(getIndexFactoryLanguage());
  206. }
  207.  
  208. public String getRedirectionLabel(Language language)
  209. {
  210. if (this.redirectionMap == null)
  211. {
  212. List<String> paramlist = new ArrayList();
  213. for (Object elem : this.config.getList("wiki.dump.redirectionLabel")) {
  214. paramlist.add(elem.toString());
  215. }
  216. String parameters = Strings.join(paramlist, ",");
  217. this.redirectionMap = Strings.parseLanguageParameterValues(parameters);
  218. if (this.redirectionMap == null) {
  219. throw new RuntimeException("Invalid language specified in field wiki.dump.redirectionLabel");
  220. }
  221. }
  222. String redirectionLabel = (String)Collections.getFirst(this.redirectionMap.get(language));
  223. return redirectionLabel;
  224. }
  225.  
  226. public Set<String> getListPrefix()
  227. {
  228. return getListPrefix(getIndexFactoryLanguage());
  229. }
  230.  
  231. public Set<String> getListPrefix(Language language)
  232. {
  233. if (this.listMap == null)
  234. {
  235. List<String> paramlist = new ArrayList();
  236. for (Object elem : this.config.getList("wiki.dump.listPrefix")) {
  237. paramlist.add(elem.toString());
  238. }
  239. String parameters = Strings.join(paramlist, ",");
  240. this.listMap = Strings.parseLanguageParameterValues(parameters);
  241. if (this.listMap == null) {
  242. throw new RuntimeException("Invalid language specified in field wiki.dump.listPrefix");
  243. }
  244. }
  245. Set<String> prefixes = new HashSet();
  246. prefixes.addAll(this.listMap.get(language));
  247.  
  248. return prefixes;
  249. }
  250.  
  251. public Set<String> getGlossaryPrefix()
  252. {
  253. return getGlossaryPrefix(getIndexFactoryLanguage());
  254. }
  255.  
  256. public Set<String> getGlossaryPrefix(Language language)
  257. {
  258. if (this.glossaryMap == null)
  259. {
  260. List<String> paramlist = new ArrayList();
  261. for (Object elem : this.config.getList("wiki.dump.glossaryPrefix")) {
  262. paramlist.add(elem.toString());
  263. }
  264. String parameters = Strings.join(paramlist, ",");
  265. this.glossaryMap = Strings.parseLanguageParameterValues(parameters);
  266. if (this.glossaryMap == null) {
  267. throw new RuntimeException("Invalid language specified in field wiki.dump.glossaryPrefix");
  268. }
  269. }
  270. Set<String> prefixes = new HashSet();
  271. prefixes.addAll(this.glossaryMap.get(language));
  272.  
  273. return prefixes;
  274. }
  275.  
  276. public String getCategoryPrefix()
  277. {
  278. return getCategoryPrefix(getIndexFactoryLanguage());
  279. }
  280.  
  281. public String getCategoryPrefix(Language language)
  282. {
  283. if (this.categoryMap == null)
  284. {
  285. List<String> paramlist = new ArrayList();
  286. for (Object elem : this.config.getList("wiki.dump.categoryPrefix")) {
  287. paramlist.add(elem.toString());
  288. }
  289. String parameters = Strings.join(paramlist, ",");
  290. this.categoryMap = Strings.parseLanguageParameterValues(parameters);
  291. if (this.categoryMap == null) {
  292. throw new RuntimeException("Invalid language specified in field wiki.dump.categoryPrefix");
  293. }
  294. }
  295. String categoryPrefix = (String)Collections.getFirst(this.categoryMap.get(language));
  296. if (!categoryPrefix.endsWith(":")) {
  297. categoryPrefix = categoryPrefix + ":";
  298. }
  299. return categoryPrefix;
  300. }
  301.  
  302. public Map<Language, String> getCategoryPrefixes()
  303. {
  304. if (this.categoryMap == null)
  305. {
  306. List<String> paramlist = new ArrayList();
  307. for (Object elem : this.config.getList("wiki.dump.categoryPrefix")) {
  308. paramlist.add(elem.toString());
  309. }
  310. String parameters = Strings.join(paramlist, ",");
  311. this.categoryMap = Strings.parseLanguageParameterValues(parameters);
  312. if (this.categoryMap == null) {
  313. throw new RuntimeException("Invalid language specified in field wiki.dump.categoryPrefix");
  314. }
  315. }
  316. Map<Language, String> categoryPrefixes = new HashMap();
  317. for (Language language : this.categoryMap.keySet())
  318. {
  319. String categoryPrefix = (String)Collections.getFirst(this.categoryMap.get(language));
  320. categoryPrefixes.put(language, categoryPrefix);
  321. }
  322. return categoryPrefixes;
  323. }
  324.  
  325. public Set<String> getInfoboxLabels()
  326. {
  327. return getInfoboxLabels(getIndexFactoryLanguage());
  328. }
  329.  
  330. public Set<String> getInfoboxLabels(Language language)
  331. {
  332. if (this.infoboxLabelMap == null)
  333. {
  334. List<String> paramlist = new ArrayList();
  335. for (Object elem : this.config.getList("wiki.dump.infoboxLabel")) {
  336. paramlist.add(elem.toString());
  337. }
  338. String parameters = Strings.join(paramlist, ",");
  339. this.infoboxLabelMap = Strings.parseLanguageParameterValues(parameters);
  340. if (this.infoboxLabelMap == null) {
  341. throw new RuntimeException("Invalid language specified in field wiki.dump.infoboxLabel");
  342. }
  343. }
  344. Set<String> labels = new HashSet();
  345. labels.addAll(this.infoboxLabelMap.get(language));
  346.  
  347. return labels;
  348. }
  349.  
  350. public Set<String> getInfoboxImageFields()
  351. {
  352. return getInfoboxImageFields(getIndexFactoryLanguage());
  353. }
  354.  
  355. public Set<String> getInfoboxImageFields(Language language)
  356. {
  357. if (this.infoboxImageFieldMap == null)
  358. {
  359. List<String> paramlist = new ArrayList();
  360. for (Object elem : this.config.getList("wiki.dump.infoboxImageField")) {
  361. paramlist.add(elem.toString());
  362. }
  363. String parameters = Strings.join(paramlist, ",");
  364. this.infoboxImageFieldMap = Strings.parseLanguageParameterValues(parameters);
  365. if (this.infoboxImageFieldMap == null) {
  366. throw new RuntimeException("Invalid language specified in field wiki.dump.infoboxImageField");
  367. }
  368. }
  369. Set<String> fields = new HashSet();
  370. fields.addAll(this.infoboxImageFieldMap.get(language));
  371.  
  372. return fields;
  373. }
  374.  
  375. public Set<String> getDisambiguationPrefixes()
  376. {
  377. return getDisambiguationPrefixes(getIndexFactoryLanguage());
  378. }
  379.  
  380. public Set<String> getDisambiguationPrefixes(Language language)
  381. {
  382. if (this.disambiguationMap == null)
  383. {
  384. List<String> paramlist = new ArrayList();
  385. for (Object elem : this.config.getList("wiki.dump.disambiguationPrefix")) {
  386. paramlist.add(elem.toString());
  387. }
  388. String parameters = Strings.join(paramlist, ",");
  389. this.disambiguationMap = Strings.parseLanguageParameterValues(parameters);
  390. if (this.disambiguationMap == null) {
  391. throw new RuntimeException("Invalid language specified in field wiki.dump.disambiguationPrefix");
  392. }
  393. }
  394. Set<String> prefixes = new HashSet();
  395. prefixes.addAll(this.disambiguationMap.get(language));
  396.  
  397. return prefixes;
  398. }
  399.  
  400. public Set<String> getShortDisambiguationPrefix()
  401. {
  402. return getShortDisambiguationPrefix(getIndexFactoryLanguage());
  403. }
  404.  
  405. public Set<String> getShortDisambiguationPrefix(Language language)
  406. {
  407. if (this.shortDisambiguationMap == null)
  408. {
  409. List<String> paramlist = new ArrayList();
  410. for (Object elem : this.config.getList("wiki.dump.shortDisambiguationPrefix")) {
  411. paramlist.add(elem.toString());
  412. }
  413. String parameters = Strings.join(paramlist, ",");
  414. this.shortDisambiguationMap = Strings.parseLanguageParameterValues(parameters);
  415. if (this.shortDisambiguationMap == null) {
  416. throw new RuntimeException("Invalid language specified in field wiki.dump.shortDisambiguationPrefix");
  417. }
  418. }
  419. Set<String> prefixes = new HashSet();
  420. prefixes.addAll(this.shortDisambiguationMap.get(language));
  421.  
  422. return prefixes;
  423. }
  424.  
  425. public String getHeiNERFile()
  426. {
  427. return this.config.getString("heiner.data");
  428. }
  429.  
  430. public String getMosesTempDir()
  431. {
  432. return this.config.getString("jlt.mosesTempDir");
  433. }
  434.  
  435. public String getTreeTaggerDirectory()
  436. {
  437. return this.config.getString("treetagger.home");
  438. }
  439.  
  440. public String getTreeTaggerModel()
  441. {
  442. return this.config.getString("treetagger.model");
  443. }
  444.  
  445. public int getIndexStartDir()
  446. {
  447. return this.config.getInt("wiki.index.startDir");
  448. }
  449.  
  450. public int getIndexEndDir()
  451. {
  452. return this.config.getInt("wiki.index.endDir");
  453. }
  454.  
  455. public Language getIndexFactoryLanguage()
  456. {
  457. return Language.valueOf(this.config.getString("wiki.index.indexFactoryLanguage"));
  458. }
  459.  
  460. public Language getCategoryIndexFactoryLanguage()
  461. {
  462. return Language.valueOf(this.config.getString("wiki.index.categoryIndexFactoryLanguage"));
  463. }
  464.  
  465. public String getIndexFactoryDir()
  466. {
  467. return getIndexDir(getIndexFactoryLanguage());
  468. }
  469.  
  470. public String getCategoryIndexFactoryDir()
  471. {
  472. return getCategoryIndexDir(getCategoryIndexFactoryLanguage());
  473. }
  474.  
  475. public String getIndexDir(Language language)
  476. {
  477. return this.config.getString("wiki.index.indexDir") + File.separator + language.toString().toLowerCase();
  478. }
  479.  
  480. public String getCategoryIndexDir(Language language)
  481. {
  482. return this.config.getString("wiki.index.categoryIndexDir") + File.separator + language.toString().toLowerCase();
  483. }
  484.  
  485. public String getWikiGlossIndexDir(Language language)
  486. {
  487. return this.config.getString("wiki.index.glossindexDir") + File.separator + language.toString().toLowerCase();
  488. }
  489.  
  490. public String getWikiGlossDump(Language language)
  491. {
  492. return
  493. this.config.getString("wiki.dump.glossFilePrefix") + File.separator + "gloss-" + language.toString().toLowerCase() + ".txt";
  494. }
  495.  
  496. public boolean getIndexResume()
  497. {
  498. return this.config.getBoolean("wiki.index.resume");
  499. }
  500.  
  501. public String getCategoriesConcepts()
  502. {
  503. return this.config.getString("wiki.categories.concepts");
  504. }
  505.  
  506. public String getCategoriesHeads()
  507. {
  508. return this.config.getString("wiki.categories.heads");
  509. }
  510.  
  511. public String getStopwordsFile(Language language)
  512. {
  513. return this.config.getString("stopwords.FilePrefix") + "_" + language.toString().toLowerCase() + ".txt";
  514. }
  515.  
  516. public String getFunctionwordsFile(Language language)
  517. {
  518. return this.config.getString("functionwords.FilePrefix") + "_" + language.toString().toLowerCase() + ".txt";
  519. }
  520.  
  521. public int getMaxMergeDocs()
  522. {
  523. return Integer.parseInt(this.config.getString("wiki.index.maxmergedocs"));
  524. }
  525.  
  526. public int getIndexNumThreads()
  527. {
  528. return this.config.getInt("wiki.index.numThread");
  529. }
  530.  
  531. public String getEntityList(Language language)
  532. {
  533. return this.config.getString("wiki.lexiconDir") + File.separator + "entities-" + language + ".txt";
  534. }
  535.  
  536. public String getTermList(Language language)
  537. {
  538. return this.config.getString("wiki.lexiconDir") + File.separator + "terms-" + language + ".txt";
  539. }
  540.  
  541. public List<String> getItalianLemmaTable()
  542. {
  543. return this.config.getList("lemmaTable.italian");
  544. }
  545.  
  546. public List<String> getEnglishLemmaTable()
  547. {
  548. return this.config.getList("lemmaTable.english");
  549. }
  550.  
  551. public boolean indexOnlyWikiPageRedirectionText()
  552. {
  553. return this.config.getBoolean("wiki.index.onlyWikiPageRedirectionText");
  554. }
  555.  
  556. public boolean indexOnlyWikiPageInformation()
  557. {
  558. return this.config.getBoolean("wiki.index.onlyWikiPageInformation");
  559. }
  560.  
  561. public int getMaxPagesPerFile()
  562. {
  563. return this.config.getInt("wiki.dump.maxPagesPerFile");
  564. }
  565.  
  566. public String getPagesSeparator()
  567. {
  568. return this.config.getString("wiki.dump.pagesSeparator");
  569. }
  570.  
  571. public String getLexiconFileName()
  572. {
  573. return this.config.getString("wiki.dump.lexiconFileName");
  574. }
  575.  
  576. public int getBufferSizeWriter()
  577. {
  578. return this.config.getInt("wiki.dump.bufferSizeWriter");
  579. }
  580.  
  581. public int getBufferSizeReader()
  582. {
  583. return this.config.getInt("wiki.dump.bufferSizeReader");
  584. }
  585.  
  586. public String getHypernymCategoryTreeFileName()
  587. {
  588. return this.config.getString("wiki.dump.categoryTreeFileName") + ".hyper.txt";
  589. }
  590.  
  591. public String getHyponymCategoryTreeFileName()
  592. {
  593. return this.config.getString("wiki.dump.categoryTreeFileName") + ".hypo.txt";
  594. }
  595.  
  596. public boolean getWordNetUnlimitedCache()
  597. {
  598. return this.config.getBoolean("wordnet.useUnlimitedCache");
  599. }
  600.  
  601. public String getWordNetMappings(WordNetVersion wnv)
  602. {
  603. switch (wnv)
  604. {
  605. case WN_15:
  606. return this.config.getString("wordnet.wordnetMap1.5");
  607. case WN_16:
  608. return this.config.getString("wordnet.wordnetMap1.6");
  609. case WN_171:
  610. return this.config.getString("wordnet.wordnetMap1.7.1");
  611. case WN_20:
  612. return this.config.getString("wordnet.wordnetMap2.0");
  613. case WN_21:
  614. return this.config.getString("wordnet.wordnetMap2.1");
  615. }
  616. throw new RuntimeException("Invalid WordNet version: " + wnv);
  617. }
  618.  
  619. public String getWordNetGlosses()
  620. {
  621. return this.config.getString("wordnet.wordnetGlosses");
  622. }
  623.  
  624. public boolean useWordNetMonosemousGlossWords()
  625. {
  626. return this.config.getBoolean("wordnet.useMonosemousGlossWords");
  627. }
  628.  
  629. public boolean useWordNetDisambiguatedGlossWords()
  630. {
  631. return this.config.getBoolean("wordnet.useDisambiguatedGlossWords");
  632. }
  633.  
  634. public String getSemcorHome()
  635. {
  636. return this.config.getString("semcor.semcorHome");
  637. }
  638.  
  639. public int getLogNumPages()
  640. {
  641. return this.config.getInt("wiki.dump.logNumPages");
  642. }
  643.  
  644. public String getFullGoogleDefineURL(String query)
  645. {
  646. return this.config.getString("google.fullDefineUrl").replaceAll("QUERY", query);
  647. }
  648.  
  649. public String getGoogleAjaxURL()
  650. {
  651. return this.config.getString("google.ajaxUrl");
  652. }
  653.  
  654. public String getYahooSearchUrl()
  655. {
  656. return this.config.getString("yahoo.searchUrl");
  657. }
  658.  
  659. public String getYahooSearchUrlAppendix()
  660. {
  661. return this.config.getString("yahoo.searchUrl.appendix");
  662. }
  663.  
  664. public String getGoogleRequester()
  665. {
  666. return this.config.getString("google.requester");
  667. }
  668.  
  669. public int getWikicoWindowRadius()
  670. {
  671. return this.config.getInt("wikico.windowRadius");
  672. }
  673.  
  674. public String getWikicoLinksFile()
  675. {
  676. return this.config.getString("wikico.links");
  677. }
  678.  
  679. public String getWikicoDBUser()
  680. {
  681. return this.config.getString("wikico.db.user");
  682. }
  683.  
  684. public String getWikicoDBPassword()
  685. {
  686. return this.config.getString("wikico.db.password");
  687. }
  688.  
  689. public String getWikicoDBName()
  690. {
  691. return this.config.getString("wikico.db.name");
  692. }
  693.  
  694. public String getWikicoDBURL()
  695. {
  696. return this.config.getString("wikico.db.url");
  697. }
  698.  
  699. public String getWikipediaSentenceIndexDirectory()
  700. {
  701. return this.config.getString("wiki.sentenceIndexDir");
  702. }
  703.  
  704. public String getWikipediaDisambiguationIndexDirectory()
  705. {
  706. return this.config.getString("wiki.index.disambiguationIndexDir");
  707. }
  708.  
  709. public String getUKWacSentenceIndexDirectory()
  710. {
  711. return this.config.getString("ukwac.sentenceIndexDir");
  712. }
  713.  
  714. public String getUKWacCompoundSentenceIndexDirectory()
  715. {
  716. return this.config.getString("ukwac.compoundSentenceIndexDir");
  717. }
  718.  
  719. public String getUKWacDirectory()
  720. {
  721. return this.config.getString("jlt.ukwacDir");
  722. }
  723.  
  724. public String getUkwacWordlistFilepath()
  725. {
  726. return this.config.getString("ukwac.wordlistFilepath");
  727. }
  728.  
  729. public int getUKWacCompundMaxLength()
  730. {
  731. return this.config.getInt("ukwac.compoundMaxLength");
  732. }
  733.  
  734. public int getUKWacCorrelationWindow()
  735. {
  736. return this.config.getInt("ukwac.correlationWindow");
  737. }
  738.  
  739. @Deprecated
  740. public String getUKWaccoDBUser()
  741. {
  742. return getCollocsUser(CollocsDB.UKWACCO);
  743. }
  744.  
  745. @Deprecated
  746. public String getUKWaccoDBPassword()
  747. {
  748. return getCollocsPassword(CollocsDB.UKWACCO);
  749. }
  750.  
  751. @Deprecated
  752. public String getUKWaccoDBURL()
  753. {
  754. return getCollocsLocation(CollocsDB.UKWACCO);
  755. }
  756.  
  757. public String getUKWaccoDBName()
  758. {
  759. return this.config.getString("ukwac.db.name");
  760. }
  761.  
  762. public Integer getMaxTopDocs()
  763. {
  764. return Integer.valueOf(this.config.getInt("jlt.maxTopDocs"));
  765. }
  766.  
  767. public boolean useProxy()
  768. {
  769. return this.config.getBoolean("web.useProxy");
  770. }
  771.  
  772. public String getProxyDirectory()
  773. {
  774. return this.config.getString("web.proxyList");
  775. }
  776.  
  777. public String getHtmlUtf8EncodeFile()
  778. {
  779. return this.config.getString("web.htmlUtf8Encode");
  780. }
  781.  
  782. public Integer getYahooResultsNumber()
  783. {
  784. return Integer.valueOf(this.config.getInt("yahoo.resultsNumber"));
  785. }
  786.  
  787. public String getGoogleTranslateAPIUSer()
  788. {
  789. return this.config.getString("google.translate.api.user");
  790. }
  791.  
  792. public String getGoogleTranslateAPIPassword()
  793. {
  794. return this.config.getString("google.translate.api.passwd");
  795. }
  796.  
  797. public String getGoogleTranslateAPIApplicationName()
  798. {
  799. return this.config.getString("google.translate.api.applicationName");
  800. }
  801.  
  802. public String getStanfordParserModel()
  803. {
  804. return this.config.getString("stanford.parser.model");
  805. }
  806.  
  807. public String getStanfordNERModel()
  808. {
  809. return this.config.getString("stanford.ner.model");
  810. }
  811.  
  812. public String getStanfordPOSModel()
  813. {
  814. return this.config.getString("stanford.pos.model");
  815. }
  816.  
  817. public List<Pair<String, String>> getClassSerializationParameterValues(Class<?> c)
  818. {
  819. return getClassSerializationParameterValues(c.getCanonicalName());
  820. }
  821.  
  822. public List<Pair<String, String>> getClassSerializationParameterValues(String className)
  823. {
  824. List<Pair<String, String>> pairs = new ArrayList();
  825. String serializationParameterList = "jlt.ser.parameters." + className;
  826. for (Object param : this.config.getList(serializationParameterList))
  827. {
  828. String paramName = param.toString();
  829. String paramValue = this.config.getList(paramName).toString();
  830. pairs.add(new Pair(paramName, paramValue));
  831. }
  832. return pairs;
  833. }
  834.  
  835. public String getSerializationDir()
  836. {
  837. return this.config.getString("jlt.ser.dir");
  838. }
  839.  
  840. public boolean isSerializationConfigured()
  841. {
  842. return getSerializationDir() != null;
  843. }
  844.  
  845. public List<String> getWordNetPlusPlusSourceFiles()
  846. {
  847. List<String> sources = new ArrayList();
  848. for (Object s : this.config.getList("wnpp.sourceFiles")) {
  849. sources.add(s.toString());
  850. }
  851. return sources;
  852. }
  853.  
  854. public CollocsDB getCollocsDB()
  855. {
  856. return CollocsDB.valueOf(this.config.getString("jlt.db.collocsDB"));
  857. }
  858.  
  859. public String getCollocsLocation(CollocsDB collocsDB)
  860. {
  861. Iterator<?> i = this.config.getList("jlt.db.locations").iterator();
  862. while (i.hasNext())
  863. {
  864. String dbType = i.next().toString();
  865. if (!i.hasNext()) {
  866. throw new RuntimeException("Missing location for CollocsDB type " + dbType);
  867. }
  868. String dbLocation = i.next().toString();
  869. if (collocsDB.toString().equals(dbType)) {
  870. return dbLocation;
  871. }
  872. }
  873. throw new RuntimeException("UNKNOWN COLLOCS DB: " + collocsDB);
  874. }
  875.  
  876. public String getCollocsUser(CollocsDB collocsDB)
  877. {
  878. Iterator<?> i = this.config.getList("jlt.db.users").iterator();
  879. if (collocsDB.isMySQLDB())
  880. {
  881. while (i.hasNext())
  882. {
  883. String dbType = i.next().toString();
  884. if (!i.hasNext()) {
  885. throw new RuntimeException("Missing user for CollocsDB type " + dbType);
  886. }
  887. String dbUser = i.next().toString();
  888. if (collocsDB.toString().equals(dbType)) {
  889. return dbUser;
  890. }
  891. }
  892. throw new RuntimeException("UNKNOWN COLLOCS DB: " + collocsDB);
  893. }
  894. return null;
  895. }
  896.  
  897. public String getCollocsPassword(CollocsDB collocsDB)
  898. {
  899. Iterator<?> i = this.config.getList("jlt.db.passwords").iterator();
  900. if (collocsDB.isMySQLDB())
  901. {
  902. while (i.hasNext())
  903. {
  904. String dbType = i.next().toString();
  905. if (!i.hasNext()) {
  906. throw new RuntimeException("Missing password for CollocsDB type " + dbType);
  907. }
  908. String dbPass = i.next().toString();
  909. if (collocsDB.toString().equals(dbType)) {
  910. return dbPass;
  911. }
  912. }
  913. throw new RuntimeException("UNKNOWN COLLOCS DB: " + collocsDB);
  914. }
  915. return null;
  916. }
  917.  
  918. public String getEuroParlAlignmentFile(Language source, Language target)
  919. {
  920. String base = this.config.getString("europarl.baseName");
  921. return base + "-" + source.name() + "-" + target.name() + ".txt";
  922. }
  923.  
  924. public String getSemEvalTestIndexDirectory()
  925. {
  926. return this.config.getString("semeval.index.testDir");
  927. }
  928.  
  929. public String getSemEvalTrainingIndexDirectory()
  930. {
  931. return this.config.getString("semeval.index.trainingDir");
  932. }
  933.  
  934. public String getSemEvalDataDirectory(CollocsDB collocsDB)
  935. {
  936. switch (collocsDB)
  937. {
  938. case SEMEVAL10_TRAIN:
  939. return getSemEvalTestDataDirectory();
  940. case SEMEVAL10_TEST:
  941. return getSemEvalTrainingDataDirectory();
  942. }
  943. throw new RuntimeException("UNKNOWN COLLOCS DB: " + collocsDB);
  944. }
  945.  
  946. public String getSemEvalTestDataDirectory()
  947. {
  948. return this.config.getString("semeval.data.testDir");
  949. }
  950.  
  951. public String getSemEvalTrainingDataDirectory()
  952. {
  953. return this.config.getString("semeval.data.trainingDir");
  954. }
  955.  
  956. public String getSemEvalDataset()
  957. {
  958. return this.config.getString("semeval.dataset");
  959. }
  960.  
  961. public String getSemEval07TestDataDirectory()
  962. {
  963. return this.config.getString("semeval07.data.testDir");
  964. }
  965.  
  966. public String getSemEval07TrainingDataDirectory()
  967. {
  968. return this.config.getString("semeval07.data.trainingDir");
  969. }
  970.  
  971. public String getSemEval07Dataset()
  972. {
  973. return this.config.getString("semeval07.dataset");
  974. }
  975.  
  976. public String getSemeval07ParsedFilePath()
  977. {
  978. return this.config.getString("semeval07.parsedSentences");
  979. }
  980.  
  981. public String getSemeval07Keys()
  982. {
  983. return this.config.getString("semeval07.keys");
  984. }
  985.  
  986. public String getSemeval07SyntacticRelations()
  987. {
  988. return this.config.getString("semeval07.syntacticRelations");
  989. }
  990.  
  991. public String getSemeval07WsiTestData()
  992. {
  993. return this.config.getString("semeval07.wsi.data.test");
  994. }
  995.  
  996. public String getSemeval07WsiTestWords()
  997. {
  998. return this.config.getString("semeval07.wsi.data.words");
  999. }
  1000.  
  1001. public List<String> getSemeval07WsiPoses()
  1002. {
  1003. return this.config.getList("semeval07.wsi.poses");
  1004. }
  1005.  
  1006. public String getSemeval07WSISimilarityDir()
  1007. {
  1008. return this.config.getString("semeval07.wsi.similarity_dir");
  1009. }
  1010.  
  1011. public String getURPGoogleSearchURL(Language targetLanguage)
  1012. {
  1013. List<String> paramlist = new ArrayList();
  1014. for (Object elem : this.config.getList("google.urp.searchURL")) {
  1015. paramlist.add(elem.toString());
  1016. }
  1017. String parameters = Strings.join(paramlist, ",");
  1018. Object lang2values = Strings.parseLanguageParameterValues(parameters);
  1019. if (((Multimap)lang2values).keySet().contains(targetLanguage))
  1020. {
  1021. Collection<String> values = ((Multimap)lang2values).get(targetLanguage);
  1022. if (!values.isEmpty()) {
  1023. return (String)values.iterator().next();
  1024. }
  1025. }
  1026. return null;
  1027. }
  1028.  
  1029. public String getURPGoogleSearchURL()
  1030. {
  1031. return getURPGoogleSearchURL(Language.EN);
  1032. }
  1033.  
  1034. public String getFullGoogleAjaxURL(Language targetLanguage)
  1035. {
  1036. List<String> paramlist = new ArrayList();
  1037. for (Object elem : this.config.getList("google.fullAjaxUrl")) {
  1038. paramlist.add(elem.toString());
  1039. }
  1040. String parameters = Strings.join(paramlist, ",");
  1041. Object lang2values = Strings.parseLanguageParameterValues(parameters);
  1042. if (((Multimap)lang2values).keySet().contains(targetLanguage))
  1043. {
  1044. Collection<String> values = ((Multimap)lang2values).get(targetLanguage);
  1045. if (!values.isEmpty()) {
  1046. return (String)values.iterator().next();
  1047. }
  1048. }
  1049. return null;
  1050. }
  1051.  
  1052. public String getFullGoogleAjaxURL()
  1053. {
  1054. return getFullGoogleAjaxURL(Language.EN);
  1055. }
  1056.  
  1057. public GoogleSearchMethod getGoogleSearchMethod()
  1058. {
  1059. return GoogleSearchMethod.valueOf(this.config.getString("google.searchMethod"));
  1060. }
  1061.  
  1062. public String getWebSnippetCache()
  1063. {
  1064. return this.config.getString("websnippetcache.folder");
  1065. }
  1066.  
  1067. public String getWebCounterCache()
  1068. {
  1069. return this.config.getString("webcountercache.folder");
  1070. }
  1071.  
  1072. public boolean getWebQueryEnabled()
  1073. {
  1074. return this.config.getBoolean("websnippetcache.postNewQueries");
  1075. }
  1076.  
  1077. public boolean getWebCounterEnabled()
  1078. {
  1079. return this.config.getBoolean("webcountercache.postNewQueries");
  1080. }
  1081.  
  1082. public String getMultilanguagePOSMappingFolder()
  1083. {
  1084. return this.config.getString("multilanguagePOSMapping.folder");
  1085. }
  1086.  
  1087. public String getWiktionaryDump()
  1088. {
  1089. return this.config.getString("wiktionary.dump");
  1090. }
  1091.  
  1092. public String getWiktionaryIndex()
  1093. {
  1094. return this.config.getString("wiktionary.index");
  1095. }
  1096.  
  1097. public String getGigawordTaggedDirectory()
  1098. {
  1099. return this.config.getString("jlt.gigawordTaggedDir");
  1100. }
  1101.  
  1102. public String getGigawordSentenceCompoundIndexDirectory()
  1103. {
  1104. return this.config.getString("jlt.gigawordCompoundIndexDir");
  1105. }
  1106.  
  1107. public String getGigawordDumpWordlistFilepath()
  1108. {
  1109. return this.config.getString("jlt.gigawordDumpWordListFilePath");
  1110. }
  1111.  
  1112. public List<String> getGigawordSubdirectories()
  1113. {
  1114. List<String> subdirs = new ArrayList();
  1115. for (Object elem : this.config.getList("jlt.gigawordSubdirectories")) {
  1116. subdirs.add(elem.toString());
  1117. }
  1118. return subdirs;
  1119. }
  1120.  
  1121. public String getGigawordIndexWordlistFilepath()
  1122. {
  1123. return this.config.getString("jlt.gigawordIndexWordListFilePath");
  1124. }
  1125.  
  1126. public String getGigawordParsedDir()
  1127. {
  1128. return this.config.getString("jlt.gigawordParsedDir");
  1129. }
  1130.  
  1131. public String getUkwaccoDefaultLexicon()
  1132. {
  1133. return this.config.getString("ukwac.factory.defaultLexicon");
  1134. }
  1135.  
  1136. public String getUkwaccoCustomLexiconLocation()
  1137. {
  1138. return this.config.getString("ukwac.factory.customLexicon");
  1139. }
  1140.  
  1141. public String getBncDirectory()
  1142. {
  1143. return this.config.getString("jlt.bncDir");
  1144. }
  1145.  
  1146. public String getBncLexiconFile()
  1147. {
  1148. return this.config.getString("jlt.bncLexiconFile");
  1149. }
  1150.  
  1151. public String getBncMatrixOfContextsPath()
  1152. {
  1153. return this.config.getString("jlt.bncContextMatrix");
  1154. }
  1155.  
  1156. public String getBncVectorMethod()
  1157. {
  1158. return this.config.getString("jlt.bncVectorMethod");
  1159. }
  1160.  
  1161. public String getBncDataInputDir()
  1162. {
  1163. return this.config.getString("jlt.bncDataInputDir");
  1164. }
  1165.  
  1166. public String getBncCooccurrenceFile()
  1167. {
  1168. return this.config.getString("jlt.bncCooccurrencesFile");
  1169. }
  1170.  
  1171. public boolean getBncOnlyContentWords()
  1172. {
  1173. return this.config.getBoolean("jlt.bncGetOnlyContentWords");
  1174. }
  1175.  
  1176. public String getBncDocuments()
  1177. {
  1178. return this.config.getString("jlt.bncDocuments");
  1179. }
  1180.  
  1181. public int getBncTopKWords()
  1182. {
  1183. return this.config.getInt("jlt.bncTopKWords");
  1184. }
  1185.  
  1186. public int getBncCooccurrenceWindow()
  1187. {
  1188. return this.config.getInt("jlt.bncCooccurrenceWindow");
  1189. }
  1190.  
  1191. public String getBncMatrixOfCooccurrencesPath()
  1192. {
  1193. return this.config.getString("jlt.bncCooccurrenceMatrix");
  1194. }
  1195.  
  1196. public String getMiniparDir()
  1197. {
  1198. return this.config.getString("minipar.miniparHomeDir");
  1199. }
  1200.  
  1201. public String getAolQueryDirectory()
  1202. {
  1203. return this.config.getString("aol.datasetDir");
  1204. }
  1205.  
  1206. public String getAolDBName()
  1207. {
  1208. return this.config.getString("aol.db.name");
  1209. }
  1210.  
  1211. public String getBncStopwordsFile()
  1212. {
  1213. return this.config.getString("jlt.bnc.stopwords.filename");
  1214. }
  1215.  
  1216. public String getGigawordDirectory()
  1217. {
  1218. return this.config.getString("jlt.gigawordOriginalDir");
  1219. }
  1220.  
  1221. public String getGigaword5Directory()
  1222. {
  1223. return this.config.getString("jlt.gigawordOriginal5Dir");
  1224. }
  1225.  
  1226. public String getBncStanfordParsedFile()
  1227. {
  1228. return this.config.getString("jlt.bnc.parsed.stanfordfile");
  1229. }
  1230.  
  1231. public String getJWeb1TDir()
  1232. {
  1233. return this.config.getString("jweb1t.indexdir");
  1234. }
  1235. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement