Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
package it.uniroma1.lcl.jlt;

import com.google.common.collect.Multimap;
import it.uniroma1.lcl.jlt.collocs.enumeration.CollocsDB;
import it.uniroma1.lcl.jlt.util.Collections;
import it.uniroma1.lcl.jlt.util.Language;
import it.uniroma1.lcl.jlt.util.Pair;
import it.uniroma1.lcl.jlt.util.Strings;
import it.uniroma1.lcl.jlt.web.google.GoogleSearchMethod;
import it.uniroma1.lcl.jlt.wiki.data.WikiVersions;
import it.uniroma1.lcl.jlt.wordnet.WordNetVersion;
import it.uniroma1.lcl.jlt.wordnet.data.WordNetWeightType;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
- public class Configuration
- {
- private PropertiesConfiguration config = null;
- private static final Log log = LogFactory.getLog(Configuration.class);
- private static Configuration instance = null;
- private static String CONFIG_DIR = "config/";
- public static String CONFIG_FILE = "jlt.properties";
- private Multimap<Language, String> categoryMap = null;
- private Multimap<Language, String> listMap = null;
- private Multimap<Language, String> glossaryMap = null;
- private Multimap<Language, String> redirectionMap = null;
- private Multimap<Language, String> infoboxImageFieldMap = null;
- private Multimap<Language, String> infoboxLabelMap = null;
- private Multimap<Language, String> disambiguationMap = null;
- private Multimap<Language, String> shortDisambiguationMap = null;
- private Multimap<Language, String> mosesIniMap;
- private Configuration()
- {
- File configFile = new File(CONFIG_DIR, CONFIG_FILE);
- boolean bDone = false;
- if (configFile.exists())
- {
- log.info("Loading " + CONFIG_FILE + " FROM " + configFile.getAbsolutePath());
- try
- {
- this.config = new PropertiesConfiguration(configFile);
- bDone = true;
- }
- catch (ConfigurationException ce)
- {
- ce.printStackTrace();
- }
- }
- if (!bDone)
- {
- log.info("JLT starts with empty configuration");
- this.config = new PropertiesConfiguration();
- }
- }
- public static Configuration getInstance()
- {
- if (instance == null) {
- instance = new Configuration();
- }
- return instance;
- }
- public void setConfigurationSubDirectory(String configurationSubdir)
- {
- setConfigurationFile(
- new File(CONFIG_DIR + File.separator + configurationSubdir,
- CONFIG_FILE));
- }
- public void setConfigurationFile(File configurationFile)
- {
- log.info("Changing configuration properties to " + configurationFile);
- try
- {
- this.config = new PropertiesConfiguration(configurationFile);
- this.config.setBasePath(
- configurationFile.getParentFile().getAbsolutePath());
- }
- catch (ConfigurationException ce)
- {
- ce.printStackTrace();
- log.info("Setting JLT to an empty configuration");
- this.config = new PropertiesConfiguration();
- }
- }
- public String getWordNetData(WordNetVersion wnv)
- {
- switch (wnv)
- {
- case WN_16:
- return this.config.getString("wordnet.wordnetData1.6");
- case WN_171:
- return this.config.getString("wordnet.wordnetData1.7.1");
- case WN_20:
- return this.config.getString("wordnet.wordnetData2.0");
- case WN_21:
- return this.config.getString("wordnet.wordnetData2.1");
- case WN_30:
- return this.config.getString("wordnet.wordnetData3.0");
- }
- throw new RuntimeException("Invalid WordNet version: " + wnv);
- }
- public WordNetWeightType getWordNetWeightType()
- {
- return WordNetWeightType.valueOf(this.config.getString("wordnet.weightType"));
- }
- public String getWordNetWeightFile(WordNetWeightType wType)
- {
- switch (wType)
- {
- case FREQUENCY:
- case GLOSS_OVERLAP:
- return this.config.getString("wordnet.weightPrefix") + "_" + wType.name().toLowerCase() + ".txt";
- }
- throw new RuntimeException("Invalid WordNet weight type: " + wType);
- }
- public int getMaxFilesPerDir()
- {
- return this.config.getInt("wiki.dump.maxFilesPerDir");
- }
- public int getDumpStartDir()
- {
- return this.config.getInt("wiki.dump.startDir");
- }
- public int getDumpEndDir()
- {
- return this.config.getInt("wiki.dump.endDir");
- }
- public String getDocDir()
- {
- return this.config.getString("wiki.dump.docDir");
- }
- public String getWikipediaVersionFile()
- {
- return this.config.getString("wiki.dump.versionFile");
- }
- public String getWikipediaXMLDump()
- {
- return getWikipediaXMLDump(getIndexFactoryLanguage());
- }
- public String getWikipediaXMLDump(Language language)
- {
- WikiVersions wikiVersions = WikiVersions.getInstance();
- String version = wikiVersions.getVersion(language);
- String base = this.config.getString("wiki.dump.dir");
- if (base == null) {
- throw new RuntimeException("Parameter 'wiki.dump.dir' not specified in the config file");
- }
- String wikiName = language.toString().toLowerCase() + "wiki";
- String xml = base + File.separator +
- wikiName + File.separator +
- wikiName + "-" + version + "-pages-articles.xml";
- return xml;
- }
- public String getMosesIniFile(Language lang)
- {
- if (this.mosesIniMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("moses.iniFiles")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.mosesIniMap = Strings.parseLanguageParameterValues(parameters);
- if (this.mosesIniMap == null) {
- throw new RuntimeException("Invalid language specified in field moses.iniFiles");
- }
- }
- return (String)Collections.getFirst(this.mosesIniMap.get(lang));
- }
- public String getMosesBin()
- {
- return this.config.getString("moses.bin");
- }
- public String getRedirectionLabel()
- {
- return getRedirectionLabel(getIndexFactoryLanguage());
- }
- public String getRedirectionLabel(Language language)
- {
- if (this.redirectionMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("wiki.dump.redirectionLabel")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.redirectionMap = Strings.parseLanguageParameterValues(parameters);
- if (this.redirectionMap == null) {
- throw new RuntimeException("Invalid language specified in field wiki.dump.redirectionLabel");
- }
- }
- String redirectionLabel = (String)Collections.getFirst(this.redirectionMap.get(language));
- return redirectionLabel;
- }
- public Set<String> getListPrefix()
- {
- return getListPrefix(getIndexFactoryLanguage());
- }
- public Set<String> getListPrefix(Language language)
- {
- if (this.listMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("wiki.dump.listPrefix")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.listMap = Strings.parseLanguageParameterValues(parameters);
- if (this.listMap == null) {
- throw new RuntimeException("Invalid language specified in field wiki.dump.listPrefix");
- }
- }
- Set<String> prefixes = new HashSet();
- prefixes.addAll(this.listMap.get(language));
- return prefixes;
- }
- public Set<String> getGlossaryPrefix()
- {
- return getGlossaryPrefix(getIndexFactoryLanguage());
- }
- public Set<String> getGlossaryPrefix(Language language)
- {
- if (this.glossaryMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("wiki.dump.glossaryPrefix")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.glossaryMap = Strings.parseLanguageParameterValues(parameters);
- if (this.glossaryMap == null) {
- throw new RuntimeException("Invalid language specified in field wiki.dump.glossaryPrefix");
- }
- }
- Set<String> prefixes = new HashSet();
- prefixes.addAll(this.glossaryMap.get(language));
- return prefixes;
- }
- public String getCategoryPrefix()
- {
- return getCategoryPrefix(getIndexFactoryLanguage());
- }
- public String getCategoryPrefix(Language language)
- {
- if (this.categoryMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("wiki.dump.categoryPrefix")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.categoryMap = Strings.parseLanguageParameterValues(parameters);
- if (this.categoryMap == null) {
- throw new RuntimeException("Invalid language specified in field wiki.dump.categoryPrefix");
- }
- }
- String categoryPrefix = (String)Collections.getFirst(this.categoryMap.get(language));
- if (!categoryPrefix.endsWith(":")) {
- categoryPrefix = categoryPrefix + ":";
- }
- return categoryPrefix;
- }
- public Map<Language, String> getCategoryPrefixes()
- {
- if (this.categoryMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("wiki.dump.categoryPrefix")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.categoryMap = Strings.parseLanguageParameterValues(parameters);
- if (this.categoryMap == null) {
- throw new RuntimeException("Invalid language specified in field wiki.dump.categoryPrefix");
- }
- }
- Map<Language, String> categoryPrefixes = new HashMap();
- for (Language language : this.categoryMap.keySet())
- {
- String categoryPrefix = (String)Collections.getFirst(this.categoryMap.get(language));
- categoryPrefixes.put(language, categoryPrefix);
- }
- return categoryPrefixes;
- }
- public Set<String> getInfoboxLabels()
- {
- return getInfoboxLabels(getIndexFactoryLanguage());
- }
- public Set<String> getInfoboxLabels(Language language)
- {
- if (this.infoboxLabelMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("wiki.dump.infoboxLabel")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.infoboxLabelMap = Strings.parseLanguageParameterValues(parameters);
- if (this.infoboxLabelMap == null) {
- throw new RuntimeException("Invalid language specified in field wiki.dump.infoboxLabel");
- }
- }
- Set<String> labels = new HashSet();
- labels.addAll(this.infoboxLabelMap.get(language));
- return labels;
- }
- public Set<String> getInfoboxImageFields()
- {
- return getInfoboxImageFields(getIndexFactoryLanguage());
- }
- public Set<String> getInfoboxImageFields(Language language)
- {
- if (this.infoboxImageFieldMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("wiki.dump.infoboxImageField")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.infoboxImageFieldMap = Strings.parseLanguageParameterValues(parameters);
- if (this.infoboxImageFieldMap == null) {
- throw new RuntimeException("Invalid language specified in field wiki.dump.infoboxImageField");
- }
- }
- Set<String> fields = new HashSet();
- fields.addAll(this.infoboxImageFieldMap.get(language));
- return fields;
- }
- public Set<String> getDisambiguationPrefixes()
- {
- return getDisambiguationPrefixes(getIndexFactoryLanguage());
- }
- public Set<String> getDisambiguationPrefixes(Language language)
- {
- if (this.disambiguationMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("wiki.dump.disambiguationPrefix")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.disambiguationMap = Strings.parseLanguageParameterValues(parameters);
- if (this.disambiguationMap == null) {
- throw new RuntimeException("Invalid language specified in field wiki.dump.disambiguationPrefix");
- }
- }
- Set<String> prefixes = new HashSet();
- prefixes.addAll(this.disambiguationMap.get(language));
- return prefixes;
- }
- public Set<String> getShortDisambiguationPrefix()
- {
- return getShortDisambiguationPrefix(getIndexFactoryLanguage());
- }
- public Set<String> getShortDisambiguationPrefix(Language language)
- {
- if (this.shortDisambiguationMap == null)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("wiki.dump.shortDisambiguationPrefix")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- this.shortDisambiguationMap = Strings.parseLanguageParameterValues(parameters);
- if (this.shortDisambiguationMap == null) {
- throw new RuntimeException("Invalid language specified in field wiki.dump.shortDisambiguationPrefix");
- }
- }
- Set<String> prefixes = new HashSet();
- prefixes.addAll(this.shortDisambiguationMap.get(language));
- return prefixes;
- }
- public String getHeiNERFile()
- {
- return this.config.getString("heiner.data");
- }
- public String getMosesTempDir()
- {
- return this.config.getString("jlt.mosesTempDir");
- }
- public String getTreeTaggerDirectory()
- {
- return this.config.getString("treetagger.home");
- }
- public String getTreeTaggerModel()
- {
- return this.config.getString("treetagger.model");
- }
- public int getIndexStartDir()
- {
- return this.config.getInt("wiki.index.startDir");
- }
- public int getIndexEndDir()
- {
- return this.config.getInt("wiki.index.endDir");
- }
- public Language getIndexFactoryLanguage()
- {
- return Language.valueOf(this.config.getString("wiki.index.indexFactoryLanguage"));
- }
- public Language getCategoryIndexFactoryLanguage()
- {
- return Language.valueOf(this.config.getString("wiki.index.categoryIndexFactoryLanguage"));
- }
- public String getIndexFactoryDir()
- {
- return getIndexDir(getIndexFactoryLanguage());
- }
- public String getCategoryIndexFactoryDir()
- {
- return getCategoryIndexDir(getCategoryIndexFactoryLanguage());
- }
- public String getIndexDir(Language language)
- {
- return this.config.getString("wiki.index.indexDir") + File.separator + language.toString().toLowerCase();
- }
- public String getCategoryIndexDir(Language language)
- {
- return this.config.getString("wiki.index.categoryIndexDir") + File.separator + language.toString().toLowerCase();
- }
- public String getWikiGlossIndexDir(Language language)
- {
- return this.config.getString("wiki.index.glossindexDir") + File.separator + language.toString().toLowerCase();
- }
- public String getWikiGlossDump(Language language)
- {
- return
- this.config.getString("wiki.dump.glossFilePrefix") + File.separator + "gloss-" + language.toString().toLowerCase() + ".txt";
- }
- public boolean getIndexResume()
- {
- return this.config.getBoolean("wiki.index.resume");
- }
- public String getCategoriesConcepts()
- {
- return this.config.getString("wiki.categories.concepts");
- }
- public String getCategoriesHeads()
- {
- return this.config.getString("wiki.categories.heads");
- }
- public String getStopwordsFile(Language language)
- {
- return this.config.getString("stopwords.FilePrefix") + "_" + language.toString().toLowerCase() + ".txt";
- }
- public String getFunctionwordsFile(Language language)
- {
- return this.config.getString("functionwords.FilePrefix") + "_" + language.toString().toLowerCase() + ".txt";
- }
- public int getMaxMergeDocs()
- {
- return Integer.parseInt(this.config.getString("wiki.index.maxmergedocs"));
- }
- public int getIndexNumThreads()
- {
- return this.config.getInt("wiki.index.numThread");
- }
- public String getEntityList(Language language)
- {
- return this.config.getString("wiki.lexiconDir") + File.separator + "entities-" + language + ".txt";
- }
- public String getTermList(Language language)
- {
- return this.config.getString("wiki.lexiconDir") + File.separator + "terms-" + language + ".txt";
- }
- public List<String> getItalianLemmaTable()
- {
- return this.config.getList("lemmaTable.italian");
- }
- public List<String> getEnglishLemmaTable()
- {
- return this.config.getList("lemmaTable.english");
- }
- public boolean indexOnlyWikiPageRedirectionText()
- {
- return this.config.getBoolean("wiki.index.onlyWikiPageRedirectionText");
- }
- public boolean indexOnlyWikiPageInformation()
- {
- return this.config.getBoolean("wiki.index.onlyWikiPageInformation");
- }
- public int getMaxPagesPerFile()
- {
- return this.config.getInt("wiki.dump.maxPagesPerFile");
- }
- public String getPagesSeparator()
- {
- return this.config.getString("wiki.dump.pagesSeparator");
- }
- public String getLexiconFileName()
- {
- return this.config.getString("wiki.dump.lexiconFileName");
- }
- public int getBufferSizeWriter()
- {
- return this.config.getInt("wiki.dump.bufferSizeWriter");
- }
- public int getBufferSizeReader()
- {
- return this.config.getInt("wiki.dump.bufferSizeReader");
- }
- public String getHypernymCategoryTreeFileName()
- {
- return this.config.getString("wiki.dump.categoryTreeFileName") + ".hyper.txt";
- }
- public String getHyponymCategoryTreeFileName()
- {
- return this.config.getString("wiki.dump.categoryTreeFileName") + ".hypo.txt";
- }
- public boolean getWordNetUnlimitedCache()
- {
- return this.config.getBoolean("wordnet.useUnlimitedCache");
- }
- public String getWordNetMappings(WordNetVersion wnv)
- {
- switch (wnv)
- {
- case WN_15:
- return this.config.getString("wordnet.wordnetMap1.5");
- case WN_16:
- return this.config.getString("wordnet.wordnetMap1.6");
- case WN_171:
- return this.config.getString("wordnet.wordnetMap1.7.1");
- case WN_20:
- return this.config.getString("wordnet.wordnetMap2.0");
- case WN_21:
- return this.config.getString("wordnet.wordnetMap2.1");
- }
- throw new RuntimeException("Invalid WordNet version: " + wnv);
- }
- public String getWordNetGlosses()
- {
- return this.config.getString("wordnet.wordnetGlosses");
- }
- public boolean useWordNetMonosemousGlossWords()
- {
- return this.config.getBoolean("wordnet.useMonosemousGlossWords");
- }
- public boolean useWordNetDisambiguatedGlossWords()
- {
- return this.config.getBoolean("wordnet.useDisambiguatedGlossWords");
- }
- public String getSemcorHome()
- {
- return this.config.getString("semcor.semcorHome");
- }
- public int getLogNumPages()
- {
- return this.config.getInt("wiki.dump.logNumPages");
- }
- public String getFullGoogleDefineURL(String query)
- {
- return this.config.getString("google.fullDefineUrl").replaceAll("QUERY", query);
- }
- public String getGoogleAjaxURL()
- {
- return this.config.getString("google.ajaxUrl");
- }
- public String getYahooSearchUrl()
- {
- return this.config.getString("yahoo.searchUrl");
- }
- public String getYahooSearchUrlAppendix()
- {
- return this.config.getString("yahoo.searchUrl.appendix");
- }
- public String getGoogleRequester()
- {
- return this.config.getString("google.requester");
- }
- public int getWikicoWindowRadius()
- {
- return this.config.getInt("wikico.windowRadius");
- }
- public String getWikicoLinksFile()
- {
- return this.config.getString("wikico.links");
- }
- public String getWikicoDBUser()
- {
- return this.config.getString("wikico.db.user");
- }
- public String getWikicoDBPassword()
- {
- return this.config.getString("wikico.db.password");
- }
- public String getWikicoDBName()
- {
- return this.config.getString("wikico.db.name");
- }
- public String getWikicoDBURL()
- {
- return this.config.getString("wikico.db.url");
- }
- public String getWikipediaSentenceIndexDirectory()
- {
- return this.config.getString("wiki.sentenceIndexDir");
- }
- public String getWikipediaDisambiguationIndexDirectory()
- {
- return this.config.getString("wiki.index.disambiguationIndexDir");
- }
- public String getUKWacSentenceIndexDirectory()
- {
- return this.config.getString("ukwac.sentenceIndexDir");
- }
- public String getUKWacCompoundSentenceIndexDirectory()
- {
- return this.config.getString("ukwac.compoundSentenceIndexDir");
- }
- public String getUKWacDirectory()
- {
- return this.config.getString("jlt.ukwacDir");
- }
- public String getUkwacWordlistFilepath()
- {
- return this.config.getString("ukwac.wordlistFilepath");
- }
- public int getUKWacCompundMaxLength()
- {
- return this.config.getInt("ukwac.compoundMaxLength");
- }
- public int getUKWacCorrelationWindow()
- {
- return this.config.getInt("ukwac.correlationWindow");
- }
- @Deprecated
- public String getUKWaccoDBUser()
- {
- return getCollocsUser(CollocsDB.UKWACCO);
- }
- @Deprecated
- public String getUKWaccoDBPassword()
- {
- return getCollocsPassword(CollocsDB.UKWACCO);
- }
- @Deprecated
- public String getUKWaccoDBURL()
- {
- return getCollocsLocation(CollocsDB.UKWACCO);
- }
- public String getUKWaccoDBName()
- {
- return this.config.getString("ukwac.db.name");
- }
- public Integer getMaxTopDocs()
- {
- return Integer.valueOf(this.config.getInt("jlt.maxTopDocs"));
- }
- public boolean useProxy()
- {
- return this.config.getBoolean("web.useProxy");
- }
- public String getProxyDirectory()
- {
- return this.config.getString("web.proxyList");
- }
- public String getHtmlUtf8EncodeFile()
- {
- return this.config.getString("web.htmlUtf8Encode");
- }
- public Integer getYahooResultsNumber()
- {
- return Integer.valueOf(this.config.getInt("yahoo.resultsNumber"));
- }
- public String getGoogleTranslateAPIUSer()
- {
- return this.config.getString("google.translate.api.user");
- }
- public String getGoogleTranslateAPIPassword()
- {
- return this.config.getString("google.translate.api.passwd");
- }
- public String getGoogleTranslateAPIApplicationName()
- {
- return this.config.getString("google.translate.api.applicationName");
- }
- public String getStanfordParserModel()
- {
- return this.config.getString("stanford.parser.model");
- }
- public String getStanfordNERModel()
- {
- return this.config.getString("stanford.ner.model");
- }
- public String getStanfordPOSModel()
- {
- return this.config.getString("stanford.pos.model");
- }
- public List<Pair<String, String>> getClassSerializationParameterValues(Class<?> c)
- {
- return getClassSerializationParameterValues(c.getCanonicalName());
- }
- public List<Pair<String, String>> getClassSerializationParameterValues(String className)
- {
- List<Pair<String, String>> pairs = new ArrayList();
- String serializationParameterList = "jlt.ser.parameters." + className;
- for (Object param : this.config.getList(serializationParameterList))
- {
- String paramName = param.toString();
- String paramValue = this.config.getList(paramName).toString();
- pairs.add(new Pair(paramName, paramValue));
- }
- return pairs;
- }
- public String getSerializationDir()
- {
- return this.config.getString("jlt.ser.dir");
- }
- public boolean isSerializationConfigured()
- {
- return getSerializationDir() != null;
- }
- public List<String> getWordNetPlusPlusSourceFiles()
- {
- List<String> sources = new ArrayList();
- for (Object s : this.config.getList("wnpp.sourceFiles")) {
- sources.add(s.toString());
- }
- return sources;
- }
- public CollocsDB getCollocsDB()
- {
- return CollocsDB.valueOf(this.config.getString("jlt.db.collocsDB"));
- }
- public String getCollocsLocation(CollocsDB collocsDB)
- {
- Iterator<?> i = this.config.getList("jlt.db.locations").iterator();
- while (i.hasNext())
- {
- String dbType = i.next().toString();
- if (!i.hasNext()) {
- throw new RuntimeException("Missing location for CollocsDB type " + dbType);
- }
- String dbLocation = i.next().toString();
- if (collocsDB.toString().equals(dbType)) {
- return dbLocation;
- }
- }
- throw new RuntimeException("UNKNOWN COLLOCS DB: " + collocsDB);
- }
- public String getCollocsUser(CollocsDB collocsDB)
- {
- Iterator<?> i = this.config.getList("jlt.db.users").iterator();
- if (collocsDB.isMySQLDB())
- {
- while (i.hasNext())
- {
- String dbType = i.next().toString();
- if (!i.hasNext()) {
- throw new RuntimeException("Missing user for CollocsDB type " + dbType);
- }
- String dbUser = i.next().toString();
- if (collocsDB.toString().equals(dbType)) {
- return dbUser;
- }
- }
- throw new RuntimeException("UNKNOWN COLLOCS DB: " + collocsDB);
- }
- return null;
- }
- public String getCollocsPassword(CollocsDB collocsDB)
- {
- Iterator<?> i = this.config.getList("jlt.db.passwords").iterator();
- if (collocsDB.isMySQLDB())
- {
- while (i.hasNext())
- {
- String dbType = i.next().toString();
- if (!i.hasNext()) {
- throw new RuntimeException("Missing password for CollocsDB type " + dbType);
- }
- String dbPass = i.next().toString();
- if (collocsDB.toString().equals(dbType)) {
- return dbPass;
- }
- }
- throw new RuntimeException("UNKNOWN COLLOCS DB: " + collocsDB);
- }
- return null;
- }
- public String getEuroParlAlignmentFile(Language source, Language target)
- {
- String base = this.config.getString("europarl.baseName");
- return base + "-" + source.name() + "-" + target.name() + ".txt";
- }
- public String getSemEvalTestIndexDirectory()
- {
- return this.config.getString("semeval.index.testDir");
- }
- public String getSemEvalTrainingIndexDirectory()
- {
- return this.config.getString("semeval.index.trainingDir");
- }
- public String getSemEvalDataDirectory(CollocsDB collocsDB)
- {
- switch (collocsDB)
- {
- case SEMEVAL10_TRAIN:
- return getSemEvalTestDataDirectory();
- case SEMEVAL10_TEST:
- return getSemEvalTrainingDataDirectory();
- }
- throw new RuntimeException("UNKNOWN COLLOCS DB: " + collocsDB);
- }
- public String getSemEvalTestDataDirectory()
- {
- return this.config.getString("semeval.data.testDir");
- }
- public String getSemEvalTrainingDataDirectory()
- {
- return this.config.getString("semeval.data.trainingDir");
- }
- public String getSemEvalDataset()
- {
- return this.config.getString("semeval.dataset");
- }
- public String getSemEval07TestDataDirectory()
- {
- return this.config.getString("semeval07.data.testDir");
- }
- public String getSemEval07TrainingDataDirectory()
- {
- return this.config.getString("semeval07.data.trainingDir");
- }
- public String getSemEval07Dataset()
- {
- return this.config.getString("semeval07.dataset");
- }
- public String getSemeval07ParsedFilePath()
- {
- return this.config.getString("semeval07.parsedSentences");
- }
- public String getSemeval07Keys()
- {
- return this.config.getString("semeval07.keys");
- }
- public String getSemeval07SyntacticRelations()
- {
- return this.config.getString("semeval07.syntacticRelations");
- }
- public String getSemeval07WsiTestData()
- {
- return this.config.getString("semeval07.wsi.data.test");
- }
- public String getSemeval07WsiTestWords()
- {
- return this.config.getString("semeval07.wsi.data.words");
- }
- public List<String> getSemeval07WsiPoses()
- {
- return this.config.getList("semeval07.wsi.poses");
- }
- public String getSemeval07WSISimilarityDir()
- {
- return this.config.getString("semeval07.wsi.similarity_dir");
- }
- public String getURPGoogleSearchURL(Language targetLanguage)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("google.urp.searchURL")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- Object lang2values = Strings.parseLanguageParameterValues(parameters);
- if (((Multimap)lang2values).keySet().contains(targetLanguage))
- {
- Collection<String> values = ((Multimap)lang2values).get(targetLanguage);
- if (!values.isEmpty()) {
- return (String)values.iterator().next();
- }
- }
- return null;
- }
- public String getURPGoogleSearchURL()
- {
- return getURPGoogleSearchURL(Language.EN);
- }
- public String getFullGoogleAjaxURL(Language targetLanguage)
- {
- List<String> paramlist = new ArrayList();
- for (Object elem : this.config.getList("google.fullAjaxUrl")) {
- paramlist.add(elem.toString());
- }
- String parameters = Strings.join(paramlist, ",");
- Object lang2values = Strings.parseLanguageParameterValues(parameters);
- if (((Multimap)lang2values).keySet().contains(targetLanguage))
- {
- Collection<String> values = ((Multimap)lang2values).get(targetLanguage);
- if (!values.isEmpty()) {
- return (String)values.iterator().next();
- }
- }
- return null;
- }
- public String getFullGoogleAjaxURL()
- {
- return getFullGoogleAjaxURL(Language.EN);
- }
- public GoogleSearchMethod getGoogleSearchMethod()
- {
- return GoogleSearchMethod.valueOf(this.config.getString("google.searchMethod"));
- }
- public String getWebSnippetCache()
- {
- return this.config.getString("websnippetcache.folder");
- }
- public String getWebCounterCache()
- {
- return this.config.getString("webcountercache.folder");
- }
- public boolean getWebQueryEnabled()
- {
- return this.config.getBoolean("websnippetcache.postNewQueries");
- }
- public boolean getWebCounterEnabled()
- {
- return this.config.getBoolean("webcountercache.postNewQueries");
- }
- public String getMultilanguagePOSMappingFolder()
- {
- return this.config.getString("multilanguagePOSMapping.folder");
- }
- public String getWiktionaryDump()
- {
- return this.config.getString("wiktionary.dump");
- }
- public String getWiktionaryIndex()
- {
- return this.config.getString("wiktionary.index");
- }
- public String getGigawordTaggedDirectory()
- {
- return this.config.getString("jlt.gigawordTaggedDir");
- }
- public String getGigawordSentenceCompoundIndexDirectory()
- {
- return this.config.getString("jlt.gigawordCompoundIndexDir");
- }
- public String getGigawordDumpWordlistFilepath()
- {
- return this.config.getString("jlt.gigawordDumpWordListFilePath");
- }
- public List<String> getGigawordSubdirectories()
- {
- List<String> subdirs = new ArrayList();
- for (Object elem : this.config.getList("jlt.gigawordSubdirectories")) {
- subdirs.add(elem.toString());
- }
- return subdirs;
- }
- public String getGigawordIndexWordlistFilepath()
- {
- return this.config.getString("jlt.gigawordIndexWordListFilePath");
- }
- public String getGigawordParsedDir()
- {
- return this.config.getString("jlt.gigawordParsedDir");
- }
- public String getUkwaccoDefaultLexicon()
- {
- return this.config.getString("ukwac.factory.defaultLexicon");
- }
- public String getUkwaccoCustomLexiconLocation()
- {
- return this.config.getString("ukwac.factory.customLexicon");
- }
- public String getBncDirectory()
- {
- return this.config.getString("jlt.bncDir");
- }
- public String getBncLexiconFile()
- {
- return this.config.getString("jlt.bncLexiconFile");
- }
- public String getBncMatrixOfContextsPath()
- {
- return this.config.getString("jlt.bncContextMatrix");
- }
- public String getBncVectorMethod()
- {
- return this.config.getString("jlt.bncVectorMethod");
- }
- public String getBncDataInputDir()
- {
- return this.config.getString("jlt.bncDataInputDir");
- }
- public String getBncCooccurrenceFile()
- {
- return this.config.getString("jlt.bncCooccurrencesFile");
- }
- public boolean getBncOnlyContentWords()
- {
- return this.config.getBoolean("jlt.bncGetOnlyContentWords");
- }
- public String getBncDocuments()
- {
- return this.config.getString("jlt.bncDocuments");
- }
- public int getBncTopKWords()
- {
- return this.config.getInt("jlt.bncTopKWords");
- }
- public int getBncCooccurrenceWindow()
- {
- return this.config.getInt("jlt.bncCooccurrenceWindow");
- }
- public String getBncMatrixOfCooccurrencesPath()
- {
- return this.config.getString("jlt.bncCooccurrenceMatrix");
- }
- public String getMiniparDir()
- {
- return this.config.getString("minipar.miniparHomeDir");
- }
- public String getAolQueryDirectory()
- {
- return this.config.getString("aol.datasetDir");
- }
- public String getAolDBName()
- {
- return this.config.getString("aol.db.name");
- }
- public String getBncStopwordsFile()
- {
- return this.config.getString("jlt.bnc.stopwords.filename");
- }
- public String getGigawordDirectory()
- {
- return this.config.getString("jlt.gigawordOriginalDir");
- }
- public String getGigaword5Directory()
- {
- return this.config.getString("jlt.gigawordOriginal5Dir");
- }
- public String getBncStanfordParsedFile()
- {
- return this.config.getString("jlt.bnc.parsed.stanfordfile");
- }
- public String getJWeb1TDir()
- {
- return this.config.getString("jweb1t.indexdir");
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement