////////////////////////////////////////////////////////////////// // // FreeLing - Open Source Language Analyzers // // Copyright (C) 2004 TALP Research Center // Universitat Politecnica de Catalunya // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // contact: Lluis Padro (padro@lsi.upc.es) // TALP Research Center // despatx C6.212 - Campus Nord UPC // 08034 Barcelona. SPAIN // //////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////// // // freeling_javaAPI.i // This is the SWIG input file, used to generate perl/python/java APIs.
//
////////////////////////////////////////////////////////////////

%module freeling
%{
 #include "freeling.h"
 #include "freeling/tree.h"
 #include "freeling/morfo/traces.h"
 using namespace std;
%}

%include std_wstring.i
%include std_list.i
%include std_vector.i
%include std_map.i

// Container instantiations exposed to the target language. The element type
// of each one matches the wrapper name given to %template.
%template(VectorWord) std::vector<word>;
%template(ListWord) std::list<word>;
%template(ListAnalysis) std::list<analysis>;
%template(ListSentence) std::list<sentence>;
%template(ListParagraph) std::list<paragraph>;

%template(ListString) std::list<std::wstring>;
%template(ListInt) std::list<int>;
%template(VectorListInt) std::vector<std::list<int> >;
%template(VectorListString) std::vector<std::list<std::wstring> >;

############### MORFO #####################

// forward declarations
template <class T> class tree;
template <class T> class preorder_iterator;

/// base class of the tree iterators; holds a pointer to a tree node
template <class T>
class generic_iterator {
 protected:
  tree<T> *pnode;
 public:
  generic_iterator(void);
  generic_iterator(const generic_iterator<T> &);
  generic_iterator(tree<T> *);
  tree<T>& operator*(void) const;
  tree<T>* operator->(void) const;
  //bool operator==(const generic_iterator<T> &) const;
  //bool operator!=(const generic_iterator<T> &) const;
};

/// traverse all children of the same node
template <class T>
class sibling_iterator : public generic_iterator<T> {
  friend class preorder_iterator<T>;
 public:
  sibling_iterator(void);
  sibling_iterator(const sibling_iterator<T> &);
  sibling_iterator(tree<T> *);
  //sibling_iterator<T>& operator++(void);
  //sibling_iterator<T>& operator--(void);
  //sibling_iterator<T>& operator+=(unsigned int);
  //sibling_iterator<T>& operator-=(unsigned int);
};

/// traverse the tree in preorder (parent first, then children)
template <class T>
class preorder_iterator : public generic_iterator<T> {
 public:
  preorder_iterator(void);
  preorder_iterator(const preorder_iterator<T> &);
  preorder_iterator(tree<T> *);
  preorder_iterator(sibling_iterator<T> &);
  //preorder_iterator<T>& operator++(void);
  //preorder_iterator<T>& operator--(void);
  //preorder_iterator<T>& operator+=(unsigned int);
  //preorder_iterator<T>& operator-=(unsigned int);
};

/// generic tree whose nodes carry a value of type T in member "info"
template <class T>
class tree {
  friend class preorder_iterator<T>;
  friend class sibling_iterator<T>;
 public:
  T info;

  // Inside the class the iterator names refer to the instantiations for T.
  typedef class generic_iterator<T> generic_iterator;
  typedef class preorder_iterator<T> preorder_iterator;
  // class const_preorder_iterator;
  typedef class sibling_iterator<T> sibling_iterator;
  // class const_sibling_iterator;
  typedef preorder_iterator iterator;

  tree(void);
  tree(const T&);
  tree(const tree<T>&);
  tree(const preorder_iterator&);
  ~tree(void);
  // tree<T>& operator=(const tree<T>&);

  unsigned int num_children(void) const;
  sibling_iterator nth_child(unsigned int) const;
  tree<T>& nth_child_ref(unsigned int) const;
  T& get_info(void);
  void append_child(const tree<T> &);
  void hang_child(tree<T> &);
  void clear(void);
  bool empty(void) const;

  sibling_iterator sibling_begin(void);
  sibling_iterator sibling_end(void) const;
  preorder_iterator begin(void);
  preorder_iterator end(void) const;
};

// Tree and iterator instantiations for parse trees (node) and
// dependency trees (depnode).
%template(GenericIteratorNode) generic_iterator<node>;
%template(PreorderIteratorNode) preorder_iterator<node>;
%template(SiblingIteratorNode) sibling_iterator<node>;
%template(GenericIteratorDepnode) generic_iterator<depnode>;
%template(PreorderIteratorDepnode) preorder_iterator<depnode>;
%template(SiblingIteratorDepnode) sibling_iterator<depnode>;
%template(TreeNode) tree<node>;
%template(TreeDepnode) tree<depnode>;

// operator= is exposed to the target language as operator_assignment
%rename(operator_assignment) operator=;

/// Class analysis: one analysis of a word (lemma, tag, probability,
/// distance, retokenization info and senses are accessible below).
class analysis {
 public:
  /// user-managed data, we just store it.
  // NOTE(review): element type reconstructed as std::wstring -- confirm against the library headers.
  std::vector<std::wstring> user;

  /// constructor
  analysis(void);
  /// constructor
  analysis(const std::wstring &, const std::wstring &);
  /// assignment
  analysis& operator=(const analysis&);

  void set_lemma(const std::wstring &);
  void set_tag(const std::wstring &);
  void set_prob(double);
  void set_distance(double);
  void set_retokenizable(const std::list<word> &);

  bool has_prob(void) const;
  bool has_distance(void) const;
  std::wstring get_lemma(void) const;
  std::wstring get_tag(void) const;
  std::wstring get_short_tag(void) const;
  std::wstring get_short_tag(const std::wstring &) const;
  double get_prob(void) const;
  double get_distance(void) const;
  bool is_retokenizable(void) const;
  std::list<word> get_retokenizable(void) const;

  // NOTE(review): sense list element type reconstructed as pair<wstring,double> -- confirm.
  std::list<std::pair<std::wstring,double> > get_senses(void) const;
  void set_senses(const std::list<std::pair<std::wstring,double> > &);
  // useful for java API
  std::wstring get_senses_string(void) const;
};

////////////////////////////////////////////////////////////////
/// Class word stores all info related to a word:
/// form, list of analysis, list of tokens (if multiword).
////////////////////////////////////////////////////////////////

class word : public std::list<analysis> {
 public:
  /// user-managed data, we just store it.
  // NOTE(review): element type reconstructed as std::wstring -- confirm against the library headers.
  std::vector<std::wstring> user;

  /// constructor
  word(void);
  /// constructor
  word(const std::wstring &);
  /// constructor
  word(const std::wstring &, const std::list<word> &);
  /// constructor
  word(const std::wstring &, const std::list<analysis> &, const std::list<word> &);
  /// Copy constructor
  word(const word &);
  /// assignment
  word& operator=(const word&);

  /// copy analysis from another word
  void copy_analysis(const word &);
  /// Get the number of selected analysis
  int get_n_selected(void) const;
  /// get the number of unselected analysis
  int get_n_unselected(void) const;
  /// true iff the word is a multiword compound
  bool is_multiword(void) const;
  /// get number of words in compound
  int get_n_words_mw(void) const;
  /// get word objects that compound the multiword
  std::list<word> get_words_mw(void) const;
  /// get word form
  std::wstring get_form(void) const;
  /// Get word form, lowercased.
  std::wstring get_lc_form(void) const;
  /// Get an iterator to the first selected analysis
  word::iterator selected_begin(void);
  /// Get an iterator to the end of selected analysis list
  word::iterator selected_end(void);
  /// Get an iterator to the first unselected analysis
  word::iterator unselected_begin(void);
  /// Get an iterator to the end of unselected analysis list
  word::iterator unselected_end(void);
  /// get lemma for the selected analysis in list
  std::wstring get_lemma(void) const;
  /// get tag for the selected analysis
  std::wstring get_tag(void) const;
  /// get tag (short version) for the selected analysis, assuming eagles tagset
  std::wstring get_short_tag(void) const;
  /// get tag (short version) for the selected analysis
  std::wstring get_short_tag(const std::wstring &) const;

  /// get sense list for the selected analysis
  // NOTE(review): sense list element type reconstructed as pair<wstring,double> -- confirm.
  std::list<std::pair<std::wstring,double> > get_senses(void) const;
  // useful for java API
  std::wstring get_senses_string(void) const;
  /// set sense list for the selected analysis
  void set_senses(const std::list<std::pair<std::wstring,double> > &);

  /// get token span.
  unsigned long get_span_start(void) const;
  unsigned long get_span_finish(void) const;

  /// get in_dict
  bool found_in_dict(void) const;
  /// set in_dict
  void set_found_in_dict(bool);
  /// check if there is any retokenizable analysis
  bool has_retokenizable(void) const;
  /// mark word as having definitive analysis
  void lock_analysis(void);
  /// check if word is marked as having definitive analysis
  bool is_locked(void) const;

  /// add an alternative to the alternatives list
  void add_alternative(const word &, double);
  /// replace alternatives list with list given
  // NOTE(review): alternatives element type reconstructed as pair<word,double>,
  // mirroring add_alternative(const word &, double) -- confirm.
  void set_alternatives(const std::list<std::pair<word,double> > &);
  /// find out if the speller checked alternatives
  bool has_alternatives(void) const;
  /// get alternatives list
  std::list<std::pair<word,double> > get_alternatives(void) const;
  /// get alternatives begin iterator
  std::list<std::pair<word,double> >::iterator alternatives_begin(void);
  /// get alternatives end iterator
  std::list<std::pair<word,double> >::iterator alternatives_end(void);

  /// add one analysis to current analysis list (no duplicate check!)
  void add_analysis(const analysis &);
  /// set analysis list to one single analysis, overwriting current values
  void set_analysis(const analysis &);
  /// set analysis list, overwriting current values
  void set_analysis(const std::list<analysis> &);
  /// set word form
  void set_form(const std::wstring &);
  /// set token span
  void set_span(unsigned long, unsigned long);

  /// get number of analysis in current list
  int get_n_analysis(void) const;
  /// empty the list of selected analysis
  void unselect_all_analysis(void);
  /// mark all analyses as selected
  void select_all_analysis(void);
  /// add the given analysis to selected list.
  void select_analysis(word::iterator);
  /// remove the given analysis from selected list.
  void unselect_analysis(word::iterator);
  /// get list of analysis (useful for perl API)
  std::list<analysis> get_analysis(void) const;
  /// get begin iterator to analysis list (useful for perl/java API)
  word::iterator analysis_begin(void);
  /// get end iterator to analysis list (useful for perl/java API)
  word::iterator analysis_end(void);
};

////////////////////////////////////////////////////////////////
/// Class parse tree is used to store the results of parsing
/// Each node in the tree is either a label (intermediate node)
/// or a word (leaf node)
////////////////////////////////////////////////////////////////

class node {
 public:
  /// constructors
  node(void);
  node(const std::wstring &);
  /// get node identifier
  std::wstring get_node_id(void) const;
  /// set node identifier
  void set_node_id(const std::wstring &);
  /// get node label
  std::wstring get_label(void) const;
  /// get node word
  word get_word(void) const;
  /// set node label
  void set_label(const std::wstring &);
  /// set node word
  void set_word(word &);
  /// find out whether node is a head
  bool is_head(void) const;
  /// set whether node is a head
  void set_head(const bool);
  /// find out whether node is a chunk
  bool is_chunk(void) const;
  /// set position of the chunk in the sentence
  void set_chunk(const int);
  /// get position of the chunk in the sentence
  int get_chunk_ord(void) const;
};

/// parse tree: a tree whose nodes are "node" objects
class parse_tree : public tree<node> {
 public:
  parse_tree(void);
  parse_tree(const node &);
};

////////////////////////////////////////////////////////////////
/// class depnode stores nodes of a dependency tree and
/// parse tree <-> deptree relations
////////////////////////////////////////////////////////////////

class depnode : public node {
 public:
  depnode(void);
  depnode(const std::wstring &);
  depnode(const node &);
  void set_link(const parse_tree::iterator);
  parse_tree::iterator get_link(void);
  tree<node>& get_link_ref(void);
  void set_label(const std::wstring &);
};

////////////////////////////////////////////////////////////////
/// class dep_tree
/// stores a dependency tree
////////////////////////////////////////////////////////////////

class dep_tree : public tree<depnode> {
 public:
  dep_tree(void);
  dep_tree(const depnode &);
};

////////////////////////////////////////////////////////////////
/// Class sentence is just a list of words that someone
/// (the splitter) has validated as a complete sentence.
/// It may include a parse tree.
////////////////////////////////////////////////////////////////

class sentence : public std::list<word> {
 public:
  sentence(void);

  void set_parse_tree(const parse_tree &);
  parse_tree & get_parse_tree(void);
  bool is_parsed(void) const;

  dep_tree & get_dep_tree(void);
  void set_dep_tree(const dep_tree &);
  bool is_dep_parsed(void) const;

  /// get word list (useful for perl API)
  std::vector<word> get_words(void) const;
  /// get iterators to word list (useful for perl/java API)
  sentence::iterator words_begin(void);
  sentence::iterator words_end(void);
};

////////////////////////////////////////////////////////////////
/// Class paragraph is just a list of sentences that someone
/// has validated as a paragraph.
////////////////////////////////////////////////////////////////

class paragraph : public std::list<sentence> {};

////////////////////////////////////////////////////////////////
/// Class document is a list of paragraphs.
/// It may have additional information (such as title)
////////////////////////////////////////////////////////////////

class document : public std::list<paragraph> {
 public:
  document(void);

  void add_positive(std::wstring, std::wstring);
  int get_coref_group(std::wstring) const;
  // NOTE(review): element type reconstructed as std::wstring (the other
  // coreference methods take std::wstring arguments) -- confirm.
  std::list<std::wstring> get_coref_nodes(int) const;
  bool is_coref(std::wstring, std::wstring) const;
};

############### FREELING #####################

/// tracing configuration, exposed as static members
class traces {
 public:
  // current trace level
  static int TraceLevel;
  // modules to trace
  static unsigned long TraceModule;
};

/*------------------------------------------------------------------------*/
class tokenizer {
 public:
  /// Constructor
  tokenizer(const std::wstring &);

  /// tokenize wstring with default options
  std::list<word> tokenize(const std::wstring &);
  /// tokenize wstring with default options, tracking offset in given int param.
  std::list<word> tokenize(const std::wstring &, unsigned long &);
};

/*------------------------------------------------------------------------*/
class splitter {
 public:
  /// Constructor
  splitter(const std::wstring &);

  /// split sentences with default options
  std::list<sentence> split(const std::list<word> &, bool);
};

/*------------------------------------------------------------------------*/
class maco_options {
 public:
  // Language analyzed
  std::wstring Lang;

  /// Morphological analyzer active modules.
  bool AffixAnalysis, MultiwordsDetection,
       NumbersDetection, PunctuationDetection,
       DatesDetection, QuantitiesDetection,
       DictionarySearch, ProbabilityAssignment,
       OrthographicCorrection, UserMap;
  int NERecognition;

  /// Morphological analyzer modules configuration/data files.
  std::wstring LocutionsFile, QuantitiesFile, AffixFile,
               ProbabilityFile, DictionaryFile,
               NPdataFile, PunctuationFile,
               CorrectorFile, UserMapFile;

  /// module-specific parameters for number recognition
  std::wstring Decimal, Thousand;
  /// module-specific parameters for probabilities
  double ProbabilityThreshold;
  /// module-specific parameters for dictionary
  bool InverseDict,RetokContractions;

  /// constructor
  maco_options(const std::wstring &);

  /// Option setting methods provided to ease perl interface generation.
  /// Since option data members are public and can be accessed directly
  /// from C++, the following methods are not necessary, but may become
  /// convenient sometimes.
  void set_active_modules(bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool);
  void set_data_files(const std::wstring &,const std::wstring &,const std::wstring &,
                      const std::wstring &,const std::wstring &,const std::wstring &,
                      const std::wstring &,const std::wstring &, const std::wstring &);

  void set_nummerical_points(const std::wstring &,const std::wstring &);
  void set_threshold(double);
  void set_inverse_dict(bool);
  void set_retok_contractions(bool);
};

/*------------------------------------------------------------------------*/
class maco {
 public:
  /// Constructor
  maco(const maco_options &);

  /// analyze sentences
  void analyze(std::list<sentence> &);
};

/*------------------------------------------------------------------------*/
/// abstract tagger interface: annotate() is pure virtual
class POS_tagger {
 public:
  POS_tagger(bool,unsigned int);
  virtual ~POS_tagger() {};
  virtual void annotate(sentence &)=0;
  void analyze(std::list<sentence> &);
};

/*------------------------------------------------------------------------*/
class hmm_tagger : public POS_tagger {
 public:
  /// Constructor
  hmm_tagger(const std::wstring &, const std::wstring &, bool, unsigned int);

  /// analyze sentences
  void annotate(sentence &);
};

/*------------------------------------------------------------------------*/
class relax_tagger : public POS_tagger {
 public:
  /// Constructor, given the constraints file and config parameters
  relax_tagger(const std::wstring &, int, double, double, bool, unsigned int);

  /// analyze sentences
  void annotate(sentence &);
};

/*------------------------------------------------------------------------*/
class nec {
 public:
  /// Constructor
  nec(const std::wstring &);
  /// Destructor
  ~nec(void);

  /// Classify NEs in given sentence
  void analyze(std::list<sentence> &);
};

/*------------------------------------------------------------------------*/
class chart_parser {
 public:
  /// Constructors
  chart_parser(const std::wstring&);
  /// Get the start symbol of the grammar
  std::wstring get_start_symbol(void) const;

  /// parse sentences in list
  void analyze(std::list<sentence> &);
};

/*------------------------------------------------------------------------*/
/// abstract dependency parser interface: analyze() is pure virtual
class dependency_parser {
 public:
  dependency_parser(void);
  virtual ~dependency_parser(void) {};
  virtual void analyze(std::list<sentence> &)=0;
};

/*------------------------------------------------------------------------*/
class dep_txala : public dependency_parser {
 public:
  dep_txala(const std::wstring &, const std::wstring &);
  void analyze(std::list<sentence> &);
};

/*------------------------------------------------------------------------*/
class senses {
 public:
  /// Constructor
  senses(const std::wstring &);
  /// Destructor
  ~senses(void);

  /// sense annotate selected analysis for each word in given sentences
  void analyze(std::list<sentence> &);
};

/*------------------------------------------------------------------------*/
class ukb_wrap {
 public:
  /// Constructor
  ukb_wrap(const std::wstring &);
  /// Destructor
  ~ukb_wrap(void);

  /// word sense disambiguation for each word in given sentences
  void analyze(std::list<sentence> &);
};

/*------------------------------------------------------------------------*/
class sense_info {
 public:
  /// sense code
  std::wstring sense;
  // NOTE(review): the three list members below had their element type
  // reconstructed as std::wstring -- confirm against the library headers.
  /// hyperonyms
  std::list<std::wstring> parents;
  /// WN semantic file code
  std::wstring semfile;
  /// list of synonyms (words in the synset)
  std::list<std::wstring> words;
  /// list of EWN top ontology properties
  std::list<std::wstring> tonto;

  /// constructor
  sense_info(const std::wstring &,const std::wstring &);
  std::wstring get_parents_string(void) const;
};

////////////////////////////////////////////////////////////////
/// Class semanticDB implements a semantic DB interface
////////////////////////////////////////////////////////////////

class semanticDB {
 public:
  /// Constructor
  semanticDB(const std::wstring &);
  /// Destructor
  ~semanticDB();

  /// Compute list of lemma-pos to search in WN for given word, according to mapping rules.
  // NOTE(review): output list element type reconstructed as
  // pair<wstring,wstring> (lemma, pos) -- confirm.
  void get_WN_keys(const std::wstring &, const std::wstring &, const std::wstring &,
                   std::list<std::pair<std::wstring,std::wstring> > &) const;
  /// get list of words for a sense
  std::list<std::wstring> get_sense_words(const std::wstring &) const;
  /// get list of senses for a lemma+pos
  std::list<std::wstring> get_word_senses(const std::wstring &, const std::wstring &,
                                          const std::wstring &) const;
  /// get sense info for a sense
  sense_info get_sense_info(const std::wstring &) const;
};

class util {
 public:
  /// Init the locale of the program, to properly handle unicode
  static void init_locale(const std::wstring &);
};