Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package kodms;
- import java.io.IOException;
- import java.io.StringReader;
- import java.util.ArrayList;
- import java.util.HashSet;
- import no.uib.cipr.matrix.DenseMatrix;
- import no.uib.cipr.matrix.QRP;
- import org.apache.commons.math.linear.Array2DRowRealMatrix;
- import org.apache.commons.math.linear.RealMatrix;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.analysis.en.EnglishAnalyzer;
- import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- import org.apache.lucene.util.AttributeSource;
- import org.apache.lucene.util.Version;
- import cern.colt.matrix.tdouble.DoubleMatrix2D;
- import cern.colt.matrix.tdouble.algo.decomposition.DenseDoubleSingularValueDecomposition;
- import cern.colt.matrix.tdouble.impl.DenseDoubleMatrix2D;
- import magisterarbeit.SplitInSents;
- public class AlgorithmFactory implements AlgorithmFactoryInterface {
- @Override
- public AlgorithmInterface parser() {
- return new Parser();
- }
- public static class Parser implements AlgorithmInterface {
- protected ArrayList<String> dic;
- protected ArrayList<String> sentences;
- protected String text;
- @Override
- public void process() throws IOException {
- this.dic = new ArrayList<String>();
- this.sentences = new SplitInSents().split(text);
- Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_31);
- TokenStream stream = analyzer.tokenStream("contents",
- new StringReader(text));
- while (stream.incrementToken()) {
- AttributeSource token = stream.cloneAttributes();
- CharTermAttribute term = (CharTermAttribute) token
- .addAttribute(CharTermAttribute.class);
- dic.add(term.toString());
- }
- HashSet<String> hashSet = new HashSet<String>(dic);
- dic.clear();
- dic.addAll(hashSet);
- }
- public Parser init(String text) throws IOException {
- setText(text);
- process();
- return this;
- }
- public void setText(String text) {
- this.text = text;
- }
- public ArrayList<String> getDic() {
- return this.dic;
- }
- public ArrayList<String> getSentences() {
- return this.sentences;
- }
- }
- @Override
- public AlgorithmInterface st() {
- return new ST();
- }
- public static class ST implements AlgorithmInterface {
- protected ArrayList<String> dic;
- protected ArrayList<String> sentences;
- protected Matrix stmatrix;
- @Override
- public void process() throws IOException {
- this.stmatrix = new Matrix(new double[dic.size()][sentences.size()]);
- Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_31);
- for (int i = 0; i < dic.size(); i++) {
- for (int j = 0; j < sentences.size(); j++) {
- TokenStream streaming = analyzer.tokenStream("contents",
- new StringReader(sentences.get(j)));
- int count = 0;
- while (streaming.incrementToken()) {
- AttributeSource token = streaming.cloneAttributes();
- CharTermAttribute term = (CharTermAttribute) token
- .addAttribute(CharTermAttribute.class);
- if (term.toString().equalsIgnoreCase(dic.get(i)))
- count++;
- stmatrix.set(i, j, count);
- }
- }
- }
- }
- public ST init(ArrayList<String> dic, ArrayList<String> sentences)
- throws IOException {
- setDic(dic);
- setSentences(sentences);
- process();
- return this;
- }
- public void setDic(ArrayList<String> dic) {
- this.dic = dic;
- }
- public void setSentences(ArrayList<String> sentences) {
- this.sentences = sentences;
- }
- public Matrix getSTMatrix() {
- return stmatrix;
- }
- }
- @Override
- public AlgorithmInterface a() {
- return new A();
- }
- public static class A implements AlgorithmInterface {
- protected ArrayList<String> dic;
- protected ArrayList<String> sentences;
- protected MatrixInterface stmatrix;
- protected Matrix a;
- @Override
- public void process() {
- double[] g = new double[dic.size()];
- for (int i = 0; i < dic.size(); i++) {
- double chisum = 0;
- for (int j = 0; j < sentences.size(); j++) {
- chisum += Math.signum(stmatrix.get(i, j));
- }
- g[i] = Math.log(sentences.size() / chisum);
- }
- double[] d = new double[sentences.size()];
- for (int j = 0; j < sentences.size(); j++) {
- double lgsum = 0;
- for (int i = 0; i < dic.size(); i++) {
- lgsum += g[i] * Math.log(1 + stmatrix.get(i, j)) * g[i]
- * Math.log(1 + stmatrix.get(i, j));
- }
- d[j] = 1 / Math.sqrt(lgsum);
- }
- this.a = new Matrix(new double[dic.size()][sentences.size()]);
- for (int i = 0; i < dic.size(); i++) {
- for (int j = 0; j < sentences.size(); j++) {
- this.a.set(i, j, Math.log(1 + stmatrix.get(i, j)) * g[i]
- * d[j]);
- }
- }
- }
- public A init(ArrayList<String> dic, ArrayList<String> sentences,
- Matrix stmatrix) throws IOException {
- setDic(dic);
- setSentences(sentences);
- process();
- return this;
- }
- public void setDic(ArrayList<String> dic) {
- this.dic = dic;
- }
- public void setSentences(ArrayList<String> sentences) {
- this.sentences = sentences;
- }
- public void setSTMatrix(MatrixInterface stmatrix) {
- this.stmatrix = stmatrix;
- }
- public Matrix getA() {
- return a;
- }
- }
- @Override
- public AlgorithmInterface svd() {
- return new SVD();
- }
- public static class SVD implements AlgorithmInterface {
- protected MatrixInterface a;
- protected ColtMatrix v, u, s;
- @Override
- public void process() {
- DoubleMatrix2D A1 = new DenseDoubleMatrix2D(a.toArray());
- DenseDoubleSingularValueDecomposition svdcolt = new DenseDoubleSingularValueDecomposition(
- A1, true, true);
- v = new ColtMatrix(svdcolt.getV());
- u = new ColtMatrix(svdcolt.getU());
- s = new ColtMatrix(svdcolt.getS());
- }
- public SVD init(MatrixInterface a) {
- setA(a);
- process();
- return this;
- }
- public void setA(MatrixInterface a) {
- this.a = a;
- }
- public ColtMatrix getV() {
- return v;
- }
- public ColtMatrix getU() {
- return u;
- }
- public ColtMatrix getS() {
- return s;
- }
- }
- @Override
- public AlgorithmInterface qr() {
- return new QR();
- }
- public static class QR implements AlgorithmInterface {
- protected ArrayList<String> sentences;
- protected MatrixInterface v, u, s;
- protected MtjMatrix tp;
- protected int k;
- @Override
- public void process() {
- RealMatrix v1 = new Array2DRowRealMatrix(v.toArray());
- RealMatrix s1 = new Array2DRowRealMatrix(s.toArray());
- RealMatrix vt = v1.transpose();
- RealMatrix sthin = s1.getSubMatrix(0, k-1, 0, k-1);
- RealMatrix vtthin = vt.getSubMatrix(0, k-1, 0, vt.getColumnDimension()-1);
- RealMatrix dk = sthin.multiply(vtthin);
- double [][] fRow = new double [1][sentences.size()];
- for (int j = 0; j < sentences.size(); j++) {
- fRow[0][j] = j;
- }
- DenseMatrix tFirstRowInd = new DenseMatrix(fRow);
- DenseMatrix t = new DenseMatrix(dk.getData());
- QRP qrp = QRP.factorize(t);
- DenseMatrix qrP = (DenseMatrix) qrp.getP();
- DenseMatrix tp1 = new DenseMatrix(1,dk.getColumnDimension());
- tFirstRowInd.mult(qrP, tp1);
- this.tp = new MtjMatrix(tp1);
- }
- public QR init(ArrayList<String> sentences, MatrixInterface v, MatrixInterface u, MatrixInterface s, int k) {
- setSentences(sentences);
- setV(v);
- setU(u);
- setS(s);
- setK(k);
- process();
- return this;
- }
- public void setSentences(ArrayList<String> sentences) {
- this.sentences = sentences;
- }
- public void setV(MatrixInterface v) {
- this.v = v;
- }
- public void setU(MatrixInterface u) {
- this.u = u;
- }
- public void setS(MatrixInterface s) {
- this.s = s;
- }
- public void setK(int k) {
- this.k = k;
- }
- public MtjMatrix getTPMatrix() {
- return tp;
- }
- }
- }
Add Comment
Please, Sign In to add comment