Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package com.example.kamusindonesia;
- import java.io.BufferedReader;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import java.util.Locale;
- import java.util.Scanner;
- import android.content.Context;
- import android.util.Log;
- public class Stemming {
- Word originalword = new Word();
- String word = originalword.getWord();
- String stemword, bstemword = "";
- String suffix = "";
- String decodeword = "";
- String cword = "";
- String word1 = "";
- Scanner sc;
- int checked = 0;
- boolean found;
- public static int root=1;
- public void setStemWord(String word){
- this.stemword = word;
- }
- public String getStemWord(){
- return stemword;
- }
- public String stemWord(Context context, String word) throws IOException{
- word = word.toLowerCase();
- if (!word.trim().equals("")){
- stemword = word;
- found = checkWord(context, word);
- if(found==false && (stemword.length()>4)){
- root = 0;
- step2();
- found=checkWord(context, stemword);
- if(found==false){
- step3();
- found=checkWord(context, stemword);
- if(found==false){
- int i=0;
- while (found==false && i<3){
- step4();
- found=checkWord(context, stemword);
- i++;
- System.out.println("step 4.I ke : "+i);
- }
- if(found==false){
- if((suffix=="an")&&(stemword.endsWith("k"))){
- stemword=stemword.substring(0,stemword.length()-1);
- // suffix="kan";
- i = 0;
- while (found==false && i<3){
- step4();
- found=checkWord(context, stemword);
- i++;
- System.out.println("step 4.II ke : "+i);
- }
- if(found==false){
- System.out.println("Stemword 2: "+stemword);
- System.out.println("step 5.I");
- step5();
- checkWord(context, stemword);
- }
- }
- else {
- stemword=bstemword;
- System.out.println("Stemword 3: "+stemword);
- i=0;
- while (found==false && i<3){
- System.out.println("Stemword 4: "+stemword);
- step4();
- found=checkWord(context, stemword);
- i++;
- System.out.println("step 4.IV ke : "+i);
- }
- if(found==false){
- System.out.println("Stemword 5: "+stemword);
- System.out.println("step 5.II");
- step5();
- checkWord(context, stemword);
- }
- }
- }
- }
- }
- }
- }
- return stemword;
- }
- public boolean checkWord(Context context, String w) {
- CSearch c = new CSearch();
- found = c.search(context, w);
- return found;
- }
- public void step2(){
- System.out.println("Step 2");
- System.out.println("Stemword di step 2 : "+stemword);
- if (stemword.endsWith("kah")){
- stemword=stemword.substring(0, stemword.length()-3);
- System.out.println("Akhiran kah : "+stemword);
- }
- else if (stemword.endsWith("lah")){
- stemword=stemword.substring(0, stemword.length()-3);
- System.out.println("Akhiran lah : "+stemword);
- }
- else if (stemword.endsWith("tah")){
- stemword=stemword.substring(0, stemword.length()-3);
- }
- else if (stemword.endsWith("pun")){
- stemword=stemword.substring(0, stemword.length()-3);
- }
- if (stemword.endsWith("ku")){
- if(!stemword.endsWith("aku")){
- stemword=stemword.substring(0,stemword.length()-2);
- }
- }
- else if (stemword.endsWith("mu")){
- stemword=stemword.substring(0,stemword.length()-2);
- }
- else if (stemword.endsWith("nya")){
- stemword=stemword.substring(0,stemword.length()-3);
- }
- setStemWord(stemword);
- System.out.println("Stemword di akhir Step 2: "+stemword);
- }
- public void step3(){
- System.out.println("Step 3");
- bstemword = stemword;
- if (stemword.endsWith("i")){
- stemword=stemword.substring(0,stemword.length()-1);
- suffix = "i";
- }
- else if (stemword.endsWith("an")){
- stemword=stemword.substring(0,stemword.length()-2);
- suffix = "an";
- }
- setStemWord(stemword);
- System.out.println("Stemword di akhir Step 3: "+stemword);
- decodeword = stemword;
- }
- public void step4(){
- if((stemword.length()<5)){return;}
- System.out.println("Step 4");
- String word2=stemword.substring(2);
- String word3=stemword.substring(3);
- String word4=stemword.substring(4);
- String word5=stemword.substring(5);
- String prefix=word2.substring(0,1);
- String prefix2=word2.substring(1,2);
- String prefix3=word2.substring(2,3);
- String vowel="aiueo";
- String consonant="bcdfghjklmnpqrstvwxyz";
- String consonantr="bcdfghjklmnpqstvwxyz";
- String consonantrl="bcdfghjkmnpqstvwxyz";
- String consonantrwylmn="bcdfghjkpqstvxz";
- String lrwy="lrwy";
- String bfv="bfv";
- String rl="rl";
- String cdjz="cdjz";
- String ghq="ghq";
- String wy="wy";
- if (stemword.startsWith("di")){
- stemword=word2;
- }
- else if (stemword.startsWith("ke")){
- stemword=word2;
- }
- else if (stemword.startsWith("se")){
- stemword=word2;
- }
- else if (stemword.startsWith("be")){
- if (word2.startsWith("r")&&
- vowel.contains(prefix2)){
- stemword=word3;
- // tvresult.setText(stemword); //berV
- }
- else if(word2.startsWith("r")&&
- consonantr.contains(prefix2)&&
- !(word5.startsWith("er"))){
- stemword=word3;
- // tvresult.setText(stemword); //berCAP
- }
- else if(word2.startsWith("r")&&
- consonantr.contains(prefix2)&&
- (word5.startsWith("er")) &&
- vowel.contains(word2.substring(5, 6))){
- stemword=word3;
- // tvresult.setText(stemword); //berCAerV
- }
- else if(word2.startsWith("l")){
- stemword=word3;
- // tvresult.setText(stemword); //belajar
- }
- else if(consonantrl.contains(prefix)&&
- (word3.startsWith("er")) &&
- consonant.contains(word2.substring(3, 4))){
- stemword=word2;
- // tvresult.setText(stemword); //berC1erC2
- }
- }
- else if(stemword.startsWith("te")){
- if (word2.startsWith("r")&&
- vowel.contains(prefix2)){
- stemword=word3;
- // tvresult.setText(stemword);//terV
- }
- else if(word2.startsWith("r")&&
- consonantr.contains(prefix2)&&
- (word4.startsWith("er")) &&
- vowel.contains(word2.substring(4, 5))){
- stemword=word3;
- // tvresult.setText(stemword);//terCerV
- }
- else if(word2.startsWith("r")&&
- consonantr.contains(prefix2)&&
- !(word4.startsWith("er"))){
- stemword=word3;
- // tvresult.setText(stemword); //terCP
- }
- else if(consonantr.contains(prefix)&&
- (word3.startsWith("er")) &&
- consonant.contains(word2.substring(3, 4))){
- stemword=word2;
- // tvresult.setText(word1); //teC1erC2
- }
- }
- else if(stemword.startsWith("me")){
- if(lrwy.contains(prefix)&&
- vowel.contains(prefix2)){
- stemword=word2;
- // tvresult.setText(stemword);//me{l|r|w|y}V
- System.out.println("me{l|r|w|y}V");
- }
- else if(word2.startsWith("m")&&
- (bfv.contains(prefix2))){
- stemword=word3;
- // tvresult.setText(stemword);
- System.out.println("me{l|r|w|y}V");
- }
- else if (word2.startsWith("mpe")&&
- (rl.contains(word2.substring(3, 4)))){
- stemword=word3;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("n")&&
- (cdjz.contains(prefix2))) {
- stemword=word3;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("n")&&
- (vowel.contains(prefix2))){
- stemword=word2;
- // tvresult.setText(stemword);
- }
- else if (word2.startsWith("m")&&
- ((prefix2.contains("r")&&
- (vowel.contains(prefix3)))) ||
- (vowel.contains(prefix2))) {
- stemword=word3;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("ng")&&
- (ghq.contains(prefix3))){
- stemword=word4;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("ng")&&
- (vowel.contains(prefix3))){
- stemword=word4;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("ny")&&
- (vowel.contains(prefix3))){
- stemword=word4;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("mp")&&
- (vowel.contains(prefix3))){
- stemword=word3;
- // tvresult.setText(stemword);
- }
- }
- else if(stemword.startsWith("pe")){
- if(wy.contains(prefix)&&
- vowel.contains(prefix2)){
- stemword=word2;
- // tvresult.setText(stemword);//pe{w|y}V
- }
- else if(word2.startsWith("r")&&
- vowel.contains(prefix2)){
- stemword=word3;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("r")&&
- consonantr.contains(prefix2)&&
- !(word5.startsWith("er"))){
- stemword=word3;
- // tvresult.setText(stemword); //perCAP
- }
- else if(word2.startsWith("r")&&
- consonantr.contains(prefix2)&&
- (word5.startsWith("er")) &&
- vowel.contains(word2.substring(5, 6))){
- stemword=word3;
- // tvresult.setText(stemword); //perCAerV
- }
- else if(word2.startsWith("m")&&
- (bfv.contains(prefix2))){
- stemword=word3;
- // tvresult.setText(stemword);
- }
- else if (word2.startsWith("m")&&
- ((prefix2.contains("r")&&
- (vowel.contains(prefix3)))) ||
- (vowel.contains(prefix2))) {
- stemword=word3;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("n")&&
- (cdjz.contains(prefix2))) {
- stemword=word3;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("n")&&
- (vowel.contains(prefix2))){
- stemword=word2;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("ng")&&
- (ghq.contains(prefix3))){
- stemword=word4;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("ng")&&
- (vowel.contains(prefix3))){
- stemword=word4;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("ny")&&
- (vowel.contains(prefix3))){
- stemword="s"+word4;
- // tvresult.setText(stemword);
- }
- else if(word2.startsWith("l")){
- stemword=word2;
- // tvresult.setText(stemword); //
- }
- else if(word2.startsWith("r")&&
- consonantr.contains(prefix2)&&
- (word4.startsWith("er")) &&
- vowel.contains(word2.substring(4, 5))){
- stemword=word3;
- // tvresult.setText(stemword);//perCerV
- }
- else if((consonantr.contains(prefix2)&&
- (consonantrwylmn.contains(prefix2)))&&
- !(word4.startsWith("er"))){
- stemword=word2;
- // tvresult.setText(stemword); //perCP
- }
- }
- setStemWord(stemword);
- System.out.println("Stemword di akhir Step 4: "+stemword);
- }
- public void step5(){
- stemword = decodeword;
- System.out.println("Step 5");
- System.out.println("Stemword Step 5 :"+stemword);
- if((stemword.length()<5)){return;}
- // stemword=etword.getText().toString();
- String word2=stemword.substring(2);
- String word3=stemword.substring(3);
- String word4=stemword.substring(4);
- String prefix2=word2.substring(1,2);
- String prefix3=word2.substring(2,3);
- String vowel="aiueo";
- if (stemword.startsWith("be")){
- if (word2.startsWith("r")&&
- vowel.contains(prefix2)){
- stemword=word2;
- // tvresult.setText(stemword); //berV
- }
- }
- else if(stemword.startsWith("te")){
- if (word2.startsWith("r")&&
- vowel.contains(prefix2)){
- stemword=word2;
- // tvresult.setText(stemword);//terV
- }
- }
- else if(stemword.startsWith("me")){
- if(word2.startsWith("n")&&
- (vowel.contains(prefix2))){
- stemword=word3;
- stemword="t"+stemword;
- // tvresult.setText("t"+stemword);
- }
- else if (word2.startsWith("m")&&
- ((prefix2.contains("r")&&
- (vowel.contains(prefix3)))) ||
- (vowel.contains(prefix2))) {
- stemword=word3;
- stemword="p"+stemword;
- // tvresult.setText("p"+stemword);
- }
- else if(word2.startsWith("ng")&&
- (vowel.contains(prefix3))){
- stemword=word4;
- stemword="k"+stemword;
- // tvresult.setText("k"+stemword);
- }
- }
- else if(stemword.startsWith("pe")){
- if(word2.startsWith("r")&&
- vowel.contains(prefix2)){
- stemword=word3;
- stemword="r"+stemword;
- // tvresult.setText("r"+stemword);
- }
- else if(word2.startsWith("n")&&
- (vowel.contains(prefix2))){
- stemword=word3;
- stemword="t"+stemword;
- // tvresult.setText("t"+stemword);
- }
- else if (word2.startsWith("m")&&
- ((prefix2.contains("r")&&
- (vowel.contains(prefix3)))) ||
- (vowel.contains(prefix2))) {
- stemword=word3;
- stemword="p"+stemword;
- // tvresult.setText("p"+stemword);
- }
- else if(word2.startsWith("ng")&&
- (vowel.contains(prefix3))){
- stemword=word4;
- stemword="k"+stemword;
- // tvresult.setText("k"+stemword);
- }
- }
- setStemWord(stemword);
- System.out.println("Stemword di akhir Step 5: "+stemword);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement