Advertisement
BinYamin

UNICODE letter(combination) frequency counter

Nov 16th, 2014
261
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 3.00 KB | None | 0 0
  1. /*Author Shaha Hassan
  2.  *date 16/11/2014
  3.  * GNU License, feel free to do whatever with the code, as long as you give me Shaha Hassan the credit for the code.
  4.  *
  5.  * This program was written to count the occurrences of Unicode letters and of their combinations (2 and 3 characters long).
  6.  * It can be used to find which letter occurs the most in a language, or which combinations of letters occur the most.
  7.  * The user lists the Unicode characters to focus on in one file and supplies the text to be analysed in another.
  8.  * The program then reads the text file, counts the number of occurrences of combinations of consecutive required Unicode characters,
  9.  * and lists them in descending order.
  10.  */
  11.  
  12. /*
  13.  * this project is in java and has 4 files
  14.  * Driver
  15.  * Matrix
  16.  * Map
  17.  * Documentation
  18.  * This one is Driver.java (search through my pastes to find the other files).
  19.  */
  20.  
  21.  
  22. import java.io.*;
  23. import java.util.Scanner;
  24.  
  25. public class Driver {
  26.     public static void main(String args[]) throws IOException {
  27.         Reader s = null;
  28.         Writer[] files=null;
  29.         String fin = null;
  30.         Scanner scn = new Scanner(System.in);
  31.        
  32.         //delete commented stmts if you'd like a prompt which asks you to input custom file names or press '.' for default
  33.         //believe me that becomes tedious after running the program a few times.
  34.         //anyway, opens files
  35.         try {
  36. //          System.out.println("Enter input filename or . ");
  37. //          fin = scn.next();
  38. //          if(fin.equals("."))
  39.                 fin = "D:\\in.txt";
  40. //          else fin = fin + ".txt";
  41.             s = new InputStreamReader(new FileInputStream(fin),"UTF-8");
  42.  
  43. //          System.out.println("Enter output filename or . ");
  44.             fin=".";
  45. //          fin = scn.next();
  46.             if(fin.equals(".")) fin = "D:\\out";
  47.  
  48.             files = new Writer[3];
  49.             for(int i=1; i<4; i++ ) {
  50.                 files[i-1] = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fin+i+".txt"),"UTF-8"));
  51.             }
  52.         } catch (UnsupportedEncodingException e) {
  53.             System.out.print("Coding Format not supported");
  54.         } catch (FileNotFoundException e) {
  55.             System.out.println(fin + "not found.");
  56.         }
  57.  
  58.         scn.close();
  59.        
  60.         Map m = new Map();
  61.         Matrix tally = new Matrix(m);
  62.         System.out.println("Map " + m);
  63.        
  64.         char t1, t2, t3, pre, curr;
  65.         int pos1, pos2, pos3;
  66.         t1=(char)s.read();
  67.         t2 = (char)s.read();
  68.         pre = t2;
  69.         pos1 = m.getpos(t1);
  70.         pos2 = m.getpos(t2);
  71.         if ( (pos1>-1) && (pos2>-1) ) {
  72.             tally.add(pos1, pos2);
  73.             tally.add(pos1);
  74.             tally.add(pos2);
  75.         }
  76.         while(s.ready()) {
  77.             curr=(char)s.read();
  78.             t3 = curr;
  79.             pos1 = m.getpos(t1);
  80.             pos2 = m.getpos(t2);
  81.             pos3 = m.getpos(t3);
  82.        
  83.             if( pos3>-1) {
  84.                 tally.add(pos3);
  85.                 if(pos2>-1) {
  86.                     tally.add(pos2, pos3);
  87.                     if(pos1>-1) {
  88.                         tally.add(pos1, pos2, pos3);
  89.                     }
  90.                 }
  91.             }
  92.             t1 = pre;
  93.             pre = curr;
  94.             t2 = curr;
  95.         }
  96.        
  97.         tally.genlist(m);
  98.        
  99.        
  100.         tally.write(files);
  101.         for(int i=0; i<3; i++) {
  102.             files[i].close();
  103.         }
  104.         s.close();
  105.        
  106.         System.out.println("Output saved in following files: D:\\out1.txt, D:\\out2.txt, D:\\out3.txt");
  107.     }
  108.    
  109. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement