Advertisement
BinYamin

UNICODE letter(combination) frequency counter

Nov 16th, 2014
280
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 3.15 KB | None | 0 0
  1.    
  2.  
  3.     /*Author Shaha Hassan
  4.      *date 16/11/2014
  5.      * GNU License, feel free to do whatever with the code, as long as you give me Shaha Hassan the credit for the code.
  6.      *
  7.      * This program was written to count the occurrences of UNICODE letters, or their combinations (2 and 3 chars long).
  8.      * It can be used to find what letter occurs the most in a language, or what combinations of letters occur the most.
  9.      * The user lists the UNICODE characters he wants to focus on in one file and supplies the text to be analysed in another.
  10.      * The program then reads the text file, counts the number of occurrences of combinations of consecutive required unicode characters
  11.      * and lists them in descending order.
  12.      */
  13.      
  14.     /*
  15.      * this project is in java and has 4 files
  16.      * Driver    http://pastebin.com/ZWBeq6MV
  17.      * Matrix    http://pastebin.com/SypU8jJe
  18.      * Map
  19.      * Documentation
  20.      * this one is Map.java (search through my pastes to find the other files.)
  21.      */
  22.      
  23. import java.io.*;
  24. import java.util.*;
  25.  
  26. public class Map {
  27. /* maps the characters to integers.
  28.  * ex- if  map[i] = 't'   then character t is mapped to number i.
  29.  * map[0] is always left blank so no character is mapped to 0. (refer matrix)
  30.  * */
  31.    
  32.     private char[] map;
  33.    
  34.  
  35.     Map() throws IOException {
  36.     //intializes 'map' with the valid chars, i.e. the characters we want to check.
  37.     //reads/parses the file D:\set.txt and initializes the map.
  38.        
  39. //      Scanner s = new Scanner(System.in);
  40. //      System.out.println("Enter char domain set file or . ");
  41.         String filename =".";
  42. //      filename = s.next();
  43.         if(filename.equals(".") )
  44.             filename = "D:\\set.txt";
  45.         Reader fin = null;
  46.         try {
  47.             fin = new InputStreamReader(new FileInputStream(filename),"UTF-8");
  48.         } catch (UnsupportedEncodingException | FileNotFoundException e) {
  49.             System.out.println(filename + ": file not found.");
  50.         }
  51.        
  52.         char []array = new char[1000];
  53.         int top =-1;
  54.         char tmp;
  55.         w1: while(true){
  56.             do{
  57.                 tmp = (char)fin.read();
  58.             } while( tmp=='\r' || tmp =='\n');
  59.             if( (int)tmp == -1) break;
  60.             switch(tmp) {
  61.             case '~':
  62.                 tmp = (char) fin.read();
  63.                 char tmp2 = (char) fin.read();
  64.                 for(char i=tmp; i<=tmp2; i++){
  65.                     array[++top] = i;
  66.                 }
  67.                 break;
  68.             case '^':
  69.                 while(true) {
  70.                     tmp = (char)fin.read();
  71.                     if(tmp=='\r' || tmp =='\n') break;
  72.                     array[++top] = tmp;
  73.                 }
  74.                 break;
  75.             case '*':
  76.                 break w1;
  77.             }
  78.         }
  79.  
  80.         Arrays.sort(array);
  81.         map = new char[top+2];
  82.         System.arraycopy(array, 1000-top-1, map, 1, top+1);
  83.         //remove possible redundancy of elements?
  84.     }
  85.  
  86.     Map(char[] set){
  87.         //initializes 'map' with the characters in 'set'
  88.         Arrays.sort(set);
  89.         map = set;
  90.     }
  91.  
  92.     public int getlength() {
  93.         //returns nmbr of characters to be checked
  94.         return map.length;
  95.     }
  96.  
  97.     public int getpos(char key){
  98.         //returns the pos of key; maps the char to a number
  99.         int p = Arrays.binarySearch(map, key);
  100.         return p;
  101.     }
  102.  
  103.     public char getchar(int pos){
  104.         //returns the char at pos, demapping
  105.         return map[pos];
  106.     }
  107.  
  108.     public String toString(){
  109.     //overriding toString function
  110.         String s = String.valueOf(map);
  111.         return s;
  112.     }
  113. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement