Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*Author Shaha Hassan
- *date 16/11/2014
- * GNU License, feel free to do whatever with the code, as long as you give me Shaha Hassan the credit for the code.
- *
- * This program was written to count the occurrences of UNICODE letters, or their combinations (2 and 3 chars long).
- * It can be used to find what letter occurs the most in a language, or what combinations of letters occur the most.
- * A user can list the UNICODE characters he wants to focus on in one file and supply the text to be analysed in another.
- * The program then reads the text file, counts the number of occurrences of combinations of consecutive required unicode characters
- * and lists them in descending order.
- */
- /*
- * this project is in java and has 4 files
- * Driver http://pastebin.com/ZWBeq6MV
- * Matrix http://pastebin.com/SypU8jJe
- * Map
- * Documentation
- * this one is Map.java (search though my pastes to find the other files.)
- */
- import java.io.*;
- import java.util.*;
/**
 * Maps characters to integer positions and back.
 *
 * <p>The characters are kept in a sorted array; a character's position is its
 * index in that array, found by binary search. If {@code map[i] == 't'} then
 * the character {@code t} is mapped to the number {@code i}.
 *
 * <p>When the map is built from a set file, index 0 is left blank (it holds
 * {@code '\0'}) so that no listed character is mapped to 0. The original
 * author's note says the matrix class relies on this. A map built with
 * {@link #Map(char[])} does NOT reserve index 0.
 */
public class Map {

    /** File read when the file name is given as {@code "."}. */
    private static final String DEFAULT_SET_FILE = "D:\\set.txt";

    /** Sorted characters; the index of a character is the number it maps to. */
    private final char[] map;

    /**
     * Builds the map from the default set file ({@code D:\set.txt}).
     *
     * @throws IOException if the file is missing or cannot be read
     */
    Map() throws IOException {
        this(".");
    }

    /**
     * Builds the map from the characters described in a UTF-8 set file.
     *
     * <p>File format, as parsed here:
     * <ul>
     *   <li>{@code ~xy} adds every character from {@code x} to {@code y} inclusive;</li>
     *   <li>{@code ^abc...} adds each character up to the end of the line;</li>
     *   <li>{@code *} stops parsing; end of file also stops parsing;</li>
     *   <li>line breaks between directives are skipped, any other character is ignored.</li>
     * </ul>
     * Duplicate characters are not removed.
     *
     * @param filename path of the set file, or {@code "."} for the default file
     * @throws FileNotFoundException if the file does not exist (a message is also
     *         printed to standard output)
     * @throws IOException if reading the file fails
     */
    Map(String filename) throws IOException {
        char[] chars = readCharacterSet(filename);
        Arrays.sort(chars);
        // Slot 0 stays '\0' so that every listed character gets a position >= 1.
        map = new char[chars.length + 1];
        System.arraycopy(chars, 0, map, 1, chars.length);
    }

    /**
     * Builds the map from the characters in {@code set}.
     *
     * <p>NOTE: {@code set} is sorted in place and used directly as the backing
     * array (no copy is made and no blank slot is reserved at index 0), so the
     * caller's array is modified and shared with this object.
     *
     * @param set the characters to map
     */
    Map(char[] set) {
        Arrays.sort(set);
        map = set;
    }

    /** Opens the set file, parses it, and always closes the reader. */
    private static char[] readCharacterSet(String filename) throws IOException {
        String path = filename.equals(".") ? DEFAULT_SET_FILE : filename;
        try (Reader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), "UTF-8"))) {
            return parse(in);
        } catch (FileNotFoundException e) {
            System.out.println(path + ": file not found.");
            throw e;
        }
    }

    /** Parses the set-file directives from {@code in} and returns the collected characters, unsorted. */
    private static char[] parse(Reader in) throws IOException {
        StringBuilder collected = new StringBuilder();
        parsing:
        while (true) {
            // read() results stay as int so that -1 (end of stream) is not
            // confused with the character U+FFFF.
            int c;
            do {
                c = in.read();
            } while (c == '\r' || c == '\n');
            if (c == -1) {
                break;
            }
            switch (c) {
                case '~': {
                    int lo = in.read();
                    int hi = in.read();
                    if (lo == -1 || hi == -1) {
                        // Truncated range at end of file: nothing more to read.
                        break parsing;
                    }
                    // int counter: a char counter would wrap around when hi is U+FFFF.
                    for (int i = lo; i <= hi; i++) {
                        collected.append((char) i);
                    }
                    break;
                }
                case '^': {
                    int next = in.read();
                    while (next != -1 && next != '\r' && next != '\n') {
                        collected.append((char) next);
                        next = in.read();
                    }
                    break;
                }
                case '*':
                    break parsing;
                default:
                    // Unknown directive characters are ignored.
                    break;
            }
        }
        return collected.toString().toCharArray();
    }

    /**
     * Returns the length of the backing array. For a map built from a set file
     * this includes the blank slot at index 0.
     *
     * @return the number of slots in the map
     */
    public int getlength() {
        return map.length;
    }

    /**
     * Maps a character to its position.
     *
     * @param key the character to look up
     * @return the index of {@code key}, or a negative value (as returned by
     *         {@link Arrays#binarySearch(char[], char)}) if it is not in the map
     */
    public int getpos(char key) {
        return Arrays.binarySearch(map, key);
    }

    /**
     * Maps a position back to its character.
     *
     * @param pos an index into the map
     * @return the character stored at {@code pos}
     * @throws ArrayIndexOutOfBoundsException if {@code pos} is out of range
     */
    public char getchar(int pos) {
        return map[pos];
    }

    /**
     * Returns all slots of the map, in order, as a string.
     *
     * @return the backing array converted with {@link String#valueOf(char[])}
     */
    @Override
    public String toString() {
        return String.valueOf(map);
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement