Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public char getDelimeter(String file) {
- FileReader reader;
- try {
- reader = new FileReader(file);
- } catch (FileNotFoundException e) {
- return 0;
- }
- BufferedReader breader = new BufferedReader(reader);
- int linesRead = 0;
- // char counts for each line
- int[] charCounts = new int[256 * 100];
- int i;
- for (i = 0; i < charCounts.length; i++) {
- charCounts[i] = 0;
- }
- try {
- int prevChar = 0;
- // now we count how much each char occures on each line
- while (linesRead < 100) {
- int which;
- which = breader.read();
- if (which >= 256)
- continue;
- if (which == -1)
- break;
- if (which == '\n' || which == '\r') {
- if (prevChar != '\n' && prevChar != '\r')
- linesRead++;
- prevChar = which;
- continue;
- }
- charCounts[which + 256 * linesRead]++;
- }
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- try {
- breader.close();
- } catch (IOException e) {
- }
- float[] averages = new float[256];
- int[] peaky = new int[256];
- int j;
- // first get the averages
- for (j = 0; j < 256; j++) {
- peaky[j] = 0;
- averages[j] = 0;
- for (i = 0; i < linesRead; i++) {
- averages[j] += charCounts[j + i * 256];
- }
- averages[j] /= (float) linesRead;
- }
- // now we calculate the "peakyness". We assume that the delimiter will
- // appear
- // a constant number of times on each line, and hence its peakyness will
- // be 0
- for (j = 0; j < 256; j++) {
- peaky[j] = 0;
- // calculate peakiness
- for (i = 0; i < linesRead; i++) {
- float p = charCounts[j + i * 256] - averages[j];
- peaky[j] += p * p;
- }
- }
- // if the character never appears then we ignore it.
- // The peakyness will be 0
- boolean[] valid = new boolean[256];
- for (j = 0; j < 256; j++) {
- valid[j] = false;
- for (i = 0; i < linesRead; i++) {
- if (charCounts[i * 256 + j] > 0) {
- valid[j] = true;
- break;
- }
- }
- }
- // find the smallest peakyness character
- int smallest_char = -1;
- for (i = 0; i < 256; i++) {
- if (valid[i] == false)
- continue;
- if (smallest_char == -1 || peaky[i] < peaky[smallest_char]) {
- smallest_char = i;
- }
- }
- return Character.toChars(smallest_char)[0];
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement