// Untitled

import java.util.*;

// author: j.lines@uea.ac.uk

/**
 * Coursework helpers for generating random dictionaries and documents, converting documents
 * into word-count feature vectors, and comparing documents by the distance between those
 * vectors.
 */
public class CourseworkUtilities {

    /** Shared random source; seed this if you want reproducible results. */
    public static Random rand = new Random();

    /**
     * Example usage: builds a dictionary and two documents, prints their feature vectors and
     * the distance between them, then prints the nearest neighbour of each document in a
     * randomly generated collection.
     *
     * @param args unused
     * @throws Exception if the dictionary parameters cannot produce enough distinct words
     */
    public static void main(String[] args) throws Exception {
        int numDocsMultiArray = 5;
        int numWordsInDoc = 10;
        int numWordsDict = 5;
        int wordLength = 2;

        // generate the dictionary
        String[] dict = generateDictionary(numWordsDict, wordLength);

        // Generate documents by passing in the dictionary and a total number of words. Each
        // document is built from unseen filler words and some words from the dictionary,
        // sampled using the shared rand field - seed it if results should be reproducible.
        String[] doc = generateDocument(dict, numWordsInDoc);
        String[] doc1 = generateDocument(dict, numWordsInDoc);

        int[] featureVector = calculateFeatureVector(dict, doc);
        int[] featureVector1 = calculateFeatureVector(dict, doc1);

        String[][] twoDimenArrDoc = generateTwoDimenArrDoc(dict, numWordsInDoc, numDocsMultiArray);

        int dsd = documentSimilarityDistance(featureVector, featureVector1);
        int[] nearestDocuments = findNearestDocuments(twoDimenArrDoc, dict);

        System.out.print(Arrays.toString(featureVector) + Arrays.toString(featureVector1) + "DSD = " + dsd + "\n");
        System.out.println("Nearest documents = " + Arrays.toString(nearestDocuments));
    }

    /**
     * Generates a word of the given length by randomly choosing lowercase letters 'a'..'z'.
     *
     * @param wordLength number of letters to generate; values below 1 give an empty string
     * @return the generated word
     */
    public static String generateWord(int wordLength) {
        StringBuilder word = new StringBuilder();
        for (int i = 0; i < wordLength; i++) {
            word.append((char) (rand.nextInt(26) + 'a'));
        }
        return word.toString();
    }

    /**
     * Generates a dictionary of distinct random words. Duplicates are not allowed, so each
     * candidate is checked against the words generated so far before being added. This is a
     * crude implementation and not the most efficient, which is fine for now.
     *
     * @param numWords   number of distinct words to generate
     * @param wordLength length of every word
     * @return an array of {@code numWords} distinct words
     * @throws IllegalArgumentException if fewer than {@code numWords} distinct words of the
     *                                  requested length exist (26^wordLength &lt; numWords)
     * @throws Exception declared only so that existing callers which catch or propagate
     *                   {@code Exception} keep compiling
     */
    public static String[] generateDictionary(int numWords, int wordLength) throws Exception {

        if (Math.pow(26, wordLength) < numWords) {
            throw new IllegalArgumentException("Error: the input arguments could only result in "
                    + "26^" + wordLength + " (" + ((int) (Math.pow(26, wordLength))) + ") distinct words but the"
                    + " numWords argument is set to " + numWords);
        }

        // remember - DO NOT USE IN-BUILT JAVA DATA STRUCTURES IN YOUR OWN CODE FOR THIS ASSIGNMENT (you can still
        // use arrays wherever you like, however)
        //
        // It is fine to use ArrayList here as this has been given to you but do not use it
        // anywhere else in your coursework.

        ArrayList<String> dictionary = new ArrayList<>(numWords);
        while (dictionary.size() < numWords) {
            String candidate = generateWord(wordLength);
            if (!dictionary.contains(candidate)) {
                dictionary.add(candidate);
            }
        }
        return dictionary.toArray(new String[0]);
    }

    /**
     * Generates a given number of random words of a specified length. Unlike
     * {@link #generateDictionary(int, int)} there is no duplicate check - these words only pad
     * out documents for testing/timing, so it does not matter what they are.
     *
     * @param numWords   number of words to generate
     * @param wordLength length of every word
     * @return an array of {@code numWords} random words
     */
    public static String[] generateFillerWords(int numWords, int wordLength) {
        String[] output = new String[numWords];
        for (int i = 0; i < numWords; i++) {
            output[i] = generateWord(wordLength);
        }
        return output;
    }

    /**
     * Generates a document from a dictionary. A pool of filler words twice the size of the
     * dictionary is created first; each document position is then filled by drawing one index
     * uniformly from the combined dictionary + filler pool, so every pool entry is as likely
     * to be picked as any other.
     *
     * @param dictionary    non-empty array of dictionary words; the length of its first word
     *                      is used as the length of the filler words
     * @param numWordsInDoc number of words in the generated document
     * @return the generated document
     * @throws IllegalArgumentException if {@code dictionary} is null or empty
     */
    public static String[] generateDocument(String[] dictionary, int numWordsInDoc) {
        if (dictionary == null || dictionary.length == 0) {
            throw new IllegalArgumentException("dictionary must contain at least one word");
        }

        // generate other words to fill the document with
        String[] otherWords = generateFillerWords(dictionary.length * 2, dictionary[0].length());

        String[] documentList = new String[numWordsInDoc];
        int numDistinctWords = dictionary.length * 3;

        for (int i = 0; i < numWordsInDoc; i++) {
            int nextWordIdx = rand.nextInt(numDistinctWords);
            if (nextWordIdx < dictionary.length) {
                documentList[i] = dictionary[nextWordIdx];
            } else {
                // indices past the dictionary map onto the filler-word pool
                documentList[i] = otherWords[nextWordIdx - dictionary.length];
            }
        }
        return documentList;
    }

    /**
     * Counts how many times each dictionary word occurs in a document.
     *
     * @param dict dictionary words
     * @param doc  document words
     * @return an array the same length as {@code dict}; element {@code i} is the number of
     *         entries of {@code doc} equal to {@code dict[i]}
     */
    public static int[] calculateFeatureVector(String[] dict, String[] doc) {
        int[] fVector = new int[dict.length];

        for (int i = 0; i < dict.length; i++) {
            for (int j = 0; j < doc.length; j++) {
                if (dict[i].equals(doc[j])) {
                    fVector[i]++;
                }
            }
        }
        return fVector;
    }

    /**
     * Generates a collection of documents from the same dictionary.
     *
     * @param dict        dictionary passed to {@link #generateDocument(String[], int)}
     * @param numWordsDoc number of words in each document
     * @param numDocs     number of documents to generate
     * @return an array of {@code numDocs} documents, each holding {@code numWordsDoc} words
     */
    public static String[][] generateTwoDimenArrDoc(String[] dict, int numWordsDoc, int numDocs) {
        // Only the outer array is allocated here; each row is the array returned by generateDocument.
        String[][] twoDimenArrDoc = new String[numDocs][];

        for (int i = 0; i < numDocs; i++) {
            twoDimenArrDoc[i] = generateDocument(dict, numWordsDoc);
        }
        return twoDimenArrDoc;
    }

    /**
     * Finds, for every document, the other document whose feature vector is closest to it
     * according to {@link #documentSimilarityDistance(int[], int[])}.
     *
     * @param twoDimenArrDoc the documents to compare; each row is one document
     * @param dict           dictionary used to build the feature vectors
     * @return an array with one entry per document: the index of its nearest other document
     *         (the lowest index wins when several are equally close), or {@code -1} when the
     *         collection holds no other document
     */
    public static int[] findNearestDocuments(String[][] twoDimenArrDoc, String[] dict) {
        int numDocs = twoDimenArrDoc.length;

        // Build each document's feature vector once instead of once per comparison.
        int[][] featureVectors = new int[numDocs][];
        for (int i = 0; i < numDocs; i++) {
            featureVectors[i] = calculateFeatureVector(dict, twoDimenArrDoc[i]);
        }

        int[] nearest = new int[numDocs];
        for (int i = 0; i < numDocs; i++) {
            int bestIndex = -1;
            int bestDistance = 0;
            for (int j = 0; j < numDocs; j++) {
                if (j == i) {
                    continue; // a document is never its own nearest neighbour
                }
                int distance = documentSimilarityDistance(featureVectors[i], featureVectors[j]);
                // strict '<' keeps the earliest document when distances tie
                if (bestIndex == -1 || distance < bestDistance) {
                    bestIndex = j;
                    bestDistance = distance;
                }
            }
            nearest[i] = bestIndex;
        }
        return nearest;
    }

    /**
     * Computes the document similarity distance between two feature vectors: the sum of the
     * absolute differences of their corresponding elements.
     *
     * @param fVectorOne first feature vector
     * @param fVectorTwo second feature vector; must be the same length as the first
     * @return the summed absolute element-wise difference (0 for identical vectors)
     * @throws IllegalArgumentException if the vectors have different lengths
     */
    public static int documentSimilarityDistance(int[] fVectorOne, int[] fVectorTwo) {
        if (fVectorOne.length != fVectorTwo.length) {
            throw new IllegalArgumentException("feature vectors must have the same length but were "
                    + fVectorOne.length + " and " + fVectorTwo.length);
        }

        int dsdSum = 0;
        for (int i = 0; i < fVectorOne.length; i++) {
            dsdSum += Math.abs(fVectorOne[i] - fVectorTwo[i]);
        }
        return dsdSum;
    }

    /**
     * Finds the first position of a value in an array using a linear scan.
     *
     * @param arr the array to search; may be null
     * @param t   the value to look for
     * @return the index of the first element equal to {@code t}, or {@code -1} if the array
     *         is null or does not contain the value
     */
    public static int findIndex(int[] arr, int t) {
        if (arr == null) {
            return -1;
        }

        for (int i = 0; i < arr.length; i++) {
            if (arr[i] == t) {
                return i;
            }
        }
        return -1;
    }
}