Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.awt.List;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.lang.reflect.Array;
- import java.math.RoundingMode;
- import java.security.KeyStore.Entry;
- import java.text.DecimalFormat;
- import java.util.ArrayList;
- import java.util.Collection;
- import java.util.Collections;
- import java.util.Comparator;
- import java.util.LinkedList;
- import java.util.Map;
- import java.util.Map;
- import java.util.SortedMap;
- import java.util.TreeMap;
- import java.util.concurrent.BrokenBarrierException;
- import java.util.concurrent.ConcurrentHashMap;
- import java.util.concurrent.CountDownLatch;
- import java.util.concurrent.CyclicBarrier;
- import java.util.concurrent.ExecutorService;
- import java.util.concurrent.Executors;
- import javax.swing.text.html.HTMLDocument.Iterator;
/**
 * Represents a partial solution of the problem to be solved. These partial
 * solutions constitute the tasks that are inserted into the workpool.
 */
class PartialSolution {
// ...
}
- /**
- * Clasa ce reprezinta un thread worker.
- */
- class WorkerMap extends Thread {
- WorkPool wp;
- String docName;
- int start;
- ConcurrentHashMap<Integer, Integer> hmap;
- ArrayList<String> maxWords;
- public WorkerMap(WorkPool workpool, String doc, int ofst) {
- this.wp = workpool;
- docName = new String(doc);
- start = ofst;
- hmap = new ConcurrentHashMap<Integer, Integer>();
- maxWords = new ArrayList<String>();
- }
- /**
- * Procesarea unei solutii partiale. Aceasta poate implica generarea unor
- * noi solutii partiale care se adauga in workpool folosind putWork().
- * Daca s-a ajuns la o solutie finala, aceasta va fi afisata.
- * @throws IOException
- * @throws BrokenBarrierException
- * @throws InterruptedException
- */
- void processPartialSolution(PartialSolution ps) throws IOException, InterruptedException, BrokenBarrierException {
- BufferedReader br = new BufferedReader(new FileReader(docName));
- int end = start + ReplicatedWorkers.chunkSize;
- File f = new File(docName);
- int fileLen = (int) f.length();
- end = Math.min(fileLen, end);
- char c[] = new char[1];
- br.mark(1000 + end);
- br.skip(start);
- br.read(c, 0, 1);
- br.reset();
- if (start != 0 && (ReplicatedWorkers.CheckDelim(c[0]) == false))
- {
- br.mark(1000 + end);
- br.skip(start - 1);
- br.read(c, 0, 1);
- br.reset();
- if (ReplicatedWorkers.CheckDelim(c[0]) == true)
- {
- }
- else while (start < end && (ReplicatedWorkers.CheckDelim(c[0]) == false))
- {
- start++;
- br.mark(1000 + end);
- br.skip(start);
- br.read(c, 0, 1);
- br.reset();
- }
- }
- br.mark(1000 + end);
- br.skip(end - 1);
- br.read(c, 0, 1);
- br.reset();
- while (ReplicatedWorkers.CheckDelim(c[0]) == true)
- {
- end++;
- br.mark(1000 + end);
- br.skip(end - 1);
- int readInvalid = br.read(c, 0, 1);
- br.reset();
- if(readInvalid == -1)
- {
- end--;
- break;
- }
- }
- c = new char[end - start];
- // System.out.println(c);
- br.mark(1000 + end);
- br.skip(start);
- br.read(c, 0, end - start);
- br.reset();
- String s = new String();
- int dim = 0;
- String[] words = new String[end - start];
- for (int i = 0; i < c.length; i++)
- {
- if (ReplicatedWorkers.CheckDelim(c[i]))
- {
- if (s.length() != 0)
- {
- words[dim] = s;
- dim++;
- }
- s = new String();
- }
- else
- {
- s += c[i];
- }
- }
- words[dim] = s;
- dim++;
- int maxLen = -1;
- for (int i = 0; i < dim; i++)
- {
- System.out.println(words[i]);
- int len = words[i].length();
- if (maxLen < len)
- maxLen = len;
- if (hmap.containsKey(len))
- {
- hmap.put(len, hmap.get(len) + 1);
- }
- else
- {
- hmap.put(len, 1);
- }
- }
- for (int i = 0; i < dim; i++)
- {
- if (words[i].length() == maxLen)
- maxWords.add(words[i]);
- }
- ConcurrentHashMap<String, ArrayList<String>> maxWordsMap = ReplicatedWorkers.maxWords;
- synchronized (ReplicatedWorkers.maxWords)
- {
- if (maxWordsMap.containsKey(docName))
- {
- ArrayList<String> aux = maxWordsMap.get(docName);
- aux.addAll(maxWords);
- maxWordsMap.put(docName, aux);
- }
- else
- {
- maxWordsMap.put(docName, maxWords);
- }
- }
- synchronized (ReplicatedWorkers.results)
- {
- if (!ReplicatedWorkers.results.containsKey(docName))
- {
- ArrayList<ConcurrentHashMap<Integer, Integer>> res = new ArrayList<ConcurrentHashMap<Integer, Integer>>();
- res.add(hmap);
- ReplicatedWorkers.results.put(docName, res);
- }
- else
- {
- ArrayList<ConcurrentHashMap<Integer, Integer>> res = ReplicatedWorkers.results.get(docName);
- res.add(hmap);
- ReplicatedWorkers.results.put(docName, res);
- }
- }
- }
- public void run() {
- PartialSolution ps = wp.getWork();
- if (ps == null)
- {
- ReplicatedWorkers.doneSignal.countDown();
- return;
- }
- try {
- processPartialSolution(ps);
- } catch (IOException e) {
- e.printStackTrace();
- } catch (InterruptedException e) {
- e.printStackTrace();
- } catch (BrokenBarrierException e) {
- e.printStackTrace();
- }
- ReplicatedWorkers.doneSignal.countDown();
- }
- }
- class WorkerReduce extends Thread {
- WorkPool wp;
- String docName;
- double rank;
- private int fib(int n)
- {
- int p = 0;
- int c = 1;
- int r = 0; // The result is initialized to 0 (undefined).
- for (int i = 0; i < n; i++)
- {
- r = p + c; // Produce next number in the sequence.
- p = c; // Save previous number.
- c = r; // Save current number.
- }
- return r;
- }
- public WorkerReduce(WorkPool workpool, String doc) {
- this.wp = workpool;
- docName = doc;
- }
- /**
- * Procesarea unei solutii partiale. Aceasta poate implica generarea unor
- * noi solutii partiale care se adauga in workpool folosind putWork().
- * Daca s-a ajuns la o solutie finala, aceasta va fi afisata.
- * @throws IOException
- * @throws BrokenBarrierException
- * @throws InterruptedException
- */
- void processPartialSolution(PartialSolution ps) throws IOException, InterruptedException, BrokenBarrierException {
- ConcurrentHashMap<String, ArrayList<ConcurrentHashMap<Integer, Integer>>> results =
- ReplicatedWorkers.results;
- ConcurrentHashMap<Integer, Integer> reduceResult =
- new ConcurrentHashMap<Integer, Integer>();
- ArrayList<ConcurrentHashMap<Integer, Integer>> hmapList = results.get(docName);
- synchronized (ReplicatedWorkers.results)
- {
- for (int i = 0; i < results.get(docName).size(); i++)
- {
- for (Map.Entry<Integer, Integer> e : results.get(docName).get(i).entrySet())
- {
- if (e.getKey() == 0)
- continue;
- synchronized(reduceResult)
- {
- if (reduceResult.containsKey(e.getKey()))
- {
- reduceResult.put(e.getKey(), e.getValue() + reduceResult.get(e.getKey()));
- }
- else
- {
- reduceResult.put(e.getKey(), e.getValue());
- }
- }
- }
- }
- }
- ArrayList<String> maxWords = new ArrayList<String>();
- ArrayList<String> maxWordsFromMap = ReplicatedWorkers.maxWords.get(docName);
- int maxLen = -1;
- synchronized (ReplicatedWorkers.maxWords) {
- for (int i = 0; i < maxWordsFromMap.size(); i++)
- {
- if (maxWordsFromMap.get(i).length() > maxLen)
- maxLen = maxWordsFromMap.get(i).length();
- }
- for (int i = 0; i < maxWordsFromMap.size(); i++)
- {
- if (maxWordsFromMap.get(i).length() == maxLen)
- maxWords.add(maxWordsFromMap.get(i));
- }
- }
- double numWords = 0;
- rank = 0;
- for (Map.Entry<Integer, Integer> e : reduceResult.entrySet())
- {
- numWords += e.getValue();
- rank += fib(e.getKey()) * e.getValue();
- }
- rank /= numWords;
- ArrayList<String> str = new ArrayList<String>();
- for (int i = 0; i < maxWords.size(); i++)
- {
- boolean add = true;
- for (int j = 0; j < str.size(); j++)
- {
- if (str.get(j).equals(maxWords.get(i)))
- {
- add = false;
- break;
- }
- }
- if (add)
- {
- str.add(maxWords.get(i));
- }
- }
- maxWords = str;
- synchronized (ReplicatedWorkers.ranks)
- {
- ReplicatedWorkers.ranks.put(docName, rank);
- }
- synchronized (ReplicatedWorkers.maxWordsEnd)
- {
- ReplicatedWorkers.maxWordsEnd.put(docName, maxWords);
- }
- }
- public void run() {
- PartialSolution ps = wp.getWork();
- if (ps == null)
- {
- ReplicatedWorkers.doneSignal.countDown();
- return;
- }
- try {
- processPartialSolution(ps);
- } catch (IOException e) {
- e.printStackTrace();
- } catch (InterruptedException e) {
- e.printStackTrace();
- } catch (BrokenBarrierException e) {
- e.printStackTrace();
- }
- ReplicatedWorkers.doneSignal.countDown();
- }
- }
- class Pair implements Comparable<Object>
- {
- Double v1;
- String v2;
- Pair(double d, String s)
- {
- v1 = new Double(d);
- v2 = new String(s);
- }
- @Override
- public int compareTo(Object arg0) {
- Pair p = (Pair)arg0;
- if (Math.abs(v1 - p.v1) < 0.01)
- {
- if (v2.equals(p.v2))
- return 0;
- String[] docs = ReplicatedWorkers.docs;
- int i1 = 0, i2 = 0;
- for (int i = 0; i < docs.length; i++)
- {
- if (docs[i].equals(v2))
- i1 = i;
- if (docs[i].equals(p.v2))
- i2 = i;
- }
- if (i1 > i2)
- return 1;
- else return -1;
- }
- else
- {
- if (v1 > p.v1)
- return -1;
- else return 1;
- }
- }
- }
- public class ReplicatedWorkers {
- static BufferedReader br;
- static int chunkSize;
- static String[] docs;
- static int numDocs;
- static ConcurrentHashMap<String, ArrayList<ConcurrentHashMap<Integer, Integer>>> results;
- static ConcurrentHashMap<String, ArrayList<String>> maxWords;
- static ConcurrentHashMap<String, Double> ranks;
- static ConcurrentHashMap<String, ArrayList<String>> maxWordsEnd;
- static int count = 0;
- static int activeThreads = 0;
- static String delim;
- static CountDownLatch doneSignal;
- static boolean CheckDelim(char c)
- {
- for (int i = 0; i < delim.length(); i++)
- {
- if (c == delim.charAt(i))
- {
- return true;
- }
- }
- return false;
- }
- public static void main(String args[]) throws NumberFormatException, IOException, InterruptedException {
- DecimalFormat df = new DecimalFormat("#.00");
- df.setRoundingMode(RoundingMode.DOWN);
- int numThreads = Integer.parseInt(args[0]);
- br = new BufferedReader(new FileReader(args[1]));
- ranks = new ConcurrentHashMap<String, Double>();
- maxWordsEnd = new ConcurrentHashMap<String, ArrayList<String>>();
- maxWords = new ConcurrentHashMap<String, ArrayList<String>>();
- chunkSize = Integer.parseInt(br.readLine());
- numDocs = Integer.parseInt(br.readLine());
- docs = new String[numDocs];
- for (int i = 0; i < numDocs; i++)
- docs[i] = br.readLine();
- delim = new String(";:/?~.,><~`[]{}()!@#$%^&-_+'=*|");
- delim += '"';
- delim += ' ';
- delim += '\n';
- delim += '\r';
- delim += '\t';
- results = new ConcurrentHashMap<String, ArrayList<ConcurrentHashMap<Integer, Integer>>>();
- BufferedWriter wr = new BufferedWriter(new FileWriter(args[2]));
- WorkPool wpMap = new WorkPool(numThreads);
- WorkPool wpReduce = new WorkPool(numThreads);
- ArrayList<Thread> mapThds = new ArrayList<Thread>();
- ArrayList<Thread> reduceThds = new ArrayList<Thread>();
- ExecutorService ex = Executors.newFixedThreadPool(numThreads);
- for (int i = 0; i < docs.length; i++)
- {
- File f = new File(docs[i]);
- int len = (int) f.length();
- for (int j = 0; j < len / chunkSize; j++)
- {
- count++;
- }
- if (len % chunkSize != 0)
- {
- count++;
- }
- }
- doneSignal = new CountDownLatch(count);
- for (int i = 0; i < docs.length; i++)
- {
- File f = new File(docs[i]);
- int len = (int) f.length();
- for (int j = 0; j < len / chunkSize; j++)
- {
- count++;
- wpMap.putWork(new PartialSolution());
- //mapThds.add(new WorkerMap(wpMap, docs[i], j * chunkSize));
- ex.execute(new WorkerMap(wpMap, docs[i], j * chunkSize));
- }
- if (len % chunkSize != 0)
- {
- count++;
- wpMap.putWork(new PartialSolution());
- //mapThds.add(new WorkerMap(wpMap, docs[i], chunkSize * (len / chunkSize)));
- ex.execute(new WorkerMap(wpMap, docs[i], chunkSize * (len / chunkSize)));
- }
- }
- doneSignal.await();
- /*for (int i = 0; i < mapThds.size(); i++)
- {
- mapThds.get(i).start();
- }
- for (int i = 0; i < mapThds.size(); i++)
- {
- mapThds.get(i).join();
- }*/
- /* System.out.println(1);
- while (count > 0)
- {
- System.out.println(count);
- }*/
- count = 0;
- System.out.println(3);
- ex.shutdown();
- ExecutorService ex2 = Executors.newFixedThreadPool(numThreads);
- doneSignal = new CountDownLatch(docs.length);
- for (int i = 0; i < docs.length; i++)
- {
- count++;
- wpReduce.putWork(new PartialSolution());
- //reduceThds.add(new WorkerReduce(wpReduce, docs[i]));
- ex2.execute(new WorkerReduce(wpReduce, docs[i]));
- }
- /*for (int i = 0; i < reduceThds.size(); i++)
- {
- reduceThds.get(i).start();
- }
- for (int i = 0; i < reduceThds.size(); i++)
- {
- reduceThds.get(i).join();
- }*/
- doneSignal.await();
- ex2.shutdown();
- ArrayList<Pair> sortedFiles = new ArrayList<Pair>();
- for (Map.Entry<String, Double> e : ranks.entrySet())
- {
- sortedFiles.add(new Pair(e.getValue(), e.getKey()));
- }
- Collections.sort(sortedFiles);
- for (int i = 0; i < sortedFiles.size(); i++)
- {
- String s = new String();
- String docName = sortedFiles.get(i).v2;
- s += docName;
- s += ';';
- s += df.format(sortedFiles.get(i).v1);
- s += ';';
- s = s + '[' + new Integer(maxWordsEnd.get(docName).get(0).length()).toString() + ','
- + new Integer(maxWordsEnd.get(docName).size()) + ']' + '\n';
- wr.write(s, 0, s.length());
- }
- wr.close();
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement