Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Query-expansion experiment against the IRModelGenerator search servlet.
 *
 * <p>The program runs a fixed query, parses the first ten entries of the returned HTML page,
 * scores every extracted term with {@link #Robertson(double, double, double, double)}, writes a
 * TREC-style results file, and evaluates it with an external {@code trec_eval} binary. It then
 * appends the ten highest-scoring terms to form an extended query and repeats the same steps.
 */
public class HttpRequest {

    /** Servlet URL up to and including the {@code query=} parameter name. */
    private static final String SEARCH_URL_PREFIX =
            "http://136.206.115.117:8080/IRModelGenerator/SearchServlet?query=";

    /** Ranking parameters appended after the query terms. */
    private static final String SEARCH_URL_SUFFIX = "&simf=BM25&k=1.2&b=0.75&numwanted=50";

    /** Number of result entries read from a result page. */
    private static final int TOP_DOCUMENTS = 10;

    /** Tokens beyond this count are still parsed but no longer stored per document. */
    private static final int MAX_TOKENS_PER_DOCUMENT = 30;

    /** Maximum number of ranked entries used for the results file and the extended query. */
    private static final int MAX_RANKED_ENTRIES = 10;

    /** Collection size N used in the term-weight formula. */
    private static final double COLLECTION_SIZE = 500000;

    /** Absolute path of the evaluation binary started by {@link #trecEval(String)}. */
    private static final String TREC_EVAL_BINARY =
            "/users/case4/millsk2/SearchLabs/trec_eval.8.1/trec_eval";

    /** Relevance-judgement file passed to the evaluation binary (resolved against the working directory). */
    private static final String QRELS_FILE = "qrels.trec678.adhoc";

    // Not read or written by any method in this class; kept so that code which constructs an
    // instance and touches these fields keeps compiling.
    TreeMap<Double, String[]> topResultsExtended = new TreeMap<>(Collections.reverseOrder());
    TreeMap<Double, String[]> topResults = new TreeMap<>(Collections.reverseOrder());

    /**
     * Runs the basic query and the extended query, evaluating each with {@code trec_eval}.
     *
     * @param args ignored
     * @throws Exception if a request, a file write, or the evaluation process fails
     */
    public static void main(String[] args) throws Exception {
        // Other topics previously tried here: "bone disease", "hydrophonics",
        // "Adoptive Biological Parents".
        String basicQuery = SEARCH_URL_PREFIX + "International%20Organized%20Crime" + SEARCH_URL_SUFFIX;

        // Each run gets its own accumulator lists. Sharing them made the extended run score
        // twenty documents against a relevant-set size of ten, which yields NaN weights.
        Map<Double, String[]> rankedTerms = collectVals(basicQuery, "basic",
                new ArrayList<String>(), new ArrayList<List<String>>(), new ArrayList<Integer>());

        // produceFile("basic") writes "resultsb.res"; trecEval drops the last five characters of
        // its argument and re-appends ".res", so this argument resolves to that same file.
        trecEval("resultsba.res");

        String extendedQuery = buildExtendedQuery(rankedTerms);
        collectVals(extendedQuery, "extended",
                new ArrayList<String>(), new ArrayList<List<String>>(), new ArrayList<Integer>());

        // Same scheme: produceFile("extended") writes "resultsexte.res".
        trecEval("resultsexten.res");
    }

    /**
     * Builds the extended query URL from the first ten entries of {@code rankedTerms}, in the
     * map's iteration order, separating terms with {@code %20}.
     *
     * @param rankedTerms score-to-{term, link} map as returned by {@link #collectVals}
     * @return the complete query URL
     * @throws UnsupportedEncodingException never in practice; UTF-8 is always available
     */
    private static String buildExtendedQuery(Map<Double, String[]> rankedTerms)
            throws UnsupportedEncodingException {
        StringBuilder url = new StringBuilder(SEARCH_URL_PREFIX);
        int used = 0;
        for (String[] termAndLink : rankedTerms.values()) {
            if (used >= MAX_RANKED_ENTRIES) {
                break;
            }
            if (used > 0) {
                url.append("%20");
            }
            // Terms come from a scraped page, so encode anything that is not URL-safe.
            url.append(URLEncoder.encode(termAndLink[0], "UTF-8"));
            used++;
        }
        return url.append(SEARCH_URL_SUFFIX).toString();
    }

    /**
     * Issues an HTTP GET and returns the response body with line terminators removed.
     *
     * @param urlToRead absolute URL to fetch
     * @return the body, all lines concatenated without separators
     * @throws Exception if the URL is malformed or the request fails
     */
    public static String getHTML(String urlToRead) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) new URL(urlToRead).openConnection();
        conn.setRequestMethod("GET");
        StringBuilder result = new StringBuilder();
        try (BufferedReader rd = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = rd.readLine()) != null) {
                // Lines are joined directly; the parser in collectVals works on the flat string.
                result.append(line);
            }
        } finally {
            conn.disconnect();
        }
        return result.toString();
    }

    /**
     * Computes {@code ri * log10(((ri + 0.5) * (n - ni - r + ri + 0.5)) / ((ni - ri + 0.5) * (r - ri + 0.5)))}.
     *
     * <p>As called from {@link #produceResults}: {@code ni} is the collection size divided by the
     * term's parsed value, {@code n} is the collection size, {@code ri} is the number of fetched
     * documents containing the term, and {@code r} is the number of fetched documents.
     *
     * @return the weighted value; NaN when the ratio inside the logarithm is negative or undefined
     */
    public static double Robertson(double ni, double n, double ri, double r) {
        double numerator = (ri + 0.5) * (n - ni - r + ri + 0.5);
        double denominator = (ni - ri + 0.5) * (r - ri + 0.5);
        return Math.log10(numerator / denominator) * ri;
    }

    /**
     * Fetches the result page for {@code query}, extracts up to ten result entries, appends their
     * data to the supplied lists, and ranks the extracted terms via {@link #produceResults}.
     *
     * <p>Each entry is located by scanning for {@code <br>}; its text runs up to the next
     * {@code </div>} and its link is the text between the next {@code _blank} marker (plus one
     * following character) and {@code </a>}. Scanning stops early when one of these markers is
     * missing, so a page with fewer than ten entries yields fewer documents instead of failing.
     *
     * @param query             full search URL
     * @param inputFile         name forwarded to {@link #produceResults} for the results file
     * @param DocumentsWhole    receives the raw text of every parsed entry
     * @param DocumentTopValues receives, per entry, the stored tokens (see {@link #parseDocument})
     * @param DocumentLengths   receives, per entry, the sum of its comma-terminated integer tokens
     * @return scores mapped to {@code {term, link, null}}, highest score first
     * @throws Exception if the request fails or a comma-terminated token is not an integer
     */
    public static Map<Double, String[]> collectVals(String query, String inputFile,
            List<String> DocumentsWhole, List<List<String>> DocumentTopValues,
            List<Integer> DocumentLengths) throws Exception {
        String remaining = getHTML(query);
        List<String> links = new ArrayList<>();

        for (int articleNum = 0; articleNum < TOP_DOCUMENTS; articleNum++) {
            int entryStart = remaining.indexOf("<br>");
            if (entryStart < 0) {
                break; // no further entries on the page
            }
            remaining = remaining.substring(entryStart + 4);

            int linkMarker = remaining.indexOf("_blank");
            int bodyEnd = remaining.indexOf("</div>");
            if (linkMarker < 0 || bodyEnd < 0 || linkMarker + 7 > remaining.length()) {
                break; // malformed or truncated entry
            }
            // Skips "_blank" plus one character; the leading '>' that remains is stripped later
            // by produceFile.
            String afterMarker = remaining.substring(linkMarker + 7);
            int linkEnd = afterMarker.indexOf("</a>");
            if (linkEnd < 0) {
                break;
            }

            String linkToDoc = afterMarker.substring(0, linkEnd);
            String entryText = remaining.substring(0, bodyEnd);
            links.add(linkToDoc);
            DocumentsWhole.add(entryText);

            List<String> documentTop = new ArrayList<>();
            DocumentLengths.add(parseDocument(entryText, linkToDoc, documentTop));
            DocumentTopValues.add(documentTop);
        }

        return produceResults(inputFile, DocumentsWhole, DocumentTopValues, DocumentLengths, links);
    }

    /**
     * Splits one entry into tokens. A token ended by {@code ':'} is stored followed by
     * {@code link}; a token ended by a space is stored on its own, and if it contains a comma the
     * commas are removed and its integer value is added to the returned total. Only the first
     * thirty tokens are stored, but every comma-terminated token counts towards the total.
     *
     * <p>NOTE(review): the exact layout of an entry is defined by the servlet and is not visible
     * here; {@link #produceResults} assumes groups of four stored values per term.
     *
     * @param entryText text of one result entry
     * @param link      link stored after every colon-terminated token
     * @param stored    receives the stored tokens
     * @return sum of all comma-terminated integer tokens
     * @throws NumberFormatException if a comma-terminated token is not an integer
     */
    private static int parseDocument(String entryText, String link, List<String> stored) {
        StringBuilder token = new StringBuilder();
        int tokensSeen = 0;
        int total = 0;
        for (int i = 0; i < entryText.length(); i++) {
            char c = entryText.charAt(i);
            if (c == ':') {
                if (tokensSeen < MAX_TOKENS_PER_DOCUMENT) {
                    stored.add(token.toString());
                    stored.add(link);
                }
                token.setLength(0);
                tokensSeen++;
            } else if (c == ' ') {
                String value = token.toString();
                if (value.contains(",")) {
                    value = value.replace(",", "");
                    total += Integer.parseInt(value);
                }
                if (tokensSeen < MAX_TOKENS_PER_DOCUMENT) {
                    stored.add(value);
                }
                token.setLength(0);
                tokensSeen++;
            } else {
                token.append(c);
            }
        }
        return total;
    }

    /**
     * Scores every distinct term found in {@code DocumentTopValues} and writes the results file.
     *
     * <p>Each inner list is read in groups of four, using element 0 as the term, element 1 as its
     * link and element 2 as the numeric value the collection size is divided by. A trailing group
     * too short to contain element 2 is skipped. Only the first occurrence of a term is scored.
     * The relevant-set size passed to {@link #Robertson} is {@code DocumentsWhole.size()}, the
     * same set the per-term document count is taken over.
     *
     * <p>Because the map is keyed by score, two terms with exactly equal scores share a key and
     * the later one replaces the earlier one.
     *
     * <p>A failure to write the results file is reported on standard error and does not prevent
     * the ranking from being returned.
     *
     * @param fileName          forwarded to {@link #produceFile}
     * @param DocumentsWhole    raw entry texts; a term counts for an entry when it occurs in it as
     *                          a substring
     * @param DocumentTopValues stored tokens per entry
     * @param DocumentLengths   not used by this method
     * @param Links             forwarded to {@link #produceFile}
     * @return scores mapped to {@code {term, link, null}}, highest score first
     * @throws NumberFormatException if element 2 of a scored group is not a number
     */
    public static TreeMap<Double, String[]> produceResults(String fileName,
            List<String> DocumentsWhole, List<List<String>> DocumentTopValues,
            List<Integer> DocumentLengths, List<String> Links) {
        Set<String> seenTerms = new HashSet<>();
        TreeMap<Double, String[]> topResults = new TreeMap<>(Collections.reverseOrder());
        double relevantSetSize = DocumentsWhole.size();

        for (List<String> values : DocumentTopValues) {
            for (int i = 0; i + 2 < values.size(); i += 4) {
                String term = values.get(i);
                if (!seenTerms.add(term)) {
                    continue; // already scored from an earlier entry
                }
                String link = values.get(i + 1);
                double divisor = Double.parseDouble(values.get(i + 2));

                int containing = 0;
                for (String document : DocumentsWhole) {
                    if (document.contains(term)) {
                        containing++;
                    }
                }

                double score = Robertson(COLLECTION_SIZE / divisor, COLLECTION_SIZE,
                        containing, relevantSetSize);
                // Third slot is intentionally left null, matching the array shape callers receive.
                topResults.put(score, new String[] {term, link, null});
            }
        }

        try {
            produceFile(fileName, topResults, Links);
        } catch (Exception e) {
            // Best effort: the ranking is still useful to the caller without the file.
            System.err.println("Could not write results file for '" + fileName + "': " + e);
        }
        return topResults;
    }

    /**
     * Writes up to ten tab-separated lines to {@code "results" + prefix + ".res"}, where
     * {@code prefix} is {@code fileName} without its last four characters (so {@code "basic"}
     * produces {@code resultsb.res}). A name shorter than four characters gives an empty prefix.
     *
     * <p>Line {@code i} has the columns {@code 301}, {@code Q0}, the i-th link with every
     * {@code '>'} removed, {@code i}, the i-th highest score, and {@code BM25.1.2.0.75}. Output
     * stops at whichever is exhausted first: ten lines, the scores, or the links.
     *
     * <p>NOTE(review): the scores are term weights while the links are taken in page order;
     * confirm that pairing them by position is intended.
     *
     * @param fileName   name the output file name is derived from
     * @param topResults scores in descending order
     * @param Links      links in the order they were extracted
     * @throws Exception if the file cannot be created or written
     */
    public static void produceFile(String fileName, TreeMap<Double, String[]> topResults,
            List<String> Links) throws Exception {
        String namer = fileName.substring(0, Math.max(0, fileName.length() - 4));
        try (PrintWriter out = new PrintWriter("results" + namer + ".res")) {
            int rank = 0;
            for (Double score : topResults.keySet()) {
                if (rank >= MAX_RANKED_ENTRIES || rank >= Links.size()) {
                    break;
                }
                String currentLink = Links.get(rank).replace(">", "");
                out.println("301" + "\t" + "Q0" + "\t" + currentLink + "\t" + rank + "\t" + score
                        + "\t" + "BM25.1.2.0.75");
                rank++;
            }
            // PrintWriter swallows I/O errors; surface them explicitly.
            if (out.checkError()) {
                throw new IOException("Failed writing results" + namer + ".res");
            }
        }
    }

    /**
     * Runs {@code trec_eval} on a results file and copies its standard output, line by line, to
     * both the console and {@code "MAP" + prefix + ".res"}.
     *
     * <p>{@code prefix} is {@code fileName} without its last five characters, and the file that
     * is evaluated is {@code prefix + ".res"} (so {@code "resultsba.res"} evaluates
     * {@code resultsb.res} and writes {@code MAPresultsb.res}). The tool's standard error is
     * forwarded to this process's standard error, and a non-zero exit status is reported there.
     *
     * @param fileName name the evaluated and written file names are derived from
     * @throws Exception if the output file cannot be created, the process cannot be started, or
     *                   the wait for it is interrupted
     */
    public static void trecEval(String fileName) throws Exception {
        String namer = fileName.substring(0, Math.max(0, fileName.length() - 5));
        String resultsFile = namer + ".res";

        try (PrintWriter writer = new PrintWriter("MAP" + namer + ".res")) {
            // Argument list instead of a single command string: no whitespace re-tokenising.
            Process proc = new ProcessBuilder(TREC_EVAL_BINARY, QRELS_FILE, resultsFile)
                    .redirectError(ProcessBuilder.Redirect.INHERIT) // keep stderr from blocking the child
                    .start();
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(proc.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    writer.println(line);
                    System.out.println(line);
                }
            }
            int exitCode = proc.waitFor();
            if (exitCode != 0) {
                System.err.println("trec_eval exited with status " + exitCode + " for " + resultsFile);
            }
        }
    }
} // END OF PROGRAM
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement