Advertisement
KillianMills

HttpRequest.java

Nov 26th, 2015
185
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 11.21 KB | None | 0 0
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.*;

  5. public class HttpRequest {
  6.  
  7.     TreeMap<Double, String[]> topResultsExtended = new TreeMap<>(Collections.reverseOrder());
  8.     TreeMap<Double, String[]> topResults = new TreeMap<>(Collections.reverseOrder());
  9.  
  10.     public static void main(String[] args) throws Exception {
  11.  
  12.         String basicInput = "basic";
  13.         String extendedInput = "extended";
  14.  
  15.         List<String> DocumentsWhole = new ArrayList<>();
  16.         List<List<String>> DocumentTopValues = new ArrayList<>();
  17.         List<Integer> DocumentLengths = new ArrayList<>();
  18.         //String basicQuery = "http://136.206.115.117:8080/IRModelGenerator/SearchServlet?query=bone%20disease&simf=BM25&k=1.2&b=0.75&numwanted=50";
  19.     //String basicQuery = "http://136.206.115.117:8080/IRModelGenerator/SearchServlet?query=hydrophonics&simf=BM25&k=1.2&b=0.75&numwanted=50";
  20.     //String basicQuery = "http://136.206.115.117:8080/IRModelGenerator/SearchServlet?query=Adoptive%20Biological%20Parents&simf=BM25&k=1.2&b=0.75&numwanted=50";
  21.     String basicQuery = "http://136.206.115.117:8080/IRModelGenerator/SearchServlet?query=International%20Organized%20Crime&simf=BM25&k=1.2&b=0.75&numwanted=50";
  22.  
  23.         //run the program with the normal URL
  24.         TreeMap<Double, String[]> topResults = (TreeMap<Double, String[]>) collectVals(basicQuery, basicInput, DocumentsWhole, DocumentTopValues, DocumentLengths);
  25.         trecEval("resultsba.res");
  26.         String extendedQuery = "http://136.206.115.117:8080/IRModelGenerator/SearchServlet?query=";
  27.  
  28.         int counter=0;
  29.         //make the new URL for the extended query
  30.         for(Map.Entry<Double,String[]> entry : topResults.entrySet()){
  31.             if (counter > 9){
  32.                 break;
  33.             }
  34.  
  35.             String searchName[] = entry.getValue();
  36.  
  37.             if(counter == 0){
  38.                 extendedQuery = extendedQuery + searchName[0];
  39.         //System.out.println(searchName[0]);
  40.         //System.out.println(searchName[1]);
  41.             }
  42.             else{
  43.                 extendedQuery = extendedQuery +"%20"+ searchName[0];
  44.             }
  45.             counter++;
  46.         }
  47.  
  48.         extendedQuery = extendedQuery + "&simf=BM25&k=1.2&b=0.75&numwanted=50";
  49.  
  50.         //run the program with extended URL
  51.         TreeMap<Double, String[]> topResultsExtended = (TreeMap<Double, String[]>) collectVals(extendedQuery, extendedInput, DocumentsWhole, DocumentTopValues, DocumentLengths);
  52.         //System.out.println(extendedQuery);
  53.     trecEval("resultsexten.res");
  54.  
  55.     } // END OF MAIN
  56. //--------------------------------------------------------------------------------------------------------------
  57. //--------------------------------------------------------------------------------------------------------------
  58. //--------------------------------------------------------------------------------------------------------------
  59.  
  60.     public static String getHTML(String urlToRead) throws Exception
  61.     {
  62.         StringBuilder result = new StringBuilder();
  63.  
  64.         URL url = new URL(urlToRead);
  65.  
  66.         HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  67.  
  68.         conn.setRequestMethod("GET");
  69.  
  70.         BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream()));
  71.  
  72.         String line;
  73.  
  74.         while ((line = rd.readLine()) != null)
  75.         {
  76.             result.append(line);
  77.         }
  78.         rd.close();
  79.         return result.toString();
  80.     }
  81.  
  82.     public static double Robertson(double ni, double n, double ri, double r) {
  83.         double rwi = 0;
  84.         double topline = (ri + 0.5) * (n - ni - r + ri + 0.5);
  85.         double bottomline = (ni - ri + 0.5) * (r - ri + 0.5);
  86.  
  87.         rwi = Math.log10(topline / bottomline);
  88.  
  89.         double result = rwi * ri;
  90.         return result;
  91.     }
  92.  
  93.  
  94.     public static Map<Double, String[]> collectVals(String query, String inputFile, List<String> DocumentsWhole, List<List<String>>DocumentTopValues, List<Integer>DocumentLengths) throws Exception {
  95.         String URLData = getHTML(query);
  96.  
  97.     List<String> Links = new ArrayList<>();
  98.     int counter2 = 0;
  99.  
  100.         for (int articleNum = 0; articleNum < 10; articleNum++) { // Loop for the first 10 articles
  101.  
  102.     //blank"> </a>
  103.             URLData = URLData.substring(URLData.indexOf("<br>") + 4);
  104.         String TempLinkToDoc = URLData.substring(URLData.indexOf("_blank") + 7);
  105.             String InnerURLData = URLData.substring(0, URLData.indexOf("</div>"));
  106.             String LinkToDoc = TempLinkToDoc.substring(0, TempLinkToDoc.indexOf("</a>"));
  107.  
  108.         Links.add(LinkToDoc);
  109.  
  110.             DocumentsWhole.add(InnerURLData); // adds the whole document to the array list
  111.  
  112.             List<String> documentTop = new ArrayList<>();
  113.             int documentLength = 0;
  114.  
  115.             //record the word
  116.             String current = "";
  117.  
  118.             //records the number of values being recorded
  119.             int counter = 0;
  120.  
  121.             for (int i = 0; i < InnerURLData.length(); i++) {
  122.  
  123.                 //reads character by character
  124.                 current = current + InnerURLData.charAt(i);
  125.  
  126.                 //if the string has a space, we have a word e.g:bone:
  127.                 if (current.contains(":")) {
  128.  
  129.                     //clean up the values when adding to list
  130.                     current = current.replace(":", "");
  131.  
  132.                     if (counter < 30) {
  133.                         documentTop.add(current);
  134.             documentTop.add(LinkToDoc);
  135.                     }
  136.  
  137.                     //reset the string
  138.                     current = "";
  139.                     // up the counter for each word and IDF collected e.g 39, 169.98874
  140.                     counter++;
  141.                 }
  142.  
  143.                 //if the string has a space, we have a value
  144.                 else if (current.contains(" ")) {
  145.  
  146.                     //clean up the values when adding to list
  147.                     current = current.replace(" ", "");
  148.  
  149.                     //For the Occurrences
  150.                     if (current.contains(",")) {
  151.                         current = current.replace(",", "");
  152.                         documentLength = documentLength + Integer.parseInt(current);
  153.                     }
  154.  
  155.                     //For the IDFs
  156.                     else {
  157.                         current = current.replace(",", "");
  158.                     }
  159.  
  160.                     if (counter < 30) {
  161.                         documentTop.add(current);
  162.                     }
  163.  
  164.                     //reset the string
  165.                     current = "";
  166.                     // up the counter for each word and IDF collected
  167.                     counter++;
  168.                 }
  169.  
  170.             } // END OF SINGLE DOCUMENT LOOP
  171.  
  172.             DocumentLengths.add(documentLength);
  173.             DocumentTopValues.add(documentTop);
  174.  
  175.  
  176.         } // END OF TOP TEN DOCUMENTS LOOP
  177.  
  178.  
  179.         TreeMap<Double, String[]> topResults = produceResults(inputFile, DocumentsWhole, DocumentTopValues, DocumentLengths, Links);
  180.         return topResults;
  181.     }
  182.  
  183.     public static TreeMap<Double, String[]> produceResults(String fileName, List<String> DocumentsWhole, List<List<String>>DocumentTopValues, List<Integer>DocumentLengths, List<String> Links){
  184.  
  185.         List<Double> robertResults = new ArrayList<>();
  186.     List<String> recorder = new ArrayList<>();
  187.         TreeMap<Double, String[]> topResults = new TreeMap<>(Collections.reverseOrder());
  188.  
  189.         //go through each document
  190.         for(int outerLoop =0; outerLoop < DocumentTopValues.size(); outerLoop++){
  191.  
  192.             List<String> innerArray = DocumentTopValues.get(outerLoop);
  193.             //go through the top values, use name, occurrence and IDF
  194.             for(int innerLoop=0; innerLoop<innerArray.size(); innerLoop=innerLoop + 4 ) {
  195.         String[] tempHolder = new String[3];
  196.                
  197.                 //check if the name is in in any of the whole list of documents
  198.                 int counter=0;
  199.                 int finalLoop;
  200.                 for(finalLoop=0; finalLoop<DocumentsWhole.size(); finalLoop++) {
  201.  
  202.                     if (DocumentsWhole.get(finalLoop).contains(innerArray.get(innerLoop))) {
  203.  
  204.                         counter++;
  205.                     }
  206.                 }
  207.                 double IDF = Double.parseDouble(innerArray.get(innerLoop+2));
  208.                 double ni = (500000 / IDF );                            // N / IDF
  209.                 double n = 500000;                                      // 500,000
  210.                 double ri = counter;                                    // Check if topValue is in DocumentsWhole
  211.                 double r = 10;                                          // 10
  212.                 double robertsonValue = Robertson(ni, n, ri, r);
  213.  
  214.                 robertResults.add(robertsonValue);
  215.  
  216.                 String queryName = innerArray.get(innerLoop);
  217.         String link = innerArray.get(innerLoop+1);
  218.        
  219.         //System.out.println(queryName + " : " + link);
  220.  
  221.         if(!recorder.contains(queryName)){
  222.             tempHolder[0] = queryName;
  223.             tempHolder[1] = link;
  224.             //System.out.println( "----------------- "+robertsonValue+" ----------------"+queryName);
  225.             //System.out.println(topResults.size());
  226.                     topResults.put(robertsonValue, tempHolder);
  227.             recorder.add(queryName);
  228.         }
  229.        
  230.             }
  231.         }
  232.  
  233.         try {
  234.             produceFile(fileName, topResults, Links);
  235.         }
  236.  
  237.         catch (Exception e) {
  238.             e.printStackTrace();
  239.         }
  240.  
  241.         return topResults;
  242.     }
  243.  
  244.     public static void produceFile(String fileName, TreeMap<Double, String[]> topResults, List<String> Links)throws Exception{
  245.  
  246.     String namer = fileName.substring(0, fileName.length()-4);
  247.         PrintWriter out = new PrintWriter("results" + namer + ".res");
  248.         int counter = 0;
  249.     //System.out.println(Links);
  250.  
  251.         for(Map.Entry<Double,String[]> entry : topResults.entrySet()){
  252.             if (counter > 9){
  253.                break;
  254.             }
  255.  
  256.             Double robVal = entry.getKey();
  257.             String searchName[] = entry.getValue();
  258.         String currentLink = Links.get(counter).replace(">", "");
  259.         //currentLink = Links.get(counter).replace(" ", "");
  260.         //currentLink = Links.get(counter).replace(">", "");
  261.             //System.out.println("rob val:" + robVal + " " + " search name "+searchName[0] + " "+searchName[1]);
  262.             out.println("301"+"\t"+"Q0"+"\t"+currentLink+"\t"+counter+"\t"+robVal+"\t"+"BM25.1.2.0.75");
  263.             out.flush();
  264.         counter++;
  265.         }
  266.  
  267.     }
  268.  
  269.     public static void trecEval(String fileName) throws Exception{
  270.  
  271.     String namer = fileName.substring(0, fileName.length()-5);
  272.         PrintWriter writer = new PrintWriter("MAP"+ namer + ".res");
  273.     namer = namer + ".res";
  274.         Process proc = Runtime.getRuntime().exec("/users/case4/millsk2/SearchLabs/trec_eval.8.1/trec_eval qrels.trec678.adhoc " + namer); // results.res");
  275.         InputStream stdin = proc.getInputStream();
  276.         InputStreamReader isr = new InputStreamReader(stdin);
  277.         BufferedReader br = new BufferedReader(isr);
  278.         String line = null;
  279.  
  280.     System.out.println(line);
  281.         while ( (line = br.readLine()) != null){
  282.             writer.println(line);
  283.             System.out.println(line);
  284.             writer.flush();
  285.         }
  286.  
  287.         writer.close();
  288.  
  289.     }
  290.  
  291. } // END OF PROGRAM
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement