Advertisement
Guest User

Untitled

a guest
Nov 12th, 2019
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 5.15 KB | None | 0 0
  1.  
  2. import java.io.*;
  3. import java.util.*;
  4. import java.util.regex.*;
  5.  
  /**
   * Tallies word frequencies from SGML files named {@code reut2-NNN.sgm}.
   *
   * <p>Three kinds of element are scanned: {@code <BODY>}, {@code <TOPICS>} and
   * {@code <PLACES>}. Body words go into one frequency table; topic and place
   * tokens share a second table. Two reports are written to the working
   * directory: {@code body.txt} (body table) and {@code places.txt}
   * (topic/place table), each preceded by the element counts.
   *
   * <p>All state is static and unsynchronized, so the class is meant to be
   * driven from a single thread.
   */
  public class Main {

      /** Directory read by {@link #main} when no argument is supplied. */
      private static final String DEFAULT_INPUT_DIR = "Project1_inputs";

      /** Number of input files: reut2-000.sgm through reut2-021.sgm. */
      private static final int FILE_COUNT = 22;

      // DOTALL keeps the element patterns correct even if the buffered text
      // ever contains line breaks inside an element.
      private static final Pattern BODY = Pattern.compile("<BODY>(.*?)</BODY>", Pattern.DOTALL);
      private static final Pattern TOPICS = Pattern.compile("<TOPICS>(.*?)</TOPICS>", Pattern.DOTALL);
      private static final Pattern PLACES = Pattern.compile("<PLACES>(.*?)</PLACES>", Pattern.DOTALL);

      /** Any markup tag, e.g. the {@code <D>} wrappers inside an element. */
      private static final Pattern TAG = Pattern.compile("<[^>]*>");
      private static final Pattern NON_LETTER = Pattern.compile("[^a-zA-Z]");
      private static final Pattern WHITESPACE = Pattern.compile("\\s+");

      // Sorted maps make the report order reproducible from run to run.
      private static final Map<String, Integer> bodyValue = new TreeMap<>();
      private static final Map<String, Integer> topicPlacesValue = new TreeMap<>();

      /** Text that has been read but not yet parsed. */
      private static final StringBuilder sb = new StringBuilder();

      private static int counts = 0;            // bodies + topics + places
      private static int bodyCount = 0;
      private static int topicPlacesCount = 0;  // topics + places
      private static int placesCount = 0;
      private static int topicCount = 0;

      /**
       * Parses whatever text is currently buffered, adds it to the running
       * tallies, and rewrites {@code body.txt} and {@code places.txt}.
       *
       * <p>The buffer is emptied by this call, so calling it again without new
       * input leaves the tallies, and therefore the reports, unchanged.
       */
      public static void parseSGMAndFileWrite() {
          parseBuffer();
          writeReports();
      }

      /**
       * Reads the {@value #FILE_COUNT} input files, tallies them, and writes
       * both reports once at the end.
       *
       * @param args optional; {@code args[0]} overrides the input directory
       *             (default {@code Project1_inputs})
       * @throws IOException if an input file is missing or cannot be read
       */
      public static void main(String[] args) throws IOException {
          String inputDir = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_DIR;
          resetTallies();
          for (int fileNumber = 0; fileNumber < FILE_COUNT; fileNumber++) {
              File file = new File(inputDir, String.format("reut2-%03d.sgm", fileNumber));
              readIntoBuffer(file);
              parseBuffer();
          }
          writeReports();
      }

      /** Clears every tally and the pending buffer so a run starts from zero. */
      private static void resetTallies() {
          bodyValue.clear();
          topicPlacesValue.clear();
          sb.setLength(0);
          counts = 0;
          bodyCount = 0;
          topicPlacesCount = 0;
          placesCount = 0;
          topicCount = 0;
      }

      /** Appends the lines of {@code file} to the buffer, separated by spaces. */
      private static void readIntoBuffer(File file) throws IOException {
          // BufferedReader propagates read failures as IOException; Scanner
          // instead treats a failed read as end of input.
          try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
              String line;
              while ((line = reader.readLine()) != null) {
                  // The separator stops the last word of one line from fusing
                  // with the first word of the next.
                  sb.append(line).append(' ');
              }
          }
      }

      /** Consumes the buffer and folds its elements into the running tallies. */
      private static void parseBuffer() {
          String text = sb.toString();
          // Empty the buffer so text already tallied is never counted again.
          sb.setLength(0);

          int bodies = tallyElements(BODY, text, true, bodyValue);
          int topicsFound = tallyElements(TOPICS, text, false, topicPlacesValue);
          int placesFound = tallyElements(PLACES, text, false, topicPlacesValue);

          bodyCount += bodies;
          topicCount += topicsFound;
          placesCount += placesFound;
          topicPlacesCount += topicsFound + placesFound;
          counts += bodies + topicsFound + placesFound;
      }

      /**
       * Tallies the tokens of every match of {@code element} in {@code text}.
       *
       * @param lettersOnly when true, every character outside a-z/A-Z becomes a
       *                    separator; character entities are not decoded, so the
       *                    letters of an entity such as {@code &lt;} count as a word
       * @return the number of matched elements that held at least one token
       */
      private static int tallyElements(Pattern element, String text, boolean lettersOnly,
                                       Map<String, Integer> into) {
          int nonEmpty = 0;
          Matcher matcher = element.matcher(text);
          while (matcher.find()) {
              String content = TAG.matcher(matcher.group(1)).replaceAll(" ");
              if (lettersOnly) {
                  content = NON_LETTER.matcher(content).replaceAll(" ");
              }
              // Locale.ROOT keeps lower-casing independent of the default locale.
              if (tallyTokens(content.toLowerCase(Locale.ROOT), into)) {
                  nonEmpty++;
              }
          }
          return nonEmpty;
      }

      /**
       * Adds one to the tally of each whitespace-separated token in {@code text}.
       *
       * @return true if at least one token was found
       */
      private static boolean tallyTokens(String text, Map<String, Integer> into) {
          boolean any = false;
          for (String token : WHITESPACE.split(text)) {
              if (token.isEmpty()) {
                  continue; // split yields a leading "" when text starts with a separator
              }
              Integer seen = into.get(token);
              into.put(token, seen == null ? 1 : seen + 1);
              any = true;
          }
          return any;
      }

      /** Writes both report files from the current tallies. */
      private static void writeReports() {
          String total = counts + " Count of all topics, places and body";
          writeReport("body.txt",
                  Arrays.asList(total, bodyCount + " Count of body"),
                  bodyValue);
          writeReport("places.txt",
                  Arrays.asList(total,
                          topicCount + " Count of topic",
                          placesCount + " Count of places",
                          topicPlacesCount + " Count of topics and places"),
                  topicPlacesValue);
      }

      /**
       * Writes the header lines followed by one {@code token count} line per entry.
       * Failures are reported on standard error and do not stop the program.
       */
      private static void writeReport(String fileName, List<String> header,
                                      Map<String, Integer> tallies) {
          try (PrintWriter out = new PrintWriter(fileName)) {
              for (String line : header) {
                  out.println(line);
              }
              for (Map.Entry<String, Integer> entry : tallies.entrySet()) {
                  out.println(entry.getKey() + " " + entry.getValue());
              }
              // PrintWriter never throws on write; checkError is the only signal.
              if (out.checkError()) {
                  System.err.println("Error while writing " + fileName);
              }
          } catch (FileNotFoundException e) {
              System.err.println("Could not open " + fileName + " for writing: " + e.getMessage());
          }
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement