Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.*;
- import java.util.*;
- import java.util.regex.*;
/**
 * Tallies word frequencies from the {@code <BODY>}, {@code <TOPICS>} and
 * {@code <PLACES>} sections of a fixed set of SGML input files and writes the
 * results to {@code body.txt} and {@code places.txt} in the working directory.
 *
 * <p>All state is static: {@link #main(String[])} loads the input text into a
 * shared buffer and {@link #parseSGMAndFileWrite()} derives every counter and
 * tally from that buffer.
 */
public class Main {
    /** {@link String#format} template for the input file names (zero-padded index). */
    private static final String INPUT_FILE_FORMAT = "Project1_inputs/reut2-%03d.sgm";
    /** Number of input files read by {@link #main(String[])}: indices 0..21. */
    private static final int INPUT_FILE_COUNT = 22;

    // NOTE(review): never populated or read anywhere in this file; presumably
    // intended for stop-word filtering -- confirm before wiring it in.
    private static final Set<String> COMMON_WORDS = new HashSet<>();

    // Sorted maps so that the report files list tokens in a deterministic order.
    private static final Map<String, Integer> topicPlacesValue = new TreeMap<>();
    private static final Map<String, Integer> bodyValue = new TreeMap<>();

    /** Non-empty BODY + TOPICS + PLACES sections found in the buffer. */
    private static int counts = 0;
    /** Non-empty BODY sections. */
    private static int bodyCount = 0;
    /** Non-empty TOPICS + PLACES sections. */
    private static int topicPlacesCount = 0;
    /** Non-empty PLACES sections. */
    private static int placesCount = 0;
    /** Non-empty TOPICS sections. */
    private static int topicCount = 0;

    /** Text of every input file loaded so far; each source line is followed by one space. */
    private static final StringBuilder sb = new StringBuilder();

    private static final Pattern BODY_SECTION = Pattern.compile("<BODY>(.*?)</BODY>");
    private static final Pattern TOPICS_SECTION = Pattern.compile("<TOPICS>(.*?)</TOPICS>");
    private static final Pattern PLACES_SECTION = Pattern.compile("<PLACES>(.*?)</PLACES>");
    private static final Pattern TAG = Pattern.compile("<.*?>");
    private static final Pattern NON_LETTER = Pattern.compile("[^a-zA-Z]");
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Recomputes every counter and tally from the text currently held in the
     * shared buffer and rewrites {@code body.txt} and {@code places.txt}.
     *
     * <p>The method starts from a clean slate on every call, so calling it
     * repeatedly on the same buffer yields the same reports instead of adding
     * the same sections to the totals again.
     */
    public static void parseSGMAndFileWrite() {
        bodyValue.clear();
        topicPlacesValue.clear();

        // Body text is reduced to ASCII letters; topic and place names keep
        // their punctuation (only nested tags are stripped).
        bodyCount = tallySections(BODY_SECTION, true, bodyValue);
        topicCount = tallySections(TOPICS_SECTION, false, topicPlacesValue);
        placesCount = tallySections(PLACES_SECTION, false, topicPlacesValue);
        topicPlacesCount = topicCount + placesCount;
        counts = bodyCount + topicPlacesCount;

        writeBodyReport();
        writePlacesReport();
    }

    /**
     * Finds every occurrence of {@code section} in the buffer, strips nested
     * tags, and adds the lower-cased whitespace-separated tokens to {@code tally}.
     *
     * @param section     pattern whose group 1 is the section content
     * @param lettersOnly when true, every character outside {@code a-zA-Z} is
     *                    treated as a token separator
     * @param tally       token-to-frequency map to update
     * @return the number of sections that contained at least one token
     */
    private static int tallySections(Pattern section, boolean lettersOnly, Map<String, Integer> tally) {
        int nonEmptySections = 0;
        Matcher matcher = section.matcher(sb);
        while (matcher.find()) {
            String text = TAG.matcher(matcher.group(1)).replaceAll(" ");
            if (lettersOnly) {
                text = NON_LETTER.matcher(text).replaceAll(" ");
            }
            text = text.trim();
            if (text.isEmpty()) {
                continue;
            }
            nonEmptySections++;
            // Locale.ROOT keeps case folding independent of the platform locale.
            // The text is trimmed, so splitting never yields an empty token.
            for (String token : WHITESPACE.split(text.toLowerCase(Locale.ROOT))) {
                Integer seen = tally.get(token);
                tally.put(token, seen == null ? 1 : seen + 1);
            }
        }
        return nonEmptySections;
    }

    /** Writes the overall and body counters followed by the body word tally to {@code body.txt}. */
    private static void writeBodyReport() {
        try (PrintWriter out = new PrintWriter("body.txt")) {
            out.println(counts + " Count of all topics, places and body");
            out.println(bodyCount + " Count of body");
            printTally(out, bodyValue);
            reportWriteFailure(out, "body.txt");
        } catch (FileNotFoundException e) {
            // Best effort, as before: report the problem and carry on.
            System.err.println("Could not open body.txt for writing: " + e.getMessage());
        }
    }

    /** Writes the section counters followed by the topic/place tally to {@code places.txt}. */
    private static void writePlacesReport() {
        try (PrintWriter out = new PrintWriter("places.txt")) {
            out.println(counts + " Count of all topics, places and body");
            out.println(topicCount + " Count of topic");
            out.println(placesCount + " Count of places");
            out.println(topicPlacesCount + " Count of topics and places");
            printTally(out, topicPlacesValue);
            reportWriteFailure(out, "places.txt");
        } catch (FileNotFoundException e) {
            // Best effort, as before: report the problem and carry on.
            System.err.println("Could not open places.txt for writing: " + e.getMessage());
        }
    }

    /** Prints one {@code "token frequency"} line per entry of {@code tally}. */
    private static void printTally(PrintWriter out, Map<String, Integer> tally) {
        for (Map.Entry<String, Integer> entry : tally.entrySet()) {
            out.println(entry.getKey() + " " + entry.getValue());
        }
    }

    /** PrintWriter never throws on write errors, so surface them explicitly. */
    private static void reportWriteFailure(PrintWriter out, String fileName) {
        if (out.checkError()) {
            System.err.println("An I/O error occurred while writing " + fileName);
        }
    }

    /**
     * Appends every line of {@code file} to the shared buffer, each followed
     * by a single space so that words on adjacent lines are not fused together.
     *
     * @throws IOException if the file cannot be opened or a read error occurs
     */
    private static void appendFile(File file) throws IOException {
        // ISO-8859-1 maps every byte to a character, so decoding cannot fail
        // whatever bytes the file contains.
        // NOTE(review): the real encoding of the inputs is not stated in this file -- confirm.
        try (Scanner scan = new Scanner(file, "ISO-8859-1")) {
            while (scan.hasNextLine()) {
                sb.append(scan.nextLine()).append(' ');
            }
            // Scanner swallows read errors and simply stops; rethrow them.
            IOException failure = scan.ioException();
            if (failure != null) {
                throw failure;
            }
        }
    }

    /**
     * Loads all input files into the shared buffer, then parses the buffer
     * once and writes both report files.
     *
     * @param args ignored
     * @throws IOException if an input file is missing or cannot be read
     */
    public static void main(String[] args) throws IOException {
        sb.setLength(0);
        for (int fileNumber = 0; fileNumber < INPUT_FILE_COUNT; fileNumber++) {
            appendFile(new File(String.format(Locale.ROOT, INPUT_FILE_FORMAT, fileNumber)));
        }
        parseSGMAndFileWrite();
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement