Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.*;
- import java.util.*;
- import java.util.regex.*;
/**
 * Counts token frequencies in the SGML input files
 * {@code ./Project1_inputs/reut2-000.sgm} through {@code reut2-021.sgm}.
 *
 * <p>Two independent tallies are built across all input files:
 * <ul>
 *   <li>entries found inside {@code <TOPICS>} and {@code <PLACES>} elements, and</li>
 *   <li>alphabetic words found inside {@code <BODY>} elements.</li>
 * </ul>
 * All tokens are lower-cased before counting. The result is written to
 * {@code output.txt} as one {@code "token count"} pair per line, topic/place
 * counts first, body-word counts second.
 */
public class Project {

    /** Input files are numbered 000 .. 021. */
    private static final int FILE_COUNT = 22;

    /** {@link String#format} template for the input file names. */
    private static final String INPUT_FORMAT = "./Project1_inputs/reut2-%03d.sgm";

    /** File the frequency tables are written to. */
    private static final String OUTPUT_FILE = "output.txt";

    /** Matches one TOPICS element (group 1) or one PLACES element (group 2). */
    private static final Pattern TOPIC_OR_PLACE =
            Pattern.compile("<TOPICS>(.*?)</TOPICS>|<PLACES>(.*?)</PLACES>", Pattern.DOTALL);

    /** Matches one BODY element; group 1 is the text between the tags. */
    private static final Pattern BODY = Pattern.compile("<BODY>(.*?)</BODY>", Pattern.DOTALL);

    /** Any markup tag nested inside a TOPICS/PLACES element. */
    private static final Pattern TAG = Pattern.compile("<[^>]*>");

    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Separator between body words: any run of characters that are not ASCII letters. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^a-zA-Z]+");

    /**
     * Reads every input file, accumulates both frequency tables and writes
     * them to {@code output.txt}.
     *
     * <p>An input file that cannot be read is reported on standard error and
     * skipped; the remaining files are still processed.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Map<String, Integer> tpFrequency = new HashMap<>();
        Map<String, Integer> bodyFrequency = new HashMap<>();

        for (int fileNumber = 0; fileNumber < FILE_COUNT; fileNumber++) {
            // Locale.ROOT keeps the digits ASCII regardless of the default locale.
            String filename = String.format(Locale.ROOT, INPUT_FORMAT, fileNumber);
            try {
                String contents = readFile(filename);
                countTopics(contents, tpFrequency);
                countBodyWords(contents, bodyFrequency);
            } catch (IOException e) {
                System.err.println("Skipping " + filename + ": " + e.getMessage());
            }
        }

        // The writer is opened only after all counting is done and is closed
        // exactly once, so every entry of both tables reaches the file.
        try (PrintWriter out = new PrintWriter(OUTPUT_FILE)) {
            writeCounts(out, tpFrequency);
            writeCounts(out, bodyFrequency);
            // PrintWriter never throws on write; failures are only visible here.
            if (out.checkError()) {
                System.err.println("Error while writing " + OUTPUT_FILE);
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open " + OUTPUT_FILE + ": " + e.getMessage());
        }
    }

    /** Reads a whole text file into a string, keeping a line break after every line. */
    private static String readFile(String filename) throws IOException {
        StringBuilder contents = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the terminator; put one back so the last word
                // of a line is not fused with the first word of the next line.
                contents.append(line).append('\n');
            }
        }
        return contents.toString();
    }

    /** Tallies the lower-cased entries of every TOPICS and PLACES element in {@code contents}. */
    private static void countTopics(String contents, Map<String, Integer> counts) {
        Matcher matcher = TOPIC_OR_PLACE.matcher(contents);
        while (matcher.find()) {
            // Exactly one of the two alternatives matched; the other group is null.
            String inner = matcher.group(1) != null ? matcher.group(1) : matcher.group(2);
            String withoutTags = TAG.matcher(inner).replaceAll(" ");
            for (String token : WHITESPACE.split(withoutTags)) {
                if (!token.isEmpty()) {
                    increment(counts, token.toLowerCase(Locale.ROOT));
                }
            }
        }
    }

    /** Tallies the lower-cased alphabetic words of every BODY element in {@code contents}. */
    private static void countBodyWords(String contents, Map<String, Integer> counts) {
        Matcher matcher = BODY.matcher(contents);
        while (matcher.find()) {
            // group(1) excludes the enclosing tags, so "BODY" itself is not counted.
            for (String word : NON_LETTERS.split(matcher.group(1))) {
                if (!word.isEmpty()) {
                    increment(counts, word.toLowerCase(Locale.ROOT));
                }
            }
        }
    }

    /** Adds one to the count stored under {@code key}, starting from zero if absent. */
    private static void increment(Map<String, Integer> counts, String key) {
        Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }

    /** Writes each entry of {@code counts} as a {@code "key value"} line. */
    private static void writeCounts(PrintWriter out, Map<String, Integer> counts) {
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            out.println(entry.getKey() + " " + entry.getValue());
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement