Advertisement
Guest User

Untitled

a guest
Nov 12th, 2019
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.16 KB | None | 0 0
  1. import java.io.*;
  2. import java.util.*;
  3. import java.util.regex.*;
  4.  
  /**
   * Builds two term-frequency tables from the SGML-style input files
   * {@code ./Project1_inputs/reut2-000.sgm} through {@code reut2-021.sgm} and writes them to
   * {@code output.txt} in the working directory.
   *
   * <p>The first table counts the lower-cased tokens found inside {@code <TOPICS>} and
   * {@code <PLACES>} elements; the second counts the lower-cased alphabetic words found inside
   * {@code <BODY>} elements. Each table is written as one {@code "term count"} line per term,
   * topics/places first, then body words, each sorted by term.
   */
  public class Project {

      /** Number of input files; they are numbered 0 .. FILE_COUNT - 1. */
      private static final int FILE_COUNT = 22;

      /** {@link String#format} template for the input file names. */
      private static final String INPUT_NAME_FORMAT = "./Project1_inputs/reut2-%03d.sgm";

      /** File that receives both frequency tables. */
      private static final String OUTPUT_FILE = "output.txt";

      /** Matches a whole TOPICS or PLACES element, possibly spanning several lines. */
      private static final Pattern TP_NAME =
              Pattern.compile("<TOPICS>(.*?)</TOPICS>|<PLACES>(.*?)</PLACES>", Pattern.DOTALL);

      /** Matches a BODY element; group 1 is the text between the tags. */
      private static final Pattern BODY_NAME =
              Pattern.compile("<BODY>(.*?)</BODY>", Pattern.DOTALL);

      /** Matches any markup tag such as {@code <D>} or {@code </TOPICS>}. */
      private static final Pattern TAG = Pattern.compile("<[^>]*>");

      /** Matches every character that is neither an ASCII letter nor a space. */
      private static final Pattern NON_LETTER = Pattern.compile("[^a-zA-Z ]");

      /**
       * Reads every available input file, accumulates the counts across all of them and writes
       * the result once at the end.
       *
       * <p>Input files that do not exist are skipped with a note on standard error, so a
       * partial data set still produces output. Any other I/O failure is reported on standard
       * error instead of being silently dropped.
       *
       * @param args ignored
       */
      public static void main(String[] args) {
          // TreeMap keeps the output sorted by term, so it is stable from run to run.
          Map<String, Integer> tpFrequency = new TreeMap<String, Integer>();
          Map<String, Integer> bodyFrequency = new TreeMap<String, Integer>();

          try {
              for (int fileNumber = 0; fileNumber < FILE_COUNT; fileNumber++) {
                  String filename = String.format(INPUT_NAME_FORMAT, fileNumber);
                  if (!new File(filename).isFile()) {
                      System.err.println("Skipping missing input file: " + filename);
                      continue;
                  }
                  String contents = readFile(filename);
                  countTopicsAndPlaces(contents, tpFrequency);
                  countBodyWords(contents, bodyFrequency);
              }

              // Write only after all input is consumed; try-with-resources guarantees the
              // writer is flushed and closed even when no BODY element was found.
              try (PrintWriter out = new PrintWriter(OUTPUT_FILE)) {
                  writeFrequencies(out, tpFrequency);
                  writeFrequencies(out, bodyFrequency);
                  // PrintWriter never throws on write errors; it only records them.
                  if (out.checkError()) {
                      throw new IOException("Error while writing " + OUTPUT_FILE);
                  }
              }
          } catch (IOException e) {
              System.err.println("Failed to build frequency tables: " + e);
          }
      }

      /**
       * Reads a whole text file into a string using the platform default charset.
       *
       * @param filename path of the file to read
       * @return the file contents with every line terminated by {@code '\n'}
       * @throws IOException if the file cannot be opened or read
       */
      private static String readFile(String filename) throws IOException {
          StringBuilder contents = new StringBuilder();
          try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
              for (String line; (line = reader.readLine()) != null;) {
                  // Keep a separator, otherwise the last word of one line fuses with the
                  // first word of the next one.
                  contents.append(line).append('\n');
              }
          }
          return contents.toString();
      }

      /**
       * Adds every token found inside TOPICS and PLACES elements to {@code counts}.
       *
       * @param contents text to scan
       * @param counts   table to update in place
       */
      private static void countTopicsAndPlaces(String contents, Map<String, Integer> counts) {
          Matcher matcher = TP_NAME.matcher(contents);
          while (matcher.find()) {
              // Replacing tags with a blank leaves the entries separated by whitespace.
              String stripped =
                      TAG.matcher(matcher.group(0).toLowerCase(Locale.ROOT)).replaceAll(" ");
              for (String token : stripped.split("\\s+")) {
                  if (!token.isEmpty()) {
                      increment(counts, token);
                  }
              }
          }
      }

      /**
       * Adds every alphabetic word found inside BODY elements to {@code counts}.
       *
       * @param contents text to scan
       * @param counts   table to update in place
       */
      private static void countBodyWords(String contents, Map<String, Integer> counts) {
          Matcher matcher = BODY_NAME.matcher(contents);
          while (matcher.find()) {
              // Group 1 excludes the surrounding tags, so "body" is not counted as a word.
              String lettersOnly = NON_LETTER.matcher(matcher.group(1)).replaceAll(" ");
              for (String word : lettersOnly.split(" +")) {
                  if (!word.isEmpty()) {
                      increment(counts, word.toLowerCase(Locale.ROOT));
                  }
              }
          }
      }

      /** Increases the count stored for {@code key} by one, starting at one for a new key. */
      private static void increment(Map<String, Integer> counts, String key) {
          Integer current = counts.get(key);
          counts.put(key, current == null ? 1 : current + 1);
      }

      /** Writes one {@code "term count"} line per entry, in the map's iteration order. */
      private static void writeFrequencies(PrintWriter out, Map<String, Integer> counts) {
          for (Map.Entry<String, Integer> entry : counts.entrySet()) {
              out.println(entry.getKey() + " " + entry.getValue());
          }
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement