Advertisement
Guest User

Untitled

a guest
Apr 21st, 2014
35
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.66 KB | None | 0 0
  1. import java.io.BufferedWriter;
  2. import java.io.File;
  3. import java.io.FileNotFoundException;
  4. import java.io.FileWriter;
  5. import java.io.IOException;
  6. import java.io.PrintWriter;
  7. import java.util.ArrayList;
  8. import java.util.Collections;
  9. import java.util.Scanner;
  10. import org.jsoup.Jsoup;
  11.  
  12. public class Parser {
  13.  
  14. public static File infolder = new File("input folder folder path goes here");
  15. static String temp = "";
  16. static ArrayList<String> list = new ArrayList<String>();
  17.  
  18. public static void main(String[] args) throws FileNotFoundException
  19. {
  20. String outfolder = "output folder path goes here";
  21.  
  22. File theDir = new File(outfolder);
  23. // if the directory does not exist, create it
  24. if (!theDir.exists()) {
  25. System.out.println("creating directory: " + outfolder);
  26. boolean result = theDir.mkdir();
  27. if (result) {
  28. System.out.println("DIR created");
  29. }
  30. }
  31.  
  32. System.out.println("Reading files under the folder " + infolder.getAbsolutePath());
  33. parseFiles(infolder);
  34. // System.out.println();
  35. }
  36.  
  37. public static void parseFiles(final File folder) throws FileNotFoundException
  38. {
  39. PrintWriter out = null;
  40. for (final File fileEntry : folder.listFiles()) {
  41. if (fileEntry.isFile()) {
  42. temp = fileEntry.getName();
  43. if ((temp.substring(temp.lastIndexOf('.') + 1, temp.length()).toLowerCase()).equals("html")) {
  44. System.out.println("File= " + folder.getAbsolutePath() + "\" + fileEntry.getName());
  45. File file = new File(folder.getAbsolutePath() + "\" + fileEntry.getName());
  46. ArrayList<String> filetext = new ArrayList<String>();
  47. Scanner in = new Scanner(file);
  48.  
  49. while (in.hasNextLine()) {
  50. filetext.add(in.nextLine());
  51. }
  52.  
  53. String filename = "tokenfile" + fileEntry.getName();
  54.  
  55. try {
  56. out = new PrintWriter(new BufferedWriter(new FileWriter("C:/Users/bounty213/Desktop/Output/" + filename + ".txt", true)));
  57. }
  58. catch (IOException e) {
  59. //exception handling left as an exercise for the reader
  60. }
  61.  
  62. String parsed;
  63. for (String word : filetext) {
  64. parsed = Jsoup.parse(word).text();
  65. System.out.println(parsed);
  66. out.println(parsed);
  67. }
  68. out.close();
  69. }
  70. }
  71. }
  72. }
  73. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement