Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.io.PrintWriter;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Scanner;
- import org.jsoup.Jsoup;
- public class Parser {
- public static File infolder = new File("input folder folder path goes here");
- static String temp = "";
- static ArrayList<String> list = new ArrayList<String>();
- public static void main(String[] args) throws FileNotFoundException
- {
- String outfolder = "output folder path goes here";
- File theDir = new File(outfolder);
- // if the directory does not exist, create it
- if (!theDir.exists()) {
- System.out.println("creating directory: " + outfolder);
- boolean result = theDir.mkdir();
- if (result) {
- System.out.println("DIR created");
- }
- }
- System.out.println("Reading files under the folder " + infolder.getAbsolutePath());
- parseFiles(infolder);
- // System.out.println();
- }
- public static void parseFiles(final File folder) throws FileNotFoundException
- {
- PrintWriter out = null;
- for (final File fileEntry : folder.listFiles()) {
- if (fileEntry.isFile()) {
- temp = fileEntry.getName();
- if ((temp.substring(temp.lastIndexOf('.') + 1, temp.length()).toLowerCase()).equals("html")) {
- System.out.println("File= " + folder.getAbsolutePath() + "\" + fileEntry.getName());
- File file = new File(folder.getAbsolutePath() + "\" + fileEntry.getName());
- ArrayList<String> filetext = new ArrayList<String>();
- Scanner in = new Scanner(file);
- while (in.hasNextLine()) {
- filetext.add(in.nextLine());
- }
- String filename = "tokenfile" + fileEntry.getName();
- try {
- out = new PrintWriter(new BufferedWriter(new FileWriter("C:/Users/bounty213/Desktop/Output/" + filename + ".txt", true)));
- }
- catch (IOException e) {
- //exception handling left as an exercise for the reader
- }
- String parsed;
- for (String word : filetext) {
- parsed = Jsoup.parse(word).text();
- System.out.println(parsed);
- out.println(parsed);
- }
- out.close();
- }
- }
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement