Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public static void main(String[] args) throws Exception {
- StopWatch stopwatch = new StopWatch();
- stopwatch.start();
- File folder = new File("D:\PDF1");
- File[] listOfFiles = folder.listFiles();
- for (File file : listOfFiles) {
- if (file.isFile()) {
- HashSet<String> uniqueWords = new HashSet<>();
- String path = "D:\PDF1\" + file.getName();
- try (PDDocument document = PDDocument.load(new File(path))) {
- if (!document.isEncrypted()) {
- PDFTextStripper tStripper = new PDFTextStripper();
- String pdfFileInText = tStripper.getText(document);
- String lines[] = pdfFileInText.split("\r?\n");
- for (String line : lines) {
- String[] words = line.split(" ");
- for (String word : words) {
- // check if one or more special characters at end of string then remove OR
- // check special characters in beginning of the string then remove
- uniqueWords.add(word.replaceAll("([\W]+$)|(^[\W]+)", ""));
- }
- }
- }
- } catch (IOException e) {
- System.err.println("Exception while trying to read pdf document - " + e);
- }
- String[] words1 = uniqueWords.toArray(new String[uniqueWords.size()]);
- MysqlAccessIndex connection = new MysqlAccessIndex();
- connection.readDataBase(path, words1);
- System.out.println("Completed");
- }
- }
Add Comment
Please, Sign In to add comment