Guest User

Untitled

a guest
Nov 14th, 2018
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.75 KB | None | 0 0
  1. public static void main(String[] args) throws Exception {
  2.  
  3. StopWatch stopwatch = new StopWatch();
  4. stopwatch.start();
  5.  
  6.  
  7. File folder = new File("D:\PDF1");
  8. File[] listOfFiles = folder.listFiles();
  9.  
  10. for (File file : listOfFiles) {
  11. if (file.isFile()) {
  12. HashSet<String> uniqueWords = new HashSet<>();
  13. String path = "D:\PDF1\" + file.getName();
  14. try (PDDocument document = PDDocument.load(new File(path))) {
  15.  
  16. if (!document.isEncrypted()) {
  17.  
  18. PDFTextStripper tStripper = new PDFTextStripper();
  19. String pdfFileInText = tStripper.getText(document);
  20. String lines[] = pdfFileInText.split("\r?\n");
  21. for (String line : lines) {
  22. String[] words = line.split(" ");
  23.  
  24.  
  25.  
  26. for (String word : words) {
  27. // check if one or more special characters at end of string then remove OR
  28. // check special characters in beginning of the string then remove
  29. uniqueWords.add(word.replaceAll("([\W]+$)|(^[\W]+)", ""));
  30.  
  31.  
  32.  
  33.  
  34. }
  35.  
  36. }
  37.  
  38.  
  39. }
  40. } catch (IOException e) {
  41. System.err.println("Exception while trying to read pdf document - " + e);
  42. }
  43. String[] words1 = uniqueWords.toArray(new String[uniqueWords.size()]);
  44.  
  45.  
  46.  
  47. MysqlAccessIndex connection = new MysqlAccessIndex();
  48.  
  49.  
  50.  
  51. connection.readDataBase(path, words1);
  52.  
  53.  
  54.  
  55. System.out.println("Completed");
  56.  
  57. }
  58. }
Add Comment
Please, Sign In to add comment