Advertisement
Guest User

Untitled

a guest
May 16th, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.52 KB | None | 0 0
  1. /*
  2. * To change this license header, choose License Headers in Project Properties.
  3. * To change this template file, choose Tools | Templates
  4. * and open the template in the editor.
  5. */
  6. package plagiarism;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.sql.*;
import java.sql.Connection;
import java.sql.Driver;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.swing.text.html.HTMLEditorKit;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
  32.  
  33. /**
  34. *
  35. * @author Munkhdelger
  36. */
  37. public class Pirate {
  38. Pirate() throws IOException, SQLException {
  39.  
  40. String url = "jdbc:mysql://localhost:3306/sys?autoReconnect=true&useSSL=false";
  41. String user = "root";
  42. String password = "1234";
  43. PDFParser parser = null;
  44. PDDocument pdDoc = null;
  45. COSDocument cosDoc = null;
  46. PDFTextStripper pdfStripper;
  47.  
  48. int a = 0;
  49. int c = 0;
  50. String array[] = new String [1000000];
  51. String parsedText;
  52. String fileName = "C:\\Users\\Munkhdelger\\Documents\\NetBeansProjects\\pdf.pdf";
  53.  
  54. String name;
  55. try {
  56.  
  57. Connection conn = DriverManager.getConnection(url, user, password);
  58. Statement st = conn.createStatement();
  59. String sql = "SELECT pdfpath FROM pdfbase";
  60.  
  61. ResultSet rs = st.executeQuery(sql);
  62. while(rs.next()){
  63. String w = rs.getString(1);
  64. array[c] = w;
  65. c++;
  66. }
  67. System.out.println(c);
  68. for(int q = 0; q < c; q++){
  69. fileName = array[q];
  70. System.out.println(fileName);
  71. File file = new File(fileName);
  72. String file_name = "Piratetext";
  73.  
  74. parser = new PDFParser(new RandomAccessFile(file, "r"));
  75. parser.parse();
  76. cosDoc = parser.getDocument();
  77. pdfStripper = new PDFTextStripper();
  78. pdDoc = new PDDocument(cosDoc);
  79. parsedText = pdfStripper.getText(pdDoc);
  80.  
  81. name = file_name + (q + 1);
  82. File f = new File("D:\\Pirate_Text\\" + name + ".txt");
  83. if(f.exists() && !f.isDirectory()) {
  84. continue;
  85. } else {
  86. PrintStream out = new PrintStream(new FileOutputStream("D:\\Pirate_Text\\" + name + ".txt",true));
  87. out.print(parsedText);
  88. System.setOut(out);
  89. }
  90. }
  91. }
  92. catch (IOException e) {
  93. try {
  94. if (cosDoc != null)
  95. cosDoc.close();
  96. if (pdDoc != null)
  97. pdDoc.close();
  98. } catch (IOException e1) {}
  99. }
  100. catch (SQLException e){
  101. e.printStackTrace();
  102. }
  103. }
  104. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement