Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.File;
- import java.io.IOException;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.SQLException;
- import java.sql.Statement;
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.pdfbox.text.PDFTextStripper;
- public class SO53538039
- {
- static PreparedStatement preparedStatement;
- static Connection con1;
- public static void main(String[] args) throws IOException, SQLException
- {
- con1 = DriverManager.getConnection("jdbc:mysql://127.0.0.1/XXXXX", "XXXX", "XXXX");
- con1.setAutoCommit(false);
- Statement stmt = con1.createStatement();
- stmt.execute("drop table if exists indextable");
- stmt.execute("create table indextable "
- + "("
- + "path varchar(300), "
- + "word varchar(300), "
- + "primary key(path,word)"
- + ")");
- // Anzahl Seiten: 18519
- try (PDDocument document = PDDocument.load(new File("10-million-password-list-top-1000000.pdf")))
- {
- PDFTextStripper tStripper = new PDFTextStripper();
- for (int p = 1; p <= document.getNumberOfPages(); ++p)
- {
- tStripper.setStartPage(p);
- tStripper.setEndPage(p);
- String pdfFileInText = tStripper.getText(document);
- processText(pdfFileInText);
- System.out.println("page " + p + " done");
- }
- }
- con1.commit();
- }
- static public void processText(String text) throws SQLException
- {
- String lines[] = text.split("\\r?\\n");
- for (String line : lines)
- {
- String[] words = line.split(" ");
- String sql = "insert IGNORE into indextable values (?,?);";
- preparedStatement = con1.prepareStatement(sql);
- int i = 0;
- for (String word : words)
- {
- // check if one or more special characters at end of string then remove OR
- // check special characters in beginning of the string then remove
- // insert every word directly to table db
- word = word.replaceAll("([\\W]+$)|(^[\\W]+)", "");
- preparedStatement.setString(1, "path1");
- preparedStatement.setString(2, word);
- preparedStatement.addBatch();
- i++;
- if (i % 1000 == 0)
- {
- preparedStatement.executeBatch();
- System.out.println("Add Thousand");
- }
- }
- if (i > 0)
- {
- preparedStatement.executeBatch();
- //System.out.println("Add Remaining");
- }
- }
- preparedStatement.close();
- //System.out.println("Successfully commited changes to the database!");
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement