Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
- package plagiarism;
- import java.sql.ResultSet;
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.io.PrintStream;
- import javax.swing.text.html.HTMLEditorKit;
- import org.apache.pdfbox.cos.COSDocument;
- import org.apache.pdfbox.io.RandomAccess;
- import org.apache.pdfbox.io.RandomAccessFile;
- import org.apache.pdfbox.io.RandomAccessRead;
- import org.apache.pdfbox.pdfparser.PDFParser;
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.pdfbox.text.PDFTextStripper;
- import java.io.UnsupportedEncodingException;
- import java.sql.Connection;
- import java.sql.Driver;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.*;
- import java.sql.Statement;
- import java.util.Arrays;
- /**
- *
- * @author Munkhdelger
- */
- public class pirate {
- pirate() throws IOException, SQLException {
- String url = "jdbc:mysql://localhost:3306/sys?autoReconnect=true&useSSL=false";
- String user = "root";
- String password = "1234";
- PDFParser parser = null;
- PDDocument pdDoc = null;
- COSDocument cosDoc = null;
- PDFTextStripper pdfStripper;
- int a = 0;
- int c = 0;
- String array[] = new String [1000000];
- String parsedText;
- String fileName = "C:\\Users\\Munkhdelger\\Documents\\NetBeansProjects\\pdf.pdf";
- String name;
- try {
- Connection conn = DriverManager.getConnection(url, user, password);
- Statement st = conn.createStatement();
- String sql = "SELECT pdfpath FROM pdfbase";
- ResultSet rs = st.executeQuery(sql);
- while(rs.next()){
- String w = rs.getString(1);
- array[c] = w;
- c++;
- }
- System.out.println(c);
- for(int q = 0; q < c; q++){
- fileName = array[q];
- System.out.println(fileName);
- File file = new File(fileName);
- String file_name = "Piratetext";
- int count = 1;
- parser = new PDFParser(new RandomAccessFile(file, "r"));
- parser.parse();
- cosDoc = parser.getDocument();
- pdfStripper = new PDFTextStripper();
- pdDoc = new PDDocument(cosDoc);
- parsedText = pdfStripper.getText(pdDoc);
- name = file_name + (q + 1);
- File f = new File("D:\\Pirate_Text\\" + name + ".txt");
- if(f.exists() && !f.isDirectory()) {
- continue;
- } else {
- PrintStream out = new PrintStream(new FileOutputStream("D:\\Pirate_Text\\" + name + ".txt",true));
- out.print(parsedText);
- System.setOut(out);
- }
- }
- }
- catch (IOException e) {
- try {
- if (cosDoc != null)
- cosDoc.close();
- if (pdDoc != null)
- pdDoc.close();
- } catch (IOException e1) {}
- }
- catch (SQLException e){
- e.printStackTrace();
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement