Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.File;
- import java.io.IOException;
- import java.util.Date;
- import java.util.HashMap;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import javax.swing.JFileChooser;
- import org.apache.pdfbox.contentstream.*;
- import org.apache.pdfbox.pdfparser.PDFParser;
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.pdfbox.text.PDFTextStripper;
- public class Logger {
- public static void main(String[] args) throws IOException
- {
- new Logger();
- }
- public Logger() throws IOException
- {
- //org.apache.pdfbox.contentstream.PDFStreamEngine p;
- //PDFStreamEngine pdcs = new PDFStreamEngine();
- //new PDFParser();
- //Create a file chooser
- final JFileChooser fc = new JFileChooser();
- //In response to a button click:
- File pdfFile;
- if(fc.showOpenDialog(null) == JFileChooser.APPROVE_OPTION)
- pdfFile = fc.getSelectedFile();
- else
- return;
- PDFTextStripper ts = new PDFTextStripper();
- PDDocument d = PDDocument.load(pdfFile);
- System.out.println("There are "+d.getNumberOfPages()+" pages.");
- Pattern pdate, ptime, pmessage, pname;
- Matcher m;
- pdate = Pattern.compile(
- "(?<month>(January)|(February)|(March)|(April)|(May)|(June)|(July)|(August)|(September)|(October)|(November)|(December))"+
- ".(?<day>\\d+)(\\w+)."+
- ".(?<year>\\d+)"
- );
- ptime = Pattern.compile(
- "(?<hour>\\d+):(?<minute>\\d+) (?<ampm>am|pm)"
- );
- pmessage = Pattern.compile(
- "(?<message>.+)"
- );
- pname = Pattern.compile(
- "^(?<name>\\w+)$"
- );
- String sDate, sTime, sName, sMessage;
- sDate = sTime = sName = sMessage = "";
- String[] arr = ts.getText(d).split("\n");
- d.close();
- boolean timePatternWasLast = false;
- for(String s : arr)
- //String s = "May 1st, 2016 ";
- //while(true)
- {
- s = s.replace('\u00A0', ' ').replaceAll("\\s", " ").trim();
- /*
- for(char c : s.toCharArray())
- System.out.println(c+"\t"+(int)c);
- */
- if(timePatternWasLast)
- {
- if((m = pname.matcher(s)).find())
- {
- sName = m.group("name");
- timePatternWasLast = false;
- continue;
- }
- }
- if((m = pdate.matcher(s)).find())
- {
- sDate = m.group("month")+" "+m.group("day")+" "+m.group("year");
- timePatternWasLast = false;
- }
- else if((m = ptime.matcher(s)).find())
- {
- sTime = m.group("hour")+":"+m.group("minute")+" "+m.group("ampm");
- timePatternWasLast = true;
- }
- else if((m = pmessage.matcher(s)).find())
- {
- sMessage = m.group("message");
- String id;
- Message mes = new Message(id = getId(sName, sDate+"\t"+sTime, sMessage), sName, sDate+"\t"+sTime, sMessage);
- System.out.println(mes);
- map.put(id, mes);
- timePatternWasLast = false;
- }
- }
- }
- static HashMap<String, Message> map = new HashMap<String, Message>();
- public class Message
- {
- String id;
- String name;
- String date;
- String message;
- public Message(String id_, String name_, String date_, String message_)
- {
- id = id_;
- name = name_;
- date = date_;
- message = message_;
- }
- public String toString()
- {
- return
- //"id : ["+id+"]\n"+
- "name : ["+name+"]\n"+
- "date : ["+date+"]\n"+
- "message: ["+message+"]\n";
- }
- }
- public static String getId(String n, String d, String m)
- {
- //Unique ID (not guaranteed)
- // NAME : DATE \t TIME : MessageFirstChar MessageLastChar
- return n+":"+d+":"+m;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement