Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.List;
- import javax.xml.parsers.ParserConfigurationException;
- import javax.xml.parsers.SAXParser;
- import javax.xml.parsers.SAXParserFactory;
- import org.xml.sax.Attributes;
- import org.xml.sax.SAXException;
- import org.xml.sax.helpers.DefaultHandler;
- import java.sql.*;
- public class ProjectParser extends DefaultHandler{
- List myDocs;
- private String tempVal;
- //to maintain context
- private Document tempDoc;
- private String currentDoc;
- static Connection conn = null;
- static Statement stmt;
- public ProjectParser() {
- myDocs = new ArrayList();
- }
- public void runExample() {
- parseDocument();
- printData();
- }
- private void parseDocument() {
- //get a factory
- SAXParserFactory spf = SAXParserFactory.newInstance();
- try {
- //get a new instance of parser
- SAXParser sp = spf.newSAXParser();
- //parse the file and also register this class for call backs
- sp.parse("/Users/bshiaw/desktop/bigData/dblp-data.xml", this);
- }catch(SAXException se) {
- se.printStackTrace();
- }catch(ParserConfigurationException pce) {
- pce.printStackTrace();
- }catch (IOException ie) {
- ie.printStackTrace();
- }
- }
- /**
- * Iterate through the list and print
- * the contents
- */
- private void printData(){
- System.out.println("Number of Documents '" + myDocs.size() + "'.");
- Iterator it = myDocs.iterator();
- while(it.hasNext()) {
- System.out.println(it.next().toString());
- }
- }
- public static void main(String[] args) throws Exception {
- // Incorporate mySQL driver
- Class.forName("com.mysql.jdbc.Driver").newInstance();
- // Connect to the test database
- conn = DriverManager.getConnection("jdbc:mysql:///bookdb","root", "lakers");
- stmt = conn.createStatement();
- stmt.executeBatch();
- ProjectParser pp = new ProjectParser();
- pp.runExample();
- }
- //Event Handlers
- public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
- //reset
- tempVal = "";
- if(qName.equalsIgnoreCase("book")) {
- //create a new instance of employee
- tempDoc = new Document();
- tempDoc.setmdate(attributes.getValue("mdate"));
- tempDoc.setKey(attributes.getValue("key"));
- tempDoc.setDocType("Book");
- currentDoc = "Book";
- try {
- stmt.addBatch("INSERT INTO tbl_genres (genre_name) " +
- "VALUES('"+ currentDoc +"');");
- }catch (Exception e) {
- e.printStackTrace();
- }
- }
- else if (qName.equalsIgnoreCase("incollection")) {
- tempDoc = new Document();
- tempDoc.setmdate(attributes.getValue("mdate"));
- tempDoc.setKey(attributes.getValue("key"));
- tempDoc.setDocType("Incollection");
- currentDoc = "Incollection";
- try {
- stmt.addBatch("INSERT INTO tbl_genres (genre_name) " +
- "VALUES('"+ currentDoc +"');");
- }catch (Exception e) {
- e.printStackTrace();
- }
- }
- else if (qName.equalsIgnoreCase("proceedings")) {
- tempDoc = new Document();
- tempDoc.setmdate(attributes.getValue("mdate"));
- tempDoc.setKey(attributes.getValue("key"));
- tempDoc.setDocType("Proceedings");
- currentDoc = "Proceedings";
- try {
- stmt.addBatch("INSERT INTO tbl_genres (genre_name) " +
- "VALUES('"+ currentDoc +"');");
- }catch (Exception e) {
- e.printStackTrace();
- }
- }
- else if (qName.equalsIgnoreCase("inproceedings")) {
- tempDoc = new Document();
- tempDoc.setmdate(attributes.getValue("mdate"));
- tempDoc.setKey(attributes.getValue("key"));
- tempDoc.setDocType("Inproceedings");
- currentDoc = "Inproceedings";
- try {
- stmt.addBatch("INSERT INTO tbl_genres (genre_name) " +
- "VALUES('"+ currentDoc +"');");
- }catch (Exception e) {
- e.printStackTrace();
- }
- }
- }
- public void characters(char[] ch, int start, int length) throws SAXException {
- tempVal = new String(ch,start,length);
- }
- public void endElement(String uri, String localName, String qName) throws SAXException {
- if (qName.equalsIgnoreCase(currentDoc)){
- //add it to the list
- myDocs.add(tempDoc);
- }else if (qName.equalsIgnoreCase("editor")) {
- tempDoc.setEditor(tempVal);
- }else if (qName.equalsIgnoreCase("title")) {
- tempDoc.setTitle(tempVal);
- }else if (qName.equalsIgnoreCase("year")) {
- tempDoc.setYear(tempVal);
- }else if (qName.equalsIgnoreCase("booktitle")) {
- tempDoc.setBookTitle(tempVal);
- }else if (qName.equalsIgnoreCase("publisher")) {
- tempDoc.setPublisher(tempVal);
- }else if (qName.equalsIgnoreCase("isbn")) {
- tempDoc.setISBN(tempVal);
- }else if (qName.equalsIgnoreCase("url")) {
- tempDoc.setURL(tempVal);
- }else if (qName.equalsIgnoreCase("author")) {
- tempDoc.setAuthor(tempVal);
- }else if (qName.equalsIgnoreCase("pages")) {
- tempDoc.setPages(tempVal);
- }else if (qName.equalsIgnoreCase("address")) {
- tempDoc.setAddress(tempVal);
- }else if (qName.equalsIgnoreCase("journal")) {
- tempDoc.setJournal(tempVal);
- }else if (qName.equalsIgnoreCase("volume")) {
- tempDoc.setVolume(tempVal);
- }else if (qName.equalsIgnoreCase("number")) {
- tempDoc.setNumber(tempVal);
- }else if (qName.equalsIgnoreCase("month")) {
- tempDoc.setMonth(tempVal);
- }else if (qName.equalsIgnoreCase("ee")) {
- tempDoc.setEE(tempVal);
- }else if (qName.equalsIgnoreCase("cdrom")) {
- tempDoc.setCDRom(tempVal);
- }else if (qName.equalsIgnoreCase("cite")) {
- tempDoc.setCite(tempVal);
- }else if (qName.equalsIgnoreCase("note")) {
- tempDoc.setNote(tempVal);
- }else if (qName.equalsIgnoreCase("crossref")) {
- tempDoc.setCrossref(tempVal);
- }else if (qName.equalsIgnoreCase("series")) {
- tempDoc.setSeries(tempVal);
- }else if (qName.equalsIgnoreCase("school")) {
- tempDoc.setSchool(tempVal);
- }else if (qName.equalsIgnoreCase("chapter")) {
- tempDoc.setChapter(tempVal);
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement