Advertisement
Guest User

Untitled

a guest
Jul 27th, 2017
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.95 KB | None | 0 0
  1. import java.io.IOException;
  2. import java.util.ArrayList;
  3. import java.util.Iterator;
  4. import java.util.List;
  5.  
  6. import javax.xml.parsers.ParserConfigurationException;
  7. import javax.xml.parsers.SAXParser;
  8. import javax.xml.parsers.SAXParserFactory;
  9.  
  10. import org.xml.sax.Attributes;
  11. import org.xml.sax.SAXException;
  12.  
  13. import org.xml.sax.helpers.DefaultHandler;
  14.  
  15. import java.sql.*;
  16.  
  17.  
  18. public class ProjectParser extends DefaultHandler{
  19. List myDocs;
  20.  
  21. private String tempVal;
  22. //to maintain context
  23. private Document tempDoc;
  24. private String currentDoc;
  25.  
  26. static Connection conn = null;
  27. static Statement stmt;
  28.  
  29.  
  30. public ProjectParser() {
  31. myDocs = new ArrayList();
  32. }
  33.  
  34.  
  35. public void runExample() {
  36. parseDocument();
  37. printData();
  38. }
  39.  
  40. private void parseDocument() {
  41.  
  42. //get a factory
  43. SAXParserFactory spf = SAXParserFactory.newInstance();
  44. try {
  45. //get a new instance of parser
  46. SAXParser sp = spf.newSAXParser();
  47.  
  48. //parse the file and also register this class for call backs
  49. sp.parse("/Users/bshiaw/desktop/bigData/dblp-data.xml", this);
  50.  
  51. }catch(SAXException se) {
  52. se.printStackTrace();
  53. }catch(ParserConfigurationException pce) {
  54. pce.printStackTrace();
  55. }catch (IOException ie) {
  56. ie.printStackTrace();
  57. }
  58. }
  59.  
  60. /**
  61. * Iterate through the list and print
  62. * the contents
  63. */
  64. private void printData(){
  65.  
  66. System.out.println("Number of Documents '" + myDocs.size() + "'.");
  67.  
  68. Iterator it = myDocs.iterator();
  69. while(it.hasNext()) {
  70. System.out.println(it.next().toString());
  71. }
  72. }
  73.  
  74. public static void main(String[] args) throws Exception {
  75. // Incorporate mySQL driver
  76. Class.forName("com.mysql.jdbc.Driver").newInstance();
  77.  
  78. // Connect to the test database
  79. conn = DriverManager.getConnection("jdbc:mysql:///bookdb","root", "lakers");
  80. stmt = conn.createStatement();
  81. stmt.executeBatch();
  82. ProjectParser pp = new ProjectParser();
  83. pp.runExample();
  84. }
  85.  
  86.  
  87.  
  88. //Event Handlers
  89. public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
  90. //reset
  91. tempVal = "";
  92. if(qName.equalsIgnoreCase("book")) {
  93. //create a new instance of employee
  94. tempDoc = new Document();
  95. tempDoc.setmdate(attributes.getValue("mdate"));
  96. tempDoc.setKey(attributes.getValue("key"));
  97. tempDoc.setDocType("Book");
  98. currentDoc = "Book";
  99. try {
  100. stmt.addBatch("INSERT INTO tbl_genres (genre_name) " +
  101. "VALUES('"+ currentDoc +"');");
  102. }catch (Exception e) {
  103. e.printStackTrace();
  104. }
  105.  
  106.  
  107. }
  108. else if (qName.equalsIgnoreCase("incollection")) {
  109. tempDoc = new Document();
  110. tempDoc.setmdate(attributes.getValue("mdate"));
  111. tempDoc.setKey(attributes.getValue("key"));
  112. tempDoc.setDocType("Incollection");
  113. currentDoc = "Incollection";
  114. try {
  115. stmt.addBatch("INSERT INTO tbl_genres (genre_name) " +
  116. "VALUES('"+ currentDoc +"');");
  117. }catch (Exception e) {
  118. e.printStackTrace();
  119. }
  120.  
  121. }
  122. else if (qName.equalsIgnoreCase("proceedings")) {
  123. tempDoc = new Document();
  124. tempDoc.setmdate(attributes.getValue("mdate"));
  125. tempDoc.setKey(attributes.getValue("key"));
  126. tempDoc.setDocType("Proceedings");
  127. currentDoc = "Proceedings";
  128. try {
  129. stmt.addBatch("INSERT INTO tbl_genres (genre_name) " +
  130. "VALUES('"+ currentDoc +"');");
  131. }catch (Exception e) {
  132. e.printStackTrace();
  133. }
  134.  
  135. }
  136. else if (qName.equalsIgnoreCase("inproceedings")) {
  137. tempDoc = new Document();
  138. tempDoc.setmdate(attributes.getValue("mdate"));
  139. tempDoc.setKey(attributes.getValue("key"));
  140. tempDoc.setDocType("Inproceedings");
  141. currentDoc = "Inproceedings";
  142. try {
  143. stmt.addBatch("INSERT INTO tbl_genres (genre_name) " +
  144. "VALUES('"+ currentDoc +"');");
  145. }catch (Exception e) {
  146. e.printStackTrace();
  147. }
  148.  
  149. }
  150. }
  151. public void characters(char[] ch, int start, int length) throws SAXException {
  152. tempVal = new String(ch,start,length);
  153. }
  154.  
  155. public void endElement(String uri, String localName, String qName) throws SAXException {
  156.  
  157.  
  158. if (qName.equalsIgnoreCase(currentDoc)){
  159. //add it to the list
  160. myDocs.add(tempDoc);
  161. }else if (qName.equalsIgnoreCase("editor")) {
  162. tempDoc.setEditor(tempVal);
  163. }else if (qName.equalsIgnoreCase("title")) {
  164. tempDoc.setTitle(tempVal);
  165. }else if (qName.equalsIgnoreCase("year")) {
  166. tempDoc.setYear(tempVal);
  167. }else if (qName.equalsIgnoreCase("booktitle")) {
  168. tempDoc.setBookTitle(tempVal);
  169. }else if (qName.equalsIgnoreCase("publisher")) {
  170. tempDoc.setPublisher(tempVal);
  171. }else if (qName.equalsIgnoreCase("isbn")) {
  172. tempDoc.setISBN(tempVal);
  173. }else if (qName.equalsIgnoreCase("url")) {
  174. tempDoc.setURL(tempVal);
  175. }else if (qName.equalsIgnoreCase("author")) {
  176. tempDoc.setAuthor(tempVal);
  177. }else if (qName.equalsIgnoreCase("pages")) {
  178. tempDoc.setPages(tempVal);
  179. }else if (qName.equalsIgnoreCase("address")) {
  180. tempDoc.setAddress(tempVal);
  181. }else if (qName.equalsIgnoreCase("journal")) {
  182. tempDoc.setJournal(tempVal);
  183. }else if (qName.equalsIgnoreCase("volume")) {
  184. tempDoc.setVolume(tempVal);
  185. }else if (qName.equalsIgnoreCase("number")) {
  186. tempDoc.setNumber(tempVal);
  187. }else if (qName.equalsIgnoreCase("month")) {
  188. tempDoc.setMonth(tempVal);
  189. }else if (qName.equalsIgnoreCase("ee")) {
  190. tempDoc.setEE(tempVal);
  191. }else if (qName.equalsIgnoreCase("cdrom")) {
  192. tempDoc.setCDRom(tempVal);
  193. }else if (qName.equalsIgnoreCase("cite")) {
  194. tempDoc.setCite(tempVal);
  195. }else if (qName.equalsIgnoreCase("note")) {
  196. tempDoc.setNote(tempVal);
  197. }else if (qName.equalsIgnoreCase("crossref")) {
  198. tempDoc.setCrossref(tempVal);
  199. }else if (qName.equalsIgnoreCase("series")) {
  200. tempDoc.setSeries(tempVal);
  201. }else if (qName.equalsIgnoreCase("school")) {
  202. tempDoc.setSchool(tempVal);
  203. }else if (qName.equalsIgnoreCase("chapter")) {
  204. tempDoc.setChapter(tempVal);
  205. }
  206.  
  207.  
  208. }
  209.  
  210.  
  211. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement