Advertisement
Guest User

Untitled

a guest
Jul 28th, 2017
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.01 KB | None | 0 0
  1. import java.io.IOException;
  2. import java.util.*;
  3.  
  4. import javax.xml.parsers.ParserConfigurationException;
  5. import javax.xml.parsers.SAXParser;
  6. import javax.xml.parsers.SAXParserFactory;
  7.  
  8. import org.xml.sax.Attributes;
  9. import org.xml.sax.SAXException;
  10.  
  11. import org.xml.sax.helpers.DefaultHandler;
  12.  
  13. import java.sql.*;
  14.  
  15.  
  16. public class ProjectParser extends DefaultHandler{
  17. List myDocs;
  18.  
  19. private String tempVal;
  20. //to maintain context
  21. private Document tempDoc = new Document();
  22. private String currentDoc;
  23.  
  24. static Connection conn = null;
  25.  
  26. static PreparedStatement psInsertGenre = null;
  27. static String sqlInsertGenre = null;
  28.  
  29. static PreparedStatement psInsertPeople = null;
  30. static String sqlInsertPeople = null;
  31.  
  32. static PreparedStatement psInsertTitle = null;
  33. static String sqlInsertTitle = null;
  34.  
  35. static PreparedStatement psInsertPublisher = null;
  36. static String sqlInsertPublisher = null;
  37.  
  38. static Hashtable<String, String> hashtable = new Hashtable<String, String>();
  39. static int hashID = 0;
  40.  
  41.  
  42. public ProjectParser() {
  43. myDocs = new ArrayList();
  44. }
  45.  
  46.  
  47. public void runExample() {
  48. parseDocument();
  49. }
  50.  
  51. private void parseDocument() {
  52.  
  53. //get a factory
  54. SAXParserFactory spf = SAXParserFactory.newInstance();
  55. try {
  56. //get a new instance of parser
  57. SAXParser sp = spf.newSAXParser();
  58.  
  59. //parse the file and also register this class for call backs
  60. sp.parse("/Users/bshiaw/desktop/bigData/dblp-data.xml", this);
  61.  
  62. }catch(SAXException se) {
  63. se.printStackTrace();
  64. }catch(ParserConfigurationException pce) {
  65. pce.printStackTrace();
  66. }catch (IOException ie) {
  67. ie.printStackTrace();
  68. }
  69. }
  70.  
  71. /**
  72. * Iterate through the list and print
  73. * the contents
  74. */
  75. private void printData(){
  76.  
  77. System.out.println("Number of Documents '" + myDocs.size() + "'.");
  78.  
  79. Iterator it = myDocs.iterator();
  80. while(it.hasNext()) {
  81. System.out.println(it.next().toString());
  82. }
  83. }
  84.  
  85. public static void main(String[] args) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
  86. // Incorporate mySQL driver
  87. Class.forName("com.mysql.jdbc.Driver").newInstance();
  88.  
  89. // Connect to the test database
  90. try {
  91. conn = DriverManager.getConnection("jdbc:mysql:///bookdb","root", "lakers");
  92. }catch (SQLException e) {
  93. e.printStackTrace();
  94. }
  95.  
  96. int[] iNoRows = null;
  97. //For tbl_genres
  98. sqlInsertGenre = "insert into tbl_genres (genre_name) values(?)";
  99. try {
  100. psInsertGenre = conn.prepareStatement(sqlInsertGenre);
  101. }catch (SQLException e) {
  102. e.printStackTrace();
  103. }
  104.  
  105.  
  106. //For tbl_people
  107. sqlInsertPeople = "INSERT INTO tbl_people (name) values(?)";
  108. try {
  109. psInsertPeople = conn.prepareStatement(sqlInsertPeople);
  110. }catch (SQLException e) {
  111. e.printStackTrace();
  112. }
  113. //For tbl_booktitle
  114. sqlInsertTitle = "INSERT INTO tbl_booktitle (title) values(?)";
  115. try {
  116. psInsertTitle = conn.prepareStatement(sqlInsertTitle);
  117. }catch (SQLException e) {
  118. e.printStackTrace();
  119. }
  120. //For tbl_publisher
  121. sqlInsertPublisher = "INSERT INTO tbl_publisher(publisher_name) values(?)";
  122. try {
  123. psInsertPublisher = conn.prepareStatement(sqlInsertPublisher);
  124. }catch (SQLException e) {
  125. e.printStackTrace();
  126. }
  127.  
  128.  
  129. ProjectParser pp = new ProjectParser();
  130. pp.runExample();
  131. System.out.println("Parsing Complete.\n");
  132.  
  133. try {
  134. iNoRows = psInsertGenre.executeBatch();
  135. }catch (SQLException e) {
  136. e.printStackTrace();
  137. }
  138.  
  139. try {
  140. iNoRows = psInsertPeople.executeBatch();
  141. }catch (SQLException e) {
  142. e.printStackTrace();
  143. }
  144.  
  145. try {
  146. iNoRows = psInsertTitle.executeBatch();
  147. }catch (SQLException e) {
  148. e.printStackTrace();
  149. }
  150.  
  151. try {
  152. iNoRows = psInsertPublisher.executeBatch();
  153. }catch (SQLException e) {
  154. e.printStackTrace();
  155. }
  156.  
  157.  
  158. System.out.println("Execute Batch Complete.");
  159.  
  160. try {
  161. if(psInsertGenre!=null) psInsertGenre.close();
  162. if(psInsertPeople!=null) psInsertPeople.close();
  163. if(psInsertTitle!=null) psInsertTitle.close();
  164. if(psInsertPublisher!=null) psInsertPublisher.close();
  165. if(conn!=null) conn.close();
  166.  
  167. }catch (SQLException e) {
  168. e.printStackTrace();
  169. }
  170. }
  171.  
  172.  
  173.  
  174. //Event Handlers
  175. public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
  176. //reset
  177. tempVal = "";
  178.  
  179. if(qName.equalsIgnoreCase("book")) {
  180. //create a new instance of employee
  181. // tempDoc = new Document();
  182. // tempDoc.setmdate(attributes.getValue("mdate"));
  183. // tempDoc.setKey(attributes.getValue("key"));
  184. // tempDoc.setDocType("Book");
  185. currentDoc = "Book";
  186. if (! hashtable.containsValue(currentDoc)) {
  187. hashtable.put(Integer.toString(hashID), currentDoc);
  188. hashID++;
  189. try {
  190. psInsertGenre.setString(1, currentDoc);
  191. psInsertGenre.addBatch();
  192. }catch (Exception e) {
  193. e.printStackTrace();
  194. }
  195. }
  196. }
  197.  
  198. else if (qName.equalsIgnoreCase("incollection")) {
  199. // tempDoc = new Document();
  200. // tempDoc.setmdate(attributes.getValue("mdate"));
  201. // tempDoc.setKey(attributes.getValue("key"));
  202. // tempDoc.setDocType("Incollection");
  203. currentDoc = "Incollection";
  204. if (! hashtable.containsValue(currentDoc)) {
  205. hashtable.put(Integer.toString(hashID), currentDoc);
  206. hashID++;
  207. try {
  208. psInsertGenre.setString(1, currentDoc);
  209. psInsertGenre.addBatch();
  210. }catch (Exception e) {
  211. e.printStackTrace();
  212. }
  213. }
  214. }
  215.  
  216.  
  217. else if (qName.equalsIgnoreCase("proceedings")) {
  218. // tempDoc = new Document();
  219. // tempDoc.setmdate(attributes.getValue("mdate"));
  220. // tempDoc.setKey(attributes.getValue("key"));
  221. // tempDoc.setDocType("Proceedings");
  222. currentDoc = "Proceedings";
  223. if (! hashtable.containsValue(currentDoc)) {
  224. hashtable.put(Integer.toString(hashID), currentDoc);
  225. hashID++;
  226. try {
  227. psInsertGenre.setString(1, currentDoc);
  228. psInsertGenre.addBatch();
  229. }catch (Exception e) {
  230. e.printStackTrace();
  231. }
  232. }
  233.  
  234. }
  235. else if (qName.equalsIgnoreCase("inproceedings")) {
  236. // tempDoc = new Document();
  237. // tempDoc.setmdate(attributes.getValue("mdate"));
  238. // tempDoc.setKey(attributes.getValue("key"));
  239. // tempDoc.setDocType("Inproceedings");
  240. currentDoc = "Inproceedings";
  241. if (! hashtable.containsValue(currentDoc)) {
  242. hashtable.put(Integer.toString(hashID), currentDoc);
  243. hashID++;
  244. try {
  245. psInsertGenre.setString(1, currentDoc);
  246. psInsertGenre.addBatch();
  247. }catch (Exception e) {
  248. e.printStackTrace();
  249. }
  250. }
  251. }
  252. }
  253.  
  254.  
  255.  
  256. public void characters(char[] ch, int start, int length) throws SAXException {
  257. tempVal = new String(ch,start,length);
  258. }
  259.  
  260. public void endElement(String uri, String localName, String qName) throws SAXException {
  261.  
  262.  
  263. if (qName.equalsIgnoreCase(currentDoc)){
  264. //add it to the list
  265. return;
  266. }
  267.  
  268. if ( (qName.equalsIgnoreCase("author")) || (qName.equalsIgnoreCase("editor")) ) {
  269. String author = tempVal;
  270. if (! hashtable.containsValue(author)) {
  271. hashtable.put(Integer.toString(hashID), author);
  272. hashID++;
  273. try {
  274. psInsertPeople.setString(1, author);
  275. psInsertPeople.addBatch();
  276. }catch (Exception e) {
  277. e.printStackTrace();
  278. }
  279. return;
  280. }}
  281.  
  282. //tempDoc.setPublisher(tempVal);
  283.  
  284. if (qName.equalsIgnoreCase("title")) {
  285. tempDoc.setTitle(tempVal);
  286. }
  287. if (qName.equalsIgnoreCase("year")) {
  288. tempDoc.setYear(tempVal);
  289. }
  290. if (qName.equalsIgnoreCase("booktitle")) {
  291. System.out.println("got to title");
  292. String title = tempVal;
  293. if (! hashtable.containsValue(title)) {
  294. hashtable.put(Integer.toString(hashID), title);
  295. hashID++;
  296. try {
  297. psInsertTitle.setString(1, title);
  298. psInsertTitle.addBatch();
  299. }catch (Exception e) {
  300. e.printStackTrace();
  301. }
  302. }
  303. return; }
  304.  
  305. if (qName.equalsIgnoreCase("publisher")) {
  306. String publisher = tempVal;
  307. System.out.println("OH HERRO");
  308. if (! hashtable.containsValue(publisher)) {
  309. hashtable.put(Integer.toString(hashID), publisher);
  310. hashID++;
  311. try {
  312. psInsertPublisher.setString(1, publisher);
  313. psInsertPublisher.addBatch();
  314. }catch (Exception e) {
  315. e.printStackTrace();
  316. }
  317. }return; }
  318. if (qName.equalsIgnoreCase("isbn")) {
  319. tempDoc.setISBN(tempVal);
  320. } if (qName.equalsIgnoreCase("url")) {
  321. tempDoc.setURL(tempVal);
  322. } if (qName.equalsIgnoreCase("pages")) {
  323. tempDoc.setPages(tempVal);
  324. } if (qName.equalsIgnoreCase("address")) {
  325. tempDoc.setAddress(tempVal);
  326. } if (qName.equalsIgnoreCase("journal")) {
  327. tempDoc.setJournal(tempVal);
  328. } if (qName.equalsIgnoreCase("volume")) {
  329. tempDoc.setVolume(tempVal);
  330. } if (qName.equalsIgnoreCase("number")) {
  331. tempDoc.setNumber(tempVal);
  332. } if (qName.equalsIgnoreCase("month")) {
  333. tempDoc.setMonth(tempVal);
  334. } if (qName.equalsIgnoreCase("ee")) {
  335. tempDoc.setEE(tempVal);
  336. } if (qName.equalsIgnoreCase("cdrom")) {
  337. tempDoc.setCDRom(tempVal);
  338. } if (qName.equalsIgnoreCase("cite")) {
  339. tempDoc.setCite(tempVal);
  340. } if (qName.equalsIgnoreCase("note")) {
  341. tempDoc.setNote(tempVal);
  342. } if (qName.equalsIgnoreCase("crossref")) {
  343. tempDoc.setCrossref(tempVal);
  344. } if (qName.equalsIgnoreCase("series")) {
  345. tempDoc.setSeries(tempVal);
  346. } if (qName.equalsIgnoreCase("school")) {
  347. tempDoc.setSchool(tempVal);
  348. } if (qName.equalsIgnoreCase("chapter")) {
  349. tempDoc.setChapter(tempVal);
  350. }
  351. // }
  352. // else if (qName.equalsIgnoreCase("editor")) {
  353. //// tempDoc.setEditor(tempVal);
  354. // String editor = tempVal;
  355. // if (! hashtable.containsValue(editor)) {
  356. // hashtable.put(Integer.toString(hashID), editor);
  357. // hashID++;
  358. // try {
  359. // psInsertPeople.setString(1, editor+" Editor");
  360. // psInsertPeople.addBatch();
  361. // }catch (Exception e) {
  362. // e.printStackTrace();
  363. // }
  364. // }
  365. // }
  366.  
  367.  
  368. }
  369.  
  370.  
  371. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement