Advertisement
Guest User

Untitled

a guest
Jan 26th, 2016
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.33 KB | None | 0 0
  1. public class RegexRemoverMain {
  2. public static void main(String[] args) throws SQLException, ClassNotFoundException{
  3. //Connection Parameters and Connect to Postgres Database
  4. String data = "jdbc:postgresql://localhost:5432/postgres";
  5. Class.forName("org.postgresql.Driver");
  6. Connection conn = null;
  7. //Connect to DB
  8. conn = DriverManager.getConnection(
  9. data, "postgres", "raha1986");
  10. //statements to get distinct owners
  11. Statement ownerSt = conn.createStatement(
  12. ResultSet.TYPE_SCROLL_INSENSITIVE,
  13. ResultSet.CONCUR_UPDATABLE);
  14. //statement to get Image Ids of a user
  15. Statement ownersImagesIdsSt = conn.createStatement(
  16. ResultSet.TYPE_SCROLL_INSENSITIVE,
  17. ResultSet.CONCUR_UPDATABLE);
  18. String insertSQL;
  19. //an arraylist to store unique titles+tags reported by user
  20. ArrayList<List<String>> result = new ArrayList<List<String>>();
  21. //list for storing those Ids of a users which are filtered
  22. List<String> filteredIds = new ArrayList<String>();
  23. //list for storing those Ids of a users which are kept
  24. List<String> ids = new ArrayList<String>();
  25.  
  26. //get the list of all the users
  27. ResultSet distinctOwner = ownerSt.executeQuery("select distinct owner from "flickrData_bulkUploadedFree"");
  28. distinctOwner.last();
  29. distinctOwner.beforeFirst();
  30. int count=0;
  31. //RegularExpression Pattern
  32. String theRegex= "((DSC)?(dsc)?(img)?(IMG)?(\s?)(\_?)((\-?))[0-9]{1,9})";
  33. Pattern checkRegex = Pattern.compile(theRegex);
  34. //loop is going through all user's Images and check whether their the titles is one of the patterns if yes, check their title+description which are unique or not
  35. //if yes, we keep them; if not; we throw them away or store in another place
  36. while(distinctOwner.next()){
  37. count = count++;
  38. Statement insertSt = conn.createStatement(
  39. ResultSet.TYPE_SCROLL_INSENSITIVE,
  40. ResultSet.CONCUR_UPDATABLE);
  41. //store filtered images
  42. String insertString = "INSERT INTO regexIamges"
  43. + "( id , owner, descriptio, title, tags) VALUES"
  44. + "(?,?,?,?,?)";
  45. PreparedStatement preparedStatement = conn.prepareStatement(insertString);
  46. //for each user exist in "flickrData_bulkUploadedFree"
  47. String owner = distinctOwner.getString("owner");
  48. ResultSet ownersImages;
  49. ownersImages = ownersImagesIdsSt.executeQuery("select id, title, tags, descriptio from "flickrData_bulkUploadedFree" where owner = '" + owner +"';");
  50. ownersImages.last();
  51. ownersImages.beforeFirst();
  52. //an list of images of a user's with the information about id, title, tags and descriptions in order to find unique Images
  53. ArrayList<List<String>> bulkUploadList = new ArrayList<List<String>>();
  54. while(ownersImages.next()){
  55. String id = ownersImages.getString("id");
  56. String title = ownersImages.getString("title");
  57. String tags = ownersImages.getString("tags");
  58. String description = ownersImages.getString("descriptio");
  59. Matcher regexMatcher = checkRegex.matcher(title);
  60. if (regexMatcher.find()){
  61. if(regexMatcher.group().length() != 0){
  62. List<String> rowsList = new ArrayList<String>();
  63. rowsList.add(id);
  64. rowsList.add(title);
  65. rowsList.add(tags);
  66. rowsList.add(description);
  67. bulkUploadList.add(rowsList);
  68. bulkUploadList.add(rowsList);
  69. }
  70. }
  71. else{
  72. insertSQL = "INSERT INTO "regBulkfreeFlickrData" SELECT * FROM "flickrData_bulkUploadedFree" where id ='"+id+"';";
  73. insertSt.addBatch(insertSQL);
  74. }
  75. }
  76. HashSet<String> hashSet = new HashSet<String>();
  77. for(List<String> item : bulkUploadList) {
  78. String title, tags, id, desc, uniqueString;
  79. title = item.get(1);
  80. tags = item.get(2);
  81. id = item.get(0);
  82. desc = item.get(3);
  83. uniqueString = (tags + "#" + desc).trim().toUpperCase();
  84. System.out.println(item);
  85. if(!hashSet.contains(uniqueString)) {
  86. result.add(item);
  87. hashSet.add(uniqueString);
  88. insertSQL = "INSERT INTO "regBulkfreeFlickrData" SELECT * FROM "flickrData_bulkUploadedFree" where id ='"+id+"';";
  89. insertSt.addBatch(insertSQL);
  90. } else {
  91. // System.out.println("Filtered element " + uniqueString + "id " + id);
  92. filteredIds.add(id);
  93. preparedStatement.setString(1, id);
  94. preparedStatement.setString(2, owner);
  95. preparedStatement.setString(3, desc);
  96. preparedStatement.setString(4, title);
  97. preparedStatement.setString(5, tags);
  98. preparedStatement.addBatch();
  99. }
  100. }
  101.  
  102. preparedStatement.executeBatch();
  103. preparedStatement.close();
  104. insertSt.executeBatch();
  105. insertSt.close();
  106. }
  107. }
  108.  
  109. Exception in thread "main" java.sql.BatchUpdateException: Batch entry 0 INSERT INTO regexIamges( id , owner, descriptio, title, tags) VALUES('4292220054.0000000000000','23352125@N07','NoValue','IMG_2720','NoValue') was aborted. Call getNextException to see the cause.
  110. at org.postgresql.jdbc2.AbstractJdbc2Statement$BatchResultHandler.handleError(AbstractJdbc2Statement.java:2743)
  111. at org.postgresql.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:1928)
  112. at org.postgresql.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:405)
  113. at org.postgresql.jdbc2.AbstractJdbc2Statement.executeBatch(AbstractJdbc2Statement.java:2892)
  114. at uzh.textmining.RegexRemoverMain.main(RegexRemoverMain.java:116)
  115. at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  116. at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  117. at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  118. at java.lang.reflect.Method.invoke(Method.java:497)
  119. at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)
  120.  
  121. CREATE TABLE "RegexImages"
  122. (id numeric,
  123. owner character varying(254),
  124. descriptio character varying(254),
  125. title character varying(254),
  126. tags character varying(254),
  127. PRIMARY KEY (id)
  128. )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement