Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public class RegexRemoverMain {
- public static void main(String[] args) throws SQLException, ClassNotFoundException{
- //Connection Parameters and Connect to Postgres Database
- String data = "jdbc:postgresql://localhost:5432/postgres";
- Class.forName("org.postgresql.Driver");
- Connection conn = null;
- //Connect to DB
- conn = DriverManager.getConnection(
- data, "postgres", "raha1986");
- //statements to get distinct owners
- Statement ownerSt = conn.createStatement(
- ResultSet.TYPE_SCROLL_INSENSITIVE,
- ResultSet.CONCUR_UPDATABLE);
- //statement to get Image Ids of a user
- Statement ownersImagesIdsSt = conn.createStatement(
- ResultSet.TYPE_SCROLL_INSENSITIVE,
- ResultSet.CONCUR_UPDATABLE);
- String insertSQL;
- //an arraylist to store unique titles+tags reported by user
- ArrayList<List<String>> result = new ArrayList<List<String>>();
- //list for storing those Ids of a users which are filtered
- List<String> filteredIds = new ArrayList<String>();
- //list for storing those Ids of a users which are kept
- List<String> ids = new ArrayList<String>();
- //get the list of all the users
- ResultSet distinctOwner = ownerSt.executeQuery("select distinct owner from "flickrData_bulkUploadedFree"");
- distinctOwner.last();
- distinctOwner.beforeFirst();
- int count=0;
- //RegularExpression Pattern
- String theRegex= "((DSC)?(dsc)?(img)?(IMG)?(\s?)(\_?)((\-?))[0-9]{1,9})";
- Pattern checkRegex = Pattern.compile(theRegex);
- //loop is going through all user's Images and check whether their the titles is one of the patterns if yes, check their title+description which are unique or not
- //if yes, we keep them; if not; we throw them away or store in another place
- while(distinctOwner.next()){
- count = count++;
- Statement insertSt = conn.createStatement(
- ResultSet.TYPE_SCROLL_INSENSITIVE,
- ResultSet.CONCUR_UPDATABLE);
- //store filtered images
- String insertString = "INSERT INTO regexIamges"
- + "( id , owner, descriptio, title, tags) VALUES"
- + "(?,?,?,?,?)";
- PreparedStatement preparedStatement = conn.prepareStatement(insertString);
- //for each user exist in "flickrData_bulkUploadedFree"
- String owner = distinctOwner.getString("owner");
- ResultSet ownersImages;
- ownersImages = ownersImagesIdsSt.executeQuery("select id, title, tags, descriptio from "flickrData_bulkUploadedFree" where owner = '" + owner +"';");
- ownersImages.last();
- ownersImages.beforeFirst();
- //an list of images of a user's with the information about id, title, tags and descriptions in order to find unique Images
- ArrayList<List<String>> bulkUploadList = new ArrayList<List<String>>();
- while(ownersImages.next()){
- String id = ownersImages.getString("id");
- String title = ownersImages.getString("title");
- String tags = ownersImages.getString("tags");
- String description = ownersImages.getString("descriptio");
- Matcher regexMatcher = checkRegex.matcher(title);
- if (regexMatcher.find()){
- if(regexMatcher.group().length() != 0){
- List<String> rowsList = new ArrayList<String>();
- rowsList.add(id);
- rowsList.add(title);
- rowsList.add(tags);
- rowsList.add(description);
- bulkUploadList.add(rowsList);
- bulkUploadList.add(rowsList);
- }
- }
- else{
- insertSQL = "INSERT INTO "regBulkfreeFlickrData" SELECT * FROM "flickrData_bulkUploadedFree" where id ='"+id+"';";
- insertSt.addBatch(insertSQL);
- }
- }
- HashSet<String> hashSet = new HashSet<String>();
- for(List<String> item : bulkUploadList) {
- String title, tags, id, desc, uniqueString;
- title = item.get(1);
- tags = item.get(2);
- id = item.get(0);
- desc = item.get(3);
- uniqueString = (tags + "#" + desc).trim().toUpperCase();
- System.out.println(item);
- if(!hashSet.contains(uniqueString)) {
- result.add(item);
- hashSet.add(uniqueString);
- insertSQL = "INSERT INTO "regBulkfreeFlickrData" SELECT * FROM "flickrData_bulkUploadedFree" where id ='"+id+"';";
- insertSt.addBatch(insertSQL);
- } else {
- // System.out.println("Filtered element " + uniqueString + "id " + id);
- filteredIds.add(id);
- preparedStatement.setString(1, id);
- preparedStatement.setString(2, owner);
- preparedStatement.setString(3, desc);
- preparedStatement.setString(4, title);
- preparedStatement.setString(5, tags);
- preparedStatement.addBatch();
- }
- }
- preparedStatement.executeBatch();
- preparedStatement.close();
- insertSt.executeBatch();
- insertSt.close();
- }
- }
- Exception in thread "main" java.sql.BatchUpdateException: Batch entry 0 INSERT INTO regexIamges( id , owner, descriptio, title, tags) VALUES('4292220054.0000000000000','23352125@N07','NoValue','IMG_2720','NoValue') was aborted. Call getNextException to see the cause.
- at org.postgresql.jdbc2.AbstractJdbc2Statement$BatchResultHandler.handleError(AbstractJdbc2Statement.java:2743)
- at org.postgresql.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:1928)
- at org.postgresql.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:405)
- at org.postgresql.jdbc2.AbstractJdbc2Statement.executeBatch(AbstractJdbc2Statement.java:2892)
- at uzh.textmining.RegexRemoverMain.main(RegexRemoverMain.java:116)
- at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
- at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
- at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
- at java.lang.reflect.Method.invoke(Method.java:497)
- at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)
- CREATE TABLE "RegexImages"
- (id numeric,
- owner character varying(254),
- descriptio character varying(254),
- title character varying(254),
- tags character varying(254),
- PRIMARY KEY (id)
- )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement