Advertisement
Guest User

Untitled

a guest
Jun 22nd, 2013
221
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 4.09 KB | None | 0 0
  1. package foo.bar;
  2.  
  3. import java.io.BufferedReader;
  4. import java.io.File;
  5. import java.io.FileNotFoundException;
  6. import java.io.FileOutputStream;
  7. import java.io.FileReader;
  8. import java.io.IOException;
  9. import java.util.ArrayList;
  10. import java.util.HashMap;
  11. import java.util.List;
  12. import org.apache.commons.lang3.StringUtils;
  13. import org.openrdf.model.Resource;
  14. import org.openrdf.model.Statement;
  15. import org.openrdf.model.URI;
  16. import org.openrdf.model.impl.StatementImpl;
  17. import org.openrdf.model.impl.URIImpl;
  18. import org.openrdf.repository.Repository;
  19. import org.openrdf.repository.RepositoryConnection;
  20. import org.openrdf.repository.RepositoryException;
  21. import org.openrdf.repository.sail.SailRepository;
  22. import org.openrdf.rio.RDFHandler;
  23. import org.openrdf.rio.RDFHandlerException;
  24. import org.openrdf.rio.ntriples.NTriplesWriter;
  25. import org.openrdf.rio.rdfxml.RDFXMLWriter;
  26. import org.openrdf.sail.memory.MemoryStore;
  27. import org.openrdf.sail.nativerdf.NativeStore;
  28.  
  29. import static foo.bar.Environment.*;
  30.  
  31. public class PLDReducer {
  32.  
  33.     public static void main(String[] args) throws IOException, RepositoryException, RDFHandlerException {
  34.         BufferedReader br = new BufferedReader(new FileReader("all_graphs.nt"));
  35.         String line;
  36.         int i = 0;
  37.         int httpoff, slashoff, dots, dotoff;
  38.         HashMap<String, List<String>> mappings = new HashMap<String, List<String>>();
  39.         try{
  40.             while ((line = br.readLine()) != null){
  41.                 i++;
  42.                 line = line.replace("\"", "");
  43.                 String oldLine = line;
  44.                
  45.                 if (line.startsWith("http://")) httpoff = 7;
  46.                 else if (line.startsWith("https://")) httpoff = 8;
  47.                 else throw new IllegalArgumentException("Not a URI: " + line);
  48.                 slashoff = line.indexOf("/", httpoff);
  49.                
  50.                 dots = StringUtils.countMatches(line.substring(0, slashoff), ".");
  51.                
  52.                 dotoff = dots > 1 ? line.indexOf(".") + 1 : httpoff;
  53. //              line = line.substring(0, httpoff) + line.substring(dotoff, slashoff);
  54.                 line = SDATASETS + line.substring(dotoff, slashoff);
  55.                 if(!mappings.containsKey(line)){
  56.                     ArrayList<String> newL = new ArrayList<String>();
  57.                     newL.add(oldLine);
  58.                     mappings.put(line, newL);
  59.                 }
  60.                 else {
  61.                     mappings.get(line).add(oldLine);
  62.                 }
  63.                 if(i%100000 == 0) System.out.print(".");
  64.                 //if(i > 1000) break;
  65.             }
  66.         }
  67.         finally {
  68.             br.close();
  69.         }
  70.         System.out.println("\nTotal URIs: " + i);
  71.         System.out.println("Total keys: " + mappings.keySet().size());
  72.         storeMappings(mappings);
  73. //      printMappings(mappings);
  74.     }
  75.    
  76.     private static void storeMappings(HashMap<String, List<String>> mappings) throws RepositoryException, RDFHandlerException, FileNotFoundException {
  77.         File dataDir = new File("repositories/mappingrepo");
  78.         Repository repo = new SailRepository( new NativeStore(dataDir) );
  79.         repo.initialize();
  80.         List<Statement> statements = new ArrayList<Statement>();
  81.         URI pred = new URIImpl(PR_GRAPH);
  82.         URI a = new URIImpl(RDFTYPE);
  83.         URI ds = new URIImpl(TP_DATASET);
  84.         int count = 0;
  85.         for(String key : mappings.keySet()){
  86.             URI subj = new URIImpl(key);
  87.             List<String> mappedURIs = mappings.get(key);
  88.             statements.add(new StatementImpl(subj, a, ds));
  89.             for (String mappedURI : mappedURIs) {
  90.                 statements.add(new StatementImpl(subj, pred, new URIImpl(mappedURI)));
  91.             }
  92.             System.out.println(++count);
  93.             if(count%50 == 0 || true){
  94.                 RepositoryConnection con = repo.getConnection();
  95.                 con.add(statements, (Resource) null);
  96.                 con.commit();
  97.                 statements.clear();
  98.                 con.close();
  99.             }
  100.            
  101.            
  102.         }
  103.         RepositoryConnection con = repo.getConnection();
  104.         con.add(statements, (Resource) null);
  105.         con.commit();
  106.         RDFHandler rdfxmlWriter = new NTriplesWriter(new FileOutputStream("foooo"));
  107.         con.export(rdfxmlWriter, (Resource)null);
  108.         con.close();
  109.     }
  110.    
  111.    
  112.  
  113.     private static void printMappings(HashMap<String, List<String>> mappings) {
  114.         for(String key : mappings.keySet()){
  115.             List<String> mappedURIs = mappings.get(key);
  116.             System.out.println(key);
  117.             System.out.println("----------------------");
  118.             for (String mappedURI : mappedURIs) {
  119.                 System.out.println("    " + mappedURI);
  120.             }
  121.             System.out.println();
  122.         }
  123.     }
  124. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement