Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package foo.bar;
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.FileOutputStream;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import org.apache.commons.lang3.StringUtils;
- import org.openrdf.model.Resource;
- import org.openrdf.model.Statement;
- import org.openrdf.model.URI;
- import org.openrdf.model.impl.StatementImpl;
- import org.openrdf.model.impl.URIImpl;
- import org.openrdf.repository.Repository;
- import org.openrdf.repository.RepositoryConnection;
- import org.openrdf.repository.RepositoryException;
- import org.openrdf.repository.sail.SailRepository;
- import org.openrdf.rio.RDFHandler;
- import org.openrdf.rio.RDFHandlerException;
- import org.openrdf.rio.ntriples.NTriplesWriter;
- import org.openrdf.rio.rdfxml.RDFXMLWriter;
- import org.openrdf.sail.memory.MemoryStore;
- import org.openrdf.sail.nativerdf.NativeStore;
- import static foo.bar.Environment.*;
- public class PLDReducer {
- public static void main(String[] args) throws IOException, RepositoryException, RDFHandlerException {
- BufferedReader br = new BufferedReader(new FileReader("all_graphs.nt"));
- String line;
- int i = 0;
- int httpoff, slashoff, dots, dotoff;
- HashMap<String, List<String>> mappings = new HashMap<String, List<String>>();
- try{
- while ((line = br.readLine()) != null){
- i++;
- line = line.replace("\"", "");
- String oldLine = line;
- if (line.startsWith("http://")) httpoff = 7;
- else if (line.startsWith("https://")) httpoff = 8;
- else throw new IllegalArgumentException("Not a URI: " + line);
- slashoff = line.indexOf("/", httpoff);
- dots = StringUtils.countMatches(line.substring(0, slashoff), ".");
- dotoff = dots > 1 ? line.indexOf(".") + 1 : httpoff;
- // line = line.substring(0, httpoff) + line.substring(dotoff, slashoff);
- line = SDATASETS + line.substring(dotoff, slashoff);
- if(!mappings.containsKey(line)){
- ArrayList<String> newL = new ArrayList<String>();
- newL.add(oldLine);
- mappings.put(line, newL);
- }
- else {
- mappings.get(line).add(oldLine);
- }
- if(i%100000 == 0) System.out.print(".");
- //if(i > 1000) break;
- }
- }
- finally {
- br.close();
- }
- System.out.println("\nTotal URIs: " + i);
- System.out.println("Total keys: " + mappings.keySet().size());
- storeMappings(mappings);
- // printMappings(mappings);
- }
- private static void storeMappings(HashMap<String, List<String>> mappings) throws RepositoryException, RDFHandlerException, FileNotFoundException {
- File dataDir = new File("repositories/mappingrepo");
- Repository repo = new SailRepository( new NativeStore(dataDir) );
- repo.initialize();
- List<Statement> statements = new ArrayList<Statement>();
- URI pred = new URIImpl(PR_GRAPH);
- URI a = new URIImpl(RDFTYPE);
- URI ds = new URIImpl(TP_DATASET);
- int count = 0;
- for(String key : mappings.keySet()){
- URI subj = new URIImpl(key);
- List<String> mappedURIs = mappings.get(key);
- statements.add(new StatementImpl(subj, a, ds));
- for (String mappedURI : mappedURIs) {
- statements.add(new StatementImpl(subj, pred, new URIImpl(mappedURI)));
- }
- System.out.println(++count);
- if(count%50 == 0 || true){
- RepositoryConnection con = repo.getConnection();
- con.add(statements, (Resource) null);
- con.commit();
- statements.clear();
- con.close();
- }
- }
- RepositoryConnection con = repo.getConnection();
- con.add(statements, (Resource) null);
- con.commit();
- RDFHandler rdfxmlWriter = new NTriplesWriter(new FileOutputStream("foooo"));
- con.export(rdfxmlWriter, (Resource)null);
- con.close();
- }
- private static void printMappings(HashMap<String, List<String>> mappings) {
- for(String key : mappings.keySet()){
- List<String> mappedURIs = mappings.get(key);
- System.out.println(key);
- System.out.println("----------------------");
- for (String mappedURI : mappedURIs) {
- System.out.println(" " + mappedURI);
- }
- System.out.println();
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement