Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2013
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.43 KB | None | 0 0
  1. package net.foben.schematizer.parse;
  2.  
  3. import java.io.FileWriter;
  4. import java.io.IOException;
  5. import java.util.HashSet;
  6. import java.util.Set;
  7.  
  8. import org.openrdf.model.Statement;
  9. import org.openrdf.rio.RDFHandler;
  10. import org.openrdf.rio.RDFHandlerException;
  11. import org.slf4j.Logger;
  12. import org.slf4j.LoggerFactory;
  13.  
  14. public class GraphExtractHandler implements RDFHandler {
  15.     private int stcount = 0;
  16.     private Logger _log;
  17.     private Set<String> graphs;
  18.     private double now, last;
  19.     private String outfile_base;
  20.     private String outfile;
  21.     private int outcount = 0;
  22.    
  23.     public GraphExtractHandler(String filename) throws IOException{
  24.         this._log = LoggerFactory.getLogger(GraphExtractHandler.class);
  25.         this.outfile_base = filename;
  26.         this.outfile = filename + "_0";
  27.         graphs = new HashSet<String>(3000000);
  28.         last = System.nanoTime();
  29.     }
  30.    
  31.     @Override
  32.     public void startRDF() throws RDFHandlerException { }
  33.    
  34.     @Override
  35.     public void endRDF() throws RDFHandlerException {
  36.         writeOut();
  37.        
  38.     }
  39.    
  40.     private void writeOut(){
  41.         try {
  42.             String linesep = System.getProperty("line.separator");
  43.             FileWriter out = new FileWriter(outfile);
  44.             int lines = 0;
  45.             for(String graph : graphs){
  46.                 lines++;
  47.                 out.write(graph);
  48.                 out.write(linesep);
  49.                 if(lines%10000 == 0){
  50.                     _log.info("Flushing 10,000 lines");
  51.                     out.flush();
  52.                 }
  53.             }
  54.             out.flush();
  55.             out.close();
  56.         } catch (IOException e) {
  57.             _log.error("Exception occurred while writing statistics!");
  58.             _log.error(e.getStackTrace().toString());
  59.             System.exit(-1);
  60.         }
  61.         outcount++;
  62.         graphs = new HashSet<String>();
  63.         outfile = outfile_base + "_" + outcount;
  64.        
  65.     }
  66.  
  67.     @Override
  68.     public void handleComment(String arg0) throws RDFHandlerException { }
  69.  
  70.     @Override
  71.     public void handleNamespace(String arg0, String arg1)
  72.             throws RDFHandlerException { }
  73.  
  74.    
  75.     @Override
  76.     public void handleStatement(Statement arg0) throws RDFHandlerException {
  77.        
  78.         try {
  79.             stcount++;
  80.             if(stcount%1000000 == 0){
  81.                 _log.info(stcount/1000000 + " million lines parsed. Speed: " + measure());
  82.                 if(stcount%100000000 == 0) writeOut();
  83.             }
  84.            
  85.             graphs.add(arg0.getContext().stringValue());
  86.            
  87.         } catch (IllegalArgumentException ia){
  88.             _log.warn("IAE occured: " + ia.getMessage());
  89.         }
  90.     }
  91.    
  92.     private String measure() {
  93.         now = System.nanoTime();
  94.         double dur = (now - last) / 1000000000d;
  95.         String result = ("" + dur);
  96.         last = now;
  97.         return result;
  98.     }
  99.  
  100. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement