Advertisement
brucewhealton

SimpleGraph Python In-Memory TripleStore

Jul 25th, 2014
306
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.57 KB | None | 0 0
  1. import csv
  2.  
  3. class SimpleGraph:
  4.     def __init__(self):
  5.         self._spo = {}
  6.         self._pos = {}
  7.         self._osp = {}
  8.        
  9.     def add(self, (sub, pred, obj)):
  10.         """
  11.        Adds a triple to the graph.
  12.        """
  13.         self._addToIndex(self._spo, sub, pred, obj)
  14.         self._addToIndex(self._pos, pred, obj, sub)
  15.         self._addToIndex(self._osp, obj, sub, pred)
  16.        
  17.     def _addToIndex(self, index, a, b, c):
  18.         """
  19.        Adds a triple to a specified index.
  20.        """
  21.         if a not in index: index[a] = {b:set([c])}
  22.         else:
  23.             if b not in index[a]: index[a][b] = set([c])
  24.             else: index[a][b].add(c)
  25.  
  26.     def remove(self, (sub, pred, obj)):
  27.         """
  28.        Remove a triple pattern from the graph.
  29.        """
  30.         triples = list(self.triples((sub, pred, obj)))
  31.         for (delSub, delPred, delObj) in triples:
  32.             self._removeFromIndex(self._spo, delSub, delPred, delObj)
  33.             self._removeFromIndex(self._pos, delPred, delObj, delSub)
  34.             self._removeFromIndex(self._osp, delObj, delSub, delPred)
  35.  
  36.     def _removeFromIndex(self, index, a, b, c):
  37.         """
  38.        Removes a triple from an index and clears up empty indermediate structures.
  39.        """
  40.         try:
  41.             bs = index[a]
  42.             cset = bs[b]
  43.             cset.remove(c)
  44.             if len(cset) == 0: del bs[b]
  45.             if len(bs) == 0: del index[a]
  46.         # KeyErrors occur if a term was missing, which means that it wasn't a valid delete:
  47.         except KeyError:
  48.             pass
  49.  
  50.     def triples(self, (sub, pred, obj)):
  51.         """
  52.        Generator over the triple store.
  53.        Returns triples that match the given triple pattern.
  54.        """
  55.         # check which terms are present in order to use the correct index:
  56.         try:
  57.             if sub != None:
  58.                 if pred != None:
  59.                     # sub pred obj
  60.                     if obj != None:
  61.                         if obj in self._spo[sub][pred]: yield (sub, pred, obj)
  62.                     # sub pred None
  63.                     else:
  64.                         for retObj in self._spo[sub][pred]: yield (sub, pred, retObj)
  65.                 else:
  66.                     # sub None obj
  67.                     if obj != None:
  68.                         for retPred in self._osp[obj][sub]: yield (sub, retPred, obj)
  69.                     # sub None None
  70.                     else:
  71.                         for retPred, objSet in self._spo[sub].items():
  72.                             for retObj in objSet:
  73.                                 yield (sub, retPred, retObj)
  74.             else:
  75.                 if pred != None:
  76.                     # None pred obj
  77.                     if obj != None:
  78.                         for retSub in self._pos[pred][obj]:
  79.                             yield (retSub, pred, obj)
  80.                     # None pred None
  81.                     else:
  82.                         for retObj, subSet in self._pos[pred].items():
  83.                             for retSub in subSet:
  84.                                 yield (retSub, pred, retObj)
  85.                 else:
  86.                     # None None obj
  87.                     if obj != None:
  88.                         for retSub, predSet in self._osp[obj].items():
  89.                             for retPred in predSet:
  90.                                 yield (retSub, retPred, obj)
  91.                     # None None None
  92.                     else:
  93.                         for retSub, predSet in self._spo.items():
  94.                             for retPred, objSet in predSet.items():
  95.                                 for retObj in objSet:
  96.                                     yield (retSub, retPred, retObj)
  97.         # KeyErrors occur if a query term wasn't in the index, so we yield nothing:
  98.         except KeyError:
  99.             pass
  100.            
  101.     def value(self, sub=None, pred=None, obj=None):
  102.         for retSub, retPred, retObj in self.triples((sub, pred, obj)):
  103.             if sub is None: return retSub
  104.             if pred is None: return retPred
  105.             if obj is None: return retObj
  106.             break
  107.         return None
  108.  
  109.     def load(self, filename):
  110.         f = open(filename, "rb")
  111.         reader = csv.reader(f)
  112.         for sub, pred, obj in reader:
  113.             sub = unicode(sub, "UTF-8")
  114.             pred = unicode(pred, "UTF-8")
  115.             obj = unicode(obj, "UTF-8")
  116.             self.add((sub, pred, obj))
  117.         f.close()
  118.  
  119.     def save(self, filename):
  120.         f = open(filename, "wb")
  121.         writer = csv.writer(f)
  122.         for sub, pred, obj in self.triples((None, None, None)):
  123.             writer.writerow([sub.encode("UTF-8"), pred.encode("UTF-8"), obj.encode("UTF-8")])
  124.         f.close()
  125.  
  126. if __name__ == "__main__":
  127.     g = SimpleGraph()
  128.     g.add(("blade_runner", "name", "Blade Runner"))
  129.     g.add(("blade_runner", "name", "Blade Runner"))
  130.     g.add(("blade_runner", "release_date", "June 25, 1982"))
  131.     g.add(("blade_runner", "directed_by", "Ridley Scott"))
  132.    
  133.     print list(g.triples((None, None, None)))
  134.     print list(g.triples(("blade_runner", None, None)))
  135.     print list(g.triples(("blade_runner", "name", None)))
  136.     print list(g.triples(("blade_runner", "name", "Blade Runner")))
  137.     print list(g.triples(("blade_runner", None, "Blade Runner")))
  138.     print list(g.triples((None, "name", "Blade Runner")))
  139.     print list(g.triples((None, None, "Blade Runner")))
  140.  
  141.     print list(g.triples(("foo", "name", "Blade Runner")))
  142.     print list(g.triples(("blade_runner", "foo", "Blade Runner")))
  143.     print list(g.triples(("blade_runner", "name", "foo")))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement