Advertisement
Guest User

listen.py

a guest
Sep 19th, 2019
163
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.14 KB | None | 0 0
  1. #-------------------------------------------------------------------------------
  2. # Name:        listex
  3. # Purpose: list data type helper functions that operate like relational
  4. #   algebra expressions such as union, intersection, etc. these are List to
  5. #   List functions e.g. [1,2,3] insersection [0,2,4] -> [2]
  6. #   Each function takes at least one list as argument(la) and a sort parameter
  7. #    (sa). If the list is already sorted (all of these functions result in a
  8. #   sorted list), about 1/2 processing time can be saved for subsequent
  9. #   functions. The result is always a list.
  10. #-------------------------------------------------------------------------------
  11.  
  12. def iter_intersection(a, b, key=None):
  13.     """intersect(a, b) -> elements from a that are also in b
  14.  
  15.    Note that both iterables should be sorted (with the same key function
  16.    passed here), for the same reason as with groupby.
  17.    """
  18.     a, b = iter(a), iter(b)
  19.     try:
  20.         nexta = next(a)
  21.         keya = key(nexta) if key else nexta
  22.         nextb = next(b)
  23.         keyb = key(nextb) if key else nextb
  24.         while True:
  25.             if keya == keyb:
  26.                 yield nexta
  27.                 nexta = next(a)
  28.                 keya = key(nexta) if key else nexta
  29.                 # NOTE: If you wanted to add flags for multiset
  30.                 # intersection or pure set intersection, this is
  31.                 # where you'd add the change.
  32.             elif keya < keyb:
  33.                 nexta = next(a)
  34.                 keya = key(nexta) if key else nexta
  35.             else:
  36.                 nextb = next(b)
  37.                 keyb = key(nextb) if key else nextb
  38.     except StopIteration:
  39.         return
  40.  
  41. #intersection
  42. # in A and B
  43. def list_intersection(la, lb, sa=True,sb=True):
  44.     """return list of values in both A and B
  45.    """
  46.     # return list of values in both lists
  47.     if sa:
  48.         la = sorted(la)
  49.     if sb:
  50.         lb = sorted(lb)
  51.  
  52.     lena = len(la)
  53.     lenb = len(lb)
  54.     ia = 0
  55.     ib = 0
  56.  
  57.     res = []
  58.     while(ia<lena and ib< lenb):
  59.         #print("{} == {}".format(la[ia],lb[ib]))
  60.         if (la[ia] < lb[ib]):
  61.             ia += 1
  62.         elif (la[ia] > lb[ib]):
  63.             ib += 1
  64.         else:
  65.             res.append(la[ia])
  66.             ia += 1
  67.             ib += 1
  68.  
  69.     return res
  70.  
  71. #symetric difference
  72. # in A XOR B
  73. def list_symdif(la, lb, sa=True,sb=True):
  74.    """return list of values in A XOR B
  75.   """
  76.    # return list of values in A XOR B
  77.    if sa:
  78.      la = sorted(la)
  79.    if sb:
  80.      lb = sorted(lb)
  81.  
  82.    lena = len(la)
  83.    lenb = len(lb)
  84.    ia = 0
  85.    ib = 0
  86.    res = []
  87.  
  88.    #head
  89.    if (la[0] < lb[0]):
  90.      while(ia<lena and la[ia] < lb[0]):
  91.          #print("{} == {}".format(la[ia],lb[ib]))
  92.          res.append(la[ia])
  93.          ia += 1
  94.    elif (la[0] > lb[0]):
  95.      while(ib < lenb and lb[ib] < la[0]):
  96.          #print("{} == {}".format(la[ia],lb[ib]))
  97.          res.append(lb[ib])
  98.          ib += 1
  99.  
  100.    #body
  101.    while(ia<lena and ib< lenb):
  102.      #print("{} == {}".format(la[ia],lb[ib]))
  103.      if (la[ia] < lb[ib]):
  104.          res.append(la[ia])
  105.          ia += 1
  106.      elif (la[ia] > lb[ib]):
  107.          res.append(lb[ib])
  108.          ib += 1
  109.      else:
  110.          ia += 1
  111.          ib += 1
  112.  
  113.    #tail
  114.    if (ia==lena):
  115.      for j in range(ib,lenb):
  116.          res.append(lb[j])
  117.    elif (ib==lenb):
  118.      for j in range(ia,lena):
  119.          res.append(la[j])
  120.  
  121.    return res
  122.  
  123. # UNION
  124. # in A OR B
  125. def list_union(la, lb, sa=True,sb=True):
  126.     """return list of values in A OR B
  127.    """
  128.     # return list of values in A OR B
  129.     if sa:
  130.         la = sorted(la)
  131.     if sb:
  132.         lb = sorted(lb)
  133.  
  134.     lena = len(la)
  135.     lenb = len(lb)
  136.     ia = 0
  137.     ib = 0
  138.     res = []
  139.  
  140.     #head
  141.     if lena>0 and lenb>0:
  142.         if (la[0] < lb[0]):
  143.             while(ia<lena and la[ia] < lb[0]):
  144.                 #print("{} == {}".format(la[ia],lb[ib]))
  145.                 res.append(la[ia])
  146.                 ia += 1
  147.         elif (la[0] > lb[0]):
  148.             while(ib < lenb and lb[ib] < la[0]):
  149.                 #print("{} == {}".format(la[ia],lb[ib]))
  150.                 res.append(lb[ib])
  151.                 ib += 1
  152.  
  153.     #body
  154.     while(ia<lena and ib< lenb):
  155.         #print("{} == {}".format(la[ia],lb[ib]))
  156.         if (la[ia] < lb[ib]):
  157.             res.append(la[ia])
  158.             ia += 1
  159.         elif (la[ia] > lb[ib]):
  160.             res.append(lb[ib])
  161.             ib += 1
  162.         else:
  163.             res.append(la[ia])
  164.             ia += 1
  165.             ib += 1
  166.  
  167.     #tail
  168.     if (ia==lena):
  169.         for j in range(ib,lenb):
  170.             res.append(lb[j])
  171.     elif (ib==lenb):
  172.         for j in range(ia,lena):
  173.             res.append(la[j])
  174.  
  175.     return res
  176. #end union()
  177.  
  178.  
  179. #relative complement
  180. # in A and not B
  181. def list_relcomp(la, lb, sa=True,sb=True):
  182.     """return list of values in A and not B
  183.    """
  184.     # return list of values in A but not B
  185.     if sa:
  186.         la = sorted(la)
  187.     if sb:
  188.         lb = sorted(lb)
  189.  
  190.     lena = len(la)
  191.     lenb = len(lb)
  192.     ia = 0
  193.     ib = 0
  194.  
  195.     res = []
  196.     while(ia<lena and ib< lenb):
  197.         #print("{} == {}".format(la[ia],lb[ib]))
  198.         if (la[ia] < lb[ib]):
  199.             res.append(la[ia])
  200.             ia += 1
  201.         elif (la[ia] > lb[ib]):
  202.             ib += 1
  203.         else:
  204.             ia += 1
  205.             ib += 1
  206.     #tail
  207.     if (ib==lenb):
  208.         for j in range(ia,lena):
  209.             res.append(la[j])
  210.  
  211.     return res
  212.  
  213.  
  214. def list_unique(la,sa=True):
  215.     """return list of unique values in A
  216.      sa - sort values in la first
  217.    """
  218.     if sa:
  219.         la = sorted(la)
  220.  
  221.     lena = len(la)
  222.     ia = 0
  223.     prev = la[ia]
  224.     res = []
  225.     res.append(prev)
  226.  
  227.     while(ia<lena):
  228.         if (la[ia] > prev):
  229.             res.append(la[ia])
  230.             prev = la[ia]
  231.             ia += 1
  232.         else:
  233.             ia += 1
  234.  
  235.     return res
  236.  
  237.  
  238.  
  239. #-------------------------------------------------------------------------------
  240. #       Performance Test Section
  241. #-------------------------------------------------------------------------------
  242. def niave_relcomp(a,b):
  243.  
  244.     res = [x for x in a if x not in b]
  245.     return res
  246.    
  247.    
  248. def relcomp_set(a, b):
  249.     aset = a
  250.     bset = b
  251.    
  252.     res = aset - bset
  253.     res= list(res)
  254.     return res
  255.  
  256.    
  257. def test(nA, nB, relationtype):
  258.     """time test functions.
  259.        relationtype 1=B subset of A, 2=A not in B
  260.    """
  261.     import collections    
  262.     import random
  263.     import timeit
  264.  
  265.     if relationtype==1:
  266.         print("\nFor A subset of B:")
  267.         A = ["string"+str(x) for x in range(0,nA)]
  268.         B = ["string"+str(x) for x in range(0,nB)]
  269.     elif relationtype==2:
  270.         print("\nFor A not in B:")
  271.         A = ["string"+str(x) for x in range(0,nA)]
  272.         B = ["strnng"+str(x) for x in range(0,nB)]
  273.     elif relationtype==3:
  274.         print("\nFor A not in B:")
  275.         A = [0x7EF83945641E0000|x for x in range(0,nA)]
  276.         B = [0x7EF43945641E0000|x for x in range(1,nB+1)]
  277.  
  278.     random.shuffle(A)
  279.     random.shuffle(B)
  280.          
  281.     print("trying A {0}, B {1}".format(len(A),len(B)))
  282.  
  283.     t = timeit.timeit(lambda: set(A).intersection(B), number=100)
  284.     print("set test", t)
  285.  
  286.     seta, setb = set(A), set(B)
  287.     t = timeit.timeit(lambda: seta&setb, number=1000)
  288.     print("prepared set test", t)
  289.  
  290.     t = timeit.timeit(lambda: list_intersection(A, B), number=100)
  291.     print("listex test", t)
  292.  
  293.     sorta, sortb = sorted(A), sorted(B)
  294.     t = timeit.timeit(lambda: list_intersection(sorta, sortb, False, False), number=1000)
  295.     print("prepared listex test", t)
  296.  
  297.     def consume(it): collections.deque(it, maxlen=0)
  298.     t = timeit.timeit(lambda: consume(iter_intersection(sorta, sortb)), number=1000)
  299.     print("iterable test", t)
  300.  
  301.    
  302. def main():
  303.     """run some performance tests on relative compare
  304.    Note the built in set functions are implemented in C so this isn't really fair
  305.    but seems to be useful for comparing large data sets.
  306.    Naive implementation may be extremely slow so there is an option to skip
  307.    those.
  308.    """
  309.  
  310.     test(1000,5000,2)
  311.  
  312.     test(10000,50000,2)
  313.  
  314.     test(100000,500000,1)
  315.  
  316.     test(100,500,2)
  317.  
  318.  
  319. if __name__ == '__main__':
  320.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement