SHARE
TWEET

test of list/dictionary reduction/union

dirknbr Sep 24th, 2012 81 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import time
  2. import random
  3. import itertools
  4. import collections
  5.  
  6. li1=[]
  7. li2=[]
  8. li3=[]
  9. li4=[]
  10. li5=[]
  11. di1={}
  12. di2={}
  13. di3={}
  14. di4={}
  15. di5={}
  16.  
  17. for i in range(400000):
  18.     i2=str(i)
  19.     v=random.random()
  20.     r1=random.random()
  21.     r2=random.random()
  22.     r3=random.random()
  23.     r4=random.random()
  24.     r5=random.random()
  25.    
  26.     if r1<.5: li1.append([i2,v])
  27.     if r2<.5: li2.append([i2,v])
  28.     if r3<.5: li3.append([i2,v])
  29.     if r4<.5: li4.append([i2,v])
  30.     if r5<.5: li5.append([i2,v])
  31.     if r1<.5: di1[i2]=v
  32.     if r2<.5: di2[i2]=v
  33.     if r3<.5: di3[i2]=v
  34.     if r4<.5: di4[i2]=v
  35.     if r5<.5: di5[i2]=v
  36.  
  37. #method 1
  38. start=time.time()
  39. #stack
  40. li=li1+li2+li3+li4+li5
  41. groups={}
  42. for key, group in itertools.groupby(sorted(li),lambda x:x[0]):
  43.     groups[key] = sum([y for x, y in group])
  44.    
  45. print len(groups), time.time()-start, groups.get('1',0)
  46.  
  47. #method 2
  48. start=time.time()
  49. groups={}
  50.    
  51. for k in set(di1.keys()+di2.keys()+di3.keys()+di4.keys()+di5.keys()):
  52.     groups[k]=di1.get(k,0)+di2.get(k,0)+di3.get(k,0)+di4.get(k,0)+di5.get(k,0)
  53.  
  54. print len(groups), time.time()-start, groups.get('1',0)
  55.  
  56. #method 3
  57. start=time.time()
  58.  
  59. groups=dict( (n, di1.get(n, 0)+di2.get(n, 0)+di3.get(n, 0)+di4.get(n, 0)+di5.get(n, 0))
  60.              for n in set(di1).union(set(di2)).union(set(di3)).union(set(di4)).union(set(di5)))
  61. print len(groups), time.time()-start, groups.get('1',0)
  62.  
  63. #method 4
  64. start=time.time()
  65. #stack
  66. li=li1+li2+li3+li4+li5
  67. groups={}
  68. for i in li:
  69.     groups[i[0]]=groups.get(i[0],0)+i[1]
  70.  
  71. print len(groups), time.time()-start, groups.get('1',0)
  72.  
  73. #method 5
  74. start=time.time()
  75. li=li1+li2+li3+li4+li5
  76. groups=collections.defaultdict(float)
  77. for k, v in li:
  78.     groups[k]+=v
  79.  
  80. print len(groups), time.time()-start, groups.get('1',0)
RAW Paste Data
Top