Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from collections import Counter

# Sample (name, value) records. The original paste ended this list with a
# "....." placeholder standing in for further entries.
b_data = [('example', 123), ('example-one', 456), ('example', 987)]

# First field of every record, in input order.
blockslst = [line[0] for line in b_data]

# Tally each name once up front so the duplicate check below is a dictionary
# lookup instead of a full list.count() scan per item.
name_counts = Counter(blockslst)

# Names that occur more than once. A name appears here once per occurrence in
# the input, which is exactly what the original append loop produced.
blocklstgtone = [item for item in blockslst if name_counts[item] > 1]
from collections import Counter
from itertools import takewhile

# Tally how often each name (first tuple field) occurs.
# b_data is the list of (name, value) tuples defined earlier in the file.
counts = Counter(x[0] for x in b_data)

print(counts['example'])  # number of records named 'example'
print(counts['foo'])      # a Counter reports 0 for a key it has never seen

# The paste used `n` without defining it. most_common(None) returns every
# (name, count) pair, most frequent first; set n to an int to limit the list.
n = None
print(counts.most_common(n))

# most_common() is ordered by descending count, so takewhile() can stop at the
# first entry whose count is not above 1 instead of scanning the whole list.
l = [1, 1, 2, 2, 3, 3, 1, 1, 5, 4, 6, 7, 7, 8, 3, 3, 2, 1]
c = Counter(l)
repeated = list(takewhile(lambda x: x[-1] > 1, c.most_common()))
print(repeated)  # [(1, 5), (3, 4), (2, 3), (7, 2)]

# The same "count above 1" filter applied to the name tally, keeping names only.
print([item[0] for item in counts.most_common() if item[-1] > 1])
# Count the records in b_data (defined earlier in the file) whose first field
# is 'example', without building an intermediate list of all names.
print(sum(1 for entry in b_data if entry[0] == 'example'))
# Output for the three-record sample data: 2
b_data = [('example', 123), ('example-one', 456), ('example', 987)]

# Map each name (first tuple field) to the number of records that carry it.
dict_1 = {}
for i in b_data:
    # get() supplies 0 for a name not seen yet, so no membership branch is needed.
    dict_1[i[0]] = dict_1.get(i[0], 0) + 1
print(dict_1)

# Keep only the names seen more than once, as (name, count) pairs in the
# order the names were first encountered.
print([(name, count) for name, count in dict_1.items() if count > 1])
# Output for the data above: [('example', 2)]
# Larger sample in which three different names repeat.
b_data = [
    ('example', 123),
    ('example-one', 456),
    ('example', 987),
    ('example-one', 456),
    ('example-one', 456),
    ('example-two', 456),
    ('example-two', 456),
    ('example-two', 456),
    ('example-two', 456),
]
# Duplicate counts reported for this data, most frequent first:
# [('example-two', 4), ('example-one', 3), ('example', 2)]
# Benchmark: generate random (name, number) tuples, count how often each name
# occurs using a plain dict, keep the names seen more than once, and report
# how long each phase took.
from collections import Counter
import random
from datetime import datetime  # good enough for a long-running op

dt_datagen = datetime.now()

numberOfKeys = 100000

# basis for testdata
textData = ["example", "pose", "text", "someone"]
numData = [random.randint(100, 1000) for _ in range(1, 10)]  # irrelevant

# create random testdata from above lists; the "a % 10" suffix spreads each
# base word over ten distinct keys
tData = [
    (random.choice(textData) + str(a % 10), random.choice(numData))
    for a in range(numberOfKeys)
]
# one key that occurs exactly once, so the "> 1" filter has something to drop
tData.append(("aaa", 99))

dt_dictioning = datetime.now()

# create a dict
countEm = {}

# put all your data into dict, counting them
for p in tData:
    if p[0] in countEm:
        countEm[p[0]] += 1
    else:
        countEm[p[0]] = 1

dt_filtering = datetime.now()

# comparison result-wise (commented out)
# counts = Counter(x[0] for x in tData)
# for c in sorted(counts):
#     print(c, " = ", counts[c])
# print()

# output dict if count > 1
subList = [x for x in countEm if countEm[x] > 1]  # without "aaa"

dt_printing = datetime.now()

# subList already holds only keys with a count above 1, so no second check
# is needed while printing.
for c in sorted(subList):
    print(c, " = ", countEm[c])

dt_end = datetime.now()

# The "\n" and "\t" escapes below had lost their backslashes in the paste.
print("\n\nCreating ", len(tData), " testdataitems took:\t", (dt_dictioning - dt_datagen).total_seconds(), " seconds")
print("Putting them into dictionary took \t", (dt_filtering - dt_dictioning).total_seconds(), " seconds")
print("Filtering donw to those > 1 hits took \t", (dt_printing - dt_filtering).total_seconds(), " seconds")
print("Printing all the items left took \t", (dt_end - dt_printing).total_seconds(), " seconds")
print("\nTotal time: \t", (dt_end - dt_datagen).total_seconds(), " seconds")
- # reformatted for brevity
- example0 = 2520 example1 = 2535 example2 = 2415
- example3 = 2511 example4 = 2511 example5 = 2444
- example6 = 2517 example7 = 2467 example8 = 2482
- example9 = 2501
- pose0 = 2528 pose1 = 2449 pose2 = 2520
- pose3 = 2503 pose4 = 2531 pose5 = 2546
- pose6 = 2511 pose7 = 2452 pose8 = 2538
- pose9 = 2554
- someone0 = 2498 someone1 = 2521 someone2 = 2527
- someone3 = 2456 someone4 = 2399 someone5 = 2487
- someone6 = 2463 someone7 = 2589 someone8 = 2404
- someone9 = 2543
- text0 = 2454 text1 = 2495 text2 = 2538
- text3 = 2530 text4 = 2559 text5 = 2523
- text6 = 2509 text7 = 2492 text8 = 2576
- text9 = 2402
- Creating 100001 testdataitems took: 4.728604 seconds
- Putting them into dictionary took 0.273245 seconds
- Filtering donw to those > 1 hits took 0.0 seconds
- Printing all the items left took 0.031234 seconds
- Total time: 5.033083 seconds
Add Comment
Please, Sign In to add comment