Guest User

Untitled

a guest
May 3rd, 2016
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.54 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. from invenio.search_engine import get_collection_reclist, get_record
  4. from invenio.intbitset import intbitset
  5. from click import progressbar
  6.  
  7. collection = sys.argv[1]
  8.  
  9. recids = list(get_collection_reclist(collection))
  10. recids.reverse()
  11.  
  12. repeatable_tags = {}
  13. repeatable_subfields = {}
  14.  
  15. with progressbar(recids) as recids:
  16. for recid in recids:
  17. record = get_record(recid)
  18. records_tags = set()
  19. for tag in record:
  20. for field in record[tag]:
  21. current_tag = tag + (field[1] or ' ') + (field[2] or ' ')
  22. if current_tag in records_tags:
  23. if current_tag not in repeatable_tags:
  24. repeatable_tags[current_tag] = intbitset([recid])
  25. else:
  26. repeatable_tags[current_tag].add(recid)
  27. records_tags.add(current_tag)
  28. current_codes = set()
  29. for code, value in field[0]:
  30. if code in current_codes:
  31. current_code = current_tag + code
  32. if current_code not in repeatable_subfields:
  33. repeatable_subfields[current_code] = intbitset([recid])
  34. else:
  35. repeatable_subfields[current_code].add(recid)
  36. current_codes.add(code)
  37.  
  38. for key, value in repeatable_tags.iteritems():
  39. print "%s -> %s" % (key, ", ".join(value[-3:]))
  40.  
  41. for key, value in repeatable_subfields.iteritems():
  42. print "%s -> %s" % (key, ", ".join(value[-3:]))
Add Comment
Please, Sign In to add comment