Advertisement
Gfy

sharding.py

Gfy
Apr 3rd, 2019
200
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.20 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # encoding: utf-8
  3.  
  4. import os
  5. import sys
  6. import hashlib
  7.  
  8. def parse_and_shard(srr_releases_fn, action):
  9.     with open(srr_releases_fn) as fileobject:
  10.         for line in fileobject:
  11.             action(line.strip())
  12.  
  13. def shard_file(rls_name):
  14.     #print(repr(rls_name))
  15.     count = 0
  16.     sha1 = hashlib.sha1(rls_name.lower().encode('utf8')).hexdigest()
  17.     srr = rls_name + '.srr'
  18.     if os.path.exists(srr):
  19.         count += 1
  20.         destination = os.path.join(sha1[0], sha1[1:3], srr)
  21.         os.renames(srr, destination)
  22.         if count % 10000 == 0:
  23.             print("%d - %s" % (count, destination))
  24.    
  25. if len(sys.argv) == 1:
  26.     print("Script works on the current directory!")
  27.     print("Do the initial sorting based on list of release names:")
  28.     print("  sharding.py sharding.txt")
  29.     print("Sort all other srrs:")
  30.     print("  sharding.py --")
  31. elif (len(sys.argv) == 2 and sys.argv[1] == "--"):
  32.     # by listing all files
  33.     for f in os.listdir(os.curdir):
  34.         if f.endswith(".srr"):
  35.             shard_file(os.path.basename(f)[:-4])
  36.     print("Done.")
  37. elif len(sys.argv) == 2:
  38.     # use text file with known release names to speed up listing all files
  39.     parse_and_shard(sys.argv[1], shard_file)
  40.     print("Done.")
  41. else:
  42.     print("Parameters not understood")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement