Guest User

Untitled

a guest
Feb 22nd, 2018
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.50 KB | None | 0 0
  1. #!/usr/local/bin/python3
  2. import os
  3. import argparse
  4. import pickle
  5. import time
  6. from typing import Set, Iterable
  7.  
  8. import chainlet
  9. from chainlet.concurrency import threads
  10.  
  11. CLI = argparse.ArgumentParser('Clean an ALICE SE Namespace based on a whitelist')
  12. CLI.add_argument(
  13. 'WHITELIST',
  14. help='whitelist base path',
  15. type=lambda val: bytes(val, 'utf-8'),
  16. )
  17. CLI.add_argument(
  18. 'SEBASE',
  19. help='SE namespace base path',
  20. type=lambda val: bytes(val, 'utf-8'),
  21. )
  22. CLI.add_argument(
  23. '--ignore-after',
  24. help='ignore any file created after this epoch date',
  25. default=1512082800,
  26. )
  27.  
  28.  
  29. @chainlet.forklet
  30. @chainlet.genlet(prime=False)
  31. def walk_namespace(se_base_path: bytes):
  32. for base_path in (('%02d' % base).encode() for base in range(16)):
  33. # we only need to clean up files that exist
  34. yield (
  35. os.path.join(base_path, mid_path)
  36. for mid_path
  37. in os.listdir(os.path.join(se_base_path, base_path))
  38. )
  39.  
  40.  
  41. @chainlet.funclet
  42. def whitelist_files(value: bytes, whitelist_path: bytes, se_base_path: bytes):
  43. relative_path = value
  44. try:
  45. with open(os.path.join(whitelist_path, relative_path), 'rb') as whitelist_pickle:
  46. whitelist = pickle.load(whitelist_pickle) # type: Set[bytes]
  47. except FileNotFoundError:
  48. whitelist = set()
  49. try:
  50. file_list = os.listdir(os.path.join(se_base_path, relative_path))
  51. except FileNotFoundError:
  52. raise chainlet.StopTraversal
  53. else:
  54. return (os.path.join(relative_path, file_path) for file_path in file_list if file_path not in whitelist)
  55.  
  56.  
  57. @chainlet.forklet
  58. @chainlet.funclet
  59. def cull_new(value: Iterable[bytes], se_base_path: bytes, ignore_after: int):
  60. file_paths = value
  61. for rel_path in file_paths:
  62. file_path = (os.path.join(se_base_path, rel_path))
  63. file_stat = os.stat(file_path)
  64. if file_stat.st_mtime < ignore_after:
  65. print(
  66. rel_path.decode(), '%dB' % file_stat.st_size,
  67. time.strftime('%Y-%m-%d %H:%M:%S %Z', time.localtime(file_stat.st_mtime)), sep=','
  68. )
  69. yield rel_path
  70.  
  71.  
  72. def main():
  73. options = CLI.parse_args()
  74. se_base, whitelist, ignore_after = options.SEBASE, options.WHITELIST, options.ignore_after
  75. chain = walk_namespace(se_base_path=se_base) >> threads(
  76. whitelist_files(whitelist_path=whitelist, se_base_path=se_base) >> cull_new(se_base_path=se_base, ignore_after=ignore_after)
  77. )
  78. print(chain)
  79. for _ in chain:
  80. pass
  81.  
  82.  
  83. if __name__ == '__main__':
  84. main()
Add Comment
Please, Sign In to add comment