Guest User

Untitled

a guest
Oct 18th, 2018
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.35 KB | None | 0 0
  1. import gzip, zipfile, bz2, io
  2.  
  3. def read_compressed(filename):
  4. """
  5. Opens the file in read mode with appropriate decompression algorithm.
  6. """
  7.  
  8. # Standard header bytes for diff compression formats
  9. comp_bytes = {
  10. b"\x1f\x8b\x08": "gz",
  11. b"\x42\x5a\x68": "bz2",
  12. b"\x50\x4b\x03\x04": "zip"
  13. }
  14.  
  15. max_len = max(len(x) for x in comp_bytes)
  16.  
  17. def file_type(filename):
  18. """
  19. Compare header bytes with those in the file and return type.
  20. """
  21. with open(filename, 'rb') as f:
  22. file_start = f.read(max_len)
  23. for magic, filetype in comp_bytes.items():
  24. if file_start.startswith(magic):
  25. return filetype
  26. return "uncompressed"
  27.  
  28. # Open file with appropriate function
  29. comp = file_type(filename)
  30. if comp == 'gz':
  31. return gzip.open(filename, 'rt')
  32. elif comp == 'bz2':
  33. return bz2.BZ2File(filename, 'rt')
  34. elif comp == 'zip':
  35. zip_arch = zipfile.ZipFile(filename, 'r')
  36. if len(zip_arch.namelist()) > 1:
  37. raise IOError("Only a single fastq file must be in the zip archive.")
  38. else:
  39. # ZipFile open as bytes by default, using io to read as text
  40. zip_content = zip_arch.open(zip_arch.namelist()[0], 'r')
  41. return io.TextIOWrapper(zip_content)
  42. else :
  43. return open(filename, 'r')
Add Comment
Please, Sign In to add comment