Guest User

Untitled

a guest
Feb 25th, 2018
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.47 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # Created by: Lee Bergstrand
  3. # Descript: Converts multiline FASTAs to single line FASTAs
  4. #
  5. # Usage: FastaMLtoSL.py <sequences.faa>
  6. # Example: FastaMLtoSL.py mySeqs.faa
  7. #----------------------------------------------------------------------------------------
  8. #===========================================================================================================
  9. #Imports:
  10.  
  11. import sys
  12. import re
  13. #===========================================================================================================
  14. # Functions:
  15.  
  16. # 1: Checks that the proper number of arguments was passed; if not, prints instructions on proper use.
  17. def argsCheck(numArgs):
  18. if len(sys.argv) < numArgs or len(sys.argv) > numArgs:
  19. print("Converts multiline FASTAs to single line FASTAs")
  20. print("By Lee Bergstrand\n")
  21. print("Usage: " + sys.argv[0] + " <sequences.fasta>")
  22. print("Examples: " + sys.argv[0] + " mySeqs.fasta")
  23. exit(1) # Aborts program. (exit(1) indicates that an error occurred)
  24. #===========================================================================================================
  25. # Main program code:
  26.  
  27. # House keeping...
  28. argsCheck(2) # Checks if the number of arguments are correct.
  29.  
  30. # Stores file one for input checking.
  31. inFile = sys.argv[1]
  32. outFile = inFile + ".out"
  33.  
  34. print(">> Opening FASTA file...")
  35. # Reads sequence file list and stores it as a string object. Safely closes file:
  36. try:
  37. with open(inFile,"r") as newFile:
  38. sequences = newFile.read()
  39. sequences = re.split("^>", sequences, flags=re.MULTILINE) # Only splits string at the start of a line.
  40. del sequences[0] # The first fasta in the file is split into an empty empty element and and the first fasta
  41. # Del removes this empty element.
  42. newFile.close()
  43. except IOError:
  44. print("Failed to open " + inFile)
  45. exit(1)
  46.  
  47. print(">> Converting FASTA file from multiline to single line and writing to file.")
  48. # Conversts multiline fasta to single line. Writes new fasta to file.
  49. try:
  50. with open(outFile,"w") as newFasta:
  51. for fasta in sequences:
  52. try:
  53. header, sequence = fasta.split("\n", 1) # Split each fasta into header and sequence.
  54. except ValueError:
  55. print(fasta)
  56. header = ">" + header + "\n" # Replace ">" lost in ">" split, Replace "\n" lost in split directly above.
  57. sequence = sequence.replace("\n","") + "\n" # Replace newlines in sequence, remember to add one to the end.
  58. newFasta.write(header + sequence)
  59. newFasta.close()
  60. except IOError:
  61. print("Failed to open " + inFile)
  62. exit(1)
  63.  
  64. print(">> Done!")
Add Comment
Please, Sign In to add comment