Advertisement
Regional_Push

3 answert

Feb 17th, 2021
771
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.86 KB | None | 0 0
  1. # The folder can be unzipped in bash using - unzip 'folder name here'. Just make sure you have unzip installed.
  2. # Run the unzip command in bash once, if it does not run just type 'sudo apt-get install unzip'
  3.  
  4. # Note for Paul: I had to do this the long-winded way. I will set a reminder to talk to you about finding a shorter method.
  5.  
  6. import os # this is the module that I need to open the file
  7.  
  8. def bar_code_length(): # this is the function that checks for all the bar code length
  9.  
  10.     list_of_less_than_five = [] # setting up a list to do hold the abnormal fasta files if necessary
  11.  
  12.     os.chdir('C:\\Users\\dhaka\\OneDrive\\Desktop\\Exam one\\pauls_dna_seqs\\pauls_dna_seqs') # you will have to change the directory to your file location
  13.  
  14.     for i in os.listdir(): # opens all the files in the directory
  15.  
  16.         if i.endswith('.fasta'): # to open only fasta file
  17.  
  18.             file_pointer = open(i) # assign file pointer and open the file
  19.  
  20.             read_file_first_line = file_pointer.readline() # reads the first line
  21.  
  22.             read_file_first_line = read_file_first_line.strip('> ') # removes the unwanted '>'
  23.  
  24.             read_file_first_line = read_file_first_line.strip('\n') # removes the new line character
  25.  
  26.             read_file_first_line = list(read_file_first_line) # makes a list of the first line
  27.  
  28.             if len(read_file_first_line) < 5: # checks the length of the first line
  29.  
  30.                 list_of_less_than_five.append(i) # append if the lenght in less than five
  31.  
  32.     if len(list_of_less_than_five) == 0: # if all the files are okay then it just makes it just types that indeed all the files are okay
  33.  
  34.         print('1.Seems all of them have five bases\n')
  35.  
  36.     elif len(list_of_less_than_five) != 0: # if the list does receive something then lets us know which one is the abnormal file
  37.         print('These are the files that have less than five bases in the bar code\n')
  38.         print(list_of_less_than_five)  
  39.                
  40. def sequence_lenght(): # this function checks to see if all the sequences are long enough
  41.  
  42.     list_of_less_than_fifty = [] # setting up a list to do hold the abnormal fasta files if necessary
  43.  
  44.     os.chdir('C:\\Users\\dhaka\\OneDrive\\Desktop\\Exam one\\pauls_dna_seqs\\pauls_dna_seqs') # change this to your local file
  45.  
  46.     for i in os.listdir():  # opens all the files in the directory
  47.  
  48.         if i.endswith('.fasta'):# to open only fasta file
  49.  
  50.             file_pointer = open(i) # assign file pointer and open the file
  51.  
  52.             file_pointer.readline()  # reads the first line and skips it
  53.  
  54.             read_file_second_line = file_pointer.readline() # reads the second line
  55.  
  56.             read_file_second_line = read_file_second_line.strip('\n')# removes the new line character
  57.  
  58.             read_file_second_line = list(read_file_second_line)# makes a list of the second line
  59.  
  60.             if len(read_file_second_line) < 50: # checks if the second line in long enough
  61.  
  62.                 list_of_less_than_fifty.append(i) # if not passes it to the list that holds aberrant fies
  63.    
  64.     if len(list_of_less_than_fifty) == 0: # if not files are aberrant then we let the user know so
  65.        
  66.         print('2.Seems all of them have fifty bases\n')
  67.  
  68.     elif len(list_of_less_than_fifty) != 0: # if their are abnormal files, this lets the user know
  69.  
  70.         print('2.These are the files that have less than fifty bases in the sequence\n')
  71.         print(list_of_less_than_fifty)
  72.  
  73. def non_canonical_bases(): # the function to look for non canonical bases
  74.     list_of_non_canonical_based = []
  75.  
  76.     os.chdir('C:\\Users\\dhaka\\OneDrive\\Desktop\\Exam one\\pauls_dna_seqs\\pauls_dna_seqs') # set this to your local file
  77.  
  78.     for i in os.listdir(): # opens folder and loops through it  
  79.  
  80.         if i.endswith('fasta'): # makes sure they are fasta
  81.  
  82.             file_pointer  = open(i) # opens file
  83.  
  84.             file_pointer.readline() # skips first line
  85.  
  86.             read_second_line = file_pointer.readline() # reads second line
  87.  
  88.             read_second_line = read_second_line.strip('\n') # removes what we do not want
  89.  
  90.             read_second_line = list(read_second_line) # makes a list
  91.  
  92.             for my_base in read_second_line: # loops through the first line
  93.                
  94.                 if my_base == 'N': # if any of the bases are N then append it to the list
  95.                     list_of_non_canonical_based.append(i)
  96.  
  97.     if len(list_of_non_canonical_based) == 0: # if not non canonical bases were found then the list will be zero
  98.         print('3.Seems non of them have non canonical bases\n')
  99.  
  100.     elif len(list_of_non_canonical_based) != 0:
  101.         print('3.Here are the fasta files with non canonical bases\n')
  102.  
  103.         print(list_of_non_canonical_based)
  104.  
  105. bar_code_length() # these let the user know of the result
  106.  
  107. sequence_lenght()
  108.  
  109. non_canonical_bases()
  110.  
  111.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement