Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # DNA.py
- # Heidi Heffelfinger
- # October 17, 2012
- # Learn to Program - Coursera - University of Toronto
- # Week 4 Assignment
- # Python 3.2
- # This program examines DNA sequences, checks for validity, and makes
- # modifications to DNA sequences.
def get_length(dna):
    ''' (str) -> int

    Report how many nucleotides make up the DNA sequence dna.

    >>> get_length('ATCGAT')
    6
    >>> get_length('CGATAGCT')
    8
    >>> get_length('CG')
    2
    >>> get_length('')
    0
    '''
    nucleotide_total = len(dna)
    return nucleotide_total
def is_longer(dna1, dna2):
    ''' (str, str) -> bool

    Tell whether the first DNA sequence, dna1, has strictly more
    nucleotides than the second one, dna2. Sequences of equal length
    give False.

    >>> is_longer('ATCG', 'AT')
    True
    >>> is_longer('ATCG', 'ATCGGA')
    False
    >>> is_longer('ATCG', 'GCTA')
    False
    >>> is_longer('ATCGAT', 'CGA')
    True
    '''
    # Measure both sequences first (dna1, then dna2), then compare.
    first_length = get_length(dna1)
    second_length = get_length(dna2)
    return second_length < first_length
def count_nucleotides(dna, nucleotide):
    ''' (str, str) -> int

    Count how many times nucleotide appears within the DNA sequence dna.
    The tally is delegated to str.count, so occurrences are counted
    without overlap.

    >>> count_nucleotides('ATCGGC', 'G')
    2
    >>> count_nucleotides('ATCTA', 'G')
    0
    >>> count_nucleotides('CGATAGCT', 'T')
    2
    >>> count_nucleotides('ATCGGA', 'C')
    1
    '''
    occurrences = dna.count(nucleotide)
    return occurrences
def contains_sequence(dna1, dna2):
    ''' (str, str) -> bool

    Tell whether the DNA sequence dna2 appears somewhere inside the DNA
    sequence dna1 as one contiguous run. An empty dna2 is found in any
    dna1.

    >>> contains_sequence('ATCGGC', 'GG')
    True
    >>> contains_sequence('ATCGGC', 'GT')
    False
    >>> contains_sequence('ATCGGC', 'CG')
    True
    >>> contains_sequence('ATCGGC', 'AT')
    True
    >>> contains_sequence('ATCGGC', 'AG')
    False
    >>> contains_sequence('ATCGGC', '')
    True
    '''
    is_present = dna2 in dna1
    return is_present
def is_valid_sequence(dna):
    ''' (str) -> bool

    Tell whether dna is a valid DNA sequence, i.e. every character in it
    is one of the upper-case nucleotide letters 'A', 'T', 'C' or 'G'.
    Lower-case letters are rejected. A sequence with no characters has
    nothing to reject, so it is reported as valid.

    >>> is_valid_sequence('ATCGGC')
    True
    >>> is_valid_sequence('ATcGGC')
    False
    >>> is_valid_sequence('CGATAGCT')
    True
    >>> is_valid_sequence('ABCDEFG')
    False
    >>> is_valid_sequence('atcggc')
    False
    >>> is_valid_sequence('A')
    True
    >>> is_valid_sequence('')
    True
    '''
    valid_nucleotides = 'ATCG'
    # all() stops at the first character that is not a valid nucleotide.
    return all(base in valid_nucleotides for base in dna)
def insert_sequence(dna1, dna2, insertion):
    ''' (str, str, int) -> str

    Return the DNA sequence obtained by inserting dna2 into dna1 so that
    dna2 starts at index insertion of the result.

    The split point follows normal slicing rules: an index past the end
    of dna1 appends dna2, and a negative index counts back from the end
    of dna1.

    >>> insert_sequence('CTGATAGCT', 'ATCGGC', 2)
    'CTATCGGCGATAGCT'
    >>> insert_sequence('ATCGGC', 'CG', 4)
    'ATCGCGGC'
    >>> insert_sequence('ATCGGC', 'TA', 1)
    'ATATCGGC'
    >>> insert_sequence('ATCGGC', 'TA', 0)
    'TAATCGGC'
    >>> insert_sequence('ATCGGC', 'TA', 6)
    'ATCGGCTA'
    '''
    # Split dna1 at the insertion index and splice dna2 between the parts.
    return dna1[:insertion] + dna2 + dna1[insertion:]
def get_complement(nucleotide):
    ''' (str) -> str

    Return the complement of nucleotide: 'A' and 'T' are complements of
    each other, and so are 'C' and 'G'.

    Precondition: nucleotide is one of 'A', 'T', 'C' or 'G'. Any other
    value has no complement and yields None.

    >>> get_complement('A')
    'T'
    >>> get_complement('T')
    'A'
    >>> get_complement('C')
    'G'
    >>> get_complement('G')
    'C'
    '''
    complements = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    # dict.get returns None for anything outside the table, which is what
    # falling off the end of an if/elif chain would produce.
    return complements.get(nucleotide)
def get_complementary_sequence(dna):
    ''' (str) -> str

    Return the DNA sequence that is complementary to the DNA sequence
    dna, built by replacing each nucleotide with its complement as given
    by get_complement. An empty sequence gives an empty result.

    Precondition: dna contains only the characters 'A', 'T', 'C' and 'G'.

    >>> get_complementary_sequence('ACGTACG')
    'TGCATGC'
    >>> get_complementary_sequence('CGATAGCT')
    'GCTATCGA'
    >>> get_complementary_sequence('ATCG')
    'TAGC'
    >>> get_complementary_sequence('ATCTA')
    'TAGAT'
    >>> get_complementary_sequence('')
    ''
    '''
    # Join once instead of growing a string with += on every iteration.
    return ''.join(get_complement(nucleotide) for nucleotide in dna)
Advertisement
Add Comment
Please, Sign In to add comment