Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // section 207
- //10-1-2017
- #include <iostream>
- using namespace std;
- #include <string>
/*
Algorithm for part 1.
1. Create the similarityScore function.
2. Create two strings for the sequence inputs.
3. In the main function, ask the user to input the strings to be tested.
4. Compare the two strings, first to see if their lengths match; if not, return zero.
5. Calculate the Hamming distance by comparing the characters at each position of the two strings, and store it as a float.
6. Calculate the similarity score by taking the string length and subtracting the Hamming distance,
then dividing by the string length. Output the resultant value.
*/
// Returns the similarity score of two sequences: the fraction of positions at
// which they hold the same character, i.e. (length - Hamming distance) / length.
//
// sequence1, sequence2: the sequences to compare.
// Returns a value in [0, 1]; returns 0 when the lengths differ or when both
// sequences are empty.
float similarityScore(std::string sequence1, std::string sequence2){
    const std::string::size_type length = sequence1.length();
    // Sequences of different lengths are defined to have no similarity.
    // The empty check also prevents a 0/0 division below.
    if (length != sequence2.length() || length == 0){
        return 0;
    }
    // Compare the strings position by position to count mismatches
    // (the Hamming distance).
    std::string::size_type mismatches = 0;
    for (std::string::size_type i = 0; i < length; ++i){
        if (sequence1[i] != sequence2[i]){
            ++mismatches;
        }
    }
    // Return the similarity score.
    return static_cast<float>(length - mismatches) / static_cast<float>(length);
}
/* Part two
1. Create a function named countMatches.
2. Create two string parameters, one for the input genome string and one for the sequence to find.
3. Create a float parameter to hold the minimum similarity score.
4. Compare the sequence against each same-length substring of the genome, and count a match whenever the similarity score is at or above the minimum score.
5. Return the number of matches.
*/
- int countMatches(string imputGenome,string sequence, float minimumScore){
- int matches = 0;
- int sequenceLength = sequence.length();
- for (int pos = 0; pos < imputGenome.length() - sequenceLength + 1; pos+1){ //pos refers to the position of the first character in the string.
- //Set it to 0 to use the search the whole string.
- float score = similarityScore(imputGenome.substr(pos, sequenceLength),sequence);
- if(score >= minimumScore){
- matches+1;
- }
- return matches;
- }
- }
/* Part 3
1. Create a function findBestMatch.
2. Use genome and sequence as parameters, and compute the similarity score of the sequence at each position in the genome.
3. Return the best (highest) similarity score found.
*/
- float findBestMatch(string genome, string seq) {
- float bestMatch = 0.0;
- for (int pos = 0; pos < genome.length(); pos+1) {
- float score = similarityScore(genome.substr(pos, seq.length()), seq);
- if (score > bestMatch) {
- bestMatch = score;
- }
- }
- return bestMatch;
- }
/* Part 4
1. Create a function to compare the three inputted genomes, and output a number to indicate which one is the best match with a given sequence.
2. Store the best-match score of the first, second, and third genome as floats, calling the earlier findBestMatch function with the input sequence.
3. Compare the score of each genome with the scores of the other two genomes.
4. Return 1, 2, or 3 for the genome whose score is strictly higher than both others, or zero if no single genome has the highest score.
*/
- int findBestGenome(string genome1, string genome2, string genome3, string seq) {
- float genome1Score = findBestMatch(genome1, seq);
- float genome2Score = findBestMatch(genome2, seq);
- float genome3Score = findBestMatch(genome3, seq);
- if (genome1Score > genome2Score && genome1Score > genome3Score) {
- return 1;
- }
- else if (genome2Score > genome1Score && genome2Score > genome3Score) {
- return 2;
- }
- else if (genome3Score > genome1Score && genome3Score > genome2Score) {
- return 3;
- }
- else {
- return 0; //return zero if more than one sequence has the same similarity score.
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement