# Untitled

from sys import argv, exit
import csv
import re
# The script needs two command-line arguments: argv[1] is opened as the
# sequence text file and argv[2] as the CSV file. Stop early with a non-zero
# exit status when either one is missing.
if len(argv) < 3:
    print("missing command-line argument")
    exit(1)

#INSTRUCTIONS

    #1. Read the sequence file into a string (you've got this).

    #2. Open the csv file and get each sub string ("AGATC", "TCTG", etc) from the first row into a list (you've essentially done this too).

    #3. For each item in the list of sub strings, look through the sequence you read in step 1 and see what the longest sequential run is. Store this number in a list so you have the longest runs of all sub strings.

    #4. Iterate through the rest of the CSV file, comparing the numbers in each row to the list of numbers you created in step 3. When all numbers match, the name from this row is the one to print.


def longest_run(sequence, pattern):
    """Return the longest count of back-to-back repeats of pattern in sequence.

    Every start position in the sequence is tried, and from each one the
    consecutive, non-overlapping copies of ``pattern`` are counted. The
    largest count found is returned; 0 means the pattern never occurs.
    An empty pattern always yields 0.
    """
    width = len(pattern)
    if width == 0:
        # An empty pattern "matches" everywhere and would never advance.
        return 0

    best = 0
    for start in range(len(sequence) - width + 1):
        run = 0
        pos = start
        # Step one whole pattern width at a time while copies keep matching.
        while sequence.startswith(pattern, pos):
            run += 1
            pos += width
        best = max(best, run)
    return best


def match_profile(rows, runs):
    """Return the name of the first row whose counts equal runs, else None.

    Each row is a list of strings: the name in column 0 followed by one
    count per substring. ``runs`` is the list of integer run lengths in the
    same column order. Empty rows and rows whose counts are not integers
    are skipped.
    """
    for row in rows:
        if not row:
            continue
        try:
            counts = [int(value) for value in row[1:]]
        except ValueError:
            # Not a usable data row; keep looking.
            continue
        if counts == runs:
            return row[0]
    return None


def main():
    """Match the sequence file (argv[1]) against the CSV file (argv[2]).

    1. Read the sequence file into a string.
    2. Take the substrings to look for from the CSV header row (every
       column after the first).
    3. Compute the longest consecutive run of each substring in the sequence.
    4. Print the name from the first CSV row whose numbers all match those
       runs, or "No match" when no row does.
    """
    with open(argv[1], "r") as file, open(argv[2], "r", newline="") as csvfile:
        contents = file.read()
        csvcontents = csv.reader(csvfile)
        header = next(csvcontents, None)
        if header is None:
            # Empty CSV file: there is nothing to compare against.
            print("No match")
            return
        sublist = header[1:]
        runs = [longest_run(contents, item) for item in sublist]
        name = match_profile(csvcontents, runs)

    print(name if name is not None else "No match")


if __name__ == "__main__":
    main()