I'm taking CS50 and got stuck on this pset6.
I made this code and it's working fine for 'small' given database. On 'large' one i get wrong values in my DNA sequence. Like, using debug50 i got that Albus sequence should be 15,49,38... and my seq is 21, 55, 64...
whats wrong? AND why it works fine on small database and not in large one?
Thanks for the help!
# Import ARGV and CSV library
from sys import argv, exit
import pandas as pd
import csv
# Check if argv has 3 arguments (program name, cvs file and dna sequence)
while True:
if len(argv) != 3:
print("Usage: python dna.py data.csv sequence.txt")
exit(1)
else:
break
with open(argv[2], 'r', encoding="UTF-8") as txt:
dna_seq = txt.read()
#Find the number of STR - AGATC,TTTTTTCT,AATG,TCTAG,GATA,TATC,GAAA,TCTG
AGATC = dna_seq.count("AGATC")
TTTTTTCT = dna_seq.count("TTTTTTCT")
AATG = dna_seq.count("AATG")
TCTAG = dna_seq.count("TCTAG")
GATA = dna_seq.count("GATA")
TATC = dna_seq.count("TATC")
GAAA = dna_seq.count("GAAA")
TCTG = dna_seq.count("TCTG")
name = 0
if argv[1] == "databases/small.csv":
with open(argv[1], 'r') as csv_file:
reader = csv.DictReader(csv_file)
for row in reader:
dna_db = row['name'], row['AGATC'], row['AATG'], row['TATC']
dna_db = list(dna_db)
seq = [AGATC, AATG, TATC]
seq = [str(x) for x in seq]
if dna_db[1:4] == seq:
name = dna_db[:1]
break
else:
name = "No match"
elif argv[1] == "databases/large.csv":
with open(argv[1], 'r') as csv_file:
reader = csv.DictReader(csv_file)
for row in reader:
dna_db = row['name'], row['AGATC'], row['TTTTTTCT'], row['AATG'], row['TCTAG'],
row['GATA'], row['TATC'], row['GAAA'], row['TCTG']
dna_db = list(dna_db)
seq = [AGATC,TTTTTTCT,AATG,TCTAG,GATA,TATC,GAAA,TCTG]
seq = [str(x) for x in seq]
if dna_db[1:9] == seq:
name = dna_db[:1]
break
else:
name = "No match"
print(name)