CS50 PSET6 - DNA - Works fine on SMALL but not for LARGE database

378 Views Asked by At

I'm taking CS50 and got stuck on this pset6.

I made this code and it's working fine for 'small' given database. On 'large' one i get wrong values in my DNA sequence. Like, using debug50 i got that Albus sequence should be 15,49,38... and my seq is 21, 55, 64...

whats wrong? AND why it works fine on small database and not in large one?

Thanks for the help!

# Import ARGV and CSV library
from sys import argv, exit
import pandas as pd
import csv

# Check if argv has 3 arguments (program name, cvs file and dna sequence)
while True:
    if len(argv) != 3:
        print("Usage: python dna.py data.csv sequence.txt")
        exit(1)
    else:
        break

with open(argv[2], 'r', encoding="UTF-8") as txt:
    dna_seq = txt.read()
#Find the number of STR - AGATC,TTTTTTCT,AATG,TCTAG,GATA,TATC,GAAA,TCTG
AGATC = dna_seq.count("AGATC")
TTTTTTCT = dna_seq.count("TTTTTTCT")
AATG = dna_seq.count("AATG")
TCTAG = dna_seq.count("TCTAG")
GATA = dna_seq.count("GATA")
TATC = dna_seq.count("TATC")
GAAA = dna_seq.count("GAAA")
TCTG = dna_seq.count("TCTG")

name = 0
if argv[1] == "databases/small.csv":

    with open(argv[1], 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            dna_db = row['name'], row['AGATC'], row['AATG'], row['TATC']
            dna_db = list(dna_db)

            seq = [AGATC, AATG, TATC]
            seq = [str(x) for x in seq]

            if dna_db[1:4] == seq:
                name = dna_db[:1]
                break
            else:
                name = "No match"

elif argv[1] == "databases/large.csv":

    with open(argv[1], 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            dna_db = row['name'], row['AGATC'], row['TTTTTTCT'], row['AATG'], row['TCTAG'], 
row['GATA'], row['TATC'], row['GAAA'], row['TCTG']
            dna_db = list(dna_db)

            seq = [AGATC,TTTTTTCT,AATG,TCTAG,GATA,TATC,GAAA,TCTG]
            seq = [str(x) for x in seq]

            if dna_db[1:9] == seq:
                name = dna_db[:1]
                break
            else:
                name = "No match"

print(name)
0

There are 0 best solutions below