Proteinbiosynthese: Open Reading Frames?
# Question (from the original post): I have FASTA files containing DNA
# information and wanted to write a function that finds the open reading
# frames (ORFs) of a genome for a given reading frame. (The task specification
# referenced in the post is not included in the text.)
#
# My function looks like this (corrected version; see the docstring of
# get_orfs for what was wrong):

# Watson-Crick base pairing, used to build the reverse-complement strand.
complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}

# Offset of the first codon for every supported reading frame.  The reverse
# frames use the same offsets, but on the reverse-complement strand.
_FRAME_SHIFTS = {'F1': 0, 'F2': 1, 'F3': 2, 'R1': 0, 'R2': 1, 'R3': 2}
_START_CODON = 'ATG'
_STOP_CODONS = frozenset({'TAG', 'TAA', 'TGA'})


def get_orfs(genome, frame, min_n_codons):
    """Return the open reading frames of *genome* in one reading frame.

    An ORF starts at the first ``ATG`` that is in frame and ends with the
    next in-frame stop codon (``TAG``, ``TAA`` or ``TGA``).  Scanning then
    continues behind that stop codon, so ORFs of one frame never overlap.

    Why the original version reported far too many ORFs:

    * It moved the search position forward by ONE base at a time, so a call
      for e.g. ``'F1'`` also found the start codons of frames 2 and 3.  Each
      ORF was therefore reported by all three frames of a strand.
    * After a stop codon it resumed one base after the previous start, so
      every further ``ATG`` inside an ORF produced another (overlapping)
      ORF ending at the same stop codon.

    This version walks the sequence strictly in steps of three bases from
    the frame offset and keeps only the first start codon of each ORF.

    Args:
        genome: Object with ``sequence`` (DNA string over ``ACGT``) and
            ``organism`` attributes.
        frame: One of ``'F1'``, ``'F2'``, ``'F3'`` (forward strand) or
            ``'R1'``, ``'R2'``, ``'R3'`` (reverse-complement strand).
        min_n_codons: Minimum number of codons between start and stop
            (start codon included, stop codon excluded) for an ORF to be
            reported.

    Returns:
        List of ``se.Sequence`` objects of type ``'orf'``, ordered by start
        position.  The ID has the form ``"<frame>.<start>.<end>"``; the
        positions refer to the scanned strand (for ``R*`` frames: the
        reverse complement).

    Raises:
        KeyError: If *frame* is not a supported frame name, or if a reverse
            frame is requested and the sequence contains a base that is not
            in ``complement``.
    """
    shift = _FRAME_SHIFTS[frame]

    if frame.startswith('F'):
        sequence = genome.sequence
    else:
        # Reverse frames are read on the reverse-complement strand.
        sequence = ''.join(complement[base] for base in reversed(genome.sequence))

    orfs = []
    orf_start = None  # index of the start codon of the ORF currently open

    # Visit only the codons of the requested frame: shift, shift + 3, ...
    for i in range(shift, len(sequence) - 2, 3):
        codon = sequence[i:i + 3]

        if orf_start is None:
            # Outside an ORF: only a start codon is of interest.
            if codon == _START_CODON:
                orf_start = i
            continue

        if codon not in _STOP_CODONS:
            # Inside an ORF; further ATGs are ordinary codons here.
            continue

        # Stop codon closes the open ORF.  The stop codon itself is not
        # counted, matching the original (i + 2 - start_index) // 3.
        codon_count = (i - orf_start) // 3
        if codon_count >= min_n_codons:
            start_pos = orf_start + 1  # 1-based position of the 'A' of ATG
            # NOTE(review): i + 3 would be the 1-based position of the last
            # base of the stop codon; i + 4 is kept from the original post.
            # Confirm against the expected ID format of the assignment.
            end_pos = i + 4
            # `se` is expected to be the module providing the Sequence class
            # (imported elsewhere in the asker's file, not shown in the post).
            orfs.append(
                se.Sequence(
                    id=f"{frame}.{start_pos}.{end_pos}",
                    organism=genome.organism,
                    type='orf',
                    sequence=sequence[orf_start:i + 3],
                )
            )
        # Too-short ORFs are dropped, but scanning continues either way.
        orf_start = None

    # A start codon without a following in-frame stop codon yields no ORF.
    # The single left-to-right pass already produces ORFs sorted by start.
    return orfs


# The class looks like this:

class Sequence:
    """Plain record for a named biological sequence."""

    def __init__(self, id: str, organism: str, sequence: str, type: str) -> None:
        # Identifier; get_orfs builds it as "<frame>.<start>.<end>".
        self.id = id
        # Organism name, copied from the genome for ORFs.
        self.organism = organism
        # The sequence string itself.
        self.sequence = sequence
        # Kind of sequence; get_orfs sets 'orf'.
        self.type = type


# Original problem statement: the function returned far too many ORFs and the
# asker could not find the bug.  The two causes are described in the
# docstring of get_orfs above.
