How can I make the following comparison without having to write 20 if-statements or compile 20 lists / dictionaries?

This problem is related to biology, so if you already know what amino acids and codons are, great! For those who do not, I have tried my best to explain it so that you can understand what I'm talking about.

So, I have a list of codons — 3-letter strings made up of combinations of the following four letters: A, G, C, T (e.g. AAT, GAT, GCT). Each codon corresponds to a specific amino acid, but multiple codons can correspond to the same amino acid. To illustrate this, see this link: http://www.cbs.dtu.dk/courses/27619/codon.html . This should make it clear.

For each codon in my list, I ultimately want to find out which amino acid it corresponds to. To do that, the program first has to match the codon against the full list of 64 possible codons from the link above, and then look up which amino acid corresponds to it. However, I cannot figure out a concise way to do this without building a separate list of codons for every amino acid and comparing against each one, or writing 20 different if-statements.

The list of codons that I have is called mutated_codon. So I need to write a for loop in which the program takes each codon in my mutated_codon list, looks it up in a dictionary, and prints the corresponding amino acid letter. What code should I write for this? I am not familiar with the syntax used to look up values in a dictionary.

Here is what I have done so far based on the suggestions:

# Codon -> one-letter amino acid code for all 64 DNA codons.
# The three stop codons (TAA, TAG, TGA) are mapped to 'Z'.
codon_lookup = {
    'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'TGT': 'C', 'TGC': 'C',
    'GAT': 'D', 'GAC': 'D',
    'GAA': 'E', 'GAG': 'E',
    'TTT': 'F', 'TTC': 'F',
    'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    'CAT': 'H', 'CAC': 'H',
    'ATT': 'I', 'ATC': 'I', 'ATA': 'I',
    'AAA': 'K', 'AAG': 'K',
    'TTA': 'L', 'TTG': 'L', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
    'ATG': 'M',
    'AAT': 'N', 'AAC': 'N',
    'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'CAA': 'Q', 'CAG': 'Q',
    'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
    'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'AGT': 'S', 'AGC': 'S',
    'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
    'TGG': 'W',
    'TAT': 'Y', 'TAC': 'Y',
    'TAA': 'Z', 'TAG': 'Z', 'TGA': 'Z',
}

# Print the amino-acid letter for every codon in mutated_codon (Python 2 print
# statement). Indexing with [] raises KeyError when an element of
# mutated_codon is not a key of codon_lookup.
for c in mutated_codon:
   print codon_lookup[c]

However, when I run this I get an error: KeyError: 4. Why is that?

+4
5

, . , , , . FoldBird , , , .

codon_lookup, mutated_codon, -

# Python 2 form: `print` is a statement here.
# Looks up each codon of mutated_codon in codon_lookup and prints the result.
for codon in mutated_codon:
    print codon_lookup[codon]

, Python 3.X:

# Python 3 form: `print` is a function, so the argument goes in parentheses.
for codon in mutated_codon:
    print(codon_lookup[codon])

Python, mutated_codons, , . mutated_codon .

, , , , , . - :

# Collect the looked-up amino acids in a list (same order as mutated_codon)
# instead of printing them.
acids = [codon_lookup[codon] for codon in mutated_codon]

for, :

# Build the list of amino acids one codon at a time with an explicit loop.
acids = []
for codon in mutated_codon:
    acids.append(codon_lookup[codon])

. , , .

, , :

# Print each collected amino acid on its own line (Python 2 print statement).
for acid in acids:
    print acid

.

, , - get . , KeyError, codon_lookup. , - , - , , mutated_codon, , , , . - , , get . . , . , ( ) None ( ). , , , , - :

# dict.get returns the supplied default ('*') instead of raising KeyError
# when a codon is not a key of codon_lookup, so unknown entries print as '*'.
for codon in mutated_codon:
    print codon_lookup.get(codon, '*')

biopython, , docs translate - :

>>> from Bio.Seq import Seq
>>> from Bio.Alphabet import generic_dna
>>> coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG", generic_dna)
>>> coding_dna.translate()
Seq('MAIVMGR*KGAR*', HasStopCodon(ExtendedIUPACProtein(), '*'))

, , . , , , , .

+3

:

# Codon -> full amino acid name. Only the first few entries are shown;
# the remaining codons follow the same pattern.
codon_lookup = {
    'ATT':'Isoleucine',
    'ATC':'Isoleucine', 
    'ATA':'Isoleucine',
    'CTT':'Leucine',
    'CTC':'Leucine', 
    'CTA':'Leucine',
     # ... etc
} 

,

codon_lookup['ATT']

'Isoleucine'

EDIT:

:

# Codon -> one-letter amino acid code. Only the first few entries are shown;
# the remaining codons follow the same pattern.
codon_lookup = {
    'ATT':'I',
    'ATC':'I', 
    'ATA':'I',
    'CTT':'L',
    'CTC':'L', 
    'CTA':'L',
     # ... etc
} 

,

codon_lookup['ATT']

'I'

mutated_condons , . mutated_condons ['ACA','GTT',...], :

# Look up and print the amino acid for every codon in the mutated_condons
# list (Python 2 print statement).
for mutated_codon in mutated_condons:
    print codon_lookup[mutated_codon]
+11

, , , , .

: SLC Codon (s) Codon to SLC.

# Single-letter code (SLC) -> list of every codon that encodes it.
# The '*' entry holds the three stop codons.
slc_codon = {
    'I': 'ATT ATC ATA'.split(),
    'L': 'CTT CTC CTA CTG TTA TTG'.split(),
    'V': 'GTT GTC GTA GTG'.split(),
    'F': 'TTT TTC'.split(),
    'M': 'ATG'.split(),
    'C': 'TGT TGC'.split(),
    'A': 'GCT GCC GCA GCG'.split(),
    'G': 'GGT GGC GGA GGG'.split(),
    'P': 'CCT CCC CCA CCG'.split(),
    'T': 'ACT ACC ACA ACG'.split(),
    'S': 'TCT TCC TCA TCG AGT AGC'.split(),
    'Y': 'TAT TAC'.split(),
    'W': 'TGG'.split(),
    'Q': 'CAA CAG'.split(),
    'N': 'AAT AAC'.split(),
    'H': 'CAT CAC'.split(),
    'E': 'GAA GAG'.split(),
    'D': 'GAT GAC'.split(),
    'K': 'AAA AAG'.split(),
    'R': 'CGT CGC CGA CGG AGA AGG'.split(),
    '*': 'TAA TAG TGA'.split(),
}

# Invert the mapping: each individual codon -> its single-letter code.
codon_slc = {
    triplet: letter
    for letter, triplets in slc_codon.items()
    for triplet in triplets
}

>>> print codon_slc
>>> {'CTT': 'L', 'ATG': 'M', 'AAG': 'K', 'AAA': 'K', 'ATC': 'I', 'AAC': 'N', 'ATA': 'I', 'AGG': 'R', 'CCT': 'P', 'ACT': 'T', 'AGC': 'S', 'ACA': 'T', 'AGA': 'R', 'CAT': 'H', 'AAT': 'N', 'ATT': 'I', 'CTG': 'L', 'CTA': 'L', 'CTC': 'L', 'CAC': 'H', 'ACG': 'T', 'CAA': 'Q', 'AGT': 'S', 'CAG': 'Q', 'CCG': 'P', 'CCC': 'P', 'TAT': 'Y', 'GGT': 'G', 'TGT': 'C', 'CGA': 'R', 'CCA': 'P', 'TCT': 'S', 'GAT': 'D', 'CGG': 'R', 'TTT': 'F', 'TGC': 'C', 'GGG': 'G', 'TAG': '*', 'GGA': 'G', 'TAA': '*', 'GGC': 'G', 'TAC': 'Y', 'GAG': 'E', 'TCG': 'S', 'TTA': 'L', 'GAC': 'D', 'TCC': 'S', 'GAA': 'E', 'TCA': 'S', 'GCA': 'A', 'GTA': 'V', 'GCC': 'A', 'GTC': 'V', 'GCG': 'A', 'GTG': 'V', 'TTC': 'F', 'GTT': 'V', 'GCT': 'A', 'ACC': 'T', 'TGA': '*', 'TTG': 'L', 'CGT': 'R', 'TGG': 'W', 'CGC': 'R'}
+5

( ) , :

# Row-oriented codon table: each row pairs an amino acid name with the list
# of codons that encode it. Only the first two rows are shown.
table = [
    {
        'amino_acid': 'Isoleucine',
        'codons': ['ATT', 'ATC', 'ATA'],
    },
    {
        'amino_acid': 'Leucine',
        'codons': ['CTT', 'CTC', 'CTA', 'CTG', 'TTA', 'TTG'],
    },
    # ...etc...
]

, :

# Flatten the row-oriented table into a codon -> amino acid name mapping.
# A dict comprehension visits rows and codons in the same order as the
# chained nested lists did, without needing itertools or temporary lists.
lookup = {codon: row['amino_acid'] for row in table for codon in row['codons']}

Then lookup['TTA'] returns 'Leucine'.

, , , , . , , .

+2

, , http://www.cbs.dtu.dk/courses/27619/codon.html .

- , . -, . , ; , , - , . .

, HTML, . , ( , HTML CSS ..), , - , , . , , BeautifulSoup. ( , , .)

import contextlib
import urllib2  # Python 2 only; on Python 3 the equivalent module is urllib.request

import bs4

url = 'http://www.cbs.dtu.dk/courses/27619/codon.html'

# Download the codon table page and parse it. closing() makes sure the HTTP
# response is released, and naming the parser keeps the result independent of
# which optional parsers happen to be installed.
with contextlib.closing(urllib2.urlopen(url)) as page:
    soup = bs4.BeautifulSoup(page, 'html.parser')

# Build codon -> single-letter code from the table rows.
codon_lookup = {}
# The first two rows and the last row are skipped.
# NOTE(review): presumably header/footer rows of the page's table — confirm against the page.
for row in soup.find_all('tr')[2:-1]:
    # Each remaining row is expected to have exactly three cells:
    # amino acid name, single-letter code, comma-separated codons.
    amino, slc, codons = (col.text.strip() for col in row.find_all('td'))
    if slc == 'Stop':
        # Stop codons get the placeholder letter 'Z'.
        slc = 'Z'
    for codon in codons.split(','):
        codon_lookup[codon.strip()] = slc

, , , , , . pickle ( JSON, , ). :

import pickle
# Serialize codon_lookup to codons.pickle. The file is opened in binary mode
# ('wb') because pickle writes bytes.
with open('codons.pickle', 'wb') as f:
    pickle.dump(codon_lookup, f)

Now, in your main program, you simply start with:

# Restore codon_lookup from codons.pickle (binary mode, matching how it is written).
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load a codons.pickle that you created yourself.
with open('codons.pickle', 'rb') as f:
    codon_lookup = pickle.load(f)
+1
source

All Articles