Thursday, March 28, 2013

Two different ways to translate mRNA to Protein


from Bio.Seq import Seq
from Bio.Alphabet import IUPAC


def translate(mRNA):
    '''(str) -> str

    Translate an mRNA string into the corresponding protein string using
    Biopython, print the protein and also return it.

    Translation reads the sequence codon by codon from the first base and
    ends at the first in-frame stop codon; the stop codon itself is not
    part of the result.

    >>> protein = translate('AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA')
    MAMAPRTEINSTRING
    >>> protein
    'MAMAPRTEINSTRING'

    Precondition: mRNA has to be Uppercase and include only GAUC

    '''
    # Label the sequence as unambiguous RNA (the letters G, A, U, C).
    # NOTE(review): presumably this only tags the sequence and does not
    # validate its content -- confirm against the installed Biopython.
    messenger_rna = Seq(mRNA, IUPAC.unambiguous_rna)
    # Translate with the codon table built into Biopython. to_stop=True ends
    # the protein at the first stop codon instead of emitting a '*' for it,
    # so the result is the bare protein string the docstring promises.
    protein = str(messenger_rna.translate(to_stop=True))

    # Keep the original visible behaviour (printing) and additionally return
    # the string so callers can use the result. The parenthesised form works
    # as both a Python 2 print statement and a Python 3 function call.
    print(protein)
    return protein


--------------------------------------------

# Standard RNA codon table as whitespace-separated "codon amino-acid" pairs.
_CODON_TABLE_TEXT = '''
UUU F      CUU L      AUU I      GUU V
UUC F      CUC L      AUC I      GUC V
UUA L      CUA L      AUA I      GUA V
UUG L      CUG L      AUG M      GUG V
UCU S      CCU P      ACU T      GCU A
UCC S      CCC P      ACC T      GCC A
UCA S      CCA P      ACA T      GCA A
UCG S      CCG P      ACG T      GCG A
UAU Y      CAU H      AAU N      GAU D
UAC Y      CAC H      AAC N      GAC D
UAA Stop   CAA Q      AAA K      GAA E
UAG Stop   CAG Q      AAG K      GAG E
UGU C      CGU R      AGU S      GGU G
UGC C      CGC R      AGC S      GGC G
UGA Stop   CGA R      AGA R      GGA G
UGG W      CGG R      AGG R      GGG G
'''

# split() drops every space and newline, leaving codon, amino acid, codon, ...
# so the even items are the keys and the odd items are the values.
_CODON_FIELDS = _CODON_TABLE_TEXT.split()
# Built once at import time instead of on every call.
_CODON_TABLE = dict(zip(_CODON_FIELDS[0::2], _CODON_FIELDS[1::2]))


def translate_2(mRNA):
    '''(str) -> str

    Translate an mRNA string into the corresponding protein string with a
    hand-written codon table, print the protein and also return it.

    The sequence is read codon by codon from the first base. Translation
    ends at the first stop codon (UAA, UAG or UGA), which is not part of the
    result; if there is no stop codon every complete codon is translated.
    One or two leftover bases at the end are ignored.

    >>> protein = translate_2('AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA')
    MAMAPRTEINSTRING
    >>> protein
    'MAMAPRTEINSTRING'

    Precondition: mRNA has to be Uppercase and include only GAUC;
    any other codon raises KeyError.

    '''
    amino_acids = []
    # len(mRNA) - 2 is the last index at which a full three-base codon can
    # still start, so the final complete codon is always visited.
    for start in range(0, len(mRNA) - 2, 3):
        amino_acid = _CODON_TABLE[mRNA[start:start + 3]]
        if amino_acid == 'Stop':
            # A stop codon terminates the protein and encodes no residue.
            break
        amino_acids.append(amino_acid)

    protein = ''.join(amino_acids)

    # Keep the original visible behaviour (printing) and additionally return
    # the string so callers can use the result. The parenthesised form works
    # as both a Python 2 print statement and a Python 3 function call.
    print(protein)
    return protein