🎉 Exercism Research is now launched. Help Exercism, help science and have some fun at research.exercism.io 🎉
Avatar of kevin-lindsay

kevin-lindsay's solution

to Protein Translation in the Go Track

Published at Apr 23 2021 · 0 comments
Instructions
Test suite
Solution

Translate RNA sequences into proteins.

RNA can be broken into three nucleotide sequences called codons, and then translated to a polypeptide like so:

RNA: "AUGUUUUCU" => translates to

Codons: "AUG", "UUU", "UCU" => which become a polypeptide with the following sequence =>

Protein: "Methionine", "Phenylalanine", "Serine"

There are 64 codons which in turn correspond to 20 amino acids; however, not all of the codon sequences and resulting amino acids are important in this exercise. If it works for one codon, the program should work for all of them. However, feel free to expand the list in the test suite to include them all.

There are also three terminating codons (also known as 'STOP' codons); if any of these codons are encountered (by the ribosome), all translation ends and the protein is terminated.

All codons after the STOP codon are ignored, like this:

RNA: "AUGUUUUCUUAAAUG" =>

Codons: "AUG", "UUU", "UCU", "UAA", "AUG" =>

Protein: "Methionine", "Phenylalanine", "Serine"

Note the stop codon "UAA" terminates the translation and the final methionine is not translated into the protein sequence.

Below are the codons and resulting Amino Acids needed for the exercise.

Codon Protein
AUG Methionine
UUU, UUC Phenylalanine
UUA, UUG Leucine
UCU, UCC, UCA, UCG Serine
UAU, UAC Tyrosine
UGU, UGC Cysteine
UGG Tryptophan
UAA, UAG, UGA STOP

Learn more about protein translation on Wikipedia

Coding the solution

Look for a stub file having the name protein_translation.go and place your solution code in that file.

Running the tests

To run the tests run the command go test from within the exercise directory.

If the test suite contains benchmarks, you can run these with the --bench and --benchmem flags:

go test -v --bench . --benchmem

Keep in mind that each reviewer will run benchmarks on a different machine, with different specs, so the results from these benchmark tests may vary.

Further information

For more detailed information about the Go track, including how to get help if you're having trouble, please visit the exercism.io Go language page.

Source

Tyler Long

Submitting Incomplete Solutions

It's possible to submit an incomplete solution so you can see how others have completed the exercise.

protein_translation_detailed_test.go

// +build detailed

package protein

import (
	"fmt"
	"testing"
)

// BenchmarkCodonDetailed runs one sub-benchmark per entry of codonTestCases,
// named "Codon<input>", each timing FromCodon on that entry's input.
func BenchmarkCodonDetailed(b *testing.B) {
	for _, tc := range codonTestCases {
		codon := tc.input
		name := fmt.Sprintf("Codon%s", codon)

		b.Run(name, func(sub *testing.B) {
			for n := 0; n < sub.N; n++ {
				FromCodon(codon)
			}
		})
	}
}

// BenchmarkProteinDetailed runs one sub-benchmark per entry of
// proteinTestCases, named "Protein<input>", each timing FromRNA on that
// entry's input.
func BenchmarkProteinDetailed(b *testing.B) {
	for _, tc := range proteinTestCases {
		rna := tc.input
		name := fmt.Sprintf("Protein%s", rna)

		b.Run(name, func(sub *testing.B) {
			for n := 0; n < sub.N; n++ {
				FromRNA(rna)
			}
		})
	}
}

protein_translation_test.go

package protein

import (
	"reflect"
	"testing"
)

// codonCase is one table entry for the FromCodon tests and benchmarks.
type codonCase struct {
	// input is the codon string passed to FromCodon.
	input         string
	// expected is the value FromCodon's first result is compared against.
	expected      string
	// errorExpected is the error FromCodon must return; nil means no error
	// is expected.
	errorExpected error
}

// codonTestCases lists the codon inputs used by the FromCodon tests and
// benchmarks, with the protein name or the error expected for each. Fields
// left out of an entry keep their zero value ("" or nil).
var codonTestCases = []codonCase{
	{input: "AUG", expected: "Methionine"},
	{input: "UUU", expected: "Phenylalanine"},
	{input: "UUC", expected: "Phenylalanine"},
	{input: "UUA", expected: "Leucine"},
	{input: "UUG", expected: "Leucine"},
	{input: "UCG", expected: "Serine"},
	{input: "UAU", expected: "Tyrosine"},
	{input: "UAC", expected: "Tyrosine"},
	{input: "UGU", expected: "Cysteine"},
	{input: "UGG", expected: "Tryptophan"},
	{input: "UAA", errorExpected: ErrStop},
	{input: "UAG", errorExpected: ErrStop},
	{input: "UGA", errorExpected: ErrStop},
	{input: "ABC", errorExpected: ErrInvalidBase},
}

// TestCodon runs FromCodon over codonTestCases and aborts on the first case
// whose returned error or protein name differs from the expectation.
func TestCodon(t *testing.T) {
	for _, tc := range codonTestCases {
		got, err := FromCodon(tc.input)

		switch {
		case tc.errorExpected != nil && err != tc.errorExpected:
			t.Fatalf("FAIL: Protein translation test: %s\nExpected error: %q\nActual error: %q",
				tc.input, tc.errorExpected, err)
		case tc.errorExpected == nil && err != nil:
			t.Fatalf("FAIL: Protein translation test: %s\nExpected: %s\nGot error: %q",
				tc.input, tc.expected, err)
		case got != tc.expected:
			// The result is checked even when an error was expected.
			t.Fatalf("FAIL: Protein translation test: %s\nExpected: %s\nActual: %s",
				tc.input, tc.expected, got)
		}

		t.Logf("PASS: Protein translation test: %s", tc.input)
	}
}

// rnaCase is one table entry for the FromRNA tests and benchmarks.
type rnaCase struct {
	// input is the RNA string passed to FromRNA.
	input         string
	// expected is the protein list FromRNA's first result is compared
	// against, including in cases where an error is expected.
	expected      []string
	// errorExpected is the error FromRNA must return; nil means no error is
	// expected.
	errorExpected error
}

// proteinTestCases lists the RNA inputs used by the FromRNA tests and
// benchmarks, with the protein names and the error expected for each. An
// omitted errorExpected is nil.
var proteinTestCases = []rnaCase{
	{
		input:    "AUGUUUUCUUAAAUG",
		expected: []string{"Methionine", "Phenylalanine", "Serine"},
	},
	{
		input:    "AUGUUUUGG",
		expected: []string{"Methionine", "Phenylalanine", "Tryptophan"},
	},
	{
		input:    "AUGUUUUAA",
		expected: []string{"Methionine", "Phenylalanine"},
	},
	{
		input:    "UGGUGUUAUUAAUGGUUU",
		expected: []string{"Tryptophan", "Cysteine", "Tyrosine"},
	},
	{
		input:         "UGGAGAAUUAAUGGUUU",
		expected:      []string{"Tryptophan"},
		errorExpected: ErrInvalidBase,
	},
}

// TestProtein runs FromRNA over proteinTestCases and aborts on the first case
// whose returned error or protein list differs from the expectation.
func TestProtein(t *testing.T) {
	for _, tc := range proteinTestCases {
		got, err := FromRNA(tc.input)

		switch {
		case tc.errorExpected != nil && err != tc.errorExpected:
			t.Fatalf("FAIL: RNA translation test: %s\nExpected error: %q\nActual error: %q",
				tc.input, tc.errorExpected, err)
		case tc.errorExpected == nil && err != nil:
			t.Fatalf("FAIL: RNA translation test: %s\nExpected: %s\nGot error: %q",
				tc.input, tc.expected, err)
		case !reflect.DeepEqual(got, tc.expected):
			// The protein list is checked even when an error was expected.
			t.Fatalf("FAIL: RNA Translation test: %s\nExpected: %q\nActual %q", tc.input, tc.expected, got)
		}

		t.Logf("PASS: RNA translation test: %s", tc.input)
	}
}

// BenchmarkCodon times FromCodon by calling it b.N times for each input in
// codonTestCases, one input after another.
func BenchmarkCodon(b *testing.B) {
	for idx := range codonTestCases {
		codon := codonTestCases[idx].input
		for n := 0; n < b.N; n++ {
			FromCodon(codon)
		}
	}
}

// BenchmarkProtein times FromRNA by calling it b.N times for each input in
// proteinTestCases, one input after another.
func BenchmarkProtein(b *testing.B) {
	for idx := range proteinTestCases {
		rna := proteinTestCases[idx].input
		for n := 0; n < b.N; n++ {
			FromRNA(rna)
		}
	}
}
// Package protein contains utilities for working with proteins.
package protein

import (
	"fmt"
	"strings"
)

// Codon is a three-letter RNA codon, stored as its letters (for example "AUG").
type Codon string

// The codons known to this package, grouped by what they translate to.
const (
	// Methionine.
	AUG Codon = "AUG"

	// Phenylalanine.
	UUU Codon = "UUU"
	UUC Codon = "UUC"

	// Leucine.
	UUA Codon = "UUA"
	UUG Codon = "UUG"

	// Serine.
	UCU Codon = "UCU"
	UCC Codon = "UCC"
	UCA Codon = "UCA"
	UCG Codon = "UCG"

	// Tyrosine.
	UAU Codon = "UAU"
	UAC Codon = "UAC"

	// Cysteine.
	UGU Codon = "UGU"
	UGC Codon = "UGC"

	// Tryptophan.
	UGG Codon = "UGG"

	// Stop codons: they end translation and map to no protein.
	UAA Codon = "UAA"
	UAG Codon = "UAG"
	UGA Codon = "UGA"
)

// Protein is the name of an amino acid produced by translating a codon.
type Protein string

// The amino acids this package can produce.
const (
	Methionine    Protein = "Methionine"
	Phenylalanine Protein = "Phenylalanine"
	Leucine       Protein = "Leucine"
	Serine        Protein = "Serine"
	Tyrosine      Protein = "Tyrosine"
	Cysteine      Protein = "Cysteine"
	Tryptophan    Protein = "Tryptophan"
)

// codonToProtein maps each translatable codon to the protein it produces.
// The stop codons (UAA, UAG, UGA) are deliberately absent: FromCodon handles
// them before consulting this table.
var codonToProtein = map[Codon]Protein{
	AUG: Methionine,
	UUU: Phenylalanine,
	UUC: Phenylalanine,
	UUA: Leucine,
	UUG: Leucine,
	UCU: Serine,
	UCC: Serine,
	UCA: Serine,
	UCG: Serine,
	UAU: Tyrosine,
	UAC: Tyrosine,
	UGU: Cysteine,
	UGC: Cysteine,
	UGG: Tryptophan,
}

// Sentinel errors returned by the translation functions. Callers compare
// against them by identity.
var (
	// ErrStop is returned by FromCodon when the codon is a stop codon.
	ErrStop = fmt.Errorf("stop")

	// ErrInvalidBase is returned by FromCodon when the codon is not one this
	// package knows how to translate.
	ErrInvalidBase = fmt.Errorf("invalid base")
)

// FromCodon translates a single codon into the name of its protein.
//
// It returns ErrStop for the stop codons UAA, UAG and UGA, and ErrInvalidBase
// for any other string that is not a key of codonToProtein. The returned name
// is empty whenever an error is returned.
//
// NOTE: codonString should instead be a Codon and this should return a Protein.
func FromCodon(codonString string) (string, error) {
	switch c := Codon(codonString); c {
	case UAA, UAG, UGA:
		return "", ErrStop
	default:
		if name, known := codonToProtein[c]; known {
			return string(name), nil
		}
		return "", ErrInvalidBase
	}
}

// splitStringByN splits s into consecutive chunks of n characters (runes).
// A final chunk shorter than n is kept rather than dropped.
// For example, splitting "hello world" by 3 returns ["hel", "lo ", "wor", "ld"].
//
// The result is never nil: an empty s yields an empty slice. If n is not
// positive no splitting is possible, so a non-empty s is returned unsplit as a
// single chunk.
func splitStringByN(s string, n int) []string {
	chunks := []string{}
	if s == "" {
		return chunks
	}
	if n <= 0 {
		return append(chunks, s)
	}

	var chunk strings.Builder
	// Count runes explicitly: the index yielded by ranging over a string is a
	// byte offset, which drifts from the character count on multi-byte input.
	count := 0
	for _, r := range s {
		chunk.WriteRune(r)
		count++

		if count == n {
			chunks = append(chunks, chunk.String())
			chunk.Reset()
			count = 0
		}
	}

	// Flush the trailing partial chunk, if any.
	if count > 0 {
		chunks = append(chunks, chunk.String())
	}

	return chunks
}

// FromRNA translates an RNA sequence into the names of the proteins it encodes.
//
// The sequence is broken into codons with splitStringByN(rna, 3) and each
// codon is translated with FromCodon. Translation ends successfully at the
// first stop codon; a stop codon at the very start of the sequence therefore
// yields an empty (non-nil) list and a nil error. Any other error from
// FromCodon, such as ErrInvalidBase, aborts translation and is returned
// together with the proteins translated up to that point.
//
// NOTE: this should return a list of Proteins
func FromRNA(rna string) ([]string, error) {
	proteins := []string{}

	for _, codon := range splitStringByN(rna, 3) {
		protein, err := FromCodon(codon)

		// A stop codon is the normal end of translation wherever it occurs,
		// so it is never reported to the caller as an error.
		if err == ErrStop {
			return proteins, nil
		}

		// Propagate every other failure instead of appending an empty name.
		if err != nil {
			return proteins, err
		}

		proteins = append(proteins, protein)
	}

	return proteins, nil
}

Community comments

Find this solution interesting? Ask the author a question to learn more.

What can you learn from this solution?

A huge amount can be learned from reading other people’s code. This is why we wanted to give exercism users the option of making their solutions public.

Here are some questions to help you reflect on this solution and learn the most from it.

  • What compromises have been made?
  • Are there new concepts here that you could read more about to improve your understanding?