Avatar of paulfioravanti

paulfioravanti's solution

to Protein Translation in the Elixir Track

Published at Aug 19 2019 · 0 comments
Instructions
Test suite
Solution

Translate RNA sequences into proteins.

RNA can be broken into three nucleotide sequences called codons, and then translated to a polypeptide like so:

RNA: "AUGUUUUCU" => translates to

Codons: "AUG", "UUU", "UCU" => which become a polypeptide with the following sequence =>

Protein: "Methionine", "Phenylalanine", "Serine"

There are 64 codons which in turn correspond to 20 amino acids; however, not all of the codon sequences and resulting amino acids are important in this exercise. If it works for one codon, the program should work for all of them. However, feel free to expand the list in the test suite to include them all.

There are also three terminating codons (also known as 'STOP' codons); if any of these codons are encountered (by the ribosome), all translation ends and the protein is terminated.

All subsequent codons are ignored, like this:

RNA: "AUGUUUUCUUAAAUG" =>

Codons: "AUG", "UUU", "UCU", "UAA", "AUG" =>

Protein: "Methionine", "Phenylalanine", "Serine"

Note the stop codon "UAA" terminates the translation and the final methionine is not translated into the protein sequence.

Below are the codons and resulting Amino Acids needed for the exercise.

Codon Protein
AUG Methionine
UUU, UUC Phenylalanine
UUA, UUG Leucine
UCU, UCC, UCA, UCG Serine
UAU, UAC Tyrosine
UGU, UGC Cysteine
UGG Tryptophan
UAA, UAG, UGA STOP

Learn more about protein translation on Wikipedia

Running tests

Execute the tests with:

$ mix test

Pending tests

In the test suites, all but the first test have been skipped.

Once you get a test passing, you can unskip the next one by commenting out the relevant @tag :pending with a # symbol.

For example:

# @tag :pending
test "shouting" do
  assert Bob.hey("WATCH OUT!") == "Whoa, chill out!"
end

Or, you can enable all the tests by commenting out the ExUnit.configure line in the test suite.

# ExUnit.configure exclude: :pending, trace: true

If you're stuck on something, it may help to look at some of the available resources out there where answers might be found.

Source

Tyler Long

Submitting Incomplete Solutions

It's possible to submit an incomplete solution so you can see how others have completed the exercise.

protein_translation_test.exs

defmodule ProteinTranslationTest do
  use ExUnit.Case

  # Test suite for ProteinTranslation.of_codon/1 and ProteinTranslation.of_rna/1.
  # Every test but the first carries `@tag :pending`; comment a tag out to
  # enable that test.

  # Asserts that each codon in `codons` translates to `{:ok, protein}`.
  defp assert_codons(codons, protein) do
    for codon <- codons do
      assert ProteinTranslation.of_codon(codon) == {:ok, protein}
    end
  end

  # @tag :pending
  test "AUG translates to methionine" do
    assert_codons(["AUG"], "Methionine")
  end

  @tag :pending
  test "identifies Phenylalanine codons" do
    assert_codons(["UUU", "UUC"], "Phenylalanine")
  end

  @tag :pending
  test "identifies Leucine codons" do
    assert_codons(["UUA", "UUG"], "Leucine")
  end

  @tag :pending
  test "identifies Serine codons" do
    assert_codons(["UCU", "UCC", "UCA", "UCG"], "Serine")
  end

  @tag :pending
  test "identifies Tyrosine codons" do
    assert_codons(["UAU", "UAC"], "Tyrosine")
  end

  @tag :pending
  test "identifies Cysteine codons" do
    assert_codons(["UGU", "UGC"], "Cysteine")
  end

  @tag :pending
  test "identifies Tryptophan codons" do
    assert_codons(["UGG"], "Tryptophan")
  end

  @tag :pending
  test "identifies stop codons" do
    assert_codons(["UAA", "UAG", "UGA"], "STOP")
  end

  @tag :pending
  test "translates rna strand into correct protein" do
    expected = ["Methionine", "Phenylalanine", "Tryptophan"]

    assert ProteinTranslation.of_rna("AUGUUUUGG") == {:ok, expected}
  end

  @tag :pending
  test "stops translation if stop codon present" do
    expected = ["Methionine", "Phenylalanine"]

    assert ProteinTranslation.of_rna("AUGUUUUAA") == {:ok, expected}
  end

  @tag :pending
  test "stops translation of longer strand" do
    expected = ["Tryptophan", "Cysteine", "Tyrosine"]

    assert ProteinTranslation.of_rna("UGGUGUUAUUAAUGGUUU") == {:ok, expected}
  end

  @tag :pending
  test "invalid RNA" do
    result = ProteinTranslation.of_rna("CARROT")

    assert result == {:error, "invalid RNA"}
  end

  @tag :pending
  test "invalid codon at end of RNA" do
    result = ProteinTranslation.of_rna("UUUROT")

    assert result == {:error, "invalid RNA"}
  end

  @tag :pending
  test "invalid codon" do
    result = ProteinTranslation.of_codon("INVALID")

    assert result == {:error, "invalid codon"}
  end
end

test_helper.exs

# Start the ExUnit test runner for this suite.
ExUnit.start()
# Exclude tests tagged :pending and turn on ExUnit's trace output.
# Comment this line out to run every test, including the pending ones.
ExUnit.configure(exclude: :pending, trace: true)
defmodule ProteinTranslation do
  @moduledoc """
  Translates RNA strands into the proteins encoded by their codons.

  An RNA strand is read three characters (one codon) at a time. Each codon maps
  to a protein name, and any of the terminating codons ends the translation.
  """

  @methionine "AUG"
  @phenylalanine ["UUC", "UUU"]
  @leucine ["UUA", "UUG"]
  @serine ["UCU", "UCC", "UCA", "UCG"]
  @tyrosine ["UAU", "UAC"]
  @cysteine ["UGU", "UGC"]
  @tryptophan "UGG"
  @terminating ["UAA", "UAG", "UGA"]
  @stop "STOP"
  # Matches one codon-sized chunk (three characters) of an RNA strand.
  @codon_pattern ~r/.{3}/

  # One private guard per protein so the `of_codon/1` clauses read as a table.
  defguardp methionine?(codon) when codon == @methionine
  defguardp phenylalanine?(codon) when codon in @phenylalanine
  defguardp leucine?(codon) when codon in @leucine
  defguardp serine?(codon) when codon in @serine
  defguardp tyrosine?(codon) when codon in @tyrosine
  defguardp cysteine?(codon) when codon in @cysteine
  defguardp tryptophan?(codon) when codon == @tryptophan
  defguardp terminating?(codon) when codon in @terminating

  @doc """
  Given an RNA string, return a list of proteins specified by codons, in order.

  Translation ends at the first terminating codon; anything after it is
  ignored. Returns `{:error, "invalid RNA"}` when a codon that is not recognised
  by `of_codon/1` is met before any terminating codon.

  ## Examples

      iex> ProteinTranslation.of_rna("AUGUUUUAAAUG")
      {:ok, ["Methionine", "Phenylalanine"]}

      iex> ProteinTranslation.of_rna("CARROT")
      {:error, "invalid RNA"}
  """
  @spec of_rna(String.t()) :: {:ok, list(String.t())} | {:error, String.t()}
  def of_rna(rna) do
    # Splitting on the pattern with `include_captures: true` keeps the matched
    # three-character chunks; `trim: true` drops the empty strings between them.
    # Leftover characters that do not form a full chunk are kept as their own
    # element and are later rejected as an invalid codon.
    codons = String.split(rna, @codon_pattern, include_captures: true, trim: true)

    case Enum.reduce_while(codons, [], &translate_codon/2) do
      :error -> {:error, "invalid RNA"}
      # Proteins are accumulated by prepending, so restore strand order here.
      proteins -> {:ok, Enum.reverse(proteins)}
    end
  end

  @doc """
  Given a codon, return the corresponding protein

  UGU -> Cysteine
  UGC -> Cysteine
  UUA -> Leucine
  UUG -> Leucine
  AUG -> Methionine
  UUU -> Phenylalanine
  UUC -> Phenylalanine
  UCU -> Serine
  UCC -> Serine
  UCA -> Serine
  UCG -> Serine
  UGG -> Tryptophan
  UAU -> Tyrosine
  UAC -> Tyrosine
  UAA -> STOP
  UAG -> STOP
  UGA -> STOP

  Any other value yields `{:error, "invalid codon"}`.
  """
  @spec of_codon(String.t()) :: {:ok, String.t()} | {:error, String.t()}
  def of_codon(codon) when methionine?(codon), do: {:ok, "Methionine"}
  def of_codon(codon) when phenylalanine?(codon), do: {:ok, "Phenylalanine"}
  def of_codon(codon) when leucine?(codon), do: {:ok, "Leucine"}
  def of_codon(codon) when serine?(codon), do: {:ok, "Serine"}
  def of_codon(codon) when tyrosine?(codon), do: {:ok, "Tyrosine"}
  def of_codon(codon) when cysteine?(codon), do: {:ok, "Cysteine"}
  def of_codon(codon) when tryptophan?(codon), do: {:ok, "Tryptophan"}
  def of_codon(codon) when terminating?(codon), do: {:ok, @stop}
  def of_codon(_codon), do: {:error, "invalid codon"}

  # Reducer for `Enum.reduce_while/3`: prepends the protein for `codon` to
  # `acc`, halts with the proteins gathered so far on a terminating codon, and
  # halts with `:error` on an unrecognised codon.
  defp translate_codon(codon, acc) do
    case of_codon(codon) do
      {:ok, @stop} -> {:halt, acc}
      {:ok, protein} -> {:cont, [protein | acc]}
      {:error, _message} -> {:halt, :error}
    end
  end
end

Community comments

Find this solution interesting? Ask the author a question to learn more.

What can you learn from this solution?

A huge amount can be learned from reading other people’s code. This is why we wanted to give exercism users the option of making their solutions public.

Here are some questions to help you reflect on this solution and learn the most from it.

  • What compromises have been made?
  • Are there new concepts here that you could read more about to improve your understanding?