Avatar of dbatis

dbatis's solution

to Nucleotide Count in the OCaml Track

Published at Aug 09 2019 · 0 comments
Instructions
Test suite
Solution

Given a single stranded DNA string, compute how many times each nucleotide occurs in the string.

The genetic language of every living thing on the planet is DNA. DNA is a large molecule that is built from an extremely long sequence of individual elements called nucleotides. 4 types exist in DNA and these differ only slightly and can be represented as the following symbols: 'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.

Here is an analogy:

  • twigs are to birds nests as
  • nucleotides are to DNA as
  • legos are to lego houses as
  • words are to sentences as...

Getting Started

  1. Install the Exercism CLI.

  2. Install OCaml.

  3. For library documentation, follow Useful OCaml resources.

Running Tests

A Makefile is provided with a default target to compile your solution and run the tests. At the command line, type:

make

Submitting Incomplete Solutions

It's possible to submit an incomplete solution so you can see how others have completed the exercise.

Feedback, Issues, Pull Requests

The exercism/ocaml repository on GitHub is the home for all of the OCaml exercises.

If you have feedback about an exercise, or want to help implement a new one, head over there and create an issue or submit a PR. We welcome new contributors!

Source

The Calculating DNA Nucleotides problem at Rosalind http://rosalind.info/problems/dna/

test.ml

open Base
open OUnit2

module NC = Nucleotide_count

(* Assert that two '(int, char) Result.t' values are equal.
   [exp] is the expected value and [got] the actual one; the OUnit2 test
   context is ignored. On failure both values are rendered as
   human-readable s-expressions. *)
let aire exp got _ctxt =
  let printer result =
    Sexp.to_string_hum ~indent:1
      (Result.sexp_of_t Int.sexp_of_t Char.sexp_of_t result)
  in
  assert_equal ~printer exp got

(* Assert that two '(int Char.Map.t, char) Result.t' values are equal.
   Two [Ok] maps are compared with [Map.equal] on their integer counts and
   two [Error] characters with [Char.equal]; an [Ok] never equals an
   [Error]. On failure both values are rendered as s-expressions. *)
let amre exp got _ctxt =
  let sexp_of_counts = Map.sexp_of_m__t (module Char) Int.sexp_of_t in
  let printer result =
    Sexp.to_string_hum ~indent:1
      (Result.sexp_of_t sexp_of_counts Char.sexp_of_t result)
  in
  let cmp expected actual =
    match expected, actual with
    | Ok expected_counts, Ok actual_counts ->
      Map.equal Int.equal expected_counts actual_counts
    | Error expected_char, Error actual_char ->
      Char.equal expected_char actual_char
    | Ok _, Error _ | Error _, Ok _ -> false
  in
  assert_equal ~cmp ~printer exp got

(* Test cases: first the single-nucleotide counts checked through [aire],
   then the whole-strand count maps checked through [amre]. *)
let tests =
  (* A case comparing [NC.count_nucleotide strand nucleotide] with [exp]. *)
  let count_case name exp strand nucleotide =
    name >:: aire exp (NC.count_nucleotide strand nucleotide)
  in
  (* A case comparing [NC.count_nucleotides strand] with [exp]. *)
  let counts_case name exp strand =
    name >:: amre exp (NC.count_nucleotides strand)
  in
  (* Expected successful result built from an association list. *)
  let counts alist = Ok (Map.of_alist_exn (module Char) alist) in
  [ count_case "Empty DNA string has no invalid nucleotides"
      (Error 'X') "" 'X';
    count_case "Non-empty DNA string has no invalid nucleotides"
      (Error 'X') "ACGT" 'X';
    count_case "Invalid DNA string has no invalid nucleotides"
      (Error 'X') "ACGXT" 'A';

    count_case "Empty DNA string has zero Adenine nucleotides"
      (Ok 0) "" 'A';
    count_case "DNA string with one Adenine nucleotide"
      (Ok 1) "A" 'A';
    count_case "DNA string with five Cytosine nucleotides"
      (Ok 5) "CCCCC" 'C';
    count_case "DNA string with two Guanine nucleotides"
      (Ok 2) "ACGGT" 'G';
    count_case "DNA string with three Thymine nucleotides"
      (Ok 3) "CACTAGCTGCT" 'T';

    counts_case "Invalid DNA string has no nucleotides"
      (Error 'X') "ACGXT";
    counts_case "Empty DNA string has zero nucleotides"
      (Ok (Map.empty (module Char))) "";
    counts_case "DNA string with two Adenine nucleotides"
      (Ok (Map.singleton (module Char) 'A' 2)) "AA";
    counts_case "DNA string with one Adenine, two Cytosine nucleotides"
      (counts [('A', 1); ('C', 2)]) "ACC";
    counts_case
      "DNA string with one Adenine, two Cytosine, three Guanine, four Thymine nucleotides"
      (counts [('A', 1); ('C', 2); ('G', 3); ('T', 4)]) "CGTATGTCTG";
  ]

(* Entry point: wrap every case in one labelled suite and hand it to the
   OUnit2 runner. *)
let () =
  let suite = "nucleotide-counts tests" >::: tests in
  run_test_tt_main suite
open Base
open Result.Monad_infix

(** The characters accepted as nucleotides. *)
let valid_characters = [ 'A'; 'C'; 'G'; 'T' ]

(** [is_valid_character c] is [true] when [c] is one of
    [valid_characters]. *)
let is_valid_character c =
  List.exists valid_characters ~f:(Char.equal c)

(** [count_nucleotide_rec l c counter] walks the character list [l] and
    returns [Ok n], where [n] is [counter] plus the number of elements equal
    to [c]. It stops at the first element that is not a valid nucleotide and
    returns [Error ch], where [ch] is that offending character (previously
    the error was always the literal ['X'], whatever the bad character was).
    The function is tail-recursive. *)
let rec count_nucleotide_rec l c counter =
  match l with
  | [] -> Ok counter
  | hd :: _ when not (is_valid_character hd) -> Error hd
  | hd :: tl ->
    (* [hd] is known to be valid here; only a match against [c] counts. *)
    let counter = if Char.equal hd c then counter + 1 else counter in
    count_nucleotide_rec tl c counter

(** [count_nucleotide s c] counts the occurrences of nucleotide [c] in the
    strand [s].

    Returns [Error c] when [c] itself is not a valid nucleotide (previously
    the error was always the literal ['X'], whatever [c] was). Otherwise
    the result is whatever [count_nucleotide_rec] yields for the characters
    of [s]: [Ok count], or an [Error] if the strand contains an invalid
    character. *)
let count_nucleotide s c =
  if is_valid_character c then count_nucleotide_rec (String.to_list s) c 0
  else Error c

(** [count_nucleotides s] tallies every nucleotide of the strand [s].

    Returns [Ok counts], where [counts] maps each nucleotide that occurs in
    [s] to its number of occurrences; nucleotides that do not occur have no
    binding, so the empty strand gives the empty map. Returns [Error ch] for
    the first character [ch] of [s] that is not a valid nucleotide.

    The strand is traversed once and the traversal stops at the first
    invalid character, instead of converting and scanning the whole strand
    once per nucleotide. *)
let count_nucleotides s =
  let validate c = if is_valid_character c then Ok c else Error c in
  let increment = function
    | None -> 1
    | Some n -> n + 1
  in
  String.fold_result s ~init:(Map.empty (module Char)) ~f:(fun counts c ->
      validate c >>| fun nucleotide ->
      Map.update counts nucleotide ~f:increment)

What can you learn from this solution?

A huge amount can be learned from reading other people’s code. This is why we wanted to give exercism users the option of making their solutions public.

Here are some questions to help you reflect on this solution and learn the most from it.

  • What compromises have been made?
  • Are there new concepts here that you could read more about to improve your understanding?