 # silmeth's solution

## to Nucleotide Count in the OCaml Track

Published at Mar 08 2019 · 0 comments
Instructions
Test suite
Solution

Given a single stranded DNA string, compute how many times each nucleotide occurs in the string.

The genetic language of every living thing on the planet is DNA. DNA is a large molecule that is built from an extremely long sequence of individual elements called nucleotides. 4 types exist in DNA and these differ only slightly and can be represented as the following symbols: 'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.

Here is an analogy:

• twigs are to birds nests as
• nucleotides are to DNA as
• legos are to lego houses as
• words are to sentences as...

## Getting Started

1. For library documentation, follow Useful OCaml resources.

## Running Tests

A `Makefile` is provided with a default target to compile your solution and run the tests. At the command line, type:

``````
make
``````

## Submitting Incomplete Solutions

It's possible to submit an incomplete solution so you can see how others have completed the exercise.

## Feedback, Issues, Pull Requests

The exercism/ocaml repository on GitHub is the home for all of the OCaml exercises.

If you have feedback about an exercise, or want to help implement a new one, head over there and create an issue or submit a PR. We welcome new contributors!

## Source

The Calculating DNA Nucleotides problem at Rosalind http://rosalind.info/problems/dna/

### test.ml

``````open Base
open OUnit2

module NC = Nucleotide_count

(* Assert that two '(int, char) Result.t' values are equivalent.
   On failure the values are rendered as human-readable s-expressions. *)
let aire exp got _ctxt =
  let printer r =
    r
    |> Result.sexp_of_t Int.sexp_of_t Char.sexp_of_t
    |> Sexp.to_string_hum ~indent:1
  in
  assert_equal ~printer exp got

(* Assert that two '(int Map.M(Char).t, char) Result.t' values are equivalent.
   Two 'Ok' maps are equal when they hold the same bindings; two 'Error'
   values are equal when they carry the same character. *)
let amre exp got _ctxt =
  let printer r =
    r
    |> Result.sexp_of_t
         (Map.sexp_of_m__t (module Char) Int.sexp_of_t)
         Char.sexp_of_t
    |> Sexp.to_string_hum ~indent:1
  in
  let cmp expected actual =
    match expected, actual with
    | Ok m1, Ok m2 -> Map.equal Int.equal m1 m2
    | Error c1, Error c2 -> Char.equal c1 c2
    | Ok _, Error _ | Error _, Ok _ -> false
  in
  assert_equal ~cmp ~printer exp got

(* The test cases: single-nucleotide counts first, then whole-string counts. *)
let tests =
  (* Builds an expected 'Ok' result from a (nucleotide, count) list. *)
  let counts alist = Ok (Map.of_alist_exn (module Char) alist) in
  [
    (* count_nucleotide: error cases *)
    "Empty DNA string has no invalid nucleotides"
    >:: aire (Error 'X') (NC.count_nucleotide "" 'X');
    "Non-empty DNA string has no invalid nucleotides"
    >:: aire (Error 'X') (NC.count_nucleotide "ACGT" 'X');
    "Invalid DNA string has no invalid nucleotides"
    >:: aire (Error 'X') (NC.count_nucleotide "ACGXT" 'A');

    (* count_nucleotide: successful counts *)
    "Empty DNA string has zero Adenine nucleotides"
    >:: aire (Ok 0) (NC.count_nucleotide "" 'A');
    "DNA string with one Adenine nucleotide"
    >:: aire (Ok 1) (NC.count_nucleotide "A" 'A');
    "DNA string with five Cytosine nucleotides"
    >:: aire (Ok 5) (NC.count_nucleotide "CCCCC" 'C');
    "DNA string with two Guanine nucleotides"
    >:: aire (Ok 2) (NC.count_nucleotide "ACGGT" 'G');
    "DNA string with three Thymine nucleotides"
    >:: aire (Ok 3) (NC.count_nucleotide "CACTAGCTGCT" 'T');

    (* count_nucleotides *)
    "Invalid DNA string has no nucleotides"
    >:: amre (Error 'X') (NC.count_nucleotides "ACGXT");
    "Empty DNA string has zero nucleotides"
    >:: amre (counts []) (NC.count_nucleotides "");
    "DNA string with two Adenine nucleotides"
    >:: amre (counts [ ('A', 2) ]) (NC.count_nucleotides "AA");
    "DNA string with one Adenine, two Cytosine nucleotides"
    >:: amre (counts [ ('A', 1); ('C', 2) ]) (NC.count_nucleotides "ACC");
    "DNA string with one Adenine, two Cytosine, three Guanine, four Thymine nucleotides"
    >:: amre
          (counts [ ('A', 1); ('C', 2); ('G', 3); ('T', 4) ])
          (NC.count_nucleotides "CGTATGTCTG");
  ]

(* Entry point: label the test list as a suite and hand it to the OUnit2 runner. *)
let () =
  let suite = "nucleotide-counts tests" >::: tests in
  run_test_tt_main suite``````

### nucleotide_count.ml

``````open Base

(* Character equality. Uses the type-specific [Char.equal] rather than
   polymorphic structural equality: it is only ever applied to characters,
   and it avoids going through the [Caml] alias, which newer Base releases
   deprecate. *)
let ( =. ) (a : char) (b : char) : bool = Char.equal a b

(* [is_nucleotide c] is [true] exactly when [c] is one of the four upper-case
   DNA symbols 'A', 'C', 'G' or 'T'. *)
let is_nucleotide (c : char) : bool =
  match c with
  | 'A' -> true
  | 'C' -> true
  | 'G' -> true
  | 'T' -> true
  | _ -> false

(* The empty nucleotide-to-count map, used as the starting accumulator. *)
let empty : int Map.M(Char).t = Map.empty (module Char)

(* [count_nucleotide s c] counts how many times nucleotide [c] occurs in [s].

   - [Error c] when [c] itself is not a valid nucleotide;
   - [Error ch] when [s] contains an invalid character, where [ch] is the
     first such character;
   - [Ok n] otherwise, [n] being the number of occurrences of [c]. *)
let count_nucleotide s c =
  if not (is_nucleotide c) then Error c
  else
    let len = String.length s in
    (* Walk the string by index, carrying the running count. *)
    let rec scan i total =
      if i >= len then Ok total
      else
        let ch = s.[i] in
        if not (is_nucleotide ch) then Error ch
        else if ch =. c then scan (i + 1) (total + 1)
        else scan (i + 1) total
    in
    scan 0 0

(* [count_nucleotides s] tallies every nucleotide of [s] in a single pass.

   Returns [Ok m], where [m] binds each nucleotide that occurs at least once
   in [s] to its number of occurrences (nucleotides that never occur have no
   binding), or [Error ch] where [ch] is the first character of [s] that is
   not a valid nucleotide.

   The string is validated and counted in one traversal instead of being
   re-scanned once per nucleotide. *)
let count_nucleotides s =
  let len = String.length s in
  (* Increment the tally for one nucleotide, starting from 1 if unseen. *)
  let bump = function
    | None -> 1
    | Some n -> n + 1
  in
  let rec scan i counts =
    if i >= len then Ok counts
    else
      let ch = s.[i] in
      if is_nucleotide ch then scan (i + 1) (Map.update counts ch ~f:bump)
      else Error ch
  in
  scan 0 empty``````

### nucleotide_count.mli

``````open Base

(** [count_nucleotide s c] counts the number of times the nucleotide [c]
    occurs in the string [s].

    Returns [Error c] when [c] is not one of ['A'], ['C'], ['G'], ['T'].
    Otherwise returns [Error ch] when [s] contains an invalid character,
    [ch] being the first one, or [Ok n] with the number of occurrences. *)
val count_nucleotide : string -> char -> (int, char) Result.t

(** [count_nucleotides s] counts the nucleotides in the string [s].

    Returns [Ok m], where [m] maps each nucleotide occurring at least once in
    [s] to its count (nucleotides that do not occur are absent from [m]), or
    [Error ch] when [s] contains an invalid character, [ch] being the first
    one. *)
val count_nucleotides : string -> (int Map.M(Char).t, char) Result.t``````