Avatar of shmibs

shmibs's solution

to Word Count in the OCaml Track

Published at Dec 14 2018 · 0 comments
Instructions
Test suite
Solution

Note:

This exercise has changed since this solution was written.

Given a phrase, count the occurrences of each word in that phrase.

For example, for the input "olly olly in come free":

olly: 2
in: 1
come: 1
free: 1

Getting Started

For installation and learning resources, refer to the exercism help page.

Installation

To work on the exercises, you will need opam and Base. Consult the opam website for instructions on how to install opam for your OS. Once opam is installed, open a terminal window and run the following command to install base:

opam install base

To run the tests you will need OUnit. Install it using opam:

opam install ounit

Running Tests

A Makefile is provided with a default target to compile your solution and run the tests. At the command line, type:

make

Interactive Shell

utop is a command line program which allows you to run OCaml code interactively. The easiest way to install it is via opam:

opam install utop

Consult utop for more detail.

Feedback, Issues, Pull Requests

The exercism/ocaml repository on GitHub is the home for all of the OCaml exercises.

If you have feedback about an exercise, or want to help implement a new one, head over there and create an issue. We'll do our best to help you!

Source

This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour.

Submitting Incomplete Solutions

It's possible to submit an incomplete solution so you can see how others have completed the exercise.

test.ml

open Base
open OUnit2
open Word_count

(* [ae exp got] is an OUnit test body asserting that the word-count map [got]
   equals the map built from the association list [exp]. On failure the maps
   are printed as human-readable s-expressions. The test context argument is
   unused. *)
let ae exp got _test_ctxt =
  let expected = Map.of_alist_exn (module String) exp in
  let printer counts =
    Map.sexp_of_m__t (module String) Int.sexp_of_t counts
    |> Sexp.to_string_hum ~indent:1
  in
  assert_equal ~cmp:(Map.equal Int.equal) ~printer expected got

(* The word_count test cases. Each case names a scenario, feeds an input
   phrase to [word_count] and compares the result with the expected
   (word, count) pairs via [ae]. *)
let tests =
  (* [check name input expected] labels one comparison as an OUnit test. *)
  let check name input expected = name >:: ae expected (word_count input) in
  [
    check "count one word"
      "word"
      [("word", 1)];
    check "count one of each word"
      "one of each"
      [("one", 1); ("of", 1); ("each", 1)];
    check "multiple occurrences of a word"
      "one fish two fish red fish blue fish"
      [("one", 1); ("fish", 4); ("two", 1); ("red", 1); ("blue", 1)];
    check "handles cramped lists"
      "one,two,three"
      [("one", 1); ("two", 1); ("three", 1)];
    check "handles expanded lists"
      "one,\ntwo,\nthree"
      [("one", 1); ("two", 1); ("three", 1)];
    check "ignore punctuation"
      "car: carpet as java: javascript!!&@$%^&"
      [("car", 1); ("carpet", 1); ("as", 1); ("java", 1); ("javascript", 1)];
    check "include numbers"
      "testing, 1, 2 testing"
      [("testing", 2); ("1", 1); ("2", 1)];
    check "normalize case"
      "go Go GO Stop stop"
      [("go", 3); ("stop", 2)];
    check "with apostrophes"
      "First: don't laugh. Then: don't cry."
      [("first", 1); ("don't", 2); ("laugh", 1); ("then", 1); ("cry", 1)];
    check "with quotations"
      "Joe can't tell between 'large' and large."
      [("joe", 1); ("can't", 1); ("tell", 1); ("between", 1); ("large", 2); ("and", 1)];
    check "multiple spaces not detected as a word"
      " multiple   whitespaces"
      [("multiple", 1); ("whitespaces", 1)];
  ]

(* Test executable entry point: group the cases into one named suite and hand
   it to the OUnit runner. *)
let () =
  let suite = "word_count tests" >::: tests in
  run_test_tt_main suite
open Core

(** [word_count s] counts the occurrences of each word in [s].

    The phrase is split on whitespace and commas. Each token is lowercased,
    every character that is not a letter, a digit or an apostrophe is dropped,
    and apostrophes surrounding the token are stripped, so ["'large'"] counts
    as [large] while the inner apostrophe of ["don't"] is kept. Tokens that
    end up empty are ignored.

    Returns a map from each normalized word to the number of times it
    occurs. *)
let word_count (s : string) =
  let is_apostrophe = Char.equal '\'' in
  (* OCaml's ['\ddd'] escapes are decimal: vertical tab is ['\011'] and form
     feed is ['\012']. The C-style octal spellings ['\013'] and ['\014']
     denote carriage return and shift-out in OCaml. *)
  let separators = [ ' '; '\t'; '\n'; '\011'; '\012'; '\r'; ',' ] in
  let normalize token =
    String.lowercase token
    |> String.filter ~f:(fun c -> Char.is_alphanum c || is_apostrophe c)
    (* Strip every leading/trailing apostrophe so that a token made only of
       quote marks normalizes to the empty string instead of the word "'". *)
    |> String.strip ~drop:is_apostrophe
  in
  String.split_on_chars s ~on:separators
  |> List.filter_map ~f:(fun token ->
         let word = normalize token in
         if String.is_empty word then None else Some word)
  |> List.fold
       ~init:(Map.empty (module String))
       ~f:(fun counts word ->
         Map.update counts word ~f:(function
           | None -> 1
           | Some n -> n + 1))

Community comments

Find this solution interesting? Ask the author a question to learn more.

What can you learn from this solution?

A huge amount can be learned from reading other people’s code. This is why we wanted to give exercism users the option of making their solutions public.

Here are some questions to help you reflect on this solution and learn the most from it.

  • What compromises have been made?
  • Are there new concepts here that you could read more about to improve your understanding?