Avatar of fsouza

fsouza's solution

to Run Length Encoding in the OCaml Track

Published at Jul 06 2019 · 0 comments
Instructions
Test suite
Solution

Note:

This exercise has changed since this solution was written.

Implement run-length encoding and decoding.

Run-length encoding (RLE) is a simple form of data compression, where runs (consecutive data elements) are replaced by just one data value and count.

For example, we can represent the original 53 characters with only 13.

"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB"  ->  "12WB12W3B24WB"

RLE allows the original data to be perfectly reconstructed from the compressed data, which makes it a lossless data compression.

"AABCCCDEEEE"  ->  "2AB3CD4E"  ->  "AABCCCDEEEE"

For simplicity, you can assume that the unencoded string will only contain the letters A through Z (either lower or upper case) and whitespace. This way data to be encoded will never contain any numbers and numbers inside data to be decoded always represent the count for the following character.

Getting Started

  1. Install the Exercism CLI.

  2. Install OCaml.

  3. For library documentation, follow Useful OCaml resources.

Running Tests

A Makefile is provided with a default target to compile your solution and run the tests. At the command line, type:

make

Submitting Incomplete Solutions

It's possible to submit an incomplete solution so you can see how others have completed the exercise.

Feedback, Issues, Pull Requests

The exercism/ocaml repository on GitHub is the home for all of the OCaml exercises.

If you have feedback about an exercise, or want to help implement a new one, head over there and create an issue or submit a PR. We welcome new contributors!

Source

Wikipedia https://en.wikipedia.org/wiki/Run-length_encoding

test.ml

open Base
open OUnit2
open Run_length_encoding

(* [ae exp got] is a test body asserting that [got] equals [exp]. The test
   context handed in by the runner is ignored; on failure both strings are
   printed as-is via [Fn.id]. *)
let ae exp got = fun _test_ctxt -> assert_equal ~printer:Fn.id exp got

(* Test cases for [encode]: each entry names the case, the expected encoded
   text and the raw input handed to [encode]. *)
let encode_tests =
  let check name ~expected ~raw = name >:: ae expected (encode raw) in
  [
    check "empty string" ~expected:"" ~raw:"";
    check "single characters only are encoded without count"
      ~expected:"XYZ" ~raw:"XYZ";
    check "string with no single characters"
      ~expected:"2A3B4C" ~raw:"AABBBCCCC";
    check "single characters mixed with repeated characters"
      ~expected:"12WB12W3B24WB"
      ~raw:"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB";
    check "multiple whitespace mixed in string"
      ~expected:"2 hs2q q2w2 " ~raw:"  hsqq qww  ";
    check "lowercase characters"
      ~expected:"2a3b4c" ~raw:"aabbbcccc";
  ]


(* Test cases for [decode]: each entry names the case, the expected plain
   text and the encoded input handed to [decode]. *)
let decode_tests =
  let check name ~expected ~encoded = name >:: ae expected (decode encoded) in
  [
    check "empty string" ~expected:"" ~encoded:"";
    check "single characters only"
      ~expected:"XYZ" ~encoded:"XYZ";
    check "string with no single characters"
      ~expected:"AABBBCCCC" ~encoded:"2A3B4C";
    check "single characters with repeated characters"
      ~expected:"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB"
      ~encoded:"12WB12W3B24WB";
    check "multiple whitespace mixed in string"
      ~expected:"  hsqq qww  " ~encoded:"2 hs2q q2w2 ";
    check "lower case string"
      ~expected:"aabbbcccc" ~encoded:"2a3b4c";
  ]


(* Round-trip check: decoding the encoding of a string must give back the
   string itself. *)
let encode_and_then_decode_tests =
  let original = "zzz ZZ  zZ" in
  [
    "encode followed by decode gives original string"
    >:: ae original (decode (encode original));
  ]

(* Entry point: gathers the three groups of cases into a single labelled
   suite and hands it to the OUnit runner. *)
let () =
  let all_tests =
    List.concat [encode_tests; decode_tests; encode_and_then_decode_tests]
  in
  run_test_tt_main ("run length encoding tests" >::: all_tests)

run_length_encoding.ml

open Base

(** One run of the text being encoded or decoded.
    [One s] stands for a single occurrence of [s];
    [Many (n, s)] stands for [s] occurring [n] times in a row. *)
type rle = One of string | Many of int * string

(** [rle_of_pair (item, counter)] wraps a counted item as a run: counts
    above one become [Many (counter, item)], anything else becomes
    [One item]. *)
let rle_of_pair (item, counter) =
  match counter > 1 with
  | true -> Many (counter, item)
  | false -> One item

(** [rle_of_string input] parses one encoded chunk. A chunk of length one is
    a lone symbol ([One input]); otherwise the last character is the symbol
    and everything before it is parsed as the repeat count with
    [Int.of_string], which fails if that prefix is not an integer. *)
let rle_of_string input =
  match String.length input with
  | 1 -> One input
  | _ ->
      let count = Int.of_string (String.drop_suffix input 1) in
      let symbol = String.suffix input 1 in
      Many (count, symbol)

(** [repeat acc item n] pushes [item] onto the front of [acc] [n] times and
    returns the resulting list. Tail-recursive; the countdown only stops at
    exactly zero, so [n] is expected to be non-negative. *)
let rec repeat acc item n =
  if n = 0 then acc else repeat (item :: acc) item (n - 1)

(** [string_of_rle encoded rle] renders a single run as text.

    - [One s] is always rendered as [s].
    - [Many (n, s)] is rendered as the count followed by [s] when [encoded]
      is [true], and as [n] consecutive copies of [s] when it is [false]. *)
let string_of_rle encoded = function
  | One s ->
      s
  | Many (n, s) ->
      if encoded then Int.to_string n ^ s
      else
        (* Join all copies in one pass: appending them one at a time with
           [^] re-copies the growing prefix on every step. *)
        String.concat (repeat [] s n)

(** Renders every run of the list in its encoded (count + symbol) form. *)
let string_of_rle_list_encoded runs = List.map runs ~f:(string_of_rle true)

(** Renders every run of the list in its decoded (fully expanded) form. *)
let string_of_rle_list_decoded runs = List.map runs ~f:(string_of_rle false)

(** [split_string input ~f] cuts [input] into chunks, each one ending with
    (and including) a character for which [f] holds.

    The chunks are returned in reverse order of appearance. Characters after
    the last terminating character do not form a chunk and are left out of
    the result. *)
let split_string input ~f =
  let len = String.length input in
  (* [pos] is the character being inspected; [chunk_start] is the index of
     the first character of the chunk currently being scanned. *)
  let rec go pos chunk_start chunks =
    if pos >= len then chunks
    else if f input.[pos] then
      let chunk =
        String.sub input ~pos:chunk_start ~len:(pos - chunk_start + 1)
      in
      go (pos + 1) (pos + 1) (chunk :: chunks)
    else go (pos + 1) chunk_start chunks
  in
  go 0 0 []

(* Parses encoded text into runs. A chunk ends at every non-digit character;
   the chunks arrive last-first, so the resulting list is reversed too. *)
let list_of_encoded input =
  let ends_chunk c = not (Char.is_digit c) in
  List.map (split_string input ~f:ends_chunk) ~f:rle_of_string

(* Groups consecutive equal characters into runs. Runs are consed onto the
   accumulator as they are completed, so the result is in reverse order. *)
let list_of_raw input =
  (* Turns a finished (character, count) pair into a run and stores it. *)
  let flush (ch, count) runs = rle_of_pair (Char.to_string ch, count) :: runs in
  match input with
  | [] ->
      []
  | first :: rest ->
      let open_run, finished =
        List.fold rest
          ~init:((first, 1), [])
          ~f:(fun ((ch, count), runs) next ->
            if Char.equal next ch then ((ch, count + 1), runs)
            else ((next, 1), flush (ch, count) runs))
      in
      (* The run still open when the input ends has to be stored as well. *)
      flush open_run finished

(** [encode input] returns the run-length encoding of [input]: every run of
    two or more identical characters is written as its length followed by the
    character, and a character that is not repeated is written as itself.
    The empty string encodes to the empty string. *)
let encode input =
  (* [list_of_raw] yields the runs last-first, hence the [List.rev]. *)
  let runs = input |> String.to_list |> list_of_raw |> List.rev in
  (* Join the pieces in one pass rather than appending them one by one,
     which would re-copy the growing result for every run. *)
  String.concat (string_of_rle_list_encoded runs)

(** [decode input] expands run-length encoded text: a decimal count followed
    by a character becomes that many copies of the character, and a character
    with no count in front of it is copied as-is. Digits at the very end of
    [input] that are not followed by a character are dropped. *)
let decode input =
  (* [list_of_encoded] yields the runs last-first, hence the [List.rev]. *)
  let runs = input |> list_of_encoded |> List.rev in
  (* Join the pieces in one pass rather than appending them one by one,
     which would re-copy the growing result for every run. *)
  String.concat (string_of_rle_list_decoded runs)

run_length_encoding.mli

(** [encode s] is the run-length encoding of [s]: each run of two or more
    identical characters is replaced by its length followed by the character;
    characters that are not repeated are kept unchanged. *)
val encode : string -> string

(** [decode s] expands run-length encoded text: a decimal count followed by a
    character yields that many copies of the character; a character without
    a preceding count is kept unchanged. *)
val decode : string -> string

Community comments

Find this solution interesting? Ask the author a question to learn more.

What can you learn from this solution?

A huge amount can be learned from reading other people’s code. This is why we wanted to give exercism users the option of making their solutions public.

Here are some questions to help you reflect on this solution and learn the most from it.

  • What compromises have been made?
  • Are there new concepts here that you could read more about to improve your understanding?