Published at Sep 24 2019 · 0 comments
Test suite

Count the frequency of letters in texts using parallel computation.

Parallelism is about doing things in parallel that can also be done sequentially. A common example is counting the frequency of letters. Create a function that returns the total frequency of each letter in a list of texts and that employs parallelism.

Parallel Letter Frequency in Rust

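Learn more about concurrency in Rust in the "Fearless Concurrency" chapter of The Rust Programming Language:

https://doc.rust-lang.org/book/ch16-00-concurrency.html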


This exercise also includes a benchmark, with a sequential implementation as a baseline. You can compare your solution to the benchmark. Observe the effect different size inputs have on the performance of each. Can you surpass the benchmark using concurrent programming techniques?

As of this writing, test::Bencher is unstable and only available on nightly Rust. Run the benchmarks with Cargo:

cargo bench

If you are using rustup.rs:

rustup run nightly cargo bench

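Learn more about nightly Rust in the "How Rust is Made and 'Nightly Rust'" appendix of The Rust Programming Language:

https://doc.rust-lang.org/book/appendix-07-nightly-rust.html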

Writing the Code

Execute the tests with:

$ cargo test

All but the first test have been ignored. After you get the first test to pass, open the tests source file which is located in the tests directory and remove the #[ignore] flag from the next test and get the tests to pass again. Each separate test is a function with #[test] flag above it. Continue, until you pass every test.

If you wish to run all ignored tests without editing the tests source file, use:

$ cargo test -- --ignored

To run a specific test, for example some_test, you can use:

$ cargo test some_test

If the specific test is ignored use:

$ cargo test some_test -- --ignored

To learn more about Rust tests, refer to the online test documentation: https://doc.rust-lang.org/book/ch11-02-running-tests.html

Make sure to read the Modules chapter if you haven't already; it will help you organize your files.

use std::collections::HashMap;

use parallel_letter_frequency as frequency;

// Poem by Friedrich Schiller. The corresponding music is the European Anthem.
// Eight lines of German text; includes the non-ASCII letters 'ö' and 'ü'.
const ODE_AN_DIE_FREUDE: [&str; 8] = [
    "Freude schöner Götterfunken",
    "Tochter aus Elysium,",
    "Wir betreten feuertrunken,",
    "Himmlische, dein Heiligtum!",
    "Deine Zauber binden wieder",
    "Was die Mode streng geteilt;",
    "Alle Menschen werden Brüder,",
    "Wo dein sanfter Flügel weilt.",
];

// Dutch national anthem
// Eight lines of Dutch text; includes the non-ASCII letter 'ë' and punctuation.
const WILHELMUS: [&str; 8] = [
    "Wilhelmus van Nassouwe",
    "ben ik, van Duitsen bloed,",
    "den vaderland getrouwe",
    "blijf ik tot in den dood.",
    "Een Prinse van Oranje",
    "ben ik, vrij, onverveerd,",
    "den Koning van Hispanje",
    "heb ik altijd geëerd.",
];

// American national anthem
// Eight lines of English text with apostrophes, hyphens and other punctuation.
const STAR_SPANGLED_BANNER: [&str; 8] = [
    "O say can you see by the dawn's early light,",
    "What so proudly we hailed at the twilight's last gleaming,",
    "Whose broad stripes and bright stars through the perilous fight,",
    "O'er the ramparts we watched, were so gallantly streaming?",
    "And the rockets' red glare, the bombs bursting in air,",
    "Gave proof through the night that our flag was still there;",
    "O say does that star-spangled banner yet wave,",
    "O'er the land of the free and the home of the brave?",
];

// An empty list of texts must produce an empty frequency map.
#[test]
fn test_no_texts() {
    assert_eq!(frequency::frequency(&[], 4), HashMap::new());
}

// A single one-letter text is counted exactly once.
#[test]
fn test_one_letter() {
    let mut hm = HashMap::new();
    hm.insert('a', 1);
    assert_eq!(frequency::frequency(&["a"], 4), hm);
}

// Upper- and lowercase forms of a letter are tallied under the lowercase key.
#[test]
fn test_case_insensitivity() {
    let mut hm = HashMap::new();
    hm.insert('a', 2);
    assert_eq!(frequency::frequency(&["aA"], 4), hm);
}

// A thousand empty texts still yield an empty frequency map.
#[test]
fn test_many_empty_lines() {
    let mut v = Vec::with_capacity(1000);
    for _ in 0..1000 {
        // NOTE(review): loop body reconstructed from the test name and the
        // empty expected map; confirm against the upstream test file.
        v.push("");
    }
    assert_eq!(frequency::frequency(&v[..], 4), HashMap::new());
}

// The same three-letter text repeated 1000 times gives each letter a count of 1000.
#[test]
fn test_many_times_same_text() {
    let mut v = Vec::with_capacity(1000);
    for _ in 0..1000 {
        // NOTE(review): text reconstructed from the expected counts below
        // (one 'a', 'b' and 'c' per entry); confirm against the upstream test file.
        v.push("abc");
    }
    let mut hm = HashMap::new();
    hm.insert('a', 1000);
    hm.insert('b', 1000);
    hm.insert('c', 1000);
    assert_eq!(frequency::frequency(&v[..], 4), hm);
}

// Punctuation such as ',' must never appear as a key in the result.
#[test]
fn test_punctuation_doesnt_count() {
    assert!(!frequency::frequency(&WILHELMUS, 4).contains_key(&','));
}

// Digits must never appear as a key in the result.
#[test]
fn test_numbers_dont_count() {
    assert!(!frequency::frequency(&["Testing, 1, 2, 3"], 4).contains_key(&'1'));
}

// Counts letters across all three anthem texts using a single worker and
// spot-checks an ASCII vowel, an ASCII consonant and a non-ASCII letter.
#[test]
fn test_all_three_anthems_1_worker() {
    let mut v = Vec::new();
    // NOTE(review): outer loop and push reconstructed from the surviving inner
    // loop header and the test name; confirm against the upstream test file.
    for anthem in [ODE_AN_DIE_FREUDE, WILHELMUS, STAR_SPANGLED_BANNER].iter() {
        for line in anthem.iter() {
            v.push(*line);
        }
    }
    let freqs = frequency::frequency(&v[..], 1);
    assert_eq!(freqs.get(&'a'), Some(&49));
    assert_eq!(freqs.get(&'t'), Some(&56));
    assert_eq!(freqs.get(&'ü'), Some(&2));
}

// Same check as the single-worker variant, but with three workers: the result
// must not depend on how many workers are used.
#[test]
fn test_all_three_anthems_3_workers() {
    let mut v = Vec::new();
    // NOTE(review): outer loop and push reconstructed from the surviving inner
    // loop header and the test name; confirm against the upstream test file.
    for anthem in [ODE_AN_DIE_FREUDE, WILHELMUS, STAR_SPANGLED_BANNER].iter() {
        for line in anthem.iter() {
            v.push(*line);
        }
    }
    let freqs = frequency::frequency(&v[..], 3);
    assert_eq!(freqs.get(&'a'), Some(&49));
    assert_eq!(freqs.get(&'t'), Some(&56));
    assert_eq!(freqs.get(&'ü'), Some(&2));
}


use rayon::prelude::*;
use rayon::ThreadPoolBuilder;
use std::collections::HashMap;

/// Adds the letters of `line` to the running tally `map` and returns the tally.
///
/// Each character is lowercased with the Unicode-aware `char::to_lowercase`
/// (which may expand to more than one character, hence the `flat_map`), so
/// non-ASCII letters such as 'Ü' are folded onto their lowercase key just like
/// ASCII ones. Only characters for which `char::is_alphabetic` holds are
/// counted; digits, punctuation and whitespace are skipped.
///
/// The `&&str` parameter matches the item type produced when iterating a
/// `&[&str]` by reference, so this function can be passed directly as a fold
/// operation.
fn count_chars(mut map: HashMap<char, usize>, line: &&str) -> HashMap<char, usize> {
    line.chars()
        .flat_map(char::to_lowercase)
        // Filter after lowercasing so that any non-letter produced by a
        // multi-character lowercase expansion is not counted.
        .filter(|c| c.is_alphabetic())
        .for_each(|c| *map.entry(c).or_insert(0) += 1);
    map
}

/// Merges two letter tallies into one by summing the counts of matching keys.
///
/// The result contains every key present in either input. Both maps are
/// consumed; the merged map is returned.
fn combine_counts(
    mut map1: HashMap<char, usize>,
    mut map2: HashMap<char, usize>,
) -> HashMap<char, usize> {
    // Optimization: always keep the larger map, so that the loop below
    // iterates over (and re-inserts) the smaller number of entries.
    if map2.len() > map1.len() {
        std::mem::swap(&mut map1, &mut map2);
    }

    // Insert/modify items from the smaller map into the larger one.
    for (k, v) in map2 {
        *map1.entry(k).or_insert(0) += v;
    }
    map1
}

pub fn parallel_frequency(input: &[&str]) -> HashMap<char, usize> {
    // count the characters in every line using rayon parallel fold.
    // Then reduce result hashmaps into a single hashmap
        .fold(HashMap::new, count_chars)
        .reduce(HashMap::new, combine_counts)

pub fn frequency(input: &[&str], worker_count: usize) -> HashMap<char, usize> {
    let pool = ThreadPoolBuilder::new()
    pool.install(|| parallel_frequency(input))


[package]
name = "parallel-letter-frequency"
version = "0.1.0"
authors = ["Jussi Kukkonen <jku@goto.fi>"]
edition = "2018"

[dependencies]
rayon = "1.1.0"

