Given a phrase, count the occurrences of each word in that phrase.
For example, for the input "olly olly in come free":
olly: 2
in: 1
come: 1
free: 1
Make sure you have read the C page on the Exercism site. This covers the basic information on setting up the development environment expected by the exercises.
Get the first test compiling, linking and passing by following the three rules of test-driven development.
The included makefile can be used to create and run the tests using the test
task.
make test
Create just the functions you need to satisfy any compiler errors and get the test to fail. Then write just enough code to get the test to pass. Once you've done that, move on to the next test.
As you progress through the tests, take the time to refactor your implementation for readability and expressiveness and then go on to the next test.
Try to use standard C99 facilities in preference to writing your own low-level algorithms or facilities by hand.
This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour.
It's possible to submit an incomplete solution so you can see how others have completed the exercise.
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "vendor/unity.h"
#include "../src/word_count.h"
// Number of bytes compared/copied per word: the longest allowed word plus
// one byte for its terminating NUL.
#define STRING_SIZE (MAX_WORD_LENGTH + 1)
// Result table that each test passes to word_count() to be filled in.
word_count_word_t actual_solution[MAX_WORDS];
// Reference table that each test builds by hand and compares against
// actual_solution in check_solution().
word_count_word_t expected_solution[MAX_WORDS];
// Per-test setup hook (presumably invoked by the Unity runner before each
// test -- confirm against vendor/unity.h). Intentionally empty: every test
// re-initialises the tables it uses.
void setUp(void)
{
}
// Per-test cleanup hook (presumably invoked by the Unity runner after each
// test -- confirm against vendor/unity.h). Intentionally empty: the tests
// acquire nothing that has to be released.
void tearDown(void)
{
}
// Verify one word_count() call against a hand-built expectation.
//   expected_solution   - table of MAX_WORDS entries the test prepared
//   expected_word_count - value word_count() is supposed to return
//   actual_solution     - table of MAX_WORDS entries word_count() filled in
//   actual_word_count   - value word_count() actually returned
// Every slot is compared, including unused ones, so both tables must have
// been cleared before they were filled.
static void check_solution(word_count_word_t * expected_solution,
                           int expected_word_count,
                           word_count_word_t * actual_solution,
                           int actual_word_count)
{
   // the returned total (or error code) has to agree first
   TEST_ASSERT_EQUAL(expected_word_count, actual_word_count);

   // then walk both tables side by side, slot by slot
   const word_count_word_t *want = expected_solution;
   const word_count_word_t *got = actual_solution;
   for (int slot = 0; slot < MAX_WORDS; ++slot, ++want, ++got) {
      TEST_ASSERT_EQUAL(want->count, got->count);
      TEST_ASSERT_EQUAL_UINT8_ARRAY(want->text, got->text, STRING_SIZE);
   }
}
// Tests Start here
// A phrase consisting of a single word yields one entry counted once.
void test_word_count_one_word(void)
{
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "word", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "word";
   const int expected_word_count = 1;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// Three distinct words are each reported once, in order of first appearance.
void test_word_count_one_of_each_word(void)
{
   TEST_IGNORE();               // delete this line to run test
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "one", 1 },
      { "of", 1 },
      { "each", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "one of each";
   const int expected_word_count = 3;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// A word that recurs ("fish") is stored once and its count accumulated.
void test_word_count_multiple_occurrences_of_a_word(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "one", 1 },
      { "fish", 4 },
      { "two", 1 },
      { "red", 1 },
      { "blue", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "one fish two fish red fish blue fish";
   const int expected_word_count = 5;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// Commas without surrounding whitespace still separate words.
void test_word_count_handles_cramped_lists(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "one", 1 },
      { "two", 1 },
      { "three", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "one,two,three";
   const int expected_word_count = 3;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// A comma followed by a newline is treated as a separator as well.
void test_word_count_handles_expanded_lists(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "one", 1 },
      { "two", 1 },
      { "three", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "one,\ntwo,\nthree";
   const int expected_word_count = 3;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// Punctuation characters are dropped and never become part of a word.
void test_word_count_ignore_punctuation(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "car", 1 },
      { "carpet", 1 },
      { "as", 1 },
      { "java", 1 },
      { "javascript", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "car: carpet as java: javascript!!&@$%^&";
   const int expected_word_count = 5;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// Digit sequences are counted as words just like alphabetic ones.
void test_word_count_include_numbers(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "testing", 2 },
      { "1", 1 },
      { "2", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "testing, 1, 2 testing";
   const int expected_word_count = 3;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// Words differing only in letter case are merged and stored in lower case.
void test_word_count_normalize_case(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "go", 3 },
      { "stop", 2 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "go Go GO Stop stop";
   const int expected_word_count = 2;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// An apostrophe inside a word (a contraction) stays part of that word.
void test_word_count_with_apostrophes(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "first", 1 },
      { "don't", 2 },
      { "laugh", 1 },
      { "then", 1 },
      { "cry", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "First: don't laugh. Then: don't cry.";
   const int expected_word_count = 5;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// Single quotes wrapped around a word are stripped, so 'large' and large
// are the same word, while the apostrophe inside "can't" is kept.
void test_word_count_with_quotation(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "joe", 1 },
      { "can't", 1 },
      { "tell", 1 },
      { "between", 1 },
      { "large", 2 },
      { "and", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "Joe can't tell between 'large' and large.";
   const int expected_word_count = 6;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// The example phrase from the exercise description.
void test_word_count_from_example(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "olly", 2 },
      { "in", 1 },
      { "come", 1 },
      { "free", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase = "olly olly in come free";
   const int expected_word_count = 4;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// A very long word that is still expected to be accepted and counted
// (twice), alongside ordinary words.
void test_max_length_word(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "look", 2 },
      { "thisisaveeeeeerylongwordtypedwithoutusinganyspaces", 2 },
      { "and", 1 },
      { "again", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase =
       "Look thisisaveeeeeerylongwordtypedwithoutusinganyspaces and look again, thisisaveeeeeerylongwordtypedwithoutusinganyspaces";
   const int expected_word_count = 4;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// A word longer than MAX_WORD_LENGTH makes word_count() return
// EXCESSIVE_LENGTH_WORD; the words found before it remain in the table.
void test_excessive_length_word(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "look", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase =
       "Look thisisanexcessivelylongwordthatsomeonetypedwithoutusingthespacebar enough";
   const int expected_word_count = EXCESSIVE_LENGTH_WORD;
   // this test keeps its expectation in a local table rather than in the
   // file-scope expected_solution
   word_count_word_t expected[MAX_WORDS];

   // clear first so that unused slots and text tails compare equal
   memset(expected, 0, sizeof(expected));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected[slot].count = wanted[slot].count;
      strncpy(expected[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected, expected_word_count,
                  actual_solution, actual_word_count);
}
// A phrase with 20 distinct words is expected to be counted completely,
// including the word "a" that occurs three times.
void test_max_number_words(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "once", 1 },
      { "upon", 1 },
      { "a", 3 },
      { "time", 1 },
      { "long", 1 },
      { "while", 1 },
      { "in", 1 },
      { "the", 1 },
      { "past", 1 },
      { "there", 1 },
      { "lived", 1 },
      { "strange", 1 },
      { "little", 1 },
      { "man", 1 },
      { "who", 1 },
      { "could", 1 },
      { "spin", 1 },
      { "straw", 1 },
      { "into", 1 },
      { "gold", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase =
       "Once upon a time, a long while in the past, there lived a strange little man who could spin straw into gold";
   const int expected_word_count = 20;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// When further new words follow after 20 distinct ones, word_count() is
// expected to return EXCESSIVE_NUMBER_OF_WORDS and leave the first 20
// entries in the table.
void test_excessive_number_words(void)
{
   TEST_IGNORE();
   static const struct {
      const char *text;
      int count;
   } wanted[] = {
      { "once", 1 },
      { "upon", 1 },
      { "a", 3 },
      { "time", 1 },
      { "long", 1 },
      { "while", 1 },
      { "in", 1 },
      { "the", 1 },
      { "past", 1 },
      { "there", 1 },
      { "lived", 1 },
      { "strange", 1 },
      { "little", 1 },
      { "man", 1 },
      { "who", 1 },
      { "could", 1 },
      { "spin", 1 },
      { "straw", 1 },
      { "into", 1 },
      { "gold", 1 },
   };
   const size_t wanted_len = sizeof(wanted) / sizeof(wanted[0]);
   const char *phrase =
       "Once upon a time, a long while in the past, there lived a strange little man who could spin straw into gold. His name was...";
   const int expected_word_count = EXCESSIVE_NUMBER_OF_WORDS;

   // clear first so that unused slots and text tails compare equal
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t slot = 0; slot < wanted_len; slot++) {
      expected_solution[slot].count = wanted[slot].count;
      strncpy(expected_solution[slot].text, wanted[slot].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(phrase, actual_solution);
   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}
// Test-runner entry point: registers every test with Unity in a fixed order
// and reports the overall outcome through the process exit status.
int main(void)
{
   UnityBegin("test/test_word_count.c");
   RUN_TEST(test_word_count_one_word);
   RUN_TEST(test_word_count_one_of_each_word);
   RUN_TEST(test_word_count_multiple_occurrences_of_a_word);
   RUN_TEST(test_word_count_handles_cramped_lists);
   RUN_TEST(test_word_count_handles_expanded_lists);
   RUN_TEST(test_word_count_ignore_punctuation);
   RUN_TEST(test_word_count_include_numbers);
   RUN_TEST(test_word_count_normalize_case);
   RUN_TEST(test_word_count_with_apostrophes);
   RUN_TEST(test_word_count_with_quotation);
   RUN_TEST(test_word_count_from_example);
   RUN_TEST(test_max_length_word);
   RUN_TEST(test_excessive_length_word);
   RUN_TEST(test_max_number_words);
   RUN_TEST(test_excessive_number_words);
   // UnityEnd() yields the number of failed tests (see the Unity docs);
   // hand it on as the exit status so that `make test` notices failures
   // instead of always seeing 0.
   return UnityEnd();
}
#include "word_count.h"
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
/*
 * Binary search tree node used to index the words[] result table.
 * The node stores no text of its own: 'pos' refers to the words[] entry
 * holding the word, and addtree() orders the nodes by strcmp() on that text.
 */
struct tnode {
int pos; /* index in words[] */
struct tnode *left;   /* subtree of words that compare less than this one */
struct tnode *right;  /* subtree of words that compare greater than this one */
};
/*
 * File-private helpers. They are declared 'static' so that they get internal
 * linkage and cannot clash with identically named symbols in other
 * translation units; a later definition without a storage-class specifier
 * inherits the linkage of these declarations.
 */

/* getword: read the next word of 'input' starting at *idx into 'word' */
static int getword(const char *input, int *idx, char *word, int lim);

/* addtree: count 'word' in the tree rooted at 'p', adding it if it is new */
static struct tnode *addtree(struct tnode *p, char *word, int len,
                             int *wcount, word_count_word_t * words);

/* free_tree: release every node of the tree rooted at 'p' */
static void free_tree(struct tnode *p);
int word_count(const char *input_text, word_count_word_t * words)
{
memset(words, 0, MAX_WORDS * sizeof(*words));
int wlen; /* length of the current word */
int wcount = 0; /* word counter */
char word[MAX_WORD_LENGTH + 1]; /* word buffer */
struct tnode *root = NULL; /* tree root */
int idx = 0; /* index in input_text */
while ((wlen = getword(input_text, &idx, word, MAX_WORD_LENGTH)) > 0) {
if (wcount == MAX_WORDS) {
free_tree(root);
return EXCESSIVE_NUMBER_OF_WORDS;
}
if ((root = addtree(root, word, wlen, &wcount, words))
== NULL) {
free_tree(root);
return MEMORY_ERROR;
}
}
free_tree(root);
if (wlen == EXCESSIVE_LENGTH_WORD)
return EXCESSIVE_LENGTH_WORD;
return wcount;
}
/*
 * addtree: record one occurrence of 'word' in the tree rooted at 'p'.
 *
 *   p      - root of the (sub)tree to search; NULL for an empty tree
 *   word   - NUL-terminated word of length 'len'
 *   len    - number of characters in 'word', excluding the NUL
 *   wcount - number of entries used in words[]; incremented for a new word
 *   words  - result table; a new word is stored at words[*wcount], which
 *            must have room for len + 1 bytes of text
 *
 * Returns the root of the updated (sub)tree, or NULL when a node could not
 * be allocated at any depth. On failure the existing tree is left exactly
 * as it was, so the caller has to keep its previous root pointer in order
 * to free it.
 */
struct tnode *addtree(struct tnode *p, char *word, int len, int *wcount,
                      word_count_word_t * words)
{
   if (p == NULL) {             /* new word arrived */
      p = malloc(sizeof *p);
      if (p == NULL) {
         fprintf(stderr, "Memory error!\n");
         return NULL;
      }
      p->left = NULL;
      p->right = NULL;
      p->pos = *wcount;
      strncpy(words[p->pos].text, word, len + 1);      /* text plus NUL */
      words[p->pos].count = 1;
      (*wcount)++;
      return p;
   }

   int cond = strcmp(word, words[p->pos].text);
   if (cond == 0) {
      words[p->pos].count++;    /* repeated word */
      return p;
   }

   /* descend, but only re-link the child once the insertion succeeded */
   struct tnode **child = (cond < 0) ? &p->left : &p->right;
   struct tnode *subtree = addtree(*child, word, len, wcount, words);
   if (subtree == NULL) {
      return NULL;              /* propagate the failure instead of hiding it */
   }
   *child = subtree;
   return p;
}
/*
 * free_tree: release the node 'p' together with both of its subtrees.
 * Children are freed before their parent; a NULL 'p' is accepted and
 * ignored.
 */
void free_tree(struct tnode *p)
{
   if (p == NULL) {
      return;
   }
   free_tree(p->left);
   free_tree(p->right);
   free(p);
}
/*
 * getword: extract the next word of 'input' into 'word'.
 *
 *   input - NUL-terminated text being scanned
 *   idx   - in/out read position in 'input'; advanced past what was consumed
 *   word  - output buffer of at least lim + 1 bytes; always NUL-terminated
 *   lim   - maximum number of characters a word may have
 *
 * A word is a run of alphanumeric characters, stored in lower case. An
 * apostrophe is kept only when it sits between two alphanumeric characters
 * (as in "don't" or "I'm") and both it and the following character still
 * fit within 'lim'.
 *
 * Returns the length of the word, 0 when no word is left, or
 * EXCESSIVE_LENGTH_WORD when the word has more than 'lim' characters (then
 * 'word' holds its first 'lim' characters).
 */
int getword(const char *input, int *idx, char *word, int lim)
{
   int wlen = 0;
   char *w = word;

   word[0] = '\0';

   /* skip everything that cannot start a word */
   /* <ctype.h> needs unsigned char values; plain char may be negative */
   while (input[*idx] && !isalnum((unsigned char)input[*idx])) {
      (*idx)++;
   }
   if (input[*idx] == '\0') {
      return 0;
   }

   for (;;) {
      const unsigned char c = (unsigned char)input[*idx];

      if (isalnum(c)) {
         /* check before storing so that word[lim] stays free for the NUL */
         if (wlen == lim) {
            *w = '\0';
            return EXCESSIVE_LENGTH_WORD;
         }
         *w++ = (char)tolower(c);
         (*idx)++;
         wlen++;
      } else if (c == '\'' && wlen < lim - 1
                 /* wlen >= 1 here, so input[*idx - 1] is inside this word */
                 && isalnum((unsigned char)input[*idx - 1])
                 && isalnum((unsigned char)input[*idx + 1])) {
         *w++ = '\'';
         (*idx)++;
         wlen++;
      } else {
         break;                 /* separator or end of input ends the word */
      }
   }
   *w = '\0';
   return wlen;
}
#ifndef WORD_COUNT_H
#define WORD_COUNT_H

#define MAX_WORDS 20            // at most MAX_WORDS can be found in the test input string
#define MAX_WORD_LENGTH 50      // no individual word can exceed this length

// results structure
typedef struct word_count_word {
   // one extra byte so that a word of exactly MAX_WORD_LENGTH characters
   // can still be stored with its terminating NUL
   char text[MAX_WORD_LENGTH + 1];
   int count;                   // number of occurrences of 'text'
} word_count_word_t;

// error codes returned by word_count(); all of them are negative
#define EXCESSIVE_LENGTH_WORD     (-1)
#define EXCESSIVE_NUMBER_OF_WORDS (-2)
#define MEMORY_ERROR              (-9)

// word_count - routine to classify the unique words and their frequency in a test input string
// inputs:
//    input_text = the null-terminated string that is analyzed
//
// outputs:
//    words = caller-allocated table of MAX_WORDS entries that records the
//            words found and their frequency
// returns:
//    the number of unique words stored in the words table,
//    or a negative number (one of the codes above) if an error occurred.
//    On error, words will contain the results up to that point.
int word_count(const char *input_text, word_count_word_t * words);

#endif
A huge amount can be learned from reading other people’s code. This is why we wanted to give exercism users the option of making their solutions public.
Here are some questions to help you reflect on this solution and learn the most from it.
Level up your programming skills with 3,449 exercises across 52 languages, and insightful discussion with our volunteer team of welcoming mentors. Exercism is 100% free forever.
Sign up Learn More
Community comments
The 'memset' on line 22 could actually be omitted without changing how the function works, because the strings from 'getword' are NUL-terminated; however, 'expected_solution' in the tests is 'memset' to zero and TEST_ASSERT_EQUAL_UINT8_ARRAY compares the whole arrays, so the comparison would fail without this 'memset' to zero.