Avatar of matheussilvasantos

matheussilvasantos's solution

to Word Count in the C Track

Published at Jul 13 2018 · 0 comments
Instructions
Test suite
Solution

Note:

This solution was written on an old version of Exercism. The tests below might not correspond to the solution code, and the exercise may have changed since this code was written.

Given a phrase, count the occurrences of each word in that phrase.

For example, for the input "olly olly in come free":

olly: 2
in: 1
come: 1
free: 1

Getting Started

Make sure you have read the C page on the Exercism site. This covers the basic information on setting up the development environment expected by the exercises.

Passing the Tests

Get the first test compiling, linking and passing by following the three rules of test-driven development.

The included makefile can be used to create and run the tests using the test task.

make test

Create just the functions you need to satisfy any compiler errors and get the test to fail. Then write just enough code to get the test to pass. Once you've done that, move on to the next test.

As you progress through the tests, take the time to refactor your implementation for readability and expressiveness and then go on to the next test.

Try to use standard C99 facilities in preference to writing your own low-level algorithms or facilities by hand.

Source

This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour.

Submitting Incomplete Solutions

It's possible to submit an incomplete solution so you can see how others have completed the exercise.

test_word_count.c

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "vendor/unity.h"
#include "../src/word_count.h"

// Number of bytes copied and compared per word: MAX_WORD_LENGTH characters
// plus one extra byte (presumably the terminating NUL; confirm that
// word_count_word_t.text is declared with this size in word_count.h).
#define STRING_SIZE (MAX_WORD_LENGTH + 1)

// Table handed to word_count() by every test; holds the result under test.
word_count_word_t actual_solution[MAX_WORDS];
// Reference table that the tests zero, fill by hand, and compare against
// actual_solution in check_solution().
word_count_word_t expected_solution[MAX_WORDS];
// Per-test setup hook; intentionally empty because each test zeroes and
// fills the expected table itself.
void setUp(void)
{
}

// Per-test teardown hook; intentionally empty, the tests in this file
// release nothing.
void tearDown(void)
{
}

// Compares the value returned by word_count() and the table it produced with
// the hand-built expectation.
//
// expected_solution / actual_solution: tables of MAX_WORDS entries each.
// expected_word_count / actual_word_count: expected and actual return values.
//
// Every one of the MAX_WORDS entries is compared, including unused ones, and
// each text buffer is compared over its full STRING_SIZE bytes, so both
// tables must have been zeroed before they were filled.
static void check_solution(word_count_word_t * expected_solution,
                           int expected_word_count,
                           word_count_word_t * actual_solution,
                           int actual_word_count)
{
   // the return value (word total or error code) is checked first
   TEST_ASSERT_EQUAL(expected_word_count, actual_word_count);

   // then walk both tables in lock step, entry by entry
   const word_count_word_t *want = expected_solution;
   const word_count_word_t *got = actual_solution;

   for (int remaining = MAX_WORDS; remaining > 0; remaining--) {
      TEST_ASSERT_EQUAL(want->count, got->count);
      TEST_ASSERT_EQUAL_UINT8_ARRAY(want->text, got->text, STRING_SIZE);
      want++;
      got++;
   }
}

// Tests Start here
void test_word_count_one_word(void)
{
   int index = 0;
   int actual_word_count;
   char *input_text = "word";
   const int expected_word_count = 1;

   // build the expected solution
   memset(expected_solution, 0, sizeof(expected_solution));     // clear to start with a known value

   // fill in the expected words
   expected_solution[index].count = 1;
   strncpy(expected_solution[index++].text, "word", STRING_SIZE);

   actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution,
                  expected_word_count, actual_solution, actual_word_count);
}

// Three different words each appear once, in first-seen order.
void test_word_count_one_of_each_word(void)
{
   TEST_IGNORE();               // delete this line to run test

   static const char *const words[] = { "one", "of", "each" };
   const size_t entries = sizeof words / sizeof words[0];
   const char *input_text = "one of each";
   const int expected_word_count = 3;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = 1;
      strncpy(expected_solution[i].text, words[i], STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// A repeated word ("fish") is tallied in the entry created at its first
// occurrence instead of producing new entries.
void test_word_count_multiple_occurrences_of_a_word(void)
{
   TEST_IGNORE();

   static const struct {
      const char *text;
      int count;
   } expected[] = {
      {"one", 1}, {"fish", 4}, {"two", 1}, {"red", 1}, {"blue", 1},
   };
   const size_t entries = sizeof expected / sizeof expected[0];
   const char *input_text = "one fish two fish red fish blue fish";
   const int expected_word_count = 5;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = expected[i].count;
      strncpy(expected_solution[i].text, expected[i].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// Commas with no surrounding whitespace still separate words.
void test_word_count_handles_cramped_lists(void)
{
   TEST_IGNORE();

   static const char *const words[] = { "one", "two", "three" };
   const size_t entries = sizeof words / sizeof words[0];
   const char *input_text = "one,two,three";
   const int expected_word_count = 3;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = 1;
      strncpy(expected_solution[i].text, words[i], STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// A comma followed by a newline counts as one gap between words.
void test_word_count_handles_expanded_lists(void)
{
   TEST_IGNORE();

   static const char *const words[] = { "one", "two", "three" };
   const size_t entries = sizeof words / sizeof words[0];
   const char *input_text = "one,\ntwo,\nthree";
   const int expected_word_count = 3;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = 1;
      strncpy(expected_solution[i].text, words[i], STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// Punctuation attached to or trailing the words is not part of any word.
void test_word_count_ignore_punctuation(void)
{
   TEST_IGNORE();

   static const char *const words[] = {
      "car", "carpet", "as", "java", "javascript",
   };
   const size_t entries = sizeof words / sizeof words[0];
   const char *input_text = "car: carpet as java: javascript!!&@$%^&";
   const int expected_word_count = 5;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = 1;
      strncpy(expected_solution[i].text, words[i], STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// Tokens made of digits are counted as words like any other.
void test_word_count_include_numbers(void)
{
   TEST_IGNORE();

   static const struct {
      const char *text;
      int count;
   } expected[] = {
      {"testing", 2}, {"1", 1}, {"2", 1},
   };
   const size_t entries = sizeof expected / sizeof expected[0];
   const char *input_text = "testing, 1, 2 testing";
   const int expected_word_count = 3;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = expected[i].count;
      strncpy(expected_solution[i].text, expected[i].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// Words differing only in letter case are the same word, stored lowercase.
void test_word_count_normalize_case(void)
{
   TEST_IGNORE();

   static const struct {
      const char *text;
      int count;
   } expected[] = {
      {"go", 3}, {"stop", 2},
   };
   const size_t entries = sizeof expected / sizeof expected[0];
   const char *input_text = "go Go GO Stop stop";
   const int expected_word_count = 2;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = expected[i].count;
      strncpy(expected_solution[i].text, expected[i].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// An apostrophe inside a word ("don't") stays part of that word.
void test_word_count_with_apostrophes(void)
{
   TEST_IGNORE();

   static const struct {
      const char *text;
      int count;
   } expected[] = {
      {"first", 1}, {"don't", 2}, {"laugh", 1}, {"then", 1}, {"cry", 1},
   };
   const size_t entries = sizeof expected / sizeof expected[0];
   const char *input_text = "First: don't laugh. Then: don't cry.";
   const int expected_word_count = 5;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = expected[i].count;
      strncpy(expected_solution[i].text, expected[i].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// A word wrapped in single quotes ('large') counts as the bare word, while
// the apostrophe inside "can't" is kept.
void test_word_count_with_quotation(void)
{
   TEST_IGNORE();

   static const struct {
      const char *text;
      int count;
   } expected[] = {
      {"joe", 1}, {"can't", 1}, {"tell", 1},
      {"between", 1}, {"large", 2}, {"and", 1},
   };
   const size_t entries = sizeof expected / sizeof expected[0];
   const char *input_text = "Joe can't tell between 'large' and large.";
   const int expected_word_count = 6;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = expected[i].count;
      strncpy(expected_solution[i].text, expected[i].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// The phrase used as the worked example in the exercise description.
void test_word_count_from_example(void)
{
   TEST_IGNORE();

   static const struct {
      const char *text;
      int count;
   } expected[] = {
      {"olly", 2}, {"in", 1}, {"come", 1}, {"free", 1},
   };
   const size_t entries = sizeof expected / sizeof expected[0];
   const char *input_text = "olly olly in come free";
   const int expected_word_count = 4;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = expected[i].count;
      strncpy(expected_solution[i].text, expected[i].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// A very long word that is still expected to be accepted (the test name
// suggests it sits exactly at the length limit) is counted normally.
void test_max_length_word(void)
{
   TEST_IGNORE();

   static const struct {
      const char *text;
      int count;
   } expected[] = {
      {"look", 2},
      {"thisisaveeeeeerylongwordtypedwithoutusinganyspaces", 2},
      {"and", 1},
      {"again", 1},
   };
   const size_t entries = sizeof expected / sizeof expected[0];
   const char *input_text =
       "Look thisisaveeeeeerylongwordtypedwithoutusinganyspaces and look again, thisisaveeeeeerylongwordtypedwithoutusinganyspaces";
   const int expected_word_count = 4;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = expected[i].count;
      strncpy(expected_solution[i].text, expected[i].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// An over-long word makes word_count() return EXCESSIVE_LENGTH_WORD; only the
// word seen before it ("look") is expected in the table.
void test_excessive_length_word(void)
{
   TEST_IGNORE();

   const char *input_text =
       "Look thisisanexcessivelylongwordthatsomeonetypedwithoutusingthespacebar enough";
   const int expected_word_count = EXCESSIVE_LENGTH_WORD;

   // this test builds its expectation in a local table, leaving the
   // file-scope expected_solution untouched
   word_count_word_t expected[MAX_WORDS];
   memset(expected, 0, sizeof(expected));
   expected[0].count = 1;
   strncpy(expected[0].text, "look", STRING_SIZE);

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected, expected_word_count,
                  actual_solution, actual_word_count);
}

// A phrase with 20 distinct words (presumably exactly MAX_WORDS; confirm in
// word_count.h) is still counted in full.
void test_max_number_words(void)
{
   TEST_IGNORE();

   static const struct {
      const char *text;
      int count;
   } expected[] = {
      {"once", 1}, {"upon", 1}, {"a", 3}, {"time", 1}, {"long", 1},
      {"while", 1}, {"in", 1}, {"the", 1}, {"past", 1}, {"there", 1},
      {"lived", 1}, {"strange", 1}, {"little", 1}, {"man", 1}, {"who", 1},
      {"could", 1}, {"spin", 1}, {"straw", 1}, {"into", 1}, {"gold", 1},
   };
   const size_t entries = sizeof expected / sizeof expected[0];
   const char *input_text =
       "Once upon a time, a long while in the past, there lived a strange little man who could spin straw into gold";
   const int expected_word_count = 20;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = expected[i].count;
      strncpy(expected_solution[i].text, expected[i].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// Further distinct words after the first 20 make word_count() return
// EXCESSIVE_NUMBER_OF_WORDS; the 20 words already counted are expected to
// remain in the table.
void test_excessive_number_words(void)
{
   TEST_IGNORE();

   static const struct {
      const char *text;
      int count;
   } expected[] = {
      {"once", 1}, {"upon", 1}, {"a", 3}, {"time", 1}, {"long", 1},
      {"while", 1}, {"in", 1}, {"the", 1}, {"past", 1}, {"there", 1},
      {"lived", 1}, {"strange", 1}, {"little", 1}, {"man", 1}, {"who", 1},
      {"could", 1}, {"spin", 1}, {"straw", 1}, {"into", 1}, {"gold", 1},
   };
   const size_t entries = sizeof expected / sizeof expected[0];
   const char *input_text =
       "Once upon a time, a long while in the past, there lived a strange little man who could spin straw into gold. His name was...";
   const int expected_word_count = EXCESSIVE_NUMBER_OF_WORDS;

   // zero the table so unused entries compare equal, then fill it
   memset(expected_solution, 0, sizeof(expected_solution));
   for (size_t i = 0; i < entries; i++) {
      expected_solution[i].count = expected[i].count;
      strncpy(expected_solution[i].text, expected[i].text, STRING_SIZE);
   }

   const int actual_word_count = word_count(input_text, actual_solution);

   check_solution(expected_solution, expected_word_count,
                  actual_solution, actual_word_count);
}

// Test runner entry point: registers every test with Unity and runs them in
// the order listed.
//
// Returns the value of UnityEnd() so that the process exit status is non-zero
// when a test fails; returning a constant 0 would let `make test` succeed
// even with failing tests.
int main(void)
{
   UnityBegin("test/test_word_count.c");

   RUN_TEST(test_word_count_one_word);
   RUN_TEST(test_word_count_one_of_each_word);
   RUN_TEST(test_word_count_multiple_occurrences_of_a_word);
   RUN_TEST(test_word_count_handles_cramped_lists);
   RUN_TEST(test_word_count_handles_expanded_lists);
   RUN_TEST(test_word_count_ignore_punctuation);
   RUN_TEST(test_word_count_include_numbers);
   RUN_TEST(test_word_count_normalize_case);
   RUN_TEST(test_word_count_with_apostrophes);
   RUN_TEST(test_word_count_with_quotation);
   RUN_TEST(test_word_count_from_example);
   RUN_TEST(test_max_length_word);
   RUN_TEST(test_excessive_length_word);
   RUN_TEST(test_max_number_words);
   RUN_TEST(test_excessive_number_words);

   return UnityEnd();
}
#define _GNU_SOURCE
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "word_count.h"

/*
 * Strips one pair of enclosing single quotes from token, in place.
 *
 * token: NUL-terminated, writable string; NULL is ignored.
 *
 * "'large'" becomes "large" and "''" becomes "". Strings that are not both
 * opened and closed by a single quote (for example "can't") are left alone,
 * as is a lone "'", which has no separate closing quote.
 */
void remove_quotation(char *token) {
  if (token == NULL) {
    return;
  }

  size_t len = strlen(token);

  /* Require two distinct quote characters: with len == 1 the first and last
     character are the same byte and token[len - 2] would lie before the
     start of the string. */
  if (len >= 2 && token[0] == '\'' && token[len - 1] == '\'') {
    /* Source and destination overlap, so memmove rather than memcpy. */
    memmove(token, token + 1, len - 2);
    token[len - 2] = '\0';
  }
}

/*
 * Looks word up in the words table.
 *
 * Returns the index of the first entry whose text equals word, or -1 when no
 * entry matches. All MAX_WORDS entries are examined, unused ones included,
 * so an empty word matches the first entry whose text is empty.
 */
int check_word(char *word, word_count_word_t * words) {
  int slot = 0;

  while (slot < MAX_WORDS) {
    if (strcmp(word, words[slot].text) == 0) {
      return slot;
    }
    slot++;
  }

  return -1;
}

/*
 * Zeroes the whole result table so every count starts at 0 and every text
 * buffer is filled with NUL bytes.
 *
 * words: table of MAX_WORDS entries.
 */
void clear_to_start_with_a_known_value(word_count_word_t * words) {
  /* Size computed from the element type; no scratch array is needed just to
     take its sizeof. */
  memset(words, 0, MAX_WORDS * sizeof *words);
}

/*
 * Counts how often each word occurs in input_text.
 *
 * input_text: NUL-terminated phrase of any length; NULL is treated as empty.
 * words:      table of MAX_WORDS entries; it is cleared first and then filled
 *             in order of first appearance, with lowercased text.
 *
 * Words are separated by any of the characters in `separators`. One pair of
 * enclosing single quotes is removed from a word before it is counted.
 *
 * Returns the number of distinct words, EXCESSIVE_LENGTH_WORD if a word is
 * longer than MAX_WORD_LENGTH, or EXCESSIVE_NUMBER_OF_WORDS if more than
 * MAX_WORDS distinct words occur. On an error return the table keeps the
 * words counted up to that point.
 */
int word_count(const char *input_text, word_count_word_t * words) {
  static const char separators[] = " ,:!%^&@$.\n";

  clear_to_start_with_a_known_value(words);

  if (input_text == NULL) {
    return 0;
  }

  int unique_words = 0;
  const char *cursor = input_text;

  /* The input is scanned in place and each word is copied into a small
     bounded buffer, so input length is unlimited and no strtok state is
     shared with other code. */
  for (;;) {
    cursor += strspn(cursor, separators);
    if (*cursor == '\0') {
      break;
    }

    const char *raw = cursor;
    size_t raw_len = strcspn(raw, separators);
    cursor += raw_len;

    /* Even with both enclosing quotes removed this word would be too long. */
    if (raw_len > MAX_WORD_LENGTH + 2) {
      return EXCESSIVE_LENGTH_WORD;
    }

    /* Room for the longest valid word, two enclosing quotes and the NUL. */
    char token[MAX_WORD_LENGTH + 3];
    for (size_t i = 0; i < raw_len; i++) {
      /* tolower requires a value representable as unsigned char. */
      token[i] = (char)tolower((unsigned char)raw[i]);
    }
    token[raw_len] = '\0';

    /* A lone "'" has no closing quote to strip; it is kept as-is. */
    if (raw_len >= 2) {
      remove_quotation(token);
    }

    size_t token_len = strlen(token);
    if (token_len == 0) {
      /* "''" strips to nothing; an empty word would match an unused entry
         in check_word and corrupt its count, so skip it. */
      continue;
    }
    if (token_len > MAX_WORD_LENGTH) {
      return EXCESSIVE_LENGTH_WORD;
    }

    int found = check_word(token, words);
    if (found >= 0) {
      words[found].count++;
      continue;
    }

    if (unique_words >= MAX_WORDS) {
      return EXCESSIVE_NUMBER_OF_WORDS;
    }
    words[unique_words].count = 1;
    strcpy(words[unique_words].text, token);
    unique_words++;
  }

  return unique_words;
}

Community comments

Find this solution interesting? Ask the author a question to learn more.

What can you learn from this solution?

A huge amount can be learned from reading other people’s code. This is why we wanted to give exercism users the option of making their solutions public.

Here are some questions to help you reflect on this solution and learn the most from it.

  • What compromises have been made?
  • Are there new concepts here that you could read more about to improve your understanding?