🎉 Exercism Research is now launched. Help Exercism, help science and have some fun at research.exercism.io 🎉
Avatar of vlzware

vlzware's solution

to Anagram in the C Track

Published at Jul 13 2018 · 0 comments
Instructions
Test suite
Solution

Note:

This solution was written on an old version of Exercism. The tests below might not correspond to the solution code, and the exercise may have changed since this code was written.

Given a word and a list of possible anagrams, select the correct sublist.

Given "listen" and a list of candidates like "enlists" "google" "inlets" "banana" the program should return a list containing "inlets".

Getting Started

Make sure you have read the C page on the Exercism site. This covers the basic information on setting up the development environment expected by the exercises.

Passing the Tests

Get the first test compiling, linking and passing by following the three rules of test-driven development.

The included makefile can be used to create and run the tests using the test task.

make test

Create just the functions you need to satisfy any compiler errors and get the test to fail. Then write just enough code to get the test to pass. Once you've done that, move onto the next test.

As you progress through the tests, take the time to refactor your implementation for readability and expressiveness and then go on to the next test.

Try to use standard C99 facilities in preference to writing your own low-level algorithms or facilities by hand.

Source

Inspired by the Extreme Startup game https://github.com/rchatley/extreme_startup

Submitting Incomplete Solutions

It's possible to submit an incomplete solution so you can see how others have completed the exercise.

test_anagram.c

#include <stdlib.h>
#include <string.h>

#include "vendor/unity.h"
#include "../src/anagram.h"

#define MAX_STR_LEN 20

struct candidates candidates;

void setUp(void)
{
}

void tearDown(void)
{
   free(candidates.candidate);
}

static struct candidates build_candidates(char *inputs, size_t count)
{
   struct candidates result;
   result.count = count;
   result.candidate = malloc(sizeof(struct candidate) * count);
   for (int i = 0; i < (int)count; i++) {
      result.candidate[i].candidate = &inputs[i * MAX_STR_LEN];
      result.candidate[i].is_anagram = UNCHECKED;
   }

   return result;
}

static void assert_correct_anagrams(struct candidates *candidates,
                                    enum anagram_status expected[])
{
   for (int i = 0; i < (int)candidates->count; i++) {
      TEST_ASSERT_EQUAL(expected[i], candidates->candidate[i].is_anagram);
   }
}

void test_no_matches(void)
{
   char inputs[][MAX_STR_LEN] = {
      "hello",
      "world",
      "zombies",
      "pants"
   };

   char word[] = { "diaper" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] =
       { NOT_ANAGRAM, NOT_ANAGRAM, NOT_ANAGRAM, NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

void test_detect_simple_anagram(void)
{
   TEST_IGNORE();               // delete this line to run test
   char inputs[][MAX_STR_LEN] = {
      "tan",
      "stand",
      "at"
   };

   char word[] = { "ant" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] = { IS_ANAGRAM, NOT_ANAGRAM, NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);

}

void test_does_not_confuse_different_duplicates(void)
{
   TEST_IGNORE();
   char inputs[][MAX_STR_LEN] = {
      "eagle"
   };

   char word[] = { "galea" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] = { NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

void test_eliminate_anagram_subsets(void)
{
   TEST_IGNORE();
   char inputs[][MAX_STR_LEN] = {
      "dog",
      "goody"
   };

   char word[] = { "good" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] = { NOT_ANAGRAM, NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

void test_detect_anagram(void)
{
   TEST_IGNORE();
   char inputs[][MAX_STR_LEN] = {
      "enlists",
      "google",
      "inlets",
      "banana"
   };

   char word[] = { "listen" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] =
       { NOT_ANAGRAM, NOT_ANAGRAM, IS_ANAGRAM, NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

void test_multiple_anagrams(void)
{
   TEST_IGNORE();
   char inputs[][MAX_STR_LEN] = {
      "gallery",
      "ballerina",
      "regally",
      "clergy",
      "largely",
      "leading"
   };

   char word[] = { "allergy" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] =
       { IS_ANAGRAM, NOT_ANAGRAM, IS_ANAGRAM, NOT_ANAGRAM, IS_ANAGRAM,
      NOT_ANAGRAM
   };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

void test_case_insensitive_anagrams(void)
{
   TEST_IGNORE();
   char inputs[][MAX_STR_LEN] = {
      "cashregister",
      "Carthorse",
      "radishes"
   };

   char word[] = { "Orchestra" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] = { NOT_ANAGRAM, IS_ANAGRAM, NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

void test_does_not_detect_a_word_as_its_own_anagram(void)
{
   TEST_IGNORE();
   char inputs[][MAX_STR_LEN] = {
      "banana"
   };

   char word[] = { "banana" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] = { NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

void test_does_not_detect_a_differently_cased_word_as_its_own_anagram(void)
{
   TEST_IGNORE();
   char inputs[][MAX_STR_LEN] = {
      "bAnana"
   };

   char word[] = { "banana" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] = { NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

void test_unicode_anagrams(void)
{
   TEST_IGNORE();               // This is an extra credit test.  Delete this line to accept the challenge
   // These words don't make sense, they're just greek letters cobbled together.
   char inputs[][MAX_STR_LEN] = {
      "ΒΓΑ",
      "ΒΓΔ",
      "γβα"
   };

   char word[] = { "ΑΒΓ" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] = { IS_ANAGRAM, NOT_ANAGRAM, NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

void test_misleading_unicode_anagrams(void)
{
   TEST_IGNORE();               //This is an extra credit test, are you up for the challenge
   // Despite what a human might think these words different letters, the input uses Greek A and B
   // while the list of potential anagrams uses Latin A and B.
   char inputs[][MAX_STR_LEN] = {
      "ABΓ"
   };

   char word[] = { "ΑΒΓ" };

   candidates = build_candidates(*inputs, sizeof(inputs) / MAX_STR_LEN);
   enum anagram_status expected[] = { NOT_ANAGRAM };

   anagrams_for(word, &candidates);
   assert_correct_anagrams(&candidates, expected);
}

int main(void)
{
   UnityBegin("anagram.c");

   RUN_TEST(test_no_matches);
   RUN_TEST(test_detect_simple_anagram);
   RUN_TEST(test_does_not_confuse_different_duplicates);
   RUN_TEST(test_eliminate_anagram_subsets);
   RUN_TEST(test_detect_anagram);
   RUN_TEST(test_multiple_anagrams);
   RUN_TEST(test_case_insensitive_anagrams);
   RUN_TEST(test_does_not_detect_a_word_as_its_own_anagram);
   RUN_TEST(test_does_not_detect_a_differently_cased_word_as_its_own_anagram);

   // Bonus points
   RUN_TEST(test_unicode_anagrams);
   RUN_TEST(test_misleading_unicode_anagrams);

   UnityEnd();
   return 0;
}

src/anagram.c

/**
 * Chech for anagrams:
 *
 * O(n) in the best case
 * O(log n) average
 * O(n^2) in the worst case
 *
 * i.e. one pass for each string + quicksort + strcmp
 *
 * A quicker solution is to use two vectors (arrays) of 26 (a-z)
 * but this would not work with unicode, or anything else than a-z
 */

#include "anagram.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/* get length and convert to lowercase, copy w to ref */
int mstrlen_cp(char* w, char *ref)
{
	int res = 0;
	while (*w) {
		*w = tolower(*w);
		*ref++ = *w;
		res++;
		w++;
	}
	*ref = '\0';

	return res;
}

/* get length and convert to lowercase, compare to ref */
int mstrlen_ref(char* w, char *ref)
{
	int res = 0;
	int same = 0;
	while (*w) {
		*w = tolower(*w);
		if (ref && *ref && (*w == *ref)) {
			same++;
			ref++;
		}
		res++;
		w++;
	}
	if (res == same)
		return -1;

	return res;
}

/* qsort comparing function for char's */
int qsortcmp(const void *a, const void *b)
{
	const char *p = (const char *) a;
	const char *q = (const char *) b;

	return (*p - *q);
}

void anagrams_for(const char *word, struct candidates *candidates)
{
	if ((word == NULL) || (candidates == NULL))
		return;

	char or_word[MAX_STR_LEN];

	const int wlen = mstrlen_cp((char*) word, or_word);
	qsort((void *)word, wlen, 1, qsortcmp);

	int i;
	for (i = 0; i < (int)candidates->count; i++) {
		struct candidate *tmp = &(candidates->candidate[i]);
		int llen = mstrlen_ref((char*) tmp->candidate,
			   (char*) or_word);
		if (llen != wlen) {
			tmp->is_anagram = NOT_ANAGRAM;
		} else {
			qsort((void *)tmp->candidate, llen, 1, qsortcmp);
			int res = strcmp(word, tmp->candidate);
			tmp->is_anagram = res ? NOT_ANAGRAM : IS_ANAGRAM;
		}
	}
}

src/anagram.h

#ifndef ANAGRAM_H
#define ANAGRAM_H
#include <stdlib.h>

#define MAX_STR_LEN 20

enum anagram_status {
   UNCHECKED = -1,
   NOT_ANAGRAM,
   IS_ANAGRAM
};

struct candidate {
   enum anagram_status is_anagram;
   const char *candidate;
};

struct candidates {
   struct candidate *candidate;
   size_t count;
};

/**
 * @description - determines if any of the words in candidate are anagrams
 *                for word.  Word buffer and candidate structures may be modified.
 */
void anagrams_for(const char *word, struct candidates *candidates);

#endif

Community comments

Find this solution interesting? Ask the author a question to learn more.

What can you learn from this solution?

A huge amount can be learned from reading other people’s code. This is why we wanted to give exercism users the option of making their solutions public.

Here are some questions to help you reflect on this solution and learn the most from it.

  • What compromises have been made?
  • Are there new concepts here that you could read more about to improve your understanding?