🎉 Exercism Research is now launched. Help Exercism, help science and have some fun at research.exercism.io 🎉
Avatar of Greycoat21

Greycoat21's solution

to Word Count in the Delphi Pascal Track

Published at Feb 01 2020 · 0 comments
Instructions
Test suite
Solution

Given a phrase, count the occurrences of each word in that phrase.

For the purposes of this exercise you can expect that a word will always be one of:

  1. A number composed of one or more ASCII digits (e.g. "0" or "1234") OR
  2. A simple word composed of one or more ASCII letters (e.g. "a" or "they") OR
  3. A contraction of two simple words joined by a single apostrophe (e.g. "it's" or "they're")

When counting words you can assume the following rules:

  1. The count is case insensitive (e.g. "You", "you", and "YOU" are 3 uses of the same word)
  2. The count is unordered; the tests will ignore how words and counts are ordered
  3. Other than the apostrophe in a contraction all forms of punctuation are ignored
  4. The words can be separated by any form of whitespace (e.g. "\t", "\n", " ")

For example, for the phrase `"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` the count would be:

that's: 1
the: 2
password: 2
123: 1
cried: 1
special: 1
agent: 1
so: 1
i: 1
fled: 1

Testing

In order to run the tests for this track, you will need to install DUnitX. Please see the installation instructions for more information.

Loading Exercises into Delphi

If Delphi is properly installed, and *.dpr file types have been associated with Delphi, then double-clicking the supplied *.dpr file will start Delphi and load the exercise/project. Ctrl + F9 compiles the project; F9 compiles and runs it.

Alternatively, you may start Delphi and load your project via the File drop-down menu.

When Questions Come Up

We monitor the Pascal-Delphi support room on gitter.im to help you with any questions that might arise.

Submitting Exercises

When submitting an exercise, make sure the exercise file you're submitting is in the exercism/delphi/<exerciseName> directory.

For example, if you're submitting ubob.pas for the Bob exercise, the submit command would be something like exercism submit <path_to_exercism_dir>/delphi/bob/ubob.pas.

Source

This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour.

Submitting Incomplete Solutions

It's possible to submit an incomplete solution so you may request help from a mentor.

uWordCountTests.pas

unit uWordCountTests;

interface
uses
  System.Generics.Collections, DUnitX.TestFramework;

const
  // NOTE(review): presumably the version of the shared canonical test data
  // that this suite mirrors — confirm; nothing in this unit reads the value.
  CanonicalVersion = '1.3.0';

type

  // DUnitX fixture that checks uWordCount's WordCount(...).CountWords against
  // hand-written expected dictionaries.
  // Only Validate_CompareDictionaries and Count_one_word are active as
  // shipped; every other test carries [Ignore] and has to have that attribute
  // commented out before it will run.
  [TestFixture]
  WordCountTests = class(TObject)
  private
    // Expected is created in Setup and filled by each test; Actual receives
    // the dictionary produced by the code under test. Both are released in
    // TearDown.
    Expected,
    Actual: TDictionary<String, integer>;
    // Asserts that both dictionaries hold the same keys with the same counts.
    procedure CompareDictionaries(Expected, Actual: TDictionary<String, integer>);
  public
    // Runs before each test.
    [Setup]
    procedure Setup;

    // Runs after each test.
    [TearDown]
    procedure TearDown;

    // Self-test of the CompareDictionaries helper.
    [Test]
    procedure Validate_CompareDictionaries;

    [Test]
//    [Ignore('Comment the "[Ignore]" statement to run the test')]
    procedure Count_one_word;

    [Test]
    [Ignore]
    procedure Count_one_of_each_word;

    [Test]
    [Ignore]
    procedure Multiple_occurrences_of_a_word;

    [Test]
    [Ignore]
    procedure Handles_cramped_lists;

    [Test]
    [Ignore]
    procedure Handles_expanded_lists;

    [Test]
    [Ignore]
    procedure Ignore_punctuation;

    [Test]
    [Ignore]
    procedure Include_numbers;

    [Test]
    [Ignore]
    procedure Normalize_case;

    [Test]
    [Ignore]
    procedure With_apostrophes;

    [Test]
    [Ignore]
    procedure With_quotations;

    [Test]
    [Ignore]
    procedure Multiple_spaces_not_detected_as_a_word;

    [Test]
    [Ignore]
    procedure Alternating_word_separators_not_detected_as_a_word;
  end;

implementation

uses SysUtils, uWordCount;


// Fails the running test unless Actual holds exactly the same words with the
// same counts as Expected.
procedure WordCountTests.CompareDictionaries(Expected, Actual: TDictionary<String, Integer>);
var
  Entry: TPair<string, Integer>;
  ActualCount: Integer;
begin
  // Equal sizes plus "every expected key matches" implies equal dictionaries.
  Assert.AreEqual(Expected.Count, Actual.Count,
    '{Word counts should be equal}');

  for Entry in Expected do
  begin
    Assert.IsTrue(Actual.ContainsKey(Entry.Key),
      Format('Actual doesn''t contain Expected "%s"', [Entry.Key]));

    // Safe to index: the assertion above guarantees the key is present.
    ActualCount := Actual[Entry.Key];
    Assert.AreEqual(Entry.Value, ActualCount,
      Format('{Expected %s: %d; Actual %s: %d}',
        [Entry.Key, Entry.Value, Entry.Key, ActualCount]));
  end;
end;

// Sanity check for the comparison helper itself: a dictionary must compare
// equal to a copy of itself.
procedure WordCountTests.Validate_CompareDictionaries;
begin
  Expected.Add('r', 5);
  Expected.Add('a', 10);
  Expected.Add('n', 15);
  Expected.Add('d', 20);
  Expected.Add('o', 25);
  Expected.Add('m', 30);

  // The copy constructor duplicates every key/value pair of Expected.
  Actual := TDictionary<String, Integer>.Create(Expected);

  CompareDictionaries(Expected, Actual);
end;

// Runs of mixed separators (commas, escapes, spaces, quotes) must not yield
// empty or punctuation-only words.
procedure WordCountTests.Alternating_word_separators_not_detected_as_a_word;
const
  // Delphi string literals have no escape sequences: '\n' here is the two
  // characters '\' and 'n', not a line break.
  cPhrase = ',\n,one,\n ,two \n ''three''';
begin
  Expected.Add('one', 1);
  Expected.Add('two', 1);
  Expected.Add('three', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// A phrase consisting of a single word yields that word with a count of one.
procedure WordCountTests.Count_one_word;
const
  cPhrase = 'word';
begin
  Expected.Add('word', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// Distinct space-separated words are each counted once.
procedure WordCountTests.Count_one_of_each_word;
const
  cPhrase = 'one of each';
begin
  Expected.Add('one', 1);
  Expected.Add('of', 1);
  Expected.Add('each', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// A word that appears several times is accumulated under a single key.
procedure WordCountTests.Multiple_occurrences_of_a_word;
const
  cPhrase = 'one fish two fish red fish blue fish';
begin
  Expected.Add('one', 1);
  Expected.Add('fish', 4);
  Expected.Add('two', 1);
  Expected.Add('red', 1);
  Expected.Add('blue', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// Commas without surrounding spaces still separate words.
procedure WordCountTests.Handles_cramped_lists;
const
  cPhrase = 'one,two,three';
begin
  Expected.Add('one', 1);
  Expected.Add('two', 1);
  Expected.Add('three', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// A comma followed by a textual "\n" escape separates words.
procedure WordCountTests.Handles_expanded_lists;
const
  // Delphi string literals have no escape sequences: '\n' here is the two
  // characters '\' and 'n', not a line break.
  cPhrase = 'one,\ntwo,\nthree';
begin
  Expected.Add('one', 1);
  Expected.Add('two', 1);
  Expected.Add('three', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// Punctuation attached to or trailing words is not part of any word.
procedure WordCountTests.Ignore_punctuation;
const
  cPhrase = 'car: carpet as java: javascript!!&@$%^&';
begin
  Expected.Add('car', 1);
  Expected.Add('carpet', 1);
  Expected.Add('as', 1);
  Expected.Add('java', 1);
  Expected.Add('javascript', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// Digit sequences count as words in their own right.
procedure WordCountTests.Include_numbers;
const
  cPhrase = 'testing, 1, 2 testing';
begin
  Expected.Add('testing', 2);
  Expected.Add('1', 1);
  Expected.Add('2', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// Words differing only in letter case are counted under one lower-case key.
procedure WordCountTests.Normalize_case;
const
  cPhrase = 'go Go GO Stop stop';
begin
  Expected.Add('go', 3);
  Expected.Add('stop', 2);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// Per-test initialisation: a fresh, empty Expected dictionary and no Actual.
procedure WordCountTests.Setup;
begin
  Expected := TDictionary<String, integer>.Create;
  // Actual is only assigned inside a test body. Clearing it here ensures that
  // a test which fails before producing a result does not leave TearDown
  // releasing the previous test's already-freed dictionary.
  Actual := nil;
end;

// Per-test cleanup: releases both dictionaries and clears the references.
procedure WordCountTests.TearDown;
begin
  // Nil the fields after releasing them: the fixture's fields outlive a single
  // test, and a stale pointer would be freed a second time if the next test
  // never assigns Actual. DisposeOf on a nil reference is a no-op.
  Expected.DisposeOf;
  Expected := nil;
  Actual.DisposeOf;
  Actual := nil;
end;

// An apostrophe inside a contraction is kept as part of the word.
procedure WordCountTests.With_apostrophes;
const
  cPhrase = 'First: don''t laugh. Then: don''t cry.';
begin
  Expected.Add('first', 1);
  Expected.Add('don''t', 2);
  Expected.Add('laugh', 1);
  Expected.Add('then', 1);
  Expected.Add('cry', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// Quotes wrapped around a word are dropped, while the apostrophe of a
// contraction is preserved.
procedure WordCountTests.With_quotations;
const
  cPhrase = 'Joe can''t tell between ''large'' and large';
begin
  Expected.Add('joe', 1);
  Expected.Add('can''t', 1);
  Expected.Add('tell', 1);
  Expected.Add('between', 1);
  Expected.Add('large', 2);
  Expected.Add('and', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

// Leading spaces and runs of spaces must not produce empty words.
procedure WordCountTests.Multiple_spaces_not_detected_as_a_word;
const
  cPhrase = ' multiple   whitespaces';
begin
  Expected.Add('multiple', 1);
  Expected.Add('whitespaces', 1);

  Actual := WordCount(cPhrase).CountWords;

  CompareDictionaries(Expected, Actual);
end;

initialization
  // Register the fixture with DUnitX when this unit is initialised.
  TDUnitX.RegisterTestFixture(WordCountTests);
end.
unit uWordCount;

interface

uses
  Generics.Collections;

type
  // Word counter for a single phrase; obtain an instance via WordCount().
  IWordCount = interface
    // Returns a dictionary mapping each word to its number of occurrences.
    // The implementation in this unit creates a new dictionary on every call
    // and never frees it itself, so the caller is responsible for freeing it.
    function CountWords: TDictionary<String, integer>;
  end;

function WordCount(Str: string): IWordCount;

implementation

uses
  SysUtils,
  Character;

type
  // Shorthand for the word -> occurrence-count dictionary used in this unit.
  TMap = TDictionary<string, Integer>;

  // Implementation of IWordCount; instances are created by WordCount().
  TWordCount = class(TInterfacedObject, IWordCount)
  private
    // Phrase to analyse, set by the constructor.
    Str: string;
  public
    // Builds and returns the word -> count dictionary for the phrase.
    function CountWords: TMap;
    // Stores the phrase to analyse.
    constructor Create(Str: string); overload;
  end;

// Factory: wraps Str in a word counter and hands it out as an IWordCount.
function WordCount(Str: string): IWordCount;
var
  Counter: TWordCount;
begin
  Counter := TWordCount.Create(Str);
  // Assigning to the interface result hands the instance over to interface
  // reference counting.
  Result := Counter;
end;

{ TWordCount }

// Increments the count stored for Word in Map (starting at 1 for a word not
// seen before) and then clears Word so the caller can collect the next one.
procedure AddWordToMap(var Word: string; const Map: TMap);
var
  Seen: Integer;
begin
  if not Map.TryGetValue(Word, Seen) then
    Seen := 0;

  // AddOrSetValue inserts the key when absent and overwrites it otherwise.
  Map.AddOrSetValue(Word, Seen + 1);

  Word := '';
end;

// True when Chr is a backslash, the character that introduces a textual
// escape such as "\n" in the input phrases.
function IsEscapeChar(Chr: Char): Boolean;
begin
  Result := (Chr = '\');
end;
// True when Chr is the apostrophe / single-quote character.
function IsQuoteChar(Chr: Char): Boolean;
begin
  // #39 is the character code of the apostrophe.
  Result := (Chr = #39);
end;
// True for every character that cannot be part of a word, i.e. anything that
// is neither a letter, a digit nor an apostrophe.
function IsWordSepChar(Chr: Char): Boolean;
begin
  Result := not (Chr.IsLetterOrDigit or IsQuoteChar(Chr));
end;

// Decides whether the quote c2 is punctuation rather than the apostrophe of a
// contraction. c1 and c3 are the characters immediately before and after c2.
//
// The quote is ignored when at least one neighbour is a word separator:
//   " 'c"  opening quote around a word
//   "c' "  closing quote around a word
//   " ' "  stand-alone quote (previously not ignored, which let a lone
//          apostrophe through to be counted as the word "'")
// It is kept only when both neighbours are word characters, as in "don't".
// Returns False when c2 is not a quote at all.
function IsIgnoredQuote(c1, c2, c3: Char): Boolean;
begin
  Result := IsQuoteChar(c2)
    and (IsWordSepChar(c1) or IsWordSepChar(c3));
end;

// Normalises a phrase for word counting.
//
// Every character is lower-cased. Word characters (letters, digits and
// apostrophes inside a word) are copied through; everything else is replaced
// by a single space: separators, a backslash together with the character that
// follows it (textual escapes such as "\n"), and quotes that merely wrap a
// word. Leading and trailing spaces are removed from the result.
function CleanString(Str: string): string;
var
  i: Integer;
  Chr: Char;

  // Returns the character at Index, or a space when Index lies outside Str,
  // so that the start and end of the phrase behave like word separators
  // instead of reading beyond the string.
  function CharAt(Index: Integer): Char;
  begin
    if (Index >= Low(Str)) and (Index <= High(Str)) then
      Result := Str[Index]
    else
      Result := ' ';
  end;

begin
  Result := '';
  for i := Low(Str) to High(Str) do begin

    Chr := Str[i].ToLower;

    // Blank out escapes (the backslash and the character it escapes),
    // quotes that surround a word, and ordinary separators.
    if IsEscapeChar(Chr) or IsEscapeChar(CharAt(i - 1))
    or IsIgnoredQuote(CharAt(i - 1), Chr, CharAt(i + 1))
    or IsWordSepChar(Chr) then
      Result := Result + ' '
    else
      // Anything reaching this branch is a letter, digit or in-word
      // apostrophe, so it is always kept.
      Result := Result + Chr;
  end;

  // Trim is a function: its result has to be assigned to take effect.
  Result := Result.Trim;
end;

// Counts the words of the phrase held in Str.
//
// Returns a newly created dictionary mapping each lower-cased word to its
// number of occurrences. Ownership passes to the caller, who must free it.
function TWordCount.CountWords: TMap;
var
  i: Integer;
  Chr: Char;
  Cleaned: string;
  Word: string;
begin
  Result := TMap.Create;
  try
    Word := '';

    // Work on a local copy so that counting does not modify the stored phrase.
    Cleaned := CleanString(Str);
    for i := Low(Cleaned) to High(Cleaned) do begin

      Chr := Cleaned[i].ToLower;

      if not IsWordSepChar(Chr) then
        Word := Word + Chr
      else if Word <> '' then
        // A separator ends the current word. Runs of separators are skipped
        // because Word is already empty, and no neighbouring character has
        // to be inspected (which previously read before the first index).
        AddWordToMap(Word, Result);
    end;

    // Flush the final word when the text does not end with a separator.
    if Word <> '' then
      AddWordToMap(Word, Result);
  except
    // Do not leak the dictionary when counting fails part-way through.
    Result.Free;
    raise;
  end;
end;

// Creates a counter for the phrase Str.
constructor TWordCount.Create(Str: string);
begin
  // Run the ancestor constructor first, as is conventional for Delphi
  // constructors.
  inherited Create;
  // The parameter shadows the field of the same name, hence the Self qualifier.
  Self.Str := Str;
end;

end.

Community comments

Find this solution interesting? Ask the author a question to learn more.

Greycoat21's Reflection

While this code passes the tests, I have begun to doubt this approach on account of the complexity/interactions of the logic involved. This would be simpler with RegEx, and/or some kind of compound string split.