ocr.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
Read and write numbers to and from a grid representation consisting of
pipes, underscores and spaces.
"""

from itertools import chain, zip_longest

# Every glyph sits in a fixed-size cell: three characters wide and four
# rows tall (the fourth row is blank in the reference font below).
DIGIT_WIDTH = 3
DIGIT_HEIGHT = 4
DIGIT_SIZE = DIGIT_WIDTH * DIGIT_HEIGHT

# Reference rendering of the digits 0 through 9, left to right,
# one cell per digit.
all_digits = (
    " _     _  _     _  _  _  _  _ ",
    "| |  | _| _||_||_ |_   ||_||_|",
    "|_|  ||_  _|  | _||_|  ||_| _|",
    "                              ",
)


def chunks(iterable, chunk_size):
    """
    Split an iterable into consecutive tuples of ``chunk_size`` items.

    The last tuple is padded with ``None`` if the input runs out part-way
    through a chunk.
    """
    # Give zip_longest the *same* iterator chunk_size times over: every
    # output tuple then draws chunk_size successive items from one source.
    source = iter(iterable)
    return zip_longest(*[source for _ in range(chunk_size)])


def read_grid(grid):
    """
    Produce one signature string for each digit cell in the grid.

    A signature is the cell's characters read column by column, top to
    bottom. The result is a lazy generator rather than a list.
    """
    # Transpose rows into columns and flatten them into one character
    # stream, so each digit's characters end up next to each other.
    columns = zip(*grid)
    char_stream = chain.from_iterable(columns)

    # Every run of DIGIT_SIZE characters becomes one signature.
    cells = chunks(char_stream, DIGIT_SIZE)
    return (''.join(cell) for cell in cells)


# Signatures of the reference digits, in the order they appear in
# all_digits. read_grid() hands back a one-shot generator, so materialize
# it: otherwise building DIGIT_LOOKUP below would drain it and leave
# SIGNATURES empty for anyone reading this module-level name afterwards.
SIGNATURES = tuple(read_grid(all_digits))

# Maps digits 0-9 to their signatures in the grid
DIGIT_LOOKUP = dict(zip(map(str, range(10)), SIGNATURES))

# Maps grid signatures to digits
SIGNATURE_LOOKUP = {v: k for (k, v) in DIGIT_LOOKUP.items()}



def write_grid(signatures):
    """
    Assemble a grid (a list of row strings) from digit signatures.

    Signatures are expected in the column-by-column form that read_grid
    produces.
    """
    # Concatenating the signatures gives every column of the grid in
    # left-to-right order, DIGIT_HEIGHT characters per column.
    flat = ''.join(signatures)
    columns = chunks(flat, DIGIT_HEIGHT)

    # Transposing the columns yields the rows; glue each one into a string.
    rows = zip(*columns)
    return [''.join(cells) for cells in rows]

def is_valid_grid(grid):
    """
    Check that a grid has the right shape to be split into digit cells.

    A grid is valid when it has exactly DIGIT_HEIGHT rows, every row has
    the same width, and that width is a multiple of DIGIT_WIDTH.

    Returns True for a well-formed grid, False otherwise.
    """
    if len(grid) != DIGIT_HEIGHT:
        return False

    # Rows must agree on width: a ragged grid would otherwise be silently
    # cut down to its shortest row when the columns are zipped for reading.
    widths = {len(row) for row in grid}
    return len(widths) <= 1 and all(
        width % DIGIT_WIDTH == 0 for width in widths
    )


def number(grid):
    """
    Decode an ASCII grid into the string of digits it depicts.

    Any cell that matches no known digit is reported as '?'.
    Raises ValueError when the grid fails is_valid_grid().
    """
    if not is_valid_grid(grid):
        raise ValueError

    # Translate each cell's signature to its digit, falling back to '?'.
    decoded = [
        SIGNATURE_LOOKUP.get(cell, '?')
        for cell in read_grid(grid)
    ]
    return ''.join(decoded)


def grid(digits):
    """
    Encode a string of digits into an ASCII grid representation.

    digits: a non-empty string made up only of the characters 0-9.

    Returns the grid as a list of row strings.
    Raises ValueError if digits is empty or contains any other character.
    """
    # str.isdigit() alone also accepts non-ASCII digits (superscripts,
    # Arabic-Indic digits, ...). Those have no glyph here and would escape
    # as a KeyError, so also require each character to be a known key.
    if not (digits.isdigit() and all(d in DIGIT_LOOKUP for d in digits)):
        raise ValueError(
            "expected a non-empty string of digits 0-9, got %r" % (digits,)
        )

    return write_grid(DIGIT_LOOKUP[digit] for digit in digits)

Comments


You're not logged in right now. Please log in via GitHub to comment.