Get proquint documented

2021-08-03 19:04:23 -06:00 · 2021-08-03 19:04:23 -06:00 · abc4b8dddb
commit abc4b8dddb
parent 0bda1dd2f3
7 changed files with 330 additions and 151 deletions
--- a/projects/proquint/BUILD
+++ b/projects/proquint/BUILD
@ -0,0 +1,16 @@
+# Library target bundling every Python source found under src/python.
+py_library(
+  name = "lib",
+  srcs = glob(["src/python/**/*.py"]),
+  # Put src/python on the import path so dependents can `import proquint` directly.
+  imports = [
+    "src/python",
+  ],
+)
+
+# Test target over every Python file under test/python; depends on ":lib" and hypothesis.
+# NOTE(review): py_pytest and py_requirement are not defined or loaded in this file -
+# presumably repo-provided macros; confirm they are available to this package.
+py_pytest(
+  name = "test",
+  srcs = glob(["test/python/**/*.py"]),
+  deps = [
+    ":lib",
+    py_requirement("hypothesis"),
+  ],
+)
--- a/projects/proquint/README.md
+++ b/projects/proquint/README.md
@ -1,3 +1,53 @@
 # Proquint

-An alternative implementation to https://github.com/dsw/proquint/tree/master/python, which is kinda garbo.
+An implementation of [A Proposal for Proquints](https://arxiv.org/html/0901.4016).
+
+To summarize the paper, traditional decimal and hexadecimal codings are inconvenient for "large" bit-width identifiers.
+Decimal and hexadecimal codings offer no obvious dense enunciation and are traditionally presented without segmentation punctuation.
+The proquint format is a semantically dense coding for 16-bit chunks fitting within the enunciable space of English.
+
+## Demo
+
+``` python
+>>> from proquint import Proquint
+>>> Proquint.encode_i16(0)
+'babab'
+>>> Proquint.encode_i16(1)
+'babad'
+>>> Proquint.encode_i64(14708250061244963317)
+'subiv-gavab-sobiz-noluj'
+>>> Proquint.decode('babad')
+1
+```
+
+## API Overview
+
+### `proquint.Proquint.CONSONANTS`
+
+A string of consonants to use when encoding or decoding proquints.
+Must be of length 16.
+
+### `proquint.Proquint.VOWELS`
+
+A string of vowels to use when encoding or decoding proquints.
+Must be of length 4.
+
+### `proquint.Proquint.decode(buffer: str) -> int`
+
+Decode a proquint string to an integer value without restriction on bit-width.
+
+### `proquint.Proquint.encode(val: int, width: int) -> str`
+
+Encode an integer into a string which will decode to the same value.
+
+Note that the bit-width must be specified in order to determine the number of required segments.
+
+### `proquint.Proquint.encode_{i16, i32, i64}(val: int) -> str`
+
+Helpers for encoding known-width quantities.
+
+## LICENSE
+
+Copyright Reid 'arrdem' McKenzie August 2021.
+
+Published under the terms of the MIT license.
--- a/projects/proquint/proquint.py
+++ b/projects/proquint/proquint.py
@ -1,105 +0,0 @@
-"""Proquint - pronounceable codings of integers.
-
-Implemented from http://arxiv.org/html/0901.4016
-"""
-
-from functools import cache
-
-
-class Proquint(object):
-    # Class parameters
-    ################################################################################################
-    CONSONANTS = "bdfghjklmnprstvz"
-    VOWELS = "aiou"
-    BYTEORDER = "big"
-
-    # Implementation helpers
-    ################################################################################################
-    @classmethod
-    @cache
-    def _consonant_to_uint(cls, c: str) -> int:
-        if idx := cls.CONSONANTS.index(c) == -1:
-            raise KeyError
-        return idx
-
-    @classmethod
-    @cache
-    def _vowel_to_uint(cls, c: str) -> int:
-        if idx := cls.VOWELS.index(c) == -1:
-            raise KeyError
-        return idx
-
-    @classmethod
-    def _encode(cls, buffer: bytes) -> str:
-        for n, m in zip(buffer[0::2], buffer[1::2]):
-            n = n << 16 | m
-            c1 = n & 0x0F
-            v1 = (n >> 4) & 0x03
-            c2 = (n >> 6) & 0x0F
-            v2 = (n >> 10) & 0x03
-            c3 = (n >> 12) & 0x0F
-
-            yield f"{cls.CONSONANTS[c1]}{cls.VOWELS[v1]}{cls.CONSONANTS[c2]}{cls.VOWELS[v2]}{cls.CONSONANTS[c3]}"
-
-    # Core methods
-    ################################################################################################
-    @classmethod
-    def encode_bytes(cls, buffer: bytes) -> str:
-        """Encode a sequence of bytes into a proquint string.
-
-        >>>
-        """
-
-        return "-".join(cls._encode(buffer))
-
-    @classmethod
-    def decode(cls, buffer: str) -> int:
-        """Convert proquint string identifier into corresponding 32-bit integer value.
-
-        >>> hex(Proquint.decode('lusab-babad'))
-        '0x7F000001'
-        """
-
-        res = 0
-
-        for i, c in enumerate([c for c in buffer if c != '-']):
-            if mag := cls._consonant_to_uint(c) is not None:
-                res <<= 4
-                res += mag
-            else:
-                mag = cls._vowel_to_uint(c)
-                if mag is not None:
-                    res <<= 2
-                    res += mag
-                elif i != 5:
-                    raise ValueError('Bad proquint format')
-        return res
-
-    # Handy aliases
-    ################################################################################################
-    @classmethod
-    def encode(cls, val: int, width: int, byteorder=BYTEORDER):
-        """Encode an integer into a proquint string."""
-
-        if width % 8 != 0 or width < 8:
-            raise ValueError(f"Width must be a positive power of 2 greater than 8")
-
-        return cls.encode_bytes(val.to_bytes(width // 8, byteorder))
-
-    @classmethod
-    def encode_i16(cls, val: int):
-        """Encode a 16bi int to a proquint string."""
-
-        return cls.encode(val, 16)
-
-    @classmethod
-    def encode_i32(cls, val: int):
-        """Encode a 32bi int to a proquint string."""
-
-        return cls.encode(val, 32)
-
-    @classmethod
-    def encode_i64(cls, val: int):
-        """Encode a 64bi int into a proquint string."""
-
-        return cls.encode(val, 64)
--- a/projects/proquint/setup.py
+++ b/projects/proquint/setup.py
@ -1,33 +1,18 @@
-"""A setuptools based setup module.
-
-"""
-
-# io.open is needed for projects that support Python 2.7
-# It ensures open() defaults to text mode with universal newlines,
-# and accepts an argument to specify the text encoding
-# Python 3 only projects can skip this import
-from io import open
 from os import path
-
-# Always prefer setuptools over distutils
 from setuptools import find_packages, setup


+# `path` (from os) must stay imported: it is used to locate README.md next to this file.
 here = path.abspath(path.dirname(__file__))

-# Get the long description from the README file
 with open(path.join(here, "README.md"), encoding="utf-8") as f:
    long_description = f.read()

-# Arguments marked as "Required" below must be included for upload to PyPI.
-# Fields marked as "Optional" may be commented out.

 setup(
-    name="proquint",  # Required
-    version="0.1.0",  # Required
-    description="Enunciable numerics",
-    long_description=long_description,  # Optional
-    long_description_content_type="text/markdown",  # Optional (see note above)
+    name="arrdem.proquint",
+    version="0.1.0",
+    description="Enunciable numeric identifiers",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
    url="https://github.com/arrdem/source",
    author="Reid 'arrdem' McKenzie",
    author_email="me@arrdem.com",
@ -37,32 +22,13 @@ setup(
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: BSD License",
-        "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.9",
    ],
-    # This field adds keywords for your project which will appear on the
-    # project page. What does your project relate to?
-    #
-    # Note that this is a string of words separated by whitespace, not a list.
-    keywords="sample setuptools development",  # Optional
-    # You can just specify package directories manually here if your project is
-    # simple. Or you can use find_packages().
-    #
-    # Alternatively, if you just want to distribute a single Python file, use
-    # the `py_modules` argument instead as follows, which will expect a file
-    # called `my_module.py` to exist:
-    #
-    #   py_modules=["my_module"],
-    #
-    packages=find_packages(exclude=["docs", "tests"]),
-    python_requires=">=3.5",
-    # List additional groups of dependencies here (e.g. development
-    # dependencies). Users will be able to install these using the "extras"
-    # syntax, for example:
-    #
-    #   $ pip install sampleproject[dev]
-    #
-    # Similar to `install_requires` above, these must be valid existing
-    # projects.
+    packages=[
+        "proquint",
+    ],
+    package_dir={"": "src/python"},
+    python_requires=">=3.9",
    extras_require={  # Optional
        "dev": ["check-manifest"],
        "test": ["pytest", "hypothesis"],
--- a/projects/proquint/src/python/proquint.py
+++ b/projects/proquint/src/python/proquint.py
@ -0,0 +1,164 @@
+"""Proquint - pronounceable codings of integers.
+
+Implemented from http://arxiv.org/html/0901.4016
+
+Quoting -
+
+    we propose encoding a 16-bit string as a proquint of alternating consonants and vowels as follows.
+
+    Four-bits as a consonant:
+
+        0 1 2 3 4 5 6 7 8 9 A B C D E F
+        b d f g h j k l m n p r s t v z
+
+    Two-bits as a vowel:
+
+        0 1 2 3
+        a i o u
+
+    Whole 16-bit word, where "con" = consonant, "vo" = vowel:
+
+         0 1 2 3 4 5 6 7 8 9 A B C D E F
+        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+        |con    |vo |con    |vo |con    |
+        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    Separate proquints using dashes, which can go un-pronounced or be pronounced "eh". The suggested optional magic number prefix to a sequence of proquints is "0q-".
+
+    Here are some IP dotted-quads and their corresponding proquints.
+
+        127.0.0.1       lusab-babad
+        63.84.220.193   gutih-tugad
+        63.118.7.35     gutuk-bisog
+        140.98.193.141  mudof-sakat
+        64.255.6.200    haguz-biram
+        128.30.52.45    mabiv-gibot
+        147.67.119.2    natag-lisaf
+        212.58.253.68   tibup-zujah
+        216.35.68.215   tobog-higil
+        216.68.232.21   todah-vobij
+        198.81.129.136  sinid-makam
+        12.110.110.204  budov-kuras
+
+"""
+
+from functools import cache
+from typing import Iterator, Optional
+
+
+class Proquint(object):
+    """Codec between integers/bytes and proquints (pronounceable identifiers).
+
+    Every 16-bit word becomes five letters, most significant bits first, in the pattern
+    consonant-vowel-consonant-vowel-consonant (4 + 2 + 4 + 2 + 4 bits). Words are joined with
+    dashes.
+
+    >>> Proquint.encode_i32(0x7F000001)
+    'lusab-babad'
+    >>> hex(Proquint.decode('lusab-babad'))
+    '0x7f000001'
+    """
+
+    # Class parameters
+    ################################################################################################
+    # The index of a letter within CONSONANTS is the 4-bit value it encodes (16 letters).
+    CONSONANTS = "bdfghjklmnprstvz"
+    # The index of a letter within VOWELS is the 2-bit value it encodes (4 letters).
+    VOWELS = "aiou"
+
+    # Implementation helpers
+    ################################################################################################
+    @classmethod
+    @cache
+    def _consonant_to_uint(cls, c: str) -> Optional[int]:
+        """Return the 4-bit value encoded by consonant `c`, or None if `c` is not a consonant."""
+
+        try:
+            return cls.CONSONANTS.index(c)
+        except ValueError:
+            return None
+
+    @classmethod
+    @cache
+    def _vowel_to_uint(cls, c: str) -> Optional[int]:
+        """Return the 2-bit value encoded by vowel `c`, or None if `c` is not a vowel."""
+
+        try:
+            return cls.VOWELS.index(c)
+        except ValueError:
+            return None
+
+    @classmethod
+    def _encode(cls, buffer: bytes) -> Iterator[str]:
+        """Yield one five-letter proquint per big-endian byte pair of `buffer`.
+
+        A trailing unpaired byte would not be encoded; `encode_bytes` rejects such input.
+        """
+
+        # Proquints are built from 16-bit words, not single bytes, so the buffer is consumed as
+        # big-endian byte pairs.
+        #
+        # Layout from the proposal, where bit 0 is the MOST significant bit:
+        #
+        #      0 1 2 3 4 5 6 7 8 9 A B C D E F
+        #      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+        #      |con    |vo |con    |vo |con    |
+        #      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+        #
+        # The leftmost field therefore holds the highest-order bits and is emitted first, which
+        # makes the right-shifts needed to extract the five fields 0xC, 0xA, 0x6, 0x4 and 0x0.
+        fields = (
+            (cls.CONSONANTS, 0xC, 0xF),
+            (cls.VOWELS,     0xA, 0x3),
+            (cls.CONSONANTS, 0x6, 0xF),
+            (cls.VOWELS,     0x4, 0x3),
+            (cls.CONSONANTS, 0x0, 0xF),
+        )
+
+        for high, low in zip(buffer[0::2], buffer[1::2]):
+            # Rebuild the two bytes into one 16-bit word
+            word = high << 8 | low
+
+            yield "".join(
+                alphabet[(word >> shift) & mask]
+                for alphabet, shift, mask in fields
+            )
+
+    # Core methods
+    ################################################################################################
+    @classmethod
+    def encode_bytes(cls, buffer: bytes) -> str:
+        """Encode a sequence of bytes into a dash-separated proquint string.
+
+        The bytes are read as big-endian 16-bit words, one proquint per word.
+
+        >>> Proquint.encode_bytes(bytes([127, 0, 0, 1]))
+        'lusab-babad'
+
+        Raises ValueError if `buffer` has an odd number of bytes, since the last byte could not
+        be encoded and would otherwise be dropped silently.
+        """
+
+        if len(buffer) % 2 != 0:
+            raise ValueError(
+                f"Buffer length must be a multiple of 2 bytes, got {len(buffer)}")
+
+        return "-".join(cls._encode(buffer))
+
+    @classmethod
+    def decode(cls, buffer: str) -> int:
+        """Convert a proquint string into the integer it encodes.
+
+        Dashes are ignored; every consonant contributes 4 bits and every vowel 2 bits, most
+        significant first. There is no restriction on the number of proquints.
+
+        >>> hex(Proquint.decode('lusab-babad'))
+        '0x7f000001'
+
+        Raises ValueError if `buffer` contains a character that is neither a dash, a consonant
+        from CONSONANTS, nor a vowel from VOWELS.
+        """
+
+        res = 0
+
+        for c in buffer:
+            if c == "-":
+                continue
+
+            if (mag := cls._consonant_to_uint(c)) is not None:
+                res = (res << 4) | mag
+            elif (mag := cls._vowel_to_uint(c)) is not None:
+                res = (res << 2) | mag
+            else:
+                # Any other character is an error regardless of its position.
+                raise ValueError(f"Bad proquint format: unexpected character {c!r}")
+
+        return res
+
+    # Handy aliases
+    ################################################################################################
+    @classmethod
+    def encode(cls, val: int, width: int) -> str:
+        """Encode the non-negative integer `val` as `width // 16` proquints.
+
+        Raises ValueError if `width` is not a positive multiple of 16, because a proquint always
+        carries exactly 16 bits. Raises OverflowError (from `int.to_bytes`) if `val` is negative
+        or does not fit in `width` bits.
+        """
+
+        if width <= 0 or width % 16 != 0:
+            raise ValueError(f"Width must be a positive multiple of 16 bits, got {width}")
+
+        return cls.encode_bytes(val.to_bytes(width // 8, "big"))
+
+    @classmethod
+    def encode_i16(cls, val: int) -> str:
+        """Encode a 16-bit int to a proquint string."""
+
+        return cls.encode(val, 16)
+
+    @classmethod
+    def encode_i32(cls, val: int) -> str:
+        """Encode a 32-bit int to a proquint string."""
+
+        return cls.encode(val, 32)
+
+    @classmethod
+    def encode_i64(cls, val: int) -> str:
+        """Encode a 64-bit int into a proquint string."""
+
+        return cls.encode(val, 64)
--- a/projects/proquint/test/python/test_examples.py
+++ b/projects/proquint/test/python/test_examples.py
@ -0,0 +1,57 @@
+"""Tests based off of known examples."""
+
+import proquint
+
+import pytest
+
+
+# Known-answer vectors shared by the decode and encode tests below.
+# Each entry is (integer value, bit width passed to encode, expected proquint string).
+examples = [
+    # Every single-bit 32-bit value, from the lowest bit (1) to the highest (1 << 31)
+    (1, 32, "babab-babad"),
+    (2, 32, "babab-babaf"),
+    (4, 32, "babab-babah"),
+    (8, 32, "babab-babam"),
+    (16, 32, "babab-babib"),
+    (32, 32, "babab-babob"),
+    (64, 32, "babab-badab"),
+    (128, 32, "babab-bafab"),
+    (256, 32, "babab-bahab"),
+    (512, 32, "babab-bamab"),
+    (1024, 32, "babab-bibab"),
+    (2048, 32, "babab-bobab"),
+    (4096, 32, "babab-dabab"),
+    (8192, 32, "babab-fabab"),
+    (16384, 32, "babab-habab"),
+    (32768, 32, "babab-mabab"),
+    (65536, 32, "babad-babab"),
+    (131072, 32, "babaf-babab"),
+    (262144, 32, "babah-babab"),
+    (524288, 32, "babam-babab"),
+    (1048576, 32, "babib-babab"),
+    (2097152, 32, "babob-babab"),
+    (4194304, 32, "badab-babab"),
+    (8388608, 32, "bafab-babab"),
+    (16777216, 32, "bahab-babab"),
+    (33554432, 32, "bamab-babab"),
+    (67108864, 32, "bibab-babab"),
+    (134217728, 32, "bobab-babab"),
+    (268435456, 32, "dabab-babab"),
+    (536870912, 32, "fabab-babab"),
+    (1073741824, 32, "habab-babab"),
+    (2147483648, 32, "mabab-babab"),
+
+    # An arbitrary multi-bit value: 0xC0A80010
+    (3232235536, 32, "safom-babib"),
+]
+
+
+@pytest.mark.parametrize('val,width,qint', examples)
+def test_decode_examples(val, width, qint):
+    """Each known proquint must decode to its paired integer; `width` is not needed here."""
+    decoded = proquint.Proquint.decode(qint)
+    assert decoded == val, f"qint {qint} did not decode"
+
+
+@pytest.mark.parametrize('val,width,qint', examples)
+def test_encode_examples(val, width, qint):
+    """Encoding each known integer at its width must produce exactly the paired proquint."""
+    codec = proquint.Proquint
+    encoded_qint = codec.encode(val, width)
+    # Decoded only so that a failure message shows which number was actually produced.
+    decoded_val = codec.decode(encoded_qint)
+    assert encoded_qint == qint, f"did not encode {val} to {qint}; got {encoded_qint} ({decoded_val})"
--- a/projects/proquint/test/python/test_hypothesis.py
+++ b/projects/proquint/test/python/test_hypothesis.py
@ -0,0 +1,31 @@
+"""Tests based off of round-tripping randomly generated examples."""
+
+import proquint
+
+import pytest
+from hypothesis import given
+from hypothesis.strategies import integers
+
+
+# max_value is inclusive, so the bound is 2**16 - 1: 2**16 itself does not fit in 16 bits and
+# would make encode() fail with OverflowError.
+@given(integers(min_value=0, max_value=(1 << 16) - 1))
+def test_round_trip_16(val):
+    """Every unsigned 16-bit value must survive encode followed by decode."""
+    assert proquint.Proquint.decode(
+        proquint.Proquint.encode(val, 16)) == val
+
+
+# max_value is inclusive, so the bound is 2**32 - 1: 2**32 itself does not fit in 32 bits and
+# would make encode() fail with OverflowError.
+@given(integers(min_value=0, max_value=(1 << 32) - 1))
+def test_round_trip_32(val):
+    """Every unsigned 32-bit value must survive encode followed by decode."""
+    assert proquint.Proquint.decode(
+        proquint.Proquint.encode(val, 32)) == val
+
+
+# max_value is inclusive, so the bound is 2**64 - 1: 2**64 itself does not fit in 64 bits and
+# would make encode() fail with OverflowError.
+@given(integers(min_value=0, max_value=(1 << 64) - 1))
+def test_round_trip_64(val):
+    """Every unsigned 64-bit value must survive encode followed by decode."""
+    assert proquint.Proquint.decode(
+        proquint.Proquint.encode(val, 64)) == val
+
+
+# max_value is inclusive, so the bound is 2**512 - 1: 2**512 itself does not fit in 512 bits and
+# would make encode() fail with OverflowError.
+@given(integers(min_value=0, max_value=(1 << 512) - 1))
+def test_round_trip_512(val):
+    """Every unsigned 512-bit value must survive encode followed by decode."""
+    assert proquint.Proquint.decode(
+        proquint.Proquint.encode(val, 512)) == val