Get proquint documented

2021-08-03 19:04:23 -06:00 · 2021-08-03 19:04:23 -06:00 · abc4b8dddb
commit abc4b8dddb
parent 0bda1dd2f3
7 changed files with 330 additions and 151 deletions
--- a/projects/proquint/BUILD
+++ b/projects/proquint/BUILD
@ -0,0 +1,16 @@
 # Library target: every Python file under src/python, with that directory listed in
 # `imports` so the modules are importable by their top-level names.
 py_library(
  name = "lib",
  srcs = glob(["src/python/**/*.py"]),
  imports = [
    "src/python",
  ],
 )
 # Test target: every Python file under test/python, depending on the library above and on
 # the "hypothesis" requirement.
 # NOTE(review): py_pytest and py_requirement are not defined in this file -- presumably
 # repository-level macros; confirm where they are loaded from.
 py_pytest(
  name = "test",
  srcs = glob(["test/python/**/*.py"]),
  deps = [
    ":lib",
    py_requirement("hypothesis"),
  ],
 )
--- a/projects/proquint/README.md
+++ b/projects/proquint/README.md
@ -1,3 +1,53 @@
 # Proquint
-An alternative implementation to https://github.com/dsw/proquint/tree/master/python, which is kinda garbo.
+An implementation of [A Proposal for Proquints](https://arxiv.org/html/0901.4016).
 To summarize the paper, traditional decimal and hexadecimal codings are inconvenient for "large" bit-width identifiers.
 Decimal and hexadecimal codings offer no obvious dense enunciation and are traditionally presented without segmentation punctuation.
 The proquint format is a semantically dense coding of 16-bit chunks that fits within the enunciable space of English.
 ## Demo
 ``` python
 >>> from proquint import Proquint
 >>> Proquint.encode_i16(0)
 'babab'
 >>> Proquint.encode_i16(1)
 'babad'
 >>> Proquint.encode_i64(14708250061244963317)
 'subiv-gavab-sobiz-noluj'
 >>> Proquint.decode('babad')
 1
 ```
 ## API Overview
 ### `proquint.Proquint.CONSONANTS`
 A string of consonants to use when encoding or decoding proquints.
 Must be of length 16.
 ### `proquint.Proquint.VOWELS`
 A string of vowels to use when encoding or decoding proquints.
 Must be of length 4.
 ### `proquint.Proquint.decode(buffer: str) -> int`
 Decode a proquint string to an integer value without restriction on bit-width.
 ### `proquint.Proquint.encode(val: int, width: int) -> str`
 Encode an integer into a string which will decode to the same value.
 Note that the bit-width must be specified in order to determine the number of required segments; since each segment encodes 16 bits, the width should be a multiple of 16.
 ### `proquint.Proquint.encode_{i16, i32, i64}(val: int) -> str`
 Helpers for encoding known-width quantities.
 ## LICENSE
 Copyright Reid 'arrdem' McKenzie August 2021.
 Published under the terms of the MIT license.
--- a/projects/proquint/proquint.py
+++ b/projects/proquint/proquint.py
@ -1,105 +0,0 @@
 """Proquint - pronounceable codings of integers.
 Implemented from http://arxiv.org/html/0901.4016
 """
 from functools import cache
 class Proquint(object):
    """Encode integers and byte strings as proquints and decode proquint strings.
    Every method is a classmethod and reads the alphabets through ``cls``.
    NOTE(review): the lookup helpers, ``_encode`` and ``decode`` carry the review notes
    written next to the affected lines below.
    """
    # Class parameters
    ################################################################################################
    # Letters indexed by the 4-bit value they stand for.
    CONSONANTS = "bdfghjklmnprstvz"
    # Letters indexed by the 2-bit value they stand for.
    VOWELS = "aiou"
    # Default byte order handed to ``int.to_bytes`` by ``encode``.
    BYTEORDER = "big"
    # Implementation helpers
    ################################################################################################
    @classmethod
    @cache
    def _consonant_to_uint(cls, c: str) -> int:
        """Look ``c`` up in ``CONSONANTS``; results are memoised per ``(cls, c)``."""
        # NOTE(review): ``:=`` binds looser than ``==``, so ``idx`` receives the boolean result
        # of the comparison rather than the index. ``str.index`` raises ValueError on a miss and
        # never returns -1, so the KeyError branch is not reachable.
        if idx := cls.CONSONANTS.index(c) == -1:
            raise KeyError
        return idx
    @classmethod
    @cache
    def _vowel_to_uint(cls, c: str) -> int:
        """Look ``c`` up in ``VOWELS``; results are memoised per ``(cls, c)``."""
        # NOTE(review): same ``:=`` precedence and ``str.index`` caveats as the consonant helper
        # -- ``idx`` is a boolean and a miss surfaces as ValueError, not KeyError.
        if idx := cls.VOWELS.index(c) == -1:
            raise KeyError
        return idx
    @classmethod
    def _encode(cls, buffer: bytes) -> str:
        """Yield one five-letter group per pair of bytes in ``buffer``.
        Despite the ``str`` annotation this is a generator of strings. Bytes are paired with
        ``zip``, so a trailing unpaired byte is not visited.
        """
        for n, m in zip(buffer[0::2], buffer[1::2]):
            # NOTE(review): the first byte is shifted by 16, but the masks below only read
            # bits 0-15, so it does not contribute to the output; an 8-bit shift looks intended.
            n = n << 16 | m
            # Fields are extracted starting from the least significant bits, and the letters
            # are emitted in that same order (c1 first).
            c1 = n & 0x0F
            v1 = (n >> 4) & 0x03
            c2 = (n >> 6) & 0x0F
            v2 = (n >> 10) & 0x03
            c3 = (n >> 12) & 0x0F
            yield f"{cls.CONSONANTS[c1]}{cls.VOWELS[v1]}{cls.CONSONANTS[c2]}{cls.VOWELS[v2]}{cls.CONSONANTS[c3]}"
    # Core methods
    ################################################################################################
    @classmethod
    def encode_bytes(cls, buffer: bytes) -> str:
        """Encode a sequence of bytes into a proquint string.
        The groups produced by ``_encode`` are joined with dashes.
        >>>
        """
        return "-".join(cls._encode(buffer))
    @classmethod
    def decode(cls, buffer: str) -> int:
        """Convert proquint string identifier into corresponding 32-bit integer value.
        Dashes are dropped before the remaining characters are folded into the result.
        NOTE(review): ``hex()`` produces lowercase digits, so the expected output below would
        not match even once decoding works.
        >>> hex(Proquint.decode('lusab-babad'))
        '0x7F000001'
        """
        res = 0
        for i, c in enumerate([c for c in buffer if c != '-']):
            # NOTE(review): ``:=`` binds looser than ``is not``, so ``mag`` receives the
            # boolean of the ``is not None`` test instead of the looked-up value.
            if mag := cls._consonant_to_uint(c) is not None:
                res <<= 4
                res += mag
            else:
                mag = cls._vowel_to_uint(c)
                if mag is not None:
                    res <<= 2
                    res += mag
                # Index 5 (counted after the dashes were removed) is exempt from the error.
                elif i != 5:
                    raise ValueError('Bad proquint format')
        return res
    # Handy aliases
    ################################################################################################
    @classmethod
    def encode(cls, val: int, width: int, byteorder=BYTEORDER):
        """Encode an integer into a proquint string.
        ``val`` is serialised with ``int.to_bytes(width // 8, byteorder)`` and passed to
        ``encode_bytes``. Raises ValueError when ``width`` is below 8 or not a multiple of 8.
        NOTE(review): the error text speaks of a power of 2, which is not what is checked.
        """
        if width % 8 != 0 or width < 8:
            raise ValueError(f"Width must be a positive power of 2 greater than 8")
        return cls.encode_bytes(val.to_bytes(width // 8, byteorder))
    @classmethod
    def encode_i16(cls, val: int):
        """Encode a 16-bit int to a proquint string."""
        return cls.encode(val, 16)
    @classmethod
    def encode_i32(cls, val: int):
        """Encode a 32-bit int to a proquint string."""
        return cls.encode(val, 32)
    @classmethod
    def encode_i64(cls, val: int):
        """Encode a 64-bit int into a proquint string."""
        return cls.encode(val, 64)
--- a/projects/proquint/setup.py
+++ b/projects/proquint/setup.py
@ -1,33 +1,18 @@
 """A setuptools based setup module.
 """
 # io.open is needed for projects that support Python 2.7
 # It ensures open() defaults to text mode with universal newlines,
 # and accepts an argument to specify the text encoding
 # Python 3 only projects can skip this import
 from io import open
 from os import path
 # Always prefer setuptools over distutils
 from setuptools import find_packages, setup
 here = path.abspath(path.dirname(__file__))
 # Get the long description from the README file
 with open(path.join(here, "README.md"), encoding="utf-8") as f:
    long_description = f.read()
 # Arguments marked as "Required" below must be included for upload to PyPI.
 # Fields marked as "Optional" may be commented out.
 setup(
-    name="proquint",  # Required
+    name="arrdem.proquint",
-    version="0.1.0",  # Required
+    version="0.1.0",
-    description="Enunciable numerics",
+    description="Enunciable numeric identifiers",
-    long_description=long_description,  # Optional
+    long_description=long_description,
-    long_description_content_type="text/markdown",  # Optional (see note above)
+    long_description_content_type="text/markdown",
    url="https://github.com/arrdem/source",
    author="Reid 'arrdem' McKenzie",
    author_email="me@arrdem.com",
@ -37,32 +22,13 @@ setup(
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: BSD License",
-        "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.9",
    ],
-    # This field adds keywords for your project which will appear on the
+    packages=[
-    # project page. What does your project relate to?
+        "proquint",
-    #
+    ],
-    # Note that this is a string of words separated by whitespace, not a list.
+    package_dir={"": "src/python"},
-    keywords="sample setuptools development",  # Optional
+    python_requires=">=3.9",
    # You can just specify package directories manually here if your project is
    # simple. Or you can use find_packages().
    #
    # Alternatively, if you just want to distribute a single Python file, use
    # the `py_modules` argument instead as follows, which will expect a file
    # called `my_module.py` to exist:
    #
    #   py_modules=["my_module"],
    #
    packages=find_packages(exclude=["docs", "tests"]),
    python_requires=">=3.5",
    # List additional groups of dependencies here (e.g. development
    # dependencies). Users will be able to install these using the "extras"
    # syntax, for example:
    #
    #   $ pip install sampleproject[dev]
    #
    # Similar to `install_requires` above, these must be valid existing
    # projects.
    extras_require={  # Optional
        "dev": ["check-manifest"],
        "test": ["pytest", "hypothesis"],
--- a/projects/proquint/src/python/proquint.py
+++ b/projects/proquint/src/python/proquint.py
@ -0,0 +1,164 @@
 """Proquint - pronounceable codings of integers.
 Implemented from http://arxiv.org/html/0901.4016
 Quoting -
    we propose encoding a 16-bit string as a proquint of alternating consonants and vowels as follows.
    Four-bits as a consonant:
        0 1 2 3 4 5 6 7 8 9 A B C D E F
        b d f g h j k l m n p r s t v z
    Two-bits as a vowel:
        0 1 2 3
        a i o u
    Whole 16-bit word, where "con" = consonant, "vo" = vowel:
         0 1 2 3 4 5 6 7 8 9 A B C D E F
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        |con    |vo |con    |vo |con    |
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    Separate proquints using dashes, which can go un-pronounced or be pronounced "eh". The suggested optional magic number prefix to a sequence of proquints is "0q-".
    Here are some IP dotted-quads and their corresponding proquints.
        127.0.0.1       lusab-babad
        63.84.220.193   gutih-tugad
        63.118.7.35     gutuk-bisog
        140.98.193.141  mudof-sakat
        64.255.6.200    haguz-biram
        128.30.52.45    mabiv-gibot
        147.67.119.2    natag-lisaf
        212.58.253.68   tibup-zujah
        216.35.68.215   tobog-higil
        216.68.232.21   todah-vobij
        198.81.129.136  sinid-makam
        12.110.110.204  budov-kuras
 """
 from functools import cache
 from typing import Iterator, Optional
 class Proquint(object):
    """Encoder/decoder for proquints, a pronounceable coding of 16-bit words.
    Each 16-bit word is written, most significant bits first, as five letters: consonant
    (4 bits), vowel (2), consonant (4), vowel (2), consonant (4). Words are joined with dashes.
    All methods are classmethods and read the alphabets through ``cls``.
    """
    # Class parameters
    ################################################################################################
    # Sixteen consonants; the position of each letter is the 4-bit value it encodes.
    CONSONANTS = "bdfghjklmnprstvz"
    # Four vowels; the position of each letter is the 2-bit value it encodes.
    VOWELS = "aiou"
    # Implementation helpers
    ################################################################################################
    @classmethod
    @cache
    def _consonant_to_uint(cls, c: str) -> Optional[int]:
        """Return the 4-bit value of consonant ``c``, or None when ``c`` is not a consonant."""
        try:
            return cls.CONSONANTS.index(c)
        except ValueError:
            return None
    @classmethod
    @cache
    def _vowel_to_uint(cls, c: str) -> Optional[int]:
        """Return the 2-bit value of vowel ``c``, or None when ``c`` is not a vowel."""
        try:
            return cls.VOWELS.index(c)
        except ValueError:
            return None
    @classmethod
    def _encode(cls, buffer: bytes) -> Iterator[str]:
        """Yield one five-letter proquint per big-endian 16-bit word of ``buffer``.
        Bytes are consumed in pairs; a trailing unpaired byte is not visited, so callers
        should pass an even number of bytes.
        """
        # Layout of one 16-bit word per the proposal, most significant bit on the left:
        #
        #      0 1 2 3 4 5 6 7 8 9 A B C D E F
        #      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        #      |con    |vo |con    |vo |con    |
        #      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        #
        # The leftmost field holds the highest-order bits, so the right-shifts that bring each
        # field down to bit 0 are 0xC, 0xA, 0x6, 0x4 and 0x0.
        fields = (
            (cls.CONSONANTS, 0xC, 0xF),
            (cls.VOWELS, 0xA, 0x3),
            (cls.CONSONANTS, 0x6, 0xF),
            (cls.VOWELS, 0x4, 0x3),
            (cls.CONSONANTS, 0x0, 0xF),
        )
        for high, low in zip(buffer[0::2], buffer[1::2]):
            # Rebuild the 16-bit word from its two bytes.
            word = high << 8 | low
            yield "".join(alphabet[word >> shift & mask] for alphabet, shift, mask in fields)
    # Core methods
    ################################################################################################
    @classmethod
    def encode_bytes(cls, buffer: bytes) -> str:
        """Encode a sequence of bytes into a dash-separated proquint string.
        The bytes are read as big-endian 16-bit words, one proquint per word. An empty buffer
        encodes to the empty string.
        Raises:
            ValueError: if ``buffer`` has an odd number of bytes, since the last byte could
                not be encoded and would otherwise be dropped silently.
        >>> Proquint.encode_bytes(bytes([127, 0, 0, 1]))
        'lusab-babad'
        """
        if len(buffer) % 2 != 0:
            raise ValueError("Buffer length must be a multiple of 2 bytes")
        return "-".join(cls._encode(buffer))
    @classmethod
    def decode(cls, buffer: str) -> int:
        """Convert a proquint string into the integer it encodes, at any bit-width.
        Dashes are skipped. Every other character must be a consonant (contributing 4 bits)
        or a vowel (contributing 2 bits); letter order and group length are not checked.
        Raises:
            ValueError: if ``buffer`` contains a character that is neither a dash, a
                consonant nor a vowel.
        >>> hex(Proquint.decode('lusab-babad'))
        '0x7f000001'
        """
        res = 0
        for c in buffer:
            if c == "-":
                continue
            if (mag := cls._consonant_to_uint(c)) is not None:
                res = (res << 4) + mag
            elif (mag := cls._vowel_to_uint(c)) is not None:
                res = (res << 2) + mag
            else:
                # Dashes are already skipped above, so no position is a legitimate separator
                # slot: any unknown character is an error wherever it appears.
                raise ValueError(f"Bad proquint format: unexpected character {c!r}")
        return res
    # Handy aliases
    ################################################################################################
    @classmethod
    def encode(cls, val: int, width: int) -> str:
        """Encode the non-negative integer ``val`` as a ``width``-bit proquint string.
        Args:
            val: the integer to encode; it must fit in ``width`` bits, otherwise
                ``int.to_bytes`` raises OverflowError.
            width: the bit-width, which fixes the number of segments (``width // 16``).
        Raises:
            ValueError: if ``width`` is not a positive multiple of 16. Each segment encodes
                exactly 16 bits, so any other width would lose data.
        """
        if width < 16 or width % 16 != 0:
            raise ValueError("Width must be a positive multiple of 16")
        return cls.encode_bytes(val.to_bytes(width // 8, "big"))
    @classmethod
    def encode_i16(cls, val: int) -> str:
        """Encode a 16-bit int to a proquint string."""
        return cls.encode(val, 16)
    @classmethod
    def encode_i32(cls, val: int) -> str:
        """Encode a 32-bit int to a proquint string."""
        return cls.encode(val, 32)
    @classmethod
    def encode_i64(cls, val: int) -> str:
        """Encode a 64-bit int into a proquint string."""
        return cls.encode(val, 64)
--- a/projects/proquint/test/python/test_examples.py
+++ b/projects/proquint/test/python/test_examples.py
@ -0,0 +1,57 @@
 """Tests based off of known examples."""
 import proquint
 import pytest
 # Known-answer table shared by the parametrized tests below.
 # Each entry is (integer value, bit width passed to encode, expected proquint string).
 examples = [
    # Various single-bit data
    (1, 32, "babab-babad"),
    (2, 32, "babab-babaf"),
    (4, 32, "babab-babah"),
    (8, 32, "babab-babam"),
    (16, 32, "babab-babib"),
    (32, 32, "babab-babob"),
    (64, 32, "babab-badab"),
    (128, 32, "babab-bafab"),
    (256, 32, "babab-bahab"),
    (512, 32, "babab-bamab"),
    (1024, 32, "babab-bibab"),
    (2048, 32, "babab-bobab"),
    (4096, 32, "babab-dabab"),
    (8192, 32, "babab-fabab"),
    (16384, 32, "babab-habab"),
    (32768, 32, "babab-mabab"),
    (65536, 32, "babad-babab"),
    (131072, 32, "babaf-babab"),
    (262144, 32, "babah-babab"),
    (524288, 32, "babam-babab"),
    (1048576, 32, "babib-babab"),
    (2097152, 32, "babob-babab"),
    (4194304, 32, "badab-babab"),
    (8388608, 32, "bafab-babab"),
    (16777216, 32, "bahab-babab"),
    (33554432, 32, "bamab-babab"),
    (67108864, 32, "bibab-babab"),
    (134217728, 32, "bobab-babab"),
    (268435456, 32, "dabab-babab"),
    (536870912, 32, "fabab-babab"),
    (1073741824, 32, "habab-babab"),
    (2147483648, 32, "mabab-babab"),
    # A random value
    (3232235536, 32, "safom-babib"),
 ]
@pytest.mark.parametrize('val,width,qint', examples)
 def test_decode_examples(val, width, qint):
    assert proquint.Proquint.decode(qint) == val, f"qint {qint} did not decode"
@pytest.mark.parametrize('val,width,qint', examples)
 def test_encode_examples(val, width, qint):
    encoded_qint = proquint.Proquint.encode(val, width)
    decoded_val = proquint.Proquint.decode(encoded_qint)
    assert encoded_qint == qint, f"did not encode {val} to {qint}; got {encoded_qint} ({decoded_val})"
--- a/projects/proquint/test/python/test_hypothesis.py
+++ b/projects/proquint/test/python/test_hypothesis.py
@ -0,0 +1,31 @@
 """Tests based off of round-tripping randomly generated examples."""
 import proquint
 import pytest
 from hypothesis import given
 from hypothesis.strategies import integers
@given(integers(min_value=0, max_value=1<<16))
 def test_round_trip_16(val):
    assert proquint.Proquint.decode(
        proquint.Proquint.encode(val, 16)) == val
@given(integers(min_value=0, max_value=1<<32))
 def test_round_trip_32(val):
    assert proquint.Proquint.decode(
        proquint.Proquint.encode(val, 32)) == val
@given(integers(min_value=0, max_value=1<<64))
 def test_round_trip_64(val):
    assert proquint.Proquint.decode(
        proquint.Proquint.encode(val, 64)) == val
@given(integers(min_value=0, max_value=1<<512))
 def test_round_trip_512(val):
    assert proquint.Proquint.decode(
        proquint.Proquint.encode(val, 512)) == val