diff --git a/projects/proquint/BUILD b/projects/proquint/BUILD new file mode 100644 index 0000000..f3fceeb --- /dev/null +++ b/projects/proquint/BUILD @@ -0,0 +1,16 @@ +py_library( + name = "lib", + srcs = glob(["src/python/**/*.py"]), + imports = [ + "src/python", + ], +) + +py_pytest( + name = "test", + srcs = glob(["test/python/**/*.py"]), + deps = [ + ":lib", + py_requirement("hypothesis"), + ], +) diff --git a/projects/proquint/README.md b/projects/proquint/README.md index f7fea94..d16fa68 100644 --- a/projects/proquint/README.md +++ b/projects/proquint/README.md @@ -1,3 +1,53 @@ # Proquint -An alternative implementation to https://github.com/dsw/proquint/tree/master/python, which is kinda garbo. +An implementation of [A Proposal for Proquints](https://arxiv.org/html/0901.4016). + +To summarize the paper, traditional decimal and hexadecimal codings are inconvenient for "large" bit-width identifiers. +Decimal and hexadecimal codings offer no obvious dense enunciation and are traditionally presented without segmentation punctuation. +The proquint format is a semantically dense coding for 16 bit hunks fitting within the enunciable space of English. + +## Demo + +``` python +>>> from proquint import Proquint +>>> Proquint.encode_i16(0) +'babab' +>>> Proquint.encode_i16(1) +'babad' +>>> Proquint.encode_i64(14708250061244963317) +'subiv-gavab-sobiz-noluj' +>>> Proquint.decode('babad') +1 +``` + +## API Overview + +### `proquint.Proquint.CONSONANTS` + +A string of consonants to use when encoding or decoding proquints. +Must be of length 16. + +### `proquint.Proquint.VOWELS` + +A string of vowels to use when encoding or decoding proquints. +Must be of length 4. + +### `proquint.Proquint.decode(buffer: str) -> int` + +Decode a proquint string to an integer value without restriction on bit-width. + +### `proquint.Proquint.encode(val: int, width: int) -> str` + +Encode an integer into a string which will decode to the same value. 
+ +Note that the bit-width must be specified in order to determine the number of required segments. + +### `proquint.Proquint.encode_{i16, i32, i64}(val: int) -> str` + +Helpers for encoding known-width quantities. + +## LICENSE + +Copyright Reid 'arrdem' McKenzie August 2021. + +Published under the terms of the MIT license. diff --git a/projects/proquint/proquint.py b/projects/proquint/proquint.py deleted file mode 100644 index 45f9319..0000000 --- a/projects/proquint/proquint.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Proquint - pronounceable codings of integers. - -Implemented from http://arxiv.org/html/0901.4016 -""" - -from functools import cache - - -class Proquint(object): - # Class parameters - ################################################################################################ - CONSONANTS = "bdfghjklmnprstvz" - VOWELS = "aiou" - BYTEORDER = "big" - - # Implementation helpers - ################################################################################################ - @classmethod - @cache - def _consonant_to_uint(cls, c: str) -> int: - if idx := cls.CONSONANTS.index(c) == -1: - raise KeyError - return idx - - @classmethod - @cache - def _vowel_to_uint(cls, c: str) -> int: - if idx := cls.VOWELS.index(c) == -1: - raise KeyError - return idx - - @classmethod - def _encode(cls, buffer: bytes) -> str: - for n, m in zip(buffer[0::2], buffer[1::2]): - n = n << 16 | m - c1 = n & 0x0F - v1 = (n >> 4) & 0x03 - c2 = (n >> 6) & 0x0F - v2 = (n >> 10) & 0x03 - c3 = (n >> 12) & 0x0F - - yield f"{cls.CONSONANTS[c1]}{cls.VOWELS[v1]}{cls.CONSONANTS[c2]}{cls.VOWELS[v2]}{cls.CONSONANTS[c3]}" - - # Core methods - ################################################################################################ - @classmethod - def encode_bytes(cls, buffer: bytes) -> str: - """Encode a sequence of bytes into a proquint string. 
- - >>> - """ - - return "-".join(cls._encode(buffer)) - - @classmethod - def decode(cls, buffer: str) -> int: - """Convert proquint string identifier into corresponding 32-bit integer value. - - >>> hex(Proquint.decode('lusab-babad')) - '0x7F000001' - """ - - res = 0 - - for i, c in enumerate([c for c in buffer if c != '-']): - if mag := cls._consonant_to_uint(c) is not None: - res <<= 4 - res += mag - else: - mag = cls._vowel_to_uint(c) - if mag is not None: - res <<= 2 - res += mag - elif i != 5: - raise ValueError('Bad proquint format') - return res - - # Handy aliases - ################################################################################################ - @classmethod - def encode(cls, val: int, width: int, byteorder=BYTEORDER): - """Encode an integer into a proquint string.""" - - if width % 8 != 0 or width < 8: - raise ValueError(f"Width must be a positive power of 2 greater than 8") - - return cls.encode_bytes(val.to_bytes(width // 8, byteorder)) - - @classmethod - def encode_i16(cls, val: int): - """Encode a 16bi int to a proquint string.""" - - return cls.encode(val, 16) - - @classmethod - def encode_i32(cls, val: int): - """Encode a 32bi int to a proquint string.""" - - return cls.encode(val, 32) - - @classmethod - def encode_i64(cls, val: int): - """Encode a 64bi int into a proquint string.""" - - return cls.encode(val, 64) diff --git a/projects/proquint/setup.py b/projects/proquint/setup.py index d852561..db64e0a 100644 --- a/projects/proquint/setup.py +++ b/projects/proquint/setup.py @@ -1,33 +1,18 @@ -"""A setuptools based setup module. 
- -""" - -# io.open is needed for projects that support Python 2.7 -# It ensures open() defaults to text mode with universal newlines, -# and accepts an argument to specify the text encoding -# Python 3 only projects can skip this import -from io import open -from os import path - -# Always prefer setuptools over distutils from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) -# Get the long description from the README file with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() -# Arguments marked as "Required" below must be included for upload to PyPI. -# Fields marked as "Optional" may be commented out. setup( - name="proquint", # Required - version="0.1.0", # Required - description="Enunciable numerics", - long_description=long_description, # Optional - long_description_content_type="text/markdown", # Optional (see note above) + name="arrdem.proquint", + version="0.1.0", + description="Enunciable numeric identifiers", + long_description=long_description, + long_description_content_type="text/markdown", url="https://github.com/arrdem/source", author="Reid 'arrdem' McKenzie", author_email="me@arrdem.com", @@ -37,32 +22,13 @@ setup( "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.9", ], - # This field adds keywords for your project which will appear on the - # project page. What does your project relate to? - # - # Note that this is a string of words separated by whitespace, not a list. - keywords="sample setuptools development", # Optional - # You can just specify package directories manually here if your project is - # simple. Or you can use find_packages(). 
- # - # Alternatively, if you just want to distribute a single Python file, use - # the `py_modules` argument instead as follows, which will expect a file - # called `my_module.py` to exist: - # - # py_modules=["my_module"], - # - packages=find_packages(exclude=["docs", "tests"]), - python_requires=">=3.5", - # List additional groups of dependencies here (e.g. development - # dependencies). Users will be able to install these using the "extras" - # syntax, for example: - # - # $ pip install sampleproject[dev] - # - # Similar to `install_requires` above, these must be valid existing - # projects. + packages=[ + "proquint", + ], + package_dir={"": "src/python"}, + python_requires=">=3.9", extras_require={ # Optional "dev": ["check-manifest"], "test": ["pytest", "hypothesis"], diff --git a/projects/proquint/src/python/proquint.py b/projects/proquint/src/python/proquint.py new file mode 100644 index 0000000..2293c3f --- /dev/null +++ b/projects/proquint/src/python/proquint.py @@ -0,0 +1,164 @@ +"""Proquint - pronounceable codings of integers. + +Implemented from http://arxiv.org/html/0901.4016 + +Quoting - + + we propose encoding a 16-bit string as a proquint of alternating consonants and vowels as follows. + + Four-bits as a consonant: + + 0 1 2 3 4 5 6 7 8 9 A B C D E F + b d f g h j k l m n p r s t v z + + Two-bits as a vowel: + + 0 1 2 3 + a i o u + + Whole 16-bit word, where "con" = consonant, "vo" = vowel: + + 0 1 2 3 4 5 6 7 8 9 A B C D E F + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |con |vo |con |vo |con | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Separate proquints using dashes, which can go un-pronounced or be pronounced "eh". The suggested optional magic number prefix to a sequence of proquints is "0q-". + + Here are some IP dotted-quads and their corresponding proquints. 
class Proquint(object):
    """Encode integers and byte strings as proquints, and decode them back.

    A proquint spells one 16-bit word as five letters in the pattern
    consonant-vowel-consonant-vowel-consonant, most significant bits first.
    Consonants carry four bits each and vowels carry two. Multiple words are
    joined with "-".

    Subclasses may override ``CONSONANTS`` (16 letters) and ``VOWELS``
    (4 letters) to use a different alphabet; the alphabets are looked up on
    every call, so such overrides take effect immediately.
    """

    # Class parameters
    ################################################################################################
    # Letters for the 4-bit fields; a letter's index is the value it encodes.
    CONSONANTS = "bdfghjklmnprstvz"
    # Letters for the 2-bit fields; a letter's index is the value it encodes.
    VOWELS = "aiou"

    # Implementation helpers
    ################################################################################################
    @classmethod
    def _consonant_to_uint(cls, c: str) -> "int | None":
        """Return the 4-bit value of consonant `c`, or None if `c` is not a consonant."""

        try:
            return cls.CONSONANTS.index(c)
        except ValueError:
            return None

    @classmethod
    def _vowel_to_uint(cls, c: str) -> "int | None":
        """Return the 2-bit value of vowel `c`, or None if `c` is not a vowel."""

        try:
            return cls.VOWELS.index(c)
        except ValueError:
            return None

    @classmethod
    def _encode(cls, buffer: bytes):
        """Yield one five-letter proquint for each 16-bit word of `buffer`.

        Bytes are consumed in pairs, high byte first. An unpaired trailing
        byte is not encoded; callers are expected to pass an even-length
        buffer (``encode_bytes`` enforces this).
        """

        # Proquints are built from 16-bit words, not from bytes. The proposal
        # draws the word as
        #
        #  0 1 2 3 4 5 6 7 8 9 A B C D E F
        # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        # |con    |vo |con    |vo |con    |
        # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        #
        # with bit 0 being the MOST significant bit. The leftmost field
        # therefore sits in the highest bits, giving right-shifts of
        # 0xC, 0xA, 0x6, 0x4 and 0x0 for the five letters.
        layout = (
            (cls.CONSONANTS, 0xC, 0xF),
            (cls.VOWELS, 0xA, 0x3),
            (cls.CONSONANTS, 0x6, 0xF),
            (cls.VOWELS, 0x4, 0x3),
            (cls.CONSONANTS, 0x0, 0xF),
        )

        for high, low in zip(buffer[0::2], buffer[1::2]):
            # Rebuild the 16-bit word from its two bytes.
            word = high << 8 | low
            yield "".join(
                alphabet[word >> shift & mask] for alphabet, shift, mask in layout
            )

    # Core methods
    ################################################################################################
    @classmethod
    def encode_bytes(cls, buffer: bytes) -> str:
        """Encode a sequence of bytes into a dash-separated proquint string.

        The buffer is read as big-endian 16-bit words, one proquint per word.

        >>> Proquint.encode_bytes(bytes([127, 0, 0, 1]))
        'lusab-babad'

        Raises:
            ValueError: if `buffer` has an odd number of bytes, since the
                final byte could not be represented.
        """

        if len(buffer) % 2:
            raise ValueError("Proquints encode 16-bit words; buffer length must be even")

        return "-".join(cls._encode(buffer))

    @classmethod
    def decode(cls, buffer: str) -> int:
        """Convert a proquint string into the integer it encodes.

        Dashes are ignored. Every other character contributes four bits
        (consonant) or two bits (vowel), most significant first, so the
        result has no fixed bit-width.

        >>> hex(Proquint.decode('lusab-babad'))
        '0x7f000001'

        Raises:
            ValueError: if `buffer` contains a character that is neither a
                dash, a consonant nor a vowel.
        """

        res = 0

        for c in buffer:
            if c == '-':
                continue

            if (mag := cls._consonant_to_uint(c)) is not None:
                res = (res << 4) + mag
            elif (mag := cls._vowel_to_uint(c)) is not None:
                res = (res << 2) + mag
            else:
                # Dashes were skipped above, so nothing else is a legal separator.
                raise ValueError('Bad proquint format')

        return res

    # Handy aliases
    ################################################################################################
    @classmethod
    def encode(cls, val: int, width: int) -> str:
        """Encode a non-negative integer of `width` bits into a proquint string.

        `width` fixes how many proquints are produced. When it is not a
        multiple of 16 the value is zero-extended to the next 16-bit
        boundary, so the result still decodes to `val`.

        >>> Proquint.encode(1, 32)
        'babab-babad'

        Raises:
            ValueError: if `width` is not a positive multiple of 8.
            OverflowError: if `val` is negative or does not fit in `width` bits.
        """

        if width % 8 != 0 or width < 8:
            raise ValueError("Width must be a positive multiple of 8")

        raw = val.to_bytes(width // 8, "big")
        if len(raw) % 2:
            # Pad on the most-significant side so the numeric value is unchanged.
            raw = b"\x00" + raw

        return cls.encode_bytes(raw)

    @classmethod
    def encode_i16(cls, val: int) -> str:
        """Encode a 16-bit int to a proquint string."""

        return cls.encode(val, 16)

    @classmethod
    def encode_i32(cls, val: int) -> str:
        """Encode a 32-bit int to a proquint string."""

        return cls.encode(val, 32)

    @classmethod
    def encode_i64(cls, val: int) -> str:
        """Encode a 64-bit int to a proquint string."""

        return cls.encode(val, 64)
"""Tests based off of round-tripping randomly generated examples."""

import proquint

import pytest
from hypothesis import given
from hypothesis.strategies import integers


def _assert_round_trip(val, width):
    """Assert that encoding `val` at `width` bits and decoding it returns `val`."""

    assert proquint.Proquint.decode(
        proquint.Proquint.encode(val, width)) == val


# NOTE: hypothesis bounds are inclusive, so the largest value that fits in
# N bits is (1 << N) - 1. Using 1 << N would generate a value that needs
# N + 1 bits and cannot be encoded at the requested width.

@given(integers(min_value=0, max_value=(1 << 16) - 1))
def test_round_trip_16(val):
    _assert_round_trip(val, 16)


@given(integers(min_value=0, max_value=(1 << 32) - 1))
def test_round_trip_32(val):
    _assert_round_trip(val, 32)


@given(integers(min_value=0, max_value=(1 << 64) - 1))
def test_round_trip_64(val):
    _assert_round_trip(val, 64)


@given(integers(min_value=0, max_value=(1 << 512) - 1))
def test_round_trip_512(val):
    _assert_round_trip(val, 512)