Get proquint documented
This commit is contained in:
parent
0bda1dd2f3
commit
abc4b8dddb
7 changed files with 330 additions and 151 deletions
16
projects/proquint/BUILD
Normal file
16
projects/proquint/BUILD
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Library target bundling every Python source under src/python.
py_library(
    name = "lib",
    srcs = glob(["src/python/**/*.py"]),
    # Adds src/python as an import root for dependents of this target.
    imports = [
        "src/python",
    ],
)

# Test target running every Python file under test/python against ":lib".
# NOTE(review): py_pytest and py_requirement are not loaded in this file; presumably they are
# supplied by a repository-wide prelude -- confirm.
py_pytest(
    name = "test",
    srcs = glob(["test/python/**/*.py"]),
    deps = [
        ":lib",
        py_requirement("hypothesis"),
    ],
)
|
|
@ -1,3 +1,53 @@
|
|||
# Proquint
|
||||
|
||||
An alternative implementation to https://github.com/dsw/proquint/tree/master/python, which is kinda garbo.
|
||||
An implementation of [A Proposal for Proquints](https://arxiv.org/html/0901.4016).
|
||||
|
||||
To summarize the paper, traditional decimal and hexadecimal codings are inconvenient for "large" bit-width identifiers.
|
||||
Decimal and hexadecimal codings offer no obvious dense enunciation and are traditionally presented without segmentation punctuation.
|
||||
The proquint format is a semantically dense coding for 16-bit chunks that fits within the enunciable space of English.
|
||||
|
||||
## Demo
|
||||
|
||||
``` python
|
||||
>>> from proquint import Proquint
|
||||
>>> Proquint.encode_i16(0)
|
||||
'babab'
|
||||
>>> Proquint.encode_i16(1)
|
||||
'babad'
|
||||
>>> Proquint.encode_i64(14708250061244963317)
|
||||
'subiv-gavab-sobiz-noluj'
|
||||
>>> Proquint.decode('babad')
|
||||
1
|
||||
```
|
||||
|
||||
## API Overview
|
||||
|
||||
### `proquint.Proquint.CONSONANTS`
|
||||
|
||||
A string of consonants to use when encoding or decoding proquints.
|
||||
Must be of length 16.
|
||||
|
||||
### `proquint.Proquint.VOWELS`
|
||||
|
||||
A string of vowels to use when encoding or decoding proquints.
|
||||
Must be of length 4.
|
||||
|
||||
### `proquint.Proquint.decode(buffer: str) -> int`
|
||||
|
||||
Decode a proquint string to an integer value without restriction on bit-width.
|
||||
|
||||
### `proquint.Proquint.encode(val: int, width: int) -> str`
|
||||
|
||||
Encode an integer into a string which will decode to the same value.
|
||||
|
||||
Note that the bit-width must be specified in order to determine the number of required segments.
|
||||
|
||||
### `proquint.Proquint.encode_{i16, i32, i64}(val: int) -> str`
|
||||
|
||||
Helpers for encoding known-width quantities.
|
||||
|
||||
## LICENSE
|
||||
|
||||
Copyright Reid 'arrdem' McKenzie August 2021.
|
||||
|
||||
Published under the terms of the MIT license.
|
||||
|
|
|
@ -1,105 +0,0 @@
|
|||
"""Proquint - pronounceable codings of integers.
|
||||
|
||||
Implemented from http://arxiv.org/html/0901.4016
|
||||
"""
|
||||
|
||||
from functools import cache
|
||||
|
||||
|
||||
class Proquint:
    """Codec for proquints: pronounceable encodings of 16-bit words.

    Every 16-bit word is written as five letters, consonant-vowel-consonant-vowel-consonant,
    consuming the word from its most significant bit downwards in fields of 4, 2, 4, 2 and 4 bits.
    Successive words are joined with ``-``.
    """

    # Class parameters
    ################################################################################################
    # Symbols for the 4-bit fields; a symbol's index is the value it encodes.
    CONSONANTS = "bdfghjklmnprstvz"
    # Symbols for the 2-bit fields; a symbol's index is the value it encodes.
    VOWELS = "aiou"
    # Default byte order used by encode() when turning an integer into bytes.
    BYTEORDER = "big"

    # Implementation helpers
    ################################################################################################
    @classmethod
    def _consonant_to_uint(cls, c: str) -> "int | None":
        """Return the 4-bit value encoded by consonant `c`, or None if `c` is not a consonant."""

        # Not memoized: a cached result would go stale if CONSONANTS were reassigned, and the
        # lookup over 16 characters is already cheap.
        if len(c) != 1:
            # str.index() matches substrings (and the empty string), which are never symbols.
            return None
        try:
            return cls.CONSONANTS.index(c)
        except ValueError:
            return None

    @classmethod
    def _vowel_to_uint(cls, c: str) -> "int | None":
        """Return the 2-bit value encoded by vowel `c`, or None if `c` is not a vowel."""

        if len(c) != 1:
            return None
        try:
            return cls.VOWELS.index(c)
        except ValueError:
            return None

    @classmethod
    def _encode(cls, buffer: bytes) -> list[str]:
        """Return one five-letter proquint per 16-bit word of `buffer`.

        Raises:
            ValueError: if `buffer` has an odd number of bytes, since a trailing byte cannot form
                a 16-bit word and would otherwise be dropped silently.
        """

        if len(buffer) % 2 != 0:
            raise ValueError("Proquints encode 16-bit words; buffer length must be even")

        # (alphabet, right shift, mask) for each letter, most significant field first.
        fields = (
            (cls.CONSONANTS, 12, 0xF),
            (cls.VOWELS, 10, 0x3),
            (cls.CONSONANTS, 6, 0xF),
            (cls.VOWELS, 4, 0x3),
            (cls.CONSONANTS, 0, 0xF),
        )

        words = []
        for high, low in zip(buffer[0::2], buffer[1::2]):
            # Two bytes make one word, so the high byte is shifted by 8 bits.
            word = (high << 8) | low
            words.append(
                "".join(alphabet[(word >> shift) & mask] for alphabet, shift, mask in fields)
            )
        return words

    # Core methods
    ################################################################################################
    @classmethod
    def encode_bytes(cls, buffer: bytes) -> str:
        """Encode an even-length sequence of bytes into a dash-separated proquint string.

        >>> Proquint.encode_bytes(bytes([127, 0, 0, 1]))
        'lusab-babad'

        Raises:
            ValueError: if `buffer` has an odd number of bytes.
        """

        return "-".join(cls._encode(buffer))

    @classmethod
    def decode(cls, buffer: str) -> int:
        """Decode a proquint string into the unsigned integer it spells.

        Dashes are ignored. The remaining letters must form whole five-letter words of the shape
        consonant-vowel-consonant-vowel-consonant. The first letter carries the most significant
        bits, so only strings produced with big-endian byte order round-trip through encode().

        >>> hex(Proquint.decode('lusab-babad'))
        '0x7f000001'

        Raises:
            ValueError: if the string is not made of whole, well-formed words.
        """

        symbols = buffer.replace("-", "")
        if len(symbols) % 5 != 0:
            raise ValueError(f"Bad proquint format: {buffer!r}")

        res = 0
        for i, c in enumerate(symbols):
            # Positions 1 and 3 of each word are vowels (2 bits); the rest are consonants (4 bits).
            if i % 5 in (1, 3):
                mag, bits = cls._vowel_to_uint(c), 2
            else:
                mag, bits = cls._consonant_to_uint(c), 4
            if mag is None:
                raise ValueError(f"Bad proquint format: {buffer!r}")
            res = (res << bits) | mag
        return res

    # Handy aliases
    ################################################################################################
    @classmethod
    def encode(cls, val: int, width: int, byteorder=BYTEORDER) -> str:
        """Encode the unsigned integer `val` as `width // 16` proquint words.

        Args:
            val: non-negative integer that fits in `width` bits.
            width: bit width of the encoding; must be a positive multiple of 16.
            byteorder: byte order handed to int.to_bytes(). decode() reads most significant
                bits first, so anything other than "big" will not round-trip.

        Raises:
            ValueError: if `width` is not a positive multiple of 16.
            OverflowError: from int.to_bytes() if `val` is negative or does not fit.
        """

        if width <= 0 or width % 16 != 0:
            raise ValueError("Width must be a positive multiple of 16")

        return cls.encode_bytes(val.to_bytes(width // 8, byteorder))

    @classmethod
    def encode_i16(cls, val: int) -> str:
        """Encode a 16-bit int to a proquint string."""

        return cls.encode(val, 16)

    @classmethod
    def encode_i32(cls, val: int) -> str:
        """Encode a 32-bit int to a proquint string."""

        return cls.encode(val, 32)

    @classmethod
    def encode_i64(cls, val: int) -> str:
        """Encode a 64-bit int to a proquint string."""

        return cls.encode(val, 64)
|
|
@ -1,33 +1,18 @@
|
|||
"""A setuptools based setup module.
|
||||
|
||||
"""
|
||||
|
||||
# io.open is needed for projects that support Python 2.7
|
||||
# It ensures open() defaults to text mode with universal newlines,
|
||||
# and accepts an argument to specify the text encoding
|
||||
# Python 3 only projects can skip this import
|
||||
from io import open
|
||||
from os import path
|
||||
|
||||
# Always prefer setuptools over distutils
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
|
||||
here = path.abspath(path.dirname(__file__))
|
||||
|
||||
# Get the long description from the README file
|
||||
with open(path.join(here, "README.md"), encoding="utf-8") as f:
|
||||
long_description = f.read()
|
||||
|
||||
# Arguments marked as "Required" below must be included for upload to PyPI.
|
||||
# Fields marked as "Optional" may be commented out.
|
||||
|
||||
setup(
|
||||
name="proquint", # Required
|
||||
version="0.1.0", # Required
|
||||
description="Enunciable numerics",
|
||||
long_description=long_description, # Optional
|
||||
long_description_content_type="text/markdown", # Optional (see note above)
|
||||
name="arrdem.proquint",
|
||||
version="0.1.0",
|
||||
description="Enunciable numeric identifiers",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/arrdem/source",
|
||||
author="Reid 'arrdem' McKenzie",
|
||||
author_email="me@arrdem.com",
|
||||
|
@ -37,32 +22,13 @@ setup(
|
|||
"Development Status :: 3 - Alpha",
|
||||
"Intended Audience :: Developers",
|
||||
"License :: OSI Approved :: BSD License",
|
||||
"Programming Language :: Python :: 3.5",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
],
|
||||
# This field adds keywords for your project which will appear on the
|
||||
# project page. What does your project relate to?
|
||||
#
|
||||
# Note that this is a string of words separated by whitespace, not a list.
|
||||
keywords="sample setuptools development", # Optional
|
||||
# You can just specify package directories manually here if your project is
|
||||
# simple. Or you can use find_packages().
|
||||
#
|
||||
# Alternatively, if you just want to distribute a single Python file, use
|
||||
# the `py_modules` argument instead as follows, which will expect a file
|
||||
# called `my_module.py` to exist:
|
||||
#
|
||||
# py_modules=["my_module"],
|
||||
#
|
||||
packages=find_packages(exclude=["docs", "tests"]),
|
||||
python_requires=">=3.5",
|
||||
# List additional groups of dependencies here (e.g. development
|
||||
# dependencies). Users will be able to install these using the "extras"
|
||||
# syntax, for example:
|
||||
#
|
||||
# $ pip install sampleproject[dev]
|
||||
#
|
||||
# Similar to `install_requires` above, these must be valid existing
|
||||
# projects.
|
||||
packages=[
|
||||
"proquint",
|
||||
],
|
||||
package_dir={"": "src/python"},
|
||||
python_requires=">=3.9",
|
||||
extras_require={ # Optional
|
||||
"dev": ["check-manifest"],
|
||||
"test": ["pytest", "hypothesis"],
|
||||
|
|
164
projects/proquint/src/python/proquint.py
Normal file
164
projects/proquint/src/python/proquint.py
Normal file
|
@ -0,0 +1,164 @@
|
|||
"""Proquint - pronounceable codings of integers.
|
||||
|
||||
Implemented from http://arxiv.org/html/0901.4016
|
||||
|
||||
Quoting -
|
||||
|
||||
we propose encoding a 16-bit string as a proquint of alternating consonants and vowels as follows.
|
||||
|
||||
Four-bits as a consonant:
|
||||
|
||||
0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
b d f g h j k l m n p r s t v z
|
||||
|
||||
Two-bits as a vowel:
|
||||
|
||||
0 1 2 3
|
||||
a i o u
|
||||
|
||||
Whole 16-bit word, where "con" = consonant, "vo" = vowel:
|
||||
|
||||
0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
|con |vo |con |vo |con |
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
|
||||
Separate proquints using dashes, which can go un-pronounced or be pronounced "eh". The suggested optional magic number prefix to a sequence of proquints is "0q-".
|
||||
|
||||
Here are some IP dotted-quads and their corresponding proquints.
|
||||
|
||||
127.0.0.1 lusab-babad
|
||||
63.84.220.193 gutih-tugad
|
||||
63.118.7.35 gutuk-bisog
|
||||
140.98.193.141 mudof-sakat
|
||||
64.255.6.200 haguz-biram
|
||||
128.30.52.45 mabiv-gibot
|
||||
147.67.119.2 natag-lisaf
|
||||
212.58.253.68 tibup-zujah
|
||||
216.35.68.215 tobog-higil
|
||||
216.68.232.21 todah-vobij
|
||||
198.81.129.136 sinid-makam
|
||||
12.110.110.204 budov-kuras
|
||||
|
||||
"""
|
||||
|
||||
from functools import cache
|
||||
|
||||
|
||||
class Proquint:
    """Codec for proquints: pronounceable encodings of 16-bit words.

    Every 16-bit word is written as five letters, consonant-vowel-consonant-vowel-consonant,
    consuming the word from its most significant bit downwards in fields of 4, 2, 4, 2 and 4 bits.
    Successive words are joined with ``-``.
    """

    # Class parameters
    ################################################################################################
    # Symbols for the 4-bit fields; a symbol's index is the value it encodes. Must be of length 16.
    CONSONANTS = "bdfghjklmnprstvz"
    # Symbols for the 2-bit fields; a symbol's index is the value it encodes. Must be of length 4.
    VOWELS = "aiou"

    # Implementation helpers
    ################################################################################################
    @classmethod
    def _consonant_to_uint(cls, c: str) -> "int | None":
        """Return the 4-bit value encoded by consonant `c`, or None if `c` is not a consonant."""

        # Not memoized: a cached result would go stale if CONSONANTS were reassigned, and the
        # lookup over 16 characters is already cheap.
        if len(c) != 1:
            # str.index() matches substrings (and the empty string), which are never symbols.
            return None
        try:
            return cls.CONSONANTS.index(c)
        except ValueError:
            return None

    @classmethod
    def _vowel_to_uint(cls, c: str) -> "int | None":
        """Return the 2-bit value encoded by vowel `c`, or None if `c` is not a vowel."""

        if len(c) != 1:
            return None
        try:
            return cls.VOWELS.index(c)
        except ValueError:
            return None

    @classmethod
    def _encode(cls, buffer: bytes) -> list[str]:
        """Return one five-letter proquint per 16-bit word of `buffer`.

        Raises:
            ValueError: if `buffer` has an odd number of bytes, since a trailing byte cannot form
                a 16-bit word and would otherwise be dropped silently.
        """

        if len(buffer) % 2 != 0:
            raise ValueError("Proquints encode 16-bit words; buffer length must be even")

        # Proquints are built from 16-bit words, not bytes. The proposal draws the word as
        #
        #    0 1 2 3 4 5 6 7 8 9 A B C D E F
        #   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        #   |con    |vo |con    |vo |con    |
        #   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        #
        # with bit 0 on the left being the MOST significant bit. The first letter therefore comes
        # from the top four bits, giving right-shifts of 12, 10, 6, 4 and 0.
        fields = (
            (cls.CONSONANTS, 12, 0xF),
            (cls.VOWELS, 10, 0x3),
            (cls.CONSONANTS, 6, 0xF),
            (cls.VOWELS, 4, 0x3),
            (cls.CONSONANTS, 0, 0xF),
        )

        words = []
        for high, low in zip(buffer[0::2], buffer[1::2]):
            # Rebuild the big-endian byte pair into a single 16-bit word.
            word = (high << 8) | low
            words.append(
                "".join(alphabet[(word >> shift) & mask] for alphabet, shift, mask in fields)
            )
        return words

    # Core methods
    ################################################################################################
    @classmethod
    def encode_bytes(cls, buffer: bytes) -> str:
        """Encode an even-length sequence of bytes into a dash-separated proquint string.

        >>> Proquint.encode_bytes(bytes([127, 0, 0, 1]))
        'lusab-babad'

        Raises:
            ValueError: if `buffer` has an odd number of bytes.
        """

        return "-".join(cls._encode(buffer))

    @classmethod
    def decode(cls, buffer: str) -> int:
        """Decode a proquint string of any length into the unsigned integer it spells.

        Dashes are ignored. The remaining letters must form whole five-letter words of the shape
        consonant-vowel-consonant-vowel-consonant; the first letter carries the most significant
        bits.

        >>> hex(Proquint.decode('lusab-babad'))
        '0x7f000001'

        Raises:
            ValueError: if the string is not made of whole, well-formed words.
        """

        symbols = buffer.replace("-", "")
        if len(symbols) % 5 != 0:
            raise ValueError(f"Bad proquint format: {buffer!r}")

        res = 0
        for i, c in enumerate(symbols):
            # Positions 1 and 3 of each word are vowels (2 bits); the rest are consonants (4 bits).
            if i % 5 in (1, 3):
                mag, bits = cls._vowel_to_uint(c), 2
            else:
                mag, bits = cls._consonant_to_uint(c), 4
            if mag is None:
                raise ValueError(f"Bad proquint format: {buffer!r}")
            res = (res << bits) | mag
        return res

    # Handy aliases
    ################################################################################################
    @classmethod
    def encode(cls, val: int, width: int) -> str:
        """Encode the unsigned integer `val` as `width // 16` proquint words.

        >>> Proquint.encode(2130706433, 32)
        'lusab-babad'

        Args:
            val: non-negative integer that fits in `width` bits.
            width: bit width of the encoding; must be a positive multiple of 16 because each
                proquint word carries exactly 16 bits.

        Raises:
            ValueError: if `width` is not a positive multiple of 16.
            OverflowError: from int.to_bytes() if `val` is negative or does not fit.
        """

        if width <= 0 or width % 16 != 0:
            raise ValueError("Width must be a positive multiple of 16")

        return cls.encode_bytes(val.to_bytes(width // 8, "big"))

    @classmethod
    def encode_i16(cls, val: int) -> str:
        """Encode a 16-bit int to a proquint string."""

        return cls.encode(val, 16)

    @classmethod
    def encode_i32(cls, val: int) -> str:
        """Encode a 32-bit int to a proquint string."""

        return cls.encode(val, 32)

    @classmethod
    def encode_i64(cls, val: int) -> str:
        """Encode a 64-bit int to a proquint string."""

        return cls.encode(val, 64)
|
57
projects/proquint/test/python/test_examples.py
Normal file
57
projects/proquint/test/python/test_examples.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
"""Tests based off of known examples."""
|
||||
|
||||
import proquint
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Known-good cases shared by the parametrized tests below.
# Each entry is (integer value, bit width passed to encode, expected proquint string).
examples = [
    # Various single-bit data
    (1, 32, "babab-babad"),
    (2, 32, "babab-babaf"),
    (4, 32, "babab-babah"),
    (8, 32, "babab-babam"),
    (16, 32, "babab-babib"),
    (32, 32, "babab-babob"),
    (64, 32, "babab-badab"),
    (128, 32, "babab-bafab"),
    (256, 32, "babab-bahab"),
    (512, 32, "babab-bamab"),
    (1024, 32, "babab-bibab"),
    (2048, 32, "babab-bobab"),
    (4096, 32, "babab-dabab"),
    (8192, 32, "babab-fabab"),
    (16384, 32, "babab-habab"),
    (32768, 32, "babab-mabab"),
    (65536, 32, "babad-babab"),
    (131072, 32, "babaf-babab"),
    (262144, 32, "babah-babab"),
    (524288, 32, "babam-babab"),
    (1048576, 32, "babib-babab"),
    (2097152, 32, "babob-babab"),
    (4194304, 32, "badab-babab"),
    (8388608, 32, "bafab-babab"),
    (16777216, 32, "bahab-babab"),
    (33554432, 32, "bamab-babab"),
    (67108864, 32, "bibab-babab"),
    (134217728, 32, "bobab-babab"),
    (268435456, 32, "dabab-babab"),
    (536870912, 32, "fabab-babab"),
    (1073741824, 32, "habab-babab"),
    (2147483648, 32, "mabab-babab"),

    # A random value
    (3232235536, 32, "safom-babib"),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('val,width,qint', examples)
def test_decode_examples(val, width, qint):
    """Every known proquint decodes to its integer; `width` is accepted but not used here."""

    decoded = proquint.Proquint.decode(qint)
    assert decoded == val, f"qint {qint} did not decode"
|
||||
|
||||
|
||||
@pytest.mark.parametrize('val,width,qint', examples)
def test_encode_examples(val, width, qint):
    """Every known integer encodes, at the given width, to its expected proquint."""

    codec = proquint.Proquint
    encoded_qint = codec.encode(val, width)
    # Decoded only so a failure message shows what the wrong encoding actually means.
    decoded_val = codec.decode(encoded_qint)
    assert encoded_qint == qint, f"did not encode {val} to {qint}; got {encoded_qint} ({decoded_val})"
|
31
projects/proquint/test/python/test_hypothesis.py
Normal file
31
projects/proquint/test/python/test_hypothesis.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
"""Tests based off of round-tripping randomly generated examples."""
|
||||
|
||||
import proquint
|
||||
|
||||
import pytest
|
||||
from hypothesis import given
|
||||
from hypothesis.strategies import integers
|
||||
|
||||
|
||||
# max_value is inclusive, so the bound is the largest value that fits in 16 bits; 1 << 16
# itself cannot be packed into two bytes and makes encode() raise OverflowError.
@given(integers(min_value=0, max_value=(1 << 16) - 1))
def test_round_trip_16(val):
    """Any unsigned 16-bit value survives an encode/decode round trip."""

    assert proquint.Proquint.decode(
        proquint.Proquint.encode(val, 16)) == val
|
||||
|
||||
|
||||
# max_value is inclusive, so the bound is the largest value that fits in 32 bits; 1 << 32
# itself cannot be packed into four bytes and makes encode() raise OverflowError.
@given(integers(min_value=0, max_value=(1 << 32) - 1))
def test_round_trip_32(val):
    """Any unsigned 32-bit value survives an encode/decode round trip."""

    assert proquint.Proquint.decode(
        proquint.Proquint.encode(val, 32)) == val
|
||||
|
||||
|
||||
# max_value is inclusive, so the bound is the largest value that fits in 64 bits; 1 << 64
# itself cannot be packed into eight bytes and makes encode() raise OverflowError.
@given(integers(min_value=0, max_value=(1 << 64) - 1))
def test_round_trip_64(val):
    """Any unsigned 64-bit value survives an encode/decode round trip."""

    assert proquint.Proquint.decode(
        proquint.Proquint.encode(val, 64)) == val
|
||||
|
||||
|
||||
# max_value is inclusive, so the bound is the largest value that fits in 512 bits; 1 << 512
# itself cannot be packed into 64 bytes and makes encode() raise OverflowError.
@given(integers(min_value=0, max_value=(1 << 512) - 1))
def test_round_trip_512(val):
    """Any unsigned 512-bit value survives an encode/decode round trip."""

    assert proquint.Proquint.decode(
        proquint.Proquint.encode(val, 512)) == val
|
Loading…
Reference in a new issue