source/projects/proquint/src/python/proquint.py

164 lines
5.1 KiB
Python

"""Proquint - pronounceable codings of integers.
Implemented from http://arxiv.org/html/0901.4016
Quoting -
we propose encoding a 16-bit string as a proquint of alternating consonants and vowels as follows.
Four-bits as a consonant:
0 1 2 3 4 5 6 7 8 9 A B C D E F
b d f g h j k l m n p r s t v z
Two-bits as a vowel:
0 1 2 3
a i o u
Whole 16-bit word, where "con" = consonant, "vo" = vowel:
 0 1 2 3 4 5 6 7 8 9 A B C D E F
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|con    |vo |con    |vo |con    |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Separate proquints using dashes, which can go un-pronounced or be pronounced "eh". The suggested optional magic number prefix to a sequence of proquints is "0q-".
Here are some IP dotted-quads and their corresponding proquints.
127.0.0.1 lusab-babad
63.84.220.193 gutih-tugad
63.118.7.35 gutuk-bisog
140.98.193.141 mudof-sakat
64.255.6.200 haguz-biram
128.30.52.45 mabiv-gibot
147.67.119.2 natag-lisaf
212.58.253.68 tibup-zujah
216.35.68.215 tobog-higil
216.68.232.21 todah-vobij
198.81.129.136 sinid-makam
12.110.110.204 budov-kuras
"""
from collections.abc import Iterator
from functools import cache
from typing import Optional
class Proquint(object):
    """Encode and decode proquints: pronounceable spellings of 16-bit words.

    Every 16-bit word is written as five letters in the pattern
    consonant-vowel-consonant-vowel-consonant (4 + 2 + 4 + 2 + 4 bits), and
    successive words are joined with dashes. All entry points are class
    methods; the class keeps no instance state.
    """

    # Class parameters
    ################################################################################################

    # A letter's position in its alphabet is the numeric value it encodes.
    CONSONANTS = "bdfghjklmnprstvz"  # 16 letters -> 4 bits each
    VOWELS = "aiou"  # 4 letters -> 2 bits each

    # Implementation helpers
    ################################################################################################

    # The two lookups below are deliberately not memoized: scanning a string of
    # at most 16 characters is cheap, while a cache keyed on caller-supplied
    # characters would only ever grow.

    @classmethod
    def _consonant_to_uint(cls, c: str) -> Optional[int]:
        """Return the 4-bit value of consonant ``c``, or ``None`` if it is not one."""
        position = cls.CONSONANTS.find(c)
        return position if position >= 0 else None

    @classmethod
    def _vowel_to_uint(cls, c: str) -> Optional[int]:
        """Return the 2-bit value of vowel ``c``, or ``None`` if it is not one."""
        position = cls.VOWELS.find(c)
        return position if position >= 0 else None

    @classmethod
    def _encode(cls, buffer: bytes) -> Iterator[str]:
        """Yield one five-letter proquint per 16-bit big-endian word of ``buffer``.

        ``buffer`` is consumed two bytes at a time. It must contain an even
        number of bytes: the pairing below would silently drop a trailing odd
        byte, so ``encode_bytes`` rejects such input before calling this.
        """
        # Layout of one word as drawn in the proposal:
        #
        #  0 1 2 3 4 5 6 7 8 9 A B C D E F
        # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        # |con    |vo |con    |vo |con    |
        # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        #
        # The diagram numbers bits from the most significant end, so the
        # leftmost (first written) consonant lives in the highest four bits.
        # Expressed as right shifts from the least significant bit, the five
        # fields therefore start at 12, 10, 6, 4 and 0.
        layout = (
            (cls.CONSONANTS, 12, 0xF),
            (cls.VOWELS, 10, 0x3),
            (cls.CONSONANTS, 6, 0xF),
            (cls.VOWELS, 4, 0x3),
            (cls.CONSONANTS, 0, 0xF),
        )
        for high, low in zip(buffer[0::2], buffer[1::2]):
            # Rebuild the 16-bit word from its two bytes, high byte first.
            word = high << 8 | low
            yield "".join(
                alphabet[word >> shift & mask] for alphabet, shift, mask in layout
            )

    # Core methods
    ################################################################################################

    @classmethod
    def encode_bytes(cls, buffer: bytes) -> str:
        """Encode a sequence of bytes into a dash-separated proquint string.

        The bytes are read as big-endian 16-bit words; an empty buffer encodes
        to the empty string.

        Raises:
            ValueError: if ``buffer`` has an odd number of bytes, since a
                proquint always represents a whole 16-bit word.

        >>> Proquint.encode_bytes(bytes([127, 0, 0, 1]))
        'lusab-babad'
        """
        if len(buffer) % 2:
            raise ValueError("Buffer length must be a multiple of 2 bytes (16 bits)")
        return "-".join(cls._encode(buffer))

    @classmethod
    def decode(cls, buffer: str) -> int:
        """Convert a proquint string into the integer it spells.

        Dashes are skipped. Every other character must be one of
        ``CONSONANTS`` (contributing 4 bits) or ``VOWELS`` (contributing
        2 bits); the bits are accumulated most-significant first. The
        consonant/vowel ordering and the group length are not checked, and an
        empty string decodes to 0.

        Raises:
            ValueError: if the string contains any character that is neither
                a dash, a consonant, nor a vowel.

        >>> hex(Proquint.decode('lusab-babad'))
        '0x7f000001'
        """
        res = 0
        for c in buffer:
            if c == "-":
                continue
            if (mag := cls._consonant_to_uint(c)) is not None:
                res = (res << 4) | mag
            elif (mag := cls._vowel_to_uint(c)) is not None:
                res = (res << 2) | mag
            else:
                raise ValueError(f"Bad proquint format: unexpected character {c!r}")
        return res

    # Handy aliases
    ################################################################################################

    @classmethod
    def encode(cls, val: int, width: int) -> str:
        """Encode integer ``val`` as a proquint string ``width`` bits wide.

        ``val`` is serialized big-endian into ``width // 8`` bytes via
        ``int.to_bytes``, which raises ``OverflowError`` when ``val`` is
        negative or does not fit.

        Raises:
            ValueError: if ``width`` is not a positive multiple of 16, because
                each proquint encodes exactly one 16-bit word.
        """
        if width < 16 or width % 16 != 0:
            raise ValueError(f"Width must be a positive multiple of 16 bits, got {width}")
        return cls.encode_bytes(val.to_bytes(width // 8, "big"))

    @classmethod
    def encode_i16(cls, val: int) -> str:
        """Encode a 16-bit int to a proquint string."""
        return cls.encode(val, 16)

    @classmethod
    def encode_i32(cls, val: int) -> str:
        """Encode a 32-bit int to a proquint string."""
        return cls.encode(val, 32)

    @classmethod
    def encode_i64(cls, val: int) -> str:
        """Encode a 64-bit int to a proquint string."""
        return cls.encode(val, 64)