164 lines
5.1 KiB
Python
164 lines
5.1 KiB
Python
"""Proquint - pronounceable codings of integers.

Implemented from http://arxiv.org/html/0901.4016

Quoting -

    we propose encoding a 16-bit string as a proquint of alternating consonants and vowels as
    follows.

    Four-bits as a consonant:

        0 1 2 3 4 5 6 7 8 9 A B C D E F
        b d f g h j k l m n p r s t v z

    Two-bits as a vowel:

        0 1 2 3
        a i o u

    Whole 16-bit word, where "con" = consonant, "vo" = vowel:

         0 1 2 3 4 5 6 7 8 9 A B C D E F
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        |con    |vo |con    |vo |con    |
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

    Separate proquints using dashes, which can go un-pronounced or be pronounced "eh". The
    suggested optional magic number prefix to a sequence of proquints is "0q-".

    Here are some IP dotted-quads and their corresponding proquints.

        127.0.0.1       lusab-babad
        63.84.220.193   gutih-tugad
        63.118.7.35     gutuk-bisog
        140.98.193.141  mudof-sakat
        64.255.6.200    haguz-biram
        128.30.52.45    mabiv-gibot
        147.67.119.2    natag-lisaf
        212.58.253.68   tibup-zujah
        216.35.68.215   tobog-higil
        216.68.232.21   todah-vobij
        198.81.129.136  sinid-makam
        12.110.110.204  budov-kuras

"""
|
|
|
|
from collections.abc import Iterator
from functools import cache
from typing import Optional
|
|
|
|
|
|
class Proquint(object):
    """Encode integers/bytes as proquints and decode proquints back to integers.

    A proquint is a five-letter, pronounceable rendering of one 16-bit word:
    consonant-vowel-consonant-vowel-consonant, where each consonant carries four
    bits and each vowel carries two. Multiple words are joined with dashes.

    All methods are classmethods; the alphabets are read through ``cls`` so a
    subclass can supply its own ``CONSONANTS`` / ``VOWELS``.
    """

    # Class parameters
    ################################################################################################
    # A letter's index in the string is the value it encodes.
    CONSONANTS = "bdfghjklmnprstvz"  # 16 letters -> 4 bits each
    VOWELS = "aiou"  # 4 letters -> 2 bits each

    # Implementation helpers
    ################################################################################################
    @classmethod
    @cache
    def _consonant_to_uint(cls, c: str) -> Optional[int]:
        """Return the 4-bit value of consonant ``c``, or ``None`` if it is not one."""
        # Only single characters are letters; ``str.find`` would otherwise also
        # match the empty string or a multi-character substring.
        if len(c) != 1:
            return None
        position = cls.CONSONANTS.find(c)
        return position if position >= 0 else None

    @classmethod
    @cache
    def _vowel_to_uint(cls, c: str) -> Optional[int]:
        """Return the 2-bit value of vowel ``c``, or ``None`` if it is not one."""
        if len(c) != 1:
            return None
        position = cls.VOWELS.find(c)
        return position if position >= 0 else None

    @classmethod
    def _encode(cls, buffer: bytes) -> Iterator[str]:
        """Yield one five-letter proquint per 16-bit big-endian word in ``buffer``.

        Layout of a word, as drawn in the proposal (bit 0 is the MOST significant
        bit, i.e. the diagram reads from the high-order end down)::

             0 1 2 3 4 5 6 7 8 9 A B C D E F
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |con    |vo |con    |vo |con    |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

        so the five fields are extracted with right-shifts of 12, 10, 6, 4 and 0.

        Raises:
            ValueError: if ``buffer`` has an odd length. Proquints are built from
                16-bit words, so a trailing single byte cannot be represented
                (it used to be dropped silently). Because this is a generator,
                the error surfaces when iteration starts.
        """
        if len(buffer) % 2 != 0:
            raise ValueError("Proquint input must contain a whole number of 16-bit words")

        # (alphabet, right-shift, mask) for each letter, most significant field first.
        layout = (
            (cls.CONSONANTS, 12, 0xF),
            (cls.VOWELS, 10, 0x3),
            (cls.CONSONANTS, 6, 0xF),
            (cls.VOWELS, 4, 0x3),
            (cls.CONSONANTS, 0, 0xF),
        )

        for high, low in zip(buffer[0::2], buffer[1::2]):
            # Rebuild the two bytes into one big-endian 16-bit word.
            word = high << 8 | low
            yield "".join(alphabet[word >> shift & mask] for alphabet, shift, mask in layout)

    # Core methods
    ################################################################################################
    @classmethod
    def encode_bytes(cls, buffer: bytes) -> str:
        """Encode a sequence of bytes into a dash-separated proquint string.

        ``buffer`` is read as consecutive big-endian 16-bit words; an empty
        buffer encodes to the empty string.

        Raises:
            ValueError: if ``buffer`` has an odd number of bytes.

        >>> Proquint.encode_bytes(bytes([127, 0, 0, 1]))
        'lusab-babad'
        """

        return "-".join(cls._encode(buffer))

    @classmethod
    def decode(cls, buffer: str) -> int:
        """Convert a proquint string into the unsigned integer it encodes.

        Dashes are ignored. Every other character must be a consonant (shifts in
        four bits) or a vowel (shifts in two bits); the result is as wide as the
        input dictates (16 bits per full proquint). The consonant/vowel ordering
        itself is not checked.

        Raises:
            ValueError: if ``buffer`` contains any character that is neither a
                dash, a consonant nor a vowel.

        >>> hex(Proquint.decode('lusab-babad'))
        '0x7f000001'
        """

        res = 0

        for c in buffer:
            if c == "-":
                continue
            if (mag := cls._consonant_to_uint(c)) is not None:
                res = (res << 4) | mag
            elif (mag := cls._vowel_to_uint(c)) is not None:
                res = (res << 2) | mag
            else:
                # Previously the 6th letter was exempt from this check (a leftover
                # from when the dash sat at index 5), which let a corrupt letter be
                # skipped and a wrong value returned.
                raise ValueError("Bad proquint format")
        return res

    # Handy aliases
    ################################################################################################
    @classmethod
    def encode(cls, val: int, width: int) -> str:
        """Encode the unsigned integer ``val`` as a ``width``-bit proquint string.

        Args:
            val: non-negative integer that fits in ``width`` bits.
            width: bit width of the encoding; must be a positive multiple of 16
                because each proquint carries exactly 16 bits.

        Raises:
            ValueError: if ``width`` is not a positive multiple of 16.
            OverflowError: (from ``int.to_bytes``) if ``val`` is negative or does
                not fit in ``width`` bits.
        """

        if width < 16 or width % 16 != 0:
            raise ValueError("Width must be a positive multiple of 16")

        return cls.encode_bytes(val.to_bytes(width // 8, "big"))

    @classmethod
    def encode_i16(cls, val: int) -> str:
        """Encode a 16-bit unsigned int as a single proquint."""

        return cls.encode(val, 16)

    @classmethod
    def encode_i32(cls, val: int) -> str:
        """Encode a 32-bit unsigned int as two dash-separated proquints."""

        return cls.encode(val, 32)

    @classmethod
    def encode_i64(cls, val: int) -> str:
        """Encode a 64-bit unsigned int as four dash-separated proquints."""

        return cls.encode(val, 64)
|