Add bussard as-is

This commit is contained in:
Reid 'arrdem' McKenzie 2021-08-03 08:43:06 -06:00
parent 9b6a3fe164
commit c451d4cb00
11 changed files with 872 additions and 0 deletions

17
projects/bussard/Makefile Normal file
View file

@ -0,0 +1,17 @@
# Build rules for the generated parts of bussard.
#
# `all` and `test` never create files with those names, so they are declared
# phony; otherwise a stray file called `all` would silently disable the target.
.PHONY: all deploy test

# Generate the Python parser from the Canopy PEG grammar.
# The grammar is copied into a scratch directory so canopy's output lands
# there, is post-processed, and is only then moved into the package.
src/python/bussard/gen/parser.py: Makefile src/canopy/zonefile.peg
	mkdir -p tempdir
	cp src/canopy/zonefile.peg tempdir/
	canopy --lang=python tempdir/zonefile.peg
	# Prefer GNU sed (gsed) when it is installed, else fall back to sed.
	# NOTE(review): pattern and replacement look identical here - confirm the
	# intended whitespace normalization against the original file.
	which gsed && gsed -i 's/ / /g' tempdir/zonefile.py || sed -i 's/ / /g' tempdir/zonefile.py
	# Prepend a lint-suppression header and a "generated code" docstring.
	which gsed && gsed -i '1s/^/# checkstyle: noqa\n\n"""Generated code.\n\nDo not modify or lint\n"""\n\n/' tempdir/zonefile.py || sed -i '1s/^/# checkstyle: noqa\n\n"""Generated code.\n\nDo not modify or lint\n"""\n\n/' tempdir/zonefile.py
	mv tempdir/zonefile.py src/python/bussard/gen/parser.py

# Generate the record NamedTuples from the same grammar via the awk script.
src/python/bussard/gen/types.py: Makefile src/canopy/zonefile.peg src/awk/gen_types.awk
	awk -f src/awk/gen_types.awk src/canopy/zonefile.peg > src/python/bussard/gen/types.py

# Build every generated source file.
all: src/python/bussard/gen/types.py src/python/bussard/gen/parser.py

# Regenerate sources, then run the test suite.
test: all
	pytest

View file

@ -0,0 +1,13 @@
# Bussard
> Sometimes you need an engine that works at the Bottom, near the Slow
Zone when you're crawling along just above the Unthinking Depths. Hard
to beat a ramscoop when it's time to go on ice.
Bussard is a small toolkit for parsing BIND zonefiles, hence the
reference to Vernor Vinge's ["Zones of Thought"](https://en.wikipedia.org/wiki/A_Fire_Upon_the_Deep)
series.
## Usage
FIXME

31
projects/bussard/setup.py Normal file
View file

@ -0,0 +1,31 @@
from setuptools import setup
# Read the long description up front: the context manager closes the file
# handle deterministically, and the explicit encoding keeps the build from
# depending on the locale of the machine running it.
with open("README.md", encoding="utf-8") as readme:
    long_description = readme.read()

setup(
    name="arrdem.bussard",
    # Package metadata
    version="0.0.0",
    license="MIT",
    description="A DNS zonefile engine",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="Reid 'arrdem' McKenzie",
    author_email="me@arrdem.com",
    url="https://git.arrdem.com/arrdem/bussard",
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
    ],
    # Package setup
    package_dir={"": "src/python"},
    packages=[
        "bussard",
    ],
    # Command line entry points shipped as plain scripts.
    scripts=[
        "src/python/bussard/bfmt",
        "src/python/bussard/bparse",
    ],
)

View file

@ -0,0 +1,36 @@
# Generate Python record classes from the zonefile PEG grammar.
#
# Input:  the grammar file, one rule per line ("name <- ...").
# Output: a Python module with one NamedTuple class per record rule.

# Emit the module header and the shared Record base class once.
BEGIN {
    print "#!/usr/bin/env python3\n"
    print "\"\"\"GENERATED.\n\nRecord types derived from the grammar.\n\"\"\"\n";
    print "from typing import NamedTuple, Optional\n\n";
    print "class Record(object):\n \"\"\"Base class for DNS records.\"\"\"\n\n"
    spacing=""
}

# Consider every grammar rule line.
/<-/ {
    # A rule describes a record when its third field is the rule's own name
    # as a quoted literal, optionally "$"-prefixed (a <- "A", ttl <- "$TTL").
    # Plain `||` is used rather than gawk's bitwise or() so that the script
    # also runs under awk implementations that lack that extension.
    if (("\""$1"\"" == tolower($3)) || ("\"$"$1"\"" == tolower($3))) {
        # Separate consecutive classes; nothing is printed before the first.
        if (spacing)
            print spacing;

        print "class " toupper($1) "(NamedTuple, Record): # noqa: T000";

        # If this isn't $TTL or $ORIGIN, it has a name.
        if ($3 !~ /\$/)
            print " name: str";

        # Every "label:rule" field becomes an attribute, typed by its rule.
        for(i=3;i<=NF;i++) {
            if ($i ~ /:/) {
                split($i,arr,":")
                if ($i ~ /:(word|v[46]address|string)/) {
                    print " " arr[1] ": str";
                } else if ($i ~ /:(num|seconds)/) {
                    print " " arr[1] ": int"
                }
            }
        }

        # Attributes shared by every record, all defaulted.
        print " type: str = \"IN\"";
        print " ttl: Optional[int] = None";
        print " comment: Optional[str] = None";
        spacing="\n";
    }
}

View file

@ -0,0 +1,74 @@
# A zonefile parser.
#
# Based on RFC 883, RFC 1035
# - Drops WKS per RFC 1123
# - Drops NULL per RFC 1035
# - Drops MG per RFC 2505
# - Drops MR per RFC 2505
# - Drops MINFO per RFC 2505
# + Adds SRV from RFC 2782
# + Adds AAAA from RFC 3596
grammar Zonefile
  # Base zone rule: a zone is any number of top-level entries.
  zone <- _one* %make_zone
  _one <- origin / ttl / records / eol # helper for testing

  # The origin and TTL special records
  origin <- "$ORIGIN" ws name:word comment:eol %make_origin
  ttl <- "$TTL" ws ttl:seconds comment:eol %make_ttl

  # Base record rule: a name followed by one or more record bodies, comments
  # or blank lines, so several records can share a single leading name.
  records <- name:word (_r_repeat / comment / eol)+ %make_records
  # One record body. The TTL and the "IN" type are both optional and may
  # appear in either order, hence the two mutually recursive helpers below.
  _r_repeat <- ws (_r_with_ttl / _r_with_type / _r ) ws comment:eol %make_repeat
  _r_with_ttl <- ttl:seconds ws (_r_with_type / _r) %make_record_ttl
  _r_with_type <- type:"IN" ws (_r_with_ttl / _r) %make_record_type

  ####################################################################################################
  # Record types
  # A big alternation of the supported records.
  # aaaa is tried before a because "A" is a prefix of "AAAA".
  _r <- aaaa / a / cname / txt / mx / ns / ptr / soa / srv / rp

  # Oh gawd SOAs
  # The five numeric fields sit inside parentheses and may be spread over
  # several lines, each optionally followed by a comment (see _ws_).
  soa <- "SOA" ws mname:word ws rname:word ws "(" _ws_ serial:num _ws_ refresh:seconds _ws_ retry:seconds _ws_ expire:seconds _ws_ minimum:seconds _ws_ ")" %make_soa
  a <- "A" ws address:v4address %make_a
  aaaa <- "AAAA" ws address:v6address %make_aaaa
  cname <- "CNAME" ws cname:word %make_cname
  mx <- "MX" ws preference:num ws exchange:word %make_mx
  ns <- "NS" ws nsdname:word %make_ns
  ptr <- "PTR" ws ptrdname:word %make_ptr
  txt <- "TXT" ws txt_data:string %make_txt
  srv <- "SRV" ws priority:num ws weight:num ws port:num ws target:word %make_srv
  rp <- "RP" ws mbox_dname:word ws txt_dname:word %make_rp

  ####################################################################################################
  # Record fragments
  # Massively overbroad word regex
  word <- [@.*_A-Za-z0-9-]+ %make_word
  # num: one or more decimal digits
  num <- [\d]+ %make_num
  # seconds: a number with an optional week/day/hour/minute/second unit suffix
  seconds <- num sec_unit? %make_seconds
  sec_unit <- [WwDdHhMmSs]
  # v4address (AKA address in RFC-1035) is a 32-bit address in dotted-quad form
  v4address <- num '.' num '.' num '.' num %make_v4
  # v6address is a 128-bit, aka IPv6, address
  # This is a garbage, overbroad regex >.>
  v6address <- [A-Za-z0-9:]+ %make_v6
  # A double-quoted string; no escape sequences are recognized.
  string <- '"' [^\"]* '"' %make_string

  # Whitespace in various forms
  # eol: optional horizontal whitespace, then a comment or a newline.
  eol <- ws (comment / newline) %make_blank
  # _ws_: the separator used between the parenthesized SOA fields.
  _ws_ <- eol? ws? %make_blank
  blank <- ws? newline %make_blank
  # comment: from ";" through the end of the line, newline included.
  comment <- ";" [^\n]* "\n" %make_blank
  # ws: horizontal whitespace; note that it also matches the empty string.
  ws <- [ \t]* %make_blank
  newline <- [\n] %make_blank

View file

@ -0,0 +1,182 @@
#!/usr/bin/env python3
import sys
import bussard.gen.types as t # for types
from bussard.reader import read
def format_time(num):
    """Render a duration in seconds using the largest unit that divides it.

    Weeks, days, hours and minutes are tried in that order; a value that is
    not a whole multiple of any of them is rendered in plain seconds.
    """
    units = (
        (7 * 24 * 60 * 60, "w"),
        (24 * 60 * 60, "d"),
        (60 * 60, "h"),
        (60, "m"),
    )
    for span, suffix in units:
        if num % span == 0:
            return f"{num//span}{suffix}"
    return f"{num}s"
def format_comment(record):
    """Return the record's trailing comment, or a bare newline if it has none."""
    comment = record.comment
    if comment:
        return comment
    return "\n"
def format_record_name(record, cont=None, soa=None, name_width=None):
    """Return the name column for ``record``.

    :param record: the record being rendered; only ``record.name`` is read.
    :param cont: optional record this one continues. When the (possibly
        abbreviated) name equals ``cont.name`` the name is blanked out with
        spaces of the same length.
    :param soa: optional SOA record. A record sharing the SOA's name is
        abbreviated to ``"@"``.
    :param name_width: optional column width to left-justify the name to.
    :return: the text to print in the name column.
    """
    name = record.name

    # ``soa`` is optional, so it must be checked before it is dereferenced;
    # without a SOA there is nothing to abbreviate against.
    if soa is not None and name == soa.name:
        name = "@"

    if cont and name == cont.name:
        name = " " * len(cont.name)

    if name_width:
        name = name.ljust(name_width)

    return name
def format_record_ttl(record, ttl=None):
    """Return the TTL column for ``record``, including its trailing space.

    The column is empty when the record has no TTL of its own, or when its
    TTL merely repeats the zone default carried by ``ttl``.
    """
    own = record.ttl
    repeats_default = bool(ttl) and own == ttl.ttl
    if repeats_default or not own:
        return ""
    return f"{own} "
def format_record(record, cont=None, soa=None, ttl=None, name_width=None):
    """Given a single record, render it nicely.

    :param record: a parsed record (one of the generated ``t.*`` types).
    :param cont: optional record this one continues; forwarded to
        ``format_record_name``.
    :param soa: optional SOA record; forwarded to ``format_record_name``.
    :param ttl: optional ``$TTL`` record holding the zone default; forwarded
        to ``format_record_ttl``.
    :param name_width: optional width of the name column.
    :return: the rendered text, or ``None`` for an unrecognized record type.
    """
    # The two directives have neither a name nor a type column.
    if isinstance(record, t.TTL):
        return f"$TTL {format_time(record.ttl)}{format_comment(record)}"

    elif isinstance(record, t.ORIGIN):
        return f"$ORIGIN {record.name}{format_comment(record)}"

    # Every remaining record starts with "<name> [<ttl> ]<type>".
    rname = format_record_name(record, cont=cont, soa=soa, name_width=name_width)
    prefix = f"{rname} {format_record_ttl(record, ttl=ttl)}{record.type}"

    if isinstance(record, t.SOA):
        return f"""{prefix} SOA {record.mname} {record.rname} (
{record.serial: <10} ; serial
{format_time(record.refresh): <10} ; refresh after
{format_time(record.retry): <10} ; retry after
{format_time(record.expire): <10} ; expire after
{format_time(record.minimum): <10} ; negative cache
)"""

    elif isinstance(record, t.A):
        return f"""{prefix} A {record.address: <15}{format_comment(record)}"""

    elif isinstance(record, t.AAAA):
        return f"""{prefix} AAAA {record.address: <39}{format_comment(record)}"""

    elif isinstance(record, t.CNAME):
        return f"""{prefix} CNAME {record.cname}{format_comment(record)}"""

    elif isinstance(record, t.MX):
        return f"""{prefix} MX {record.preference} {record.exchange}{format_comment(record)}"""

    elif isinstance(record, t.NS):
        return f"""{prefix} NS {record.nsdname}{format_comment(record)}"""

    elif isinstance(record, t.PTR):
        return f"""{prefix} PTR {record.ptrdname}{format_comment(record)}"""

    elif isinstance(record, t.TXT):
        return f'''{prefix} TXT "{record.txt_data}"{format_comment(record)}'''

    elif isinstance(record, t.SRV):
        return f"""{prefix} SRV {record.priority} {record.weight} {record.port} {record.target}{format_comment(record)}"""

    elif isinstance(record, t.RP):
        # The grammar names RP's second field txt_dname; txt_data belongs to
        # TXT records only, so reading it here raised AttributeError.
        return f"""{prefix} RP {record.mbox_dname} {record.txt_dname}{format_comment(record)}"""

    # Unrecognized record types fall through.
    return None
if __name__ == "__main__":
    # Script entry point: parse the zonefile named by the first command line
    # argument and print a normalized rendering of it to stdout.
    with open(sys.argv[1], "r") as f:
        records = list(read(f.read()))

    # Order records preferentially.
    # $ORIGIN
    # $TTL
    # SOA
    # $ORIGIN NS
    # $ORIGIN MX
    # then the remaining records, in their original order (see FIXME below).
    # one space between groups.

    # The first record of each special kind, or None when the zone has none.
    origin = next((r for r in records if isinstance(r, t.ORIGIN)), None)
    ttl = next((r for r in records if isinstance(r, t.TTL)), None)
    soa = next((r for r in records if isinstance(r, t.SOA)), None)

    # Without an explicit $ORIGIN, the SOA's name stands in for it.
    if soa and soa.name and not origin:
        origin = t.ORIGIN(soa.name)

    # A zone with neither $ORIGIN nor SOA has no origin at all; in that case
    # no NS/MX record is treated as global instead of crashing on None.
    origin_name = origin.name if origin is not None else None

    # Find the global nss and mxs
    nss = [r for r in records if isinstance(r, t.NS) and r.name == origin_name]
    mxs = [r for r in records if isinstance(r, t.MX) and r.name == origin_name]

    # Everything not already claimed by the header, comments included.
    tail = [r for r in records
            if (r != origin and r != ttl and r != soa and r not in nss and r not in mxs)]

    # Sort key for the planned alphabetical ordering; currently unused.
    def name_key(o):
        if isinstance(o, str):
            # It's a comment, sort by first word
            return o.split()[0].replace(";", "").lower()
        else:
            # It's a record, return the name
            return o.name

    # Group chunks, preserving the original order.
    # A bare newline entry closes the current chunk, if it has any content.
    chunk = []
    chunks = [chunk]
    for record in tail:
        if record != "\n":
            chunk.append(record)
        elif chunk:
            chunk = []
            chunks.append(chunk)

    # FIXME (arrdem 2020-02-01):
    # Split chunks somehow???
    # This is where the formater and linter diverge some.

    # Now render
    # Header entries are optional, so each is printed only when present.
    if origin is not None and origin.name != "@":
        print(format_record(origin).strip())

    if ttl:
        print(format_record(ttl).strip())

    if soa is not None:
        print(format_record(soa, ttl=ttl, soa=soa).rstrip())

    for ns in nss:
        print(format_record(ns, cont=soa, ttl=ttl, soa=soa).rstrip())

    for mx in mxs:
        print(format_record(mx, cont=soa, ttl=ttl, soa=soa).rstrip())

    for chunk in chunks:
        if chunk:
            # Align the name column on the longest name in the chunk;
            # comments are plain strings and contribute no width.
            width = max([len(r.name) if hasattr(r, "name") else 0 for r in chunk])
            for record in chunk:
                if isinstance(record, str):
                    print(record.rstrip())
                else:
                    print(format_record(record, ttl=ttl, soa=soa, name_width=width).rstrip())
            print()

View file

@ -0,0 +1,14 @@
#!/usr/bin/env python3
import sys
import bussard.gen.types as t # for types
from bussard.reader import read
if __name__ == "__main__":
    # Parse the zonefile named on the command line and dump each parsed
    # entry, one per line. The file is parsed in full before anything prints.
    zone_path = sys.argv[1]
    with open(zone_path, "r") as handle:
        text = handle.read()
        records = list(read(text))

    for entry in records:
        print(entry)

View file

@ -0,0 +1,236 @@
#!/usr/bin/env python3
"""A reader, integrating the generated parser and types.
Integrates the generated parser with the types, providing a reasonable way to interface with both
zonefiles through the parser.
"""
from types import LambdaType
from bussard.gen.parser import parse as _parse, Parser # noqa
from bussard.gen.types import * # noqa
def _merge(d1, d2):
res = {}
for k, v in d1.items():
res[k] = v
for k, v in d2.items():
if v is not None:
res[k] = v
return res
class PrintableLambda(object):
    """A callable that binds keyword arguments to a target and has a useful repr.

    When called, the bound keyword arguments are overlaid on the caller's via
    ``_merge``, so a bound value wins unless it is ``None``.
    """

    def __init__(self, fn, **kwargs):
        self._target, self._kwargs = fn, kwargs

    def __call__(self, *args, **kwargs):
        effective = _merge(kwargs, self._kwargs)
        return self._target(*args, **effective)

    def __repr__(self):
        target, bound = self._target, self._kwargs
        return f"lambda ({target!r}, **{bound!r})"
class Actions:
    """Semantic actions invoked by the generated parser, one per grammar rule.

    Every action receives ``(input, start, end, elements)``: the text being
    parsed, the span matched by the rule, and the already-processed children.

    Record rules do not build records directly. They return
    ``PrintableLambda`` objects that accumulate keyword arguments, because a
    record's name, TTL and type are only known to the enclosing rules.
    ``make_zone`` finally calls them to produce the records.
    """

    @staticmethod
    def make_zone(_input, _index, _offset, elements):
        """Zones are just a sequence of entries. For now.

        Yields ``$ORIGIN``/``$TTL`` records as-is, and every entry of a
        record group with the zone's origin and default TTL applied.

        :raises RuntimeError: if ``$ORIGIN`` or ``$TTL`` occurs twice.
        """
        origin = "@"  # Preserve the default unless we get an explicit origin
        ttl = None

        for e in elements:
            # $ORIGIN and $TTL set global defaults
            if isinstance(e, ORIGIN):
                if origin != "@":
                    raise RuntimeError("$ORIGIN occurs twice!")
                origin = e.name
                yield e

            elif isinstance(e, TTL):
                # Compare against None: a first "$TTL 0" is falsy and would
                # otherwise let a second $TTL slip through undetected.
                if ttl is not None:
                    raise RuntimeError("$TTL occurs twice!")
                ttl = e.ttl
                yield e

            # apply bindings to emit records
            elif isinstance(e, list):
                for fn in e:
                    if isinstance(fn, (LambdaType, PrintableLambda)):
                        yield fn(name=origin, ttl=ttl)
                    else:
                        # Comments and blank lines pass through as strings.
                        yield fn

    @staticmethod
    def make_origin(_input, _index, _offset, elements):
        """Build an ORIGIN from the name, the third element of the rule."""
        return ORIGIN(elements[2])

    @staticmethod
    def make_ttl(_input, _index, _offset, elements):
        """Build a TTL from the seconds value, the third element of the rule."""
        return TTL(elements[2])

    @staticmethod
    def make_records(_input, _index, _offset, elements):
        """Bind the shared leading name onto every record of a group.

        Returns a list in which record builders carry the name and all other
        entries (comments, blank lines) are left untouched.
        """
        name, repetitions = elements
        if name == "@":
            # We allow make_zone to bind @ to $ORIGIN if present
            name = None

        return [
            PrintableLambda(e, name=name) if isinstance(e, PrintableLambda) else e
            for e in repetitions
        ]

    @staticmethod
    def make_repeat(input, _index, _offset, elements):
        """Attach the trailing comment to a record builder."""
        _, record, _, comment = elements
        return PrintableLambda(record, comment=comment)

    @staticmethod
    def make_record_ttl(_input, _index, _offset, elements):
        """Attach an explicit per-record TTL to a record builder."""
        ttl, _, record = elements
        return PrintableLambda(record, ttl=ttl)

    @staticmethod
    def make_record_type(_input, _index, _offset, elements):
        """Attach the record's type text to a record builder."""
        # Named rtype so the builtin ``type`` is not shadowed.
        rtype, _, record = elements
        return PrintableLambda(record, type=rtype.text)

    ##################################################
    @staticmethod
    def make_a(_input, _index, _offset, elements):
        """Builder for an A record."""
        _, _, address = elements
        return PrintableLambda(A, address=address)

    @staticmethod
    def make_aaaa(_input, _index, _offset, elements):
        """Builder for an AAAA record."""
        _, _, address = elements
        return PrintableLambda(AAAA, address=address)

    @staticmethod
    def make_cname(_input, _index, _offset, elements):
        """Builder for a CNAME record."""
        _, _, cname = elements
        return PrintableLambda(CNAME, cname=cname)

    @staticmethod
    def make_mx(_input, _index, _offset, elements):
        """Builder for an MX record."""
        _, _, preference, _, mx = elements
        return PrintableLambda(MX, preference=preference, exchange=mx)

    @staticmethod
    def make_ns(_input, _index, _offset, elements):
        """Builder for an NS record."""
        _, _, ns = elements
        return PrintableLambda(NS, nsdname=ns)

    @staticmethod
    def make_soa(_input, _index, _offset, elements):
        """Builder for an SOA record.

        The placeholders skip the keyword, the parentheses and the
        whitespace matched between the fields.
        """
        (
            _,
            _,
            mname,
            _,
            rname,
            _,
            _,
            _,
            serial,
            _,
            refresh,
            _,
            retry,
            _,
            expire,
            _,
            minimum,
            _,
            _,
        ) = elements
        return PrintableLambda(
            SOA,
            mname=mname,
            rname=rname,
            serial=serial,
            refresh=refresh,
            retry=retry,
            expire=expire,
            minimum=minimum,
        )

    @staticmethod
    def make_srv(_input, _index, _offset, elements):
        """Builder for an SRV record."""
        _, _, priority, _, weight, _, port, _, target = elements
        return PrintableLambda(
            SRV, priority=priority, weight=weight, port=port, target=target
        )

    @staticmethod
    def make_txt(_input, _index, _offset, elements):
        """Builder for a TXT record."""
        _, _, txt_data = elements
        return PrintableLambda(TXT, txt_data=txt_data)

    @staticmethod
    def make_ptr(_input, _index, _offset, elements):
        """Builder for a PTR record."""
        _, _, ptrdname = elements
        return PrintableLambda(PTR, ptrdname=ptrdname)

    @staticmethod
    def make_rp(_input, _index, _offset, elements):
        """Builder for an RP record."""
        _, _, mbox_dname, _, txt_dname = elements
        return PrintableLambda(RP, mbox_dname=mbox_dname, txt_dname=txt_dname)

    @staticmethod
    def make_string(input, start, end, _elements):
        """Return the matched text without its first and last character."""
        return input[start + 1 : end - 1]

    ##################################################
    @staticmethod
    def make_word(_input, _index, _offset, elements):
        """Words have many elements, but we want their whole text."""
        return "".join(e.text for e in elements).lower()  # Uppercase is a lie in DNS

    @staticmethod
    def make_num(input, start, end, _elements):
        """Parse the matched text as a base-10 integer."""
        return int(input[start:end], 10)

    @staticmethod
    def make_seconds(_input, _, _end, elements):
        """Convert a number with an optional unit suffix into seconds.

        ``elements`` holds the number, optionally followed by a node whose
        ``text`` is the unit; an absent or empty unit means plain seconds.
        """
        base = elements[0]
        # Check the length before indexing: the unit node may be missing, and
        # reading elements[1] first raised IndexError in that case.
        unit = elements[1].text.lower() if len(elements) == 2 else ""
        if not unit:
            return base

        factor = {
            "s": 1,
            "m": 60,
            "h": 60 * 60,
            "d": 24 * 60 * 60,
            "w": 7 * 24 * 60 * 60,
        }[unit]
        return base * factor

    @staticmethod
    def make_v4(input, start, end, _elements):
        """Return the matched IPv4 address text unchanged."""
        return input[start:end]

    @staticmethod
    def make_v6(input, start, end, _elements):
        """Return the matched IPv6 address text unchanged."""
        return input[start:end]

    @staticmethod
    def make_blank(input, start, end, *_):
        """Return the matched whitespace or comment text unchanged."""
        return input[start:end]
def read(input):
    """Parse a whole zonefile with the generated parser and these Actions.

    Returns whatever the parser produces for the zone, formatting
    information (comments and blank lines) included.
    """
    actions = Actions()
    return _parse(input, actions=actions)
def read1(input):
    """Read a single record as if it were part of a zonefile.

    Only the first entry produced by ``read`` is returned.
    Really just for testing.
    """
    entries = read(input)
    return next(entries)

View file

@ -0,0 +1,269 @@
"""
Tests of the Bussard reader.
"""
import bussard.reader as t
from bussard.reader import Actions, Parser, read, read1
def parse_word(input):
    """Run only the parser's word rule against ``input``."""
    parser = Parser(input, Actions(), None)
    return parser._read_word()
def test_parse_name():
    """The word rule stops at whitespace and accepts dashes, dots and stars."""
    cases = (
        ("foo", "foo"),
        ("foo", "foo bar"),
        ("foo", "foo bar"),
        ("foo-bar", "foo-bar"),
        ("*", "*"),
        ("*.foo", "*.foo"),
    )
    for expected, text in cases:
        assert expected == parse_word(text)
def test_read_ttl():
    """A $TTL directive reads back as a TTL record."""
    parsed = read1("$TTL 300\n")
    assert isinstance(parsed, t.TTL)
def test_read_origin():
    """An $ORIGIN directive reads back as an ORIGIN record."""
    parsed = read1("$ORIGIN foobar.org\n")
    assert isinstance(parsed, t.ORIGIN)
def test_read_soa():
    """Test a couple of SOA cases, exercising both parsing and formatting."""
    zones = (
        # Basically no formatting.
        "@ IN SOA ns1. root. (2020012301 60 90 1w 60)\n",
        # With a TTL
        "@ 300 IN SOA ns1. root. (2020012301 60 90 1w 60)\n",
        # Some meaningful formatting.
        "@ IN SOA ns1. root. (\n"
        "2020012301 ; comment\n"
        "60 ; comment\n"
        "90 ; comment\n"
        "1w ; comment\n"
        "60 ; comment\n"
        ")\n",
    )
    for zone in zones:
        assert isinstance(read1(zone), t.SOA)
def test_read_a():
    """Test that some A records parse."""
    zones = (
        "@ IN A 127.0.0.1\n",
        # With a TTL
        "@ 300 IN A 127.0.0.1\n",
        # With a TTL & comment
        "@ 300 IN A 127.0.0.1; comment\n",
        # With a TTL & comment & whitespace
        "@ 300 IN A 127.0.0.1 ; comment\n",
    )
    for zone in zones:
        assert isinstance(read1(zone), t.A)
def test_read_aaaa():
    """Test that some quad-a records parse"""
    zones = (
        "foo IN AAAA ::1\n",
        # With a TTL
        "foo 300 IN AAAA ::1\n",
        # With a TTL & comment
        "foo 300 IN AAAA ::1; comment\n",
        # With a TTL & whitespace & comment
        "foo 300 IN AAAA ::1 ; comment\n",
    )
    for zone in zones:
        assert isinstance(read1(zone), t.AAAA)
def test_read_cname():
    """Test some CNAME cases."""
    zones = (
        "bar IN CNAME qux.\n",
        "bar IN CNAME bar-other\n",
        # With TTL
        "bar 300 IN CNAME bar-other.\n",
        # With TTL & comment
        "bar 300 IN CNAME bar-other.; comment\n",
        # With TTL & whitespace & comment
        "bar 300 IN CNAME bar-other. ; comment\n",
    )
    for zone in zones:
        assert isinstance(read1(zone), t.CNAME)
def test_read_mx():
    """Some MX record examples."""
    zones = (
        "@ IN MX 10 mx1.\n",
        # With TTL
        "@ 300 IN MX 10 mx1.\n",
        # With TTL & comment
        "@ 300 IN MX 10 mx1.;bar\n",
        # With TTL & whitespace & comment
        "@ 300 IN MX 10 mx1. ; bar\n",
    )
    for zone in zones:
        assert isinstance(read1(zone), t.MX)
def test_read_repeated():
    """Several records may follow a single leading name."""
    plain = (
        "foo IN A 10.0.0.1\n"
        "IN A 10.0.0.2; comment\n"
        "IN A 10.0.0.3 ; with whitespace\n"
    )
    assert all(isinstance(e, t.A) for e in read(plain))

    # Note that comments and newlines become raw strings
    with_comment = (
        "foo IN A 10.0.0.1\n"
        "; comment\n"
        "IN A 10.0.0.2; comment\n"
        "IN A 10.0.0.3 ; with whitespace\n"
    )
    # Materialize first so the whole zone is read before checking.
    checks = [isinstance(e, (t.A, str)) for e in read(with_comment)]
    assert all(checks)