From 21b4b4432a767408a0704a795e58fc577e69e530 Mon Sep 17 00:00:00 2001
From: Reid 'arrdem' McKenzie <me@arrdem.com>
Date: Sat, 2 Jul 2022 00:35:03 -0600
Subject: [PATCH] Slam out a two-pass assembler

---
 .../shoggoth/src/python/ichor/__init__.py     |  1 +
 .../shoggoth/src/python/ichor/assembler.py    | 62 +++++++++++++++++++
 .../shoggoth/src/python/ichor/bootstrap.py    |  1 +
 .../test/python/ichor/test_assembler.py       | 47 ++++++++++++++
 4 files changed, 111 insertions(+)
 create mode 100644 projects/shoggoth/src/python/ichor/assembler.py
 create mode 100644 projects/shoggoth/test/python/ichor/test_assembler.py

diff --git a/projects/shoggoth/src/python/ichor/__init__.py b/projects/shoggoth/src/python/ichor/__init__.py
index 375fa7c..c30965d 100644
--- a/projects/shoggoth/src/python/ichor/__init__.py
+++ b/projects/shoggoth/src/python/ichor/__init__.py
@@ -10,3 +10,4 @@ from ichor.bootstrap import *  # noqa
 from ichor.impl import *  # noqa
 from ichor.isa import *  # noqa
 from ichor.typing import *  # noqa
+from ichor.assembler import *  # noqa
diff --git a/projects/shoggoth/src/python/ichor/assembler.py b/projects/shoggoth/src/python/ichor/assembler.py
new file mode 100644
index 0000000..dc47df3
--- /dev/null
+++ b/projects/shoggoth/src/python/ichor/assembler.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+from dataclasses import dataclass
+from random import choices
+from string import ascii_lowercase, digits
+from typing import List
+
+from ichor.isa import Opcode
+
+
+@dataclass(frozen=True)
+class Label(object):
+    """A named jump target; frozen so it can safely serve as a dict key."""
+
+    name: str
+
+
+class FuncBuilder(object):
+    """Accumulate opcodes and label marks, then assemble them via build()."""
+
+    def __init__(self) -> None:
+        self._opcodes = []  # Opcodes and Label marks, in the order written.
+
+    def write(self, op: Opcode):
+        """Append one opcode to the body."""
+        self._opcodes.append(op)
+
+    def make_label(self, prefix=""):
+        """Return a new Label named `<prefix or gensym>_<8 random chars>`."""
+        frag = "".join(choices(ascii_lowercase + digits, k=8))
+        return Label(f"{prefix or 'gensym'}_{frag}")
+
+    def set_label(self, label: Label):
+        """Mark the index of the next written opcode as `label`'s target."""
+        self._opcodes.append(label)
+
+    def build(self) -> List[Opcode]:
+        """Assemble the written body into fully resolved opcodes.
+
+        Raises AssertionError on a repeated label, KeyError on an unset target.
+        """
+        # First pass: strip label marks, recording the index each one names.
+        labels = {}
+        unassembled = []
+        for op in self._opcodes:
+            if not isinstance(op, Label):
+                unassembled.append(op)
+            elif op in labels:  # Explicit raise: `assert` vanishes under -O.
+                raise AssertionError(f"Duplicate label {op.name!r}")
+            else:
+                labels[op] = len(unassembled)
+        # Second pass: swap label operands (forward or backward) for indices.
+        assembled = []
+        for op in unassembled:
+            match op:
+                case Opcode.GOTO(Label(_) as label):
+                    assembled.append(Opcode.GOTO(labels[label]))
+                case Opcode.VTEST(Label(_) as label):
+                    assembled.append(Opcode.VTEST(labels[label]))
+                case _:
+                    assembled.append(op)
+        return assembled
diff --git a/projects/shoggoth/src/python/ichor/bootstrap.py b/projects/shoggoth/src/python/ichor/bootstrap.py
index d34b42f..40dc611 100644
--- a/projects/shoggoth/src/python/ichor/bootstrap.py
+++ b/projects/shoggoth/src/python/ichor/bootstrap.py
@@ -6,6 +6,7 @@ Hopefully no "real" interpreter ever uses this code, since it's obviously replac
 
 from ichor.isa import Opcode
 from ichor.state import Module, Variant
+from ichor.assembler import FuncBuilder
 
 
 BOOTSTRAP = Module()
diff --git a/projects/shoggoth/test/python/ichor/test_assembler.py b/projects/shoggoth/test/python/ichor/test_assembler.py
new file mode 100644
index 0000000..c7deea5
--- /dev/null
+++ b/projects/shoggoth/test/python/ichor/test_assembler.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+from ichor import FuncBuilder, Opcode
+
+import pytest
+
+@pytest.fixture
+def builder() -> FuncBuilder:  # Default fixture scope: a new builder per test.
+    return FuncBuilder()
+
+
+def test_forwards_label(builder: FuncBuilder):
+    """A GOTO written ahead of its label resolves to the label's index."""
+    target = builder.make_label()
+    builder.write(Opcode.GOTO(target))
+    builder.write(Opcode.DROP(0))  # no-op
+    builder.set_label(target)
+    builder.write(Opcode.DROP(0))  # no-op
+
+    resolved = builder.build()
+    expected = [Opcode.GOTO(2), Opcode.DROP(0), Opcode.DROP(0)]
+    assert resolved == expected
+
+
+
+def test_backwards_label(builder: FuncBuilder):
+    """A GOTO written after its label resolves back to the label's index."""
+    target = builder.make_label()
+    builder.set_label(target)
+    builder.write(Opcode.DROP(0))  # no-op
+    builder.write(Opcode.GOTO(target))
+
+    resolved = builder.build()
+    expected = [Opcode.DROP(0), Opcode.GOTO(0)]
+    assert resolved == expected
+
+
+def test_self_label(builder: FuncBuilder):
+    """A GOTO placed directly on its own label resolves to its own index."""
+    target = builder.make_label()
+    builder.write(Opcode.DROP(0))  # no-op
+    builder.set_label(target)
+    builder.write(Opcode.GOTO(target))
+
+    resolved = builder.build()
+    expected = [Opcode.DROP(0), Opcode.GOTO(1)]
+    assert resolved == expected