Update benchmarks

parent edf5e4d231
commit b52e0488d7
3 changed files with 110 additions and 39 deletions
@@ -39,7 +39,7 @@ Benchmarks are extremely steady.
 Flushing a sqlite file to disk seems to be the limiting factor of I/O, and pipelining multiple message writes is undoubtedly the way to go.
 
 However the purpose of the API is to use the sqlite file as the shared checkpoint between potentially many processes, so 'large' transactions are an antipattern.
 
-Tests suggest that this library is rock steady at 100 writes per sec. and 100 polls per sec. and completely bounded by sqlite controlled I/O as evidenced by using `":memory:"` which doesn't have to `fsync()`.
+The `naive_fsync` benchmark measures how long a simple `f.write(); f.flush(); os.fsync(f.fileno())` takes, and represents a lower bound for "transactional" I/O in a strictly appending system. That `insert` clocks in at about 10 ms/op whereas `naive_fsync` clocks in at about 4.5 ms suggests that the "overhead" imposed by SQLite is actually pretty reasonable, and only ~2x gains are possible without sacrificing durability.
 
 ``` shell
 $ bazel run :benchmark
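The pipelining claim above is easy to sanity-check outside this commit. Below is a minimal standalone sketch (illustrative `events` table and `timed_inserts` helper, not this library's API or schema) comparing one fsync-backed commit per write against a single batched transaction:

``` python
# Standalone sketch: per-write commits vs. one batched transaction.
# Table/column names are illustrative, not jobq's actual schema.
import sqlite3
import time

def timed_inserts(path: str, batched: bool, n: int = 100) -> float:
    conn = sqlite3.connect(path)
    conn.execute("CREATE TABLE IF NOT EXISTS events (payload TEXT)")
    start = time.perf_counter()
    if batched:
        with conn:  # one transaction, so roughly one fsync for all n writes
            for i in range(n):
                conn.execute("INSERT INTO events VALUES (?)", (f"msg-{i}",))
    else:
        for i in range(n):
            with conn:  # one transaction (and one fsync) per write
                conn.execute("INSERT INTO events VALUES (?)", (f"msg-{i}",))
    conn.close()
    return time.perf_counter() - start
```

Comparing `timed_inserts("/tmp/pipeline-test.sqlite3", batched=True)` against `batched=False` should show the per-write variant paying the fsync cost once per row, which is the "pipelining" headroom the README describes.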
@@ -51,40 +51,54 @@ Target //projects/jobq:benchmark up-to-date:
 
 ...
 
-Ran 'insert' 10000 times, total time 101.810816516 (s)
-mean: 0.010148992843 (s)
-median: 0.009474293 (s)
-stddev: 0.006727934042954838 (s)
-test overhead: 3.20888086e-05 (s)
-
-Ran 'poll' 10000 times, total time 100.482262487 (s)
-mean: 0.0100152467857 (s)
-median: 0.0095528585 (s)
-stddev: 0.00821730176268304 (s)
-test overhead: 3.2979463000000004e-05 (s)
-
-Ran 'append_event' 10000 times, total time 105.015296419 (s)
-mean: 0.0104681294652 (s)
-median: 0.009592544 (s)
-stddev: 0.007321370576225584 (s)
-test overhead: 3.34001767e-05 (s)
-
+Getting a baseline
+Ran 'naive_serialize' 10000 times, total time 61.108668 ms
+mean: 4.4800743 us
+median: 3.876 us
+stddev: 2.9536885497940415 us
+test overhead: 1.6307925 us
+
+Ran 'naive_fsync' 10000 times, total time 46.169303029 s
+mean: 4.5833899318 ms
+median: 4.3935955 ms
+stddev: 3.6405235897574997 ms
+test overhead: 33.540371099999994 us
+
+Testing with /tmp/jobq-bench.sqlite3
+Ran 'insert' 10000 times, total time 106.925228341 s
+mean: 10.660283108 ms
+median: 9.6410375 ms
+stddev: 7.318317035793426 ms
+test overhead: 32.2397261 us
+
+Ran 'poll' 10000 times, total time 102.727780818 s
+mean: 10.2409366588 ms
+median: 9.7355345 ms
+stddev: 6.566850750848292 ms
+test overhead: 31.841423 us
+
+Ran 'append_event' 10000 times, total time 105.075667417 s
+mean: 10.4747147621 ms
+median: 9.6445595 ms
+stddev: 8.221955029149633 ms
+test overhead: 32.8519796 us
+
 Testing with :memory:
-Ran 'insert' 10000 times, total time 0.37031511 (s)
-mean: 3.3595880100000005e-05 (s)
-median: 2.96015e-05 (s)
-stddev: 1.045088890675899e-05 (s)
-test overhead: 3.4356309e-06 (s)
+Ran 'insert' 10000 times, total time 527.735484 ms
+mean: 49.054487699999996 us
+median: 43.823 us
+stddev: 13.212556522688379 us
+test overhead: 3.7190607 us
 
-Ran 'poll' 10000 times, total time 1.17148314 (s)
-mean: 0.0001128911222 (s)
-median: 9.7398e-05 (s)
-stddev: 3.213524197973896e-05 (s)
-test overhead: 4.2571917999999996e-06 (s)
+Ran 'poll' 10000 times, total time 4.995857505 s
+mean: 495.27983409999996 us
+median: 161.8315 us
+stddev: 443.6358523771585 us
+test overhead: 4.3059164 us
 
-Ran 'append_event' 10000 times, total time 0.415490332 (s)
-mean: 3.78861989e-05 (s)
-median: 3.3019e-05 (s)
-stddev: 1.1752889674795285e-05 (s)
-test overhead: 3.6628343e-06 (s)
 ```
+Ran 'append_event' 10000 times, total time 579.750342 ms
+mean: 54.1721441 us
+median: 48.054 us
+stddev: 15.205861822465426 us
+test overhead: 3.8028901 us
+```
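A quick back-of-the-envelope from the numbers above (this check is mine, not part of the commit): the SQLite-backed `insert` mean against the bare-fsync floor bounds how much a still-durable implementation could gain.

``` python
# Numbers copied from the benchmark output above.
insert_mean_ms = 10.660283108  # 'insert' mean, /tmp/jobq-bench.sqlite3 run
fsync_floor_ms = 4.5833899318  # 'naive_fsync' baseline mean
print(insert_mean_ms / fsync_floor_ms)  # ~2.33x -> the README's "~2x" headroom
```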
@@ -11,6 +11,7 @@ import string
 from statistics import mean, median, stdev
 import tempfile
 import logging
+import json
 
 from jobq import JobQueue
 
@@ -39,6 +40,20 @@ def timing():
    obj.end = perf_counter_ns()
 
 
+def timer(val: float) -> str:
+    """Given a duration in ns, scale it to the largest unit (s/ms/us/ns) whose integer part stays between 1 and 9999."""
+
+    for factor, unit in [
+        (1e9, 's'),
+        (1e6, 'ms'),
+        (1e3, 'us'),
+        (1, 'ns'),
+    ]:
+        scaled_val = val / factor
+        if 1e4 > scaled_val > 1.0:
+            return f"{scaled_val} {unit}"
+
+
 def bench(callable, reps):
     timings = []
     with timing() as run_t:
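A few hypothetical spot-checks of the new helper (not in the commit; inputs are ns, as produced by `perf_counter_ns` deltas). Note that values at or below 1 ns, or at 10^4 s and above, fall through every bracket and return None:

``` python
# Expected outputs shown in the comments, assuming timer() as committed.
print(timer(3_876))           # 3.876 us
print(timer(4_583_389.9318))  # 4.5833899318 ms
print(timer(46_169_303_029))  # 46.169303029 s
print(timer(0.5))             # None: falls through every bracket
```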
@@ -46,16 +61,52 @@ def bench(callable, reps):
         with timing() as t:
             callable()
         timings.append(t.duration)
-    print(f"""Ran {callable.__name__!r} {reps} times, total time {run_t.duration / 1e9} (s)
-mean: {mean(timings) / 1e9} (s)
-median: {median(timings) / 1e9} (s)
-stddev: {stdev(timings) / 1e9} (s)
-test overhead: {(run_t.duration - sum(timings)) / reps / 1e9} (s)
+    print(f"""Ran {callable.__name__!r} {reps} times, total time {timer(run_t.duration)}
+mean: {timer(mean(timings))}
+median: {timer(median(timings))}
+stddev: {timer(stdev(timings))}
+test overhead: {timer((run_t.duration - sum(timings)) / reps)}
 """)
 
 
+def test_reference_json(reps):
+    """As a reference benchmark, test just serializing a payload."""
+
+    jobs = [
+        {"user_id": randint(0, 1<<32), "msg": randstr(256)}
+        for _ in range(reps)
+    ]
+    jobs_i = iter(jobs)
+
+    def naive_serialize():
+        json.dumps([next(jobs_i), ["CREATED"]])
+
+    bench(naive_serialize, reps)
+
+
+def test_reference_fsync(reps):
+    """As a reference benchmark, test just appending to a file."""
+
+    jobs = [
+        {"user_id": randint(0, 1<<32), "msg": randstr(256)}
+        for _ in range(reps)
+    ]
+    jobs_i = iter(jobs)
+
+    handle, path = tempfile.mkstemp()
+    os.close(handle)
+    with open(path, "w") as fd:
+        def naive_fsync():
+            fd.write(json.dumps([next(jobs_i), ["CREATED"]]))
+            fd.flush()
+            os.fsync(fd.fileno())
+
+        bench(naive_fsync, reps)
+
+
 def test_insert(q, reps):
-    # Measuring insertion time
+    """Benchmark insertion time to a given SQLite DB."""
+
     jobs = [
         {"user_id": randint(0, 1<<32), "msg": randstr(256)}
         for _ in range(reps)
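Also not in the commit: the harness can be pointed at a no-op to see what the measurement scaffolding itself costs, which is roughly what the reported "test overhead" line approximates.

``` python
# Hypothetical quick check, assuming bench() and its helpers are importable.
def noop():
    pass

bench(noop, 10_000)  # mean/median should land near perf_counter_ns resolution
```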
@@ -69,14 +120,17 @@ def test_insert(q, reps):
 
 
 def test_poll(q, reps):
+    """Benchmark query/update time on a given SQLite DB."""
+
     def poll():
-        q.poll([["=", "json_extract(j.state, '$[0]')", "'CREATED'"]], ["POLLED"])
+        q.poll("json_extract(j.state, '$[0]') = 'CREATED'", ["POLLED"])
 
     bench(poll, reps)
 
 
 def test_append(q, reps):
+    """Benchmark adding an event on a given SQLite DB."""
+
     def append_event():
         q.append_event(randint(1, reps), {"foo": "bar"})
 
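An aside on the `poll` change (my reading, not stated in the commit): the new argument is exactly the infix rendering of the old `[op, lhs, rhs]` triple, which suggests `compile_query` is now handed (or passes through) a pre-rendered SQL predicate.

``` python
# Hypothetical illustration: rendering the old structured predicate infix
# yields precisely the new raw-SQL argument.
op, lhs, rhs = "=", "json_extract(j.state, '$[0]')", "'CREATED'"
assert f"{lhs} {op} {rhs}" == "json_extract(j.state, '$[0]') = 'CREATED'"
```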
@@ -95,6 +149,10 @@ if __name__ == "__main__":
     if os.path.exists(path):
         os.remove(path)
 
+    print("Getting a baseline")
+    test_reference_json(reps)
+    test_reference_fsync(reps)
+
     # And the tests
     print(f"Testing with {path}")
     q = JobQueue(path)
@@ -258,7 +258,6 @@ class JobQueue(object):
         with self._db as db:
             cur = db.cursor()
             statement = _POLL_SQL.format(compile_query(query))
-            print(statement)
             cur.execute(statement, {"state": json.dumps(new_state)})
             results = cur.fetchall()
             if results: