70 lines
1.5 KiB
Python
70 lines
1.5 KiB
Python
|
import typing as t
|
||
|
|
||
|
from hashlib import sha256
|
||
|
from pathlib import Path
|
||
|
|
||
|
|
||
|
def iter_chunks(fp):
|
||
|
yield from iter(lambda: fp.read(4096), b"")
|
||
|
|
||
|
|
||
|
def take(n, iter):
|
||
|
"""Take the first N items lazily off of an iterable."""
|
||
|
|
||
|
for _ in range(0, n):
|
||
|
try:
|
||
|
yield next(iter)
|
||
|
except StopIteration:
|
||
|
break
|
||
|
|
||
|
|
||
|
def checksum_list(iter, sum=sha256, salt=b";"):
|
||
|
"""Compute the checksum of a bunch of stuff from an iterable."""
|
||
|
|
||
|
sum = sum()
|
||
|
for i in iter:
|
||
|
if salt:
|
||
|
sum.update(salt) # Merkle tree salting.
|
||
|
if isinstance(i, str):
|
||
|
i = str.encode(i, "utf-8")
|
||
|
try:
|
||
|
sum.update(i)
|
||
|
except Exception as e:
|
||
|
print(i, type(i))
|
||
|
raise e
|
||
|
|
||
|
return sum.hexdigest()
|
||
|
|
||
|
|
||
|
def checksum_path_blocks(p: Path, sum=sha256) -> t.Iterable[str]:
|
||
|
"""Compute block-wise checksums of a file.
|
||
|
|
||
|
Inspired by the Dropbox content-hashing interface -
|
||
|
|
||
|
https://www.dropbox.com/developers/reference/content-hash
|
||
|
|
||
|
"""
|
||
|
|
||
|
def _helper():
|
||
|
with open(p, "rb") as fp:
|
||
|
for chunk in iter_chunks(fp):
|
||
|
digest = sum()
|
||
|
digest.update(chunk)
|
||
|
yield digest.hexdigest()
|
||
|
|
||
|
return list(_helper())
|
||
|
|
||
|
|
||
|
def checksum_path(p: Path, sum=sha256) -> str:
|
||
|
"""Compute 'the' checksum of an entire file.
|
||
|
|
||
|
Note that this does semi-streaming I/O.
|
||
|
|
||
|
"""
|
||
|
|
||
|
sum = sum()
|
||
|
with open(p, "rb") as fp:
|
||
|
for chunk in iter_chunks(fp):
|
||
|
sum.update(chunk)
|
||
|
return sum.hexdigest()
|