Spinner while copying; handle dirt files better
This commit is contained in:
parent
af63cd201f
commit
39eff4e53a
2 changed files with 128 additions and 73 deletions
|
@ -14,5 +14,6 @@ zapp_binary(
|
||||||
],
|
],
|
||||||
deps = [
|
deps = [
|
||||||
py_requirement("ExifRead"),
|
py_requirement("ExifRead"),
|
||||||
|
py_requirement("yaspin"),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
@ -29,6 +29,14 @@ from .util import *
|
||||||
|
|
||||||
# FIXME: use piexif, which supports writeback not exifread.
|
# FIXME: use piexif, which supports writeback not exifread.
|
||||||
import exifread
|
import exifread
|
||||||
|
from yaspin import Spinner, yaspin
|
||||||
|
|
||||||
|
|
||||||
|
_print = print
|
||||||
|
|
||||||
|
|
||||||
|
def print(*strs, **kwargs):
|
||||||
|
_print("\r", *strs, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
@ -38,6 +46,17 @@ parser.add_argument("destructive", action="store_true", default=False)
|
||||||
|
|
||||||
|
|
||||||
MODIFIED_ISO_DATE = "%Y:%m:%dT%H:%M:%SF%f"
|
MODIFIED_ISO_DATE = "%Y:%m:%dT%H:%M:%SF%f"
|
||||||
|
SPINNER = Spinner(["|", "/", "-", "\\"], 200)
|
||||||
|
KNOWN_IMG_TYPES = {
|
||||||
|
".jpg": ".jpeg",
|
||||||
|
".jpeg": ".jpeg",
|
||||||
|
".png": ".png",
|
||||||
|
".mov": ".mov",
|
||||||
|
".gif": ".gif",
|
||||||
|
".mp4": ".mp4",
|
||||||
|
".m4a": ".m4a",
|
||||||
|
".oga": ".oga", # How the hell do I have ogg files kicking around
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def exif_tags(p: Path) -> object:
|
def exif_tags(p: Path) -> object:
|
||||||
|
@ -60,14 +79,29 @@ def safe_strptime(date, format):
|
||||||
try:
|
try:
|
||||||
return datetime.strptime(date, format)
|
return datetime.strptime(date, format)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
pass
|
||||||
|
|
||||||
|
|
||||||
def date_from_name(p: Path):
|
def safe_ymdhmms(date):
|
||||||
"""Try to munge a datestamp out of a path."""
|
fmt = (
|
||||||
|
r"(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})"
|
||||||
|
r" "
|
||||||
|
r"(?P<hour>\d{2})(?P<minute>\d{2})(?P<second>\d{2})(?P<millisecond>\d{3})"
|
||||||
|
)
|
||||||
|
m = re.match(fmt, date)
|
||||||
|
if m:
|
||||||
|
return datetime(
|
||||||
|
year=int(m.group("year")),
|
||||||
|
month=int(m.group("month")),
|
||||||
|
day=int(m.group("day")),
|
||||||
|
hour=int(m.group("hour")),
|
||||||
|
minute=int(m.group("minute")),
|
||||||
|
second=int(m.group("second")),
|
||||||
|
microsecond=int(m.group("millisecond")) * 1000,
|
||||||
|
)
|
||||||
|
|
||||||
fname = ".".join(p.name.split(".")[:-1])
|
|
||||||
|
|
||||||
|
def date_from_name(fname: str):
|
||||||
# Discard common prefixes
|
# Discard common prefixes
|
||||||
fname = fname.replace("IMG_", "")
|
fname = fname.replace("IMG_", "")
|
||||||
fname = fname.replace("PXL_", "")
|
fname = fname.replace("PXL_", "")
|
||||||
|
@ -93,54 +127,59 @@ def date_from_name(p: Path):
|
||||||
|
|
||||||
# Try to guess the date
|
# Try to guess the date
|
||||||
# File date formats:
|
# File date formats:
|
||||||
for fmt in [
|
for unfmt in [
|
||||||
# Our date format
|
# Our date format
|
||||||
MODIFIED_ISO_DATE,
|
lambda d: safe_strptime(d, MODIFIED_ISO_DATE),
|
||||||
# A bug
|
# A bug
|
||||||
# 2014:08:21T19:4640F1408672000
|
# 2014:08:21T19:4640F1408672000
|
||||||
# 2015:12:14T23:0933F1450159773
|
# 2015:12:14T23:0933F1450159773
|
||||||
"%Y:%m:%dT%H:%M%SF%f",
|
lambda d: safe_strptime(d, "%Y:%m:%dT%H:%M%SF%f"),
|
||||||
# 2020-12-21 17.15.09.0
|
# 2020-12-21 17.15.09.0
|
||||||
"%Y-%m-%d %H.%M.%S.%f",
|
lambda d: safe_strptime(d, "%Y-%m-%d %H.%M.%S.%f"),
|
||||||
# 2020-12-21 17.15.09
|
# 2020-12-21 17.15.09
|
||||||
"%Y-%m-%d %H.%M.%S",
|
lambda d: safe_strptime(d, "%Y-%m-%d %H.%M.%S"),
|
||||||
# 2019-02-09 12.45.32-6
|
# 2019-02-09 12.45.32-6
|
||||||
# 2019-01-13 13.43.45-16
|
# 2019-01-13 13.43.45-16
|
||||||
"%Y-%m-%d %H.%M.%S-%f",
|
lambda d: safe_strptime(d, "%Y-%m-%d %H.%M.%S-%f"),
|
||||||
# Note the _1 or such may not be millis, but we assume it is.
|
# Note the _1 or such may not be millis, but we assume it is.
|
||||||
# 20171113_130826_1
|
# 20171113_130826_1
|
||||||
# 20171113 130826 1
|
# 20171113 130826 1
|
||||||
"%Y%m%d %H%M%S %f",
|
lambda d: safe_strptime(d, "%Y%m%d %H%M%S %f"),
|
||||||
# 20180404_114639
|
# 20180404_114639
|
||||||
# 20180404 114639
|
# 20180404 114639
|
||||||
"%Y%m%d %H%M%S",
|
lambda d: safe_strptime(d, "%Y%m%d %H%M%S"),
|
||||||
# 2017-11-05_15:15:55
|
# 2017-11-05_15:15:55
|
||||||
# 2017-11-05 15:15:55
|
# 2017-11-05 15:15:55
|
||||||
"%Y-%m-%d %H:%M:%S",
|
lambda d: safe_strptime(d, "%Y-%m-%d %H:%M:%S"),
|
||||||
|
lambda d: safe_strptime(d, "%Y%m%d %h%m%s%f"),
|
||||||
|
# HACK:
|
||||||
|
# Python doesn't support %s as milliseconds; these don't quite work.
|
||||||
|
# So use a custom matcher.
|
||||||
|
# 20210526 002327780
|
||||||
# 20210417_220753284
|
# 20210417_220753284
|
||||||
# 20210417 220753284
|
# 20210417 220753284
|
||||||
# 20210304 204755545
|
# 20210304 204755545
|
||||||
"%Y%m%d %h%m%s%f",
|
# 20211111 224304117
|
||||||
|
safe_ymdhmms,
|
||||||
]:
|
]:
|
||||||
try:
|
val = unfmt(fname)
|
||||||
return datetime.strptime(fname, fmt)
|
if val is not None:
|
||||||
except ValueError:
|
return val
|
||||||
continue
|
|
||||||
else:
|
|
||||||
|
def date_from_path(p: Path):
|
||||||
|
"""Try to munge a datestamp out of a path."""
|
||||||
|
|
||||||
|
fname = ".".join(p.name.split(".")[:-1])
|
||||||
|
|
||||||
|
date = date_from_name(fname)
|
||||||
|
if not date:
|
||||||
print(f"Warning: Unable to infer datetime from {fname!r}", file=sys.stderr)
|
print(f"Warning: Unable to infer datetime from {fname!r}", file=sys.stderr)
|
||||||
|
return date
|
||||||
|
|
||||||
|
|
||||||
def normalize_ext(p: Path):
|
def normalize_ext(p: Path):
|
||||||
renaming = {
|
renaming = KNOWN_IMG_TYPES
|
||||||
".jpg": ".jpeg",
|
|
||||||
".jpeg": ".jpeg",
|
|
||||||
".png": ".png",
|
|
||||||
".mov": ".mov",
|
|
||||||
".gif": ".gif",
|
|
||||||
".mp4": ".mp4",
|
|
||||||
".m4a": ".m4a",
|
|
||||||
".oga": ".oga", # How the hell do I have ogg files kicking around
|
|
||||||
}
|
|
||||||
exts = [e.lower() for e in p.suffixes]
|
exts = [e.lower() for e in p.suffixes]
|
||||||
# Guess an ext out of potentially many, allowing only for folding of effective dupes
|
# Guess an ext out of potentially many, allowing only for folding of effective dupes
|
||||||
exts = set(renaming[e] for e in exts if e in renaming)
|
exts = set(renaming[e] for e in exts if e in renaming)
|
||||||
|
@ -264,7 +303,7 @@ def img_info(p: Path) -> ImgInfo:
|
||||||
)
|
)
|
||||||
if date and (date := safe_strptime(date, "%Y:%m:%d %H:%M:%S")):
|
if date and (date := safe_strptime(date, "%Y:%m:%d %H:%M:%S")):
|
||||||
pass
|
pass
|
||||||
elif date := date_from_name(p):
|
elif date := date_from_path(p):
|
||||||
dirty |= True
|
dirty |= True
|
||||||
else:
|
else:
|
||||||
# The oldest of the mtime and the ctime
|
# The oldest of the mtime and the ctime
|
||||||
|
@ -285,6 +324,9 @@ def img_info(p: Path) -> ImgInfo:
|
||||||
|
|
||||||
date = date.replace(microsecond=subsec)
|
date = date.replace(microsecond=subsec)
|
||||||
|
|
||||||
|
if not (2015 <= date.year <= datetime.now().year):
|
||||||
|
raise ValueError(f"{p}'s inferred date ({date!r}) is beyond the sanity-check range!")
|
||||||
|
|
||||||
return ImgInfo(
|
return ImgInfo(
|
||||||
p,
|
p,
|
||||||
tags,
|
tags,
|
||||||
|
@ -310,66 +352,78 @@ def main():
|
||||||
raise OSError()
|
raise OSError()
|
||||||
|
|
||||||
src.rename(target) # Execute the rename
|
src.rename(target) # Execute the rename
|
||||||
except OSError: # cross-device move
|
|
||||||
copyfile(src, target)
|
|
||||||
|
|
||||||
if opts.destructive:
|
except OSError: # cross-device move
|
||||||
src.chmod(0o644)
|
with yaspin(SPINNER):
|
||||||
src.unlink()
|
copyfile(src, target)
|
||||||
print(" unlink: ok")
|
|
||||||
|
if opts.destructive:
|
||||||
|
src.unlink()
|
||||||
|
print(" unlink: ok")
|
||||||
|
|
||||||
print("---")
|
print("---")
|
||||||
|
|
||||||
sequence_name = None
|
sequence_name = None
|
||||||
sequence = 0
|
sequence = 0
|
||||||
|
|
||||||
for src in list(opts.src_dir.glob("**/*")):
|
for src in opts.src_dir.glob("**/*"):
|
||||||
|
print(f"{src}:")
|
||||||
|
ext = "." + src.name.lower().split(".")[-1]
|
||||||
|
print(f" msg: ext inferred as {ext}")
|
||||||
|
|
||||||
if src.is_dir():
|
if src.is_dir():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
elif src.name.startswith("."):
|
elif ext in ["thm", "lrv", "ico", "sav"] or src.name.startswith("._"):
|
||||||
|
if opts.destructive:
|
||||||
|
src.unlink()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(f"{src}:")
|
elif ext in KNOWN_IMG_TYPES:
|
||||||
|
info = img_info(src)
|
||||||
info = img_info(src)
|
year_dir = Path(opts.dest_dir / str(info.date.year))
|
||||||
year_dir = Path(opts.dest_dir / str(info.date.year))
|
year_dir.mkdir(exist_ok=True) # Ignore existing and continue
|
||||||
year_dir.mkdir(exist_ok=True) # Ignore existing and continue
|
# Figure out a stable file name
|
||||||
# Figure out a stable file name
|
|
||||||
stable_name = f"v1_{info.date.strftime(MODIFIED_ISO_DATE)}_{sanitize(info.camera_make)}_{sanitize(info.camera_model)}_{info.device_fingerprint()}"
|
|
||||||
|
|
||||||
# De-conflict using a sequence number added to the sub-seconds field
|
|
||||||
if sequence_name == stable_name:
|
|
||||||
sequence += 1
|
|
||||||
info = info.incr(sequence)
|
|
||||||
print(f" warning: de-conflicting filenames with sequence {sequence}")
|
|
||||||
stable_name = f"v1_{info.date.strftime(MODIFIED_ISO_DATE)}_{sanitize(info.camera_make)}_{sanitize(info.camera_model)}_{info.device_fingerprint()}"
|
stable_name = f"v1_{info.date.strftime(MODIFIED_ISO_DATE)}_{sanitize(info.camera_make)}_{sanitize(info.camera_model)}_{info.device_fingerprint()}"
|
||||||
|
|
||||||
else:
|
# De-conflict using a sequence number added to the sub-seconds field
|
||||||
sequence = 0
|
if sequence_name == stable_name:
|
||||||
sequence_name = stable_name
|
sequence += 1
|
||||||
|
info = info.incr(sequence)
|
||||||
|
print(f" warning: de-conflicting filenames with sequence {sequence}")
|
||||||
|
stable_name = f"v1_{info.date.strftime(MODIFIED_ISO_DATE)}_{sanitize(info.camera_make)}_{sanitize(info.camera_model)}_{info.device_fingerprint()}"
|
||||||
|
|
||||||
try:
|
else:
|
||||||
ext = normalize_ext(src)
|
sequence = 0
|
||||||
except AssertionError:
|
sequence_name = stable_name
|
||||||
continue # Just skip fucked up files
|
|
||||||
target = Path(year_dir / f"{stable_name}{ext}")
|
try:
|
||||||
|
ext = normalize_ext(src)
|
||||||
|
except AssertionError:
|
||||||
|
continue # Just skip fucked up files
|
||||||
|
target = Path(year_dir / f"{stable_name}{ext}")
|
||||||
|
|
||||||
|
if not target.exists():
|
||||||
|
# src & !target => copy
|
||||||
|
_copy(src, target)
|
||||||
|
|
||||||
|
elif src == target:
|
||||||
|
# src == target; skip DO NOT DELETE SRC
|
||||||
|
pass
|
||||||
|
|
||||||
|
elif checksum_path_blocks(src) == checksum_path_blocks(target):
|
||||||
|
print(f" ok: {target}")
|
||||||
|
# src != target && id(src) == id(target); delete src
|
||||||
|
if opts.destructive:
|
||||||
|
src.unlink()
|
||||||
|
|
||||||
|
else:
|
||||||
|
# src != target && id(src) != id(target); replace target with src?
|
||||||
|
print(f" warning: {target} is a content-id collision with a different checksum; skipping")
|
||||||
|
|
||||||
if not target.exists():
|
|
||||||
# src & !target => copy
|
|
||||||
_copy(src, target)
|
|
||||||
elif src == target:
|
|
||||||
# src == target; skip DO NOT DELETE SRC
|
|
||||||
pass
|
|
||||||
elif checksum_path_blocks(src) == checksum_path_blocks(target):
|
|
||||||
print(f" ok: {target}")
|
|
||||||
# src != target && id(src) == id(target); delete src
|
|
||||||
if opts.destructive:
|
|
||||||
src.chmod(0o644)
|
|
||||||
src.unlink()
|
|
||||||
else:
|
else:
|
||||||
# src != target && id(src) != id(target); replace target with src?
|
print(f" msg: unknown filetype {ext}")
|
||||||
print(f" warning: {target} is a content-id collision with a different checksum; skipping")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
Loading…
Reference in a new issue