From 9aae53497152d01f0af13829aaa0ee4c53af906a Mon Sep 17 00:00:00 2001 From: Reid 'arrdem' McKenzie Date: Sat, 25 Sep 2021 00:43:50 -0600 Subject: [PATCH] [NO TESTS] WIP --- .bazelrc | 4 +- projects/archiver/BUILD | 6 + projects/archiver/hash_copy.py | 28 +-- projects/archiver/notes.md | 288 ++++++++++++++++++++++++ projects/archiver/org_photos.py | 379 +++----------------------------- projects/archiver/util.py | 69 ++++++ tools/autoflake/BUILD | 2 +- tools/fmt.sh | 3 +- 8 files changed, 402 insertions(+), 377 deletions(-) create mode 100644 projects/archiver/notes.md create mode 100644 projects/archiver/util.py diff --git a/.bazelrc b/.bazelrc index e824c02..e6dc119 100644 --- a/.bazelrc +++ b/.bazelrc @@ -1,5 +1,5 @@ test --test_output=errors # To enable flake8 on all build steps, uncomment this - -build --aspects="//tools/flake8:flake8.bzl%flake8_aspect" --output_groups=flake8_checks -build --aspects="//tools/black:black.bzl%black_aspect" --output_groups=black_checks +test --aspects="//tools/flake8:flake8.bzl%flake8_aspect" --output_groups=flake8_checks +test --aspects="//tools/black:black.bzl%black_aspect" --output_groups=black_checks diff --git a/projects/archiver/BUILD b/projects/archiver/BUILD index fe7ef5c..d60c6a2 100644 --- a/projects/archiver/BUILD +++ b/projects/archiver/BUILD @@ -1,11 +1,17 @@ zapp_binary( name = "hash_copy", main = "hash_copy.py", + srcs = [ + "util.py" + ], ) zapp_binary( name = "org_photos", main = "org_photos.py", + srcs = [ + "util.py" + ], deps = [ py_requirement("ExifRead"), ] diff --git a/projects/archiver/hash_copy.py b/projects/archiver/hash_copy.py index ad273e7..7fc439e 100644 --- a/projects/archiver/hash_copy.py +++ b/projects/archiver/hash_copy.py @@ -3,38 +3,16 @@ A tree deduplicator and archiver tool. 
""" import argparse -from hashlib import sha256 from pathlib import Path from shutil import copy2 as copyfile +from .util import * parser = argparse.ArgumentParser() parser.add_argument("from_dir", type=Path) parser.add_argument("to_dir", type=Path) -def checksum(p: Path, sum=sha256) -> str: - """Compute block-wise checksums of a file. - - Inspired by the Dropbox content-hashing interface - - - https://www.dropbox.com/developers/reference/content-hash - - """ - - def iter_chunks(fp): - yield from iter(lambda: fp.read(4096), b"") - - def _helper(): - with open(p, "rb") as fp: - for chunk in iter_chunks(fp): - digest = sum() - digest.update(chunk) - yield digest.hexdigest() - - return list(_helper()) - - def main(): opts, args = parser.parse_known_args() @@ -62,8 +40,8 @@ def main(): ): pass - elif (src_checksum := checksum(abs_src_path)) != ( - dest_checksum := checksum(abs_dest_path) + elif (src_checksum := checksum_path(abs_src_path)) != ( + dest_checksum := checksum_path(abs_dest_path) ): print( f"file conflict (src {src_checksum}, dest {dest_checksum}), correcting..." 
diff --git a/projects/archiver/notes.md b/projects/archiver/notes.md new file mode 100644 index 0000000..8eab1a1 --- /dev/null +++ b/projects/archiver/notes.md @@ -0,0 +1,288 @@ +# EXIF tags dataset (exifread edition) + +- 'EXIF ApertureValue' +- 'EXIF BodySerialNumber' +- 'EXIF BrightnessValue' +- 'EXIF CVAPattern' +- 'EXIF CameraOwnerName' +- 'EXIF ColorSpace' +- 'EXIF ComponentsConfiguration' +- 'EXIF CompressedBitsPerPixel' +- 'EXIF Contrast' +- 'EXIF CustomRendered' +- 'EXIF DateTimeDigitized' +- 'EXIF DateTimeOriginal' +- 'EXIF DeviceSettingDescription' +- 'EXIF DigitalZoomRatio' +- 'EXIF ExifImageLength' +- 'EXIF ExifImageWidth' +- 'EXIF ExifVersion' +- 'EXIF ExposureBiasValue' +- 'EXIF ExposureIndex' +- 'EXIF ExposureMode' +- 'EXIF ExposureProgram' +- 'EXIF ExposureTime' +- 'EXIF FNumber' +- 'EXIF FileSource' +- 'EXIF Flash' +- 'EXIF FlashEnergy' +- 'EXIF FlashPixVersion' +- 'EXIF FocalLength' +- 'EXIF FocalLengthIn35mmFilm' +- 'EXIF FocalPlaneResolutionUnit' +- 'EXIF FocalPlaneXResolution' +- 'EXIF FocalPlaneYResolution' +- 'EXIF GainControl' +- 'EXIF ISOSpeedRatings' +- 'EXIF ImageUniqueID' +- 'EXIF InteroperabilityOffset' +- 'EXIF JPEGInterchangeFormat' +- 'EXIF JPEGInterchangeFormatLength' +- 'EXIF LensMake' +- 'EXIF LensModel' +- 'EXIF LensSerialNumber' +- 'EXIF LensSpecification' +- 'EXIF LightSource' +- 'EXIF MakerNote' +- 'EXIF MaxApertureValue' +- 'EXIF MeteringMode' +- 'EXIF OffsetSchema' +- 'EXIF OffsetTime' +- 'EXIF OffsetTimeDigitized' +- 'EXIF OffsetTimeOriginal' +- 'EXIF Padding' +- 'EXIF RecommendedExposureIndex' +- 'EXIF Saturation' +- 'EXIF SceneCaptureType' +- 'EXIF SceneType' +- 'EXIF SensingMethod' +- 'EXIF SensitivityType' +- 'EXIF Sharpness' +- 'EXIF ShutterSpeedValue' +- 'EXIF SubSecTime' +- 'EXIF SubSecTimeDigitized' +- 'EXIF SubSecTimeOriginal' +- 'EXIF SubjectArea' +- 'EXIF SubjectDistance' +- 'EXIF SubjectDistanceRange' +- 'EXIF UserComment' +- 'EXIF WhiteBalance' +- 'GPS GPSAltitude' +- 'GPS GPSAltitudeRef' +- 'GPS GPSDOP' +- 
'GPS GPSDate' +- 'GPS GPSImgDirection' +- 'GPS GPSImgDirectionRef' +- 'GPS GPSLatitude' +- 'GPS GPSLatitudeRef' +- 'GPS GPSLongitude' +- 'GPS GPSLongitudeRef' +- 'GPS GPSMapDatum' +- 'GPS GPSMeasureMode' +- 'GPS GPSProcessingMethod' +- 'GPS GPSTimeStamp' +- 'GPS GPSVersionID' +- 'GPS Tag 0xEA1C' +- 'Image Artist' +- 'Image BitsPerSample' +- 'Image Copyright' +- 'Image DateTime' +- 'Image DateTimeDigitized' +- 'Image ExifOffset' +- 'Image ExposureMode' +- 'Image ExposureProgram' +- 'Image ExposureTime' +- 'Image FNumber' +- 'Image Flash' +- 'Image FocalLength' +- 'Image GPSInfo' +- 'Image ISOSpeedRatings' +- 'Image ImageDescription' +- 'Image ImageLength' +- 'Image ImageWidth' +- 'Image JPEGInterchangeFormat' +- 'Image JPEGInterchangeFormatLength' +- 'Image LightSource' +- 'Image Make' +- 'Image MeteringMode' +- 'Image Model' +- 'Image Orientation' +- 'Image Padding' +- 'Image PhotometricInterpretation' +- 'Image PrintIM' +- 'Image ResolutionUnit' +- 'Image SamplesPerPixel' +- 'Image Software' +- 'Image UserComment' +- 'Image WhiteBalance' +- 'Image XPComment' +- 'Image XPKeywords' +- 'Image XPTitle' +- 'Image XResolution' +- 'Image YCbCrPositioning' +- 'Image YResolution' +- 'Interoperability InteroperabilityIndex' +- 'Interoperability InteroperabilityVersion' +- 'JPEGThumbnail' +- 'MakerNote AEBracketCompensationApplied' +- 'MakerNote AESetting' +- 'MakerNote AFAreaMode' +- 'MakerNote AFInfo2' +- 'MakerNote AFPointSelected' +- 'MakerNote AFPointUsed' +- 'MakerNote ActiveDLighting' +- 'MakerNote AspectInfo' +- 'MakerNote AutoBracketRelease' +- 'MakerNote AutoFlashMode' +- 'MakerNote BracketMode' +- 'MakerNote BracketShotNumber' +- 'MakerNote BracketValue' +- 'MakerNote BracketingMode' +- 'MakerNote CanonImageWidth' +- 'MakerNote ColorBalance' +- 'MakerNote ColorSpace' +- 'MakerNote ContinuousDriveMode' +- 'MakerNote Contrast' +- 'MakerNote CropHiSpeed' +- 'MakerNote CropInfo' +- 'MakerNote DigitalVariProgram' +- 'MakerNote DigitalZoom' +- 'MakerNote 
DustRemovalData' +- 'MakerNote EasyShootingMode' +- 'MakerNote ExposureDifference' +- 'MakerNote ExposureMode' +- 'MakerNote ExposureTuning' +- 'MakerNote ExternalFlashExposureComp' +- 'MakerNote FileInfo' +- 'MakerNote FileNumber' +- 'MakerNote FilterEffect' +- 'MakerNote FirmwareVersion' +- 'MakerNote FlashActivity' +- 'MakerNote FlashBias' +- 'MakerNote FlashBracketCompensationApplied' +- 'MakerNote FlashCompensation' +- 'MakerNote FlashDetails' +- 'MakerNote FlashExposureLock' +- 'MakerNote FlashInfo' +- 'MakerNote FlashMode' +- 'MakerNote FlashSetting' +- 'MakerNote FocalLength' +- 'MakerNote FocalType' +- 'MakerNote FocalUnitsPerMM' +- 'MakerNote FocusMode' +- 'MakerNote FocusType' +- 'MakerNote HDRImageType' +- 'MakerNote HighISONoiseReduction' +- 'MakerNote ISO' +- 'MakerNote ISOInfo' +- 'MakerNote ISOSetting' +- 'MakerNote ISOSpeedRequested' +- 'MakerNote ImageDataSize' +- 'MakerNote ImageSize' +- 'MakerNote ImageStabilization' +- 'MakerNote ImageType' +- 'MakerNote InternalSerialNumber' +- 'MakerNote LensData' +- 'MakerNote LensFStops' +- 'MakerNote LensMinMaxFocalMaxAperture' +- 'MakerNote LensModel' +- 'MakerNote LensType' +- 'MakerNote LiveViewShooting' +- 'MakerNote LongExposureNoiseReduction2' +- 'MakerNote LongFocalLengthOfLensInFocalUnits' +- 'MakerNote MacroMagnification' +- 'MakerNote Macromode' +- 'MakerNote MakernoteVersion' +- 'MakerNote ManualFlashOutput' +- 'MakerNote MeteringMode' +- 'MakerNote ModelID' +- 'MakerNote MultiExposure' +- 'MakerNote NikonPreview' +- 'MakerNote NoiseReduction' +- 'MakerNote NumAFPoints' +- 'MakerNote OwnerName' +- 'MakerNote PhotoCornerCoordinates' +- 'MakerNote PictureControl' +- 'MakerNote PowerUpTime' +- 'MakerNote ProgramShift' +- 'MakerNote Quality' +- 'MakerNote RawJpgQuality' +- 'MakerNote RawJpgSize' +- 'MakerNote RecordMode' +- 'MakerNote RetouchHistory' +- 'MakerNote Saturation' +- 'MakerNote SelfTimer' +- 'MakerNote SequenceNumber' +- 'MakerNote SerialNumber' +- 'MakerNote Sharpness' +- 'MakerNote 
ShortFocalLengthOfLensInFocalUnits' +- 'MakerNote ShotInfo' +- 'MakerNote SlowShutter' +- 'MakerNote SpotMeteringMode' +- 'MakerNote SubjectDistance' +- 'MakerNote Tag 0x0001' +- 'MakerNote Tag 0x0002' +- 'MakerNote Tag 0x0003' +- 'MakerNote Tag 0x0004' +- 'MakerNote Tag 0x0005' +- 'MakerNote Tag 0x0006' +- 'MakerNote Tag 0x0007' +- 'MakerNote Tag 0x0008' +- 'MakerNote Tag 0x0009' +- 'MakerNote Tag 0x000E' +- 'MakerNote Tag 0x0014' +- 'MakerNote Tag 0x0015' +- 'MakerNote Tag 0x0019' +- 'MakerNote Tag 0x002B' +- 'MakerNote Tag 0x002C' +- 'MakerNote Tag 0x002D' +- 'MakerNote Tag 0x0083' +- 'MakerNote Tag 0x0099' +- 'MakerNote Tag 0x009D' +- 'MakerNote Tag 0x00A0' +- 'MakerNote Tag 0x00A3' +- 'MakerNote Tag 0x00AA' +- 'MakerNote Tag 0x00BB' +- 'MakerNote Tag 0x00D0' +- 'MakerNote Tag 0x00E0' +- 'MakerNote Tag 0x4001' +- 'MakerNote Tag 0x4008' +- 'MakerNote Tag 0x4009' +- 'MakerNote Tag 0x4010' +- 'MakerNote Tag 0x4011' +- 'MakerNote Tag 0x4012' +- 'MakerNote Tag 0x4015' +- 'MakerNote Tag 0x4016' +- 'MakerNote Tag 0x4017' +- 'MakerNote Tag 0x4018' +- 'MakerNote Tag 0x4019' +- 'MakerNote Tag 0x4020' +- 'MakerNote ThumbnailImageValidArea' +- 'MakerNote ToningEffect' +- 'MakerNote TotalShutterReleases' +- 'MakerNote Unknown' +- 'MakerNote VRInfo' +- 'MakerNote ValidAFPoints' +- 'MakerNote WBBracketMode' +- 'MakerNote WBBracketValueAB' +- 'MakerNote WBBracketValueGM' +- 'MakerNote WhiteBalance' +- 'MakerNote WhiteBalanceBias' +- 'MakerNote WhiteBalanceRBCoeff' +- 'MakerNote Whitebalance' +- 'MakerNote WorldTime' +- 'Thumbnail Compression' +- 'Thumbnail DateTime' +- 'Thumbnail ImageDescription' +- 'Thumbnail ImageLength' +- 'Thumbnail ImageWidth' +- 'Thumbnail JPEGInterchangeFormat' +- 'Thumbnail JPEGInterchangeFormatLength' +- 'Thumbnail Make' +- 'Thumbnail Model' +- 'Thumbnail Orientation' +- 'Thumbnail ResolutionUnit' +- 'Thumbnail Software' +- 'Thumbnail XResolution' +- 'Thumbnail YCbCrPositioning' +- 'Thumbnail YResolution' diff --git a/projects/archiver/org_photos.py 
b/projects/archiver/org_photos.py index 38f7a11..8b3007d 100644 --- a/projects/archiver/org_photos.py +++ b/projects/archiver/org_photos.py @@ -25,6 +25,8 @@ from shutil import copy2 as copyfile import sys import typing as t +from .util import * + # FIXME: use piexif, which supports writeback not exifread. import exifread @@ -32,351 +34,18 @@ import exifread parser = argparse.ArgumentParser() parser.add_argument("src_dir", type=Path) parser.add_argument("dest_dir", type=Path) +parser.add_argument("--destructive", action="store_true", default=False) MODIFIED_ISO_DATE = "%Y:%m:%dT%H:%M:%SF%f" -def take(n, iter): - """Take the first N items lazily off of an iterable.""" - - for _ in range(0, n): - try: - yield next(iter) - except StopIteration: - break - - def exif_tags(p: Path) -> object: """Return the EXIF tags on an image.""" with open(p, "rb") as fp: return exifread.process_file(fp) -# EXIF tags dataset (exifread edition) - -# -# --- -# - 'EXIF ApertureValue' -# - 'EXIF BodySerialNumber' -# - 'EXIF BrightnessValue' -# - 'EXIF CVAPattern' -# - 'EXIF CameraOwnerName' -# - 'EXIF ColorSpace' -# - 'EXIF ComponentsConfiguration' -# - 'EXIF CompressedBitsPerPixel' -# - 'EXIF Contrast' -# - 'EXIF CustomRendered' -# - 'EXIF DateTimeDigitized' -# - 'EXIF DateTimeOriginal' -# - 'EXIF DeviceSettingDescription' -# - 'EXIF DigitalZoomRatio' -# - 'EXIF ExifImageLength' -# - 'EXIF ExifImageWidth' -# - 'EXIF ExifVersion' -# - 'EXIF ExposureBiasValue' -# - 'EXIF ExposureIndex' -# - 'EXIF ExposureMode' -# - 'EXIF ExposureProgram' -# - 'EXIF ExposureTime' -# - 'EXIF FNumber' -# - 'EXIF FileSource' -# - 'EXIF Flash' -# - 'EXIF FlashEnergy' -# - 'EXIF FlashPixVersion' -# - 'EXIF FocalLength' -# - 'EXIF FocalLengthIn35mmFilm' -# - 'EXIF FocalPlaneResolutionUnit' -# - 'EXIF FocalPlaneXResolution' -# - 'EXIF FocalPlaneYResolution' -# - 'EXIF GainControl' -# - 'EXIF ISOSpeedRatings' -# - 'EXIF ImageUniqueID' -# - 'EXIF InteroperabilityOffset' -# - 'EXIF JPEGInterchangeFormat' -# - 'EXIF 
JPEGInterchangeFormatLength' -# - 'EXIF LensMake' -# - 'EXIF LensModel' -# - 'EXIF LensSerialNumber' -# - 'EXIF LensSpecification' -# - 'EXIF LightSource' -# - 'EXIF MakerNote' -# - 'EXIF MaxApertureValue' -# - 'EXIF MeteringMode' -# - 'EXIF OffsetSchema' -# - 'EXIF OffsetTime' -# - 'EXIF OffsetTimeDigitized' -# - 'EXIF OffsetTimeOriginal' -# - 'EXIF Padding' -# - 'EXIF RecommendedExposureIndex' -# - 'EXIF Saturation' -# - 'EXIF SceneCaptureType' -# - 'EXIF SceneType' -# - 'EXIF SensingMethod' -# - 'EXIF SensitivityType' -# - 'EXIF Sharpness' -# - 'EXIF ShutterSpeedValue' -# - 'EXIF SubSecTime' -# - 'EXIF SubSecTimeDigitized' -# - 'EXIF SubSecTimeOriginal' -# - 'EXIF SubjectArea' -# - 'EXIF SubjectDistance' -# - 'EXIF SubjectDistanceRange' -# - 'EXIF UserComment' -# - 'EXIF WhiteBalance' -# - 'GPS GPSAltitude' -# - 'GPS GPSAltitudeRef' -# - 'GPS GPSDOP' -# - 'GPS GPSDate' -# - 'GPS GPSImgDirection' -# - 'GPS GPSImgDirectionRef' -# - 'GPS GPSLatitude' -# - 'GPS GPSLatitudeRef' -# - 'GPS GPSLongitude' -# - 'GPS GPSLongitudeRef' -# - 'GPS GPSMapDatum' -# - 'GPS GPSMeasureMode' -# - 'GPS GPSProcessingMethod' -# - 'GPS GPSTimeStamp' -# - 'GPS GPSVersionID' -# - 'GPS Tag 0xEA1C' -# - 'Image Artist' -# - 'Image BitsPerSample' -# - 'Image Copyright' -# - 'Image DateTime' -# - 'Image DateTimeDigitized' -# - 'Image ExifOffset' -# - 'Image ExposureMode' -# - 'Image ExposureProgram' -# - 'Image ExposureTime' -# - 'Image FNumber' -# - 'Image Flash' -# - 'Image FocalLength' -# - 'Image GPSInfo' -# - 'Image ISOSpeedRatings' -# - 'Image ImageDescription' -# - 'Image ImageLength' -# - 'Image ImageWidth' -# - 'Image JPEGInterchangeFormat' -# - 'Image JPEGInterchangeFormatLength' -# - 'Image LightSource' -# - 'Image Make' -# - 'Image MeteringMode' -# - 'Image Model' -# - 'Image Orientation' -# - 'Image Padding' -# - 'Image PhotometricInterpretation' -# - 'Image PrintIM' -# - 'Image ResolutionUnit' -# - 'Image SamplesPerPixel' -# - 'Image Software' -# - 'Image UserComment' -# - 'Image 
WhiteBalance' -# - 'Image XPComment' -# - 'Image XPKeywords' -# - 'Image XPTitle' -# - 'Image XResolution' -# - 'Image YCbCrPositioning' -# - 'Image YResolution' -# - 'Interoperability InteroperabilityIndex' -# - 'Interoperability InteroperabilityVersion' -# - 'JPEGThumbnail' -# - 'MakerNote AEBracketCompensationApplied' -# - 'MakerNote AESetting' -# - 'MakerNote AFAreaMode' -# - 'MakerNote AFInfo2' -# - 'MakerNote AFPointSelected' -# - 'MakerNote AFPointUsed' -# - 'MakerNote ActiveDLighting' -# - 'MakerNote AspectInfo' -# - 'MakerNote AutoBracketRelease' -# - 'MakerNote AutoFlashMode' -# - 'MakerNote BracketMode' -# - 'MakerNote BracketShotNumber' -# - 'MakerNote BracketValue' -# - 'MakerNote BracketingMode' -# - 'MakerNote CanonImageWidth' -# - 'MakerNote ColorBalance' -# - 'MakerNote ColorSpace' -# - 'MakerNote ContinuousDriveMode' -# - 'MakerNote Contrast' -# - 'MakerNote CropHiSpeed' -# - 'MakerNote CropInfo' -# - 'MakerNote DigitalVariProgram' -# - 'MakerNote DigitalZoom' -# - 'MakerNote DustRemovalData' -# - 'MakerNote EasyShootingMode' -# - 'MakerNote ExposureDifference' -# - 'MakerNote ExposureMode' -# - 'MakerNote ExposureTuning' -# - 'MakerNote ExternalFlashExposureComp' -# - 'MakerNote FileInfo' -# - 'MakerNote FileNumber' -# - 'MakerNote FilterEffect' -# - 'MakerNote FirmwareVersion' -# - 'MakerNote FlashActivity' -# - 'MakerNote FlashBias' -# - 'MakerNote FlashBracketCompensationApplied' -# - 'MakerNote FlashCompensation' -# - 'MakerNote FlashDetails' -# - 'MakerNote FlashExposureLock' -# - 'MakerNote FlashInfo' -# - 'MakerNote FlashMode' -# - 'MakerNote FlashSetting' -# - 'MakerNote FocalLength' -# - 'MakerNote FocalType' -# - 'MakerNote FocalUnitsPerMM' -# - 'MakerNote FocusMode' -# - 'MakerNote FocusType' -# - 'MakerNote HDRImageType' -# - 'MakerNote HighISONoiseReduction' -# - 'MakerNote ISO' -# - 'MakerNote ISOInfo' -# - 'MakerNote ISOSetting' -# - 'MakerNote ISOSpeedRequested' -# - 'MakerNote ImageDataSize' -# - 'MakerNote ImageSize' -# - 
'MakerNote ImageStabilization' -# - 'MakerNote ImageType' -# - 'MakerNote InternalSerialNumber' -# - 'MakerNote LensData' -# - 'MakerNote LensFStops' -# - 'MakerNote LensMinMaxFocalMaxAperture' -# - 'MakerNote LensModel' -# - 'MakerNote LensType' -# - 'MakerNote LiveViewShooting' -# - 'MakerNote LongExposureNoiseReduction2' -# - 'MakerNote LongFocalLengthOfLensInFocalUnits' -# - 'MakerNote MacroMagnification' -# - 'MakerNote Macromode' -# - 'MakerNote MakernoteVersion' -# - 'MakerNote ManualFlashOutput' -# - 'MakerNote MeteringMode' -# - 'MakerNote ModelID' -# - 'MakerNote MultiExposure' -# - 'MakerNote NikonPreview' -# - 'MakerNote NoiseReduction' -# - 'MakerNote NumAFPoints' -# - 'MakerNote OwnerName' -# - 'MakerNote PhotoCornerCoordinates' -# - 'MakerNote PictureControl' -# - 'MakerNote PowerUpTime' -# - 'MakerNote ProgramShift' -# - 'MakerNote Quality' -# - 'MakerNote RawJpgQuality' -# - 'MakerNote RawJpgSize' -# - 'MakerNote RecordMode' -# - 'MakerNote RetouchHistory' -# - 'MakerNote Saturation' -# - 'MakerNote SelfTimer' -# - 'MakerNote SequenceNumber' -# - 'MakerNote SerialNumber' -# - 'MakerNote Sharpness' -# - 'MakerNote ShortFocalLengthOfLensInFocalUnits' -# - 'MakerNote ShotInfo' -# - 'MakerNote SlowShutter' -# - 'MakerNote SpotMeteringMode' -# - 'MakerNote SubjectDistance' -# - 'MakerNote Tag 0x0001' -# - 'MakerNote Tag 0x0002' -# - 'MakerNote Tag 0x0003' -# - 'MakerNote Tag 0x0004' -# - 'MakerNote Tag 0x0005' -# - 'MakerNote Tag 0x0006' -# - 'MakerNote Tag 0x0007' -# - 'MakerNote Tag 0x0008' -# - 'MakerNote Tag 0x0009' -# - 'MakerNote Tag 0x000E' -# - 'MakerNote Tag 0x0014' -# - 'MakerNote Tag 0x0015' -# - 'MakerNote Tag 0x0019' -# - 'MakerNote Tag 0x002B' -# - 'MakerNote Tag 0x002C' -# - 'MakerNote Tag 0x002D' -# - 'MakerNote Tag 0x0083' -# - 'MakerNote Tag 0x0099' -# - 'MakerNote Tag 0x009D' -# - 'MakerNote Tag 0x00A0' -# - 'MakerNote Tag 0x00A3' -# - 'MakerNote Tag 0x00AA' -# - 'MakerNote Tag 0x00BB' -# - 'MakerNote Tag 0x00D0' -# - 'MakerNote Tag 
0x00E0' -# - 'MakerNote Tag 0x4001' -# - 'MakerNote Tag 0x4008' -# - 'MakerNote Tag 0x4009' -# - 'MakerNote Tag 0x4010' -# - 'MakerNote Tag 0x4011' -# - 'MakerNote Tag 0x4012' -# - 'MakerNote Tag 0x4015' -# - 'MakerNote Tag 0x4016' -# - 'MakerNote Tag 0x4017' -# - 'MakerNote Tag 0x4018' -# - 'MakerNote Tag 0x4019' -# - 'MakerNote Tag 0x4020' -# - 'MakerNote ThumbnailImageValidArea' -# - 'MakerNote ToningEffect' -# - 'MakerNote TotalShutterReleases' -# - 'MakerNote Unknown' -# - 'MakerNote VRInfo' -# - 'MakerNote ValidAFPoints' -# - 'MakerNote WBBracketMode' -# - 'MakerNote WBBracketValueAB' -# - 'MakerNote WBBracketValueGM' -# - 'MakerNote WhiteBalance' -# - 'MakerNote WhiteBalanceBias' -# - 'MakerNote WhiteBalanceRBCoeff' -# - 'MakerNote Whitebalance' -# - 'MakerNote WorldTime' -# - 'Thumbnail Compression' -# - 'Thumbnail DateTime' -# - 'Thumbnail ImageDescription' -# - 'Thumbnail ImageLength' -# - 'Thumbnail ImageWidth' -# - 'Thumbnail JPEGInterchangeFormat' -# - 'Thumbnail JPEGInterchangeFormatLength' -# - 'Thumbnail Make' -# - 'Thumbnail Model' -# - 'Thumbnail Orientation' -# - 'Thumbnail ResolutionUnit' -# - 'Thumbnail Software' -# - 'Thumbnail XResolution' -# - 'Thumbnail YCbCrPositioning' -# - 'Thumbnail YResolution' - - -def checksum(p: Path, sum=sha256) -> str: - """Compute a chunked checksum of a file. - - Does not produce individual block checksums. - """ - - def iter_blocks(fp): - yield from iter(lambda: fp.read(4096), b"") - - with open(p, "rb") as fp: - digest = sum() - for chunk in iter_blocks(fp): - digest.update(chunk) - return digest.hexdigest() - - -def checksum_list(iter, sum=sha256): - """Compute the checksum of a bunch of stuff from an iterable.""" - - sum = sum() - for i in iter: - sum.update(b";") # Merkle tree salting. 
- if isinstance(i, str): - i = str.encode(i, "utf-8") - try: - sum.update(i) - except Exception as e: - print(i, type(i)) - raise e - - return sum.hexdigest() - - def sanitize(s: str) -> str: """Something like b64encode; sanitize a string to a path-friendly version.""" @@ -524,10 +193,10 @@ class ImgInfo(t.NamedTuple): return self.file_sha256sum()[: self.shasum_prefix] def file_sha256sum(self): - return checksum(self.file_path, sha256) + return checksum_path(self.file_path, sha256) def file_sha512sum(self): - return checksum(self.file_path, sha512) + return checksum_path(self.file_path, sha512) def img_info(p: Path) -> ImgInfo: @@ -620,11 +289,27 @@ def img_info(p: Path) -> ImgInfo: def main(): opts, args = parser.parse_known_args() + def _copy(): + print(f" rename: {target}") + try: + if not opts.destructive: + raise OSError() + + src.rename(target) # Execute the rename + except OSError: # cross-device move + copyfile(src, target) + + if opts.destructive: + src.chmod(0o644) + src.unlink() + print("---") for src in list(opts.src_dir.glob("**/*")): if src.is_dir(): continue + print(f"{src}:") + info = img_info(src) year_dir = Path(opts.dest_dir / str(info.date.year)) year_dir.mkdir(exist_ok=True) # Ignore existing and continue @@ -636,20 +321,20 @@ def main(): continue # Just skip fucked up files target = Path(year_dir / f"{stable_name}{ext}") - print(f"{src}:") - print(f" rename: {target}") if not target.exists(): - try: - src.rename(target) # Execute the rename - except OSError: - copyfile(src, target) - target.chmod(0o644) - src.unlink() + # src & !target => copy + _copy() elif src == target: - pass # Nothing to do + # src == target; skip DO NOT DELETE SRC + pass + elif checksum_path_blocks(src) == checksum_path_blocks(target): + # src != target && id(src) == id(target); delete src + if opts.destructive: + src.chmod(0o644) + src.unlink() else: - target.chmod(0o644) - src.unlink() # Delete the source + # src != target && id(src) != id(target); replace target with 
src? + print(f" warning: {target} is a content-id collision with a different checksum") if __name__ == "__main__": diff --git a/projects/archiver/util.py b/projects/archiver/util.py new file mode 100644 index 0000000..0eaadb9 --- /dev/null +++ b/projects/archiver/util.py @@ -0,0 +1,69 @@ +import typing as t + +from hashlib import sha256 +from pathlib import Path + + +def iter_chunks(fp): + yield from iter(lambda: fp.read(4096), b"") + + +def take(n, iter): + """Take the first N items lazily off of an iterable.""" + + for _ in range(0, n): + try: + yield next(iter) + except StopIteration: + break + + +def checksum_list(iter, sum=sha256, salt=b";"): + """Compute the checksum of a bunch of stuff from an iterable.""" + + sum = sum() + for i in iter: + if salt: + sum.update(salt) # Merkle tree salting. + if isinstance(i, str): + i = str.encode(i, "utf-8") + try: + sum.update(i) + except Exception as e: + print(i, type(i)) + raise e + + return sum.hexdigest() + + +def checksum_path_blocks(p: Path, sum=sha256) -> t.Iterable[str]: + """Compute block-wise checksums of a file. + + Inspired by the Dropbox content-hashing interface - + + https://www.dropbox.com/developers/reference/content-hash + + """ + + def _helper(): + with open(p, "rb") as fp: + for chunk in iter_chunks(fp): + digest = sum() + digest.update(chunk) + yield digest.hexdigest() + + return list(_helper()) + + +def checksum_path(p: Path, sum=sha256) -> str: + """Compute 'the' checksum of an entire file. + + Note that this does semi-streaming I/O. 
+ + """ + + sum = sum() + with open(p, "rb") as fp: + for chunk in iter_chunks(fp): + sum.update(chunk) + return sum.hexdigest() diff --git a/tools/autoflake/BUILD b/tools/autoflake/BUILD index 9088820..bf94b18 100644 --- a/tools/autoflake/BUILD +++ b/tools/autoflake/BUILD @@ -1,4 +1,4 @@ -py_binary( +zapp_binary( name = "autoflake", main = "__main__.py", deps = [ diff --git a/tools/fmt.sh b/tools/fmt.sh index 6e6640f..fac39f9 100755 --- a/tools/fmt.sh +++ b/tools/fmt.sh @@ -9,8 +9,7 @@ DIRS=(projects tools) function brl() { bin="$1" shift - bazel build "//${bin}" - "bazel-bin/${bin}/$(basename ${bin})" "$@" + bazel run "//${bin}" -- "$@" return "$?" }