"""Helpers to inspect the media files (pictures and videos) of a library folder."""
import hashlib
import mimetypes
import os
import subprocess
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, Text

try:
    import mutagen
except ImportError:
    # mutagen is only used to read tags from video files (see ``File.exif``).
    # Keep the module importable without it; ``File.exif`` raises when the
    # library is actually needed.
    mutagen = None

LIBRARY_PATH = "/home/fmartingr/Code/memories/testphotos"

RAW_MIMETYPES = {
    # RAW pictures
    "ARW": "image/x-sony-arw",
    "CR2": "image/x-canon-cr2",
    "CR3": "image/x-canon-cr3",
    "CRW": "image/x-canon-crw",
    "DCR": "image/x-kodak-dcr",
    "DNG": "image/x-adobe-dng",
    "ERF": "image/x-epson-erf",
    "K25": "image/x-kodak-k25",
    "KDC": "image/x-kodak-kdc",
    "MRW": "image/x-minolta-mrw",
    "NEF": "image/x-nikon-nef",
    "ORF": "image/x-olympus-orf",
    "PEF": "image/x-pentax-pef",
    "RAF": "image/x-fuji-raf",
    "RAW": "image/x-panasonic-raw",
    "SR2": "image/x-sony-sr2",
    "SRF": "image/x-sony-srf",
    "X3F": "image/x-sigma-x3f",
}

# High Efficiency Image/Video (apple)
APPLE_MIMETYPES = {"HEIC": "image/heic", "HEIF": "image/heif", "HEVC": "video/hevc"}

CUSTOM_MIMETYPES = {}
CUSTOM_MIMETYPES.update(RAW_MIMETYPES)
CUSTOM_MIMETYPES.update(APPLE_MIMETYPES)

# Register every custom type with the stdlib registry, for both the upper and
# lower case spelling of the extension.
for extension, mimetype in CUSTOM_MIMETYPES.items():
    mimetypes.add_type(mimetype, f".{extension}")
    mimetypes.add_type(mimetype, f".{extension.lower()}")

# EXIF keys inspected, in this order, when guessing when a file was created.
_CREATION_DATE_EXIF_KEYS = (
    "Content Create Date",
    "Date/Time Original",
    "Create Date",
    "Date Created",
    "File Modification Date/Time",
)

# Number of bytes read at a time while hashing a file.
_CHECKSUM_CHUNK_SIZE = 64 * 1024


def read_exif(path):
    """
    Run ``exiftool`` on ``path`` and return its report as a dictionary.

    Every ``key: value`` line of the output becomes one stripped entry. Lines
    without a ``:`` separator are skipped instead of aborting the whole read,
    and bytes that are not valid UTF-8 are replaced rather than raising.
    The exit status of ``exiftool`` is not checked.
    """
    output = {}
    completed = subprocess.run(["exiftool", path], stdout=subprocess.PIPE)
    for raw_line in completed.stdout.splitlines():
        key, separator, value = raw_line.decode("utf-8", errors="replace").partition(":")
        if not separator:
            # Not a "key: value" line; nothing to store.
            continue
        output[key.strip()] = value.strip()
    return output


def _parse_exif_datetime(value):
    """
    Parse an EXIF date string into a naive ``datetime``.

    Accepts ``YYYY:MM:DD HH:MM:SS`` optionally followed by a UTC offset (which
    is dropped, keeping the wall-clock time) or by a ``.fraction`` suffix.
    Returns ``None`` when ``value`` is not a string or cannot be parsed.
    """
    if not isinstance(value, str):
        return None
    try:
        return datetime.strptime(value, "%Y:%m:%d %H:%M:%S%z").replace(tzinfo=None)
    except ValueError:
        pass
    # Retry without whatever follows the last dot (e.g. sub-second digits).
    without_suffix = value.rsplit(".", maxsplit=1)[0]
    try:
        return datetime.strptime(without_suffix, "%Y:%m:%d %H:%M:%S")
    except ValueError:
        return None


@dataclass
class File:
    """A file of the library, identified by its ``path``.

    Derived values (mimetype, EXIF data, datetime, checksum) are computed on
    first access and cached on the instance in underscore-prefixed attributes.
    """

    # NOTE(review): this only defines a class attribute called ``__mro__``;
    # it looks like ``__slots__`` may have been intended -- confirm before
    # changing. Left untouched to preserve behavior.
    __mro__ = {"path", "_type"}

    path: str

    @property
    def mimetype(self) -> Optional[Text]:
        """Retrieves the file mimetype by extension (``None`` if unknown)."""
        # ``None`` is a legitimate result, so cache on attribute presence
        # rather than truthiness; the warning is then printed only once.
        if not hasattr(self, "_mimetype"):
            self._mimetype, _ = mimetypes.guess_type(self.path)
            if not self._mimetype:
                print(f"Can't guess type of file {self.path}")
        return self._mimetype

    @property
    def is_raw(self) -> bool:
        """Whether the mimetype is one of the RAW picture types."""
        return self.mimetype in RAW_MIMETYPES.values()

    @property
    def is_image(self) -> bool:
        """Whether the mimetype mentions ``image``; False when it is unknown."""
        return "image" in (self.mimetype or "")

    @property
    def is_video(self) -> bool:
        """Whether the mimetype mentions ``video``; False when it is unknown."""
        return "video" in (self.mimetype or "")

    @property
    def stat(self):
        """Return every ``st_*`` attribute of ``os.stat(path)`` as a dict."""
        result = os.stat(self.path)
        return {
            name: getattr(result, name)
            for name in dir(result)
            if name.startswith("st_")
        }

    @property
    def exif(self) -> dict:
        """
        Retrieve EXIF data from the file and merge it with whatever mutagen
        finds in there for video files.

        Raises ``ImportError`` for a video file when mutagen is not installed.
        """
        if getattr(self, "_exif", None) is None:
            metadata = read_exif(self.path)
            if self.is_video:
                if mutagen is None:
                    raise ImportError("mutagen is required to read video metadata")
                tags = mutagen.File(self.path)
                # mutagen returns None when it cannot identify the file.
                if tags is not None:
                    metadata.update(tags)
            self._exif = metadata
        return self._exif

    def get_datetime(self) -> datetime:
        """
        Tries to guess the original datetime for the provided file.

        This is done extracting several EXIF values and the file
        birthdate/modification date. The oldest one is the winner.
        """
        exif = self.exif
        datetimes = []
        for key in _CREATION_DATE_EXIF_KEYS:
            parsed = _parse_exif_datetime(exif.get(key))
            if parsed is not None:
                datetimes.append(parsed)

        # Last resort, use file creation/modification date
        stat = os.stat(self.path)
        try:
            datetimes.append(datetime.fromtimestamp(stat.st_birthtime))
        except AttributeError:
            # Linux: No easy way to get creation dates here,
            # so we'll settle for when its content was last modified.
            datetimes.append(datetime.fromtimestamp(stat.st_mtime))

        return min(datetimes)

    # NOTE: from here on the name ``datetime`` inside the class body refers to
    # this property, so keep it below anything annotated with the datetime type.
    @property
    def datetime(self):
        """Cached result of ``get_datetime()``."""
        if getattr(self, "_datetime", None) is None:
            self._datetime = self.get_datetime()
        return self._datetime

    @property
    def filename(self):
        """Base name of the file without its last extension."""
        return os.path.splitext(os.path.basename(self.path))[0]

    @property
    def extension(self):
        """Lower-cased extension of the file, without the leading dot."""
        return os.path.splitext(self.path)[1][1:].lower()

    @property
    def checksum(self) -> Text:
        """SHA-224 of the file contents, formatted as ``sha224-<hexdigest>``."""
        if getattr(self, "_checksum", None) is None:
            digest = hashlib.sha224()
            with open(self.path, "rb") as handler:
                # Hash in chunks so large files are never fully held in memory.
                for chunk in iter(lambda: handler.read(_CHECKSUM_CHUNK_SIZE), b""):
                    digest.update(chunk)
            self._checksum = f"sha224-{digest.hexdigest()}"
        return self._checksum

    def as_dict(self):
        """Return the path plus every derived value as a plain dictionary."""
        return {
            "path": self.path,
            "filename": self.filename,
            "extension": self.extension,
            "checksum": self.checksum,
            "datetime": self.datetime,
            "exif": self.exif,
        }


def get_files(library_path=LIBRARY_PATH):
    """
    Yield a ``File`` for every file found walking ``library_path``.

    ``library_path`` defaults to ``LIBRARY_PATH``.
    """
    for root, _dirs, files in os.walk(library_path):
        for filename in files:
            yield File(path=os.path.join(root, filename))