This repository has been archived on 2022-10-08. You can view files and clone it, but cannot push or open issues or pull requests.
memories/memories/collection/poc.py

178 lines
5.3 KiB
Python

import os
from datetime import datetime
import mimetypes
from typing import Text
from dataclasses import dataclass
import hashlib
import subprocess
import mutagen
# Root directory that get_files() walks looking for media files.
LIBRARY_PATH = "/home/fmartingr/Code/memories/testphotos"

# RAW pictures, keyed by upper-case file extension (without the dot).
RAW_MIMETYPES = {
    "ARW": "image/x-sony-arw",
    "CR2": "image/x-canon-cr2",
    "CR3": "image/x-canon-cr3",
    "CRW": "image/x-canon-crw",
    "DCR": "image/x-kodak-dcr",
    "DNG": "image/x-adobe-dng",
    "ERF": "image/x-epson-erf",
    "K25": "image/x-kodak-k25",
    "KDC": "image/x-kodak-kdc",
    "MRW": "image/x-minolta-mrw",
    "NEF": "image/x-nikon-nef",
    "ORF": "image/x-olympus-orf",
    "PEF": "image/x-pentax-pef",
    "RAF": "image/x-fuji-raf",
    "RAW": "image/x-panasonic-raw",
    "SR2": "image/x-sony-sr2",
    "SRF": "image/x-sony-srf",
    "X3F": "image/x-sigma-x3f",
}

# High Efficiency Image/Video (apple)
APPLE_MIMETYPES = {
    "HEIC": "image/heic",
    "HEIF": "image/heif",
    "HEVC": "video/hevc",
}

# All extra mimetypes: RAW entries first, then the Apple ones.
CUSTOM_MIMETYPES = {**RAW_MIMETYPES, **APPLE_MIMETYPES}

# Register each extension with the `mimetypes` module, upper-case spelling
# first and lower-case spelling second.
for extension, mimetype in CUSTOM_MIMETYPES.items():
    for _suffix in (f".{extension}", f".{extension.lower()}"):
        mimetypes.add_type(mimetype, _suffix)
def read_exif(path):
    """Run ``exiftool`` on ``path`` and return its output as a dict.

    Every output line is split on its first colon into a tag name and a
    value, both stripped of surrounding whitespace. Lines that contain no
    colon (blank lines, for instance) are skipped instead of raising
    ``ValueError``, and bytes that are not valid UTF-8 are replaced rather
    than aborting the whole read. When a tag name appears more than once the
    last value wins.

    Args:
        path: Path of the file handed to ``exiftool``.

    Returns:
        dict: Tag name to value, both ``str``. The exit status of
        ``exiftool`` is not checked.

    Raises:
        OSError: If the ``exiftool`` executable cannot be started.
    """
    output = {}
    with subprocess.Popen(["exiftool", path], stdout=subprocess.PIPE) as proc:
        for raw_line in proc.stdout:
            text = raw_line.decode("utf-8", errors="replace").strip()
            # partition() never raises; an empty separator means "no colon".
            key, separator, value = text.partition(":")
            if not separator:
                continue
            output[key.strip()] = value.strip()
    return output
@dataclass
class File:
    """A media file on disk with lazily computed metadata.

    ``path`` is the only dataclass field. Derived values (mimetype, EXIF
    data, datetime and checksum) are computed on first access and cached on
    the instance in the private attributes ``_mimetype``, ``_exif``,
    ``_datetime`` and ``_checksum``.
    """

    path: str

    # EXIF tags inspected by get_datetime().
    _CREATION_DATE_EXIF_KEYS = (
        "Content Create Date",
        "Date/Time Original",
        "Create Date",
        "Date Created",
        "File Modification Date/Time",
    )

    # Number of bytes read per iteration while hashing, so that large files
    # are never loaded into memory in one piece.
    _CHECKSUM_CHUNK_SIZE = 1024 * 1024

    @property
    def mimetype(self) -> Text:
        """Return the mimetype guessed from the file extension.

        The value is ``None`` when the extension is not recognised; a message
        is printed the first time that happens. The guess is cached either
        way, so the lookup (and the message) happens only once per instance.
        """
        if not hasattr(self, "_mimetype"):
            self._mimetype, _ = mimetypes.guess_type(self.path)
            if not self._mimetype:
                print(f"Can't guess type of file {self.path}")
        return self._mimetype

    @property
    def is_raw(self) -> bool:
        """Return True when the mimetype is one of the ``RAW_MIMETYPES`` values."""
        return self.mimetype in RAW_MIMETYPES.values()

    @property
    def is_image(self) -> bool:
        """Return True when the mimetype's top-level type is ``image``.

        An unknown mimetype (``None``) yields False.
        """
        return (self.mimetype or "").startswith("image/")

    @property
    def is_video(self) -> bool:
        """Return True when the mimetype's top-level type is ``video``.

        An unknown mimetype (``None``) yields False.
        """
        return (self.mimetype or "").startswith("video/")

    @property
    def stat(self):
        """Return every ``st_*`` attribute of ``os.stat(self.path)`` as a dict."""
        result = os.stat(self.path)
        return {
            name: getattr(result, name)
            for name in dir(result)
            if name.startswith("st_")
        }

    @property
    def exif(self) -> dict:
        """Return the file's EXIF data, computed once and then cached.

        The data comes from ``read_exif``. For video files it is merged with
        whatever mutagen finds in the file; nothing is merged when mutagen
        cannot identify the file.
        """
        if not hasattr(self, "_exif"):
            metadata = read_exif(self.path)
            if self.is_video:
                tags = mutagen.File(self.path)
                # mutagen.File() returns None for files it cannot identify.
                if tags is not None:
                    metadata.update(tags)
            # Cache only once the data is complete.
            self._exif = metadata
        return self._exif

    @staticmethod
    def _parse_exif_datetime(value):
        """Parse an EXIF date string into a naive datetime, or return None.

        A value carrying a UTC offset is parsed first and the offset is
        dropped without converting the time. Otherwise everything after the
        last ``.`` is discarded and the rest is parsed as a plain
        ``YYYY:MM:DD HH:MM:SS`` timestamp.
        """
        if not isinstance(value, str):
            return None
        try:
            with_offset = datetime.strptime(value, "%Y:%m:%d %H:%M:%S%z")
        except ValueError:
            pass
        else:
            return with_offset.replace(tzinfo=None)
        without_fraction = value.rsplit(".", maxsplit=1)[0]
        try:
            return datetime.strptime(without_fraction, "%Y:%m:%d %H:%M:%S")
        except ValueError:
            return None

    def get_datetime(self) -> datetime:
        """Guess the original datetime of the file.

        Candidates are the parseable EXIF values listed in
        ``_CREATION_DATE_EXIF_KEYS`` plus the file's birth time, or its
        modification time where the platform exposes no birth time.
        The oldest candidate wins.

        Returns:
            datetime: A naive datetime.
        """
        exif = self.exif
        candidates = []
        for key in self._CREATION_DATE_EXIF_KEYS:
            parsed = self._parse_exif_datetime(exif.get(key))
            if parsed is not None:
                candidates.append(parsed)

        # Last resort, use file creation/modification date
        file_stat = os.stat(self.path)
        try:
            candidates.append(datetime.fromtimestamp(file_stat.st_birthtime))
        except AttributeError:
            # Linux: No easy way to get creation dates here,
            # so we'll settle for when its content was last modified.
            candidates.append(datetime.fromtimestamp(file_stat.st_mtime))
        return min(candidates)

    @property
    def datetime(self):
        """Return the result of ``get_datetime()``, computed once and cached."""
        if not hasattr(self, "_datetime"):
            self._datetime = self.get_datetime()
        return self._datetime

    @property
    def filename(self):
        """Return the base name of the path without its extension."""
        return os.path.splitext(os.path.basename(self.path))[0]

    @property
    def extension(self):
        """Return the lower-cased extension of the path, without the dot."""
        return os.path.splitext(self.path)[1][1:].lower()

    @property
    def checksum(self) -> Text:
        """Return ``sha224-<hexdigest>`` of the file contents, cached.

        The file is hashed in fixed-size chunks so its size does not bound
        memory use.
        """
        if not hasattr(self, "_checksum"):
            digest = hashlib.sha224()
            with open(self.path, "rb") as handler:
                read_chunk = lambda: handler.read(self._CHECKSUM_CHUNK_SIZE)
                for chunk in iter(read_chunk, b""):
                    digest.update(chunk)
            self._checksum = f"sha224-{digest.hexdigest()}"
        return self._checksum

    def as_dict(self):
        """Return path, filename, extension, checksum, datetime and exif as a dict."""
        return {
            "path": self.path,
            "filename": self.filename,
            "extension": self.extension,
            "checksum": self.checksum,
            "datetime": self.datetime,
            "exif": self.exif,
        }
def get_files(library_path=None):
    """Yield a ``File`` for every file found under the library directory.

    Args:
        library_path: Directory to walk recursively. When omitted, the
            module-level ``LIBRARY_PATH`` is used; it is looked up when
            iteration starts.

    Yields:
        File: One instance per file, whose ``path`` is the containing
        directory joined with the file name.
    """
    root_path = LIBRARY_PATH if library_path is None else library_path
    for root, _dirs, files in os.walk(root_path):
        for filename in files:
            yield File(path=os.path.join(root, filename))