178 lines
5.3 KiB
Python
178 lines
5.3 KiB
Python
import hashlib
import mimetypes
import os
import subprocess
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from typing import Text

import mutagen
|
|
|
|
# Root directory that is scanned for media files.
LIBRARY_PATH = "/home/fmartingr/Code/memories/testphotos"

# Extension (upper case, without the dot) -> mimetype.
RAW_MIMETYPES = {
    # RAW pictures
    "ARW": "image/x-sony-arw",
    "CR2": "image/x-canon-cr2",
    "CR3": "image/x-canon-cr3",
    "CRW": "image/x-canon-crw",
    "DCR": "image/x-kodak-dcr",
    "DNG": "image/x-adobe-dng",
    "ERF": "image/x-epson-erf",
    "K25": "image/x-kodak-k25",
    "KDC": "image/x-kodak-kdc",
    "MRW": "image/x-minolta-mrw",
    "NEF": "image/x-nikon-nef",
    "ORF": "image/x-olympus-orf",
    "PEF": "image/x-pentax-pef",
    "RAF": "image/x-fuji-raf",
    "RAW": "image/x-panasonic-raw",
    "SR2": "image/x-sony-sr2",
    "SRF": "image/x-sony-srf",
    "X3F": "image/x-sigma-x3f",
}

# High Efficiency Image/Video (apple)
APPLE_MIMETYPES = {
    "HEIC": "image/heic",
    "HEIF": "image/heif",
    "HEVC": "video/hevc",
}

# Every custom mapping in one place: RAW entries first, then the Apple ones.
CUSTOM_MIMETYPES = {**RAW_MIMETYPES, **APPLE_MIMETYPES}

# Register each extension with the stdlib registry both as written (upper
# case) and in lower case.
for extension, mimetype in CUSTOM_MIMETYPES.items():
    mimetypes.add_type(mimetype, "." + extension)
    mimetypes.add_type(mimetype, "." + extension.lower())
|
|
|
|
|
|
def read_exif(path):
    """Run ``exiftool`` on *path* and return its output as a dict.

    Every output line is split on its first ``:``; the stripped left part
    becomes the key and the stripped right part the value (so values that
    contain colons themselves, such as timestamps, are kept whole).

    Lines without a ``:`` (blank lines, messages) are skipped instead of
    aborting the whole read, and bytes that are not valid UTF-8 are replaced
    rather than raising.

    Raises whatever ``subprocess.Popen`` raises when the ``exiftool``
    executable can't be started.
    """
    output = {}
    with subprocess.Popen(["exiftool", path], stdout=subprocess.PIPE) as proc:
        for raw_line in proc.stdout:
            text = raw_line.decode("utf-8", errors="replace")
            key, separator, value = text.partition(":")
            if not separator:
                # Not a "key: value" line, nothing to record.
                continue
            output[key.strip()] = value.strip()
    return output
|
|
|
|
|
|
@dataclass
class File:
    """A single file of the library, identified by its filesystem path.

    Derived values (mimetype, EXIF data, original datetime, checksum) are
    computed on first access and cached on the instance in private
    attributes (``_mimetype``, ``_exif``, ``_datetime``, ``_checksum``).
    """

    path: str

    @property
    def mimetype(self) -> Optional[Text]:
        """Return the mimetype guessed from the file extension.

        Returns ``None`` when the type can't be guessed; a notice is printed
        in that case. The result (including ``None``) is cached, so the guess
        and the notice happen only once per instance.
        """
        if not hasattr(self, "_mimetype"):
            self._mimetype, _ = mimetypes.guess_type(self.path)
            if not self._mimetype:
                print(f"Can't guess type of file {self.path}")
        return self._mimetype

    @property
    def is_raw(self) -> bool:
        """Whether the mimetype is one of the values of ``RAW_MIMETYPES``."""
        return self.mimetype in RAW_MIMETYPES.values()

    @property
    def is_image(self) -> bool:
        """Whether the mimetype is of the ``image/*`` family.

        ``False`` when the mimetype is unknown.
        """
        # Prefix test: a plain substring test would also match types such as
        # "application/x-iso9660-image".
        return (self.mimetype or "").startswith("image/")

    @property
    def is_video(self) -> bool:
        """Whether the mimetype is of the ``video/*`` family.

        ``False`` when the mimetype is unknown.
        """
        return (self.mimetype or "").startswith("video/")

    @property
    def stat(self):
        """Return every ``st_*`` attribute of ``os.stat(path)`` as a dict."""
        result = os.stat(self.path)
        return {
            name: getattr(result, name)
            for name in dir(result)
            if name.startswith("st_")
        }

    @property
    def exif(self) -> dict:
        """
        Retrieve EXIF data from the file and merge it with whatever mutagen
        finds in there for video files.

        The merged dict is cached after the first successful read.
        """
        if not hasattr(self, "_exif"):
            exif = read_exif(self.path)
            if self.is_video:
                tags = mutagen.File(self.path)
                # mutagen.File() returns None when it can't identify the
                # file; there is nothing to merge then.
                if tags is not None:
                    exif.update(tags)
            self._exif = exif
        return self._exif

    @staticmethod
    def _parse_exif_datetime(value) -> Optional[datetime]:
        """Parse one EXIF date value into a naive datetime, or return None.

        Accepts ``YYYY:MM:DD HH:MM:SS`` followed by a UTC offset (the offset
        is dropped, the wall-clock time is kept) or followed by a
        ``.``-separated suffix such as fractional seconds (the suffix is
        dropped). Non-string and unparseable values yield ``None``.
        """
        if not isinstance(value, str):
            return None
        try:
            aware = datetime.strptime(value, "%Y:%m:%d %H:%M:%S%z")
            return aware.replace(tzinfo=None)
        except ValueError:
            pass
        try:
            without_suffix = value.rsplit(".", maxsplit=1)[0]
            return datetime.strptime(without_suffix, "%Y:%m:%d %H:%M:%S")
        except ValueError:
            return None

    def get_datetime(self) -> datetime:
        """
        Tries to guess the original datetime for the provided file.

        This is done extracting several EXIF values and the file
        birthdate/modification date. The oldest one is the winner.
        """
        CREATION_DATE_EXIF_KEYS = (
            "Content Create Date",
            "Date/Time Original",
            "Create Date",
            "Date Created",
            "File Modification Date/Time",
        )

        exif = self.exif
        candidates = []
        for key in CREATION_DATE_EXIF_KEYS:
            parsed = self._parse_exif_datetime(exif.get(key))
            if parsed is not None:
                candidates.append(parsed)

        # Last resort, use file creation/modification date
        file_stat = os.stat(self.path)
        try:
            timestamp = file_stat.st_birthtime
        except AttributeError:
            # Linux: No easy way to get creation dates here,
            # so we'll settle for when its content was last modified.
            timestamp = file_stat.st_mtime
        candidates.append(datetime.fromtimestamp(timestamp))

        return min(candidates)

    @property
    def datetime(self):
        """Cached result of :meth:`get_datetime`."""
        if not hasattr(self, "_datetime"):
            self._datetime = self.get_datetime()
        return self._datetime

    @property
    def filename(self):
        """Base name of the path without directory and without extension."""
        return os.path.splitext(os.path.basename(self.path))[0]

    @property
    def extension(self):
        """Lower-cased extension of the path, without the leading dot."""
        return os.path.splitext(self.path)[1][1:].lower()

    @property
    def checksum(self) -> Text:
        """Return ``sha224-<hexdigest>`` of the file contents (cached).

        The file is hashed in chunks so large files are never loaded into
        memory in one piece.
        """
        if not hasattr(self, "_checksum"):
            digest = hashlib.sha224()
            with open(self.path, "rb") as handler:
                for chunk in iter(lambda: handler.read(1024 * 1024), b""):
                    digest.update(chunk)
            self._checksum = f"sha224-{digest.hexdigest()}"
        return self._checksum

    def as_dict(self):
        """Return path, filename, extension, checksum, datetime and exif as a dict."""
        return {
            "path": self.path,
            "filename": self.filename,
            "extension": self.extension,
            "checksum": self.checksum,
            "datetime": self.datetime,
            "exif": self.exif,
        }
|
|
|
|
|
|
def get_files(library_path=None):
    """Yield a ``File`` for every file found under a directory tree.

    Args:
        library_path: Directory to walk recursively. When omitted (``None``),
            the module-level ``LIBRARY_PATH`` is used; it is looked up at
            call time.

    Yields:
        One ``File(path=...)`` per regular directory entry reported by
        ``os.walk``, with the path joined to the directory it was found in.
    """
    if library_path is None:
        library_path = LIBRARY_PATH
    for root, _dirs, files in os.walk(library_path):
        for filename in files:
            yield File(path=os.path.join(root, filename))
|