2018-09-26 21:44:04 +00:00
|
|
|
from dataclasses import dataclass
|
|
|
|
from datetime import datetime
|
|
|
|
import hashlib
|
|
|
|
import mimetypes
|
|
|
|
import os.path
|
2018-09-29 21:55:20 +00:00
|
|
|
import subprocess
|
2018-09-26 21:54:29 +00:00
|
|
|
from typing import Text
|
2018-09-26 21:44:04 +00:00
|
|
|
|
|
|
|
import mutagen
|
|
|
|
|
|
|
|
|
|
|
|
# Config
# Directory tree that is scanned for media files.
SOURCE_PATH = '/Volumes/MEDIA/Photos'
# Root under which the per-year / per-month directories are created.
TARGET_PATH = '/Volumes/MEDIA/Pictures'

# Extra "extension -> MIME type" mappings (extension in upper case, no dot)
# that are registered with the stdlib ``mimetypes`` module right below.
CUSTOM_MIMETYPES = {
    # RAW pictures
    'ARW': 'image/x-sony-arw',
    'CR2': 'image/x-canon-cr2',
    'CRW': 'image/x-canon-crw',
    'DCR': 'image/x-kodak-dcr',
    'DNG': 'image/x-adobe-dng',
    'ERF': 'image/x-epson-erf',
    'K25': 'image/x-kodak-k25',
    'KDC': 'image/x-kodak-kdc',
    'MRW': 'image/x-minolta-mrw',
    'NEF': 'image/x-nikon-nef',
    'ORF': 'image/x-olympus-orf',
    'PEF': 'image/x-pentax-pef',
    'RAF': 'image/x-fuji-raf',
    'RAW': 'image/x-panasonic-raw',
    'SR2': 'image/x-sony-sr2',
    'SRF': 'image/x-sony-srf',
    'X3F': 'image/x-sigma-x3f',
    # High Efficiency Image/Video
    'HEIC': 'image/heic',
    'HEIF': 'image/heif',
    'HEVC': 'video/hevc',
}

# Register each mapping twice: once with the upper-case suffix as written in
# the table and once with its lower-case spelling.
for extension, mimetype in CUSTOM_MIMETYPES.items():
    mimetypes.add_type(mimetype, '.' + extension)
    mimetypes.add_type(mimetype, '.' + extension.lower())
|
2018-09-26 21:44:04 +00:00
|
|
|
|
|
|
|
# Globals
# Module-level list, created empty. Nothing in the visible part of this file
# reads from or appends to it.
file_list = []
|
|
|
|
|
|
|
|
|
2018-09-29 21:55:20 +00:00
|
|
|
def read_exif(path):
    """Run ``exiftool`` on *path* and return its textual output as a dict.

    Every line written by the tool to standard output is split on the first
    ``:`` into a label and a value, both stripped of surrounding whitespace.
    Lines that contain no ``:`` at all (blank lines, banners) are skipped
    instead of aborting the whole read. When a label is printed more than
    once, the last occurrence wins.

    :param path: path of the file handed to ``exiftool``.
    :returns: dict mapping label -> value, both ``str``.
    :raises OSError: whatever ``subprocess.Popen`` raises when ``exiftool``
        cannot be started (e.g. ``FileNotFoundError`` if it is not installed).
    """
    output = {}
    with subprocess.Popen(['exiftool', path], stdout=subprocess.PIPE) as proc:
        for raw_line in proc.stdout:
            # errors='replace' keeps a single badly encoded value from
            # discarding every other tag of the file.
            text = raw_line.decode('utf-8', errors='replace')
            key, separator, value = text.partition(':')
            if not separator:
                # Not a "label : value" line.
                continue
            output[key.strip()] = value.strip()
    return output
|
|
|
|
|
|
|
|
|
2018-09-26 21:44:04 +00:00
|
|
|
@dataclass
class File:
    """A media file on disk with lazily computed, per-instance cached metadata.

    Each property stores its result in a private ``_<name>`` attribute on
    first access, so the expensive work (external tool, hashing) runs once.
    """

    # Filesystem path of the file this object describes.
    path: str

    @property
    def type(self) -> Text:
        """Return the MIME type guessed from the file extension.

        The result is ``None`` when the extension is unknown; a notice is
        printed the first time that happens.
        """
        # hasattr instead of a truthiness test: a ``None`` guess must be
        # cached too, otherwise the lookup and the notice repeat on each access.
        if not hasattr(self, '_type'):
            self._type, _ = mimetypes.guess_type(self.path)
            if not self._type:
                print(f"Can't guess type of file {self.path}")
        return self._type

    @property
    def is_image(self) -> bool:
        """Whether the guessed MIME type belongs to the ``image/`` family."""
        # ``or ''`` covers an unknown (None) type; the prefix test avoids
        # matching types that merely contain the word, such as
        # ``application/x-apple-diskimage``.
        return (self.type or '').startswith('image/')

    @property
    def is_video(self) -> bool:
        """Whether the guessed MIME type belongs to the ``video/`` family."""
        return (self.type or '').startswith('video/')

    @property
    def exif(self) -> dict:
        """
        Retrieve the metadata reported by ``read_exif`` for the file and, for
        video files, merge in whatever tags mutagen finds in there.
        """
        if not hasattr(self, '_exif'):
            metadata = read_exif(self.path)
            if self.is_video:
                tags = mutagen.File(self.path)
                # mutagen.File() returns None when it cannot identify the
                # container; dict.update(None) would raise TypeError.
                if tags is not None:
                    metadata.update(tags)
            self._exif = metadata
        return self._exif

    @property
    def datetime(self) -> datetime:
        """
        Retrieve the original creation date of the file, trying in order:

        1. the ``Date/Time Original`` tag (images other than PNG),
        2. the Apple ``©day`` tag (videos),
        3. a file name of the form ``YYYY-MM-DD HH.MM.SS.ext``,
        4. the file's creation time, or its modification time on platforms
           that do not expose a creation time.

        Make sure your pictures are exported unmodified so the file attributes
        maintain their original values for the last step to work.

        :raises OSError: if every tag/name strategy fails and the file cannot
            be stat'ed.
        """
        if self.is_image and not self.path.lower().endswith('.png'):
            try:
                # ExifTool labels the tag "Date/Time Original" and prints it as
                # "YYYY:MM:DD HH:MM:SS", optionally followed by sub-seconds
                # and/or a UTC offset; only the first 19 characters are parsed.
                original = self.exif['Date/Time Original']
                return datetime.strptime(original[:19], '%Y:%m:%d %H:%M:%S')
            except (KeyError, ValueError):
                # Tag missing or not a valid date (e.g. all zeroes).
                pass

        if self.is_video:
            # Apple iPhone tag
            try:
                return datetime.strptime(self.exif.get('©day')[0], '%Y-%m-%dT%H:%M:%S%z')
            except (TypeError, IndexError, ValueError):
                # Tag absent (None), empty, or in an unexpected format.
                pass

        # Tag not found, try to guess datetime from filename
        # Format: YYYY-MM-DD HH.MM.SS.ext
        try:
            name, _ = os.path.basename(self.path).rsplit('.', maxsplit=1)
            date, time = name.split(' ')
            return datetime(*(int(x) for x in date.split('-') + time.split('.')))
        except (ValueError, TypeError):
            # ValueError: name does not split as expected, a part is not a
            # number, or a component is out of range.
            # TypeError: too few or too many numeric components.
            # Either way, fall through to the filesystem timestamps.
            pass

        # Last resort, use file creation/modification date
        stat = os.stat(self.path)
        try:
            return datetime.fromtimestamp(stat.st_birthtime)
        except AttributeError:
            # Linux: No easy way to get creation dates here,
            # so we'll settle for when its content was last modified.
            return datetime.fromtimestamp(stat.st_mtime)

    @property
    def checksum(self) -> Text:
        """Return the hexadecimal SHA-1 digest of the file contents."""
        if not hasattr(self, '_checksum'):
            digest = hashlib.sha1()
            with open(self.path, 'rb') as handler:
                # Hash in 1 MiB chunks so large videos are never loaded
                # into memory in one piece.
                for chunk in iter(lambda: handler.read(1 << 20), b''):
                    digest.update(chunk)
            self._checksum = digest.hexdigest()
        return self._checksum
|
|
|
|
|
|
|
|
|
|
|
|
def read_path():
    """Walk ``SOURCE_PATH`` recursively and yield a ``File`` per visible file.

    File names beginning with a dot are skipped. Only file names are
    filtered; directories whose name begins with a dot are still walked.
    """
    for root, _subdirs, names in os.walk(SOURCE_PATH):
        # A leading-dot test already excludes the '.' and '..' entries.
        visible = (name for name in names if not name.startswith('.'))
        for name in visible:
            yield File(path=os.path.join(root, name))
|
|
|
|
|
|
|
|
|
|
|
|
def get_target_path(fileobj, target_root=None):
    """Return the ``<root>/<YYYY>/<MM>`` directory a file belongs in.

    :param fileobj: object exposing a ``datetime`` attribute with ``year``
        and ``month``.
    :param target_root: root directory to build the path under; defaults to
        the module-level ``TARGET_PATH`` when omitted or ``None``.
    :returns: the joined path, with the month zero-padded to two digits.
    """
    if target_root is None:
        target_root = TARGET_PATH
    # Read the property once: every access may re-run the date detection.
    taken = fileobj.datetime
    return os.path.join(target_root, f'{taken.year}', f'{taken.month:02d}')
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # For every file found by read_path(), make sure its destination
    # directory exists. Already existing directories are left alone.
    for fileobj in read_path():
        os.makedirs(get_target_path(fileobj), exist_ok=True)
|