use exiftool instead of ffprobe to fetch metadata

exiftool reports much of the same metadata as ffprobe (image width and height, duration, file type), so it can be used as a replacement for fetching metadata.

Notably, this change gives us access to the "Orientation" tag,
which ffprobe does not provide.

[server] install exiftool on the docker image
[server] replace Image._reload_info's implementation with a call to exiftool
[server] remove unused Image.frames property; replace with Image.duration
This commit is contained in:
Amras 2023-11-08 12:49:08 +01:00
parent 7c92ceaf6a
commit 3a4a94bdb3
2 changed files with 48 additions and 28 deletions

View file

@ -13,6 +13,7 @@ RUN apk --no-cache add \
libheif-dev \ libheif-dev \
libavif \ libavif \
libavif-dev \ libavif-dev \
exiftool \
ffmpeg \ ffmpeg \
# from requirements.txt: # from requirements.txt:
py3-yaml \ py3-yaml \

View file

@ -5,7 +5,8 @@ import re
import shlex import shlex
import subprocess import subprocess
from io import BytesIO from io import BytesIO
from typing import List from typing import List, Optional
import datetime
import HeifImagePlugin import HeifImagePlugin
import pillow_avif import pillow_avif
@ -31,15 +32,39 @@ class Image:
@property @property
def width(self) -> int: def width(self) -> int:
return self.info["streams"][0]["width"] return self.info["ImageWidth"]
@property @property
def height(self) -> int: def height(self) -> int:
return self.info["streams"][0]["height"] return self.info["ImageHeight"]
@property @property
def frames(self) -> int: def duration(self) -> Optional[datetime.timedelta]:
return self.info["streams"][0]["nb_read_frames"] try:
duration_data = self.info["Duration"]
except KeyError:
return None
time_formats = [
"%H:%M:%S",
"%H:%M:%S.%f",
"%M:%S",
"%M:%S.%f",
"%S.%f s",
]
for time_format in time_formats:
try:
duration = datetime.datetime.strptime(
duration_data, time_format).time()
return datetime.timedelta(
hours=duration.hour,
minutes=duration.minute,
seconds = duration.second,
microseconds=duration.microsecond)
except ValueError:
pass
logger.warning("Unexpected time format(duration=%r)", duration_data)
return None
def resize_fill(self, width: int, height: int) -> None: def resize_fill(self, width: int, height: int) -> None:
width_greater = self.width > self.height width_greater = self.width > self.height
@ -60,15 +85,15 @@ class Image:
"png", "png",
"-", "-",
] ]
duration = self.duration
if ( if (
"duration" in self.info["format"] duration is not None and self.info["FileType"] != "SWF"
and self.info["format"]["format_name"] != "swf"
): ):
duration = float(self.info["format"]["duration"]) total_seconds = duration.total_seconds()
if duration > 3: if total_seconds > 3:
cli = [ cli = [
"-ss", "-ss",
"%d" % math.floor(duration * 0.3), "%d" % math.floor(total_seconds * 0.3),
] + cli ] + cli
content = self._execute(cli, ignore_error_if_data=True) content = self._execute(cli, ignore_error_if_data=True)
if not content: if not content:
@ -274,8 +299,10 @@ class Image:
with util.create_temp_file(suffix="." + extension) as handle: with util.create_temp_file(suffix="." + extension) as handle:
handle.write(self.content) handle.write(self.content)
handle.flush() handle.flush()
cli = [program, "-loglevel", "32" if get_logs else "24"] + cli
cli = [part.format(path=handle.name) for part in cli] cli = [part.format(path=handle.name) for part in cli]
if program in ("ffmpeg", "ffprobe"):
cli = ["-loglevel", "32" if get_logs else "24"] + cli
cli = [program] + cli
proc = subprocess.Popen( proc = subprocess.Popen(
cli, cli,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
@ -285,7 +312,7 @@ class Image:
out, err = proc.communicate() out, err = proc.communicate()
if proc.returncode != 0: if proc.returncode != 0:
logger.warning( logger.warning(
"Failed to execute ffmpeg command (cli=%r, err=%r)", "Failed to execute command (cli=%r, err=%r)",
" ".join(shlex.quote(arg) for arg in cli), " ".join(shlex.quote(arg) for arg in cli),
err, err,
) )
@ -298,25 +325,17 @@ class Image:
return err if get_logs else out return err if get_logs else out
def _reload_info(self) -> None: def _reload_info(self) -> None:
self.info = json.loads( exiftool_data = json.loads(
self._execute( self._execute(
[ [
"-i",
"{path}", "{path}",
"-of", "-json",
"json",
"-select_streams",
"v",
"-show_format",
"-show_streams",
], ],
program="ffprobe", program="exiftool",
).decode("utf-8") ).decode("utf-8")
) )
assert "format" in self.info
assert "streams" in self.info if len(exiftool_data) != 1:
if len(self.info["streams"]) < 1: logger.warning("Unexpected output from exiftool")
logger.warning("The video contains no video streams.")
raise errors.ProcessingError( self.info = exiftool_data[0]
"The video contains no video streams."
)