From 3a4a94bdb367058eb3333dd127188d83cf294edf Mon Sep 17 00:00:00 2001 From: Amras Date: Wed, 8 Nov 2023 12:49:08 +0100 Subject: [PATCH 1/5] use exiftool instead of ffprobe to fetch metadata exiftool presents similar data to ffprobe, so it may be used as a replacement. notably, this change will give us access to the "Orientation" tag, which ffprobe does not provide. [server] install exiftool on the docker image [server] replace Image._reload_info's implementation with a call to exiftool [server] remove unused Image.frames property; replace with Image.duration --- server/Dockerfile | 1 + server/szurubooru/func/images.py | 75 ++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/server/Dockerfile b/server/Dockerfile index c2640f16..fb42687b 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -13,6 +13,7 @@ RUN apk --no-cache add \ libheif-dev \ libavif \ libavif-dev \ + exiftool \ ffmpeg \ # from requirements.txt: py3-yaml \ diff --git a/server/szurubooru/func/images.py b/server/szurubooru/func/images.py index e135d182..93b66710 100644 --- a/server/szurubooru/func/images.py +++ b/server/szurubooru/func/images.py @@ -5,7 +5,8 @@ import re import shlex import subprocess from io import BytesIO -from typing import List +from typing import List, Optional +import datetime import HeifImagePlugin import pillow_avif @@ -31,15 +32,39 @@ class Image: @property def width(self) -> int: - return self.info["streams"][0]["width"] + return self.info["ImageWidth"] @property def height(self) -> int: - return self.info["streams"][0]["height"] + return self.info["ImageHeight"] @property - def frames(self) -> int: - return self.info["streams"][0]["nb_read_frames"] + def duration(self) -> Optional[datetime.timedelta]: + try: + duration_data = self.info["Duration"] + except KeyError: + return None + + time_formats = [ + "%H:%M:%S", + "%H:%M:%S.%f", + "%M:%S", + "%M:%S.%f", + "%S.%f s", + ] + for time_format in time_formats: + try: + duration = datetime.datetime.strptime( + duration_data, time_format).time() + return datetime.timedelta( + hours=duration.hour, + minutes=duration.minute, + seconds = duration.second, + microseconds=duration.microsecond) + except ValueError: + pass + logger.warning("Unexpected time format(duration=%r)", duration_data) + return None def resize_fill(self, width: int, height: int) -> None: width_greater = self.width > self.height @@ -60,15 +85,15 @@ class Image: "png", "-", ] + duration = self.duration if ( - "duration" in self.info["format"] - and self.info["format"]["format_name"] != "swf" + duration is not None and self.info["FileType"] != "SWF" ): - duration = float(self.info["format"]["duration"]) - if duration > 3: + total_seconds = duration.total_seconds() + if total_seconds > 3: cli = [ "-ss", - "%d" % math.floor(duration * 0.3), + "%d" % math.floor(total_seconds * 0.3), ] + cli content = self._execute(cli, ignore_error_if_data=True) if not content: @@ -274,8 +299,10 @@ class Image: with util.create_temp_file(suffix="." + extension) as handle: handle.write(self.content) handle.flush() - cli = [program, "-loglevel", "32" if get_logs else "24"] + cli cli = [part.format(path=handle.name) for part in cli] + if program in ("ffmpeg", "ffprobe"): + cli = ["-loglevel", "32" if get_logs else "24"] + cli + cli = [program] + cli proc = subprocess.Popen( cli, stdout=subprocess.PIPE, @@ -285,7 +312,7 @@ class Image: out, err = proc.communicate() if proc.returncode != 0: logger.warning( - "Failed to execute ffmpeg command (cli=%r, err=%r)", + "Failed to execute command (cli=%r, err=%r)", " ".join(shlex.quote(arg) for arg in cli), err, ) @@ -298,25 +325,17 @@ class Image: return err if get_logs else out def _reload_info(self) -> None: - self.info = json.loads( + exiftool_data = json.loads( self._execute( [ - "-i", "{path}", - "-of", - "json", - "-select_streams", - "v", - "-show_format", - "-show_streams", + "-json", ], - program="ffprobe", + program="exiftool", ).decode("utf-8") ) - assert "format" in self.info - assert "streams" in self.info - if len(self.info["streams"]) < 1: - logger.warning("The video contains no video streams.") - raise errors.ProcessingError( - "The video contains no video streams." - ) + + if len(exiftool_data) != 1: + logger.warning("Unexpected output from exiftool") + + self.info = exiftool_data[0] From 6e2a3eaf927aa0e9d97040250a7760b99fe0acbb Mon Sep 17 00:00:00 2001 From: Amras Date: Wed, 8 Nov 2023 17:24:40 +0100 Subject: [PATCH 2/5] Rotate images based on EXIF Orientation This change resolves https://github.com/rr-/szurubooru/issues/470 As well as correcting a related issue with thumbnail rotation. Based on the EXIF Orientation data, we correctly size the scaled image in the post view, and rotate the image before creating its thumbnail. --- server/szurubooru/func/images.py | 67 ++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/server/szurubooru/func/images.py b/server/szurubooru/func/images.py index 93b66710..1d55762b 100644 --- a/server/szurubooru/func/images.py +++ b/server/szurubooru/func/images.py @@ -16,6 +16,30 @@ from szurubooru import errors from szurubooru.func import mime, util logger = logging.getLogger(__name__) +logger.setLevel(level=logging.INFO) + +# Refer to: +# https://exiftool.org/TagNames/EXIF.html +# https://ffmpeg.org/ffmpeg-filters.html#transpose-1 +# https://www.impulseadventure.com/photo/images/orient_flag.gif +ORIENTATION_FILTER = { + "Horizontal (normal)": "null", + "Mirror Horizontal": "transpose=clock_flip,transpose=cclock", + "Rotate 180": "transpose=clock,transpose=clock", + "Mirror vertical": "transpose=clock_flip,transpose=clock", + "Mirror horizontal and rotate 270 CW": "transpose=cclock_flip,transpose=clock,transpose=clock", + "Rotate 90 CW": "transpose=clock", + "Mirror horizontal and rotate 90 CW": "transpose=clock_flip,transpose=clock,transpose=clock", + "Rotate 270 CW": "transpose=cclock", +} + + +ORTHOGONAL_ORIENTATIONS = ( + "Mirror horizontal and rotate 270 CW", + "Rotate 90 CW", + "Mirror horizontal and rotate 90 CW", + "Rotate 270 CW", +) def convert_heif_to_png(content: bytes) -> bytes: @@ -32,10 +56,14 @@ class Image: @property def width(self) -> int: + if self._is_orthogonal(): + return self.info["ImageHeight"] return self.info["ImageWidth"] @property def height(self) -> int: + if self._is_orthogonal(): + return self.info["ImageWidth"] return self.info["ImageHeight"] @property @@ -66,17 +94,32 @@ class Image: logger.warning("Unexpected time format(duration=%r)", duration_data) return None + def _orientation_filter(self) -> str: + try: + return ORIENTATION_FILTER[self.info["Orientation"]] + except KeyError: + return "null" + + def _is_orthogonal(self) -> bool: + try: + return self.info["Orientation"] in ORTHOGONAL_ORIENTATIONS + except KeyError: + return False + def resize_fill(self, width: int, height: int) -> None: width_greater = self.width > self.height width, height = (-1, height) if width_greater else (width, -1) + filters = "{orientation},scale='{width}:{height}'".format( + orientation=self._orientation_filter(), width=width, height=height) + cli = [ "-i", "{path}", "-f", "image2", "-filter:v", - "scale='{width}:{height}'".format(width=width, height=height), + filters, "-map", "0:v:0", "-vframes", @@ -86,9 +129,7 @@ class Image: "-", ] duration = self.duration - if ( - duration is not None and self.info["FileType"] != "SWF" - ): + if duration is not None and self.info["FileType"] != "SWF": total_seconds = duration.total_seconds() if total_seconds > 3: cli = [ @@ -108,6 +149,8 @@ class Image: "{path}", "-f", "image2", + "-filter:v", + self._orientation_filter(), "-map", "0:v:0", "-vframes", @@ -130,7 +173,7 @@ class Image: "-f", "image2", "-filter_complex", - "overlay", + "overlay," + self._orientation_filter(), "-map", "0:v:0", "-vframes", @@ -142,6 +185,7 @@ class Image: ) def to_webm(self) -> bytes: + filters = self._orientation_filter() with util.create_temp_file_path(suffix=".log") as phase_log_path: # Pass 1 self._execute( @@ -152,6 +196,8 @@ class Image: "1", "-passlogfile", phase_log_path, + "-filter:v", + filters, "-vcodec", "libvpx-vp9", "-crf", @@ -176,6 +222,8 @@ class Image: "2", "-passlogfile", phase_log_path, + "-filter:v", + filters, "-vcodec", "libvpx-vp9", "-crf", @@ -204,6 +252,10 @@ class Image: height = self.height - 1 altered_dimensions = True + filters = self._orientation_filter() + if altered_dimensions: + filters += ",scale='%d:%d'" % (width, height) + args = [ "-i", "{path}", @@ -223,11 +275,10 @@ class Image: "aac", "-f", "mp4", + "-filter:v", + filters, ] - if altered_dimensions: - args += ["-filter:v", "scale='%d:%d'" % (width, height)] - self._execute(args + ["-y", mp4_temp_path]) with open(mp4_temp_path, "rb") as mp4_temp: From e7ab2fe99f11872e6ed2d5a08e7d0d4cc848629a Mon Sep 17 00:00:00 2001 From: Amras Date: Thu, 9 Nov 2023 16:42:43 +0100 Subject: [PATCH 3/5] Account for EXIF data during hashing/searching PIL provides built-in support for transposing images based on EXIF Orientation. This change adds that call. --- server/szurubooru/func/image_hash.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/szurubooru/func/image_hash.py b/server/szurubooru/func/image_hash.py index 76d5a846..aa1a0dd8 100644 --- a/server/szurubooru/func/image_hash.py +++ b/server/szurubooru/func/image_hash.py @@ -7,7 +7,7 @@ from typing import Any, Callable, List, Optional, Set, Tuple import HeifImagePlugin import numpy as np import pillow_avif -from PIL import Image +from PIL import Image, ImageOps from szurubooru import config, errors @@ -40,7 +40,7 @@ NpMatrix = np.ndarray def _preprocess_image(content: bytes) -> NpMatrix: try: - img = Image.open(BytesIO(content)) + img = ImageOps.exif_transpose(Image.open(BytesIO(content))) return np.asarray(img.convert("L"), dtype=np.uint8) except (IOError, ValueError): raise errors.ProcessingError( From dadeeb6abdcab42e133f018717851858614c7075 Mon Sep 17 00:00:00 2001 From: Amras Date: Thu, 9 Nov 2023 16:44:27 +0100 Subject: [PATCH 4/5] Report exiftool errors Raise an exception when exiftool provides a Warning or Error, which usually indicates a corrupted file. This exception is handled by the allow_broken_uploads setting. --- server/szurubooru/func/images.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/server/szurubooru/func/images.py b/server/szurubooru/func/images.py index 1d55762b..c4d435ec 100644 --- a/server/szurubooru/func/images.py +++ b/server/szurubooru/func/images.py @@ -16,7 +16,6 @@ from szurubooru import errors from szurubooru.func import mime, util logger = logging.getLogger(__name__) -logger.setLevel(level=logging.INFO) # Refer to: # https://exiftool.org/TagNames/EXIF.html @@ -95,6 +94,8 @@ class Image: return None def _orientation_filter(self) -> str: + # This filter should be omitted in ffmpeg>=6.0, + # where it is automatically applied. try: return ORIENTATION_FILTER[self.info["Orientation"]] except KeyError: @@ -390,3 +391,11 @@ class Image: logger.warning("Unexpected output from exiftool") self.info = exiftool_data[0] + + if "Error" in self.info: + raise errors.ProcessingError( + "Error in metadata:" + str(self.info["Error"])) + + if "Warning" in self.info: + raise errors.ProcessingError( + "Warning in metadata:" + str(self.info["Warning"])) \ No newline at end of file From 4ad90007aa6285cb1e4b3625cebff2cfce03a0e3 Mon Sep 17 00:00:00 2001 From: Amras Date: Thu, 9 Nov 2023 16:47:05 +0100 Subject: [PATCH 5/5] Add unit test updating a post with EXIF metadata The test creates a new post, containing a known image which has been rotated 90 degrees. It then checks: - the size of the canvas - the size of the thumbnail - if the image is searchable using non-exif data --- server/szurubooru/tests/assets/exif.jpg | Bin 0 -> 4302 bytes server/szurubooru/tests/func/test_posts.py | 39 +++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 server/szurubooru/tests/assets/exif.jpg diff --git a/server/szurubooru/tests/assets/exif.jpg b/server/szurubooru/tests/assets/exif.jpg new file mode 100644 index 0000000000000000000000000000000000000000..784adec784ae16b45cfdeaeb8c918642c47216c8 GIT binary patch literal 4302 zcmYjS2Q-}B)_!MAPN8g4L}KE1}H8R2%x$kE`aji8US=bu>aE-knn#z7k7YJ{;My%e{ga7 z-#9-5nDp)Z9b_cLCB+dE62LhTPzPwJsHmx^Y5ombT3R|fx(oiZU4Vgs;eW&*#lU!p zk&*F&FEKGOF*8GP=cxd-6NO+fI_ZPGP59zs08lVAz1}+_C9Ko>6pi*Y}6UXcsRRyR5KI+T_jOZ zLn!`J>|%USimOJ5JIr=IkWv<0I`VIUzGl{Mlo+)={CO_`r2!lPJ&KD#f-h#t44f|k z41aopQa}L};7$L$8jg%K@31r_H+0q4Y&UXOe4PyGUkZF0r&#>Si28cdp;DH1lbGha zs2?^WD1(RF@_ufn-xJ(gkoWjfpRfD(aFBscpDXqtQL(a)p2%I*QkISDq$f58my}JO z!ZyFh$0Y+)Bf~ucdYzgh3&Oj?kvY|DS9C<<@%qdTYjWHR{yvX~bu&c-n&XdsPuE5b zwj-Ow)jz4T-VKpz5SWEs)+p4m_2W;9{#Z*Yq}gARBM&Bu@Jxy|G-GBSH0L#nc1(ZJ zdpt!yRp9L9?{E}`Qb?SU&Ej+BFt(6jMy&|#oS>}Xqxw~LuMCVY4Gp~;nCO{tB(Gcc zXqm6S+U`rPsSP9hEgf)i9sn+4-6n5)M+r%3j0ea8o8Dx3T$f3{*nZSpDF*+tF2$B& z=QvR}V++OI$5egPx>Xh?r6bQK!hiaqbv#j3>{)Y5lq&p)i3=P(-f3cJK*pB%6{|wx zkk^N4{J1gVF|J%U8(rqqP}etdN0T++_~+=hS24=Q*}-naX@&a8yA@|n=;OCa0&z{q z@-ZSm0q^=&(QTv|D<}e^*RuASW`77GWLSsLo9SV){Sbt~oU2m9ff^K42Mvm{7HK|5 zhVcWb{H@8;X*u2&S>+CcqbG%9k2UK;FJ*q_5hlT_TJ`c@_vgQixeoCjH((fQDcp|S z8`Uf-{Kgv1d#h?RlZD%=y{2kvLUOxb`uhBFUbpBi)72ID*~YfK3mv|!O;9p#o$C3? z=ZE6#QZq0Al7cgBd5!j*A}IErr6L~XHQV-H!*i~Ujq3!7F{VWJpQ`P3>ItF%k-j{# zC5cTM6^WYCqa@s!^M+<~bg2bY2{CVc|8D3GWLB{zypDP?1Vo3?dW6hbj!8}k=3Hyb zSVk-HQfak3vmw|jn5}vuoF_O&z20;sCgd41jV5mS@os!38?Dcplq?_Zt`*j=Y)qAl zl*29AO+{JZgI%z=v_qf7X}eDx({c&fa@LvNnIyl(ltOFf?S`i}f5l2YDll7W6d+6G zZVux%I}P&b-C;iz0$q)TO;clt+Rz&|EaS2fqkijyCg{4-pZBIZGTm-H8X>HGI843m zIW-~0kxmdpV&(Iov9$0)zZ9qt)?RFSX?g!Gk1KB3Wvnj)R+_}R8vf2kL6?reWi9_@ z=i$|n7yHairn_ykzsr;EC_YdL7vNSdDXVLxyg|;SbmsU4{?z~X{<4v_VQuV#taf#a zRu87`)iuZAF23rqAV`y5GGq{*oND_1b;7N?hGQX%I*^ZrU5%|g4ICRoMN3x2+`TJ= zL&3F?x@<*5COt3*G~8^uSR`?vs%nC;(O-nFDS$$TWSS0qwtl#8X0~#<67LGYb>zJ9 zi*qw*oe!1HSL#1?dA>j}yDlrMKH@apjKP1*6oD}}lB%8gH=nh!mQ&Nho+l+eW+Wu4 zjWhZ9hZOwUu4n!N*`dX^aX;d++_F|ee50DLJJ-A-Uwo` zy(2WZmESMFJOM=^2gj2Iz8_zi)p80Q^Zrt&fPBJNdZkLiAg--o;D-pV1BsR3cKR#! z@RssUJ7rml)>x($TU;F49L9$Cw_`5&1s+~SJLS9CsJ3>ia8EvQCgAII@QRVa%kfT! zSCx%U=<(DELLX}nKK~vPu*hMdfF@_FHfDeIB4-d17|#KPWH62mLC=+sQB*T zP_k!7(#7^f<=W9B@+Xf6dWfCkGL{#C!fD{oKkEr!@501SUwVm5Taki_F#{S7=6Y8; zY%dc8dIV5cQHSNiq0~%AmNfjU<8s~7)!g|<4_DAsdY6-0!rh#Etf}@d#tnuJf20QXgsr);mmtTL|EX(!j#Pt*)je(kB!BI!=XhMsxsidxO5`>l9{Hq{l&ex{rX_!kR?V;?q!a`rKvgDJ9jrS^SJ1sIKkX zKfSsG*VRV%x*T0bw|b9$wpT2K_R#l3q0LWn@->t9^Orl+69nT+m^tHGs^=nKpt;S< zIcG1wt2A%pObdFr`+#MaqnXgF1IPD%x21BeL*q6zczWUH%;4pZ1+P5YW^;f3?s`5F z*(f%gO*41J9ZCbW=R`XC&605grIaIZjDHpkY#EPMsBkXuU&;I(&y9ML`CeMEi+0J6 z815^xLGYhvN}#nJ@wgY0ea}YBY+T&KZ`NvRAVfPqG5V}J<(BXEP@>H2#|_yzP0Pb3 z-$!9CG(?Bo;OAL+S0xEGmGwqGH;G>&)IzO2&r048G9`;ea|?;r@dZ@mH2cT)iGhv> zJDBL_E53|1e;)U-I)x8cJO1!Xav85(_$Dv!k4J*$2K)jd1qIWnxh3rL?t=u^#R5}F z?Qgsxx7I<5*mZTOK_hBJ`iYDfV`so*vA({A&%+F}6xQ6nA8ra=Mg$kmxYzsINB1}< zr+pM12iV^K!vY&PT;~;<+Vcr72>gPjYQ~0C-9V?FP>54Ds|VAp{`#Demgs5@hb6O> z6wrzCFC&(e-ag(x)3PD9Yh|o)+GYDBR7j-kZ2f$N0RCP*Xnp?yr8SDfK zdGOR1oxwnpN=Z%;wxj#j3YaIz;pEb`8q^w}V>L~$x~4xXcEJ5wSv^64=analrem~! zZxHYINd%@3d{&fTh;}dGHJ5d0-Dzr0Sp?e&Pa*fWaUzr)tWMo?OY_K2>hU`pA-H^j75(L-W@En!C%7M z|5{~=Su&h6^DrK=H(krWKZhxY)n0pJW?YlL@OMLv5i*oh@UOFtAi2!VfJ7ocNA4UV zo>)JxdqrCcnA7~89)Ko-DRyd(eIr^igW|I zpM5#Xs|heRS29K+s=wCxKzvp3N(#>#xC0jT8b4BY(B)NSN!nxQ!6|(SpuVXxw1C$8 z#%Q0c28X{aZiEmE*xtpDu}7zKSq{j=gm&Dn)@E$QwD6g;S z>?^!1Cpx+8weWLzXldaScP{e5rtf7|7JC%_wQ!8}QF$bEoHw{-L86p2qFkDEv#3GI zr?FN&BGc-OjSZWfVd%MJv~N&~;*4Gm5Kg!h;73aphLMQX9yA^nn2T<`lRo+3;~T32 zL3*f!?|l&}q*&nb-x3JN-m*kr%Y&q=09inGZXScJnH*Jq1Pgsj;h*zFA+*;Q%rh~8Bt{^D%sS6TYP_p#W>~UO@mylvEmKj-w6ORgRkI)%Z7n!k4>m9J zEib)5v$$us{o>Y2*GP%375qwTJ<~lSW^-@n3|c790T0WqvkmXi$326w$|WKk{_Dy+ z$m*SNHHD;~wU@b~f@8hi+*zj)iVT;>pMVO^X6^K!PJLdloBQ6?+5OwIr(Pz=smYo@#hh#TJxr_n|zkzS(x~dbrdS%~V|rh-CQH+GQwxvkJ+B zbqowCQ|CNkSi6ahsLU{>R@3ZAN_8?s-^Z$SKR>%AULE*} z2p9QuGnrN7t+rsN!pL~ISP-M@l8|#UCr)jY*0>ase5YQT{ZadI*Te1y&lH12>Ea4>|O0ncdH&g7qRki7SO4kKhhhQ$APLSc;>+QygGPSKwcOF7+SQ;{g+|z2C QZ}blR*wQxN&~v`{FE(7Z%m4rY literal 0 HcmV?d00001 diff --git a/server/szurubooru/tests/func/test_posts.py b/server/szurubooru/tests/func/test_posts.py index fa1b3bb6..5250bf82 100644 --- a/server/szurubooru/tests/func/test_posts.py +++ b/server/szurubooru/tests/func/test_posts.py @@ -773,6 +773,45 @@ def test_update_post_content_convert_heif_to_png_when_processing( assert os.path.exists(generated_path) +def test_update_post_content_with_exif_orientation( + tmpdir, config_injector, read_asset, post_factory): + # exif.jpg is a copy of jpeg.jpg, + # rotated counter-clockwise by 90 degrees, + # and assigned the EXIF Orientation tag "Rotate 90 CW" + config_injector( + { + "data_dir": str(tmpdir.mkdir("data")), + "thumbnails": { + "post_width": 300, + "post_height": 300, + }, + "secret": "test", + "allow_broken_uploads": False, + } + ) + + post = post_factory(id=1) + db.session.add(post) + posts.update_post_content(post, read_asset("exif.jpg")) + thumbnail_path = ( + "{}/data/generated-thumbnails/1_244c8840887984c4.jpg".format(tmpdir)) + db.session.flush() + + assert post.canvas_width == 100 + assert post.canvas_height == 75 + + with open(thumbnail_path, "rb") as handle: + thumbnail = images.Image(handle.read()) + assert thumbnail.width == 400 + assert thumbnail.height == 300 + + search_result = posts.search_by_image(read_asset("jpeg.jpg")) + assert len(search_result) == 1 + search_distance, search_post = search_result[0] + assert search_post.post_id == post.post_id + assert abs(search_distance) < 0.15 + + def test_update_post_tags(tag_factory): post = model.Post() with patch("szurubooru.func.tags.get_or_create_tags_by_names"):