server/net: improve youtube-dl functionality, enforce size limits
This commit is contained in:
parent
2dfd1c2192
commit
c7461c7f65
4 changed files with 54 additions and 65 deletions
|
@ -21,7 +21,7 @@ RUN apk --no-cache add \
|
||||||
&& pip3 install --no-cache-dir --disable-pip-version-check \
|
&& pip3 install --no-cache-dir --disable-pip-version-check \
|
||||||
alembic \
|
alembic \
|
||||||
"coloredlogs==5.0" \
|
"coloredlogs==5.0" \
|
||||||
youtube-dl \
|
youtube_dl \
|
||||||
&& apk --no-cache del py3-pip
|
&& apk --no-cache del py3-pip
|
||||||
|
|
||||||
COPY ./ /opt/app/
|
COPY ./ /opt/app/
|
||||||
|
|
|
@ -9,4 +9,4 @@ pillow>=4.3.0
|
||||||
pynacl>=1.2.1
|
pynacl>=1.2.1
|
||||||
pytz>=2018.3
|
pytz>=2018.3
|
||||||
pyRFC3339>=1.0
|
pyRFC3339>=1.0
|
||||||
youtube-dl
|
youtube_dl
|
||||||
|
|
|
@ -1,76 +1,75 @@
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import subprocess
|
||||||
import urllib.error
|
import urllib.error
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from tempfile import NamedTemporaryFile
|
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
from youtube_dl import YoutubeDL
|
|
||||||
from youtube_dl.utils import YoutubeDLError
|
|
||||||
|
|
||||||
from szurubooru import config, errors
|
from szurubooru import config, errors
|
||||||
from szurubooru.func import mime, util
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
_dl_chunk_size = 2 ** 15
|
||||||
|
|
||||||
|
|
||||||
|
class DownloadError(errors.ProcessingError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class DownloadTooLargeError(DownloadError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def download(url: str, use_video_downloader: bool = False) -> bytes:
|
def download(url: str, use_video_downloader: bool = False) -> bytes:
|
||||||
assert url
|
assert url
|
||||||
|
if use_video_downloader:
|
||||||
|
url = _get_youtube_dl_content_url(url)
|
||||||
|
|
||||||
request = urllib.request.Request(url)
|
request = urllib.request.Request(url)
|
||||||
if config.config["user_agent"]:
|
if config.config["user_agent"]:
|
||||||
request.add_header("User-Agent", config.config["user_agent"])
|
request.add_header("User-Agent", config.config["user_agent"])
|
||||||
request.add_header("Referer", url)
|
request.add_header("Referer", url)
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(request) as handle:
|
content_buffer = b""
|
||||||
content = handle.read()
|
length_tally = 0
|
||||||
except Exception as ex:
|
with urllib.request.urlopen(request) as handle:
|
||||||
raise errors.ProcessingError("Error downloading %s (%s)" % (url, ex))
|
while True:
|
||||||
if (
|
try:
|
||||||
use_video_downloader
|
chunk = handle.read(_dl_chunk_size)
|
||||||
and mime.get_mime_type(content) == "application/octet-stream"
|
except Exception:
|
||||||
):
|
raise DownloadError(url) from None
|
||||||
return _youtube_dl_wrapper(url)
|
if not chunk:
|
||||||
return content
|
break
|
||||||
|
length_tally += len(chunk)
|
||||||
|
if length_tally > config.config["max_dl_filesize"]:
|
||||||
|
raise DownloadTooLargeError(url)
|
||||||
|
content_buffer += chunk
|
||||||
|
return content_buffer
|
||||||
|
|
||||||
|
|
||||||
def _youtube_dl_wrapper(url: str) -> bytes:
|
def _get_youtube_dl_content_url(url: str) -> str:
|
||||||
outpath = os.path.join(
|
cmd = ["youtube-dl", "--format", "best"]
|
||||||
config.config["data_dir"],
|
if config.config["user_agent"]:
|
||||||
"temporary-uploads",
|
cmd.extend(["--user-agent", config.config["user_agent"]])
|
||||||
"youtubedl-" + util.get_sha1(url)[0:8] + ".dat",
|
cmd.extend(["--get-url", url])
|
||||||
)
|
|
||||||
options = {
|
|
||||||
"ignoreerrors": False,
|
|
||||||
"format": "best[ext=webm]/best[ext=mp4]/best[ext=flv]",
|
|
||||||
"logger": logger,
|
|
||||||
"max_filesize": config.config["max_dl_filesize"],
|
|
||||||
"max_downloads": 1,
|
|
||||||
"outtmpl": outpath,
|
|
||||||
}
|
|
||||||
try:
|
try:
|
||||||
with YoutubeDL(options) as ydl:
|
return (
|
||||||
ydl.extract_info(url, download=True)
|
subprocess.run(cmd, text=True, capture_output=True, check=True)
|
||||||
with open(outpath, "rb") as f:
|
.stdout.split("\n")[0]
|
||||||
return f.read()
|
.strip()
|
||||||
except YoutubeDLError as ex:
|
|
||||||
raise errors.ThirdPartyError(
|
|
||||||
"Error downloading video %s (%s)" % (url, ex)
|
|
||||||
)
|
)
|
||||||
except FileNotFoundError:
|
except subprocess.CalledProcessError:
|
||||||
raise errors.ThirdPartyError(
|
raise errors.ThirdPartyError(
|
||||||
"Error downloading video %s (file could not be saved)" % (url)
|
"Could not extract content location from %s" % (url)
|
||||||
)
|
) from None
|
||||||
|
|
||||||
|
|
||||||
def post_to_webhooks(payload: Dict[str, Any]) -> List[Thread]:
|
def post_to_webhooks(payload: Dict[str, Any]) -> List[Thread]:
|
||||||
threads = [
|
threads = [
|
||||||
Thread(target=_post_to_webhook, args=(webhook, payload))
|
Thread(target=_post_to_webhook, args=(webhook, payload), daemon=False)
|
||||||
for webhook in (config.config["webhooks"] or [])
|
for webhook in (config.config["webhooks"] or [])
|
||||||
]
|
]
|
||||||
for thread in threads:
|
for thread in threads:
|
||||||
thread.daemon = False
|
|
||||||
thread.start()
|
thread.start()
|
||||||
return threads
|
return threads
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,3 @@
|
||||||
from datetime import datetime
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from szurubooru import errors
|
from szurubooru import errors
|
||||||
|
@ -69,41 +66,34 @@ def test_download():
|
||||||
"url",
|
"url",
|
||||||
[
|
[
|
||||||
"https://samples.ffmpeg.org/MPEG-4/video.mp4",
|
"https://samples.ffmpeg.org/MPEG-4/video.mp4",
|
||||||
|
"https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_too_large_download(url):
|
def test_too_large_download(url):
|
||||||
pytest.xfail("Download limit not implemented yet")
|
with pytest.raises(net.DownloadTooLargeError):
|
||||||
with pytest.raises(errors.ProcessingError):
|
net.download(url, use_video_downloader=True)
|
||||||
net.download(url)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"url,expected_sha1",
|
"url,expected_sha1",
|
||||||
[
|
[
|
||||||
(
|
(
|
||||||
"https://www.youtube.com/watch?v=C0DPdy98e4c",
|
"https://gfycat.com/immaterialchillyiberianmole",
|
||||||
"365af1c8f59c6865e1a84c6e13e3e25ff89e0ba1",
|
"0125976d2439e651b6863438db30de58f79f7754",
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"https://gfycat.com/immaterialchillyiberianmole",
|
"https://upload.wikimedia.org/wikipedia/commons/a/ad/Utah_teapot.png", # noqa: E501
|
||||||
"953000e81d7bd1da95ce264f872e7b6c4a6484be",
|
"cfadcbdeda1204dc1363ee5c1969191f26be2e41",
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_video_download(url, expected_sha1):
|
def test_content_download(url, expected_sha1):
|
||||||
pytest.xfail("Current youtube-dl implementation is unstable")
|
|
||||||
actual_content = net.download(url, use_video_downloader=True)
|
actual_content = net.download(url, use_video_downloader=True)
|
||||||
assert get_sha1(actual_content) == expected_sha1
|
assert get_sha1(actual_content) == expected_sha1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
def test_bad_content_downlaod():
|
||||||
"url",
|
url = "http://info.cern.ch/hypertext/WWW/TheProject.html"
|
||||||
[
|
|
||||||
"https://samples.ffmpeg.org/flac/short.flac", # not a video
|
|
||||||
"https://www.youtube.com/watch?v=dQw4w9WgXcQ", # video too large
|
|
||||||
],
|
|
||||||
)
|
|
||||||
def test_failed_video_download(url):
|
|
||||||
with pytest.raises(errors.ThirdPartyError):
|
with pytest.raises(errors.ThirdPartyError):
|
||||||
net.download(url, use_video_downloader=True)
|
net.download(url, use_video_downloader=True)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue