server/net: improve youtube-dl functionality, enforce size limits
This commit is contained in:
parent
2dfd1c2192
commit
c7461c7f65
4 changed files with 54 additions and 65 deletions
|
@ -21,7 +21,7 @@ RUN apk --no-cache add \
|
|||
&& pip3 install --no-cache-dir --disable-pip-version-check \
|
||||
alembic \
|
||||
"coloredlogs==5.0" \
|
||||
youtube-dl \
|
||||
youtube_dl \
|
||||
&& apk --no-cache del py3-pip
|
||||
|
||||
COPY ./ /opt/app/
|
||||
|
|
|
@ -9,4 +9,4 @@ pillow>=4.3.0
|
|||
pynacl>=1.2.1
|
||||
pytz>=2018.3
|
||||
pyRFC3339>=1.0
|
||||
youtube-dl
|
||||
youtube_dl
|
||||
|
|
|
@ -1,76 +1,75 @@
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from tempfile import NamedTemporaryFile
|
||||
from threading import Thread
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.utils import YoutubeDLError
|
||||
|
||||
from szurubooru import config, errors
|
||||
from szurubooru.func import mime, util
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
# Number of bytes requested per read while streaming a download
# (2 ** 15 = 32768 bytes, i.e. 32 KiB).
_dl_chunk_size = 2 ** 15
|
||||
|
||||
|
||||
class DownloadError(errors.ProcessingError):
    """Raised when reading the content of a remote URL fails."""
|
||||
|
||||
|
||||
class DownloadTooLargeError(DownloadError):
    """Raised when a download grows past the configured size limit."""
|
||||
|
||||
|
||||
def download(url: str, use_video_downloader: bool = False) -> bytes:
    """Fetch the content at ``url`` and return it as bytes.

    The response is read in chunks of ``_dl_chunk_size`` bytes so that the
    transfer can be aborted as soon as it grows past
    ``config.config["max_dl_filesize"]`` instead of buffering an
    arbitrarily large body first.

    Args:
        url: Address to download; must be non-empty.
        use_video_downloader: When true, ``url`` is first resolved to a
            direct content URL via ``_get_youtube_dl_content_url``.

    Returns:
        The complete response body.

    Raises:
        DownloadError: Reading a chunk from the response failed.
        DownloadTooLargeError: More than ``max_dl_filesize`` bytes were
            received.

    Errors raised while resolving the URL or while opening the connection
    are not wrapped here and propagate to the caller unchanged.
    """
    assert url
    if use_video_downloader:
        url = _get_youtube_dl_content_url(url)

    request = urllib.request.Request(url)
    if config.config["user_agent"]:
        request.add_header("User-Agent", config.config["user_agent"])
    request.add_header("Referer", url)

    max_size = config.config["max_dl_filesize"]
    # Collect chunks in a list and join once at the end; repeated
    # ``bytes +=`` would copy the whole buffer on every iteration.
    chunks = []
    length_tally = 0
    with urllib.request.urlopen(request) as handle:
        while True:
            try:
                chunk = handle.read(_dl_chunk_size)
            except Exception:
                raise DownloadError(url) from None
            if not chunk:
                # An empty read marks the end of the response.
                break
            length_tally += len(chunk)
            if length_tally > max_size:
                raise DownloadTooLargeError(url)
            chunks.append(chunk)
    return b"".join(chunks)
|
||||
|
||||
|
||||
def _get_youtube_dl_content_url(url: str) -> str:
    """Resolve ``url`` to a direct content URL using the youtube-dl CLI.

    Runs ``youtube-dl --format best --get-url`` (adding ``--user-agent``
    when ``config.config["user_agent"]`` is set) and returns the first
    line of its standard output with surrounding whitespace removed.

    Args:
        url: Page address handed to youtube-dl.

    Returns:
        The first line printed by youtube-dl.

    Raises:
        errors.ThirdPartyError: youtube-dl exited with a non-zero status.
    """
    cmd = ["youtube-dl", "--format", "best"]
    if config.config["user_agent"]:
        cmd.extend(["--user-agent", config.config["user_agent"]])
    # "--" ends option parsing so that a URL beginning with "-" is treated
    # as a positional argument rather than as a youtube-dl option.
    cmd.extend(["--get-url", "--", url])
    try:
        result = subprocess.run(
            cmd, text=True, capture_output=True, check=True
        )
    except subprocess.CalledProcessError:
        raise errors.ThirdPartyError(
            "Could not extract content location from %s" % (url)
        ) from None
    # Only the first output line is used.
    return result.stdout.split("\n")[0].strip()
|
||||
|
||||
|
||||
def post_to_webhooks(payload: Dict[str, Any]) -> List[Thread]:
    """Send ``payload`` to every configured webhook, one thread per hook.

    A non-daemon thread running ``_post_to_webhook(webhook, payload)`` is
    created and started for each entry of ``config.config["webhooks"]``;
    a falsy setting (``None`` or empty) results in no threads.

    Args:
        payload: Data passed unchanged to each ``_post_to_webhook`` call.

    Returns:
        The list of threads that were started.
    """
    threads = [
        Thread(target=_post_to_webhook, args=(webhook, payload), daemon=False)
        for webhook in (config.config["webhooks"] or [])
    ]
    for thread in threads:
        thread.start()
    return threads
|
||||
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from szurubooru import errors
|
||||
|
@ -69,41 +66,34 @@ def test_download():
|
|||
"url",
|
||||
[
|
||||
"https://samples.ffmpeg.org/MPEG-4/video.mp4",
|
||||
"https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
||||
],
|
||||
)
|
||||
def test_too_large_download(url):
|
||||
pytest.xfail("Download limit not implemented yet")
|
||||
with pytest.raises(errors.ProcessingError):
|
||||
net.download(url)
|
||||
with pytest.raises(net.DownloadTooLargeError):
|
||||
net.download(url, use_video_downloader=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "url,expected_sha1",
    [
        (
            "https://gfycat.com/immaterialchillyiberianmole",
            "0125976d2439e651b6863438db30de58f79f7754",
        ),
        (
            "https://upload.wikimedia.org/wikipedia/commons/a/ad/Utah_teapot.png",  # noqa: E501
            "cfadcbdeda1204dc1363ee5c1969191f26be2e41",
        ),
    ],
)
def test_content_download(url, expected_sha1):
    """Download ``url`` through the video downloader and check its SHA-1.

    NOTE(review): this test fetches live URLs, so it needs network access
    and depends on the remote content staying unchanged.
    """
    actual_content = net.download(url, use_video_downloader=True)
    assert get_sha1(actual_content) == expected_sha1
|
||||
|
||||
|
||||
def test_bad_content_downlaod():
    """Expect ``ThirdPartyError`` for a plain HTML page URL.

    The URL is routed through the video downloader
    (``use_video_downloader=True``); the download is expected to fail
    with ``errors.ThirdPartyError``.
    """
    url = "http://info.cern.ch/hypertext/WWW/TheProject.html"
    with pytest.raises(errors.ThirdPartyError):
        net.download(url, use_video_downloader=True)
|
||||
|
||||
|
|
Loading…
Reference in a new issue