server/posts/upload: limit filesize for uploads through youtube-dl

This will be controlled by the config parameter 'max_dl_filesize'.

TODO: In a future commit, the regular downloader should also respect
this parameter.
This commit is contained in:
Shyam Sunder 2020-04-03 15:32:25 -04:00
parent 99a69333e6
commit 2c6434b08d
4 changed files with 50 additions and 19 deletions

View file

@ -5,8 +5,6 @@
name: szurubooru
# full url to the homepage of this szurubooru site, with no trailing slash
domain: # example: http://example.com
# user agent name used to download files from the web on behalf of the api users
user_agent:
# used to salt the users' password hashes and generate filenames for static content
secret: change
@ -21,6 +19,10 @@ thumbnails:
post_width: 300
post_height: 300
# settings used to download files from the web on behalf of the api users
user_agent:
max_dl_filesize: 25.0E+6 # maximum size in bytes of a downloaded file (currently enforced only for youtube-dl downloads)
# automatically convert animated GIF uploads to video formats
convert:
gif:

View file

@ -56,3 +56,6 @@ class InvalidParameterError(ValidationError):
class ThirdPartyError(BaseError):
    """Raised when work delegated to an external tool fails.

    For example, a video download performed through youtube-dl.
    """
class DownloadTooLargeError(ProcessingError):
    """Raised when a requested download exceeds the configured size limit."""

View file

@ -31,9 +31,12 @@ def download(url: str, use_video_downloader: bool = False) -> bytes:
def _youtube_dl_wrapper(url: str) -> bytes:
options = {
'quiet': True,
'ignoreerrors': False,
'format': 'webm/mp4',
'logger': logger,
'noplaylist': True,
'max_filesize': config.config['max_dl_filesize'],
'max_downloads': 1,
'outtmpl': os.path.join(
config.config['data_dir'],
'temporary-uploads',
@ -42,9 +45,19 @@ def _youtube_dl_wrapper(url: str) -> bytes:
with YoutubeDL(options) as ydl:
try:
ydl_info = ydl.extract_info(url, download=True)
# need to confirm if download was skipped due to size
if ydl_info['filesize'] > config.config['max_dl_filesize']:
raise errors.DownloadTooLargeError(
'Requested video too large (%d MB > %d MB)' % (
ydl_info['filesize'] / 1.0e6,
config.config['max_dl_filesize'] / 1.0e6))
ydl_filename = ydl.prepare_filename(ydl_info)
except YoutubeDLError as ex:
raise errors.ThirdPartyError(
'Error downloading video %s (%s)' % (url, ex))
with open(ydl_filename, 'rb') as f:
return f.read()
try:
with open(ydl_filename, 'rb') as f:
return f.read()
except FileNotFoundError as ex:
raise errors.ThirdPartyError(
'Error downloading video %s' % (url))

View file

@ -1,13 +1,19 @@
import pytest
from szurubooru.errors import ThirdPartyError
from szurubooru import errors
from szurubooru.func import net
from szurubooru.func.util import get_sha1
def test_download(config_injector):
@pytest.fixture(autouse=True)
def inject_config(tmpdir, config_injector):
config_injector({
'user_agent': None
'user_agent': None,
'max_dl_filesize': 1.0E+6,
'data_dir': str(tmpdir.mkdir('data')),
})
def test_download():
url = 'http://info.cern.ch/hypertext/WWW/TheProject.html'
expected_content = (
@ -52,11 +58,15 @@ def test_download(config_injector):
assert actual_content == expected_content
def test_video_download(tmpdir, config_injector):
config_injector({
'user_agent': None,
'data_dir': str(tmpdir.mkdir('data'))
})
def test_too_large_download():
    """Expect the regular downloader to reject a file over the size limit.

    The test declares itself an expected failure up front, because the
    regular downloader does not enforce the limit yet; the statements
    after ``pytest.xfail`` record the intended behaviour.
    """
    pytest.xfail('Download limit not implemented yet')
    oversized_url = 'https://samples.ffmpeg.org/MPEG-4/video.mp4'
    with pytest.raises(errors.DownloadTooLargeError):
        net.download(oversized_url)
def test_video_download():
url = 'https://www.youtube.com/watch?v=C0DPdy98e4c'
expected_sha1 = '508f89ee85bc6186e18cfaa4f4d0279bcf2418ab'
@ -64,12 +74,15 @@ def test_video_download(tmpdir, config_injector):
assert get_sha1(actual_content) == expected_sha1
def test_failed_video_download(tmpdir, config_injector):
config_injector({
'user_agent': None,
'data_dir': str(tmpdir.mkdir('data'))
})
url = 'http://info.cern.ch/hypertext/WWW/TheProject.html'
def test_failed_video_download():
url = 'https://samples.ffmpeg.org/flac/short.flac'
with pytest.raises(ThirdPartyError):
with pytest.raises(errors.ThirdPartyError):
net.download(url, use_video_downloader=True)
def test_too_large_video_download():
    """Expect the video downloader to reject a video over the size limit.

    NOTE(review): presumably the referenced video is larger than the
    'max_dl_filesize' value injected by the test configuration -- confirm.
    """
    oversized_url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
    with pytest.raises(errors.DownloadTooLargeError):
        net.download(oversized_url, use_video_downloader=True)