server/posts/upload: Add youtube-dl functionality

Allows video-based posts to be created by using youtube-dl
on the server. Access is controlled with the 'uploads:use_downloader'
permission.
This commit is contained in:
Shyam Sunder 2020-04-02 16:17:26 -04:00
parent 08e62ec885
commit 99a69333e6
12 changed files with 116 additions and 18 deletions

View file

@ -8,6 +8,7 @@ scrubbing](http://sjp.pwn.pl/sjp/;2527372). It is pronounced as *shoorubooru*.
## Features ## Features
- Post content: images (JPG, PNG, GIF, animated GIF), videos (MP4, WEBM), Flash animations - Post content: images (JPG, PNG, GIF, animated GIF), videos (MP4, WEBM), Flash animations
- Ability to retrieve web video content using [youtube-dl](https://github.com/ytdl-org/youtube-dl)
- Post comments - Post comments
- Post notes / annotations, including arbitrary polygons - Post notes / annotations, including arbitrary polygons
- Rich JSON REST API ([see documentation](doc/API.md)) - Rich JSON REST API ([see documentation](doc/API.md))

View file

@ -145,7 +145,10 @@ way. The files, however, should be passed as regular fields appended with a
`Url` suffix. For example, to use `http://example.com/file.jpg` in an API that `Url` suffix. For example, to use `http://example.com/file.jpg` in an API that
accepts a file named `content`, the client should pass accepts a file named `content`, the client should pass
`{"contentUrl":"http://example.com/file.jpg"}` as a part of the JSON message `{"contentUrl":"http://example.com/file.jpg"}` as a part of the JSON message
body. body. When creating or updating post content using this method, the server can
also be configured to employ [youtube-dl](https://github.com/ytdl-org/youtube-dl)
to download content from popular sites such as YouTube, Gfycat, etc. Access to
youtube-dl is controlled with the `'uploads:use_downloader'` permission.
Finally, in some cases the user might want to reuse one file between the Finally, in some cases the user might want to reuse one file between the
requests to save the bandwidth (for example, reverse search + consecutive requests to save the bandwidth (for example, reverse search + consecutive

View file

@ -21,6 +21,7 @@ RUN \
pip3 install --no-cache-dir --disable-pip-version-check \ pip3 install --no-cache-dir --disable-pip-version-check \
alembic \ alembic \
"coloredlogs==5.0" \ "coloredlogs==5.0" \
youtube-dl \
&& exit 0 && exit 0
ARG PUID=1000 ARG PUID=1000

View file

@ -135,6 +135,7 @@ privileges:
'snapshots:list': power 'snapshots:list': power
'uploads:create': regular 'uploads:create': regular
'uploads:use_downloader': power
## ONLY SET THESE IF DEPLOYING OUTSIDE OF DOCKER ## ONLY SET THESE IF DEPLOYING OUTSIDE OF DOCKER
#debug: 0 # generate server logs? #debug: 0 # generate server logs?

View file

@ -2,7 +2,8 @@ from typing import Optional, Dict, List
from datetime import datetime from datetime import datetime
from szurubooru import db, model, errors, rest, search from szurubooru import db, model, errors, rest, search
from szurubooru.func import ( from szurubooru.func import (
auth, tags, posts, snapshots, favorites, scores, serialization, versions) auth, tags, posts, snapshots, favorites, scores,
serialization, versions, mime)
_search_executor_config = search.configs.PostSearchConfig() _search_executor_config = search.configs.PostSearchConfig()
@ -46,7 +47,10 @@ def create_post(
auth.verify_privilege(ctx.user, 'posts:create:anonymous') auth.verify_privilege(ctx.user, 'posts:create:anonymous')
else: else:
auth.verify_privilege(ctx.user, 'posts:create:identified') auth.verify_privilege(ctx.user, 'posts:create:identified')
content = ctx.get_file('content') content = ctx.get_file(
'content',
use_video_downloader=auth.has_privilege(
ctx.user, 'uploads:use_downloader'))
tag_names = ctx.get_param_as_string_list('tags', default=[]) tag_names = ctx.get_param_as_string_list('tags', default=[])
safety = ctx.get_param_as_string('safety') safety = ctx.get_param_as_string('safety')
source = ctx.get_param_as_string('source', default='') source = ctx.get_param_as_string('source', default='')
@ -105,7 +109,10 @@ def update_post(ctx: rest.Context, params: Dict[str, str]) -> rest.Response:
versions.bump_version(post) versions.bump_version(post)
if ctx.has_file('content'): if ctx.has_file('content'):
auth.verify_privilege(ctx.user, 'posts:edit:content') auth.verify_privilege(ctx.user, 'posts:edit:content')
posts.update_post_content(post, ctx.get_file('content')) posts.update_post_content(
post,
ctx.get_file('content', use_video_downloader=auth.has_privilege(
ctx.user, 'uploads:use_downloader')))
if ctx.has_param('tags'): if ctx.has_param('tags'):
auth.verify_privilege(ctx.user, 'posts:edit:tags') auth.verify_privilege(ctx.user, 'posts:edit:tags')
new_tags = posts.update_post_tags( new_tags = posts.update_post_tags(

View file

@ -7,6 +7,10 @@ from szurubooru.func import auth, file_uploads
def create_temporary_file( def create_temporary_file(
ctx: rest.Context, _params: Dict[str, str] = {}) -> rest.Response: ctx: rest.Context, _params: Dict[str, str] = {}) -> rest.Response:
auth.verify_privilege(ctx.user, 'uploads:create') auth.verify_privilege(ctx.user, 'uploads:create')
content = ctx.get_file('content', allow_tokens=False) content = ctx.get_file(
'content',
allow_tokens=False,
use_video_downloader=auth.has_privilege(
ctx.user, 'uploads:use_downloader'))
token = file_uploads.save(content) token = file_uploads.save(content)
return {'token': token} return {'token': token}

View file

@ -1,9 +1,17 @@
import logging
import urllib.request import urllib.request
from szurubooru import config import os
from szurubooru import errors from tempfile import NamedTemporaryFile
from szurubooru import config, errors
from szurubooru.func import mime, util
from youtube_dl import YoutubeDL
from youtube_dl.utils import YoutubeDLError
def download(url: str) -> bytes: logger = logging.getLogger(__name__)
def download(url: str, use_video_downloader: bool = False) -> bytes:
assert url assert url
request = urllib.request.Request(url) request = urllib.request.Request(url)
if config.config['user_agent']: if config.config['user_agent']:
@ -11,6 +19,32 @@ def download(url: str) -> bytes:
request.add_header('Referer', url) request.add_header('Referer', url)
try: try:
with urllib.request.urlopen(request) as handle: with urllib.request.urlopen(request) as handle:
return handle.read() content = handle.read()
except Exception as ex: except Exception as ex:
raise errors.ProcessingError('Error downloading %s (%s)' % (url, ex)) raise errors.ProcessingError('Error downloading %s (%s)' % (url, ex))
if (use_video_downloader and
mime.get_mime_type(content) == 'application/octet-stream'):
return _youtube_dl_wrapper(url)
return content
def _youtube_dl_wrapper(url: str) -> bytes:
    """Download the video behind *url* with youtube-dl and return its bytes.

    youtube-dl writes the result under
    ``<data_dir>/temporary-uploads/youtubedl-<sha1 prefix>.<ext>``; the file
    is then read back into memory and returned.

    Raises:
        errors.ThirdPartyError: if youtube-dl fails to extract/download the
            video, or if the file it reports cannot be read afterwards.
    """
    options = {
        'quiet': True,
        'format': 'webm/mp4',
        'logger': logger,
        'noplaylist': True,
        # The output name is derived from the URL's SHA-1 (first 8 hex
        # characters); youtube-dl fills in the extension itself.
        'outtmpl': os.path.join(
            config.config['data_dir'],
            'temporary-uploads',
            'youtubedl-' + util.get_sha1(url)[0:8] + '.%(ext)s'),
    }
    with YoutubeDL(options) as ydl:
        try:
            ydl_info = ydl.extract_info(url, download=True)
            ydl_filename = ydl.prepare_filename(ydl_info)
        except YoutubeDLError as ex:
            # Chain the cause so the youtube-dl traceback is not lost.
            raise errors.ThirdPartyError(
                'Error downloading video %s (%s)' % (url, ex)) from ex
    try:
        with open(ydl_filename, 'rb') as handle:
            return handle.read()
    except OSError as ex:
        # The reported file is missing or unreadable: surface this as the
        # same project error instead of leaking a raw OSError to callers.
        raise errors.ThirdPartyError(
            'Error downloading video %s (%s)' % (url, ex)) from ex

View file

@ -46,12 +46,15 @@ class Context:
self, self,
name: str, name: str,
default: Union[object, bytes] = MISSING, default: Union[object, bytes] = MISSING,
use_video_downloader: bool = False,
allow_tokens: bool = True) -> bytes: allow_tokens: bool = True) -> bytes:
if name in self._files and self._files[name]: if name in self._files and self._files[name]:
return self._files[name] return self._files[name]
if name + 'Url' in self._params: if name + 'Url' in self._params:
return net.download(self._params[name + 'Url']) return net.download(
self._params[name + 'Url'],
use_video_downloader=use_video_downloader)
if allow_tokens and name + 'Token' in self._params: if allow_tokens and name + 'Token' in self._params:
ret = file_uploads.get(self._params[name + 'Token']) ret = file_uploads.get(self._params[name + 'Token'])

View file

@ -11,6 +11,7 @@ def inject_config(config_injector):
'posts:create:anonymous': model.User.RANK_REGULAR, 'posts:create:anonymous': model.User.RANK_REGULAR,
'posts:create:identified': model.User.RANK_REGULAR, 'posts:create:identified': model.User.RANK_REGULAR,
'tags:create': model.User.RANK_REGULAR, 'tags:create': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_REGULAR,
}, },
'allow_broken_uploads': False, 'allow_broken_uploads': False,
}) })
@ -122,7 +123,10 @@ def test_anonymous_uploads(
patch('szurubooru.func.posts.create_post'), \ patch('szurubooru.func.posts.create_post'), \
patch('szurubooru.func.posts.update_post_source'): patch('szurubooru.func.posts.update_post_source'):
config_injector({ config_injector({
'privileges': {'posts:create:anonymous': model.User.RANK_REGULAR}, 'privileges': {
'posts:create:anonymous': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_POWER,
},
}) })
posts.create_post.return_value = [post, []] posts.create_post.return_value = [post, []]
api.post_api.create_post( api.post_api.create_post(
@ -152,7 +156,10 @@ def test_creating_from_url_saves_source(
patch('szurubooru.func.posts.create_post'), \ patch('szurubooru.func.posts.create_post'), \
patch('szurubooru.func.posts.update_post_source'): patch('szurubooru.func.posts.update_post_source'):
config_injector({ config_injector({
'privileges': {'posts:create:identified': model.User.RANK_REGULAR}, 'privileges': {
'posts:create:identified': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_POWER,
},
}) })
net.download.return_value = b'content' net.download.return_value = b'content'
posts.create_post.return_value = [post, []] posts.create_post.return_value = [post, []]
@ -164,7 +171,8 @@ def test_creating_from_url_saves_source(
'contentUrl': 'example.com', 'contentUrl': 'example.com',
}, },
user=auth_user)) user=auth_user))
net.download.assert_called_once_with('example.com') net.download.assert_called_once_with(
'example.com', use_video_downloader=False)
posts.create_post.assert_called_once_with( posts.create_post.assert_called_once_with(
b'content', ['tag1', 'tag2'], auth_user) b'content', ['tag1', 'tag2'], auth_user)
posts.update_post_source.assert_called_once_with(post, 'example.com') posts.update_post_source.assert_called_once_with(post, 'example.com')
@ -182,7 +190,10 @@ def test_creating_from_url_with_source_specified(
patch('szurubooru.func.posts.create_post'), \ patch('szurubooru.func.posts.create_post'), \
patch('szurubooru.func.posts.update_post_source'): patch('szurubooru.func.posts.update_post_source'):
config_injector({ config_injector({
'privileges': {'posts:create:identified': model.User.RANK_REGULAR}, 'privileges': {
'posts:create:identified': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_REGULAR,
},
}) })
net.download.return_value = b'content' net.download.return_value = b'content'
posts.create_post.return_value = [post, []] posts.create_post.return_value = [post, []]
@ -195,7 +206,8 @@ def test_creating_from_url_with_source_specified(
'source': 'example2.com', 'source': 'example2.com',
}, },
user=auth_user)) user=auth_user))
net.download.assert_called_once_with('example.com') net.download.assert_called_once_with(
'example.com', use_video_downloader=True)
posts.create_post.assert_called_once_with( posts.create_post.assert_called_once_with(
b'content', ['tag1', 'tag2'], auth_user) b'content', ['tag1', 'tag2'], auth_user)
posts.update_post_source.assert_called_once_with(post, 'example2.com') posts.update_post_source.assert_called_once_with(post, 'example2.com')
@ -261,6 +273,7 @@ def test_errors_not_spending_ids(
}, },
'privileges': { 'privileges': {
'posts:create:identified': model.User.RANK_REGULAR, 'posts:create:identified': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_POWER,
}, },
'secret': 'test', 'secret': 'test',
}) })
@ -325,6 +338,7 @@ def test_trying_to_create_tags_without_privileges(
'posts:create:anonymous': model.User.RANK_REGULAR, 'posts:create:anonymous': model.User.RANK_REGULAR,
'posts:create:identified': model.User.RANK_REGULAR, 'posts:create:identified': model.User.RANK_REGULAR,
'tags:create': model.User.RANK_ADMINISTRATOR, 'tags:create': model.User.RANK_ADMINISTRATOR,
'uploads:use_downloader': model.User.RANK_POWER,
}, },
}) })
with pytest.raises(errors.AuthError), \ with pytest.raises(errors.AuthError), \

View file

@ -18,6 +18,7 @@ def inject_config(config_injector):
'posts:edit:flags': model.User.RANK_REGULAR, 'posts:edit:flags': model.User.RANK_REGULAR,
'posts:edit:thumbnail': model.User.RANK_REGULAR, 'posts:edit:thumbnail': model.User.RANK_REGULAR,
'tags:create': model.User.RANK_MODERATOR, 'tags:create': model.User.RANK_MODERATOR,
'uploads:use_downloader': model.User.RANK_REGULAR,
}, },
'allow_broken_uploads': False, 'allow_broken_uploads': False,
}) })
@ -97,7 +98,8 @@ def test_uploading_from_url_saves_source(
params={'contentUrl': 'example.com', 'version': 1}, params={'contentUrl': 'example.com', 'version': 1},
user=user_factory(rank=model.User.RANK_REGULAR)), user=user_factory(rank=model.User.RANK_REGULAR)),
{'post_id': post.post_id}) {'post_id': post.post_id})
net.download.assert_called_once_with('example.com') net.download.assert_called_once_with(
'example.com', use_video_downloader=True)
posts.update_post_content.assert_called_once_with(post, b'content') posts.update_post_content.assert_called_once_with(post, b'content')
posts.update_post_source.assert_called_once_with(post, 'example.com') posts.update_post_source.assert_called_once_with(post, 'example.com')
@ -121,7 +123,8 @@ def test_uploading_from_url_with_source_specified(
'version': 1}, 'version': 1},
user=user_factory(rank=model.User.RANK_REGULAR)), user=user_factory(rank=model.User.RANK_REGULAR)),
{'post_id': post.post_id}) {'post_id': post.post_id})
net.download.assert_called_once_with('example.com') net.download.assert_called_once_with(
'example.com', use_video_downloader=True)
posts.update_post_content.assert_called_once_with(post, b'content') posts.update_post_content.assert_called_once_with(post, b'content')
posts.update_post_source.assert_called_once_with(post, 'example2.com') posts.update_post_source.assert_called_once_with(post, 'example2.com')

View file

@ -1,4 +1,7 @@
import pytest
from szurubooru.errors import ThirdPartyError
from szurubooru.func import net from szurubooru.func import net
from szurubooru.func.util import get_sha1
def test_download(config_injector): def test_download(config_injector):
@ -47,3 +50,26 @@ def test_download(config_injector):
actual_content = net.download(url) actual_content = net.download(url)
assert actual_content == expected_content assert actual_content == expected_content
def test_video_download(tmpdir, config_injector):
    """Download a known video URL and compare the SHA-1 of the result.

    NOTE(review): this performs a real network download and pins the exact
    bytes returned, so it presumably breaks whenever the remote encoding
    changes -- confirm whether that is acceptable for this suite.
    """
    data_dir = str(tmpdir.mkdir('data'))
    config_injector({'user_agent': None, 'data_dir': data_dir})
    video_url = 'https://www.youtube.com/watch?v=C0DPdy98e4c'
    downloaded = net.download(video_url, use_video_downloader=True)
    assert get_sha1(downloaded) == '508f89ee85bc6186e18cfaa4f4d0279bcf2418ab'
def test_failed_video_download(tmpdir, config_injector):
    """Expect ThirdPartyError when the video downloader is used on this URL.

    NOTE(review): the URL presumably points at a plain HTML page with no
    video; the test needs network access to reach it.
    """
    data_dir = str(tmpdir.mkdir('data'))
    config_injector({'user_agent': None, 'data_dir': data_dir})
    page_url = 'http://info.cern.ch/hypertext/WWW/TheProject.html'
    with pytest.raises(ThirdPartyError):
        net.download(page_url, use_video_downloader=True)

View file

@ -25,7 +25,8 @@ def test_get_file_from_url():
ctx = rest.Context( ctx = rest.Context(
env={}, method=None, url=None, params={'keyUrl': 'example.com'}) env={}, method=None, url=None, params={'keyUrl': 'example.com'})
assert ctx.get_file('key') == b'content' assert ctx.get_file('key') == b'content'
net.download.assert_called_once_with('example.com') net.download.assert_called_once_with(
'example.com', use_video_downloader=False)
with pytest.raises(errors.ValidationError): with pytest.raises(errors.ValidationError):
assert ctx.get_file('non-existing') assert ctx.get_file('non-existing')