server/posts/upload: Add youtube-dl functionality

Allows video-based posts to be created by using youtube-dl
on the server. Access is controlled with the 'uploads:use_downloader'
permission.
This commit is contained in:
Shyam Sunder 2020-04-02 16:17:26 -04:00
parent 08e62ec885
commit 99a69333e6
12 changed files with 116 additions and 18 deletions

View file

@ -8,6 +8,7 @@ scrubbing](http://sjp.pwn.pl/sjp/;2527372). It is pronounced as *shoorubooru*.
## Features
- Post content: images (JPG, PNG, GIF, animated GIF), videos (MP4, WEBM), Flash animations
- Ability to retrieve web video content using [youtube-dl](https://github.com/ytdl-org/youtube-dl)
- Post comments
- Post notes / annotations, including arbitrary polygons
- Rich JSON REST API ([see documentation](doc/API.md))

View file

@ -145,7 +145,10 @@ way. The files, however, should be passed as regular fields appended with a
`Url` suffix. For example, to use `http://example.com/file.jpg` in an API that
accepts a file named `content`, the client should pass
`{"contentUrl":"http://example.com/file.jpg"}` as a part of the JSON message
body.
body. When creating or updating post content using this method, the server can
also be configured to employ [youtube-dl](https://github.com/ytdl-org/youtube-dl)
to download content from popular sites such as YouTube, Gfycat, etc. Access to
youtube-dl can be configured with the `'uploads:use_downloader'` permission.
Finally, in some cases the user might want to reuse one file between the
requests to save the bandwidth (for example, reverse search + consecutive

View file

@ -21,6 +21,7 @@ RUN \
pip3 install --no-cache-dir --disable-pip-version-check \
alembic \
"coloredlogs==5.0" \
youtube-dl \
&& exit 0
ARG PUID=1000

View file

@ -135,6 +135,7 @@ privileges:
'snapshots:list': power
'uploads:create': regular
'uploads:use_downloader': power
## ONLY SET THESE IF DEPLOYING OUTSIDE OF DOCKER
#debug: 0 # generate server logs?

View file

@ -2,7 +2,8 @@ from typing import Optional, Dict, List
from datetime import datetime
from szurubooru import db, model, errors, rest, search
from szurubooru.func import (
auth, tags, posts, snapshots, favorites, scores, serialization, versions)
auth, tags, posts, snapshots, favorites, scores,
serialization, versions, mime)
_search_executor_config = search.configs.PostSearchConfig()
@ -46,7 +47,10 @@ def create_post(
auth.verify_privilege(ctx.user, 'posts:create:anonymous')
else:
auth.verify_privilege(ctx.user, 'posts:create:identified')
content = ctx.get_file('content')
content = ctx.get_file(
'content',
use_video_downloader=auth.has_privilege(
ctx.user, 'uploads:use_downloader'))
tag_names = ctx.get_param_as_string_list('tags', default=[])
safety = ctx.get_param_as_string('safety')
source = ctx.get_param_as_string('source', default='')
@ -105,7 +109,10 @@ def update_post(ctx: rest.Context, params: Dict[str, str]) -> rest.Response:
versions.bump_version(post)
if ctx.has_file('content'):
auth.verify_privilege(ctx.user, 'posts:edit:content')
posts.update_post_content(post, ctx.get_file('content'))
posts.update_post_content(
post,
ctx.get_file('content', use_video_downloader=auth.has_privilege(
ctx.user, 'uploads:use_downloader')))
if ctx.has_param('tags'):
auth.verify_privilege(ctx.user, 'posts:edit:tags')
new_tags = posts.update_post_tags(

View file

@ -7,6 +7,10 @@ from szurubooru.func import auth, file_uploads
def create_temporary_file(
ctx: rest.Context, _params: Dict[str, str] = {}) -> rest.Response:
auth.verify_privilege(ctx.user, 'uploads:create')
content = ctx.get_file('content', allow_tokens=False)
content = ctx.get_file(
'content',
allow_tokens=False,
use_video_downloader=auth.has_privilege(
ctx.user, 'uploads:use_downloader'))
token = file_uploads.save(content)
return {'token': token}

View file

@ -1,9 +1,17 @@
import logging
import urllib.request
from szurubooru import config
from szurubooru import errors
import os
from tempfile import NamedTemporaryFile
from szurubooru import config, errors
from szurubooru.func import mime, util
from youtube_dl import YoutubeDL
from youtube_dl.utils import YoutubeDLError
def download(url: str) -> bytes:
logger = logging.getLogger(__name__)
def download(url: str, use_video_downloader: bool = False) -> bytes:
assert url
request = urllib.request.Request(url)
if config.config['user_agent']:
@ -11,6 +19,32 @@ def download(url: str) -> bytes:
request.add_header('Referer', url)
try:
with urllib.request.urlopen(request) as handle:
return handle.read()
content = handle.read()
except Exception as ex:
raise errors.ProcessingError('Error downloading %s (%s)' % (url, ex))
if (use_video_downloader and
mime.get_mime_type(content) == 'application/octet-stream'):
return _youtube_dl_wrapper(url)
return content
def _youtube_dl_wrapper(url: str) -> bytes:
    """Download the video behind ``url`` with youtube-dl and return its bytes.

    youtube-dl writes the video to a file under
    ``<data_dir>/temporary-uploads``; the file name is built from the first
    eight characters of ``util.get_sha1(url)`` plus the extension youtube-dl
    picks. The file is then read back fully into memory.

    Args:
        url: Address of the page or media to hand to youtube-dl.

    Returns:
        The raw content of the downloaded video file.

    Raises:
        errors.ThirdPartyError: If youtube-dl reports a ``YoutubeDLError``
            while extracting or downloading. The original error is kept as
            the exception's ``__cause__``.
    """
    output_template = os.path.join(
        config.config['data_dir'],
        'temporary-uploads',
        'youtubedl-' + util.get_sha1(url)[0:8] + '.%(ext)s')
    options = {
        'quiet': True,
        'format': 'webm/mp4',
        'logger': logger,
        'noplaylist': True,
        'outtmpl': output_template,
    }
    with YoutubeDL(options) as ydl:
        try:
            ydl_info = ydl.extract_info(url, download=True)
            # Resolve the concrete path (with real extension) that the
            # 'outtmpl' template expanded to for this download.
            ydl_filename = ydl.prepare_filename(ydl_info)
        except YoutubeDLError as ex:
            # Chain the cause so the youtube-dl traceback is not lost.
            raise errors.ThirdPartyError(
                'Error downloading video %s (%s)' % (url, ex)) from ex
    # NOTE(review): the downloaded file is not removed here; presumably
    # 'temporary-uploads' is cleaned up elsewhere -- confirm.
    with open(ydl_filename, 'rb') as f:
        return f.read()

View file

@ -46,12 +46,15 @@ class Context:
self,
name: str,
default: Union[object, bytes] = MISSING,
use_video_downloader: bool = False,
allow_tokens: bool = True) -> bytes:
if name in self._files and self._files[name]:
return self._files[name]
if name + 'Url' in self._params:
return net.download(self._params[name + 'Url'])
return net.download(
self._params[name + 'Url'],
use_video_downloader=use_video_downloader)
if allow_tokens and name + 'Token' in self._params:
ret = file_uploads.get(self._params[name + 'Token'])

View file

@ -11,6 +11,7 @@ def inject_config(config_injector):
'posts:create:anonymous': model.User.RANK_REGULAR,
'posts:create:identified': model.User.RANK_REGULAR,
'tags:create': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_REGULAR,
},
'allow_broken_uploads': False,
})
@ -122,7 +123,10 @@ def test_anonymous_uploads(
patch('szurubooru.func.posts.create_post'), \
patch('szurubooru.func.posts.update_post_source'):
config_injector({
'privileges': {'posts:create:anonymous': model.User.RANK_REGULAR},
'privileges': {
'posts:create:anonymous': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_POWER,
},
})
posts.create_post.return_value = [post, []]
api.post_api.create_post(
@ -152,7 +156,10 @@ def test_creating_from_url_saves_source(
patch('szurubooru.func.posts.create_post'), \
patch('szurubooru.func.posts.update_post_source'):
config_injector({
'privileges': {'posts:create:identified': model.User.RANK_REGULAR},
'privileges': {
'posts:create:identified': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_POWER,
},
})
net.download.return_value = b'content'
posts.create_post.return_value = [post, []]
@ -164,7 +171,8 @@ def test_creating_from_url_saves_source(
'contentUrl': 'example.com',
},
user=auth_user))
net.download.assert_called_once_with('example.com')
net.download.assert_called_once_with(
'example.com', use_video_downloader=False)
posts.create_post.assert_called_once_with(
b'content', ['tag1', 'tag2'], auth_user)
posts.update_post_source.assert_called_once_with(post, 'example.com')
@ -182,7 +190,10 @@ def test_creating_from_url_with_source_specified(
patch('szurubooru.func.posts.create_post'), \
patch('szurubooru.func.posts.update_post_source'):
config_injector({
'privileges': {'posts:create:identified': model.User.RANK_REGULAR},
'privileges': {
'posts:create:identified': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_REGULAR,
},
})
net.download.return_value = b'content'
posts.create_post.return_value = [post, []]
@ -195,7 +206,8 @@ def test_creating_from_url_with_source_specified(
'source': 'example2.com',
},
user=auth_user))
net.download.assert_called_once_with('example.com')
net.download.assert_called_once_with(
'example.com', use_video_downloader=True)
posts.create_post.assert_called_once_with(
b'content', ['tag1', 'tag2'], auth_user)
posts.update_post_source.assert_called_once_with(post, 'example2.com')
@ -261,6 +273,7 @@ def test_errors_not_spending_ids(
},
'privileges': {
'posts:create:identified': model.User.RANK_REGULAR,
'uploads:use_downloader': model.User.RANK_POWER,
},
'secret': 'test',
})
@ -325,6 +338,7 @@ def test_trying_to_create_tags_without_privileges(
'posts:create:anonymous': model.User.RANK_REGULAR,
'posts:create:identified': model.User.RANK_REGULAR,
'tags:create': model.User.RANK_ADMINISTRATOR,
'uploads:use_downloader': model.User.RANK_POWER,
},
})
with pytest.raises(errors.AuthError), \

View file

@ -18,6 +18,7 @@ def inject_config(config_injector):
'posts:edit:flags': model.User.RANK_REGULAR,
'posts:edit:thumbnail': model.User.RANK_REGULAR,
'tags:create': model.User.RANK_MODERATOR,
'uploads:use_downloader': model.User.RANK_REGULAR,
},
'allow_broken_uploads': False,
})
@ -97,7 +98,8 @@ def test_uploading_from_url_saves_source(
params={'contentUrl': 'example.com', 'version': 1},
user=user_factory(rank=model.User.RANK_REGULAR)),
{'post_id': post.post_id})
net.download.assert_called_once_with('example.com')
net.download.assert_called_once_with(
'example.com', use_video_downloader=True)
posts.update_post_content.assert_called_once_with(post, b'content')
posts.update_post_source.assert_called_once_with(post, 'example.com')
@ -121,7 +123,8 @@ def test_uploading_from_url_with_source_specified(
'version': 1},
user=user_factory(rank=model.User.RANK_REGULAR)),
{'post_id': post.post_id})
net.download.assert_called_once_with('example.com')
net.download.assert_called_once_with(
'example.com', use_video_downloader=True)
posts.update_post_content.assert_called_once_with(post, b'content')
posts.update_post_source.assert_called_once_with(post, 'example2.com')

View file

@ -1,4 +1,7 @@
import pytest
from szurubooru.errors import ThirdPartyError
from szurubooru.func import net
from szurubooru.func.util import get_sha1
def test_download(config_injector):
@ -47,3 +50,26 @@ def test_download(config_injector):
actual_content = net.download(url)
assert actual_content == expected_content
def test_video_download(tmpdir, config_injector):
    """Download a known video via the video downloader and check its SHA-1."""
    data_dir = tmpdir.mkdir('data')
    config_injector({
        'user_agent': None,
        'data_dir': str(data_dir)
    })
    video_url = 'https://www.youtube.com/watch?v=C0DPdy98e4c'
    downloaded = net.download(video_url, use_video_downloader=True)
    digest = get_sha1(downloaded)
    assert digest == '508f89ee85bc6186e18cfaa4f4d0279bcf2418ab'
def test_failed_video_download(tmpdir, config_injector):
    """Expect ThirdPartyError when the video downloader is pointed at a
    URL ending in ``.html`` rather than at a video."""
    settings = {
        'user_agent': None,
        'data_dir': str(tmpdir.mkdir('data'))
    }
    config_injector(settings)
    page_url = 'http://info.cern.ch/hypertext/WWW/TheProject.html'
    with pytest.raises(ThirdPartyError):
        net.download(page_url, use_video_downloader=True)

View file

@ -25,7 +25,8 @@ def test_get_file_from_url():
ctx = rest.Context(
env={}, method=None, url=None, params={'keyUrl': 'example.com'})
assert ctx.get_file('key') == b'content'
net.download.assert_called_once_with('example.com')
net.download.assert_called_once_with(
'example.com', use_video_downloader=False)
with pytest.raises(errors.ValidationError):
assert ctx.get_file('non-existing')