From 99a69333e656fcbf7acf55f8593cf2a70781fc6d Mon Sep 17 00:00:00 2001 From: Shyam Sunder Date: Thu, 2 Apr 2020 16:17:26 -0400 Subject: [PATCH] server/posts/upload: Add youtube-dl functionality allows for video-based posts to be created by using youtube-dl on the server. Access is controlled with the 'uploads:use_downloader' permission. --- README.md | 1 + doc/API.md | 5 ++- server/Dockerfile | 1 + server/config.yaml.dist | 1 + server/szurubooru/api/post_api.py | 13 ++++-- server/szurubooru/api/upload_api.py | 6 ++- server/szurubooru/func/net.py | 42 +++++++++++++++++-- server/szurubooru/rest/context.py | 5 ++- .../tests/api/test_post_creating.py | 24 ++++++++--- .../tests/api/test_post_updating.py | 7 +++- server/szurubooru/tests/func/test_net.py | 26 ++++++++++++ server/szurubooru/tests/rest/test_context.py | 3 +- 12 files changed, 116 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 698cfd22..98c0c0c5 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ scrubbing](http://sjp.pwn.pl/sjp/;2527372). It is pronounced as *shoorubooru*. ## Features - Post content: images (JPG, PNG, GIF, animated GIF), videos (MP4, WEBM), Flash animations +- Ability to retrieve web video content using [youtube-dl](https://github.com/ytdl-org/youtube-dl) - Post comments - Post notes / annotations, including arbitrary polygons - Rich JSON REST API ([see documentation](doc/API.md)) diff --git a/doc/API.md b/doc/API.md index 6a733f75..528b665c 100644 --- a/doc/API.md +++ b/doc/API.md @@ -145,7 +145,10 @@ way. The files, however, should be passed as regular fields appended with a `Url` suffix. For example, to use `http://example.com/file.jpg` in an API that accepts a file named `content`, the client should pass `{"contentUrl":"http://example.com/file.jpg"}` as a part of the JSON message -body. +body. 
When creating or updating post content using this method, the server can +also be configured to employ [youtube-dl](https://github.com/ytdl-org/youtube-dl) +to download content from popular sites such as YouTube, Gfycat, etc. Access to +youtube-dl is controlled by the `'uploads:use_downloader'` privilege. Finally, in some cases the user might want to reuse one file between the requests to save the bandwidth (for example, reverse search + consecutive diff --git a/server/Dockerfile b/server/Dockerfile index 5406a848..3d1c7f88 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -21,6 +21,7 @@ RUN \ pip3 install --no-cache-dir --disable-pip-version-check \ alembic \ "coloredlogs==5.0" \ + youtube-dl \ && exit 0 ARG PUID=1000 diff --git a/server/config.yaml.dist b/server/config.yaml.dist index 3ff3052a..f1ae8d25 100644 --- a/server/config.yaml.dist +++ b/server/config.yaml.dist @@ -135,6 +135,7 @@ privileges: 'snapshots:list': power 'uploads:create': regular + 'uploads:use_downloader': power ## ONLY SET THESE IF DEPLOYING OUTSIDE OF DOCKER #debug: 0 # generate server logs? 
diff --git a/server/szurubooru/api/post_api.py b/server/szurubooru/api/post_api.py index 09aea90a..6d48fbd3 100644 --- a/server/szurubooru/api/post_api.py +++ b/server/szurubooru/api/post_api.py @@ -2,7 +2,8 @@ from typing import Optional, Dict, List from datetime import datetime from szurubooru import db, model, errors, rest, search from szurubooru.func import ( - auth, tags, posts, snapshots, favorites, scores, serialization, versions) + auth, tags, posts, snapshots, favorites, scores, + serialization, versions, mime) _search_executor_config = search.configs.PostSearchConfig() @@ -46,7 +47,10 @@ def create_post( auth.verify_privilege(ctx.user, 'posts:create:anonymous') else: auth.verify_privilege(ctx.user, 'posts:create:identified') - content = ctx.get_file('content') + content = ctx.get_file( + 'content', + use_video_downloader=auth.has_privilege( + ctx.user, 'uploads:use_downloader')) tag_names = ctx.get_param_as_string_list('tags', default=[]) safety = ctx.get_param_as_string('safety') source = ctx.get_param_as_string('source', default='') @@ -105,7 +109,10 @@ def update_post(ctx: rest.Context, params: Dict[str, str]) -> rest.Response: versions.bump_version(post) if ctx.has_file('content'): auth.verify_privilege(ctx.user, 'posts:edit:content') - posts.update_post_content(post, ctx.get_file('content')) + posts.update_post_content( + post, + ctx.get_file('content', use_video_downloader=auth.has_privilege( + ctx.user, 'uploads:use_downloader'))) if ctx.has_param('tags'): auth.verify_privilege(ctx.user, 'posts:edit:tags') new_tags = posts.update_post_tags( diff --git a/server/szurubooru/api/upload_api.py b/server/szurubooru/api/upload_api.py index 6a6ecfef..16b46406 100644 --- a/server/szurubooru/api/upload_api.py +++ b/server/szurubooru/api/upload_api.py @@ -7,6 +7,10 @@ from szurubooru.func import auth, file_uploads def create_temporary_file( ctx: rest.Context, _params: Dict[str, str] = {}) -> rest.Response: auth.verify_privilege(ctx.user, 'uploads:create') - 
content = ctx.get_file('content', allow_tokens=False) + content = ctx.get_file( + 'content', + allow_tokens=False, + use_video_downloader=auth.has_privilege( + ctx.user, 'uploads:use_downloader')) token = file_uploads.save(content) return {'token': token} diff --git a/server/szurubooru/func/net.py b/server/szurubooru/func/net.py index e6326c06..9a03633d 100644 --- a/server/szurubooru/func/net.py +++ b/server/szurubooru/func/net.py @@ -1,9 +1,17 @@ +import logging import urllib.request -from szurubooru import config -from szurubooru import errors +import os +from tempfile import NamedTemporaryFile +from szurubooru import config, errors +from szurubooru.func import mime, util +from youtube_dl import YoutubeDL +from youtube_dl.utils import YoutubeDLError -def download(url: str) -> bytes: +logger = logging.getLogger(__name__) + + +def download(url: str, use_video_downloader: bool = False) -> bytes: assert url request = urllib.request.Request(url) if config.config['user_agent']: @@ -11,6 +19,32 @@ def download(url: str) -> bytes: request.add_header('Referer', url) try: with urllib.request.urlopen(request) as handle: - return handle.read() + content = handle.read() except Exception as ex: raise errors.ProcessingError('Error downloading %s (%s)' % (url, ex)) + if (use_video_downloader and + mime.get_mime_type(content) == 'application/octet-stream'): + return _youtube_dl_wrapper(url) + return content + + +def _youtube_dl_wrapper(url: str) -> bytes: + options = { + 'quiet': True, + 'format': 'webm/mp4', + 'logger': logger, + 'noplaylist': True, + 'outtmpl': os.path.join( + config.config['data_dir'], + 'temporary-uploads', + 'youtubedl-' + util.get_sha1(url)[0:8] + '.%(ext)s'), + } + with YoutubeDL(options) as ydl: + try: + ydl_info = ydl.extract_info(url, download=True) + ydl_filename = ydl.prepare_filename(ydl_info) + except YoutubeDLError as ex: + raise errors.ThirdPartyError( + 'Error downloading video %s (%s)' % (url, ex)) + with open(ydl_filename, 'rb') as f: + return 
f.read() diff --git a/server/szurubooru/rest/context.py b/server/szurubooru/rest/context.py index 2aad101a..3e902436 100644 --- a/server/szurubooru/rest/context.py +++ b/server/szurubooru/rest/context.py @@ -46,12 +46,15 @@ class Context: self, name: str, default: Union[object, bytes] = MISSING, + use_video_downloader: bool = False, allow_tokens: bool = True) -> bytes: if name in self._files and self._files[name]: return self._files[name] if name + 'Url' in self._params: - return net.download(self._params[name + 'Url']) + return net.download( + self._params[name + 'Url'], + use_video_downloader=use_video_downloader) if allow_tokens and name + 'Token' in self._params: ret = file_uploads.get(self._params[name + 'Token']) diff --git a/server/szurubooru/tests/api/test_post_creating.py b/server/szurubooru/tests/api/test_post_creating.py index 9d94a929..dc4d51c7 100644 --- a/server/szurubooru/tests/api/test_post_creating.py +++ b/server/szurubooru/tests/api/test_post_creating.py @@ -11,6 +11,7 @@ def inject_config(config_injector): 'posts:create:anonymous': model.User.RANK_REGULAR, 'posts:create:identified': model.User.RANK_REGULAR, 'tags:create': model.User.RANK_REGULAR, + 'uploads:use_downloader': model.User.RANK_REGULAR, }, 'allow_broken_uploads': False, }) @@ -122,7 +123,10 @@ def test_anonymous_uploads( patch('szurubooru.func.posts.create_post'), \ patch('szurubooru.func.posts.update_post_source'): config_injector({ - 'privileges': {'posts:create:anonymous': model.User.RANK_REGULAR}, + 'privileges': { + 'posts:create:anonymous': model.User.RANK_REGULAR, + 'uploads:use_downloader': model.User.RANK_POWER, + }, }) posts.create_post.return_value = [post, []] api.post_api.create_post( @@ -152,7 +156,10 @@ def test_creating_from_url_saves_source( patch('szurubooru.func.posts.create_post'), \ patch('szurubooru.func.posts.update_post_source'): config_injector({ - 'privileges': {'posts:create:identified': model.User.RANK_REGULAR}, + 'privileges': { + 
'posts:create:identified': model.User.RANK_REGULAR, + 'uploads:use_downloader': model.User.RANK_POWER, + }, }) net.download.return_value = b'content' posts.create_post.return_value = [post, []] @@ -164,7 +171,8 @@ def test_creating_from_url_saves_source( 'contentUrl': 'example.com', }, user=auth_user)) - net.download.assert_called_once_with('example.com') + net.download.assert_called_once_with( + 'example.com', use_video_downloader=False) posts.create_post.assert_called_once_with( b'content', ['tag1', 'tag2'], auth_user) posts.update_post_source.assert_called_once_with(post, 'example.com') @@ -182,7 +190,10 @@ def test_creating_from_url_with_source_specified( patch('szurubooru.func.posts.create_post'), \ patch('szurubooru.func.posts.update_post_source'): config_injector({ - 'privileges': {'posts:create:identified': model.User.RANK_REGULAR}, + 'privileges': { + 'posts:create:identified': model.User.RANK_REGULAR, + 'uploads:use_downloader': model.User.RANK_REGULAR, + }, }) net.download.return_value = b'content' posts.create_post.return_value = [post, []] @@ -195,7 +206,8 @@ def test_creating_from_url_with_source_specified( 'source': 'example2.com', }, user=auth_user)) - net.download.assert_called_once_with('example.com') + net.download.assert_called_once_with( + 'example.com', use_video_downloader=True) posts.create_post.assert_called_once_with( b'content', ['tag1', 'tag2'], auth_user) posts.update_post_source.assert_called_once_with(post, 'example2.com') @@ -261,6 +273,7 @@ def test_errors_not_spending_ids( }, 'privileges': { 'posts:create:identified': model.User.RANK_REGULAR, + 'uploads:use_downloader': model.User.RANK_POWER, }, 'secret': 'test', }) @@ -325,6 +338,7 @@ def test_trying_to_create_tags_without_privileges( 'posts:create:anonymous': model.User.RANK_REGULAR, 'posts:create:identified': model.User.RANK_REGULAR, 'tags:create': model.User.RANK_ADMINISTRATOR, + 'uploads:use_downloader': model.User.RANK_POWER, }, }) with pytest.raises(errors.AuthError), \ diff 
--git a/server/szurubooru/tests/api/test_post_updating.py b/server/szurubooru/tests/api/test_post_updating.py index 229ecef7..887f54fb 100644 --- a/server/szurubooru/tests/api/test_post_updating.py +++ b/server/szurubooru/tests/api/test_post_updating.py @@ -18,6 +18,7 @@ def inject_config(config_injector): 'posts:edit:flags': model.User.RANK_REGULAR, 'posts:edit:thumbnail': model.User.RANK_REGULAR, 'tags:create': model.User.RANK_MODERATOR, + 'uploads:use_downloader': model.User.RANK_REGULAR, }, 'allow_broken_uploads': False, }) @@ -97,7 +98,8 @@ def test_uploading_from_url_saves_source( params={'contentUrl': 'example.com', 'version': 1}, user=user_factory(rank=model.User.RANK_REGULAR)), {'post_id': post.post_id}) - net.download.assert_called_once_with('example.com') + net.download.assert_called_once_with( + 'example.com', use_video_downloader=True) posts.update_post_content.assert_called_once_with(post, b'content') posts.update_post_source.assert_called_once_with(post, 'example.com') @@ -121,7 +123,8 @@ def test_uploading_from_url_with_source_specified( 'version': 1}, user=user_factory(rank=model.User.RANK_REGULAR)), {'post_id': post.post_id}) - net.download.assert_called_once_with('example.com') + net.download.assert_called_once_with( + 'example.com', use_video_downloader=True) posts.update_post_content.assert_called_once_with(post, b'content') posts.update_post_source.assert_called_once_with(post, 'example2.com') diff --git a/server/szurubooru/tests/func/test_net.py b/server/szurubooru/tests/func/test_net.py index fb149b05..43f7c28c 100644 --- a/server/szurubooru/tests/func/test_net.py +++ b/server/szurubooru/tests/func/test_net.py @@ -1,4 +1,7 @@ +import pytest +from szurubooru.errors import ThirdPartyError from szurubooru.func import net +from szurubooru.func.util import get_sha1 def test_download(config_injector): @@ -47,3 +50,26 @@ def test_download(config_injector): actual_content = net.download(url) assert actual_content == expected_content + + +def 
test_video_download(tmpdir, config_injector): + config_injector({ + 'user_agent': None, + 'data_dir': str(tmpdir.mkdir('data')) + }) + url = 'https://www.youtube.com/watch?v=C0DPdy98e4c' + expected_sha1 = '508f89ee85bc6186e18cfaa4f4d0279bcf2418ab' + + actual_content = net.download(url, use_video_downloader=True) + assert get_sha1(actual_content) == expected_sha1 + + +def test_failed_video_download(tmpdir, config_injector): + config_injector({ + 'user_agent': None, + 'data_dir': str(tmpdir.mkdir('data')) + }) + url = 'http://info.cern.ch/hypertext/WWW/TheProject.html' + + with pytest.raises(ThirdPartyError): + net.download(url, use_video_downloader=True) diff --git a/server/szurubooru/tests/rest/test_context.py b/server/szurubooru/tests/rest/test_context.py index 681d9d70..34cf7ac0 100644 --- a/server/szurubooru/tests/rest/test_context.py +++ b/server/szurubooru/tests/rest/test_context.py @@ -25,7 +25,8 @@ def test_get_file_from_url(): ctx = rest.Context( env={}, method=None, url=None, params={'keyUrl': 'example.com'}) assert ctx.get_file('key') == b'content' - net.download.assert_called_once_with('example.com') + net.download.assert_called_once_with( + 'example.com', use_video_downloader=False) with pytest.raises(errors.ValidationError): assert ctx.get_file('non-existing')