server/posts/upload: Add youtube-dl functionality

Allows video-based posts to be created by running youtube-dl on the server. Access is controlled with the 'uploads:use_downloader' permission.
2020-04-02 16:17:26 -04:00 · 2020-04-02 16:17:26 -04:00 · 99a69333e6
commit 99a69333e6
parent 08e62ec885
12 changed files with 116 additions and 18 deletions
--- a/README.md
+++ b/README.md
@ -8,6 +8,7 @@ scrubbing](http://sjp.pwn.pl/sjp/;2527372). It is pronounced as *shoorubooru*.
 ## Features

 - Post content: images (JPG, PNG, GIF, animated GIF), videos (MP4, WEBM), Flash animations
+- Ability to retrieve web video content using [youtube-dl](https://github.com/ytdl-org/youtube-dl)
 - Post comments
 - Post notes / annotations, including arbitrary polygons
 - Rich JSON REST API ([see documentation](doc/API.md))
--- a/doc/API.md
+++ b/doc/API.md
@ -145,7 +145,10 @@ way. The files, however, should be passed as regular fields appended with a
 `Url` suffix. For example, to use `http://example.com/file.jpg` in an API that
 accepts a file named `content`, the client should pass
 `{"contentUrl":"http://example.com/file.jpg"}` as a part of the JSON message
-body.
+body. When creating or updating post content using this method, the server can
+also be configured to employ [youtube-dl](https://github.com/ytdl-org/youtube-dl)
+to download content from popular sites such as YouTube, Gfycat, etc. Access to
+youtube-dl can be configured with the `'uploads:use_downloader'` permission.

 Finally, in some cases the user might want to reuse one file between the
 requests to save the bandwidth (for example, reverse search + consecutive
--- a/server/Dockerfile
+++ b/server/Dockerfile
@ -21,6 +21,7 @@ RUN \
    pip3 install --no-cache-dir --disable-pip-version-check \
        alembic \
        "coloredlogs==5.0" \
+        youtube-dl \
    && exit 0

 ARG PUID=1000
--- a/server/config.yaml.dist
+++ b/server/config.yaml.dist
@ -135,6 +135,7 @@ privileges:
    'snapshots:list':               power

    'uploads:create':               regular
+    'uploads:use_downloader':       power

 ## ONLY SET THESE IF DEPLOYING OUTSIDE OF DOCKER
 #debug: 0 # generate server logs?
--- a/server/szurubooru/api/post_api.py
+++ b/server/szurubooru/api/post_api.py
@ -2,7 +2,8 @@ from typing import Optional, Dict, List
 from datetime import datetime
 from szurubooru import db, model, errors, rest, search
 from szurubooru.func import (
-    auth, tags, posts, snapshots, favorites, scores, serialization, versions)
+    auth, tags, posts, snapshots, favorites, scores,
+    serialization, versions, mime)


 _search_executor_config = search.configs.PostSearchConfig()
@ -46,7 +47,10 @@ def create_post(
        auth.verify_privilege(ctx.user, 'posts:create:anonymous')
    else:
        auth.verify_privilege(ctx.user, 'posts:create:identified')
-    content = ctx.get_file('content')
+    content = ctx.get_file(
+        'content',
+        use_video_downloader=auth.has_privilege(
+            ctx.user, 'uploads:use_downloader'))
    tag_names = ctx.get_param_as_string_list('tags', default=[])
    safety = ctx.get_param_as_string('safety')
    source = ctx.get_param_as_string('source', default='')
@ -105,7 +109,10 @@ def update_post(ctx: rest.Context, params: Dict[str, str]) -> rest.Response:
    versions.bump_version(post)
    if ctx.has_file('content'):
        auth.verify_privilege(ctx.user, 'posts:edit:content')
-        posts.update_post_content(post, ctx.get_file('content'))
+        posts.update_post_content(
+            post,
+            ctx.get_file('content', use_video_downloader=auth.has_privilege(
+                ctx.user, 'uploads:use_downloader')))
    if ctx.has_param('tags'):
        auth.verify_privilege(ctx.user, 'posts:edit:tags')
        new_tags = posts.update_post_tags(
--- a/server/szurubooru/api/upload_api.py
+++ b/server/szurubooru/api/upload_api.py
@ -7,6 +7,10 @@ from szurubooru.func import auth, file_uploads
 def create_temporary_file(
        ctx: rest.Context, _params: Dict[str, str] = {}) -> rest.Response:
    auth.verify_privilege(ctx.user, 'uploads:create')
-    content = ctx.get_file('content', allow_tokens=False)
+    content = ctx.get_file(
+        'content',
+        allow_tokens=False,
+        use_video_downloader=auth.has_privilege(
+            ctx.user, 'uploads:use_downloader'))
    token = file_uploads.save(content)
    return {'token': token}
--- a/server/szurubooru/func/net.py
+++ b/server/szurubooru/func/net.py
@ -1,9 +1,17 @@
+import logging
 import urllib.request
-from szurubooru import config
-from szurubooru import errors
+import os
+from tempfile import NamedTemporaryFile
+from szurubooru import config, errors
+from szurubooru.func import mime, util
+from youtube_dl import YoutubeDL
+from youtube_dl.utils import YoutubeDLError


-def download(url: str) -> bytes:
+logger = logging.getLogger(__name__)
+
+
+def download(url: str, use_video_downloader: bool = False) -> bytes:
    assert url
    request = urllib.request.Request(url)
    if config.config['user_agent']:
@ -11,6 +19,32 @@ def download(url: str) -> bytes:
    request.add_header('Referer', url)
    try:
        with urllib.request.urlopen(request) as handle:
-            return handle.read()
+            content = handle.read()
    except Exception as ex:
        raise errors.ProcessingError('Error downloading %s (%s)' % (url, ex))
+    if (use_video_downloader and
+            mime.get_mime_type(content) == 'application/octet-stream'):
+        return _youtube_dl_wrapper(url)
+    return content
+
+
+# Fetch the video behind `url` with youtube-dl and return the file's bytes.
+#
+# youtube-dl writes the video to a file under
+# <data_dir>/temporary-uploads; that file is then read back into memory.
+# A YoutubeDLError raised while extracting/downloading is re-raised as
+# errors.ThirdPartyError; any other exception propagates unchanged.
+#
+# NOTE(review): the downloaded file is not removed by this function, and
+# its name depends only on the URL, so repeated or concurrent requests for
+# the same URL presumably target the same path -- confirm that cleanup of
+# temporary-uploads happens elsewhere.
+def _youtube_dl_wrapper(url: str) -> bytes:
+    options = {
+        'quiet': True,
+        # youtube-dl format selector: prefer webm, fall back to mp4.
+        'format': 'webm/mp4',
+        # Route youtube-dl's messages through this module's logger.
+        'logger': logger,
+        'noplaylist': True,
+        # Output template; '%(ext)s' is substituted by youtube-dl. The name
+        # uses the first 8 characters of util.get_sha1(url).
+        'outtmpl': os.path.join(
+            config.config['data_dir'],
+            'temporary-uploads',
+            'youtubedl-' + util.get_sha1(url)[0:8] + '.%(ext)s'),
+    }
+    with YoutubeDL(options) as ydl:
+        try:
+            ydl_info = ydl.extract_info(url, download=True)
+            # Resolve the output template against the extracted info to get
+            # the path of the file that was written.
+            ydl_filename = ydl.prepare_filename(ydl_info)
+        except YoutubeDLError as ex:
+            raise errors.ThirdPartyError(
+                'Error downloading video %s (%s)' % (url, ex))
+    # Read the whole downloaded file into memory for the caller.
+    with open(ydl_filename, 'rb') as f:
+        return f.read()
--- a/server/szurubooru/rest/context.py
+++ b/server/szurubooru/rest/context.py
@ -46,12 +46,15 @@ class Context:
            self,
            name: str,
            default: Union[object, bytes] = MISSING,
+            use_video_downloader: bool = False,
            allow_tokens: bool = True) -> bytes:
        if name in self._files and self._files[name]:
            return self._files[name]

        if name + 'Url' in self._params:
-            return net.download(self._params[name + 'Url'])
+            return net.download(
+                self._params[name + 'Url'],
+                use_video_downloader=use_video_downloader)

        if allow_tokens and name + 'Token' in self._params:
            ret = file_uploads.get(self._params[name + 'Token'])
--- a/server/szurubooru/tests/api/test_post_creating.py
+++ b/server/szurubooru/tests/api/test_post_creating.py
@ -11,6 +11,7 @@ def inject_config(config_injector):
            'posts:create:anonymous': model.User.RANK_REGULAR,
            'posts:create:identified': model.User.RANK_REGULAR,
            'tags:create': model.User.RANK_REGULAR,
+            'uploads:use_downloader': model.User.RANK_REGULAR,
        },
        'allow_broken_uploads': False,
    })
@ -122,7 +123,10 @@ def test_anonymous_uploads(
            patch('szurubooru.func.posts.create_post'), \
            patch('szurubooru.func.posts.update_post_source'):
        config_injector({
-            'privileges': {'posts:create:anonymous': model.User.RANK_REGULAR},
+            'privileges': {
+                'posts:create:anonymous': model.User.RANK_REGULAR,
+                'uploads:use_downloader': model.User.RANK_POWER,
+            },
        })
        posts.create_post.return_value = [post, []]
        api.post_api.create_post(
@ -152,7 +156,10 @@ def test_creating_from_url_saves_source(
            patch('szurubooru.func.posts.create_post'), \
            patch('szurubooru.func.posts.update_post_source'):
        config_injector({
-            'privileges': {'posts:create:identified': model.User.RANK_REGULAR},
+            'privileges': {
+                'posts:create:identified': model.User.RANK_REGULAR,
+                'uploads:use_downloader': model.User.RANK_POWER,
+            },
        })
        net.download.return_value = b'content'
        posts.create_post.return_value = [post, []]
@ -164,7 +171,8 @@ def test_creating_from_url_saves_source(
                    'contentUrl': 'example.com',
                },
                user=auth_user))
-        net.download.assert_called_once_with('example.com')
+        net.download.assert_called_once_with(
+            'example.com', use_video_downloader=False)
        posts.create_post.assert_called_once_with(
            b'content', ['tag1', 'tag2'], auth_user)
        posts.update_post_source.assert_called_once_with(post, 'example.com')
@ -182,7 +190,10 @@ def test_creating_from_url_with_source_specified(
            patch('szurubooru.func.posts.create_post'), \
            patch('szurubooru.func.posts.update_post_source'):
        config_injector({
-            'privileges': {'posts:create:identified': model.User.RANK_REGULAR},
+            'privileges': {
+                'posts:create:identified': model.User.RANK_REGULAR,
+                'uploads:use_downloader': model.User.RANK_REGULAR,
+            },
        })
        net.download.return_value = b'content'
        posts.create_post.return_value = [post, []]
@ -195,7 +206,8 @@ def test_creating_from_url_with_source_specified(
                    'source': 'example2.com',
                },
                user=auth_user))
-        net.download.assert_called_once_with('example.com')
+        net.download.assert_called_once_with(
+            'example.com', use_video_downloader=True)
        posts.create_post.assert_called_once_with(
            b'content', ['tag1', 'tag2'], auth_user)
        posts.update_post_source.assert_called_once_with(post, 'example2.com')
@ -261,6 +273,7 @@ def test_errors_not_spending_ids(
        },
        'privileges': {
            'posts:create:identified': model.User.RANK_REGULAR,
+            'uploads:use_downloader': model.User.RANK_POWER,
        },
        'secret': 'test',
    })
@ -325,6 +338,7 @@ def test_trying_to_create_tags_without_privileges(
            'posts:create:anonymous': model.User.RANK_REGULAR,
            'posts:create:identified': model.User.RANK_REGULAR,
            'tags:create': model.User.RANK_ADMINISTRATOR,
+            'uploads:use_downloader': model.User.RANK_POWER,
        },
    })
    with pytest.raises(errors.AuthError), \
--- a/server/szurubooru/tests/api/test_post_updating.py
+++ b/server/szurubooru/tests/api/test_post_updating.py
@ -18,6 +18,7 @@ def inject_config(config_injector):
            'posts:edit:flags': model.User.RANK_REGULAR,
            'posts:edit:thumbnail': model.User.RANK_REGULAR,
            'tags:create': model.User.RANK_MODERATOR,
+            'uploads:use_downloader': model.User.RANK_REGULAR,
        },
        'allow_broken_uploads': False,
    })
@ -97,7 +98,8 @@ def test_uploading_from_url_saves_source(
                params={'contentUrl': 'example.com', 'version': 1},
                user=user_factory(rank=model.User.RANK_REGULAR)),
            {'post_id': post.post_id})
-        net.download.assert_called_once_with('example.com')
+        net.download.assert_called_once_with(
+            'example.com', use_video_downloader=True)
        posts.update_post_content.assert_called_once_with(post, b'content')
        posts.update_post_source.assert_called_once_with(post, 'example.com')

@ -121,7 +123,8 @@ def test_uploading_from_url_with_source_specified(
                    'version': 1},
                user=user_factory(rank=model.User.RANK_REGULAR)),
            {'post_id': post.post_id})
-        net.download.assert_called_once_with('example.com')
+        net.download.assert_called_once_with(
+            'example.com', use_video_downloader=True)
        posts.update_post_content.assert_called_once_with(post, b'content')
        posts.update_post_source.assert_called_once_with(post, 'example2.com')

--- a/server/szurubooru/tests/func/test_net.py
+++ b/server/szurubooru/tests/func/test_net.py
@ -1,4 +1,7 @@
+import pytest
+from szurubooru.errors import ThirdPartyError
 from szurubooru.func import net
+from szurubooru.func.util import get_sha1


 def test_download(config_injector):
@ -47,3 +50,26 @@ def test_download(config_injector):

    actual_content = net.download(url)
    assert actual_content == expected_content
+
+
+# Checks that net.download() with use_video_downloader=True returns the
+# expected video bytes for a YouTube watch URL, compared by SHA-1.
+#
+# NOTE(review): nothing is mocked in this function, so it appears to need
+# live network access, and the pinned hash will stop matching if the
+# served file ever changes -- confirm this is acceptable for the suite.
+def test_video_download(tmpdir, config_injector):
+    config_injector({
+        'user_agent': None,
+        # The downloader writes under data_dir, so point it at a temp dir.
+        'data_dir': str(tmpdir.mkdir('data'))
+    })
+    url = 'https://www.youtube.com/watch?v=C0DPdy98e4c'
+    expected_sha1 = '508f89ee85bc6186e18cfaa4f4d0279bcf2418ab'
+
+    actual_content = net.download(url, use_video_downloader=True)
+    assert get_sha1(actual_content) == expected_sha1
+
+
+# Checks that net.download() with use_video_downloader=True raises
+# ThirdPartyError for a URL that points at a plain web page.
+#
+# NOTE(review): ThirdPartyError is only raised on the youtube-dl path, so
+# this presumably relies on the fetched page being classified as
+# 'application/octet-stream' and youtube-dl then failing on it. It also
+# appears to need live network access -- confirm.
+def test_failed_video_download(tmpdir, config_injector):
+    config_injector({
+        'user_agent': None,
+        'data_dir': str(tmpdir.mkdir('data'))
+    })
+    url = 'http://info.cern.ch/hypertext/WWW/TheProject.html'
+
+    with pytest.raises(ThirdPartyError):
+        net.download(url, use_video_downloader=True)
--- a/server/szurubooru/tests/rest/test_context.py
+++ b/server/szurubooru/tests/rest/test_context.py
@ -25,7 +25,8 @@ def test_get_file_from_url():
        ctx = rest.Context(
            env={}, method=None, url=None, params={'keyUrl': 'example.com'})
        assert ctx.get_file('key') == b'content'
-        net.download.assert_called_once_with('example.com')
+        net.download.assert_called_once_with(
+            'example.com', use_video_downloader=False)
        with pytest.raises(errors.ValidationError):
            assert ctx.get_file('non-existing')