From 06ab98fa705fb0646f7ceaab429bf249f9abe64e Mon Sep 17 00:00:00 2001 From: rr- Date: Sat, 27 Aug 2016 01:21:59 +0200 Subject: [PATCH] server/search: fix sort:random breaking tags Using sqlalchemy's subqueryload to fetch tags works like this: 1. Get basic info about posts with query X 2. Copy query X 3. SELECT all tags WHERE post_id IN (SELECT post_ids FROM query X) 4. Associate the resulting tags with the posts When original query contains .order_by(func.random()), it looks like this: 1. SELECT post.* FROM post ORDER BY random() LIMIT 10 2. Copy "ORDER BY random() LIMIT 10" 3. SELECT tag.* FROM tag WHERE tag.post_id IN ( SELECT id FROM post ORDER BY random() LIMIT 10) 4. Disaster! Each post now has completely arbitrary tags! To circumvent this, we replace eager loading with lazy loading. This generates one extra query for each result row, but it has no chance of producing such anomalies. This behavior is activated only for queries containing "sort:random" and derivatives so it shouldn't hit performance too much. --- .../search/configs/base_search_config.py | 4 ++-- .../search/configs/comment_search_config.py | 6 +++--- .../search/configs/post_search_config.py | 18 ++++++++---------- .../search/configs/snapshot_search_config.py | 4 ++-- .../search/configs/tag_search_config.py | 15 ++++++++------- .../search/configs/user_search_config.py | 4 ++-- server/szurubooru/search/executor.py | 9 +++++++-- 7 files changed, 32 insertions(+), 28 deletions(-) diff --git a/server/szurubooru/search/configs/base_search_config.py b/server/szurubooru/search/configs/base_search_config.py index 342bdad9..c6ccace0 100644 --- a/server/szurubooru/search/configs/base_search_config.py +++ b/server/szurubooru/search/configs/base_search_config.py @@ -8,10 +8,10 @@ class BaseSearchConfig(object): def on_search_query_parsed(self, search_query): pass - def create_filter_query(self): + def create_filter_query(self, _disable_eager_loads): raise NotImplementedError() - def create_count_query(self): + def create_count_query(self, disable_eager_loads): raise NotImplementedError() def create_around_query(self): diff --git a/server/szurubooru/search/configs/comment_search_config.py b/server/szurubooru/search/configs/comment_search_config.py index 6f19aed9..9b2515e8 100644 --- a/server/szurubooru/search/configs/comment_search_config.py +++ b/server/szurubooru/search/configs/comment_search_config.py @@ -5,11 +5,11 @@ from szurubooru.search.configs.base_search_config import BaseSearchConfig class CommentSearchConfig(BaseSearchConfig): - def create_filter_query(self): + def create_filter_query(self, _disable_eager_loads): return db.session.query(db.Comment).join(db.User) - def create_count_query(self): - return self.create_filter_query() + def create_count_query(self, disable_eager_loads): + return self.create_filter_query(disable_eager_loads) def create_around_query(self): raise NotImplementedError() diff --git a/server/szurubooru/search/configs/post_search_config.py b/server/szurubooru/search/configs/post_search_config.py index 7036c0ca..55a9f463 100644 --- a/server/szurubooru/search/configs/post_search_config.py +++ b/server/szurubooru/search/configs/post_search_config.py @@ -103,9 +103,11 @@ class PostSearchConfig(BaseSearchConfig): def create_around_query(self): return db.session.query(db.Post.post_id) - def create_filter_query(self): + def create_filter_query(self, disable_eager_loads): + strategy = lazyload if disable_eager_loads else subqueryload return db.session.query(db.Post) \ .options( + lazyload('*'), # use config optimized for official client # defer(db.Post.score), # defer(db.Post.favorite_count), @@ -117,16 +119,12 @@ class PostSearchConfig(BaseSearchConfig): defer(db.Post.last_comment_edit_time), defer(db.Post.note_count), defer(db.Post.tag_count), - subqueryload(db.Post.tags).subqueryload(db.Tag.names), - subqueryload(db.Post.tags).defer(db.Tag.post_count), - subqueryload(db.Post.tags).lazyload(db.Tag.implications), - subqueryload(db.Post.tags).lazyload(db.Tag.suggestions), - lazyload(db.Post.user), - lazyload(db.Post.relations), - lazyload(db.Post.notes), - lazyload(db.Post.favorited_by)) + strategy(db.Post.tags).subqueryload(db.Tag.names), + strategy(db.Post.tags).defer(db.Tag.post_count), + strategy(db.Post.tags).lazyload(db.Tag.implications), + strategy(db.Post.tags).lazyload(db.Tag.suggestions)) - def create_count_query(self): + def create_count_query(self, _disable_eager_loads): return db.session.query(db.Post) def finalize_query(self, query): diff --git a/server/szurubooru/search/configs/snapshot_search_config.py b/server/szurubooru/search/configs/snapshot_search_config.py index d7705ab5..4ea7280a 100644 --- a/server/szurubooru/search/configs/snapshot_search_config.py +++ b/server/szurubooru/search/configs/snapshot_search_config.py @@ -4,10 +4,10 @@ from szurubooru.search.configs.base_search_config import BaseSearchConfig class SnapshotSearchConfig(BaseSearchConfig): - def create_filter_query(self): + def create_filter_query(self, _disable_eager_loads): return db.session.query(db.Snapshot) - def create_count_query(self): + def create_count_query(self, _disable_eager_loads): return db.session.query(db.Snapshot) def create_around_query(self): diff --git a/server/szurubooru/search/configs/tag_search_config.py b/server/szurubooru/search/configs/tag_search_config.py index 40f57e6e..9ce839dd 100644 --- a/server/szurubooru/search/configs/tag_search_config.py +++ b/server/szurubooru/search/configs/tag_search_config.py @@ -1,4 +1,4 @@ -from sqlalchemy.orm import subqueryload +from sqlalchemy.orm import subqueryload, lazyload from sqlalchemy.sql.expression import func from szurubooru import db from szurubooru.func import util @@ -7,16 +7,17 @@ from szurubooru.search.configs.base_search_config import BaseSearchConfig class TagSearchConfig(BaseSearchConfig): - def create_filter_query(self): + def create_filter_query(self, _disable_eager_loads): + strategy = lazyload if _disable_eager_loads else subqueryload return db.session.query(db.Tag) \ .join(db.TagCategory) \ .options( - subqueryload(db.Tag.names), - subqueryload(db.Tag.category), - subqueryload(db.Tag.suggestions).joinedload(db.Tag.names), - subqueryload(db.Tag.implications).joinedload(db.Tag.names)) + strategy(db.Tag.names), + strategy(db.Tag.category), + strategy(db.Tag.suggestions).joinedload(db.Tag.names), + strategy(db.Tag.implications).joinedload(db.Tag.names)) - def create_count_query(self): + def create_count_query(self, _disable_eager_loads): return db.session.query(db.Tag) def create_around_query(self): diff --git a/server/szurubooru/search/configs/user_search_config.py b/server/szurubooru/search/configs/user_search_config.py index 944f5dc8..c7e727e6 100644 --- a/server/szurubooru/search/configs/user_search_config.py +++ b/server/szurubooru/search/configs/user_search_config.py @@ -5,10 +5,10 @@ from szurubooru.search.configs.base_search_config import BaseSearchConfig class UserSearchConfig(BaseSearchConfig): - def create_filter_query(self): + def create_filter_query(self, _disable_eager_loads): return db.session.query(db.User) - def create_count_query(self): + def create_count_query(self, _disable_eager_loads): return db.session.query(db.User) def create_around_query(self): diff --git a/server/szurubooru/search/executor.py b/server/szurubooru/search/executor.py index 61d4f74d..511a9c2c 100644 --- a/server/szurubooru/search/executor.py +++ b/server/szurubooru/search/executor.py @@ -73,11 +73,16 @@ class Executor(object): search_query = self.parser.parse(query_text) self.config.on_search_query_parsed(search_query) + disable_eager_loads = False + for token in search_query.sort_tokens: + if token.name == 'random': + disable_eager_loads = True + key = (id(self.config), hash(search_query), page, page_size) if cache.has(key): return cache.get(key) - filter_query = self.config.create_filter_query() + filter_query = self.config.create_filter_query(disable_eager_loads) filter_query = filter_query.options(sqlalchemy.orm.lazyload('*')) filter_query = self._prepare_db_query(filter_query, search_query, True) entities = filter_query \ @@ -85,7 +90,7 @@ class Executor(object): .limit(page_size) \ .all() - count_query = self.config.create_count_query() + count_query = self.config.create_count_query(disable_eager_loads) count_query = count_query.options(sqlalchemy.orm.lazyload('*')) count_query = self._prepare_db_query(count_query, search_query, False) count_statement = count_query \