server/search: fix sort:random breaking tags

Using sqlalchemy's subqueryload to fetch tags works like this:

1. Get basic info about posts with query X
2. Copy query X
3. SELECT all tags WHERE post_id IN (SELECT post_ids FROM query X)
4. Associate the resulting tags with the posts

When original query contains .order_by(func.random()), it looks like
this:

1. SELECT post.* FROM post ORDER BY random() LIMIT 10
2. Copy "ORDER BY random() LIMIT 10"
3. SELECT tag.* FROM tag WHERE tag.post_id IN (
       SELECT id FROM post ORDER BY random() LIMIT 10)
4. Disaster! Each post now has completely arbitrary tags!

To circumvent this, we replace eager loading with lazy loading. This
generates one extra query for each result row, but it has no chance of
producing such anomalies. This behavior is activated only for
queries containing "sort:random" and derivatives so it shouldn't hit
performance too much.
This commit is contained in:
rr- 2016-08-27 01:21:59 +02:00
parent f8e91a10e8
commit 06ab98fa70
7 changed files with 32 additions and 28 deletions

View file

@ -8,10 +8,10 @@ class BaseSearchConfig(object):
def on_search_query_parsed(self, search_query):
pass
def create_filter_query(self):
def create_filter_query(self, _disable_eager_loads):
raise NotImplementedError()
def create_count_query(self):
def create_count_query(self, disable_eager_loads):
raise NotImplementedError()
def create_around_query(self):

View file

@ -5,11 +5,11 @@ from szurubooru.search.configs.base_search_config import BaseSearchConfig
class CommentSearchConfig(BaseSearchConfig):
def create_filter_query(self):
def create_filter_query(self, _disable_eager_loads):
return db.session.query(db.Comment).join(db.User)
def create_count_query(self):
return self.create_filter_query()
def create_count_query(self, disable_eager_loads):
return self.create_filter_query(disable_eager_loads)
def create_around_query(self):
raise NotImplementedError()

View file

@ -103,9 +103,11 @@ class PostSearchConfig(BaseSearchConfig):
def create_around_query(self):
return db.session.query(db.Post.post_id)
def create_filter_query(self):
def create_filter_query(self, disable_eager_loads):
strategy = lazyload if disable_eager_loads else subqueryload
return db.session.query(db.Post) \
.options(
lazyload('*'),
# use config optimized for official client
# defer(db.Post.score),
# defer(db.Post.favorite_count),
@ -117,16 +119,12 @@ class PostSearchConfig(BaseSearchConfig):
defer(db.Post.last_comment_edit_time),
defer(db.Post.note_count),
defer(db.Post.tag_count),
subqueryload(db.Post.tags).subqueryload(db.Tag.names),
subqueryload(db.Post.tags).defer(db.Tag.post_count),
subqueryload(db.Post.tags).lazyload(db.Tag.implications),
subqueryload(db.Post.tags).lazyload(db.Tag.suggestions),
lazyload(db.Post.user),
lazyload(db.Post.relations),
lazyload(db.Post.notes),
lazyload(db.Post.favorited_by))
strategy(db.Post.tags).subqueryload(db.Tag.names),
strategy(db.Post.tags).defer(db.Tag.post_count),
strategy(db.Post.tags).lazyload(db.Tag.implications),
strategy(db.Post.tags).lazyload(db.Tag.suggestions))
def create_count_query(self):
def create_count_query(self, _disable_eager_loads):
return db.session.query(db.Post)
def finalize_query(self, query):

View file

@ -4,10 +4,10 @@ from szurubooru.search.configs.base_search_config import BaseSearchConfig
class SnapshotSearchConfig(BaseSearchConfig):
def create_filter_query(self):
def create_filter_query(self, _disable_eager_loads):
return db.session.query(db.Snapshot)
def create_count_query(self):
def create_count_query(self, _disable_eager_loads):
return db.session.query(db.Snapshot)
def create_around_query(self):

View file

@ -1,4 +1,4 @@
from sqlalchemy.orm import subqueryload
from sqlalchemy.orm import subqueryload, lazyload
from sqlalchemy.sql.expression import func
from szurubooru import db
from szurubooru.func import util
@ -7,16 +7,17 @@ from szurubooru.search.configs.base_search_config import BaseSearchConfig
class TagSearchConfig(BaseSearchConfig):
def create_filter_query(self):
def create_filter_query(self, _disable_eager_loads):
strategy = lazyload if _disable_eager_loads else subqueryload
return db.session.query(db.Tag) \
.join(db.TagCategory) \
.options(
subqueryload(db.Tag.names),
subqueryload(db.Tag.category),
subqueryload(db.Tag.suggestions).joinedload(db.Tag.names),
subqueryload(db.Tag.implications).joinedload(db.Tag.names))
strategy(db.Tag.names),
strategy(db.Tag.category),
strategy(db.Tag.suggestions).joinedload(db.Tag.names),
strategy(db.Tag.implications).joinedload(db.Tag.names))
def create_count_query(self):
def create_count_query(self, _disable_eager_loads):
return db.session.query(db.Tag)
def create_around_query(self):

View file

@ -5,10 +5,10 @@ from szurubooru.search.configs.base_search_config import BaseSearchConfig
class UserSearchConfig(BaseSearchConfig):
def create_filter_query(self):
def create_filter_query(self, _disable_eager_loads):
return db.session.query(db.User)
def create_count_query(self):
def create_count_query(self, _disable_eager_loads):
return db.session.query(db.User)
def create_around_query(self):

View file

@ -73,11 +73,16 @@ class Executor(object):
search_query = self.parser.parse(query_text)
self.config.on_search_query_parsed(search_query)
disable_eager_loads = False
for token in search_query.sort_tokens:
if token.name == 'random':
disable_eager_loads = True
key = (id(self.config), hash(search_query), page, page_size)
if cache.has(key):
return cache.get(key)
filter_query = self.config.create_filter_query()
filter_query = self.config.create_filter_query(disable_eager_loads)
filter_query = filter_query.options(sqlalchemy.orm.lazyload('*'))
filter_query = self._prepare_db_query(filter_query, search_query, True)
entities = filter_query \
@ -85,7 +90,7 @@ class Executor(object):
.limit(page_size) \
.all()
count_query = self.config.create_count_query()
count_query = self.config.create_count_query(disable_eager_loads)
count_query = count_query.options(sqlalchemy.orm.lazyload('*'))
count_query = self._prepare_db_query(count_query, search_query, False)
count_statement = count_query \