server/search: add search term escaping

This commit is contained in:
rr- 2017-04-24 21:51:49 +02:00
parent 9814b132c3
commit ba4df16499
7 changed files with 164 additions and 35 deletions

8
API.md
View file

@ -2258,6 +2258,9 @@ Date/time values can be of following form:
Some fields, such as user names, can take wildcards (`*`).
You can escape special characters such as `:` and `-` by prepending them with a
backslash: `\`.
**Example**
Searching for posts with the following query:
@ -2266,3 +2269,8 @@ Searching for posts with following query:
will show flash files tagged as sea, that were liked by seven people at most,
uploaded by user Pirate.
Searching for posts with `re:zero` will show an error message about an unknown
named token.
Searching for posts with `re\:zero` will show posts tagged with `re:zero`.

View file

@ -80,6 +80,9 @@ take following form:</p>
<code>,desc</code> to control the sort direction, which can be also controlled
by negating the whole token.</p>
<p>You can escape special characters such as <code>:</code> and <code>-</code>
by prepending them with a backslash: <code>\\</code>.</p>
<h1>Example</h1>
<p>Searching for posts with the following query:</p>
@ -89,3 +92,8 @@ by negating the whole token.</p>
<p>will show flash files tagged as sea, that were liked by seven people at
most, uploaded by user Pirate.</p>
<p>Searching for posts with <code>re:zero</code> will show an error message
about an unknown named token.</p>
<p>Searching for posts with <code>re\:zero</code> will show posts tagged with
<code>re:zero</code>.</p>

View file

@ -10,15 +10,6 @@ from szurubooru.search.configs.base_search_config import (
BaseSearchConfig, Filter)
def _enum_transformer(available_values: Dict[str, Any], value: str) -> str:
try:
return available_values[value.lower()]
except KeyError:
raise errors.SearchError(
'Invalid value: %r. Possible values: %r.' % (
value, list(sorted(available_values.keys()))))
def _type_transformer(value: str) -> str:
available_values = {
'image': model.Post.TYPE_IMAGE,
@ -31,7 +22,7 @@ def _type_transformer(value: str) -> str:
'flash': model.Post.TYPE_FLASH,
'swf': model.Post.TYPE_FLASH,
}
return _enum_transformer(available_values, value)
return search_util.enum_transformer(available_values, value)
def _safety_transformer(value: str) -> str:
@ -41,7 +32,7 @@ def _safety_transformer(value: str) -> str:
'questionable': model.Post.SAFETY_SKETCHY,
'unsafe': model.Post.SAFETY_UNSAFE,
}
return _enum_transformer(available_values, value)
return search_util.enum_transformer(available_values, value)
def _create_score_filter(score: int) -> Filter:

View file

@ -1,4 +1,4 @@
from typing import Any, Optional, Union, Callable
from typing import Any, Optional, Union, Dict, Callable
import sqlalchemy as sa
from szurubooru import db, errors
from szurubooru.func import util
@ -8,27 +8,62 @@ from szurubooru.search.configs.base_search_config import Filter
Number = Union[int, float]
WILDCARD = '(--wildcard--)' # something unlikely to be used by the users
def unescape(text: str, make_wildcards_special: bool = False) -> str:
    """Resolve backslash escape sequences in a search term.

    A backslash makes the character that follows it literal. Only the
    asterisk, backslash, colon, hyphen, dot and comma may be escaped.

    Args:
        text: the raw term, possibly containing backslash escapes.
        make_wildcards_special: when true, every *unescaped* asterisk is
            replaced with the ``WILDCARD`` placeholder, so callers can tell
            it apart from an escaped (literal) asterisk.

    Returns:
        The term with the escaping backslashes removed.

    Raises:
        errors.SearchError: if ``text`` ends with a lone backslash, or a
            backslash precedes a character that cannot be escaped.
    """
    escapable = '*\\:-.,'
    length = len(text)
    # Collect pieces and join once instead of growing a string with +=.
    pieces = []
    i = 0
    while i < length:
        char = text[i]
        if char == '\\':
            # Step onto the escaped character; a trailing backslash has none.
            i += 1
            if i >= length:
                raise errors.SearchError(
                    'Unterminated escape sequence (did you forget to escape '
                    'the ending backslash?)')
            char = text[i]
            if char not in escapable:
                raise errors.SearchError(
                    'Unknown escape sequence (did you forget to escape '
                    'the backslash?)')
        elif char == '*' and make_wildcards_special:
            char = WILDCARD
        pieces.append(char)
        i += 1
    return ''.join(pieces)
def wildcard_transformer(value: str) -> str:
return (
value
unescape(value, make_wildcards_special=True)
.replace('\\', '\\\\')
.replace('%', '\\%')
.replace('_', '\\_')
.replace('*', '%'))
.replace(WILDCARD, '%'))
def enum_transformer(available_values: Dict[str, Any], value: str) -> str:
    """Map a user-supplied enum name onto its configured value.

    The name is lowercased and then passed through ``unescape`` before being
    looked up in ``available_values``.

    Args:
        available_values: mapping of accepted names to the values they
            stand for.
        value: the name typed by the user, possibly containing backslash
            escapes.

    Returns:
        The entry of ``available_values`` stored under the normalized name.

    Raises:
        errors.SearchError: if the normalized name is not a key of
            ``available_values`` (the message lists the accepted names in
            sorted order), or if ``unescape`` rejects the input.
    """
    # Normalize outside the try block: only the lookup can raise KeyError.
    key = unescape(value.lower())
    try:
        return available_values[key]
    except KeyError:
        # sorted() already returns a list of the mapping's keys.
        raise errors.SearchError(
            'Invalid value: %r. Possible values: %r.' % (
                value, sorted(available_values)))
def integer_transformer(value: str) -> int:
return int(value)
return int(unescape(value))
def float_transformer(value: str) -> float:
for sep in list('/:'):
if sep in value:
a, b = value.split(sep, 1)
return float(a) / float(b)
return float(value)
return float(unescape(a)) / float(unescape(b))
return float(unescape(value))
def apply_num_criterion_to_column(
@ -84,23 +119,23 @@ def apply_str_criterion_to_column(
for value in criterion.values:
expr = expr | column.ilike(transformer(value))
elif isinstance(criterion, criteria.RangedCriterion):
expr = column.ilike(transformer(criterion.original_text))
raise errors.SearchError(
'Ranged criterion is invalid in this context. '
'Did you forget to escape the dots?')
else:
assert False
return expr
def create_str_filter(
column: SaColumn,
transformer: Callable[[str], str]=wildcard_transformer
column: SaColumn, transformer: Callable[[str], str]=wildcard_transformer
) -> Filter:
def wrapper(
query: SaQuery,
criterion: Optional[criteria.BaseCriterion],
negated: bool) -> SaQuery:
assert criterion
expr = apply_str_criterion_to_column(
column, criterion, transformer)
expr = apply_str_criterion_to_column(column, criterion, transformer)
if negated:
expr = ~expr
return query.filter(expr)

View file

@ -1,17 +1,20 @@
import re
from typing import List
from typing import Match, List
from szurubooru import errors
from szurubooru.search import criteria, tokens
from szurubooru.search.query import SearchQuery
from szurubooru.search.configs import util
def _create_criterion(
original_value: str, value: str) -> criteria.BaseCriterion:
if ',' in value:
return criteria.ArrayCriterion(
original_value, value.split(','))
if '..' in value:
low, high = value.split('..', 1)
if re.search(r'(?<!\\),', value):
values = re.split(r'(?<!\\),', value)
if any(not term.strip() for term in values):
raise errors.SearchError('Empty compound value')
return criteria.ArrayCriterion(original_value, values)
if re.search(r'(?<!\\)\.(?<!\\)\.', value):
low, high = re.split(r'(?<!\\)\.(?<!\\)\.', value, 1)
if not low and not high:
raise errors.SearchError('Empty ranged value')
return criteria.RangedCriterion(original_value, low, high)
@ -82,9 +85,10 @@ class Parser:
negated = True
if not chunk:
raise errors.SearchError('Empty negated token.')
match = re.match('([a-z_-]+):(.*)', chunk)
match = re.match(r'^(.*?)(?<!\\):(.*)$', chunk)
if match:
key, value = list(match.groups())
key = util.unescape(key)
if key == 'sort':
query.sort_tokens.append(
_parse_sort(value, negated))

View file

@ -35,10 +35,77 @@ def test_filter_anonymous(
verify_unpaged(input, expected_tag_names)
# Each case is (db_driver, input, expected_tag_names):
#   - db_driver: None, or the name of the only driver the case applies to;
#   - input: the search query handed to the executor;
#   - expected_tag_names: the tag names the query must return, or None when
#     the query is expected to raise errors.SearchError.
@pytest.mark.parametrize('db_driver,input,expected_tag_names', [
(None, ',', None),
(None, 't1,', None),
(None, 't1,t2', ['t1', 't2']),
(None, 't1\\,', []),
(None, 'asd..asd', None),
(None, 'asd\\..asd', []),
(None, 'asd.\\.asd', []),
(None, 'asd\\.\\.asd', []),
(None, '-', None),
(None, '\\-', ['-']),
(None, '--', [
't1', 't2', '*', '*asd*', ':', 'asd:asd', '\\', '\\asd', '-asd',
]),
(None, '\\--', []),
(None, '-\\-', [
't1', 't2', '*', '*asd*', ':', 'asd:asd', '\\', '\\asd', '-asd',
]),
(None, '-*', []),
(None, '\\-*', ['-', '-asd']),
(None, ':', None),
(None, '\\:', [':']),
(None, '\\:asd', []),
(None, '*\\:*', [':', 'asd:asd']),
(None, 'asd:asd', None),
(None, 'asd\\:asd', ['asd:asd']),
(None, '*', [
't1', 't2', '*', '*asd*', ':', 'asd:asd', '\\', '\\asd', '-', '-asd'
]),
(None, '\\*', ['*']),
(None, '\\', None),
(None, '\\asd', None),
# The two cases below are only asserted when the driver is psycopg2.
('psycopg2', '\\\\', ['\\']),
('psycopg2', '\\\\asd', ['\\asd']),
])
def test_escaping(
executor, tag_factory, input, expected_tag_names, db_driver):
# Seed ten tags whose names contain the characters that need escaping.
db.session.add_all([
tag_factory(names=['t1']),
tag_factory(names=['t2']),
tag_factory(names=['*']),
tag_factory(names=['*asd*']),
tag_factory(names=[':']),
tag_factory(names=['asd:asd']),
tag_factory(names=['\\']),
tag_factory(names=['\\asd']),
tag_factory(names=['-']),
tag_factory(names=['-asd'])
])
db.session.flush()
# Driver-specific case: mark as xfail when the session is bound to a
# different driver than the one the case names.
if db_driver:
if db.sessionmaker.kw['bind'].driver != db_driver:
pytest.xfail()
# None means the query itself must be rejected with a SearchError.
if expected_tag_names is None:
with pytest.raises(errors.SearchError):
executor.execute(input, offset=0, limit=100)
else:
actual_count, actual_tags = executor.execute(
input, offset=0, limit=100)
# Compare by each tag's first name, ignoring result order.
actual_tag_names = [u.names[0].name for u in actual_tags]
assert actual_count == len(expected_tag_names)
assert sorted(actual_tag_names) == sorted(expected_tag_names)
def test_filter_anonymous_starting_with_colon(verify_unpaged, tag_factory):
db.session.add(tag_factory(names=[':t']))
db.session.flush()
verify_unpaged(':t', [':t'])
with pytest.raises(errors.SearchError):
verify_unpaged(':t', [':t'])
verify_unpaged('\\:t', [':t'])
@pytest.mark.parametrize('input,expected_tag_names', [

View file

@ -86,12 +86,24 @@ def test_filter_by_name(
@pytest.mark.parametrize('input,expected_user_names', [
('name:u1', ['u1']),
('name:u2..', ['u2..']),
('name:u2*', ['u2..']),
('name:*..*', ['u2..', 'u3..x']),
('name:u3..x', ['u3..x']),
('name:*..x', ['u3..x']),
('name:u1,u3..x', ['u1', 'u3..x']),
('name:u2..', None),
('name:*..*', None),
('name:u3..x', None),
('name:*..x', None),
('name:u2\\..', ['u2..']),
('name:*\\..*', ['u2..', 'u3..x']),
('name:u3\\..x', ['u3..x']),
('name:*\\..x', ['u3..x']),
('name:u2.\\.', ['u2..']),
('name:*.\\.*', ['u2..', 'u3..x']),
('name:u3.\\.x', ['u3..x']),
('name:*.\\.x', ['u3..x']),
('name:u2\\.\\.', ['u2..']),
('name:*\\.\\.*', ['u2..', 'u3..x']),
('name:u3\\.\\.x', ['u3..x']),
('name:*\\.\\.x', ['u3..x']),
])
def test_filter_by_name_that_looks_like_range(
verify_unpaged, input, expected_user_names, user_factory):
@ -99,7 +111,11 @@ def test_filter_by_name_that_looks_like_range(
db.session.add(user_factory(name='u2..'))
db.session.add(user_factory(name='u3..x'))
db.session.flush()
verify_unpaged(input, expected_user_names)
if not expected_user_names:
with pytest.raises(errors.SearchError):
verify_unpaged(input, expected_user_names)
else:
verify_unpaged(input, expected_user_names)
@pytest.mark.parametrize('input,expected_user_names', [