server/search: add search term escaping

2017-04-24 21:51:49 +02:00 · 2017-04-24 21:51:49 +02:00 · ba4df16499
commit ba4df16499
parent 9814b132c3
7 changed files with 164 additions and 35 deletions
--- a/API.md
+++ b/API.md
@ -2258,6 +2258,9 @@ Date/time values can be of following form:
 Some fields, such as user names, can take wildcards (`*`).
 You can escape special characters such as `:` and `-` by prepending them with a
 backslash: `\\`.
 **Example**
 Searching for posts with the following query:
@ -2266,3 +2269,8 @@ Searching for posts with following query:
 will show flash files tagged as sea, that were liked by seven people at most,
 uploaded by user Pirate.
 Searching for posts with `re:zero` will show an error message about an
 unknown named token.
 Searching for posts with `re\:zero` will show posts tagged with `re:zero`.
--- a/client/html/help_search_general.tpl
+++ b/client/html/help_search_general.tpl
@ -80,6 +80,9 @@ take following form:</p>
 <code>,desc</code> to control the sort direction, which can be also controlled
 by negating the whole token.</p>
 <p>You can escape special characters such as <code>:</code> and <code>-</code>
 by prepending them with a backslash: <code>\\</code>.</p>
 <h1>Example</h1>
 <p>Searching for posts with the following query:</p>
@ -89,3 +92,8 @@ by negating the whole token.</p>
 <p>will show flash files tagged as sea, that were liked by seven people at
 most, uploaded by user Pirate.</p>
 <p>Searching for posts with <code>re:zero</code> will show an error message
 about an unknown named token.</p>
 <p>Searching for posts with <code>re\:zero</code> will show posts tagged with
 <code>re:zero</code>.</p>
--- a/server/szurubooru/search/configs/post_search_config.py
+++ b/server/szurubooru/search/configs/post_search_config.py
@ -10,15 +10,6 @@ from szurubooru.search.configs.base_search_config import (
    BaseSearchConfig, Filter)
 def _enum_transformer(available_values: Dict[str, Any], value: str) -> str:
    try:
        return available_values[value.lower()]
    except KeyError:
        raise errors.SearchError(
            'Invalid value: %r. Possible values: %r.' % (
                value, list(sorted(available_values.keys()))))
 def _type_transformer(value: str) -> str:
    available_values = {
        'image': model.Post.TYPE_IMAGE,
@ -31,7 +22,7 @@ def _type_transformer(value: str) -> str:
        'flash': model.Post.TYPE_FLASH,
        'swf': model.Post.TYPE_FLASH,
    }
-    return _enum_transformer(available_values, value)
+    return search_util.enum_transformer(available_values, value)
 def _safety_transformer(value: str) -> str:
@ -41,7 +32,7 @@ def _safety_transformer(value: str) -> str:
        'questionable': model.Post.SAFETY_SKETCHY,
        'unsafe': model.Post.SAFETY_UNSAFE,
    }
-    return _enum_transformer(available_values, value)
+    return search_util.enum_transformer(available_values, value)
 def _create_score_filter(score: int) -> Filter:
--- a/server/szurubooru/search/configs/util.py
+++ b/server/szurubooru/search/configs/util.py
@ -1,4 +1,4 @@
-from typing import Any, Optional, Union, Callable
+from typing import Any, Optional, Union, Dict, Callable
 import sqlalchemy as sa
 from szurubooru import db, errors
 from szurubooru.func import util
@ -8,27 +8,62 @@ from szurubooru.search.configs.base_search_config import Filter
 Number = Union[int, float]
 WILDCARD = '(--wildcard--)'  # something unlikely to be used by the users
 def unescape(text: str, make_wildcards_special: bool = False) -> str:
    """Resolve backslash escape sequences in a search term.

    A backslash must be followed by one of ``* \\ : - . ,``; the pair is
    replaced by the escaped character itself. When ``make_wildcards_special``
    is true, every *unescaped* ``*`` is replaced by the ``WILDCARD``
    placeholder so that callers can tell it apart from an escaped, literal
    asterisk.

    Raises:
        errors.SearchError: if the text ends with a lone backslash, or a
            backslash is followed by a character that cannot be escaped.
    """
    pieces = []
    remaining = iter(text)
    for current in remaining:
        if current == '\\':
            # Consume the character that follows the backslash, if any.
            escaped = next(remaining, None)
            if escaped is None:
                raise errors.SearchError(
                    'Unterminated escape sequence (did you forget to escape '
                    'the ending backslash?)')
            if escaped not in '*\\:-.,':
                raise errors.SearchError(
                    'Unknown escape sequence (did you forget to escape '
                    'the backslash?)')
            pieces.append(escaped)
        elif current == '*' and make_wildcards_special:
            pieces.append(WILDCARD)
        else:
            pieces.append(current)
    return ''.join(pieces)
 def wildcard_transformer(value: str) -> str:
    return (
-        value
+        unescape(value, make_wildcards_special=True)
        .replace('\\', '\\\\')
        .replace('%', '\\%')
        .replace('_', '\\_')
-        .replace('*', '%'))
+        .replace(WILDCARD, '%'))
 def enum_transformer(available_values: Dict[str, Any], value: str) -> str:
    """Translate an escaped search value into its ``available_values`` entry.

    The value is lowercased and passed through ``unescape`` before being used
    as the lookup key.

    Raises:
        errors.SearchError: if the resulting key is not present in
            ``available_values``; the message quotes the value as given and
            lists the accepted keys in sorted order.
    """
    lookup_key = unescape(value.lower())
    try:
        return available_values[lookup_key]
    except KeyError:
        accepted = sorted(available_values.keys())
        message = 'Invalid value: %r. Possible values: %r.' % (
            value, accepted)
        raise errors.SearchError(message)
 def integer_transformer(value: str) -> int:
-    return int(value)
+    return int(unescape(value))
 def float_transformer(value: str) -> float:
    for sep in list('/:'):
        if sep in value:
            a, b = value.split(sep, 1)
-            return float(a) / float(b)
+            return float(unescape(a)) / float(unescape(b))
-    return float(value)
+    return float(unescape(value))
 def apply_num_criterion_to_column(
@ -84,23 +119,23 @@ def apply_str_criterion_to_column(
        for value in criterion.values:
            expr = expr | column.ilike(transformer(value))
    elif isinstance(criterion, criteria.RangedCriterion):
-        expr = column.ilike(transformer(criterion.original_text))
+        raise errors.SearchError(
            'Ranged criterion is invalid in this context. '
            'Did you forget to escape the dots?')
    else:
        assert False
    return expr
 def create_str_filter(
-    column: SaColumn,
+    column: SaColumn, transformer: Callable[[str], str]=wildcard_transformer
    transformer: Callable[[str], str]=wildcard_transformer
 ) -> Filter:
    def wrapper(
            query: SaQuery,
            criterion: Optional[criteria.BaseCriterion],
            negated: bool) -> SaQuery:
        assert criterion
-        expr = apply_str_criterion_to_column(
+        expr = apply_str_criterion_to_column(column, criterion, transformer)
            column, criterion, transformer)
        if negated:
            expr = ~expr
        return query.filter(expr)
--- a/server/szurubooru/search/parser.py
+++ b/server/szurubooru/search/parser.py
@ -1,17 +1,20 @@
 import re
-from typing import List
+from typing import Match, List
 from szurubooru import errors
 from szurubooru.search import criteria, tokens
 from szurubooru.search.query import SearchQuery
 from szurubooru.search.configs import util
 def _create_criterion(
        original_value: str, value: str) -> criteria.BaseCriterion:
-    if ',' in value:
+    if re.search(r'(?<!\\),', value):
-        return criteria.ArrayCriterion(
+        values = re.split(r'(?<!\\),', value)
-            original_value, value.split(','))
+        if any(not term.strip() for term in values):
-    if '..' in value:
+            raise errors.SearchError('Empty compound value')
-        low, high = value.split('..', 1)
+        return criteria.ArrayCriterion(original_value, values)
    if re.search(r'(?<!\\)\.(?<!\\)\.', value):
        low, high = re.split(r'(?<!\\)\.(?<!\\)\.', value, 1)
        if not low and not high:
            raise errors.SearchError('Empty ranged value')
        return criteria.RangedCriterion(original_value, low, high)
@ -82,9 +85,10 @@ class Parser:
                negated = True
            if not chunk:
                raise errors.SearchError('Empty negated token.')
-            match = re.match('([a-z_-]+):(.*)', chunk)
+            match = re.match(r'^(.*?)(?<!\\):(.*)$', chunk)
            if match:
                key, value = list(match.groups())
                key = util.unescape(key)
                if key == 'sort':
                    query.sort_tokens.append(
                        _parse_sort(value, negated))
--- a/server/szurubooru/tests/search/configs/test_tag_search_config.py
+++ b/server/szurubooru/tests/search/configs/test_tag_search_config.py
@ -35,10 +35,77 @@ def test_filter_anonymous(
    verify_unpaged(input, expected_tag_names)
@pytest.mark.parametrize('db_driver,input,expected_tag_names', [
    (None, ',', None),
    (None, 't1,', None),
    (None, 't1,t2', ['t1', 't2']),
    (None, 't1\\,', []),
    (None, 'asd..asd', None),
    (None, 'asd\\..asd', []),
    (None, 'asd.\\.asd', []),
    (None, 'asd\\.\\.asd', []),
    (None, '-', None),
    (None, '\\-', ['-']),
    (None, '--', [
        't1', 't2', '*', '*asd*', ':', 'asd:asd', '\\', '\\asd', '-asd',
    ]),
    (None, '\\--', []),
    (None, '-\\-', [
        't1', 't2', '*', '*asd*', ':', 'asd:asd', '\\', '\\asd', '-asd',
    ]),
    (None, '-*', []),
    (None, '\\-*', ['-', '-asd']),
    (None, ':', None),
    (None, '\\:', [':']),
    (None, '\\:asd', []),
    (None, '*\\:*', [':', 'asd:asd']),
    (None, 'asd:asd', None),
    (None, 'asd\\:asd', ['asd:asd']),
    (None, '*', [
        't1', 't2', '*', '*asd*', ':', 'asd:asd', '\\', '\\asd', '-', '-asd'
    ]),
    (None, '\\*', ['*']),
    (None, '\\', None),
    (None, '\\asd', None),
    ('psycopg2', '\\\\', ['\\']),
    ('psycopg2', '\\\\asd', ['\\asd']),
 ])
 def test_escaping(
        executor, tag_factory, input, expected_tag_names, db_driver):
    db.session.add_all([
        tag_factory(names=['t1']),
        tag_factory(names=['t2']),
        tag_factory(names=['*']),
        tag_factory(names=['*asd*']),
        tag_factory(names=[':']),
        tag_factory(names=['asd:asd']),
        tag_factory(names=['\\']),
        tag_factory(names=['\\asd']),
        tag_factory(names=['-']),
        tag_factory(names=['-asd'])
    ])
    db.session.flush()
    if db_driver:
        if db.sessionmaker.kw['bind'].driver != db_driver:
            pytest.xfail()
    if expected_tag_names is None:
        with pytest.raises(errors.SearchError):
            executor.execute(input, offset=0, limit=100)
    else:
        actual_count, actual_tags = executor.execute(
            input, offset=0, limit=100)
        actual_tag_names = [u.names[0].name for u in actual_tags]
        assert actual_count == len(expected_tag_names)
        assert sorted(actual_tag_names) == sorted(expected_tag_names)
 def test_filter_anonymous_starting_with_colon(verify_unpaged, tag_factory):
    db.session.add(tag_factory(names=[':t']))
    db.session.flush()
-    verify_unpaged(':t', [':t'])
+    with pytest.raises(errors.SearchError):
        verify_unpaged(':t', [':t'])
    verify_unpaged('\\:t', [':t'])
@pytest.mark.parametrize('input,expected_tag_names', [
--- a/server/szurubooru/tests/search/configs/test_user_search_config.py
+++ b/server/szurubooru/tests/search/configs/test_user_search_config.py
@ -86,12 +86,24 @@ def test_filter_by_name(
@pytest.mark.parametrize('input,expected_user_names', [
    ('name:u1', ['u1']),
    ('name:u2..', ['u2..']),
    ('name:u2*', ['u2..']),
    ('name:*..*', ['u2..', 'u3..x']),
    ('name:u3..x', ['u3..x']),
    ('name:*..x', ['u3..x']),
    ('name:u1,u3..x', ['u1', 'u3..x']),
    ('name:u2..', None),
    ('name:*..*', None),
    ('name:u3..x', None),
    ('name:*..x', None),
    ('name:u2\\..', ['u2..']),
    ('name:*\\..*', ['u2..', 'u3..x']),
    ('name:u3\\..x', ['u3..x']),
    ('name:*\\..x', ['u3..x']),
    ('name:u2.\\.', ['u2..']),
    ('name:*.\\.*', ['u2..', 'u3..x']),
    ('name:u3.\\.x', ['u3..x']),
    ('name:*.\\.x', ['u3..x']),
    ('name:u2\\.\\.', ['u2..']),
    ('name:*\\.\\.*', ['u2..', 'u3..x']),
    ('name:u3\\.\\.x', ['u3..x']),
    ('name:*\\.\\.x', ['u3..x']),
 ])
 def test_filter_by_name_that_looks_like_range(
        verify_unpaged, input, expected_user_names, user_factory):
@ -99,7 +111,11 @@ def test_filter_by_name_that_looks_like_range(
    db.session.add(user_factory(name='u2..'))
    db.session.add(user_factory(name='u3..x'))
    db.session.flush()
-    verify_unpaged(input, expected_user_names)
+    if not expected_user_names:
        with pytest.raises(errors.SearchError):
            verify_unpaged(input, expected_user_names)
    else:
        verify_unpaged(input, expected_user_names)
@pytest.mark.parametrize('input,expected_user_names', [