server/func/image_hash: added docstrings to functions

This commit is contained in:
Shyam Sunder 2020-03-13 19:12:09 -04:00
parent 1a8de9ef3a
commit db0c33bb14
3 changed files with 40 additions and 16 deletions

View file

@ -25,10 +25,11 @@ SAMPLE_WORDS = 16
MAX_WORDS = 63 MAX_WORDS = 63
SIG_CHUNK_BITS = 32 SIG_CHUNK_BITS = 32
SIG_BASE = 2*N_LEVELS + 2 SIG_NUMS = 8 * N * N
SIG_BASE = 2 * N_LEVELS + 2
SIG_CHUNK_WIDTH = int(SIG_CHUNK_BITS / math.log2(SIG_BASE)) SIG_CHUNK_WIDTH = int(SIG_CHUNK_BITS / math.log2(SIG_BASE))
SIG_CHUNK_NUMS = 8*N*N / SIG_CHUNK_WIDTH SIG_CHUNK_NUMS = SIG_NUMS / SIG_CHUNK_WIDTH
assert 8*N*N % SIG_CHUNK_WIDTH == 0 assert SIG_NUMS % SIG_CHUNK_WIDTH == 0
Window = Tuple[Tuple[float, float], Tuple[float, float]] Window = Tuple[Tuple[float, float], Tuple[float, float]]
NpMatrix = np.ndarray NpMatrix = np.ndarray
@ -229,6 +230,18 @@ def normalized_distance(
def pack_signature(signature: NpMatrix) -> bytes: def pack_signature(signature: NpMatrix) -> bytes:
'''
Serializes the signature vector for efficient storage in a database.
Shifts the range of the signature vector from [-N_LEVELS, +N_LEVELS]
to [0, base - 1], where base = 2 * N_LEVELS + 1.
The vector can then be broken up into chunks, with each chunk
consisting of SIG_CHUNK_WIDTH digits of radix `base`.
This is then converted into a more packed array consisting of
uint32 elements (for SIG_CHUNK_BITS = 32).
'''
base = 2 * N_LEVELS + 1 base = 2 * N_LEVELS + 1
coding_vector = np.flipud(SIG_BASE**np.arange(SIG_CHUNK_WIDTH)) coding_vector = np.flipud(SIG_BASE**np.arange(SIG_CHUNK_WIDTH))
return np.array([ return np.array([
@ -238,6 +251,11 @@ def pack_signature(signature: NpMatrix) -> bytes:
def unpack_signature(packed: bytes) -> NpMatrix: def unpack_signature(packed: bytes) -> NpMatrix:
'''
Deserializes the signature vector once received from the database.
Functions as an inverse transformation of pack_signature().
'''
base = 2 * N_LEVELS + 1 base = 2 * N_LEVELS + 1
return np.ravel(np.array([ return np.ravel(np.array([
[ [

View file

@ -479,13 +479,10 @@ def test_sound(post: model.Post, content: bytes) -> None:
def purge_post_signature(post: model.Post) -> None: def purge_post_signature(post: model.Post) -> None:
old_signature = ( (db.session
db.session
.query(model.PostSignature) .query(model.PostSignature)
.filter(model.PostSignature.post_id == post.post_id) .filter(model.PostSignature.post_id == post.post_id)
.one_or_none()) .delete())
if old_signature:
db.session.delete(old_signature)
def generate_post_signature(post: model.Post, content: bytes) -> None: def generate_post_signature(post: model.Post, content: bytes) -> None:
@ -788,24 +785,25 @@ def merge_posts(
.values(child_id=target_post_id)) .values(child_id=target_post_id))
db.session.execute(update_stmt) db.session.execute(update_stmt)
def transfer_flags(source_post_id: int, target_post_id: int) -> None:
target = get_post_by_id(target_post_id)
source = get_post_by_id(source_post_id)
target.flags = source.flags
merge_tags(source_post.post_id, target_post.post_id) merge_tags(source_post.post_id, target_post.post_id)
merge_comments(source_post.post_id, target_post.post_id) merge_comments(source_post.post_id, target_post.post_id)
merge_scores(source_post.post_id, target_post.post_id) merge_scores(source_post.post_id, target_post.post_id)
merge_favorites(source_post.post_id, target_post.post_id) merge_favorites(source_post.post_id, target_post.post_id)
merge_relations(source_post.post_id, target_post.post_id) merge_relations(source_post.post_id, target_post.post_id)
def transfer_flags(source_post_id: int, target_post_id: int) -> None:
target = get_post_by_id(target_post_id)
source = get_post_by_id(source_post_id)
target.flags = source.flags
db.session.flush()
content = None content = None
if replace_content: if replace_content:
content = files.get(get_post_content_path(source_post)) content = files.get(get_post_content_path(source_post))
transfer_flags(source_post.post_id, target_post.post_id) transfer_flags(source_post.post_id, target_post.post_id)
purge_post_signature(source_post)
purge_post_signature(target_post)
# works around an undiagnosed issue with SQLAlchemy's cascade deletions
purge_post_signature(source_post)
delete(source_post) delete(source_post)
db.session.flush() db.session.flush()
@ -826,6 +824,14 @@ def search_by_image(image_content: bytes) -> List[Tuple[float, model.Post]]:
query_signature = image_hash.generate_signature(image_content) query_signature = image_hash.generate_signature(image_content)
query_words = image_hash.generate_words(query_signature) query_words = image_hash.generate_words(query_signature)
'''
The unnest function is used here to expand one row containing the 'words'
array into multiple rows, each containing a single word.
Documentation of the unnest function can be found here:
https://www.postgresql.org/docs/9.2/functions-array.html
'''
dbquery = ''' dbquery = '''
SELECT s.post_id, s.signature, count(a.query) AS score SELECT s.post_id, s.signature, count(a.query) AS score
FROM post_signature AS s, unnest(s.words, :q) AS a(word, query) FROM post_signature AS s, unnest(s.words, :q) AS a(word, query)

View file

@ -207,7 +207,7 @@ class Post(Base):
signature = sa.orm.relationship( signature = sa.orm.relationship(
'PostSignature', 'PostSignature',
uselist=False, uselist=False,
cascade='all, delete-orphan', cascade='all, delete, delete-orphan',
lazy='joined') lazy='joined')
relations = sa.orm.relationship( relations = sa.orm.relationship(
'Post', 'Post',