Skip to content

Commit e0c22ef

Browse files
committed
IGNITE-14059: Fix hashing of complex objects
This closes #5
1 parent b13d43b commit e0c22ef

2 files changed

Lines changed: 77 additions & 9 deletions

File tree

pyignite/utils.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -106,20 +106,37 @@ def unwrap_binary(client: 'Client', wrapped: tuple) -> object:
106106
return result
107107

108108

109-
def hashcode(string: Union[str, bytes]) -> int:
109+
def hashcode(data: Union[str, bytes]) -> int:
110110
"""
111111
Calculate hash code used for identifying objects in Ignite binary API.
112112
113-
:param string: UTF-8-encoded string identifier of binary buffer,
113+
:param data: UTF-8-encoded string identifier of binary buffer or byte array
114114
:return: hash code.
115115
"""
116-
result = 1 if isinstance(string, (bytes, bytearray)) else 0
117-
for char in string:
118-
try:
119-
char = ord(char)
120-
except TypeError:
121-
pass
122-
result = int_overflow(31 * result + char)
116+
if isinstance(data, str):
117+
"""
118+
For strings we iterate over code points, which are of the int type
119+
and can take up to 4 bytes and can only be positive.
120+
"""
121+
result = 0
122+
for char in data:
123+
try:
124+
char_val = ord(char)
125+
result = int_overflow(31 * result + char_val)
126+
except TypeError:
127+
pass
128+
else:
129+
"""
130+
For byte arrays we iterate over bytes, each of which takes only 1 byte. But
131+
according to the protocol, bytes during hashing should be treated as signed
132+
integer numbers 8 bits long. On the other hand, elements in Python's `bytes`
133+
are unsigned. For this reason we use ctypes.c_byte() to make them
134+
signed.
135+
"""
136+
result = 1
137+
for byte in data:
138+
byte = ctypes.c_byte(byte).value
139+
result = int_overflow(31 * result + byte)
123140
return result
124141

125142

tests/test_binary.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,3 +304,54 @@ class NonPythonicallyNamedType(
304304
obj = cache.get(key)
305305
assert obj.type_name == type_name, 'Complex type name mismatch'
306306
assert obj.field == data, 'Complex object data failure'
307+
308+
309+
def test_complex_object_hash(client):
310+
"""
311+
Test that Python client correctly calculates hash of the binary
312+
object that contains negative bytes.
313+
"""
314+
class Internal(
315+
metaclass=GenericObjectMeta,
316+
type_name='Internal',
317+
schema=OrderedDict([
318+
('id', IntObject),
319+
('str', String),
320+
])
321+
):
322+
pass
323+
324+
class TestObject(
325+
metaclass=GenericObjectMeta,
326+
type_name='TestObject',
327+
schema=OrderedDict([
328+
('id', IntObject),
329+
('str', String),
330+
('internal', BinaryObject),
331+
])
332+
):
333+
pass
334+
335+
obj_ascii = TestObject()
336+
obj_ascii.id = 1
337+
obj_ascii.str = 'test_string'
338+
339+
obj_ascii.internal = Internal()
340+
obj_ascii.internal.id = 2
341+
obj_ascii.internal.str = 'lorem ipsum'
342+
343+
hash_ascii = BinaryObject.hashcode(obj_ascii, client=client)
344+
345+
assert hash_ascii == -1314567146, 'Invalid hashcode value for object with ASCII strings'
346+
347+
obj_utf8 = TestObject()
348+
obj_utf8.id = 1
349+
obj_utf8.str = 'юникод'
350+
351+
obj_utf8.internal = Internal()
352+
obj_utf8.internal.id = 2
353+
obj_utf8.internal.str = 'ユニコード'
354+
355+
hash_utf8 = BinaryObject.hashcode(obj_utf8, client=client)
356+
357+
assert hash_utf8 == -1945378474, 'Invalid hashcode value for object with UTF-8 strings'

0 commit comments

Comments
 (0)