Skip to content

Commit 4290f23

Browse files
authored
GG-31727: Fix hashing (#11)
1 parent 9f7c5ba commit 4290f23

2 files changed

Lines changed: 75 additions & 8 deletions

File tree

pygridgain/utils.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,36 @@ def unwrap_binary(client: 'Client', wrapped: tuple) -> object:
105105
return result
106106

107107

108-
def hashcode(string: Union[str, bytes]) -> int:
108+
def hashcode(data: Union[str, bytes]) -> int:
109109
"""
110110
Calculate hash code used for identifying objects in GridGain binary API.
111111
112112
:param data: UTF-8-encoded string identifier or binary buffer,
113113
:return: hash code.
114114
"""
115-
result = 1 if isinstance(string, (bytes, bytearray)) else 0
116-
for char in string:
117-
try:
118-
char = ord(char)
119-
except TypeError:
120-
pass
121-
result = int_overflow(31 * result + char)
115+
116+
if isinstance(data, str):
117+
"""
118+
For strings we iterate over code points, which are of the int type
119+
and can take up to 4 bytes and can only be positive.
120+
"""
121+
result = 0
122+
for char in data:
123+
try:
124+
char_val = ord(char)
125+
result = int_overflow(31 * result + char_val)
126+
except TypeError:
127+
pass
128+
else:
129+
"""
130+
For byte array we iterate over bytes which only take 1 byte and can
131+
be negative. For this reason we use ctypes.c_byte() to convert each byte to its signed value.
132+
"""
133+
result = 1
134+
for byte in data:
135+
byte = ctypes.c_byte(byte).value
136+
result = int_overflow(31 * result + byte)
137+
122138
return result
123139

124140

tests/test_binary.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,3 +304,54 @@ class NonPythonicallyNamedType(
304304
obj = cache.get(key)
305305
assert obj.type_name == type_name, 'Complex type name mismatch'
306306
assert obj.field == data, 'Complex object data failure'
307+
308+
309+
def test_complex_object_hash(client):
310+
"""
311+
Test that Python client correctly calculates hash of the binary
312+
object that contains negative bytes.
313+
"""
314+
class Internal(
315+
metaclass=GenericObjectMeta,
316+
type_name='Internal',
317+
schema=OrderedDict([
318+
('id', IntObject),
319+
('str', String),
320+
])
321+
):
322+
pass
323+
324+
class TestObject(
325+
metaclass=GenericObjectMeta,
326+
type_name='TestObject',
327+
schema=OrderedDict([
328+
('id', IntObject),
329+
('str', String),
330+
('internal', BinaryObject),
331+
])
332+
):
333+
pass
334+
335+
obj_ascii = TestObject()
336+
obj_ascii.id = 1
337+
obj_ascii.str = 'test_string'
338+
339+
obj_ascii.internal = Internal()
340+
obj_ascii.internal.id = 2
341+
obj_ascii.internal.str = 'lorem ipsum'
342+
343+
hash_ascii = BinaryObject.hashcode(obj_ascii, client=client)
344+
345+
assert hash_ascii == -1314567146, 'Invalid hashcode value for object with ASCII strings'
346+
347+
obj_utf8 = TestObject()
348+
obj_utf8.id = 1
349+
obj_utf8.str = 'юникод'
350+
351+
obj_utf8.internal = Internal()
352+
obj_utf8.internal.id = 2
353+
obj_utf8.internal.str = 'ユニコード'
354+
355+
hash_utf8 = BinaryObject.hashcode(obj_utf8, client=client)
356+
357+
assert hash_utf8 == -1945378474, 'Invalid hashcode value for object with UTF-8 strings'

0 commit comments

Comments
 (0)