Skip to content

Commit 2f2f573

Browse files
dmelnichuk authored and isapego committed
NBL-67: Python: Add examples on complex data types. Fix serialization defaults
This closes #502
1 parent c468d85 commit 2f2f573

6 files changed

Lines changed: 246 additions & 30 deletions

File tree

docs/datatypes/parsers.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,17 +122,17 @@ with your data, in to some API function as a *type conversion hint*.
122122
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
123123
|*Object collections, special types, and complex object* |
124124
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
125-
|0x17 |`Object array`_ |iterable/list |:class:`~pygridgain.datatypes.complex.ObjectArrayObject` |
125+
|0x17 |`Object array`_ |tuple[int, iterable/list] |:class:`~pygridgain.datatypes.complex.ObjectArrayObject` |
126126
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
127-
|0x18 |`Collection`_ |tuple |:class:`~pygridgain.datatypes.complex.CollectionObject` |
127+
|0x18 |`Collection`_ |tuple[int, iterable/list] |:class:`~pygridgain.datatypes.complex.CollectionObject` |
128128
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
129-
|0x19 |`Map`_ |dict, collections.OrderedDict |:class:`~pygridgain.datatypes.complex.MapObject` |
129+
|0x19 |`Map`_ |tuple[int, dict/OrderedDict] |:class:`~pygridgain.datatypes.complex.MapObject` |
130130
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
131131
|0x1d |`Enum array`_ |iterable/list |:class:`~pygridgain.datatypes.standard.EnumArrayObject` |
132132
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
133133
|0x67 |`Complex object`_ |object |:class:`~pygridgain.datatypes.complex.BinaryObject` |
134134
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
135-
|0x1b |`Wrapped data`_ |tuple |:class:`~pygridgain.datatypes.complex.WrappedDataObject` |
135+
|0x1b |`Wrapped data`_ |tuple[int, bytes] |:class:`~pygridgain.datatypes.complex.WrappedDataObject` |
136136
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
137137

138138
.. _Byte: https://apacheignite.readme.io/docs/binary-client-protocol-data-format#section-byte

docs/examples.rst

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,62 @@ Destroy created cache and close connection.
120120

121121
.. _sql_examples:
122122

123+
Object collections
124+
------------------
125+
126+
File: `get_and_put_complex.py`_.
127+
128+
GridGain collection types are represented in `pygridgain` as two-tuples.
129+
First comes the collection type ID or deserialization hint, which is specific to
130+
each collection type. Second comes the data value.
131+
132+
.. literalinclude:: ../examples/get_and_put_complex.py
133+
:language: python
134+
:lines: 19-21
135+
136+
Map
137+
===
138+
139+
For Python prior to 3.6, it might be important to distinguish between ordered
140+
(`collections.OrderedDict`) and unordered (`dict`) dictionary types, so you
141+
could use :py:attr:`~pygridgain.datatypes.complex.Map.LINKED_HASH_MAP`
142+
for the former and :py:attr:`~pygridgain.datatypes.complex.Map.HASH_MAP`
143+
for the latter.
144+
145+
Since CPython 3.6 all dictionaries became de facto ordered. You can always use
146+
`LINKED_HASH_MAP` as a safe default.
147+
148+
.. literalinclude:: ../examples/get_and_put_complex.py
149+
:language: python
150+
:lines: 29-41
151+
152+
Collection
153+
==========
154+
155+
See :class:`~pygridgain.datatypes.complex.CollectionObject` and GridGain
156+
documentation on `Collection`_ type for the description of various Java
157+
collection types. Note that not all of them have a direct Python
158+
representative. For example, Python does not have ordered sets (it is indeed
159+
recommended to use `OrderedDict`'s keys and disregard its values).
160+
161+
As for `pygridgain`, the rules are simple: pass any iterable as data,
162+
and you always get a `list` back.
163+
164+
.. literalinclude:: ../examples/get_and_put_complex.py
165+
:language: python
166+
:lines: 43-57
167+
168+
Object array
169+
============
170+
171+
:class:`~pygridgain.datatypes.complex.ObjectArrayObject` has very limited
172+
functionality in `pygridgain`, since no type checks can be enforced on its
173+
contents. But it can still be used for interoperability with Java.
174+
175+
.. literalinclude:: ../examples/get_and_put_complex.py
176+
:language: python
177+
:lines: 59-68
178+
123179
SQL
124180
---
125181
File: `sql.py`_.
@@ -611,4 +667,6 @@ with the following message:
611667
.. _openssl: https://www.openssl.org/docs/manmaster/man1/openssl.html
612668
.. _Authentication: https://apacheignite.readme.io/docs/advanced-security#section-authentication
613669
.. _attrs: https://pypi.org/project/attrs/
614-
.. _simple class names: https://apacheignite.readme.io/docs/binary-marshaller#binary-name-mapper-and-binary-id-mapper
670+
.. _get_and_put_complex.py: https://github.com/gridgain/gridgain/tree/master/modules/platforms/python/examples/get_and_put_complex.py
671+
.. _Collection: https://apacheignite.readme.io/docs/binary-client-protocol-data-format#section-collection
672+
.. _simple class names: https://apacheignite.readme.io/docs/binary-marshaller#binary-name-mapper-and-binary-id-mapper

examples/get_and_put_complex.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#
2+
# Copyright 2019 GridGain Systems, Inc. and Contributors.
3+
#
4+
# Licensed under the GridGain Community Edition License (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# https://www.gridgain.com/products/software/community-edition/gridgain-community-edition-license
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
from collections import OrderedDict
17+
18+
from pygridgain import Client
19+
from pygridgain.datatypes import (
20+
CollectionObject, MapObject, ObjectArrayObject,
21+
)
22+
23+
24+
client = Client()
25+
client.connect('127.0.0.1', 10800)
26+
27+
my_cache = client.get_or_create_cache('my cache')
28+
29+
value = OrderedDict([(1, 'test'), ('key', 2.0)])
30+
31+
# saving ordered dictionary
32+
type_id = MapObject.LINKED_HASH_MAP
33+
my_cache.put('my dict', (type_id, value))
34+
result = my_cache.get('my dict')
35+
print(result) # (2, OrderedDict([(1, 'test'), ('key', 2.0)]))
36+
37+
# saving unordered dictionary
38+
type_id = MapObject.HASH_MAP
39+
my_cache.put('my dict', (type_id, value))
40+
result = my_cache.get('my dict')
41+
print(result) # (1, {'key': 2.0, 1: 'test'})
42+
43+
type_id = CollectionObject.LINKED_LIST
44+
value = [1, '2', 3.0]
45+
46+
my_cache.put('my list', (type_id, value))
47+
48+
result = my_cache.get('my list')
49+
print(result) # (2, [1, '2', 3.0])
50+
51+
type_id = CollectionObject.HASH_SET
52+
value = [4, 4, 'test', 5.6]
53+
54+
my_cache.put('my set', (type_id, value))
55+
56+
result = my_cache.get('my set')
57+
print(result) # (3, [5.6, 4, 'test'])
58+
59+
type_id = ObjectArrayObject.OBJECT
60+
value = [7, '8', 9.0]
61+
62+
my_cache.put(
63+
'my array of objects',
64+
(type_id, value),
65+
value_hint=ObjectArrayObject # this hint is mandatory!
66+
)
67+
result = my_cache.get('my array of objects')
68+
print(result) # (-1, [7, '8', 9.0])

pygridgain/datatypes/complex.py

Lines changed: 105 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,16 @@
3535

3636
class ObjectArrayObject(GridGainDataType):
3737
"""
38-
Array of objects of any type. Its Python representation is
39-
tuple(type_id, iterable of any type).
38+
Array of Ignite objects of any consistent type. Its Python representation
39+
is tuple(type_id, iterable of any type). The only type ID that makes sense
40+
in the Python client is :py:attr:`~OBJECT`, which corresponds directly to
41+
the root object type in the Java type hierarchy (`java.lang.Object`).
4042
"""
43+
OBJECT = -1
44+
4145
_type_name = NAME_OBJ_ARR
4246
_type_id = TYPE_OBJ_ARR
4347
type_code = TC_OBJECT_ARRAY
44-
type_or_id_name = 'type_id'
4548

4649
@staticmethod
4750
def hashcode(value: Iterable) -> int:
@@ -95,7 +98,7 @@ def to_python(cls, ctype_object, *args, **kwargs):
9598
*args, **kwargs
9699
)
97100
)
98-
return getattr(ctype_object, cls.type_or_id_name), result
101+
return ctype_object.type_id, result
99102

100103
@classmethod
101104
def from_python(cls, value):
@@ -112,7 +115,7 @@ def from_python(cls, value):
112115
value = [value]
113116
length = 1
114117
header.length = length
115-
setattr(header, cls.type_or_id_name, type_or_id)
118+
header.type_id = type_or_id
116119
buffer = bytearray(header)
117120

118121
for x in value:
@@ -176,18 +179,45 @@ def from_python(cls, value):
176179
raise ParseError('Send unwrapped data.')
177180

178181

179-
class CollectionObject(ObjectArrayObject):
182+
class CollectionObject(GridGainDataType):
180183
"""
181-
Just like object array, but contains deserialization type hint instead of
182-
type id. This hint is also useless in Python, because the list type along
183-
covers all the use cases.
184-
185-
Also represented as tuple(type_id, iterable of any type) in Python.
184+
Similar to object array, but contains platform-agnostic deserialization
185+
type hint instead of type ID.
186+
187+
Represented as tuple(hint, iterable of any type) in Python. Hints are:
188+
189+
* :py:attr:`~pygridgain.datatypes.complex.CollectionObject.USER_SET` −
190+
a set of unique Ignite thin data objects. The exact Java type of a set
191+
is undefined,
192+
* :py:attr:`~pygridgain.datatypes.complex.CollectionObject.USER_COL` −
193+
a collection of Ignite thin data objects. The exact Java type
194+
of a collection is undefined,
195+
* :py:attr:`~pygridgain.datatypes.complex.CollectionObject.ARR_LIST` −
196+
represents the `java.util.ArrayList` type,
197+
* :py:attr:`~pygridgain.datatypes.complex.CollectionObject.LINKED_LIST` −
198+
represents the `java.util.LinkedList` type,
199+
* :py:attr:`~pygridgain.datatypes.complex.CollectionObject.HASH_SET` −
200+
represents the `java.util.HashSet` type,
201+
* :py:attr:`~pygridgain.datatypes.complex.CollectionObject.LINKED_HASH_SET` −
202+
represents the `java.util.LinkedHashSet` type,
203+
* :py:attr:`~pygridgain.datatypes.complex.CollectionObject.SINGLETON_LIST` −
204+
represents the return type of the `java.util.Collections.singletonList`
205+
method.
206+
207+
It is safe to say that `USER_SET` (`set` in Python) and `USER_COL` (`list`)
208+
can cover all imaginable use cases from the Python perspective.
186209
"""
210+
USER_SET = -1
211+
USER_COL = 0
212+
ARR_LIST = 1
213+
LINKED_LIST = 2
214+
HASH_SET = 3
215+
LINKED_HASH_SET = 4
216+
SINGLETON_LIST = 5
217+
187218
_type_name = NAME_COL
188219
_type_id = TYPE_COL
189220
type_code = TC_COLLECTION
190-
type_or_id_name = 'type'
191221
pythonic = list
192222
default = []
193223

@@ -211,13 +241,69 @@ def build_header(cls):
211241
}
212242
)
213243

244+
@classmethod
245+
def parse(cls, client: 'Client'):
246+
header_class = cls.build_header()
247+
buffer = client.recv(ctypes.sizeof(header_class))
248+
header = header_class.from_buffer_copy(buffer)
249+
fields = []
250+
251+
for i in range(header.length):
252+
c_type, buffer_fragment = AnyDataObject.parse(client)
253+
buffer += buffer_fragment
254+
fields.append(('element_{}'.format(i), c_type))
255+
256+
final_class = type(
257+
cls.__name__,
258+
(header_class,),
259+
{
260+
'_pack_': 1,
261+
'_fields_': fields,
262+
}
263+
)
264+
return final_class, buffer
265+
266+
@classmethod
267+
def to_python(cls, ctype_object, *args, **kwargs):
268+
result = []
269+
for i in range(ctype_object.length):
270+
result.append(
271+
AnyDataObject.to_python(
272+
getattr(ctype_object, 'element_{}'.format(i)),
273+
*args, **kwargs
274+
)
275+
)
276+
return ctype_object.type, result
277+
278+
@classmethod
279+
def from_python(cls, value):
280+
type_or_id, value = value
281+
header_class = cls.build_header()
282+
header = header_class()
283+
header.type_code = int.from_bytes(
284+
cls.type_code,
285+
byteorder=PROTOCOL_BYTE_ORDER
286+
)
287+
try:
288+
length = len(value)
289+
except TypeError:
290+
value = [value]
291+
length = 1
292+
header.length = length
293+
header.type = type_or_id
294+
buffer = bytearray(header)
295+
296+
for x in value:
297+
buffer += infer_from_python(x)
298+
return bytes(buffer)
299+
214300

215301
class Map(GridGainDataType):
216302
"""
217303
Dictionary type, payload-only.
218304
219-
GridGain does not track the order of key-value pairs in its caches, hence
220-
the ordinary Python dict type, not the collections.OrderedDict.
305+
Keys and values in map are independent data objects, but `count`
306+
counts pairs. Very annoying.
221307
"""
222308
_type_name = NAME_MAP
223309
_type_id = TYPE_MAP
@@ -304,11 +390,12 @@ def from_python(cls, value, type_id=None):
304390

305391
class MapObject(Map):
306392
"""
307-
This is a dictionary type. Type conversion hint can be a `HASH_MAP`
308-
(ordinary dict) or `LINKED_HASH_MAP` (collections.OrderedDict).
393+
This is a dictionary type.
309394
310-
Keys and values in map are independent data objects, but `count`
311-
counts pairs. Very annoying.
395+
Represented as tuple(type_id, value).
396+
397+
Type ID can be a :py:attr:`~HASH_MAP` (corresponds to an ordinary `dict`
398+
in Python) or a :py:attr:`~LINKED_HASH_MAP` (`collections.OrderedDict`).
312399
"""
313400
_type_name = NAME_MAP
314401
_type_id = TYPE_MAP

pygridgain/datatypes/internal.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ def _init_python_array_map(cls):
348348
@classmethod
349349
def map_python_type(cls, value):
350350
from pygridgain.datatypes import (
351-
MapObject, ObjectArrayObject, BinaryObject,
351+
MapObject, CollectionObject, BinaryObject,
352352
)
353353

354354
if cls._python_map is None:
@@ -362,7 +362,7 @@ def map_python_type(cls, value):
362362
if value_subtype in cls._python_array_map:
363363
return cls._python_array_map[value_subtype]
364364

365-
# a little heuristics (order may be important)
365+
# a little heuristics (order is important)
366366
if all([
367367
value_subtype is None,
368368
len(value) == 2,
@@ -377,7 +377,9 @@ def map_python_type(cls, value):
377377
isinstance(value[0], int),
378378
is_iterable(value[1]),
379379
]):
380-
return ObjectArrayObject
380+
return CollectionObject
381+
382+
# no default for ObjectArrayObject, sorry
381383

382384
raise TypeError(
383385
'Type `array of {}` is invalid'.format(value_subtype)

tests/test_datatypes.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16+
from collections import OrderedDict
1617
import ctypes
1718
from datetime import datetime, timedelta
1819
import decimal
@@ -118,14 +119,14 @@
118119
((-1, [(6001, 1), (6002, 2), (6003, 3)]), BinaryEnumArrayObject),
119120
120121
# object array
121-
((-1, [1, 2, decimal.Decimal('3')]), None),
122+
((ObjectArrayObject.OBJECT, [1, 2, decimal.Decimal('3')]), ObjectArrayObject),
122123
123124
# collection
124-
((3, [1, 2, 3]), CollectionObject),
125+
((CollectionObject.LINKED_LIST, [1, 2, 3]), None),
125126
126127
# map
127-
((1, {'key': 4, 5: 6.0}), None),
128-
((2, {'key': 4, 5: 6.0}), None),
128+
((MapObject.HASH_MAP, {'key': 4, 5: 6.0}), None),
129+
((MapObject.LINKED_HASH_MAP, OrderedDict([('key', 4), (5, 6.0)])), None),
129130
]
130131
)
131132
def test_put_get_data(client, cache, value, value_hint):

0 commit comments

Comments
 (0)