Skip to content

Commit c468d85

Browse files
committed
NBL-48 Make correct Python identifiers out of GridGain binary type names
This closes #297
1 parent d7d613a commit c468d85

5 files changed

Lines changed: 97 additions & 4 deletions

File tree

docs/datatypes/parsers.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ with your data, in to some API function as a *type conversion hint*.
131131
|0x1d |`Enum array`_ |iterable/list |:class:`~pygridgain.datatypes.standard.EnumArrayObject` |
132132
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
133133
|0x67 |`Complex object`_ |object |:class:`~pygridgain.datatypes.complex.BinaryObject` |
134-
+-----------+--------------------+-------------------------------+---------------------------------------------------------------- -+
134+
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
135135
|0x1b |`Wrapped data`_ |tuple |:class:`~pygridgain.datatypes.complex.WrappedDataObject` |
136136
+-----------+--------------------+-------------------------------+------------------------------------------------------------------+
137137

docs/examples.rst

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,23 @@ Here you can see how :class:`~pygridgain.binary.GenericObjectMeta` uses
240240
`attrs`_ package internally for creating nice `__init__()` and `__repr__()`
241241
methods.
242242

243-
You can reuse the autogenerated class for subsequent writes:
243+
In this case the autogenerated dataclass's name `Person` exactly matches
244+
the type name of the Complex object it represents (the content of the
245+
:py:attr:`~pygridgain.datatypes.base.GridGainDataTypeProps.type_name`
246+
property). But when a Complex object's class name contains characters that
247+
cannot be used in a Python identifier, for example:
248+
249+
- `.`, when fully qualified Java class names are used,
250+
- `$`, a common case for Scala classes,
251+
- `+`, internal class name separator in C#,
252+
253+
then `pygridgain` cannot maintain this match. In such cases `pygridgain` tries
254+
to sanitize a type name to derive a “good” dataclass name from it.
255+
256+
If your code needs consistent naming between the server and the client, make
257+
sure that your GridGain cluster is configured to use `simple class names`_.
258+
259+
Anyway, you can reuse the autogenerated dataclass for subsequent writes:
244260

245261
.. literalinclude:: ../examples/binary_basics.py
246262
:language: python
@@ -595,3 +611,4 @@ with the following message:
595611
.. _openssl: https://www.openssl.org/docs/manmaster/man1/openssl.html
596612
.. _Authentication: https://apacheignite.readme.io/docs/advanced-security#section-authentication
597613
.. _attrs: https://pypi.org/project/attrs/
614+
.. _simple class names: https://apacheignite.readme.io/docs/binary-marshaller#binary-name-mapper-and-binary-id-mapper

pygridgain/client.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242

4343
from collections import defaultdict, OrderedDict
4444
import random
45+
import re
4546
from typing import Dict, Iterable, List, Optional, Tuple, Type, Union
4647

4748
from .api.binary import get_binary_type, put_binary_type
@@ -56,7 +57,8 @@
5657
BinaryTypeError, CacheError, ReconnectError, SQLError, connection_errors,
5758
)
5859
from .utils import (
59-
entity_id, schema_id, select_version, status_to_exception, is_iterable
60+
capitalize, entity_id, schema_id, process_delimiter, select_version,
61+
status_to_exception, is_iterable,
6062
)
6163
from .binary import GenericObjectMeta
6264

@@ -82,6 +84,10 @@ class Client:
8284
_current_node: int = None
8385
_nodes: List[Connection] = None
8486

87+
# regular expressions used for sanitizing Complex object data class names
88+
_identifier = re.compile(r'[^0-9a-zA-Z_.+$]', re.UNICODE)
89+
_ident_start = re.compile(r'^[^a-zA-Z_]+', re.UNICODE)
90+
8591
affinity_version: Optional[Tuple] = None
8692
protocol_version: Optional[Tuple] = None
8793

@@ -383,11 +389,40 @@ def _sync_binary_registry(self, type_id: int):
383389
for schema in type_info['schemas']:
384390
if not self._registry[type_id].get(schema_id(schema), None):
385391
data_class = self._create_dataclass(
386-
type_info['type_name'],
392+
self._create_type_name(type_info['type_name']),
387393
schema,
388394
)
389395
self._registry[type_id][schema_id(schema)] = data_class
390396

397+
@classmethod
def _create_type_name(cls, type_name: str) -> str:
    """
    Derive a Python data class name from a GridGain binary type name.

    Covers the special cases handled by the Java-side
    `BinaryBasicNameMapper.simpleName()` and tries to produce
    a PEP8-friendly class name.

    :param type_name: binary type name as reported by the cluster,
    :return: sanitized name to be used for the generated data class.
    """
    # general sanitizing: drop everything matched by `_identifier`
    sanitized = cls._identifier.sub('', type_name)

    # every delimiter is processed the same way, in this order:
    #   '$' - name ending with '$' (Scala), name + '$' + some digits
    #         (anonymous class), '$$Lambda$' in the middle;
    #   '+' - .NET outer/inner class delimiter;
    #   '.' - Java fully qualified class name.
    for delimiter in ('$', '+', '.'):
        sanitized = process_delimiter(sanitized, delimiter)

    # start chars sanitizing: strip the leading run matched by
    # `_ident_start`, then upper-case the first remaining character
    return capitalize(cls._ident_start.sub('', sanitized))
425+
391426
def register_binary_type(
392427
self, data_class: Type, affinity_key_field: str = None,
393428
):

pygridgain/utils.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,3 +304,18 @@ def run(self):
304304
if not self.finished.is_set():
305305
self.function(*self.args, **self.kwargs)
306306
self.finished.set()
307+
308+
309+
def capitalize(string: str) -> str:
    """
    Upper-case the first character of the string.

    All the remaining characters are left exactly as they are, which is
    the difference from the built-in `str.capitalize()`. An empty string
    is returned unchanged.
    """
    head, tail = string[:1], string[1:]
    return head.upper() + tail
315+
316+
317+
def process_delimiter(name: str, delimiter: str) -> str:
    """
    Split the name by the delimiter, capitalize every part, and glue
    the parts back together without the delimiter.
    """
    parts = name.split(delimiter)
    return ''.join(map(capitalize, parts))

tests/test_binary.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,3 +278,29 @@ class MyBinaryTypeV2(
278278
assert not hasattr(result, 'test_bool')
279279

280280
migrate_cache.destroy()
281+
282+
283+
def test_complex_object_names(client):
    """
    Test the ability to work with Complex types whose names contain symbols
    not suitable for use in Python identifiers.
    """
    type_name = 'Non.Pythonic#type-name$'
    key = 'key'
    data = 'test'

    class NonPythonicallyNamedType(
        metaclass=GenericObjectMeta,
        type_name=type_name,
        schema=OrderedDict([
            ('field', String),
        ])
    ):
        pass

    cache = client.get_or_create_cache('test_name_cache')
    try:
        cache.put(key, NonPythonicallyNamedType(field=data))

        # the original (non-sanitized) type name must survive the round trip
        obj = cache.get(key)
        assert obj.type_name == type_name, 'Complex type name mismatch'
        assert obj.field == data, 'Complex object data failure'
    finally:
        # do not leave the test cache behind, even if an assertion fails
        cache.destroy()

0 commit comments

Comments
 (0)