# Source: scim2_server/backend.py (python-scim/scim2-server @ 15c4f4f7d4f98f5dad5452b48a623bda8f32e260)

import dataclasses
import datetime
import operator
import pickle
import uuid
from threading import Lock
from typing import Union

from scim2_filter_parser import lexer
from scim2_filter_parser.parser import SCIMParser
from scim2_models import Attribute
from scim2_models import BaseModel
from scim2_models import CaseExact
from scim2_models import Extension
from scim2_models import Meta
from scim2_models import Resource
from scim2_models import ResourceType
from scim2_models import Schema
from scim2_models import SearchRequest
from scim2_models import Uniqueness
from scim2_models import UniquenessException
from werkzeug.http import generate_etag

from scim2_server.filter import evaluate_filter
from scim2_server.operators import ResolveSortOperator
from scim2_server.utils import get_by_alias


class Backend:
    """Common base for SCIM provider backends.

    The base class only maintains the registries of schemas, resource
    types and the models built from them. Every method that reads or
    writes resources raises ``NotImplementedError`` and has to be
    provided by a subclass.
    """

    def __init__(self):
        # Registered schemas, keyed by schema id.
        self.schemas: dict[str, Schema] = {}
        # Registered resource types, keyed by id and by lower-cased endpoint.
        self.resource_types: dict[str, ResourceType] = {}
        self.resource_types_by_endpoint: dict[str, ResourceType] = {}
        # Model built for each resource type, keyed by resource type id.
        self.models_dict: dict[str, BaseModel] = {}

    def __enter__(self):
        """Enter a ``with`` block around backend operations.

        Subclasses can hook into this to implement transactions; the
        base implementation simply hands back the backend itself.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the ``with`` block; the base implementation does nothing."""
        return None

    def register_schema(self, schema: Schema):
        """Make a Schema known to the backend, keyed by its id."""
        self.schemas[schema.id] = schema

    def get_schemas(self):
        """Return every schema that has been registered."""
        return self.schemas.values()

    def get_schema(self, schema_id: str) -> Schema | None:
        """Look up a registered schema by id, or return None."""
        return self.schemas.get(schema_id)

    def register_resource_type(self, resource_type: ResourceType):
        """Make a ResourceType known to the backend.

        The main schema of the resource type and the schemas of all of
        its extensions have to be registered before this is called.

        :param resource_type: The resource type to register.
        :raises RuntimeError: If the main schema or one of the extension
            schemas has not been registered.
        """
        extension_refs = resource_type.schema_extensions or []

        # Validate the main schema first, then each extension in order.
        required_schema_ids = [resource_type.schema_]
        required_schema_ids.extend(ref.schema_ for ref in extension_refs)
        for required_id in required_schema_ids:
            if required_id not in self.schemas:
                raise RuntimeError(f"Unknown schema: {required_id}")

        type_id = resource_type.id
        self.resource_types[type_id] = resource_type
        # Endpoints are stored lower-cased; lookups lower-case their input too.
        self.resource_types_by_endpoint[resource_type.endpoint.lower()] = resource_type

        extension_models = [
            Extension.from_schema(self.get_schema(ref.schema_))
            for ref in extension_refs
        ]
        model = Resource.from_schema(self.get_schema(resource_type.schema_))
        self.models_dict[type_id] = model
        if extension_models:
            # Parametrize the resource model with the union of its extensions.
            self.models_dict[type_id] = model[
                Union[tuple(extension_models)]  # noqa: UP007
            ]

    def get_resource_types(self):
        """Return every resource type that has been registered."""
        return self.resource_types.values()

    def get_resource_type(self, resource_type_id: str) -> ResourceType | None:
        """Look up a registered resource type by id, or return None."""
        return self.resource_types.get(resource_type_id)

    def get_resource_type_by_endpoint(self, endpoint: str) -> ResourceType | None:
        """Look up a resource type by endpoint, ignoring letter case."""
        return self.resource_types_by_endpoint.get(endpoint.lower())

    def get_model(self, resource_type_id: str) -> BaseModel | None:
        """Return the model built for a resource type id, or None."""
        return self.models_dict.get(resource_type_id)

    def get_models(self):
        """Return the models built for all registered resource types."""
        return self.models_dict.values()

    def query_resources(
        self,
        search_request: SearchRequest,
        resource_type_id: str | None = None,
    ) -> tuple[int, list[Resource]]:
        """Search the backend for resources.

        :param search_request: SearchRequest instance that describes
            the query.
        :param resource_type_id: ID of the resource type to search. If
            None, the search spans all resource types.
        :return: A tuple of "total results" and a List of the matching
            Resources. The List has to hold copies: mutating its
            elements must not alter the data stored in the backend.
        :raises TooManyException: A backend that can only search one
            resource type at a time may raise this when
            resource_type_id is None.
        """
        raise NotImplementedError

    def get_resource(self, resource_type_id: str, object_id: str) -> Resource | None:
        """Fetch a single resource by its ID.

        :param resource_type_id: ID of the resource type the object
            belongs to.
        :param object_id: ID of the object to fetch.
        :return: The resource if it exists, None otherwise. The result
            has to be a copy: modifying it must not alter the data
            stored in the backend.
        """
        raise NotImplementedError

    def delete_resource(self, resource_type_id: str, object_id: str) -> bool:
        """Remove a resource.

        :param resource_type_id: ID of the resource type the object
            belongs to.
        :param object_id: ID of the object to remove.
        :return: True if a resource was removed, False otherwise.
        """
        raise NotImplementedError

    def create_resource(
        self, resource_type_id: str, resource: Resource
    ) -> Resource | None:
        """Store a new resource.

        :param resource_type_id: ID of the resource type to create.
        :param resource: Resource to store.
        :return: The created resource. Implementations should fill in
            the system-defined attributes (ID, Metadata). The returned
            object may be the one that was passed in.
        """
        raise NotImplementedError

    def update_resource(
        self, resource_type_id: str, resource: Resource
    ) -> Resource | None:
        """Replace a stored resource, identified by the ID it carries.

        :param resource_type_id: ID of the resource type to update.
        :param resource: Resource holding the new state.
        :return: The updated resource. Implementations should refresh
            "meta.lastModified". The returned object may be the one
            that was passed in.
        """
        raise NotImplementedError


class InMemoryBackend(Backend):
    """An example in-memory backend for the SCIM provider.

    It is not optimized for performance. Many operations are O(n) or
    worse, whereas they would perform better with an actual production
    database in the backend. This is intentional to keep the
    implementation simple.

    All resources live in a single list. Using the backend as a context
    manager acquires a ``threading.Lock`` on entry and releases it on
    exit.
    """

    @dataclasses.dataclass
    class UniquenessDescriptor:
        """Used to mimic uniqueness constraints e.g. from a SQL database."""

        # Extension schema the attribute lives in; None for the main schema.
        schema: str | None
        # Name (alias) of the attribute that has to be unique.
        attribute_name: str
        # When False, string values are compared case-insensitively.
        case_exact: bool

        def get_attribute(self, resource: Resource):
            """Return the value used to compare ``resource`` for uniqueness.

            :param resource: Resource to read the attribute from.
            :return: The attribute value, lower-cased if it is a string
                and the descriptor is not case exact. None if the
                attribute, or the extension holding it, is not set.
            """
            if self.schema is not None:
                schema_field = get_by_alias(type(resource), self.schema)
                resource = getattr(resource, schema_field)
                if resource is None:
                    # The extension is absent, so there is no value to compare.
                    return None

            attribute_field = get_by_alias(type(resource), self.attribute_name)
            result = getattr(resource, attribute_field)
            # Only strings can be case-folded; None and other types pass through.
            if not self.case_exact and isinstance(result, str):
                result = result.lower()
            return result

    @classmethod
    def collect_unique_attrs(
        cls, attributes: list[Attribute], schema: str | None
    ) -> list[UniquenessDescriptor]:
        """Build descriptors for the attributes that declare uniqueness.

        :param attributes: Top-level attributes of one schema.
        :param schema: ID of the extension schema the attributes belong
            to, or None for the main schema of a resource.
        :return: One descriptor per attribute whose uniqueness is set
            to something other than "none".
        """
        # NOTE(review): this comparison assumes Attribute.case_exact holds a
        # CaseExact member; if it is a plain bool the result may always be
        # False. Confirm against scim2_models.
        return [
            cls.UniquenessDescriptor(
                schema, attr.name, attr.case_exact == CaseExact.true
            )
            for attr in attributes or []
            # An unset uniqueness is treated like Uniqueness.none.
            if attr.uniqueness not in (None, Uniqueness.none)
        ]

    @classmethod
    def collect_resource_unique_attrs(
        cls, resource_type: ResourceType, schemas: dict[str, Schema]
    ) -> list[UniquenessDescriptor]:
        """Build the uniqueness descriptors for a whole resource type.

        :param resource_type: Resource type to inspect.
        :param schemas: Registered schemas, keyed by schema id.
        :return: Flat list of descriptors for the main schema followed
            by those of each schema extension.
        """
        ret = cls.collect_unique_attrs(schemas[resource_type.schema_].attributes, None)
        for extension in resource_type.schema_extensions or []:
            ret.extend(
                cls.collect_unique_attrs(
                    schemas[extension.schema_].attributes, extension.schema_
                )
            )
        return ret

    def __init__(self):
        super().__init__()
        # Every stored resource, regardless of resource type.
        self.resources: list[Resource] = []
        # Uniqueness descriptors keyed by resource type id.
        self.unique_attributes: dict[
            str, list[InMemoryBackend.UniquenessDescriptor]
        ] = {}
        self.lock: Lock = Lock()

    def __enter__(self):
        """See super docs.

        The InMemoryBackend uses a simple Lock to synchronize all
        access.
        """
        super().__enter__()
        self.lock.acquire()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Release the lock taken in ``__enter__``."""
        try:
            super().__exit__(exc_type, exc_val, exc_tb)
        finally:
            # Always release, even if the parent hook raises.
            self.lock.release()

    def register_resource_type(self, resource_type: ResourceType):
        """Register the resource type and collect its unique attributes."""
        super().register_resource_type(resource_type)
        self.unique_attributes[resource_type.id] = self.collect_resource_unique_attrs(
            resource_type, self.schemas
        )

    def _resource_type_name(self, resource_type_id: str) -> str:
        """Return the value stored in ``meta.resource_type`` for a type id.

        ``create_resource`` stores the resource type's *name* in the
        metadata, so lookups by id have to be translated to that name.
        Falls back to the id itself when the type is not registered.
        """
        resource_type = self.resource_types.get(resource_type_id)
        return resource_type_id if resource_type is None else resource_type.name

    def query_resources(
        self,
        search_request: SearchRequest,
        resource_type_id: str | None = None,
    ) -> tuple[int, list[Resource]]:
        """Filter, sort and paginate the stored resources.

        :param search_request: Query to execute (filter, sort_by,
            sort_order, start_index and count are honoured).
        :param resource_type_id: Restrict the query to this resource
            type; None queries all resource types.
        :return: Tuple of the total number of matching resources
            (before pagination) and deep copies of the requested page.
        """
        # start_index is 1-based; values below 1 select the first result.
        start_index = max((search_request.start_index or 1) - 1, 0)

        tree = None
        if search_request.filter is not None:
            token_stream = lexer.SCIMLexer().tokenize(search_request.filter)
            tree = SCIMParser().parse(token_stream)

        type_name = (
            None
            if resource_type_id is None
            else self._resource_type_name(resource_type_id)
        )
        found_resources = [
            r
            for r in self.resources
            if (type_name is None or r.meta.resource_type == type_name)
            and (tree is None or evaluate_filter(r, tree))
        ]

        if search_request.sort_by is not None:
            descending = search_request.sort_order == SearchRequest.SortOrder.descending
            sort_operator = ResolveSortOperator(str(search_request.sort_by))

            # To ensure that unset attributes are sorted last (when ascending, as defined in the RFC),
            # we have to divide the result set into a set and unset subset.
            unset_values = []
            set_values = []
            for resource in found_resources:
                result = sort_operator(resource)
                if result is None:
                    unset_values.append(resource)
                else:
                    set_values.append((resource, result))

            set_values.sort(key=operator.itemgetter(1), reverse=descending)
            set_values = [value[0] for value in set_values]
            if descending:
                found_resources = unset_values + set_values
            else:
                found_resources = set_values + unset_values

        # The total is the number of matches, not the size of the page.
        total_results = len(found_resources)

        page = found_resources[start_index:]
        if search_request.count is not None:
            # A negative count yields an empty page instead of slicing from the end.
            page = page[: max(search_request.count, 0)]

        # Hand out copies so callers cannot mutate the stored resources.
        return total_results, [r.model_copy(deep=True) for r in page]

    def _get_resource_idx(self, resource_type_id: str, object_id: str) -> int | None:
        """Return the index of a resource in ``self.resources``, or None."""
        type_name = self._resource_type_name(resource_type_id)
        return next(
            (
                idx
                for idx, r in enumerate(self.resources)
                if r.meta.resource_type == type_name and r.id == object_id
            ),
            None,
        )

    def get_resource(self, resource_type_id: str, object_id: str) -> Resource | None:
        """Return a deep copy of the stored resource, or None if not found."""
        resource_dict_idx = self._get_resource_idx(resource_type_id, object_id)
        if resource_dict_idx is not None:
            return self.resources[resource_dict_idx].model_copy(deep=True)
        return None

    def delete_resource(self, resource_type_id: str, object_id: str) -> bool:
        """Remove the resource; return True if one was removed."""
        resource_idx = self._get_resource_idx(resource_type_id, object_id)
        if resource_idx is None:
            return False
        del self.resources[resource_idx]
        return True

    def _ensure_unique(self, resource_type_id: str, candidate: Resource) -> None:
        """Check ``candidate`` against the uniqueness constraints of its type.

        Only other resources of the same type are considered; the stored
        resource sharing the candidate's id (if any) is skipped. Unset
        values never conflict with each other.

        :raises UniquenessException: If another resource already holds
            the same value for a unique attribute.
        """
        type_name = self._resource_type_name(resource_type_id)
        peers = [
            r
            for r in self.resources
            if r.meta.resource_type == type_name and r.id != candidate.id
        ]
        for unique_attribute in self.unique_attributes[resource_type_id]:
            new_value = unique_attribute.get_attribute(candidate)
            if new_value is None:
                continue
            if any(unique_attribute.get_attribute(peer) == new_value for peer in peers):
                raise UniquenessException()

    def create_resource(
        self, resource_type_id: str, resource: Resource
    ) -> Resource | None:
        """Store a copy of ``resource`` with a fresh id and metadata.

        :param resource_type_id: ID of the (registered) resource type.
        :param resource: Resource to create; it is not modified.
        :return: The stored resource.
        :raises UniquenessException: If a unique attribute collides with
            an existing resource of the same type.
        """
        resource = resource.model_copy(deep=True)
        resource.id = uuid.uuid4().hex
        utcnow = datetime.datetime.now(datetime.timezone.utc)
        resource_type = self.resource_types[resource_type_id]
        resource.meta = Meta(
            resource_type=resource_type.name,
            created=utcnow,
            last_modified=utcnow,
            location="/v2" + resource_type.endpoint + "/" + resource.id,
        )
        self._touch_resource(resource, utcnow)

        self._ensure_unique(resource_type_id, resource)

        self.resources.append(resource)
        return resource

    @staticmethod
    def _touch_resource(resource: Resource, last_modified: datetime.datetime):
        """Touches a resource (updates last_modified and version).

        Version is generated by hashing last_modified. Another option
        would be to hash the entire resource instead.
        """
        resource.meta.last_modified = last_modified
        etag = generate_etag(pickle.dumps(resource.meta.last_modified))
        resource.meta.version = f'W/"{etag}"'

    def update_resource(
        self, resource_type_id: str, resource: Resource
    ) -> Resource | None:
        """Replace the stored resource that has the same id as ``resource``.

        :param resource_type_id: ID of the (registered) resource type.
        :param resource: New state of the resource.
        :return: The stored, re-validated resource, or None if no
            resource with that id exists.
        :raises UniquenessException: If a unique attribute collides with
            another resource of the same type.
        """
        found_res_idx = self._get_resource_idx(resource_type_id, resource.id)
        if found_res_idx is None:
            return None

        # Re-validate through the registered model; this also detaches the
        # stored object from the one passed in.
        updated_resource = self.models_dict[resource_type_id].model_validate(
            resource.model_dump()
        )
        self._touch_resource(
            updated_resource, datetime.datetime.now(datetime.timezone.utc)
        )

        self._ensure_unique(resource_type_id, updated_resource)

        self.resources[found_res_idx] = updated_resource
        return updated_resource