From 8cabb91feb20538a83054264a0cc8696c2fc53e9 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Mon, 18 May 2026 09:54:12 -0600 Subject: [PATCH] Split collection pulling from SBOL indexing --- src/buildcompiler/buildcompiler.py | 142 +++++++++++++++-------------- tests/test_buildcompiler.py | 70 +++++++++++++- 2 files changed, 144 insertions(+), 68 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 47a1838..cfef2a9 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -42,7 +42,6 @@ ENGINEERED_PLASMID, PLASMID_CLONING_VECTOR, ORGANISM_STRAIN, - PLATING_ACTIVITY_ROLE, ) @@ -103,22 +102,24 @@ def index_document(self, collection_doc: sbol2.Document): self._merge_document(collection_doc) self._index_document_objects(collection_doc) - def _index_collections(self, collections: List[str]): - """Index input collections into plasmids and backbones. + def pull_collection_uris(self, uris: list[str]) -> sbol2.Document: + """Pull SynBioHub collection URIs into ``self.sbol_doc``.""" + for uri in uris: + print(f"Indexing collection: {uri}") + try: + self.sbh.pull(uri, self.sbol_doc) + except Exception as exc: + raise RuntimeError(f"Failed to pull collection URI: {uri}") from exc + return self.sbol_doc - Parses the provided collections (which may contain plasmids, backbones, strains, and enzymes) - and normalizes them into internal Plasmid/enzyme records that remain linked to - their originating strain and implementation definitions. + def index_sbol_document(self, doc: sbol2.Document, source: str = "local"): + """Index plasmids, backbones, and reagents from an SBOL document.""" + self._index_document_objects(doc, source=source) - :param collections: Iterable of user-provided collections/documents. - :type collections: Iterable - :returns: None. Updates ``self.indexed_plasmids`` in place. - :rtype: None - """ - for uri in collections: - print(f"Indexing collection: {uri}") - self.sbh.pull(uri, self.sbol_doc) - self._index_current_document() + def _index_collections(self, collections: List[str]): + """Compatibility wrapper for URI pull + indexing.""" + doc = self.pull_collection_uris(collections) + self.index_sbol_document(doc, source="synbiohub") def _merge_document(self, source_doc: sbol2.Document): try: @@ -144,70 +145,77 @@ def _resolve_object(self, uri: str): return get_or_pull(self.sbol_doc, self.sbh, uri) def _index_current_document(self): - self._index_document_objects(self.sbol_doc) + self._index_document_objects(self.sbol_doc, source="current") def _append_implementation_once(self, implementations: list, implementation): if not any(existing.identity == implementation.identity for existing in implementations): implementations.append(implementation) - def _index_document_objects(self, source_doc: sbol2.Document): + def _index_document_objects(self, source_doc: sbol2.Document, source: str = "local"): for implementation in source_doc.implementations: - built_object = self._resolve_object(implementation.built) - if ( - type(built_object) is sbol2.ModuleDefinition - and ORGANISM_STRAIN in built_object.roles - ): - self._extract_plasmids_from_strain( - built_object, implementation, self.sbol_doc - ) - elif ( - type(built_object) is sbol2.ComponentDefinition - and len(built_object.components) > 1 - ): - if ENGINEERED_PLASMID in built_object.roles: - existing_plasmid = self._get_indexed_plasmid( - self.indexed_plasmids, built_object + self._index_implementation(implementation) + + for strain in source_doc.moduleDefinitions: + self._index_strain_module(strain, implementation=None) + + for definition in self.sbol_doc.componentDefinitions: + self._index_plasmid_or_backbone_definition(definition, implementation=None) + + def _index_implementation(self, implementation: sbol2.Implementation): + built_object = self._resolve_object(implementation.built) + if type(built_object) is sbol2.ModuleDefinition: + self._index_strain_module(built_object, implementation=implementation) + elif type(built_object) is sbol2.ComponentDefinition: + self._index_plasmid_or_backbone_definition( + built_object, implementation=implementation + ) + self._index_reagent_implementation(implementation, built_object) + + def _index_strain_module( + self, strain: sbol2.ModuleDefinition, implementation: sbol2.Implementation | None + ): + if ORGANISM_STRAIN in strain.roles: + self._extract_plasmids_from_strain(strain, implementation, self.sbol_doc) + + def _index_plasmid_or_backbone_definition( + self, definition: sbol2.ComponentDefinition, implementation: sbol2.Implementation | None + ): + if implementation is not None and len(definition.components) > 1: + if ENGINEERED_PLASMID in definition.roles: + existing_plasmid = self._get_indexed_plasmid(self.indexed_plasmids, definition) + if existing_plasmid: + self._append_implementation_once( + existing_plasmid.plasmid_implementations, implementation ) - if existing_plasmid: - self._append_implementation_once( - existing_plasmid.plasmid_implementations, implementation - ) - else: - self.indexed_plasmids.append( - Plasmid( - built_object, None, [implementation], [], self.sbol_doc - ) - ) - elif PLASMID_CLONING_VECTOR in built_object.roles: - existing_backbone = self._get_indexed_plasmid( - self.indexed_backbones, built_object + else: + self.indexed_plasmids.append( + Plasmid(definition, None, [implementation], [], self.sbol_doc) ) - if existing_backbone: - self._append_implementation_once( - existing_backbone.plasmid_implementations, implementation - ) - else: - self.indexed_backbones.append( - Plasmid( - built_object, None, [implementation], [], self.sbol_doc - ) - ) - elif sbol2.BIOPAX_PROTEIN in built_object.types: - if RESTRICTION_ENZYME in built_object.roles: + elif PLASMID_CLONING_VECTOR in definition.roles: + existing_backbone = self._get_indexed_plasmid( + self.indexed_backbones, definition + ) + if existing_backbone: self._append_implementation_once( - self.restriction_enzyme_implementations, implementation + existing_backbone.plasmid_implementations, implementation ) - elif LIGASE in built_object.roles: - self._append_implementation_once( - self.ligase_implementations, implementation + else: + self.indexed_backbones.append( + Plasmid(definition, None, [implementation], [], self.sbol_doc) ) - for strain in source_doc.moduleDefinitions: - if ORGANISM_STRAIN in strain.roles: - self._extract_plasmids_from_strain(strain, None, self.sbol_doc) + self._sort_plasmid_components(definition, self.sbol_doc) - for definition in source_doc.componentDefinitions: - self._sort_plasmid_components(definition, self.sbol_doc) + def _index_reagent_implementation( + self, implementation: sbol2.Implementation, built_object: sbol2.ComponentDefinition + ): + if sbol2.BIOPAX_PROTEIN in built_object.types: + if RESTRICTION_ENZYME in built_object.roles: + self._append_implementation_once( + self.restriction_enzyme_implementations, implementation + ) + elif LIGASE in built_object.roles: + self._append_implementation_once(self.ligase_implementations, implementation) def domestication( self, @@ -833,7 +841,7 @@ def _extract_design_parts( ] def _get_abstract_design(self) -> sbol2.ComponentDefinition: - for definition in source_doc.componentDefinitions: + for definition in self.sbol_doc.componentDefinitions: if ( ENGINEERED_PLASMID in definition.roles or PLASMID_CLONING_VECTOR in definition.roles diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index 65f124d..1606b57 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -2,7 +2,7 @@ import os import sys import unittest -from unittest.mock import patch +from unittest.mock import MagicMock, patch import sbol2 @@ -79,5 +79,73 @@ def test_local_mode_raises_when_reference_missing(self): self.assertIn('Local mode does not pull from SynBioHub', str(ctx.exception)) + + +class TestBuildCompilerCollectionIndexing(unittest.TestCase): + def _make_compiler_without_init(self): + compiler = BuildCompiler.__new__(BuildCompiler) + compiler.sbh = MagicMock() + compiler.sbol_doc = sbol2.Document() + compiler.indexed_plasmids = [] + compiler.indexed_backbones = [] + compiler.restriction_enzyme_implementations = [] + compiler.ligase_implementations = [] + return compiler + + def test_index_sbol_document_local_does_not_pull(self): + compiler = self._make_compiler_without_init() + + enzyme = sbol2.ComponentDefinition('BsaI_local') + enzyme.types = [sbol2.BIOPAX_PROTEIN] + enzyme.roles = [RESTRICTION_ENZYME] + compiler.sbol_doc.add(enzyme) + implementation = sbol2.Implementation('BsaI_local_impl') + implementation.built = enzyme.identity + compiler.sbol_doc.add(implementation) + + compiler.index_sbol_document(compiler.sbol_doc, source='local') + + compiler.sbh.pull.assert_not_called() + self.assertEqual(len(compiler.restriction_enzyme_implementations), 1) + + def test_index_collections_pulls_then_indexes(self): + compiler = self._make_compiler_without_init() + call_order = [] + + def fake_pull(uris): + call_order.append('pull') + return compiler.sbol_doc + + def fake_index(doc, source='local'): + call_order.append(f'index:{source}') + + compiler.pull_collection_uris = fake_pull + compiler.index_sbol_document = fake_index + + compiler._index_collections(['https://example.org/collection']) + + self.assertEqual(call_order, ['pull', 'index:synbiohub']) + + def test_pull_failure_has_uri_context(self): + compiler = self._make_compiler_without_init() + compiler.sbh.pull.side_effect = ValueError('network timeout') + + with self.assertRaises(RuntimeError) as ctx: + compiler.pull_collection_uris(['https://example.org/fail']) + + self.assertIn('Failed to pull collection URI: https://example.org/fail', str(ctx.exception)) + + def test_indexing_failure_is_distinct_from_pull_failure(self): + compiler = self._make_compiler_without_init() + bad_doc = sbol2.Document() + bad_impl = sbol2.Implementation('impl_missing_built') + bad_doc.add(bad_impl) + + with self.assertRaises(Exception) as ctx: + compiler.index_sbol_document(bad_doc, source='local') + + self.assertNotIn('Failed to pull collection URI', str(ctx.exception)) + + if __name__ == '__main__': unittest.main()