From b4fb2cae2d1e39269405b5f6d6ffe7cad42c31cf Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Fri, 8 May 2026 14:03:39 -0600 Subject: [PATCH 1/2] Add local SBOL document indexing entry point --- src/buildcompiler/buildcompiler.py | 64 +++++++++++++++++++++++++++--- tests/test_buildcompiler.py | 58 +++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 tests/test_buildcompiler.py diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index c1e646a..57b4b58 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -76,6 +76,33 @@ def __init__( self._index_collections(collections) + @classmethod + def from_local_documents( + cls, + collection_docs: list[sbol2.Document], + design_doc: sbol2.Document | None = None, + ): + """Create a BuildCompiler instance from already-loaded local SBOL documents.""" + compiler = cls.__new__(cls) + compiler.sbh = None + compiler.sbol_doc = sbol2.Document() + compiler.indexed_plasmids = [] + compiler.indexed_backbones = [] + compiler.restriction_enzyme_implementations = [] + compiler.ligase_implementations = [] + + if design_doc is not None: + compiler.index_document(design_doc) + + for collection_doc in collection_docs: + compiler.index_document(collection_doc) + + return compiler + + def index_document(self, collection_doc: sbol2.Document): + self._merge_document(collection_doc) + self._index_current_document() + def _index_collections(self, collections: List[str]): """Index input collections into plasmids and backbones. 
@@ -91,9 +118,34 @@ def _index_collections(self, collections: List[str]): for uri in collections: print(f"Indexing collection: {uri}") self.sbh.pull(uri, self.sbol_doc) + self._index_current_document() + + def _merge_document(self, source_doc: sbol2.Document): + try: + self.sbol_doc.appendString(source_doc.writeString()) + except RuntimeError as exc: + if "SBOL_ERROR_URI_NOT_UNIQUE" in str(exc): + for top_level in source_doc.SBOLObjects.values(): + if top_level.identity in self.sbol_doc: + continue + self.sbol_doc.add(top_level.copy()) + else: + raise + + def _resolve_object(self, uri: str): + existing = self.sbol_doc.find(uri) + if existing is not None: + return existing + if self.sbh is None: + raise ValueError( + f"Referenced SBOL object not found in local documents: {uri}. " + "Local mode does not pull from SynBioHub." + ) + return get_or_pull(self.sbol_doc, self.sbh, uri) + def _index_current_document(self): for implementation in self.sbol_doc.implementations: - built_object = get_or_pull(self.sbol_doc, self.sbh, implementation.built) + built_object = self._resolve_object(implementation.built) if ( type(built_object) is sbol2.ModuleDefinition and ORGANISM_STRAIN in built_object.roles @@ -627,7 +679,7 @@ def _extract_plasmids_from_strain( ): # strain_implementation = optional param for plasmid in strain.functionalComponents: - plasmid_definition = get_or_pull(doc, self.sbh, plasmid.definition) + plasmid_definition = self._resolve_object(plasmid.definition) if ENGINEERED_PLASMID in plasmid_definition.roles: existing = self._get_indexed_plasmid( @@ -761,7 +813,7 @@ def _extract_design_parts( """ component_list = [c for c in design.getInSequentialOrder()] return [ - get_or_pull(self.sbol_doc, self.sbh, component.definition) + self._resolve_object(component.definition) for component in component_list ] @@ -775,7 +827,7 @@ def _get_abstract_design(self) -> sbol2.ComponentDefinition: continue component_definitions = [ - get_or_pull(self.sbol_doc, self.sbh, 
component.definition) + self._resolve_object(component.definition) for component in definition.getInSequentialOrder() ] if any( @@ -837,7 +889,7 @@ def _is_single_part(self, plasmid: sbol2.ComponentDefinition) -> bool: return False else: component_definitions = [ - get_or_pull(self.sbol_doc, self.sbh, comp.definition) + self._resolve_object(comp.definition) for comp in plasmid.getInSequentialOrder() ] @@ -1017,7 +1069,7 @@ def _expand_combinatorial_derivation( derivation: sbol2.CombinatorialDerivation, product_name_prefix: str = None, ) -> list[sbol2.ComponentDefinition]: - master_template = get_or_pull(self.sbol_doc, self.sbh, derivation.masterTemplate) + master_template = self._resolve_object(derivation.masterTemplate) component_variants = extract_combinatorial_design_parts( master_template, self.sbol_doc, self.sbol_doc ) diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py new file mode 100644 index 0000000..4acd224 --- /dev/null +++ b/tests/test_buildcompiler.py @@ -0,0 +1,58 @@ +import inspect +import os +import sys +import unittest +from unittest.mock import patch + +import sbol2 + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) + +from buildcompiler.buildcompiler import BuildCompiler +from buildcompiler.constants import RESTRICTION_ENZYME + + +class TestBuildCompilerLocalIndexing(unittest.TestCase): + def test_constructor_signature_unchanged(self): + params = list(inspect.signature(BuildCompiler.__init__).parameters.keys()) + self.assertEqual( + params, + ['self', 'collections', 'sbh_registry', 'auth_token', 'sbol_doc'], + ) + + def test_from_local_documents_indexes_without_partshop(self): + collection_doc = sbol2.Document() + enzyme = sbol2.ComponentDefinition('BsaI') + enzyme.types = [sbol2.BIOPAX_PROTEIN] + enzyme.roles = [RESTRICTION_ENZYME] + collection_doc.add(enzyme) + implementation = sbol2.Implementation('BsaI_impl') + implementation.built = enzyme.identity + 
collection_doc.add(implementation) + + with patch('sbol2.PartShop', side_effect=AssertionError('PartShop should not be constructed in local mode')): + compiler = BuildCompiler.from_local_documents([collection_doc]) + + self.assertIsNone(compiler.sbh) + self.assertEqual(len(compiler.indexed_plasmids), 0) + self.assertEqual(len(compiler.indexed_backbones), 0) + self.assertEqual(len(compiler.restriction_enzyme_implementations), 1) + self.assertIsInstance(compiler.restriction_enzyme_implementations, list) + self.assertIsInstance(compiler.ligase_implementations, list) + + def test_local_mode_raises_when_reference_missing(self): + doc = sbol2.Document() + strain = sbol2.ModuleDefinition('strain_missing_ref') + strain.roles = ['https://identifiers.org/ncit/NCIT:C14419'] + fc = strain.functionalComponents.create('plasmid_ref') + fc.definition = 'https://example.org/missing_plasmid/1' + doc.add(strain) + + with self.assertRaises(ValueError) as ctx: + BuildCompiler.from_local_documents([doc]) + + self.assertIn('Local mode does not pull from SynBioHub', str(ctx.exception)) + + +if __name__ == '__main__': + unittest.main() From 769bd06c4f1a12c79d6ff15494fb0658f9b85671 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Mon, 18 May 2026 09:33:02 -0600 Subject: [PATCH 2/2] Fix local document indexing to avoid duplicate re-indexing --- src/buildcompiler/buildcompiler.py | 33 ++++++++++++++++++++++-------- tests/test_buildcompiler.py | 27 +++++++++++++++++++++++- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 57b4b58..47a1838 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -101,7 +101,7 @@ def from_local_documents( def index_document(self, collection_doc: sbol2.Document): self._merge_document(collection_doc) - self._index_current_document() + self._index_document_objects(collection_doc) def 
_index_collections(self, collections: List[str]): """Index input collections into plasmids and backbones. @@ -144,7 +144,14 @@ def _resolve_object(self, uri: str): return get_or_pull(self.sbol_doc, self.sbh, uri) def _index_current_document(self): - for implementation in self.sbol_doc.implementations: + self._index_document_objects(self.sbol_doc) + + def _append_implementation_once(self, implementations: list, implementation): + if not any(existing.identity == implementation.identity for existing in implementations): + implementations.append(implementation) + + def _index_document_objects(self, source_doc: sbol2.Document): + for implementation in source_doc.implementations: built_object = self._resolve_object(implementation.built) if ( type(built_object) is sbol2.ModuleDefinition @@ -162,7 +169,9 @@ def _index_current_document(self): self.indexed_plasmids, built_object ) if existing_plasmid: - existing_plasmid.plasmid_implementations.append(implementation) + self._append_implementation_once( + existing_plasmid.plasmid_implementations, implementation + ) else: self.indexed_plasmids.append( Plasmid( @@ -174,7 +183,9 @@ def _index_current_document(self): self.indexed_backbones, built_object ) if existing_backbone: - existing_backbone.plasmid_implementations.append(implementation) + self._append_implementation_once( + existing_backbone.plasmid_implementations, implementation + ) else: self.indexed_backbones.append( Plasmid( @@ -183,15 +194,19 @@ def _index_current_document(self): ) elif sbol2.BIOPAX_PROTEIN in built_object.types: if RESTRICTION_ENZYME in built_object.roles: - self.restriction_enzyme_implementations.append(implementation) + self._append_implementation_once( + self.restriction_enzyme_implementations, implementation + ) elif LIGASE in built_object.roles: - self.ligase_implementations.append(implementation) + self._append_implementation_once( + self.ligase_implementations, implementation + ) - for strain in self.sbol_doc.moduleDefinitions: + for strain in 
source_doc.moduleDefinitions: if ORGANISM_STRAIN in strain.roles: self._extract_plasmids_from_strain(strain, None, self.sbol_doc) - for definition in self.sbol_doc.componentDefinitions: + for definition in source_doc.componentDefinitions: self._sort_plasmid_components(definition, self.sbol_doc) def domestication( @@ -818,7 +833,7 @@ def _extract_design_parts( ] def _get_abstract_design(self) -> sbol2.ComponentDefinition: - for definition in self.sbol_doc.componentDefinitions: + for definition in self.sbol_doc.componentDefinitions: if ( ENGINEERED_PLASMID in definition.roles or PLASMID_CLONING_VECTOR in definition.roles diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index 4acd224..65f124d 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -9,7 +9,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) from buildcompiler.buildcompiler import BuildCompiler -from buildcompiler.constants import RESTRICTION_ENZYME +from buildcompiler.constants import LIGASE, RESTRICTION_ENZYME class TestBuildCompilerLocalIndexing(unittest.TestCase): @@ -40,6 +40,31 @@ def test_from_local_documents_indexes_without_partshop(self): self.assertIsInstance(compiler.restriction_enzyme_implementations, list) self.assertIsInstance(compiler.ligase_implementations, list) + + def test_from_local_documents_does_not_reindex_prior_documents(self): + restriction_doc = sbol2.Document() + enzyme = sbol2.ComponentDefinition('BsaI') + enzyme.types = [sbol2.BIOPAX_PROTEIN] + enzyme.roles = [RESTRICTION_ENZYME] + restriction_doc.add(enzyme) + restriction_impl = sbol2.Implementation('BsaI_impl') + restriction_impl.built = enzyme.identity + restriction_doc.add(restriction_impl) + + ligase_doc = sbol2.Document() + ligase = sbol2.ComponentDefinition('T4Ligase') + ligase.types = [sbol2.BIOPAX_PROTEIN] + ligase.roles = [LIGASE] + ligase_doc.add(ligase) + ligase_impl = sbol2.Implementation('T4Ligase_impl') + ligase_impl.built = 
ligase.identity + ligase_doc.add(ligase_impl) + + compiler = BuildCompiler.from_local_documents([restriction_doc, ligase_doc]) + + self.assertEqual(len(compiler.restriction_enzyme_implementations), 1) + self.assertEqual(len(compiler.ligase_implementations), 1) + def test_local_mode_raises_when_reference_missing(self): doc = sbol2.Document() strain = sbol2.ModuleDefinition('strain_missing_ref')