From a70c7a50ea48143e5d85a7dfec1b7c4bb99470de Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Thu, 26 Mar 2026 14:33:26 -0600 Subject: [PATCH 01/47] established missing implementation warning framework --- src/buildcompiler/buildcompiler.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 241b67c..da562c4 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -1,5 +1,6 @@ -import random import sbol2 +import random +import warnings from typing import List, Dict from buildcompiler.plasmid import Plasmid @@ -165,16 +166,19 @@ def _remove_internal_bsai_sites(sequence: str) -> tuple[str, int]: None, ) if bsaI_impl is None: - raise ValueError( - "BsaI Restriction enzyme not found in provided collections. Terminating domestication." + self._create_RE_implementation("BsaI") + warnings.warn( + "BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.", + RuntimeWarning, ) ligase_impl = ( self.ligase_implementations[0] if self.ligase_implementations else None ) if ligase_impl is None: - raise ValueError( - "No appropriate ligase found in provided collections. Terminating domestication." + self._create_ligase_implementation() + warnings.warn( + "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol." ) dsDNAs = [] @@ -591,3 +595,9 @@ def _is_single_part(self, plasmid: sbol2.ComponentDefinition) -> bool: return True return False + + def _create_RE_implementation(name: str): + pass + + def _create_ligase_implementation(): + pass From 2653177479b66de3256bfe85fa0c9fd4d13c13a6 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 31 Mar 2026 14:01:20 -0600 Subject: [PATCH 02/47] incorrect draft of lvl2 --- notebooks/build_compiler_test.ipynb | 114 ++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 8 deletions(-) diff --git a/notebooks/build_compiler_test.ipynb b/notebooks/build_compiler_test.ipynb index b9f1e14..d30433f 100644 --- a/notebooks/build_compiler_test.ipynb +++ b/notebooks/build_compiler_test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 19, "id": "87bdb42e", "metadata": {}, "outputs": [], @@ -14,24 +14,32 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, "id": "e60a9c84", "metadata": {}, "outputs": [], "source": [ "design_doc = sbol2.Document()\n", - "design_doc.read(\"../tests/test_files/moclo_parts_circuit.xml\")\n", - "design = extract_toplevel_definition(design_doc)" + "design_doc.read(\"../tests/test_files/ExampleLvl2_design.xml\")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 21, "id": "90648527", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Indexing collection: https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\n", + "Indexing collection: https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\n" + ] + } + ], "source": [ - "auth = \"51102d98-f852-4386-9ae8-7c5814d679c1\"\n", + "auth = \"b9a7ee3a-5a02-42cd-ad1a-454b68cbe2a1\"\n", "collections = [\n", " \"https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\",\n", " \"https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\",\n", @@ -39,6 +47,96 @@ "buildcompiler = BuildCompiler(collections, \"https://synbiohub.org\", auth, None)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "1712f5ce", + "metadata": {}, + "outputs": [], + "source": [ + "from buildcompiler.plasmid import Plasmid\n", + "from buildcompiler.abstract_translator import get_or_pull\n", + "from typing import List\n", + "from buildcompiler.constants import AMP\n", + "\n", + "\n", + "def _extract_lvl2_design_parts(\n", + " self, design_doc: sbol2.Document\n", + ") -> List[List[sbol2.ComponentDefinition]]:\n", + " \"\"\"\n", + " Returns definitions of level-0 parts grouped by level-1 components.\n", + "\n", + " Args:\n", + " design: :class:`sbol2.Document` containing the design.\n", + "\n", + " Returns:\n", + " A list where each element corresponds to a level-1 component\n", + " and contains a list of its part definitions in sequential order.\n", + " \"\"\"\n", + " result = []\n", + "\n", + " design = extract_toplevel_definition(design_doc)\n", + "\n", + " for lvl1_comp in design.getInSequentialOrder():\n", + " lvl0_ordered = self.sbol_doc.get(lvl1_comp.definition).getInSequentialOrder()\n", + "\n", + " parts = [\n", + " get_or_pull(self.sbol_doc, self.sbh, lvl0_comp.definition)\n", + " for lvl0_comp in lvl0_ordered\n", + " ]\n", + "\n", + " result.append(parts)\n", + "\n", + " return result\n", + "\n", + "\n", + "def assembly_lvl2(\n", + " self_buildcompiler, abstract_design_doc: sbol2.Document, backbone: Plasmid = None\n", + ") -> list[sbol2.ComponentDefinition]:\n", + " \"\"\"Assemble level-2 plasmids for the full design.\n", + "\n", + " Uses the assembled lvl1 plasmids and the current design to assemble\n", + " lvl2 plasmids in the correct order.\n", + "\n", + " :returns: List of assembled lvl2 plasmids.\n", + " :rtype: list[Plasmid]\n", + " :raises LookupError: If compatible plasmids or backbones cannot be found.\n", + " \"\"\"\n", + " # get high level genes, send to assembly_lvl1\n", + " # send original abstract_design to get a new dictionary\n", + " # send new dictionary to _get_backbone or get_compatible plasmids with AMP\n", + " TUs = _extract_lvl2_design_parts(self_buildcompiler, abstract_design_doc)\n", + "\n", + " for lvl1_comp_list in TUs:\n", + " plasmid_dict = self_buildcompiler._construct_plasmid_dict(lvl1_comp_list, AMP)\n", + " print(plasmid_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "3e2272ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1\n", + "https://synbiohub.org/user/Gon/CIDARMoCloParts/B0032/1\n", + "https://synbiohub.org/user/Gon/CIDARMoCloParts/E0040m_gfp/1\n", + "https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1\n", + "https://synbiohub.org/user/Gon/CIDARMoCloParts/J23116/1\n", + "https://synbiohub.org/user/Gon/CIDARMoCloParts/B0033/1\n", + "https://synbiohub.org/user/Gon/CIDARMoCloParts/E1010m_rfp/1\n", + "https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1\n" + ] + } + ], + "source": [ + "assembly_lvl2(buildcompiler, design_doc)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -72,7 +170,7 @@ "print(buildcompiler.restriction_enzyme_implementations)\n", "print(buildcompiler.ligase_implementations)\n", "\n", - "composite_plasmids = buildcompiler.assembly_lvl1(design, None)" + "# composite_plasmids = buildcompiler.assembly_lvl1(design, None)" ] }, { From 8f8c624435c1f93bbf01e0fb98e0902faa332121 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 31 Mar 2026 17:43:33 -0600 Subject: [PATCH 03/47] resolving original plasmid part overwrite issue in digestion --- src/buildcompiler/sbol2build.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index fc731bf..7a19bdb 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -50,7 +50,9 @@ def __init__( # TODO add fields for activity/agent/plan self.ligase = ligase self.extracted_parts = [] # list of tuples [ComponentDefinition, Sequence] self.source_document = document - self.final_document = sbol2.Document() + self.final_document = ( + sbol2.Document() + ) # TODO change to allow this to be passed in as a parameter self.assembly_activity = initialize_assembly_activity() self.composites = [] @@ -497,8 +499,20 @@ def part_digestion( # find + add original component to product def & annotation for comp in reactant_component_definition.components: if comp.definition == original_part_def_URI: - prod_component_definition.components.add(comp) - part_extract_annotation.component = comp + new_comp = prod_component_definition.components.create(comp.displayId) + new_comp.definition = comp.definition + part_extract_annotation.component = new_comp + + original_cd = document.getComponentDefinition(comp.definition) + seq = document.get(original_cd.sequences[0]) + + new_seq = sbol2.Sequence( + uri=f"{reactant_component_definition.displayId}_extracted_part_seq", + elements=seq.elements, + encoding=seq.encoding, + ) + prod_component_definition.sequences.append(new_seq) + extracts_list.append((new_comp, new_seq)) prod_component_definition.sequenceAnnotations.add(three_prime_overhang_annotation) prod_component_definition.sequenceAnnotations.add(five_prime_overhang_annotation) @@ -1000,6 +1014,9 @@ def ligation( composite_implementation.built = composite_component_definition.identity composite_implementation.wasGeneratedBy = assembly_activity.identity + source_document.add_list( + [composite_component_definition, composite_seq, composite_implementation] + ) final_document.add_list( [composite_component_definition, composite_seq, composite_implementation] ) From 6083612393e8d83dc1f0acd16d99864593ab6ccc Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 31 Mar 2026 19:41:50 -0600 Subject: [PATCH 04/47] unique objects for each lvl1 + passing in final doc for continuity --- src/buildcompiler/buildcompiler.py | 16 +++++-- src/buildcompiler/sbol2build.py | 68 +++++++++++++++--------------- 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index da562c4..01f49f4 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -278,7 +278,11 @@ def _remove_internal_bsai_sites(sequence: str) -> tuple[str, int]: return domesticated_parts def assembly_lvl1( - self, abstract_design: sbol2.ComponentDefinition, backbone: Plasmid = None + self, + abstract_design: sbol2.ComponentDefinition, + final_doc: sbol2.Document = sbol2.Document(), + product_name: str = None, + backbone: Plasmid = None, ) -> list[sbol2.ComponentDefinition]: """Assemble level-1 plasmids for each gene/transcriptional unit. @@ -322,9 +326,15 @@ def assembly_lvl1( ) assembly = Assembly( - compatible_plasmids, backbone, bsaI_impl, ligase_impl, self.sbol_doc + compatible_plasmids, + backbone, + bsaI_impl, + ligase_impl, + self.sbol_doc, + final_doc, + product_name, ) - composite_plasmids, product_doc = assembly.run() + composite_plasmids, product_doc = assembly.run() # TODO upload product_doc? self.indexed_plasmids.extend(composite_plasmids) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 7a19bdb..4298c18 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -42,22 +42,23 @@ def __init__( # TODO add fields for activity/agent/plan backbone_plasmid: Plasmid, restriction_enzyme: sbol2.Implementation, # TODO search for implementation in document, or domesticate the RE ligase: sbol2.Implementation, - document: sbol2.Document, + source_document: sbol2.Document, + final_document: sbol2.Document, + composite_prefix: str = "composite", ): self.part_plasmids = part_plasmids self.backbone = backbone_plasmid self.restriction_enzyme = restriction_enzyme self.ligase = ligase self.extracted_parts = [] # list of tuples [ComponentDefinition, Sequence] - self.source_document = document - self.final_document = ( - sbol2.Document() - ) # TODO change to allow this to be passed in as a parameter - self.assembly_activity = initialize_assembly_activity() + self.source_document = source_document + self.final_document = final_document + self.composite_prefix = composite_prefix + self.assembly_activity = self.initialize_assembly_activity() self.composites = [] def run( - self, include_extracted_parts=False + self, include_extracted_parts: bool = False ) -> List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]: """Runs full assembly simulation. @@ -98,6 +99,7 @@ def run( self.composites = ligation( self.extracted_parts, self.assembly_activity, + self.composite_prefix, self.source_document, self.final_document, self.ligase, @@ -118,6 +120,27 @@ def run( return composite_plasmid_objs, self.final_document + def initialize_assembly_activity(self): + activity = sbol2.Activity(f"{self.composite_prefix}_assembly") + + activity.name = "DNA Assembly" + activity.types = "http://sbols.org/v2#build" + + activity_association = sbol2.Association("assemble_") + + assembly_plan = sbol2.Plan("assembly_plan") + + assembly_plan.description = "MoClo DNA Assembly With Opentrons OT2" + + activity_association.plan = assembly_plan + + activity_agent = sbol2.Agent("BuildCompiler") + activity_association.agent = activity_agent + + activity.associations = [activity_association] + + return activity + def rebase_restriction_enzyme(name: str, **kwargs) -> sbol2.ComponentDefinition: """Creates an ComponentDefinition Restriction Enzyme Component from rebase. @@ -749,6 +772,7 @@ def number_to_suffix(n): def ligation( reactants: List[sbol2.ComponentDefinition], assembly_activity: sbol2.Activity, + composite_prefix: str, source_document: sbol2.Document, final_document: sbol2.Document, ligase: sbol2.Implementation, @@ -980,10 +1004,12 @@ def ligation( # create dna component and sequence composite_component_definition, composite_seq = ( dna_componentdefinition_with_sequence( - f"composite_{composite_number}", composite_sequence_str, molecule=True + f"{composite_prefix}_{composite_number}", + composite_sequence_str, + molecule=True, ) ) - composite_component_definition.name = f"composite_{composite_number}" + composite_component_definition.name = f"{composite_prefix}_{composite_number}" composite_component_definition.addRole(ENGINEERED_REGION) composite_component_definition.addType(CIRCULAR) @@ -1004,8 +1030,6 @@ def ligation( prev_part_extract = comp - # _create_precedes_restriction(composite_component_definition, prev_part_extract, composite_component_definition.components[0]) # final component precedes first component; defining circular order - composite_component_definition.sequenceAnnotations = anno_list composite_implementation = sbol2.Implementation( @@ -1060,28 +1084,6 @@ def add_object_to_doc( raise e -def initialize_assembly_activity(): - activity = sbol2.Activity("assembly") - - activity.name = "DNA Assembly" - activity.types = "http://sbols.org/v2#build" - - activity_association = sbol2.Association("assemble_") - - assembly_plan = sbol2.Plan("assembly_plan") - - assembly_plan.description = "MoClo DNA Assembly With Opentrons OT2" - - activity_association.plan = assembly_plan - - activity_agent = sbol2.Agent("BuildCompiler") - activity_association.agent = activity_agent - - activity.associations = [activity_association] - - return activity - - def _create_precedes_restriction( parent_definition: sbol2.ComponentDefinition, subject: sbol2.Component, From 7f68281fdcbf1349c70f84f810ab9f9ae52e74de Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 31 Mar 2026 19:42:22 -0600 Subject: [PATCH 05/47] only grab first and last fusion site from new composite --- src/buildcompiler/plasmid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/buildcompiler/plasmid.py b/src/buildcompiler/plasmid.py index 9b81dfe..d8950ec 100644 --- a/src/buildcompiler/plasmid.py +++ b/src/buildcompiler/plasmid.py @@ -36,7 +36,7 @@ def _match_fusion_sites(self, doc: sbol2.document) -> List[str]: fusion_sites.append(key) fusion_sites.sort() - return fusion_sites + return [fusion_sites[0], fusion_sites[-1]] def _get_antibiotic_resistance(self, doc: sbol2.Document) -> str: for component in ( From 53716c04e85a800d284db8f09d6ccff763bc28ad Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Thu, 2 Apr 2026 16:24:16 -0600 Subject: [PATCH 06/47] codex check --- src/buildcompiler/abstract_translator.py | 2 +- src/buildcompiler/plasmid.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/buildcompiler/abstract_translator.py b/src/buildcompiler/abstract_translator.py index 5c2ef02..5765123 100644 --- a/src/buildcompiler/abstract_translator.py +++ b/src/buildcompiler/abstract_translator.py @@ -90,7 +90,7 @@ def extract_fusion_sites( A list of fusion site component definitions. """ fusion_sites = [] - for component in plasmid.components: + for component in plasmid.getInSequentialOrder(): definition = doc.getComponentDefinition(component.definition) if RESTRICTION_ENZYME_ASSEMBLY_SCAR in definition.roles: fusion_sites.append(definition) diff --git a/src/buildcompiler/plasmid.py b/src/buildcompiler/plasmid.py index d8950ec..1ebb600 100644 --- a/src/buildcompiler/plasmid.py +++ b/src/buildcompiler/plasmid.py @@ -35,8 +35,8 @@ def _match_fusion_sites(self, doc: sbol2.document) -> List[str]: if seq == sequence.upper(): fusion_sites.append(key) - fusion_sites.sort() - return [fusion_sites[0], fusion_sites[-1]] + # fusion_sites.sort() + return fusion_sites def _get_antibiotic_resistance(self, doc: sbol2.Document) -> str: for component in ( From 1ad96f2e87f6a4ff6ee13dadcf2d7ad2f05c1e68 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Mon, 6 Apr 2026 13:22:59 -0600 Subject: [PATCH 07/47] backbone now listed last in product --- src/buildcompiler/sbol2build.py | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 4298c18..06958de 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -857,6 +857,7 @@ def ligation( insert_sequence_uri = combination[0].sequences[0] insert_sequence = source_document.getSequence(insert_sequence_uri).elements remaining_parts = list(combination[1:]) + insert_3prime_match_id = None it = 1 while remaining_parts: remaining_parts_before = len(remaining_parts) @@ -869,27 +870,31 @@ def ligation( .lower() == insert_sequence[-fusion_site_length:].lower() ): - insert_sequence = ( - insert_sequence[:-fusion_site_length] - + source_document.getSequence(part_sequence_uri).elements - ) - list_of_parts_per_composite.append( - part - ) # add sequence annotation here, index based on insert_sequence - remaining_parts.remove(part) - # match insert sequence 3' to part 5' + if ( + len(remaining_parts) == 1 + and part.identity == insert_3prime_match_id + ): # check flag and match backbone 5' on final part 3' + insert_sequence = ( + insert_sequence[:-fusion_site_length] + + source_document.getSequence(part_sequence_uri).elements + ) + list_of_parts_per_composite.append(part) + remaining_parts.remove(part) + elif len(remaining_parts) > 1: + insert_sequence = ( + insert_sequence[:-fusion_site_length] + + source_document.getSequence(part_sequence_uri).elements + ) + list_of_parts_per_composite.append(part) + remaining_parts.remove(part) + # match backbone 5' to insert sequence 3', set flag elif ( source_document.getSequence(part_sequence_uri) .elements[-fusion_site_length:] .lower() == insert_sequence[:fusion_site_length].lower() ): - insert_sequence = ( - source_document.getSequence(part_sequence_uri).elements - + insert_sequence[fusion_site_length:] - ) - list_of_parts_per_composite.insert(0, part) - remaining_parts.remove(part) + insert_3prime_match_id = part.identity remaining_parts_after = len(remaining_parts) if remaining_parts_before == remaining_parts_after: @@ -1041,6 +1046,7 @@ def ligation( source_document.add_list( [composite_component_definition, composite_seq, composite_implementation] ) + final_document.add_list( [composite_component_definition, composite_seq, composite_implementation] ) From fd65c9a8ce5c1c7087db86c8ccc8909724b01bdf Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 7 Apr 2026 16:08:31 -0600 Subject: [PATCH 08/47] type correction on ligation products --- src/buildcompiler/sbol2build.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 06958de..54b08ef 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -10,7 +10,6 @@ DNA_TYPES, ENGINEERED_INSERT, ENGINEERED_PLASMID, - ENGINEERED_REGION, FIVE_PRIME_OVERHANG, FUSION_SITES, LINEAR, @@ -1015,7 +1014,7 @@ def ligation( ) ) composite_component_definition.name = f"{composite_prefix}_{composite_number}" - composite_component_definition.addRole(ENGINEERED_REGION) + composite_component_definition.addRole(ENGINEERED_PLASMID) composite_component_definition.addType(CIRCULAR) prev_part_extract = None From ee7451f222b52a5cb2b53433c5717e4cb8677ab7 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 7 Apr 2026 16:40:51 -0600 Subject: [PATCH 09/47] function to simplify complex plasmid part representation to a scar-TU-scar-bb representation --- src/buildcompiler/buildcompiler.py | 212 ++++++++++++++++++++++++++++- 1 file changed, 209 insertions(+), 3 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 01f49f4..82b4e72 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -1,7 +1,7 @@ import sbol2 import random import warnings -from typing import List, Dict +from typing import List, Dict, Tuple from buildcompiler.plasmid import Plasmid from buildcompiler.sbol2build import Assembly, dna_componentdefinition_with_sequence @@ -11,10 +11,12 @@ ) from .constants import ( AMP, + ENGINEERED_REGION, KAN, FUSION_SITES, LIGASE, PART_ROLES, + PLASMID_VECTOR, RESTRICTION_ENZYME, RESTRICTION_ENZYME_ASSEMBLY_SCAR, ENGINEERED_PLASMID, @@ -320,7 +322,7 @@ def assembly_lvl1( ) ligase_impl = self.ligase_implementations[0] - if bsaI_impl is None: + if ligase_impl is None: raise ValueError( "No appropriate ligase found in provided collections. Terminating assembly." ) @@ -338,7 +340,7 @@ def assembly_lvl1( self.indexed_plasmids.extend(composite_plasmids) - return composite_plasmids + return composite_plasmids, product_doc # TODO: Create a SBOL representation of the assembly process, updating the SBOL Document. # Using he selected parts create the representation, you need Plasmids, BsaI and T4 Ligase. @@ -606,6 +608,210 @@ def _is_single_part(self, plasmid: sbol2.ComponentDefinition) -> bool: return False + def _encapsulate_TU( + self, plasmid: Plasmid + ) -> Tuple[Plasmid, List[sbol2.ComponentDefinition]]: + """ + Collapse a detailed plasmid with a transcriptional unit (pro, rbs, cds, terminator) + into a simplified representation: + + fusion_site_L -> TU_gene -> fusion_site_R -> backbone + + Returns + ------- + Tuple[Plasmid, List[ComponentDefinition]] + simplified plasmid and any new component definitions created + """ + new_defs = [] + plasmid_def = plasmid.plasmid_definition + + fusion_left, fusion_right = plasmid.fusion_sites + left_seq = FUSION_SITES[fusion_left] + right_seq = FUSION_SITES[fusion_right] + + left_def = None + right_def = None + backbone_def = None + promoter = None + terminator = None + + # scan subcomponents for pro and term to establish range + get backbone + for comp in plasmid_def.components: + comp_def = self.sbol_doc.get(comp.definition) + + # find fusion sites of interest + if RESTRICTION_ENZYME_ASSEMBLY_SCAR in comp_def.roles: + seq_obj = self.sbol_doc.get(comp_def.sequences[0]) + if seq_obj.elements == left_seq: + left_def = comp_def + continue + if seq_obj.elements == right_seq: + right_def = comp_def + continue + elif PLASMID_VECTOR in comp_def.roles: + backbone_def = comp_def + elif sbol2.SO_PROMOTER in comp_def.roles: + promoter = comp + elif sbol2.SO_TERMINATOR in comp_def.roles: + terminator = comp + + if promoter is None or terminator is None: + raise ValueError("Could not locate promoter or terminator in plasmid TU") + + comp_dict = {c.identity: c for c in plasmid_def.components} + + follows = {} + for ( + sc + ) in plasmid_def.sequenceConstraints: # TODO replace with getInSequentialOrder? + if sc.restriction == sbol2.SBOL_RESTRICTION_PRECEDES: + subject_comp = comp_dict[sc.subject] + object_comp = comp_dict[sc.object] + follows[subject_comp] = object_comp + + old_tu_components = [] + curr_comp = promoter + + while True: + old_tu_components.append(curr_comp) + + if curr_comp.identity == terminator.identity: + break + + if curr_comp not in follows: + raise ValueError("Broken sequence constraint chain in TU") + + curr_comp = follows[curr_comp] + + tu_def = sbol2.ComponentDefinition(plasmid_def.displayId + "_TU") + tu_def.roles = [ENGINEERED_REGION] + + self.sbol_doc.add(tu_def) + new_defs.append(tu_def) + + # map old components to new ones + comp_map = {} + + for comp in old_tu_components: + new_comp = tu_def.components.create(comp.displayId) + new_comp.definition = comp.definition + comp_map[comp.identity] = new_comp.identity + + # Copy sequence annotations inside TU + for sa in plasmid_def.sequenceAnnotations: + if sa.component in comp_map: + new_sa = tu_def.sequenceAnnotations.create(sa.displayId) + new_sa.component = comp_map[sa.component] + + for loc in sa.locations: + new_range = sbol2.Range( + uri=loc.displayId, start=loc.start, end=loc.end + ) + + if hasattr(loc, "orientation"): + new_range.orientation = loc.orientation + + new_sa.locations.add(new_range) + + # Copy sequence constraints + for sc in plasmid_def.sequenceConstraints: + if sc.subject in comp_map and sc.object in comp_map: + new_sc = tu_def.sequenceConstraints.create(sc.displayId) + new_sc.subject = comp_map[sc.subject] + new_sc.object = comp_map[sc.object] + new_sc.restriction = sc.restriction + + # Build simplified plasmid definition + simple_plasmid_def = sbol2.ComponentDefinition( + plasmid_def.displayId + "_simple" + ) + self.sbol_doc.addComponentDefinition(simple_plasmid_def) + new_defs.append(simple_plasmid_def) + + simple_plasmid_def.types = list(plasmid_def.types) + simple_plasmid_def.roles = list(plasmid_def.roles) + + fusion_left_comp = simple_plasmid_def.components.create("fusion_left") + fusion_left_comp.definition = left_def.identity + + tu_comp = simple_plasmid_def.components.create("TU") + tu_comp.definition = tu_def.identity + + fusion_right_comp = simple_plasmid_def.components.create("fusion_right") + fusion_right_comp.definition = right_def.identity + + backbone_comp = None + if backbone_def: + backbone_comp = simple_plasmid_def.components.create("backbone") + backbone_comp.definition = backbone_def.identity + + # Sequence Constraints (ordering) + constraint_counter = 0 + + def add_precedes(subj, obj): + nonlocal constraint_counter + sc = simple_plasmid_def.sequenceConstraints.create( + f"constraint_{constraint_counter}" + ) + sc.subject = subj.identity + sc.object = obj.identity + sc.restriction = sbol2.SBOL_RESTRICTION_PRECEDES + constraint_counter += 1 + + add_precedes(fusion_left_comp, tu_comp) + add_precedes(tu_comp, fusion_right_comp) + + if backbone_comp: + add_precedes(fusion_right_comp, backbone_comp) + + component_map = { + left_def.identity: fusion_left_comp.identity, + tu_def.identity: tu_comp.identity, + right_def.identity: fusion_right_comp.identity, + } + + if backbone_def: + component_map[backbone_def.identity] = backbone_comp.identity + + for sa in plasmid_def.sequenceAnnotations: + comp_uri = sa.component + + if comp_uri not in component_map: + continue + + if len(sa.locations) == 0: + continue + + new_sa = simple_plasmid_def.sequenceAnnotations.create(sa.displayId) + new_sa.component = component_map[comp_uri] + + for loc in sa.locations: + if isinstance(loc, sbol2.Range): + new_loc = new_sa.locations.createRange(loc.displayId) + new_loc.start = loc.start + new_loc.end = loc.end + new_loc.orientation = loc.orientation + + elif isinstance(loc, sbol2.Cut): + new_loc = new_sa.locations.createCut(loc.displayId) + new_loc.at = loc.at + new_loc.orientation = loc.orientation + + else: + new_loc = new_sa.locations.createGenericLocation(loc.displayId) + new_loc.orientation = loc.orientation + + # Construct new plasmid object + new_plasmid = Plasmid( + simple_plasmid_def, + plasmid.strain_definitions[0], + plasmid.plasmid_implementations, + plasmid.strain_implementations, + self.sbol_doc, + ) + + return new_plasmid, new_defs + def _create_RE_implementation(name: str): pass From 5640cd8973722fbb04e58086f99245753ef34308 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 8 Apr 2026 02:12:58 -0600 Subject: [PATCH 10/47] intermediate w/ annos not working --- src/buildcompiler/buildcompiler.py | 155 +++++++++++++++++++++-------- 1 file changed, 115 insertions(+), 40 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 82b4e72..71b4be0 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -610,19 +610,22 @@ def _is_single_part(self, plasmid: sbol2.ComponentDefinition) -> bool: def _encapsulate_TU( self, plasmid: Plasmid - ) -> Tuple[Plasmid, List[sbol2.ComponentDefinition]]: + ) -> Tuple[Plasmid, List[sbol2.Identified]]: """ Collapse a detailed plasmid with a transcriptional unit (pro, rbs, cds, terminator) into a simplified representation: - fusion_site_L -> TU_gene -> fusion_site_R -> backbone + fusion_site_L -> TU -> fusion_site_R -> backbone + + Builds new sequences for both the TU and simplified plasmid. Returns ------- - Tuple[Plasmid, List[ComponentDefinition]] - simplified plasmid and any new component definitions created + Tuple[Plasmid, List[Identified]] + simplified plasmid and all new SBOL objects created """ - new_defs = [] + + new_objs = [] plasmid_def = plasmid.plasmid_definition fusion_left, fusion_right = plasmid.fusion_sites @@ -639,7 +642,6 @@ def _encapsulate_TU( for comp in plasmid_def.components: comp_def = self.sbol_doc.get(comp.definition) - # find fusion sites of interest if RESTRICTION_ENZYME_ASSEMBLY_SCAR in comp_def.roles: seq_obj = self.sbol_doc.get(comp_def.sequences[0]) if seq_obj.elements == left_seq: @@ -648,10 +650,13 @@ def _encapsulate_TU( if seq_obj.elements == right_seq: right_def = comp_def continue + elif PLASMID_VECTOR in comp_def.roles: backbone_def = comp_def + elif sbol2.SO_PROMOTER in comp_def.roles: promoter = comp + elif sbol2.SO_TERMINATOR in comp_def.roles: terminator = comp @@ -661,9 +666,7 @@ def _encapsulate_TU( comp_dict = {c.identity: c for c in plasmid_def.components} follows = {} - for ( - sc - ) in plasmid_def.sequenceConstraints: # TODO replace with getInSequentialOrder? + for sc in plasmid_def.sequenceConstraints: if sc.restriction == sbol2.SBOL_RESTRICTION_PRECEDES: subject_comp = comp_dict[sc.subject] object_comp = comp_dict[sc.object] @@ -683,13 +686,38 @@ def _encapsulate_TU( curr_comp = follows[curr_comp] + def build_sequence_from_components(components): + seq = "" + ranges = {} + cursor = 1 + + for comp in components: + comp_def = self.sbol_doc.get(comp.definition) + + if not comp_def.sequences: + raise ValueError(f"{comp_def.displayId} has no sequence") + + seq_obj = self.sbol_doc.get(comp_def.sequences[0]) + part_seq = seq_obj.elements + + start = cursor + end = cursor + len(part_seq) - 1 + + ranges[comp.identity] = (start, end) + + seq += part_seq + cursor = end + 1 + + return seq, ranges + + # Create TU definition tu_def = sbol2.ComponentDefinition(plasmid_def.displayId + "_TU") tu_def.roles = [ENGINEERED_REGION] self.sbol_doc.add(tu_def) - new_defs.append(tu_def) + new_objs.append(tu_def) - # map old components to new ones + # map old components to new comp_map = {} for comp in old_tu_components: @@ -697,23 +725,43 @@ def _encapsulate_TU( new_comp.definition = comp.definition comp_map[comp.identity] = new_comp.identity - # Copy sequence annotations inside TU + # Build TU sequence + tu_seq_string, tu_ranges = build_sequence_from_components(old_tu_components) + + tu_seq = sbol2.Sequence( + tu_def.displayId + "_seq", + elements=tu_seq_string, + encoding=sbol2.SBOL_ENCODING_IUPAC, + ) + + self.sbol_doc.addSequence(tu_seq) + tu_def.sequences = [tu_seq.identity] + + new_objs.append(tu_seq) + + # Copy TU annotations for sa in plasmid_def.sequenceAnnotations: - if sa.component in comp_map: - new_sa = tu_def.sequenceAnnotations.create(sa.displayId) - new_sa.component = comp_map[sa.component] + if sa.component not in comp_map: + continue - for loc in sa.locations: - new_range = sbol2.Range( - uri=loc.displayId, start=loc.start, end=loc.end - ) + new_sa = tu_def.sequenceAnnotations.create(sa.displayId) + new_sa.component = comp_map[sa.component] + + offset_start, _ = tu_ranges[sa.component] - if hasattr(loc, "orientation"): - new_range.orientation = loc.orientation + for loc in sa.locations: + if isinstance(loc, sbol2.Range): + new_start = offset_start + loc.start - 1 + new_end = offset_start + loc.end - 1 - new_sa.locations.add(new_range) + new_loc = new_sa.locations.createRange(loc.displayId) + new_loc.start = new_start + new_loc.end = new_end + new_loc.orientation = loc.orientation - # Copy sequence constraints + # -------------------------------------------------- + # Copy TU sequence constraints + # -------------------------------------------------- for sc in plasmid_def.sequenceConstraints: if sc.subject in comp_map and sc.object in comp_map: new_sc = tu_def.sequenceConstraints.create(sc.displayId) @@ -721,12 +769,15 @@ def _encapsulate_TU( new_sc.object = comp_map[sc.object] new_sc.restriction = sc.restriction + # -------------------------------------------------- # Build simplified plasmid definition + # -------------------------------------------------- simple_plasmid_def = sbol2.ComponentDefinition( plasmid_def.displayId + "_simple" ) + self.sbol_doc.addComponentDefinition(simple_plasmid_def) - new_defs.append(simple_plasmid_def) + new_objs.append(simple_plasmid_def) simple_plasmid_def.types = list(plasmid_def.types) simple_plasmid_def.roles = list(plasmid_def.roles) @@ -745,7 +796,9 @@ def _encapsulate_TU( backbone_comp = simple_plasmid_def.components.create("backbone") backbone_comp.definition = backbone_def.identity - # Sequence Constraints (ordering) + # -------------------------------------------------- + # Sequence ordering constraints + # -------------------------------------------------- constraint_counter = 0 def add_precedes(subj, obj): @@ -764,6 +817,32 @@ def add_precedes(subj, obj): if backbone_comp: add_precedes(fusion_right_comp, backbone_comp) + # -------------------------------------------------- + # Build simplified plasmid sequence + # -------------------------------------------------- + ordered_components = [fusion_left_comp, tu_comp, fusion_right_comp] + + if backbone_comp: + ordered_components.append(backbone_comp) + + plas_seq_string, plas_ranges = build_sequence_from_components( + ordered_components + ) + + plas_seq = sbol2.Sequence( + simple_plasmid_def.displayId + "_seq", + elements=plas_seq_string, + encoding=sbol2.SBOL_ENCODING_IUPAC, + ) + + self.sbol_doc.addSequence(plas_seq) + simple_plasmid_def.sequences = [plas_seq.identity] + + new_objs.append(plas_seq) + + # -------------------------------------------------- + # Copy simplified plasmid annotations + # -------------------------------------------------- component_map = { left_def.identity: fusion_left_comp.identity, tu_def.identity: tu_comp.identity, @@ -777,31 +856,27 @@ def add_precedes(subj, obj): comp_uri = sa.component if comp_uri not in component_map: - continue - - if len(sa.locations) == 0: + print(f"{comp_uri} not found in map: {component_map}") continue new_sa = simple_plasmid_def.sequenceAnnotations.create(sa.displayId) new_sa.component = component_map[comp_uri] + offset_start, _ = plas_ranges[component_map[comp_uri]] + for loc in sa.locations: if isinstance(loc, sbol2.Range): - new_loc = new_sa.locations.createRange(loc.displayId) - new_loc.start = loc.start - new_loc.end = loc.end - new_loc.orientation = loc.orientation - - elif isinstance(loc, sbol2.Cut): - new_loc = new_sa.locations.createCut(loc.displayId) - new_loc.at = loc.at - new_loc.orientation = loc.orientation + new_start = offset_start + loc.start - 1 + new_end = offset_start + loc.end - 1 - else: - new_loc = new_sa.locations.createGenericLocation(loc.displayId) + new_loc = new_sa.locations.createRange(loc.displayId) + new_loc.start = new_start + new_loc.end = new_end new_loc.orientation = loc.orientation + # -------------------------------------------------- # Construct new plasmid object + # -------------------------------------------------- new_plasmid = Plasmid( simple_plasmid_def, plasmid.strain_definitions[0], @@ -810,7 +885,7 @@ def add_precedes(subj, obj): self.sbol_doc, ) - return new_plasmid, new_defs + return new_plasmid, new_objs def _create_RE_implementation(name: str): pass From d4ee967c39a551deff51827ed1987ce35122f411 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 14 Apr 2026 13:43:32 -0600 Subject: [PATCH 11/47] encapsulate TU annotaitons and sequence --- src/buildcompiler/buildcompiler.py | 44 ++++++++---------------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 71b4be0..9595fed 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -615,7 +615,7 @@ def _encapsulate_TU( Collapse a detailed plasmid with a transcriptional unit (pro, rbs, cds, terminator) into a simplified representation: - fusion_site_L -> TU -> fusion_site_R -> backbone + fusion_site_left -> TU -> fusion_site_right -> backbone Builds new sequences for both the TU and simplified plasmid. @@ -840,39 +840,17 @@ def add_precedes(subj, obj): new_objs.append(plas_seq) - # -------------------------------------------------- - # Copy simplified plasmid annotations - # -------------------------------------------------- - component_map = { - left_def.identity: fusion_left_comp.identity, - tu_def.identity: tu_comp.identity, - right_def.identity: fusion_right_comp.identity, - } - - if backbone_def: - component_map[backbone_def.identity] = backbone_comp.identity - - for sa in plasmid_def.sequenceAnnotations: - comp_uri = sa.component - - if comp_uri not in component_map: - print(f"{comp_uri} not found in map: {component_map}") - continue - - new_sa = simple_plasmid_def.sequenceAnnotations.create(sa.displayId) - new_sa.component = component_map[comp_uri] - - offset_start, _ = plas_ranges[component_map[comp_uri]] - - for loc in sa.locations: - if isinstance(loc, sbol2.Range): - new_start = offset_start + loc.start - 1 - new_end = offset_start + loc.end - 1 + for comp_uri, (start, end) in plas_ranges.items(): + anno = simple_plasmid_def.sequenceAnnotations.create( + f"simple_plasmid_def_{start}_{end}_annotation" + ) + anno.component = comp_uri - new_loc = new_sa.locations.createRange(loc.displayId) - new_loc.start = new_start - new_loc.end = new_end - new_loc.orientation = loc.orientation + location = anno.locations.createRange( + f"{simple_plasmid_def.displayId}_{start}_{end}_location" + ) + location.start = start + location.end = end # -------------------------------------------------- # Construct new plasmid object From e073084a5dff81ae18be8997e9bc5c945e3e9f32 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 14 Apr 2026 13:45:09 -0600 Subject: [PATCH 12/47] constant list of tuples defining TU fusion site order for l2 assemblies --- src/buildcompiler/constants.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/buildcompiler/constants.py b/src/buildcompiler/constants.py index 19c25a2..f39a8fe 100644 --- a/src/buildcompiler/constants.py +++ b/src/buildcompiler/constants.py @@ -23,12 +23,15 @@ "kan": KAN, "amp": AMP, } + +LVL2_FUSION_SITE_ORDER = [["A", "E"], ["E", "F"], ["F", "G"], ["G", "H"]] + # TODO http or https for identifiers? ENGINEERED_PLASMID = "http://identifiers.org/so/SO:0000637" ENGINEERED_INSERT = "https://identifiers.org/so/SO:0000915" ENGINEERED_REGION = "http://identifiers.org/so/SO:0000804" -PLASMID_VECTOR = "https://identifiers.org/so/SO:0000755" +PLASMID_VECTOR = "http://identifiers.org/so/SO:0000755" PLASMID_CLONING_VECTOR = "https://identifiers.org/ncit/NCIT:C1919" ANTIBIOTIC_RESISTANCE = "https://identifiers.org/ncit/NCIT:C17449" LIGASE = "http://identifiers.org/ncit/NCIT:C16796" From 3c26af008fe643332383f1f915388bfacc091241 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 14 Apr 2026 13:45:57 -0600 Subject: [PATCH 13/47] grab first and last fusion site now in order they appear --- src/buildcompiler/plasmid.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/buildcompiler/plasmid.py b/src/buildcompiler/plasmid.py index 1ebb600..57fada4 100644 --- a/src/buildcompiler/plasmid.py +++ b/src/buildcompiler/plasmid.py @@ -35,8 +35,7 @@ def _match_fusion_sites(self, doc: sbol2.document) -> List[str]: if seq == sequence.upper(): fusion_sites.append(key) - # fusion_sites.sort() - return fusion_sites + return [fusion_sites[0], fusion_sites[-1]] def _get_antibiotic_resistance(self, doc: sbol2.Document) -> str: for component in ( From def5b8bf92639e25c7c7fc17eaf47bf68f5e8ad9 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 14 Apr 2026 13:47:18 -0600 Subject: [PATCH 14/47] digestion now takes in Plasmid object; implementation selection happens within digestion --- src/buildcompiler/sbol2build.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 54b08ef..b925f6f 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -68,11 +68,8 @@ def run( :return: List of all composites generated, in the form of tuples of ComponentDefinition and Sequence. """ for plasmid in self.part_plasmids: - plasmid_impl = plasmid.plasmid_implementations[ - 0 - ] # TODO update with more sophisticated selection process? extracts_tuple_list, _ = part_digestion( - plasmid_impl, + plasmid, [self.restriction_enzyme], self.assembly_activity, self.source_document, @@ -322,7 +319,7 @@ def is_circular(obj: sbol2.ComponentDefinition) -> bool: def part_digestion( - reactant: sbol2.Implementation, + reactant: Plasmid, restriction_enzymes: List[sbol2.Implementation], assembly_activity: sbol2.Activity, document: sbol2.Document, @@ -339,27 +336,27 @@ def part_digestion( :param document: original SBOL2 document to be used to extract referenced objects. :return: A tuple of a list ComponentDefinitions and Sequences, and an assembly plan ModuleDefinition. """ - - reactant_component_definition = document.get(reactant.built) + reactant_impl = reactant.plasmid_implementations[0] + reactant_component_definition = reactant.plasmid_definition reactant_displayId = reactant_component_definition.displayId types = set(reactant_component_definition.types or []) if not types.intersection(DNA_TYPES): raise TypeError( - f"The reactant should have a DNA type. Types found: {reactant.types}." + f"The reactant should have a DNA type. Types found: {reactant_component_definition.types}." ) if len(reactant_component_definition.sequences) != 1: raise ValueError( - f"The reactant needs to have precisely one sequence. The input reactant has {len(reactant.sequences)} sequences" + f"The reactant needs to have precisely one sequence. The input reactant has {len(reactant_component_definition.sequences)} sequences" ) extracts_list = [] restriction_enzymes_pydna = [] assembly_activity.usages.add( sbol2.Usage( - uri=f"{reactant.displayId}", - entity=reactant.identity, + uri=f"{reactant_impl.displayId}", + entity=reactant_impl.identity, role="http://sbols.org/v2#build", ) ) From 274775850289ff4e0d4e2311d9058d505d974dfa Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 21 Apr 2026 11:31:42 -0600 Subject: [PATCH 15/47] remove todo --- src/buildcompiler/sbol2build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index b925f6f..8f7abf3 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -39,7 +39,7 @@ def __init__( # TODO add fields for activity/agent/plan self, part_plasmids: List[Plasmid], backbone_plasmid: Plasmid, - restriction_enzyme: sbol2.Implementation, # TODO search for implementation in document, or domesticate the RE + restriction_enzyme: sbol2.Implementation, ligase: sbol2.Implementation, source_document: sbol2.Document, final_document: sbol2.Document, From 494ddcf1b6f73f8e540790e934ecaa7ce8cc98be Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 21 Apr 2026 16:20:12 -0600 Subject: [PATCH 16/47] ligase and RE flow optimized + domestication added if not found --- notebooks/build_compiler_test.ipynb | 529 +++++++++++++++++++++++----- src/buildcompiler/buildcompiler.py | 110 +++--- src/buildcompiler/constants.py | 2 +- 3 files changed, 513 insertions(+), 128 deletions(-) diff --git a/notebooks/build_compiler_test.ipynb b/notebooks/build_compiler_test.ipynb index d30433f..1ed655c 100644 --- a/notebooks/build_compiler_test.ipynb +++ b/notebooks/build_compiler_test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "id": "87bdb42e", "metadata": {}, "outputs": [], @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "id": "e60a9c84", "metadata": {}, "outputs": [], @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "id": "90648527", "metadata": {}, "outputs": [ @@ -33,65 +33,61 @@ "name": "stdout", "output_type": "stream", "text": [ - "Indexing collection: https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\n", - "Indexing collection: https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\n" + "Indexing collection: https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\n" ] } ], "source": [ - "auth = \"b9a7ee3a-5a02-42cd-ad1a-454b68cbe2a1\"\n", + "auth = \"1812840e-aa95-4588-9dc3-2a94e0bc1ed4\"\n", + "# collections = [\n", + "# \"https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\",\n", + "# \"https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\",\n", + "# ]\n", "collections = [\n", " \"https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\",\n", - " \"https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\",\n", "]\n", "buildcompiler = BuildCompiler(collections, \"https://synbiohub.org\", auth, None)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "1712f5ce", "metadata": {}, "outputs": [], "source": [ "from buildcompiler.plasmid import Plasmid\n", - "from buildcompiler.abstract_translator import get_or_pull\n", "from typing import List\n", - "from buildcompiler.constants import AMP\n", + "from buildcompiler.constants import LVL2_FUSION_SITE_ORDER, KAN, AMP\n", + "from buildcompiler.sbol2build import Assembly\n", + "import warnings\n", "\n", "\n", - "def _extract_lvl2_design_parts(\n", - " self, design_doc: sbol2.Document\n", - ") -> List[List[sbol2.ComponentDefinition]]:\n", + "def _extract_lvl2_TUs( # TODO send to misc helper file instead of buildcompiler.py?\n", + " design_doc: sbol2.Document,\n", + ") -> List[sbol2.ComponentDefinition]:\n", " \"\"\"\n", - " Returns definitions of level-0 parts grouped by level-1 components.\n", + " Returns the component definitions of each level-1 component (TU)\n", + " in the design.\n", "\n", " Args:\n", " design: :class:`sbol2.Document` containing the design.\n", "\n", " Returns:\n", - " A list where each element corresponds to a level-1 component\n", - " and contains a list of its part definitions in sequential order.\n", + " A list of TU component definitions in sequential order.\n", " \"\"\"\n", - " result = []\n", - "\n", - " design = extract_toplevel_definition(design_doc)\n", - "\n", - " for lvl1_comp in design.getInSequentialOrder():\n", - " lvl0_ordered = self.sbol_doc.get(lvl1_comp.definition).getInSequentialOrder()\n", + " top_design = extract_toplevel_definition(design_doc)\n", "\n", - " parts = [\n", - " get_or_pull(self.sbol_doc, self.sbh, lvl0_comp.definition)\n", - " for lvl0_comp in lvl0_ordered\n", - " ]\n", - "\n", - " result.append(parts)\n", - "\n", - " return result\n", + " return [\n", + " design_doc.get(comp.definition) for comp in top_design.getInSequentialOrder()\n", + " ]\n", "\n", "\n", "def assembly_lvl2(\n", - " self_buildcompiler, abstract_design_doc: sbol2.Document, backbone: Plasmid = None\n", + " self_buildcompiler,\n", + " abstract_design_doc: sbol2.Document,\n", + " backbone: Plasmid = None,\n", + " product_name: str = None,\n", ") -> list[sbol2.ComponentDefinition]:\n", " \"\"\"Assemble level-2 plasmids for the full design.\n", "\n", @@ -105,16 +101,84 @@ " # get high level genes, send to assembly_lvl1\n", " # send original abstract_design to get a new dictionary\n", " # send new dictionary to _get_backbone or get_compatible plasmids with AMP\n", - " TUs = _extract_lvl2_design_parts(self_buildcompiler, abstract_design_doc)\n", + " TUs = _extract_lvl2_TUs(abstract_design_doc)\n", + " lvl1_plasmids = []\n", + "\n", + " for i, TU in enumerate(TUs):\n", + " print(TU.displayId)\n", + "\n", + " # l1 backbone zselection\n", + " backbone_fusion_sites = LVL2_FUSION_SITE_ORDER[i]\n", + " backbone = next(\n", + " plasmid\n", + " for plasmid in self_buildcompiler.indexed_backbones\n", + " if plasmid.fusion_sites == backbone_fusion_sites\n", + " and plasmid.antibiotic_resistance == KAN\n", + " )\n", + "\n", + " print(backbone)\n", + "\n", + " # TODO insert check here to see if the TU exists already (#43). should not be too expensive, as long as we search only indexed_plasmids where AR=KAN\n", + " composite_plasmids, final_doc = self_buildcompiler.assembly_lvl1(\n", + " TU, backbone=backbone, product_name=f\"{TU.displayId}_plas\"\n", + " )\n", + "\n", + " simplified_representation, new_defs = self_buildcompiler._encapsulate_TU(\n", + " composite_plasmids[0]\n", + " )\n", + " final_doc.add_list(new_defs)\n", + " lvl1_plasmids.append(simplified_representation)\n", + " print(simplified_representation)\n", + "\n", + " final_doc.write(\"encap.xml\")\n", + "\n", + " # get l2 backbone\n", + " plasmid_dict = {}\n", + " for p in lvl1_plasmids:\n", + " key = p.plasmid_definition.displayId\n", + " plasmid_dict.setdefault(key, []).append(p)\n", + "\n", + " backbone, _ = self_buildcompiler._get_backbone(\n", + " plasmid_dict, antibiotic_resistance=AMP\n", + " )\n", + "\n", + " print(backbone)\n", "\n", - " for lvl1_comp_list in TUs:\n", - " plasmid_dict = self_buildcompiler._construct_plasmid_dict(lvl1_comp_list, AMP)\n", - " print(plasmid_dict)" + " # BbsI for l2\n", + " if self_buildcompiler.BbsI_impl is None:\n", + " self_buildcompiler._create_RE_implementation(\"BbsI\")\n", + " warnings.warn(\n", + " \"BbsI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\",\n", + " RuntimeWarning,\n", + " )\n", + "\n", + " # TODO see about making these common enzymes (BsaI, BbSI, T4) global or class variables, so they only need to be searched for once\n", + " if self_buildcompiler.T4_ligase_impl is None:\n", + " self_buildcompiler._create_ligase_implementation()\n", + " warnings.warn(\n", + " \"No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.\",\n", + " RuntimeWarning,\n", + " )\n", + "\n", + " assembly = Assembly(\n", + " lvl1_plasmids,\n", + " backbone,\n", + " self_buildcompiler.BbsI_impl,\n", + " self_buildcompiler.T4_ligase_impl,\n", + " self_buildcompiler.sbol_doc,\n", + " final_doc,\n", + " product_name,\n", + " )\n", + "\n", + " lvl2_plasmids, final_doc = assembly.run() # TODO upload product_doc?\n", + " self_buildcompiler.indexed_plasmids.extend(lvl2_plasmids)\n", + "\n", + " return lvl2_plasmids, final_doc" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 8, "id": "3e2272ff", "metadata": {}, "outputs": [ @@ -122,50 +186,366 @@ "name": "stdout", "output_type": "stream", "text": [ - "https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1\n", - "https://synbiohub.org/user/Gon/CIDARMoCloParts/B0032/1\n", - "https://synbiohub.org/user/Gon/CIDARMoCloParts/E0040m_gfp/1\n", - "https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1\n", - "https://synbiohub.org/user/Gon/CIDARMoCloParts/J23116/1\n", - "https://synbiohub.org/user/Gon/CIDARMoCloParts/B0033/1\n", - "https://synbiohub.org/user/Gon/CIDARMoCloParts/E1010m_rfp/1\n", - "https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1\n" + "Gen\n", + "Plasmid:\n", + " Name: DVK_AE_A_E\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_AE/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin\n", + "\n", + "matched pJ23100_AB_A_B with DVK_AE_A_E on fusion site A!\n", + "matched pB0032_BC_B_C with pJ23100_AB_A_B on fusion site B!\n", + "matched pE0040_CD_C_D with pB0032_BC_B_C on fusion site C!\n", + "matched final component pB0015_DE_D_E with pE0040_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n", + "Plasmid:\n", + " Name: Gen_plas_1_simple_A_E\n", + " Plasmid Definition: https://SBOL2Build.org/Gen_plas_1_simple/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/Gen_plas_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin\n", + "\n", + "Gen1\n", + "Plasmid:\n", + " Name: DVK_EF_E_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_EF/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_EF_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'F']\n", + " Antibiotic Resistance: Kanamycin\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ryan/GitRepo/SBOL2Build/src/buildcompiler/buildcompiler.py:310: RuntimeWarning: BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", + " warnings.warn(\n", + "/Users/ryan/GitRepo/SBOL2Build/src/buildcompiler/buildcompiler.py:317: RuntimeWarning: No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "matched pJ23116_EB_E_B with DVK_EF_E_F on fusion site E!\n", + "matched pB0033_BC_B_C with pJ23116_EB_E_B on fusion site B!\n", + "matched pE1010_CD_C_D with pB0033_BC_B_C on fusion site C!\n", + "matched final component pB0015_DF_D_F with pE1010_CD_C_D and DVK_EF_E_F on fusion sites (D, F)!\n", + "Plasmid:\n", + " Name: Gen1_plas_1_simple_E_F\n", + " Plasmid Definition: https://SBOL2Build.org/Gen1_plas_1_simple/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/Gen1_plas_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['E', 'F']\n", + " Antibiotic Resistance: Kanamycin\n", + "\n", + "matched Gen_plas_1_simple_A_E with DVA_AF_A_F on fusion site A!\n", + "matched final component Gen1_plas_1_simple_E_F with Gen_plas_1_simple_A_E and DVA_AF_A_F on fusion sites (E, F)!\n", + "Success with backbone: DVA_AF_A_F and plasmids: ['Gen_plas_1_simple_A_E', 'Gen1_plas_1_simple_E_F']\n", + "Plasmid:\n", + " Name: DVA_AF_A_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_AF/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVA_AF_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'F']\n", + " Antibiotic Resistance: Ampicillin\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/ss/w4r72t4j057bp2m46gq_kjwr0000gn/T/ipykernel_72008/472231404.py:82: RuntimeWarning: BbsI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", + " warnings.warn(\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Not supported number of products. Found: 0", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m composite_plasmids, final_doc = \u001b[43massembly_lvl2\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuildcompiler\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdesign_doc\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 106\u001b[39m, in \u001b[36massembly_lvl2\u001b[39m\u001b[34m(self_buildcompiler, abstract_design_doc, backbone, product_name)\u001b[39m\n\u001b[32m 91\u001b[39m warnings.warn(\n\u001b[32m 92\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mNo appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.\u001b[39m\u001b[33m\"\u001b[39m, \n\u001b[32m 93\u001b[39m \u001b[38;5;167;01mRuntimeWarning\u001b[39;00m,\n\u001b[32m 94\u001b[39m )\n\u001b[32m 96\u001b[39m assembly = Assembly(\n\u001b[32m 97\u001b[39m lvl1_plasmids,\n\u001b[32m 98\u001b[39m backbone,\n\u001b[32m (...)\u001b[39m\u001b[32m 103\u001b[39m product_name,\n\u001b[32m 104\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m106\u001b[39m lvl2_plasmids, final_doc = \u001b[43massembly\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# TODO upload product_doc?\u001b[39;00m\n\u001b[32m 107\u001b[39m self_buildcompiler.indexed_plasmids.extend(lvl2_plasmids)\n\u001b[32m 109\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m lvl2_plasmids, final_doc\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/GitRepo/SBOL2Build/src/buildcompiler/sbol2build.py:83\u001b[39m, in \u001b[36mAssembly.run\u001b[39m\u001b[34m(self, include_extracted_parts)\u001b[39m\n\u001b[32m 80\u001b[39m \u001b[38;5;28mself\u001b[39m.extracted_parts.append(extracts_tuple_list[\u001b[32m0\u001b[39m][\u001b[32m0\u001b[39m])\n\u001b[32m 82\u001b[39m backbone_impl = \u001b[38;5;28mself\u001b[39m.backbone.plasmid_implementations[\u001b[32m0\u001b[39m]\n\u001b[32m---> \u001b[39m\u001b[32m83\u001b[39m extracts_tuple_list, _ = \u001b[43mbackbone_digestion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 84\u001b[39m \u001b[43m \u001b[49m\u001b[43mbackbone_impl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 85\u001b[39m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mrestriction_enzyme\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 86\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43massembly_activity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 87\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_document\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 88\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 90\u001b[39m append_extracts_to_doc(extracts_tuple_list, \u001b[38;5;28mself\u001b[39m.source_document)\n\u001b[32m 91\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m include_extracted_parts:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/GitRepo/SBOL2Build/src/buildcompiler/sbol2build.py:626\u001b[39m, in \u001b[36mbackbone_digestion\u001b[39m\u001b[34m(reactant, restriction_enzymes, assembly_activity, document)\u001b[39m\n\u001b[32m 623\u001b[39m digested_reactant = ds_reactant.cut(restriction_enzymes_pydna)\n\u001b[32m 625\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(digested_reactant) < \u001b[32m2\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(digested_reactant) > \u001b[32m3\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m626\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 627\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mNot supported number of products. Found: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(digested_reactant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 628\u001b[39m )\n\u001b[32m 629\u001b[39m \u001b[38;5;66;03m# TODO select them based on content rather than size.\u001b[39;00m\n\u001b[32m 630\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m circular \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(digested_reactant) == \u001b[32m2\u001b[39m:\n", + "\u001b[31mValueError\u001b[39m: Not supported number of products. Found: 0" ] } ], "source": [ - "assembly_lvl2(buildcompiler, design_doc)" + "composite_plasmids, final_doc = assembly_lvl2(buildcompiler, design_doc)" ] }, { "cell_type": "code", "execution_count": null, - "id": "7c12e504", + "id": "2ee945ec", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[, ]\n", - "[]\n", - "matched pJ23100_AB_A_B with DVK_AE_A_E on fusion site A!\n", - "matched pB0034_BC_B_C with pJ23100_AB_A_B on fusion site B!\n", - "matched pE0030_CD_C_D with pB0034_BC_B_C on fusion site C!\n", - "matched final component pB0015_DE_D_E with pE0030_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n", - "Success with backbone: DVK_AE_A_E and plasmids: ['pJ23100_AB_A_B', 'pB0034_BC_B_C', 'pE0030_CD_C_D', 'pB0015_DE_D_E']\n", "[Plasmid:\n", - " Name: composite_1_A_B_C_D_E\n", - " Plasmid Definition: https://SBOL2Build.org/composite_1/1\n", + " Name: pE0040_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0040_CD/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pE0040_CD_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0034_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0034_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0034_BC_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_EB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_EB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0033_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0033_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0033_BC_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_FB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_EB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_EB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_GB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pE0030_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0030_CD/1\n", " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://SBOL2Build.org/composite_1_impl/1']\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pE0030_CD_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_FB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_AB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0032_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0032_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0032_BC_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DF_D_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DF/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DF_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'F']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DG_D_G\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DG/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DG_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'G']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DH_D_H\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DH/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DH_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'H']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_AB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_GB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_GB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pE1010_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE1010_CD/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pE1010_CD_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DE_D_E\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DE/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DE_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'E']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_FB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_EB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_EB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: Gen_plas_1_A_E\n", + " Plasmid Definition: https://SBOL2Build.org/Gen_plas_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/Gen_plas_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin\n", + ", Plasmid:\n", + " Name: Gen1_plas_1_E_F\n", + " Plasmid Definition: https://SBOL2Build.org/Gen1_plas_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/Gen1_plas_1_impl/1']\n", " Strain Implementations: [None]\n", - " Fusion Sites: ['A', 'B', 'C', 'D', 'E']\n", + " Fusion Sites: ['E', 'F']\n", + " Antibiotic Resistance: Kanamycin\n", + "] [Plasmid:\n", + " Name: DVK_EF_E_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_EF/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_EF_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'F']\n", + " Antibiotic Resistance: Kanamycin\n", + ", Plasmid:\n", + " Name: DVK_AE_A_E\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_AE/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin\n", + ", Plasmid:\n", + " Name: DVK_FG_F_G\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_FG/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_FG_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'G']\n", + " Antibiotic Resistance: Kanamycin\n", + ", Plasmid:\n", + " Name: DVK_GH_G_H\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_GH/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_GH_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'H']\n", " Antibiotic Resistance: Kanamycin\n", "]\n" ] } ], + "source": [ + "print(buildcompiler.indexed_plasmids, buildcompiler.indexed_backbones)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c12e504", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[, ]\n", + "[]\n" + ] + } + ], "source": [ "print(buildcompiler.restriction_enzyme_implementations)\n", "print(buildcompiler.ligase_implementations)\n", @@ -175,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "79fd0cb5", "metadata": {}, "outputs": [], @@ -210,19 +590,7 @@ "execution_count": null, "id": "8f4ea67c", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'composite_plasmids' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m bacterial_transformation(\u001b[43mcomposite_plasmids\u001b[49m, chassis_implementation, chassis, final_doc)\n", - "\u001b[31mNameError\u001b[39m: name 'composite_plasmids' is not defined" - ] - } - ], + "outputs": [], "source": [ "# bacterial_transformation(composite_plasmids, chassis_implementation, chassis, final_doc)" ] @@ -230,23 +598,10 @@ { "cell_type": "code", "execution_count": null, - "id": "5553bfc3", + "id": "cd334add", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Valid.'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# final_doc.write(\"fullbuild.xml\")" - ] + "outputs": [], + "source": [] } ], "metadata": { diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 9595fed..a40a1dc 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -4,7 +4,11 @@ from typing import List, Dict, Tuple from buildcompiler.plasmid import Plasmid -from buildcompiler.sbol2build import Assembly, dna_componentdefinition_with_sequence +from buildcompiler.sbol2build import ( + Assembly, + dna_componentdefinition_with_sequence, + rebase_restriction_enzyme, +) from .abstract_translator import ( get_or_pull, get_compatible_plasmids, @@ -50,8 +54,9 @@ def __init__( self.sbol_doc = sbol_doc or sbol2.Document() self.indexed_plasmids = [] self.indexed_backbones = [] - self.restriction_enzyme_implementations = [] - self.ligase_implementations = [] + self.BsaI_impl = None + self.BbsI_impl = None + self.T4_ligase_impl = None self._index_collections(collections) @@ -110,9 +115,18 @@ def _index_collections(self, collections: List[str]): ) elif sbol2.BIOPAX_PROTEIN in built_object.types: if RESTRICTION_ENZYME in built_object.roles: - self.restriction_enzyme_implementations.append(implementation) + if ( + built_object.definition + == "http://rebase.neb.com/rebase/enz/BsaI.html" + ): + self.BsaI_impl = implementation + elif ( + built_object.definition + == "http://rebase.neb.com/rebase/enz/BbsI.html" + ): + self.BbsI_impl = implementation elif LIGASE in built_object.roles: - self.ligase_implementations.append(implementation) + self.T4_ligase_impl = implementation for strain in self.sbol_doc.moduleDefinitions: if ORGANISM_STRAIN in strain.roles: @@ -159,28 +173,18 @@ def _remove_internal_bsai_sites(sequence: str) -> tuple[str, int]: removals += 1 return domesticated_sequence, removals - bsaI_impl = next( - ( - impl - for impl in self.restriction_enzyme_implementations - if self.sbol_doc.find(impl.built).displayId == "BsaI" - ), - None, - ) - if bsaI_impl is None: + if self.BsaI_impl is None: self._create_RE_implementation("BsaI") warnings.warn( "BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.", RuntimeWarning, ) - ligase_impl = ( - self.ligase_implementations[0] if self.ligase_implementations else None - ) - if ligase_impl is None: + if self.T4_ligase_impl is None: self._create_ligase_implementation() warnings.warn( - "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol." + "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.", + RuntimeWarning, ) dsDNAs = [] @@ -269,8 +273,8 @@ def _remove_internal_bsai_sites(sequence: str) -> tuple[str, int]: assembly = Assembly( [Plasmid(insert_definition, None, [insert_impl], [], self.sbol_doc)], backbone, - bsaI_impl, - ligase_impl, + self.bsaI_impl, + self.T4_ligase_impl, self.sbol_doc, ) assembly_products, assembly_doc = assembly.run() @@ -311,27 +315,25 @@ def assembly_lvl1( else: compatible_plasmids = get_compatible_plasmids(plasmid_dict, backbone) - bsaI_impl = next( - impl - for impl in self.restriction_enzyme_implementations - if self.sbol_doc.find(impl.built).displayId == "BsaI" - ) - if bsaI_impl is None: - raise ValueError( - "BsaI Restriction enzyme not found in provided collections. Terminating assembly." + if self.BsaI_impl is None: + self._create_RE_implementation("BsaI") + warnings.warn( + "BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.", + RuntimeWarning, ) - ligase_impl = self.ligase_implementations[0] - if ligase_impl is None: - raise ValueError( - "No appropriate ligase found in provided collections. Terminating assembly." + if self.T4_ligase_impl is None: + self._create_ligase_implementation() + warnings.warn( + "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.", + RuntimeWarning, ) assembly = Assembly( compatible_plasmids, backbone, - bsaI_impl, - ligase_impl, + self.BsaI_impl, + self.T4_ligase_impl, self.sbol_doc, final_doc, product_name, @@ -483,7 +485,6 @@ def _get_backbone( for backbone in sorted_backbones: if backbone.antibiotic_resistance == antibiotic_resistance: # check for compatibility - # also, if we find a hit here we may not need to run get_compatible plasmids later, work is already done try: compatible_plasmids = get_compatible_plasmids( plasmid_dict, backbone @@ -865,8 +866,37 @@ def add_precedes(subj, obj): return new_plasmid, new_objs - def _create_RE_implementation(name: str): - pass + def _create_RE_implementation(self, name: str): + RE_def = rebase_restriction_enzyme(name) + + RE_sourcing = sbol2.Activity(f"{name}_restriction_enzyme_purchase") + RE_sourcing.name = "Restriction Enzyme Purchase" + + RE_impl = sbol2.Implementation(f"{RE_def.displayId}_impl") + + RE_impl.built = RE_def.identity + RE_impl.wasGeneratedBy = RE_sourcing.identity + + self.sbol_doc.add_list([RE_impl, RE_def]) + + if name == "BsaI": + self.BsaI_impl = RE_impl + elif name == "BbsI": + self.BbsI_impl = RE_impl + + def _create_ligase_implementation(self): + ligase_def = sbol2.ComponentDefinition("T4_Ligase") + ligase_def.name = "T4_Ligase" + ligase_def.types = [sbol2.BIOPAX_PROTEIN] + ligase_def.roles = ["http://identifiers.org/ncit/NCIT:C16796"] + + ligase_sourcing = sbol2.Activity("ligase_purchase") + ligase_sourcing.name = "Ligase Purchase" + + T4_impl = sbol2.Implementation(f"{ligase_def.displayId}_impl") + + T4_impl.built = ligase_def.identity + T4_impl.wasGeneratedBy = ligase_sourcing.identity - def _create_ligase_implementation(): - pass + self.sbol_doc.add_list([T4_impl, ligase_def]) + self.T4_ligase_impl = T4_impl diff --git a/src/buildcompiler/constants.py b/src/buildcompiler/constants.py index f39a8fe..e455c66 100644 --- a/src/buildcompiler/constants.py +++ b/src/buildcompiler/constants.py @@ -26,7 +26,7 @@ LVL2_FUSION_SITE_ORDER = [["A", "E"], ["E", "F"], ["F", "G"], ["G", "H"]] -# TODO http or https for identifiers? +# TODO CHANGE ALL TO HTTP ENGINEERED_PLASMID = "http://identifiers.org/so/SO:0000637" ENGINEERED_INSERT = "https://identifiers.org/so/SO:0000915" From 69fea09856a2cf591d72dec16a75876ca63aa97a Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 21 Apr 2026 16:33:36 -0600 Subject: [PATCH 17/47] definition->identity --- notebooks/build_compiler_test.ipynb | 28 ++++++++++++---------------- src/buildcompiler/buildcompiler.py | 4 ++-- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/notebooks/build_compiler_test.ipynb b/notebooks/build_compiler_test.ipynb index 1ed655c..caea5e6 100644 --- a/notebooks/build_compiler_test.ipynb +++ b/notebooks/build_compiler_test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "87bdb42e", "metadata": {}, "outputs": [], @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "e60a9c84", "metadata": {}, "outputs": [], @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "90648527", "metadata": {}, "outputs": [ @@ -33,25 +33,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "Indexing collection: https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\n" + "Indexing collection: https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\n", + "Indexing collection: https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\n" ] } ], "source": [ "auth = \"1812840e-aa95-4588-9dc3-2a94e0bc1ed4\"\n", - "# collections = [\n", - "# \"https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\",\n", - "# \"https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\",\n", - "# ]\n", "collections = [\n", " \"https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\",\n", + " \"https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\",\n", "]\n", "buildcompiler = BuildCompiler(collections, \"https://synbiohub.org\", auth, None)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "id": "1712f5ce", "metadata": {}, "outputs": [], @@ -178,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "3e2272ff", "metadata": {}, "outputs": [ @@ -225,9 +223,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/ryan/GitRepo/SBOL2Build/src/buildcompiler/buildcompiler.py:310: RuntimeWarning: BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", - " warnings.warn(\n", - "/Users/ryan/GitRepo/SBOL2Build/src/buildcompiler/buildcompiler.py:317: RuntimeWarning: No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.\n", + "/Users/ryan/GitRepo/SBOL2Build/src/buildcompiler/buildcompiler.py:320: RuntimeWarning: BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", " warnings.warn(\n" ] }, @@ -266,7 +262,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/ss/w4r72t4j057bp2m46gq_kjwr0000gn/T/ipykernel_72008/472231404.py:82: RuntimeWarning: BbsI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", + "/var/folders/ss/w4r72t4j057bp2m46gq_kjwr0000gn/T/ipykernel_78396/3312876232.py:92: RuntimeWarning: BbsI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", " warnings.warn(\n" ] }, @@ -277,8 +273,8 @@ "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m composite_plasmids, final_doc = \u001b[43massembly_lvl2\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuildcompiler\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdesign_doc\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 106\u001b[39m, in \u001b[36massembly_lvl2\u001b[39m\u001b[34m(self_buildcompiler, abstract_design_doc, backbone, product_name)\u001b[39m\n\u001b[32m 91\u001b[39m warnings.warn(\n\u001b[32m 92\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mNo appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.\u001b[39m\u001b[33m\"\u001b[39m, \n\u001b[32m 93\u001b[39m \u001b[38;5;167;01mRuntimeWarning\u001b[39;00m,\n\u001b[32m 94\u001b[39m )\n\u001b[32m 96\u001b[39m assembly = Assembly(\n\u001b[32m 97\u001b[39m lvl1_plasmids,\n\u001b[32m 98\u001b[39m backbone,\n\u001b[32m (...)\u001b[39m\u001b[32m 103\u001b[39m product_name,\n\u001b[32m 104\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m106\u001b[39m lvl2_plasmids, final_doc = \u001b[43massembly\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# TODO upload product_doc?\u001b[39;00m\n\u001b[32m 107\u001b[39m self_buildcompiler.indexed_plasmids.extend(lvl2_plasmids)\n\u001b[32m 109\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m lvl2_plasmids, final_doc\n", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m composite_plasmids, final_doc = \u001b[43massembly_lvl2\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuildcompiler\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdesign_doc\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 115\u001b[39m, in \u001b[36massembly_lvl2\u001b[39m\u001b[34m(self_buildcompiler, abstract_design_doc, backbone, product_name)\u001b[39m\n\u001b[32m 100\u001b[39m warnings.warn(\n\u001b[32m 101\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mNo appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 102\u001b[39m \u001b[38;5;167;01mRuntimeWarning\u001b[39;00m,\n\u001b[32m 103\u001b[39m )\n\u001b[32m 105\u001b[39m assembly = Assembly(\n\u001b[32m 106\u001b[39m lvl1_plasmids,\n\u001b[32m 107\u001b[39m backbone,\n\u001b[32m (...)\u001b[39m\u001b[32m 112\u001b[39m product_name,\n\u001b[32m 113\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m115\u001b[39m lvl2_plasmids, final_doc = \u001b[43massembly\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# TODO upload product_doc?\u001b[39;00m\n\u001b[32m 116\u001b[39m self_buildcompiler.indexed_plasmids.extend(lvl2_plasmids)\n\u001b[32m 118\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m lvl2_plasmids, final_doc\n", "\u001b[36mFile \u001b[39m\u001b[32m~/GitRepo/SBOL2Build/src/buildcompiler/sbol2build.py:83\u001b[39m, in \u001b[36mAssembly.run\u001b[39m\u001b[34m(self, include_extracted_parts)\u001b[39m\n\u001b[32m 80\u001b[39m \u001b[38;5;28mself\u001b[39m.extracted_parts.append(extracts_tuple_list[\u001b[32m0\u001b[39m][\u001b[32m0\u001b[39m])\n\u001b[32m 82\u001b[39m backbone_impl = \u001b[38;5;28mself\u001b[39m.backbone.plasmid_implementations[\u001b[32m0\u001b[39m]\n\u001b[32m---> \u001b[39m\u001b[32m83\u001b[39m extracts_tuple_list, _ = \u001b[43mbackbone_digestion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 84\u001b[39m \u001b[43m \u001b[49m\u001b[43mbackbone_impl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 85\u001b[39m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mrestriction_enzyme\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 86\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43massembly_activity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 87\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_document\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 88\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 90\u001b[39m append_extracts_to_doc(extracts_tuple_list, \u001b[38;5;28mself\u001b[39m.source_document)\n\u001b[32m 91\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m include_extracted_parts:\n", "\u001b[36mFile \u001b[39m\u001b[32m~/GitRepo/SBOL2Build/src/buildcompiler/sbol2build.py:626\u001b[39m, in \u001b[36mbackbone_digestion\u001b[39m\u001b[34m(reactant, restriction_enzymes, assembly_activity, document)\u001b[39m\n\u001b[32m 623\u001b[39m digested_reactant = ds_reactant.cut(restriction_enzymes_pydna)\n\u001b[32m 625\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(digested_reactant) < \u001b[32m2\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(digested_reactant) > \u001b[32m3\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m626\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 627\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mNot supported number of products. Found: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(digested_reactant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 628\u001b[39m )\n\u001b[32m 629\u001b[39m \u001b[38;5;66;03m# TODO select them based on content rather than size.\u001b[39;00m\n\u001b[32m 630\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m circular \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(digested_reactant) == \u001b[32m2\u001b[39m:\n", "\u001b[31mValueError\u001b[39m: Not supported number of products. Found: 0" diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index a40a1dc..4119eb2 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -116,12 +116,12 @@ def _index_collections(self, collections: List[str]): elif sbol2.BIOPAX_PROTEIN in built_object.types: if RESTRICTION_ENZYME in built_object.roles: if ( - built_object.definition + built_object.identity == "http://rebase.neb.com/rebase/enz/BsaI.html" ): self.BsaI_impl = implementation elif ( - built_object.definition + built_object.identity == "http://rebase.neb.com/rebase/enz/BbsI.html" ): self.BbsI_impl = implementation From d9fbdf20605587038e60d9b49947178ae801b093 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Mon, 27 Apr 2026 14:46:04 -0600 Subject: [PATCH 18/47] lvl2 and multi-design lvl1 drafts --- src/buildcompiler/buildcompiler.py | 179 ++++++++++++++++++++++------- 1 file changed, 135 insertions(+), 44 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 4119eb2..7b2323e 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -12,6 +12,7 @@ from .abstract_translator import ( get_or_pull, get_compatible_plasmids, + extract_toplevel_definition, ) from .constants import ( AMP, @@ -19,6 +20,7 @@ KAN, FUSION_SITES, LIGASE, + LVL2_FUSION_SITE_ORDER, PART_ROLES, PLASMID_VECTOR, RESTRICTION_ENZYME, @@ -285,11 +287,11 @@ def _remove_internal_bsai_sites(sequence: str) -> tuple[str, int]: def assembly_lvl1( self, - abstract_design: sbol2.ComponentDefinition, + abstract_designs: List[sbol2.ComponentDefinition], final_doc: sbol2.Document = sbol2.Document(), product_name: str = None, backbone: Plasmid = None, - ) -> list[sbol2.ComponentDefinition]: + ) -> Tuple[Dict, sbol2.Document]: """Assemble level-1 plasmids for each gene/transcriptional unit. Uses indexed plasmids/backbones and the current design to assemble @@ -300,49 +302,51 @@ def assembly_lvl1( :raises LookupError: If compatible plasmids or backbones cannot be found. """ - # TODO: Identify parts from the abstract design needed for lvl1 assembly and find compatible indexed plasmids/backbones. - # if backbone provided then use it.Then look for parts constraind by the backbone fusion sites. - # else, run an algorithm to try a backbone from 4 the choices. If it fails on the 4 raise an error. - - plasmid_dict = self._get_input_plasmids( - design=abstract_design, antibiotic_resistance=AMP - ) + assembly_dict = {} - if not backbone: - backbone, compatible_plasmids = self._get_backbone( - plasmid_dict, antibiotic_resistance=KAN + for abstract_design in abstract_designs: + plasmid_dict = self._get_input_plasmids( + design=abstract_design, antibiotic_resistance=AMP ) - else: - compatible_plasmids = get_compatible_plasmids(plasmid_dict, backbone) - if self.BsaI_impl is None: - self._create_RE_implementation("BsaI") - warnings.warn( - "BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.", - RuntimeWarning, - ) + if not backbone: + backbone, compatible_plasmids = self._get_backbone( + plasmid_dict, antibiotic_resistance=KAN + ) + else: + compatible_plasmids = get_compatible_plasmids(plasmid_dict, backbone) + + if self.BsaI_impl is None: + self._create_RE_implementation("BsaI") + warnings.warn( + "BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.", + RuntimeWarning, + ) - if self.T4_ligase_impl is None: - self._create_ligase_implementation() - warnings.warn( - "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.", - RuntimeWarning, - ) + if self.T4_ligase_impl is None: + self._create_ligase_implementation() + warnings.warn( + "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.", + RuntimeWarning, + ) - assembly = Assembly( - compatible_plasmids, - backbone, - self.BsaI_impl, - self.T4_ligase_impl, - self.sbol_doc, - final_doc, - product_name, - ) - composite_plasmids, product_doc = assembly.run() # TODO upload product_doc? + assembly = Assembly( + compatible_plasmids, + backbone, + self.BsaI_impl, + self.T4_ligase_impl, + self.sbol_doc, + final_doc, + product_name, + ) + composite_plasmids, product_doc = assembly.run() # TODO upload product_doc? - self.indexed_plasmids.extend(composite_plasmids) + self.indexed_plasmids.extend( + composite_plasmids + ) # see about using a wrapper function to do this, where it checks if the design already exists (like in index_collections). this way we avoid duplicate issues that might come with loading the abstract design definitions into the self.sbol_doc ahead of time + assembly_dict[abstract_design.identity] = composite_plasmids - return composite_plasmids, product_doc + return assembly_dict, product_doc # TODO: Create a SBOL representation of the assembly process, updating the SBOL Document. # Using he selected parts create the representation, you need Plasmids, BsaI and T4 Ligase. @@ -354,6 +358,9 @@ def assembly_lvl1( def assembly_lvl2( self, + abstract_design_doc: sbol2.Document, + backbone: Plasmid = None, + product_name: str = None, ) -> list[sbol2.ComponentDefinition]: """Assemble level-2 plasmids for the full design. @@ -364,14 +371,78 @@ def assembly_lvl2( :rtype: list[Plasmid] :raises LookupError: If compatible plasmids or backbones cannot be found. """ + # get high level genes, send to assembly_lvl1 + # send original abstract_design to get a new dictionary + # send new dictionary to _get_backbone or get_compatible plasmids with AMP + TUs = _extract_lvl2_TUs(abstract_design_doc) + lvl1_plasmids = [] - # TODO: Identify parts from the abstract design needed for lvl2 assembly and find compatible indexed plasmids/backbones. - # TODO: Create a SBOL representation of the assembly process, updating the SBOL Document. - # TODO: Generate a protocol for the assembly process. - protocol = "To be implemented by PUDU" - # TODO: Updates indexed plasmids with assembled versions. + for i, TU in enumerate(TUs): + print(TU.displayId) - return protocol + # l1 backbone zselection + backbone_fusion_sites = LVL2_FUSION_SITE_ORDER[i] + backbone = next( + plasmid + for plasmid in self.indexed_backbones + if plasmid.fusion_sites == backbone_fusion_sites + and plasmid.antibiotic_resistance == KAN + ) + + print(backbone) + + # TODO insert check here to see if the TU exists already (#43). should not be too expensive, as long as we search only indexed_plasmids where AR=KAN + composite_plasmids, final_doc = self.assembly_lvl1( + TU, backbone=backbone, product_name=f"{TU.displayId}_plas" + ) + + simplified_representation, new_defs = self._encapsulate_TU( + composite_plasmids[0] + ) + final_doc.add_list(new_defs) + lvl1_plasmids.append(simplified_representation) + print(simplified_representation) + + # get l2 backbone + plasmid_dict = {} + for p in lvl1_plasmids: + key = p.plasmid_definition.displayId + plasmid_dict.setdefault(key, []).append(p) + + backbone, _ = self._get_backbone(plasmid_dict, antibiotic_resistance=AMP) + + print(backbone) + + # BbsI for l2 + if self.BbsI_impl is None: + self._create_RE_implementation("BbsI") + warnings.warn( + "BbsI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.", + RuntimeWarning, + ) + + # TODO see about making these common enzymes (BsaI, BbSI, T4) global or class variables, so they only need to be searched for once + if self.T4_ligase_impl is None: + self._create_ligase_implementation() + warnings.warn( + "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.", + RuntimeWarning, + ) + + assembly = Assembly( + lvl1_plasmids, + backbone, + self.BbsI_impl, + self.T4_ligase_impl, + self.sbol_doc, + final_doc, + product_name, + ) + + lvl2_plasmids, final_doc = assembly.run() # TODO upload product_doc? + self.indexed_plasmids.extend(lvl2_plasmids) + + return lvl2_plasmids, final_doc def _extract_plasmids_from_strain( self, @@ -900,3 +971,23 @@ def _create_ligase_implementation(self): self.sbol_doc.add_list([T4_impl, ligase_def]) self.T4_ligase_impl = T4_impl + + +def _extract_lvl2_TUs( # TODO send to misc helper file instead of buildcompiler.py? + design_doc: sbol2.Document, +) -> List[sbol2.ComponentDefinition]: + """ + Returns the component definitions of each level-1 component (TU) + in the design. + + Args: + design: :class:`sbol2.Document` containing the design. + + Returns: + A list of TU component definitions in sequential order. + """ + top_design = extract_toplevel_definition(design_doc) + + return [ + design_doc.get(comp.definition) for comp in top_design.getInSequentialOrder() + ] From 88a3b44b5c759abce9ef91fc4e3aea800bc632f0 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Mon, 27 Apr 2026 15:17:42 -0600 Subject: [PATCH 19/47] docstring updates --- src/buildcompiler/sbol2build.py | 122 ++++++++++++++++++++++---------- 1 file changed, 83 insertions(+), 39 deletions(-) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 8f7abf3..1de23c9 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -28,11 +28,18 @@ class Assembly: """Creates an Assembly Plan. - :param name: Name of the assembly plan ModuleDefinition. - :param part_plasmids: Parts in backbone to be assembled. - :param plasmid_acceptor_backbone: Backbone in which parts are inserted on the assembly. - :param restriction_enzyme: Restriction enzyme name used by PyDNA. Case sensitive, follow standard restriction enzyme nomenclature, i.e. 'BsaI' - :param document: SBOL Document where the assembly plan will be created. + :param part_plasmids: List of part-in-backbone plasmids to be assembled. + :param backbone_plasmid: Acceptor backbone into which parts are inserted. + :param restriction_enzyme: SBOL Implementation representing the restriction enzyme + (e.g. BsaI) used to digest parts during assembly. + :param ligase: SBOL Implementation representing the ligase (e.g. T4) used to + ligate digested parts. + :param source_document: SBOL Document containing the source part/plasmid definitions. + :param final_document: SBOL Document where assembled composite plasmid definitions + will be written. + :param composite_prefix: Prefix used when naming composite plasmid definitions. + Defaults to 'composite'. + """ def __init__( # TODO add fields for activity/agent/plan @@ -58,14 +65,27 @@ def __init__( # TODO add fields for activity/agent/plan def run( self, include_extracted_parts: bool = False - ) -> List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]: - """Runs full assembly simulation. - - `document` parameter of golden_gate_assembly_plan object is updated by reference to include assembly plan ModuleDefinition and all related information. - - Runs :func:`part_digestion` for all `part_plasmids` and :func:`backbone_digestion` for `plasmid_acceptor_backbone` with `restriction_enzyme`. Then runs :func:`ligation` with these parts to form composites. - - :return: List of all composites generated, in the form of tuples of ComponentDefinition and Sequence. + ) -> Tuple[List[Plasmid], sbol2.Document]: + """Run the full Golden Gate assembly simulation. + + Executes the following steps in order: + + 1. Calls :func:`part_digestion` on each plasmid in ``part_plasmids`` using + ``restriction_enzyme``, appending extracted parts to ``source_document``. + 2. Calls :func:`backbone_digestion` on the first implementation of ``backbone``, + appending the linearised backbone to ``source_document``. + 3. Calls :func:`ligation` on all extracted parts and the backbone to produce + composite plasmid implementations, written to ``final_document``. + 4. Wraps each composite implementation in a :class:`Plasmid` object and returns + the full list alongside the populated ``final_document``. + + :param include_extracted_parts: If ``True``, extracted part and backbone + definitions are also written to ``final_document`` in addition to + ``source_document``. Defaults to ``False``. + :return: A tuple of (composite plasmids, final document), where composite + plasmids is a list of :class:`Plasmid` objects built from the ligated + implementations, and final document is the populated ``sbol2.Document`` + containing all assembly outputs. """ for plasmid in self.part_plasmids: extracts_tuple_list, _ = part_digestion( @@ -324,17 +344,28 @@ def part_digestion( assembly_activity: sbol2.Activity, document: sbol2.Document, ) -> Tuple[List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]]: - """Runs a simulated digestion on the top level sequence in the reactant ComponentDefinition or ModuleDefinition with the given restriciton enzymes, creating a extracted part ComponentDefinition, a digestion Interaction, and converts existing scars to 5' and 3' overhangs. - The product ComponentDefinition is assumed the digested part in this case. - - Written for use with the SBOL2.3 output of https://sbolcanvas.org - - :param reactant: Plasmid DNA to be digested as SBOL ComponentDefinition - :param restriction_enzymes: Restriction enzymes as :class:`sbol2.ComponentDefinition` - (generate with :func:`rebase_restriction_enzyme`). - :param assembly_plan: SBOL ModuleDefinition to contain the functional components, interactions, and participations - :param document: original SBOL2 document to be used to extract referenced objects. - :return: A tuple of a list ComponentDefinitions and Sequences, and an assembly plan ModuleDefinition. + """Simulate restriction digestion of a part plasmid and extract the insert. + + Uses PyDNA to cut the reactant sequence, then constructs SBOL representations + of the extracted part, its 5' and 3' overhangs, and any derived scar sequences. + Each enzyme and the reactant implementation are recorded as usages on + ``assembly_activity``. + + Expects the reactant to be circular with 2 digest products, or linear with 3 + (backbone | part | backbone). The shorter circular product or middle linear + product is taken as the extracted insert. + + :param reactant: Part-in-backbone plasmid to digest. + :param restriction_enzymes: Restriction enzyme implementations; the corresponding + ``ComponentDefinition.name`` must match a PyDNA/ReBase enzyme name (e.g. ``'BsaI'``). + :param assembly_activity: SBOL Activity to record reactant and enzyme usages on. + :param document: Source SBOL document used to resolve referenced definitions and sequences. + :return: A tuple of (extracts, activity), where extracts is a list of + ``(ComponentDefinition, Sequence)`` pairs covering the extracted part, + overhangs, and scar definitions, and activity is the updated ``assembly_activity``. + :raises TypeError: If the reactant has no recognised DNA type. + :raises ValueError: If the reactant does not have exactly one sequence, or if + the number of digest products is unsupported for the reactant topology. """ reactant_impl = reactant.plasmid_implementations[0] reactant_component_definition = reactant.plasmid_definition @@ -548,17 +579,27 @@ def backbone_digestion( assembly_activity: sbol2.Activity, document: sbol2.Document, ) -> Tuple[List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]]: - """Runs a simulated digestion on the top level sequence in the reactant ComponentDefinition or ModuleDefinition with the given restriciton enzymes, creating an open backbone ComponentDefinition, a digestion Interaction, and converts existing scars to 5' and 3' overhangs. - The product ComponentDefinition is assumed the open backbone in this case. - - Written for use with the SBOL2.3 output of https://sbolcanvas.org - - :param reactant: DNA to be digested as SBOL ComponentDefinition or ModuleDefinition, usually a part_in_backbone. ComponentDefinition is the best-practice type for plasmids. - :param restriction_enzymes: Restriction enzymes as :class:`sbol2.ComponentDefinition` - (generate with :func:`rebase_restriction_enzyme`). - :param assembly_plan: SBOL ModuleDefinition to contain the functional components, interactions, and participations - :param document: original SBOL2 document to be used to extract referenced objects. - :return: A tuple of a list ComponentDefinitions and Sequences, and an assembly plan ModuleDefinition. + """Simulate restriction digestion of a backbone plasmid and extract the linearised vector. + + Mirrors :func:`part_digestion` but targets the backbone: for a circular reactant + with 2 digest products the longer fragment is taken as the open backbone; for a + linear reactant with 3 products the outer prefix/suffix fragments are used. + The resulting open-backbone ``ComponentDefinition``, its 5' and 3' overhangs, and + any matched scar sequences are returned as SBOL objects. The reactant implementation + and each enzyme are recorded as usages on ``assembly_activity``. + + :param reactant: SBOL Implementation whose ``built`` URI resolves to the + backbone ``ComponentDefinition`` in ``document``. + :param restriction_enzymes: Restriction enzyme implementations; the corresponding + ``ComponentDefinition.name`` must match a PyDNA/ReBase enzyme name (e.g. ``'BsaI'``). + :param assembly_activity: SBOL Activity to record reactant and enzyme usages on. + :param document: Source SBOL document used to resolve referenced definitions and sequences. + :return: A tuple of (extracts, activity), where extracts is a list of + ``(ComponentDefinition, Sequence)`` pairs covering the open backbone, + overhangs, and scar definitions, and activity is the updated ``assembly_activity``. + :raises TypeError: If the reactant has no recognised DNA type. + :raises ValueError: If the reactant does not have exactly one sequence, or if + the number of digest products is unsupported for the reactant topology. """ reactant_component_definition = document.get(reactant.built) reactant_displayId = reactant_component_definition.displayId @@ -775,11 +816,14 @@ def ligation( ) -> List[sbol2.Implementation]: """Ligates Components using base complementarity and creates product Components and a ligation Interaction. - :param reactants: DNA parts to be ligated as SBOL ModuleDefinition. + :param reactants: Extracted part and backbone ``ComponentDefinition`` objects to ligate. :param assembly_activity: SBOL activity to track assembly inputs & outputs - :param document: SBOL2 document containing all reactant ComponentDefinitions. - :param ligase: as SBOL Implementation - :return: List of all composites generated, in the form of tuples of ComponentDefinition and Sequence. + :param composite_prefix: Prefix used when naming composite ``ComponentDefinition`` + and ``Implementation`` identities. + :param source_document: SBOL Document containing all reactant definitions. + :param final_document: SBOL Document that receives composite definitions and implementations. + :param ligase: SBOL Implementation of the ligase (e.g. T4). + :return: List of ``sbol2.Implementation`` objects, one per composite plasmid generated. """ enzyme_definition = source_document.get(ligase.built) From 230f2ba133ac65f79d28dfc0c849251337ee0d1f Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 28 Apr 2026 14:58:08 -0600 Subject: [PATCH 20/47] get referencing CD in extract toplevel definition --- src/buildcompiler/abstract_translator.py | 25 +++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/buildcompiler/abstract_translator.py b/src/buildcompiler/abstract_translator.py index 5765123..c4fc666 100644 --- a/src/buildcompiler/abstract_translator.py +++ b/src/buildcompiler/abstract_translator.py @@ -187,7 +187,30 @@ def extract_combinatorial_design_parts( def extract_toplevel_definition(doc: sbol2.Document) -> sbol2.ComponentDefinition: - return doc.componentDefinitions[0] + cds = list(doc.componentDefinitions) + + # identities of definitions used as subcomponents + used_defs = set() + + for cd in cds: + for comp in cd.components: + used_defs.add(comp.definition) + + # candidates = composite designs not used inside another design + candidates = [ + cd for cd in cds if len(cd.components) > 0 and cd.identity not in used_defs + ] + + if len(candidates) == 1: + return candidates[0] + + if len(candidates) == 0: + raise ValueError("No top-level composite ComponentDefinition found") + + raise ValueError( + f"Multiple top-level ComponentDefinitions found: " + f"{[c.displayId for c in candidates]}" + ) def enumerate_design_variants(component_dict): From de292b81394c5bd5659f654f175e4b4d15ec3268 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 28 Apr 2026 14:59:55 -0600 Subject: [PATCH 21/47] ironing out iterative lvl1 --- notebooks/build_compiler_test.ipynb | 316 ++++++++++------------------ src/buildcompiler/buildcompiler.py | 16 +- 2 files changed, 122 insertions(+), 210 deletions(-) diff --git a/notebooks/build_compiler_test.ipynb b/notebooks/build_compiler_test.ipynb index caea5e6..62a0cbe 100644 --- a/notebooks/build_compiler_test.ipynb +++ b/notebooks/build_compiler_test.ipynb @@ -12,15 +12,45 @@ "from buildcompiler.abstract_translator import extract_toplevel_definition" ] }, + { + "cell_type": "markdown", + "id": "238f2456", + "metadata": {}, + "source": [ + "## Multi-Design Lvl 1 Testing:" + ] + }, { "cell_type": "code", "execution_count": 2, "id": "e60a9c84", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['i6TQvNp91', 'i0mwvNcgH']\n" + ] + } + ], "source": [ - "design_doc = sbol2.Document()\n", - "design_doc.read(\"../tests/test_files/ExampleLvl2_design.xml\")" + "design_path_list = [\n", + " \"../tests/test_files/moclo_parts_circuit.xml\",\n", + " \"../tests/test_files/mocloparts116.xml\",\n", + "]\n", + "design_defs = []\n", + "sbol_doc = sbol2.Document()\n", + "\n", + "for design in design_path_list:\n", + " temp_doc = sbol2.Document()\n", + " temp_doc.read(design)\n", + "\n", + " design_defs.append(extract_toplevel_definition(temp_doc))\n", + "\n", + " # sbol_doc.read(\"../tests/test_files/ExampleLvl2_design.xml\")\n", + "\n", + "print([design_def.displayId for design_def in design_defs])" ] }, { @@ -34,196 +64,63 @@ "output_type": "stream", "text": [ "Indexing collection: https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\n", - "Indexing collection: https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\n" + "Indexing collection: https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\n", + "['http://rebase.neb.com/rebase/enz/BsaI.html'] ['http://identifiers.org/obi/OBI_0000732']\n", + "[] ['http://identifiers.org/ncit/NCIT:C16796']\n", + "['http://rebase.neb.com/rebase/enz/BbsI.html'] ['http://identifiers.org/obi/OBI_0000732']\n", + "['http://rebase.neb.com/rebase/enz/SapI.html'] ['http://identifiers.org/obi/OBI_0000732']\n" ] } ], "source": [ - "auth = \"1812840e-aa95-4588-9dc3-2a94e0bc1ed4\"\n", + "auth = \"ca97f26e-9d33-4e38-810d-04d99f36e47c\"\n", "collections = [\n", " \"https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\",\n", " \"https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\",\n", "]\n", - "buildcompiler = BuildCompiler(collections, \"https://synbiohub.org\", auth, None)" + "buildcompiler = BuildCompiler(collections, \"https://synbiohub.org\", auth, sbol_doc)" ] }, { "cell_type": "code", "execution_count": 4, - "id": "1712f5ce", + "id": "19c5d2ff", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://synbiohub.org/user/Gon/Enzyme_Implementations/BbsI_impl/1 https://synbiohub.org/user/Gon/Enzyme_Implementations/BsaI_impl/1 https://synbiohub.org/user/Gon/Enzyme_Implementations/T4_Ligase_impl/1\n" + ] + } + ], "source": [ - "from buildcompiler.plasmid import Plasmid\n", - "from typing import List\n", - "from buildcompiler.constants import LVL2_FUSION_SITE_ORDER, KAN, AMP\n", - "from buildcompiler.sbol2build import Assembly\n", - "import warnings\n", - "\n", - "\n", - "def _extract_lvl2_TUs( # TODO send to misc helper file instead of buildcompiler.py?\n", - " design_doc: sbol2.Document,\n", - ") -> List[sbol2.ComponentDefinition]:\n", - " \"\"\"\n", - " Returns the component definitions of each level-1 component (TU)\n", - " in the design.\n", - "\n", - " Args:\n", - " design: :class:`sbol2.Document` containing the design.\n", - "\n", - " Returns:\n", - " A list of TU component definitions in sequential order.\n", - " \"\"\"\n", - " top_design = extract_toplevel_definition(design_doc)\n", - "\n", - " return [\n", - " design_doc.get(comp.definition) for comp in top_design.getInSequentialOrder()\n", - " ]\n", - "\n", - "\n", - "def assembly_lvl2(\n", - " self_buildcompiler,\n", - " abstract_design_doc: sbol2.Document,\n", - " backbone: Plasmid = None,\n", - " product_name: str = None,\n", - ") -> list[sbol2.ComponentDefinition]:\n", - " \"\"\"Assemble level-2 plasmids for the full design.\n", - "\n", - " Uses the assembled lvl1 plasmids and the current design to assemble\n", - " lvl2 plasmids in the correct order.\n", - "\n", - " :returns: List of assembled lvl2 plasmids.\n", - " :rtype: list[Plasmid]\n", - " :raises LookupError: If compatible plasmids or backbones cannot be found.\n", - " \"\"\"\n", - " # get high level genes, send to assembly_lvl1\n", - " # send original abstract_design to get a new dictionary\n", - " # send new dictionary to _get_backbone or get_compatible plasmids with AMP\n", - " TUs = _extract_lvl2_TUs(abstract_design_doc)\n", - " lvl1_plasmids = []\n", - "\n", - " for i, TU in enumerate(TUs):\n", - " print(TU.displayId)\n", - "\n", - " # l1 backbone zselection\n", - " backbone_fusion_sites = LVL2_FUSION_SITE_ORDER[i]\n", - " backbone = next(\n", - " plasmid\n", - " for plasmid in self_buildcompiler.indexed_backbones\n", - " if plasmid.fusion_sites == backbone_fusion_sites\n", - " and plasmid.antibiotic_resistance == KAN\n", - " )\n", - "\n", - " print(backbone)\n", - "\n", - " # TODO insert check here to see if the TU exists already (#43). should not be too expensive, as long as we search only indexed_plasmids where AR=KAN\n", - " composite_plasmids, final_doc = self_buildcompiler.assembly_lvl1(\n", - " TU, backbone=backbone, product_name=f\"{TU.displayId}_plas\"\n", - " )\n", - "\n", - " simplified_representation, new_defs = self_buildcompiler._encapsulate_TU(\n", - " composite_plasmids[0]\n", - " )\n", - " final_doc.add_list(new_defs)\n", - " lvl1_plasmids.append(simplified_representation)\n", - " print(simplified_representation)\n", - "\n", - " final_doc.write(\"encap.xml\")\n", - "\n", - " # get l2 backbone\n", - " plasmid_dict = {}\n", - " for p in lvl1_plasmids:\n", - " key = p.plasmid_definition.displayId\n", - " plasmid_dict.setdefault(key, []).append(p)\n", - "\n", - " backbone, _ = self_buildcompiler._get_backbone(\n", - " plasmid_dict, antibiotic_resistance=AMP\n", - " )\n", - "\n", - " print(backbone)\n", - "\n", - " # BbsI for l2\n", - " if self_buildcompiler.BbsI_impl is None:\n", - " self_buildcompiler._create_RE_implementation(\"BbsI\")\n", - " warnings.warn(\n", - " \"BbsI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\",\n", - " RuntimeWarning,\n", - " )\n", - "\n", - " # TODO see about making these common enzymes (BsaI, BbSI, T4) global or class variables, so they only need to be searched for once\n", - " if self_buildcompiler.T4_ligase_impl is None:\n", - " self_buildcompiler._create_ligase_implementation()\n", - " warnings.warn(\n", - " \"No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.\",\n", - " RuntimeWarning,\n", - " )\n", - "\n", - " assembly = Assembly(\n", - " lvl1_plasmids,\n", - " backbone,\n", - " self_buildcompiler.BbsI_impl,\n", - " self_buildcompiler.T4_ligase_impl,\n", - " self_buildcompiler.sbol_doc,\n", - " final_doc,\n", - " product_name,\n", - " )\n", - "\n", - " lvl2_plasmids, final_doc = assembly.run() # TODO upload product_doc?\n", - " self_buildcompiler.indexed_plasmids.extend(lvl2_plasmids)\n", - "\n", - " return lvl2_plasmids, final_doc" + "print(buildcompiler.BbsI_impl, buildcompiler.BsaI_impl, buildcompiler.T4_ligase_impl)" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "3e2272ff", + "execution_count": null, + "id": "6ec9e2fe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Gen\n", - "Plasmid:\n", - " Name: DVK_AE_A_E\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_AE/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['A', 'E']\n", - " Antibiotic Resistance: Kanamycin\n", - "\n", "matched pJ23100_AB_A_B with DVK_AE_A_E on fusion site A!\n", - "matched pB0032_BC_B_C with pJ23100_AB_A_B on fusion site B!\n", - "matched pE0040_CD_C_D with pB0032_BC_B_C on fusion site C!\n", - "matched final component pB0015_DE_D_E with pE0040_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n", - "Plasmid:\n", - " Name: Gen_plas_1_simple_A_E\n", - " Plasmid Definition: https://SBOL2Build.org/Gen_plas_1_simple/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://SBOL2Build.org/Gen_plas_1_impl/1']\n", - " Strain Implementations: [None]\n", - " Fusion Sites: ['A', 'E']\n", - " Antibiotic Resistance: Kanamycin\n", - "\n", - "Gen1\n", - "Plasmid:\n", - " Name: DVK_EF_E_F\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_EF/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_EF_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['E', 'F']\n", - " Antibiotic Resistance: Kanamycin\n", - "\n" + "matched pB0034_BC_B_C with pJ23100_AB_A_B on fusion site B!\n", + "matched pE0030_CD_C_D with pB0034_BC_B_C on fusion site C!\n", + "matched final component pB0015_DE_D_E with pE0030_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n", + "Success with backbone: DVK_AE_A_E and plasmids: ['pJ23100_AB_A_B', 'pB0034_BC_B_C', 'pE0030_CD_C_D', 'pB0015_DE_D_E']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/ryan/GitRepo/SBOL2Build/src/buildcompiler/buildcompiler.py:320: RuntimeWarning: BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", + "/Users/ryan/GitRepo/SBOL2Build/src/buildcompiler/buildcompiler.py:321: RuntimeWarning: BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", " warnings.warn(\n" ] }, @@ -231,58 +128,73 @@ "name": "stdout", "output_type": "stream", "text": [ - "matched pJ23116_EB_E_B with DVK_EF_E_F on fusion site E!\n", - "matched pB0033_BC_B_C with pJ23116_EB_E_B on fusion site B!\n", - "matched pE1010_CD_C_D with pB0033_BC_B_C on fusion site C!\n", - "matched final component pB0015_DF_D_F with pE1010_CD_C_D and DVK_EF_E_F on fusion sites (D, F)!\n", - "Plasmid:\n", - " Name: Gen1_plas_1_simple_E_F\n", - " Plasmid Definition: https://SBOL2Build.org/Gen1_plas_1_simple/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://SBOL2Build.org/Gen1_plas_1_impl/1']\n", - " Strain Implementations: [None]\n", - " Fusion Sites: ['E', 'F']\n", - " Antibiotic Resistance: Kanamycin\n", - "\n", - "matched Gen_plas_1_simple_A_E with DVA_AF_A_F on fusion site A!\n", - "matched final component Gen1_plas_1_simple_E_F with Gen_plas_1_simple_A_E and DVA_AF_A_F on fusion sites (E, F)!\n", - "Success with backbone: DVA_AF_A_F and plasmids: ['Gen_plas_1_simple_A_E', 'Gen1_plas_1_simple_E_F']\n", - "Plasmid:\n", - " Name: DVA_AF_A_F\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_AF/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVA_AF_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['A', 'F']\n", - " Antibiotic Resistance: Ampicillin\n", - "\n" + "matched pJ23116_AB_A_B with DVK_AE_A_E on fusion site A!\n", + "matched pB0034_BC_B_C with pJ23116_AB_A_B on fusion site B!\n", + "matched pE0030_CD_C_D with pB0034_BC_B_C on fusion site C!\n", + "matched final component pB0015_DE_D_E with pE0030_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/ss/w4r72t4j057bp2m46gq_kjwr0000gn/T/ipykernel_78396/3312876232.py:92: RuntimeWarning: BbsI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", - " warnings.warn(\n" - ] - }, + "data": { + "text/plain": [ + "({'https://sbolcanvas.org/i6TQvNp91/1': [Plasmid:\n", + " Name: i6TQvNp91_composite_1_A_E\n", + " Plasmid Definition: https://SBOL2Build.org/i6TQvNp91_composite_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/i6TQvNp91_composite_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin],\n", + " 'https://sbolcanvas.org/i0mwvNcgH/1': [Plasmid:\n", + " Name: i0mwvNcgH_composite_1_A_E\n", + " Plasmid Definition: https://SBOL2Build.org/i0mwvNcgH_composite_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/i0mwvNcgH_composite_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin]},\n", + " )" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "product_doc = sbol2.Document()\n", + "\n", + "buildcompiler.assembly_lvl1(design_defs, product_doc)" + ] + }, + { + "cell_type": "markdown", + "id": "00834b0c", + "metadata": {}, + "source": [ + "## LVL 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e2272ff", + "metadata": {}, + "outputs": [ { - "ename": "ValueError", - "evalue": "Not supported number of products. Found: 0", + "ename": "NameError", + "evalue": "name 'assembly_lvl2' is not defined", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m composite_plasmids, final_doc = \u001b[43massembly_lvl2\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuildcompiler\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdesign_doc\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 115\u001b[39m, in \u001b[36massembly_lvl2\u001b[39m\u001b[34m(self_buildcompiler, abstract_design_doc, backbone, product_name)\u001b[39m\n\u001b[32m 100\u001b[39m warnings.warn(\n\u001b[32m 101\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mNo appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 102\u001b[39m \u001b[38;5;167;01mRuntimeWarning\u001b[39;00m,\n\u001b[32m 103\u001b[39m )\n\u001b[32m 105\u001b[39m assembly = Assembly(\n\u001b[32m 106\u001b[39m lvl1_plasmids,\n\u001b[32m 107\u001b[39m backbone,\n\u001b[32m (...)\u001b[39m\u001b[32m 112\u001b[39m product_name,\n\u001b[32m 113\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m115\u001b[39m lvl2_plasmids, final_doc = \u001b[43massembly\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# TODO upload product_doc?\u001b[39;00m\n\u001b[32m 116\u001b[39m self_buildcompiler.indexed_plasmids.extend(lvl2_plasmids)\n\u001b[32m 118\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m lvl2_plasmids, final_doc\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/GitRepo/SBOL2Build/src/buildcompiler/sbol2build.py:83\u001b[39m, in \u001b[36mAssembly.run\u001b[39m\u001b[34m(self, include_extracted_parts)\u001b[39m\n\u001b[32m 80\u001b[39m \u001b[38;5;28mself\u001b[39m.extracted_parts.append(extracts_tuple_list[\u001b[32m0\u001b[39m][\u001b[32m0\u001b[39m])\n\u001b[32m 82\u001b[39m backbone_impl = \u001b[38;5;28mself\u001b[39m.backbone.plasmid_implementations[\u001b[32m0\u001b[39m]\n\u001b[32m---> \u001b[39m\u001b[32m83\u001b[39m extracts_tuple_list, _ = \u001b[43mbackbone_digestion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 84\u001b[39m \u001b[43m \u001b[49m\u001b[43mbackbone_impl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 85\u001b[39m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mrestriction_enzyme\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 86\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43massembly_activity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 87\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_document\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 88\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 90\u001b[39m append_extracts_to_doc(extracts_tuple_list, \u001b[38;5;28mself\u001b[39m.source_document)\n\u001b[32m 91\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m include_extracted_parts:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/GitRepo/SBOL2Build/src/buildcompiler/sbol2build.py:626\u001b[39m, in \u001b[36mbackbone_digestion\u001b[39m\u001b[34m(reactant, restriction_enzymes, assembly_activity, document)\u001b[39m\n\u001b[32m 623\u001b[39m digested_reactant = ds_reactant.cut(restriction_enzymes_pydna)\n\u001b[32m 625\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(digested_reactant) < \u001b[32m2\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(digested_reactant) > \u001b[32m3\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m626\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 627\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mNot supported number of products. Found: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(digested_reactant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 628\u001b[39m )\n\u001b[32m 629\u001b[39m \u001b[38;5;66;03m# TODO select them based on content rather than size.\u001b[39;00m\n\u001b[32m 630\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m circular \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(digested_reactant) == \u001b[32m2\u001b[39m:\n", - "\u001b[31mValueError\u001b[39m: Not supported number of products. Found: 0" + "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m composite_plasmids, final_doc = \u001b[43massembly_lvl2\u001b[49m(buildcompiler, design_doc)\n", + "\u001b[31mNameError\u001b[39m: name 'assembly_lvl2' is not defined" ] } ], "source": [ - "composite_plasmids, final_doc = assembly_lvl2(buildcompiler, design_doc)" + "composite_plasmids, final_doc = buildcompiler.assembly_lvl2(None)" ] }, { diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index 7b2323e..b8e98e0 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -118,13 +118,13 @@ def _index_collections(self, collections: List[str]): elif sbol2.BIOPAX_PROTEIN in built_object.types: if RESTRICTION_ENZYME in built_object.roles: if ( - built_object.identity - == "http://rebase.neb.com/rebase/enz/BsaI.html" + "http://rebase.neb.com/rebase/enz/BsaI.html" + in built_object.wasDerivedFrom ): self.BsaI_impl = implementation elif ( - built_object.identity - == "http://rebase.neb.com/rebase/enz/BbsI.html" + "http://rebase.neb.com/rebase/enz/BbsI.html" + in built_object.wasDerivedFrom ): self.BbsI_impl = implementation elif LIGASE in built_object.roles: @@ -289,7 +289,7 @@ def assembly_lvl1( self, abstract_designs: List[sbol2.ComponentDefinition], final_doc: sbol2.Document = sbol2.Document(), - product_name: str = None, + product_name: str = "composite", backbone: Plasmid = None, ) -> Tuple[Dict, sbol2.Document]: """Assemble level-1 plasmids for each gene/transcriptional unit. @@ -337,16 +337,16 @@ def assembly_lvl1( self.T4_ligase_impl, self.sbol_doc, final_doc, - product_name, + f"{abstract_design.displayId}_{product_name}", ) - composite_plasmids, product_doc = assembly.run() # TODO upload product_doc? + composite_plasmids, final_doc = assembly.run() # TODO upload product_doc? self.indexed_plasmids.extend( composite_plasmids ) # see about using a wrapper function to do this, where it checks if the design already exists (like in index_collections). this way we avoid duplicate issues that might come with loading the abstract design definitions into the self.sbol_doc ahead of time assembly_dict[abstract_design.identity] = composite_plasmids - return assembly_dict, product_doc + return assembly_dict, final_doc # TODO: Create a SBOL representation of the assembly process, updating the SBOL Document. # Using he selected parts create the representation, you need Plasmids, BsaI and T4 Ligase. From a0f395c65c1baec617123ee1ea63364b063c0f37 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 29 Apr 2026 14:50:42 -0600 Subject: [PATCH 22/47] better doc search in get_or_pull --- src/buildcompiler/abstract_translator.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/buildcompiler/abstract_translator.py b/src/buildcompiler/abstract_translator.py index c4fc666..d82f7f4 100644 --- a/src/buildcompiler/abstract_translator.py +++ b/src/buildcompiler/abstract_translator.py @@ -140,9 +140,17 @@ def get_or_pull(doc, sbh, uri): Get an SBOL object from a Document. If missing, pull it from SynBioHub and retry. """ - if uri not in doc: + try: + return doc.get(uri) + + except Exception as e: + # Treat lookup failure as "not present" sbh.pull(uri, doc) - return doc.get(uri) + + try: + return doc.get(uri) + except Exception: + raise e def extract_combinatorial_design_parts( From fb76b9c4c1b48692c9359767ebc2206b385e4db5 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 29 Apr 2026 14:55:07 -0600 Subject: [PATCH 23/47] added combinatorial design support to lvl1 --- src/buildcompiler/buildcompiler.py | 177 +++++++++++++++++++++++------ 1 file changed, 141 insertions(+), 36 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index b8e98e0..c8b13b7 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -9,6 +9,7 @@ dna_componentdefinition_with_sequence, rebase_restriction_enzyme, ) +from sbol2build.abstract_translator import enumerate_design_variants from .abstract_translator import ( get_or_pull, get_compatible_plasmids, @@ -49,7 +50,7 @@ def __init__( collections: List[str], sbh_registry: str, auth_token: str, - sbol_doc: sbol2.Document, + sbol_doc: sbol2.Document = None, ): self.sbh = sbol2.PartShop(sbh_registry) self.sbh.key = auth_token @@ -287,7 +288,8 @@ def _remove_internal_bsai_sites(sequence: str) -> tuple[str, int]: def assembly_lvl1( self, - abstract_designs: List[sbol2.ComponentDefinition], + abstract_designs: List[sbol2.ComponentDefinition] + | sbol2.CombinatorialDerivation, final_doc: sbol2.Document = sbol2.Document(), product_name: str = "composite", backbone: Plasmid = None, @@ -303,48 +305,109 @@ def assembly_lvl1( """ assembly_dict = {} + if type(abstract_designs) is sbol2.CombinatorialDerivation: + abstract_design_def = self.sbol_doc.getComponentDefinition( + abstract_designs.masterTemplate + ) - for abstract_design in abstract_designs: - plasmid_dict = self._get_input_plasmids( - design=abstract_design, antibiotic_resistance=AMP + combinatorial_part_dict = self.extract_combinatorial_design_parts( + abstract_design_def, abstract_designs ) - if not backbone: - backbone, compatible_plasmids = self._get_backbone( - plasmid_dict, antibiotic_resistance=KAN - ) - else: - compatible_plasmids = get_compatible_plasmids(plasmid_dict, backbone) - - if self.BsaI_impl is None: - self._create_RE_implementation("BsaI") - warnings.warn( - "BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.", - RuntimeWarning, + enumerated_part_lists = enumerate_design_variants(combinatorial_part_dict) + + for i, list in enumerate(enumerated_part_lists): + plasmid_dict = self._construct_plasmid_dict(list, "Ampicillin") + + if not backbone: + backbone, compatible_plasmids = self._get_backbone( + plasmid_dict, antibiotic_resistance=KAN + ) + else: + compatible_plasmids = get_compatible_plasmids( + plasmid_dict, backbone + ) + + if self.BsaI_impl is None: + self._create_RE_implementation("BsaI") + warnings.warn( + "BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.", + RuntimeWarning, + ) + + if self.T4_ligase_impl is None: + self._create_ligase_implementation() + warnings.warn( + "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.", + RuntimeWarning, + ) + + assembly = Assembly( + compatible_plasmids, + backbone, + self.BsaI_impl, + self.T4_ligase_impl, + self.sbol_doc, + final_doc, + f"{abstract_design_def.displayId}_{product_name}_comb{i}", ) + composite_plasmids, final_doc = ( + assembly.run() + ) # TODO upload product_doc? + + self.indexed_plasmids.extend( + composite_plasmids + ) # see about using a wrapper function to do this, where it checks if the design already exists (like in index_collections). this way we avoid duplicate issues that might come with loading the abstract design definitions into the self.sbol_doc ahead of time - if self.T4_ligase_impl is None: - self._create_ligase_implementation() - warnings.warn( - "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.", - RuntimeWarning, + assembly_dict.setdefault(abstract_design_def.identity, []).extend( + composite_plasmids + ) + else: + for abstract_design in abstract_designs: + plasmid_dict = self._get_input_plasmids( + design=abstract_designs, antibiotic_resistance=AMP ) - assembly = Assembly( - compatible_plasmids, - backbone, - self.BsaI_impl, - self.T4_ligase_impl, - self.sbol_doc, - final_doc, - f"{abstract_design.displayId}_{product_name}", - ) - composite_plasmids, final_doc = assembly.run() # TODO upload product_doc? + if not backbone: + backbone, compatible_plasmids = self._get_backbone( + plasmid_dict, antibiotic_resistance=KAN + ) + else: + compatible_plasmids = get_compatible_plasmids( + plasmid_dict, backbone + ) + + if self.BsaI_impl is None: + self._create_RE_implementation("BsaI") + warnings.warn( + "BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.", + RuntimeWarning, + ) + + if self.T4_ligase_impl is None: + self._create_ligase_implementation() + warnings.warn( + "No appropriate ligase found in provided collection(s). Domestication of T4 Ligase via purchase will be added to protocol.", + RuntimeWarning, + ) + + assembly = Assembly( + compatible_plasmids, + backbone, + self.BsaI_impl, + self.T4_ligase_impl, + self.sbol_doc, + final_doc, + f"{abstract_design.displayId}_{product_name}", + ) + composite_plasmids, final_doc = ( + assembly.run() + ) # TODO upload product_doc? - self.indexed_plasmids.extend( - composite_plasmids - ) # see about using a wrapper function to do this, where it checks if the design already exists (like in index_collections). this way we avoid duplicate issues that might come with loading the abstract design definitions into the self.sbol_doc ahead of time - assembly_dict[abstract_design.identity] = composite_plasmids + self.indexed_plasmids.extend( + composite_plasmids + ) # see about using a wrapper function to do this, where it checks if the design already exists (like in index_collections). this way we avoid duplicate issues that might come with loading the abstract design definitions into the self.sbol_doc ahead of time + assembly_dict[abstract_design.identity] = composite_plasmids return assembly_dict, final_doc @@ -589,6 +652,48 @@ def _extract_design_parts( for component in component_list ] + def extract_combinatorial_design_parts( + self, + design: sbol2.ComponentDefinition, + derivation: sbol2.CombinatorialDerivation, + ) -> Dict[str, List[sbol2.ComponentDefinition]]: + """ + Extracts and returns a mapping of component definitions from a combinatorial design, in order. + Variants of combinatinatorial components are entered in a list corresponding to the URI of the component in the abstract design. + + Args: + design: + The top-level :class:`sbol2.ComponentDefinition` representing the + abstract design template whose components should be extracted in + sequential order. + + derivation: + The :class:`sbol2.CombinatorialDerivation` associated with ``design`` + that defines variable components and their allowed variants. + + Returns: + Dict[str, List[sbol2.ComponentDefinition]]: + A dictionary mapping component identities to lists + of variable component definitions. + + - Sequential design components map to lists containing a single definition. + - Combinatorial variable components map to lists of variant definitions. + """ + component_list = [c for c in design.getInSequentialOrder()] + component_dict = { + component.identity: [ + get_or_pull(self.sbol_doc, self.sbh, component.definition) + ] + for component in component_list + } + + for component in derivation.variableComponents: + component_dict[component.variable] = [ + self.sbol_doc.getComponentDefinition(var) for var in component.variants + ] + + return component_dict + def _get_abstract_design(self) -> sbol2.ComponentDefinition: for definition in self.sbol_doc.componentDefinitions: if ( From 79b6b3ac5a89874f045798d11d072aa66ac571fe Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Thu, 7 May 2026 12:49:43 -0600 Subject: [PATCH 24/47] new tests for digestion and ligation --- tests/test_core.py | 360 +++++++++++++++++++++++---------------------- 1 file changed, 183 insertions(+), 177 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 8953a4d..0618626 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -5,37 +5,90 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) -from sbol2build import ( - golden_gate_assembly_plan, - rebase_restriction_enzyme, +from buildcompiler.constants import ( + CIRCULAR, + ENGINEERED_PLASMID, + FIVE_PRIME_OVERHANG, + PLASMID_VECTOR, + THREE_PRIME_OVERHANG, +) +from buildcompiler.sbol2build import ( + Assembly, backbone_digestion, part_digestion, ligation, ) +from buildcompiler.plasmid import Plasmid + + +class Test_Assembly_Functions(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.sbh = sbol2.PartShop("https://synbiohub.org") + + username = os.environ.get("SBH_USERNAME") + password = os.environ.get("SBH_PASSWORD") + + if not username or not password: + raise RuntimeError( + "Missing SBH_USERNAME and/or SBH_PASSWORD environment variables" + ) + + cls.sbh.login(username, password) + + cls.source_doc = sbol2.Document() + final_doc = sbol2.Document() + + cls.sbh.pull( + "https://synbiohub.org/user/Gon/CIDARMoCloParts/CIDARMoCloParts_collection/1", + cls.source_doc, + ) + cls.sbh.pull( + "https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/CIDARMoCloPlasmidsKit_collection/1", + cls.source_doc, + ) + cls.sbh.pull( + "https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", + cls.source_doc, + ) + cls.sbh.pull( + "https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1", + cls.source_doc, + ) -class Test_Core_Functions(unittest.TestCase): - def test_part_digestion(self): - doc = sbol2.Document() - doc.read("tests/test_files/pro_in_bb.xml") + cls.re_impl = cls.source_doc.get( + "https://synbiohub.org/user/Gon/Enzyme_Implementations/BsaI_impl/1" + ) + cls.ligase_impl = cls.source_doc.get( + "https://synbiohub.org/user/Gon/Enzyme_Implementations/T4_Ligase_impl/1" + ) + + cls.assembly = Assembly( + None, None, cls.re_impl, cls.ligase_impl, cls.source_doc, final_doc + ) - md = doc.getModuleDefinition("https://sbolcanvas.org/module1") - assembly_plan = sbol2.ModuleDefinition("assembly_plan") + def test_part_digestion(self): # TODO test activity relationships + impl = self.source_doc.get( + "https://synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" + ) + definition = self.source_doc.get(impl.built) + plasmid = Plasmid(definition, None, [impl], None, self.source_doc) + assembly_activity = self.assembly.initialize_assembly_activity() - parts_list, assembly_plan = part_digestion( - md, [rebase_restriction_enzyme("BsaI")], assembly_plan, doc + parts_list, assembly_activity = part_digestion( + plasmid, [self.re_impl], assembly_activity, self.source_doc ) product_doc = sbol2.Document() - for extract, sequence in parts_list: + for extract, _ in parts_list: product_doc.add(extract) - product_doc.add(sequence) - product_doc.add(assembly_plan) + product_doc.add(assembly_activity) extract = parts_list[0][0] self.assertEqual( extract.roles, - ["https://identifiers.org/so/SO:0000915"], + ["http://identifiers.org/so/SO:0000915"], "Part digestion extracted part missing engineered insert role", ) # engineered insert role self.assertTrue( @@ -63,72 +116,36 @@ def test_part_digestion(self): ) else: self.assertTrue( - comp_def.identity in doc.componentDefinitions, + comp_def.identity in self.source_doc.componentDefinitions, "Digested part missing reference to part from original document", ) # check that old part has been transcribed to new doc, in extracted part - # check that wasderivedfroms match, assembly plan records all interactions, - contains_restriction, contains_reactant, contains_product = False, False, False - for participation in assembly_plan.interactions[0].participations: - if participation.displayId == "restriction": - self.assertTrue( - "http://identifiers.org/biomodels.sbo/SBO:0000019" - in participation.roles, - "Restriction participation missing 'modifier' role", - ) - contains_restriction = True - elif "reactant" in participation.displayId: - self.assertTrue( - "http://identifiers.org/biomodels.sbo/SBO:0000010" - in participation.roles, - "Restriction reactant participation missing 'reactant' role", - ) - contains_reactant = True - elif "product" in participation.displayId: - self.assertTrue( - "http://identifiers.org/biomodels.sbo/SBO:0000011" - in participation.roles, - "Restriction product participation missing 'product' role", - ) - contains_product = True - - self.assertTrue( - contains_product, "Digestion Assembly plan missing product participation" - ) - self.assertTrue( - contains_reactant, "Digestion Assembly plan missing reactant participation" - ) - self.assertTrue( - contains_restriction, - "Digestion Assembly plan missing restriction participation", - ) - sbol_validation_result = product_doc.validate() self.assertEqual( sbol_validation_result, "Valid.", "Part Digestion SBOL validation failed" ) def test_backbone_digestion(self): - doc = sbol2.Document() - doc.read("tests/test_files/backbone.xml") - - md = doc.getModuleDefinition("https://sbolcanvas.org/module1") - assembly_plan = sbol2.ModuleDefinition("assembly_plan") + impl = self.source_doc.get( + "https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" + ) + definition = self.source_doc.get(impl.built) + plasmid = Plasmid(definition, None, [impl], None, self.source_doc) + assembly_activity = self.assembly.initialize_assembly_activity() - parts_list, assembly_plan = backbone_digestion( - md, [rebase_restriction_enzyme("BsaI")], assembly_plan, doc + parts_list, assembly_activity = backbone_digestion( + plasmid, [self.re_impl], assembly_activity, self.source_doc ) product_doc = sbol2.Document() - for extract, sequence in parts_list: + for extract, _ in parts_list: product_doc.add(extract) - product_doc.add(sequence) - product_doc.add(assembly_plan) + product_doc.add(assembly_activity) extract = parts_list[0][0] self.assertEqual( extract.roles, - ["https://identifiers.org/so/SO:0000755"], + [PLASMID_VECTOR], "Backbone digestion extracted part missing plasmid vector role", ) # plasmid vector @@ -141,57 +158,21 @@ def test_backbone_digestion(self): if "three_prime_oh" in comp_obj.displayId: self.assertEqual( comp_def.roles, - ["http://identifiers.org/so/SO:0001933"], + [THREE_PRIME_OVERHANG], "Part digestion missing 3 prime role", ) elif "five_prime_oh" in comp_obj.displayId: self.assertEqual( comp_def.roles, - ["http://identifiers.org/so/SO:0001932"], + [FIVE_PRIME_OVERHANG], "Part digestion missing 5 prime role", ) else: self.assertTrue( - comp_def.identity in doc.componentDefinitions, + comp_def.identity in self.source_doc.componentDefinitions, "Digested part missing reference to part from original document", ) # check that old part has been transcribed to new doc, in extracted part - # check that wasderivedfroms match, assembly plan records all interactions, - contains_restriction, contains_reactant, contains_product = False, False, False - for participation in assembly_plan.interactions[0].participations: - if participation.displayId == "restriction": - self.assertTrue( - "http://identifiers.org/biomodels.sbo/SBO:0000019" - in participation.roles, - "Restriction participation missing 'modifier' role", - ) - contains_restriction = True - elif "reactant" in participation.displayId: - self.assertTrue( - "http://identifiers.org/biomodels.sbo/SBO:0000010" - in participation.roles, - "Restriction reactant participation missing 'reactant' role", - ) - contains_reactant = True - elif "product" in participation.displayId: - self.assertTrue( - "http://identifiers.org/biomodels.sbo/SBO:0000011" - in participation.roles, - "Restriction product participation missing 'product' role", - ) - contains_product = True - - self.assertTrue( - contains_product, "Digestion Assembly plan missing product participation" - ) - self.assertTrue( - contains_reactant, "Digestion Assembly plan missing reactant participation" - ) - self.assertTrue( - contains_restriction, - "Digestion Assembly plan missing restriction participation", - ) - sbol_validation_result = product_doc.validate() self.assertEqual( sbol_validation_result, @@ -201,21 +182,29 @@ def test_backbone_digestion(self): def test_ligation(self): ligation_doc = sbol2.Document() - temp_doc = sbol2.Document() reactants_list = [] - assembly_plan = sbol2.ModuleDefinition("assembly_plan") + assembly_activity = self.assembly.initialize_assembly_activity() parts = [ - "tests/test_files/pro_in_bb.xml", - "tests/test_files/rbs_in_bb.xml", - "tests/test_files/cds_in_bb.xml", - "tests/test_files/terminator_in_bb.xml", + self.source_doc.get( + "https://synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" + ), + self.source_doc.get( + "https://synbiohub.org/user/Gon/impl_test/pB0034_BC_impl/1" + ), + self.source_doc.get( + "https://synbiohub.org/user/Gon/impl_test/pE0030_CD_impl/1" + ), + self.source_doc.get( + "https://synbiohub.org/user/Gon/impl_test/pB0015_DE_impl/1" + ), ] - for i, part in enumerate(parts): - temp_doc.read(part) - md = temp_doc.getModuleDefinition("https://sbolcanvas.org/module1") - extracts_tuple_list, assembly_plan = part_digestion( - md, [rebase_restriction_enzyme("BsaI")], assembly_plan, temp_doc + for i, impl in enumerate(parts): + definition = self.source_doc.get(impl.built) + plasmid = Plasmid(definition, None, [impl], None, self.source_doc) + + extracts_tuple_list, assembly_activity = part_digestion( + plasmid, [self.re_impl], assembly_activity, self.source_doc ) for extract, sequence in extracts_tuple_list: @@ -230,11 +219,22 @@ def test_ligation(self): reactants_list.append(extracts_tuple_list[0][0]) - temp_doc.read("tests/test_files/backbone.xml") + backbone_impl = self.source_doc.get( + "https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" + ) + # run digestion, extract component + sequence, add to ligation_doc, reactants_list - md = temp_doc.getModuleDefinition("https://sbolcanvas.org/module1") - extracts_tuple_list, assembly_plan = backbone_digestion( - md, [rebase_restriction_enzyme("BsaI")], assembly_plan, temp_doc + definition = self.source_doc.get(backbone_impl.built) + + self.sbh.pull( + "https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_AE/1", + self.source_doc, + ) + + plasmid = Plasmid(definition, None, [backbone_impl], None, self.source_doc) + + extracts_tuple_list, assembly_activity = backbone_digestion( + plasmid, [self.re_impl], assembly_activity, self.source_doc ) for extract, seq in extracts_tuple_list: try: @@ -248,91 +248,97 @@ def test_ligation(self): else: print(e) - ligation_doc.add(assembly_plan) + ligation_doc.add(assembly_activity) reactants_list.append(extracts_tuple_list[0][0]) - ligation_doc.add(rebase_restriction_enzyme("BsaI")) + ligation_doc.add_list([self.re_impl, self.ligase_impl]) + self.sbh.pull(self.ligase_impl.built, ligation_doc) - pl = ligation(reactants_list, assembly_plan, ligation_doc) + final_doc = sbol2.Document() - for p in pl: - for obj in p: - ligation_doc.add(obj) + composite_impls = ligation( + reactants_list, + assembly_activity, + "test", + ligation_doc, + final_doc, + self.ligase_impl, + ) - if type(obj) is sbol2.ComponentDefinition: - self.assertTrue( - "http://identifiers.org/so/SO:0000988" in obj.types, - "Ligation product missing circular DNA type", - ) - self.assertTrue( - "http://www.biopax.org/release/biopax-level3.owl#Dna" - in obj.types, - "Ligation product missing DNA Molecule type", - ) - self.assertTrue( - "http://identifiers.org/so/SO:0000804" in obj.roles, - "Ligation product missing engineered region role", - ) + for i in composite_impls: + obj = final_doc.get(i.built) + + if type(obj) is sbol2.ComponentDefinition: + self.assertTrue( + CIRCULAR in obj.types, + "Ligation product missing circular DNA type", + ) + self.assertTrue( + "http://www.biopax.org/release/biopax-level3.owl#Dna" in obj.types, + "Ligation product missing DNA Molecule type", + ) + self.assertTrue( + ENGINEERED_PLASMID in obj.roles, + "Ligation product missing engineered plasmid role", + ) - locations = [] + locations = [] - for anno in obj.sequenceAnnotations: - for location in anno.locations: - locations.append( - (anno.identity, location.start, location.end) - ) + for anno in obj.sequenceAnnotations: + for location in anno.locations: + locations.append((anno.identity, location.start, location.end)) - locations.sort(key=lambda x: x[1]) + locations.sort(key=lambda x: x[1]) - for i in range(len(locations) - 1): - current_end = locations[i][2] - next_start = locations[i + 1][1] + for i in range(len(locations) - 1): + current_end = locations[i][2] + next_start = locations[i + 1][1] - self.assertEqual( - current_end + 1, - next_start, - f"Mismatch in continuity: {locations[i][0]} ends at {current_end}, " - f"but {locations[i + 1][0]} starts at {next_start}", - ) + self.assertEqual( + current_end + 1, + next_start, + f"Mismatch in continuity: {locations[i][0]} ends at {current_end}, " + f"but {locations[i + 1][0]} starts at {next_start}", + ) - sbol_validation_result = ligation_doc.validate() + sbol_validation_result = final_doc.validate() self.assertEqual( sbol_validation_result, "Valid.", "Ligation SBOL validation failed" ) - def test_golden_gate(self): - pro_doc = sbol2.Document() - pro_doc.read("tests/test_files/pro_in_bb.xml") + # def test_golden_gate(self): + # pro_doc = sbol2.Document() + # pro_doc.read("tests/test_files/pro_in_bb.xml") - rbs_doc = sbol2.Document() - rbs_doc.read("tests/test_files/rbs_in_bb.xml") + # rbs_doc = sbol2.Document() + # rbs_doc.read("tests/test_files/rbs_in_bb.xml") - cds_doc = sbol2.Document() - cds_doc.read("tests/test_files/cds_in_bb.xml") + # cds_doc = sbol2.Document() + # cds_doc.read("tests/test_files/cds_in_bb.xml") - ter_doc = sbol2.Document() - ter_doc.read("tests/test_files/terminator_in_bb.xml") + # ter_doc = sbol2.Document() + # ter_doc.read("tests/test_files/terminator_in_bb.xml") - bb_doc = sbol2.Document() - bb_doc.read("tests/test_files/backbone.xml") + # bb_doc = sbol2.Document() + # bb_doc.read("tests/test_files/backbone.xml") - part_docs = [pro_doc, rbs_doc, cds_doc, ter_doc] + # part_docs = [pro_doc, rbs_doc, cds_doc, ter_doc] - assembly_doc = sbol2.Document() - assembly_obj = golden_gate_assembly_plan( - "testassem", part_docs, bb_doc, "BsaI", assembly_doc - ) + # assembly_doc = sbol2.Document() + # assembly_obj = golden_gate_assembly_plan( + # "testassem", part_docs, bb_doc, "BsaI", assembly_doc + # ) - composites = assembly_obj.run(plasmids_in_module_definitions=True) + # composites = assembly_obj.run(plasmids_in_module_definitions=True) - self.assertEqual(len(composites), 1) + # self.assertEqual(len(composites), 1) - assembly_doc.write("validation_assembly.xml") + # assembly_doc.write("validation_assembly.xml") - sbol_validation_result = assembly_doc.validate() - self.assertEqual( - sbol_validation_result, "Valid.", "Assembly SBOL validation failed" - ) + # sbol_validation_result = assembly_doc.validate() + # self.assertEqual( + # sbol_validation_result, "Valid.", "Assembly SBOL validation failed" + # ) if __name__ == "__main__": From e62294131eeb8ce9e76c2ac8352132b4f7296954 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Thu, 7 May 2026 15:32:47 -0600 Subject: [PATCH 25/47] combinatorial test --- notebooks/comb_design.ipynb | 567 ++++++++++++++++-------------------- 1 file changed, 255 insertions(+), 312 deletions(-) diff --git a/notebooks/comb_design.ipynb b/notebooks/comb_design.ipynb index ead09ff..493be94 100644 --- a/notebooks/comb_design.ipynb +++ b/notebooks/comb_design.ipynb @@ -8,8 +8,7 @@ "outputs": [], "source": [ "import sbol2\n", - "from typing import Dict, List\n", - "from sbol2build.abstract_translator import extract_design_parts" + "from buildcompiler.buildcompiler import BuildCompiler" ] }, { @@ -22,386 +21,330 @@ "name": "stdout", "output_type": "stream", "text": [ - "Design........................0\n", - "Build.........................0\n", - "Test..........................0\n", - "Analysis......................0\n", - "ComponentDefinition...........5\n", - "ModuleDefinition..............0\n", - "Model.........................0\n", - "Sequence......................4\n", - "Collection....................0\n", - "Activity......................0\n", - "Plan..........................0\n", - "Agent.........................0\n", - "Attachment....................0\n", - "CombinatorialDerivation.......1\n", - "Implementation................0\n", - "SampleRoster..................0\n", - "Experiment....................0\n", - "ExperimentalData..............0\n", - "Annotation Objects............5\n", - "---\n", - "Total: .........................15\n", - "\n" + "\n" ] } ], "source": [ "abstract_doc = sbol2.Document()\n", "abstract_doc.read(\"tests/test_files/combinatorial_1.xml\")\n", - "print(abstract_doc)" + "comb_design = abstract_doc.combinatorialderivations[0]\n", + "print(type(comb_design))" ] }, { "cell_type": "code", "execution_count": 3, - "id": "f0fb25d5", + "id": "39f37870", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "https://sbolcanvas.org/abstract_combinatorial/1\n" + "Indexing collection: https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\n", + "Indexing collection: https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\n" ] } ], "source": [ - "toplevel = abstract_doc.componentDefinitions[\n", - " 2\n", - "] # TODO develop approach to extract toplevel definition in non MD documents; maybe check for annotations or subcomponents\n", - "print(toplevel)" + "auth = \"47c58a25-78d5-4aee-b74b-9cd2e25da55e\"\n", + "collections = [\n", + " \"https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\",\n", + " \"https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\",\n", + "]\n", + "buildcompiler = BuildCompiler(collections, \"https://synbiohub.org\", auth, abstract_doc)" ] }, { "cell_type": "code", "execution_count": 4, - "id": "c36c9c45", + "id": "c3bbf89a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "matched pJ23100_AB_A_B with DVK_AE_A_E on fusion site A!\n", + "matched pB0033_BC_B_C with pJ23100_AB_A_B on fusion site B!\n", + "matched pE0040_CD_C_D with pB0033_BC_B_C on fusion site C!\n", + "matched final component pB0015_DE_D_E with pE0040_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n", + "Success with backbone: DVK_AE_A_E and plasmids: ['pJ23100_AB_A_B', 'pB0033_BC_B_C', 'pE0040_CD_C_D', 'pB0015_DE_D_E']\n", + "matched pJ23100_AB_A_B with DVK_AE_A_E on fusion site A!\n", + "matched pB0032_BC_B_C with pJ23100_AB_A_B on fusion site B!\n", + "matched pE0040_CD_C_D with pB0032_BC_B_C on fusion site C!\n", + "matched final component pB0015_DE_D_E with pE0040_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n" + ] + } + ], "source": [ - "parts = extract_design_parts(toplevel, abstract_doc)" + "list = buildcompiler.assembly_lvl1(comb_design)" ] }, { "cell_type": "code", "execution_count": 5, - "id": "bf3932d9", + "id": "3d70b565", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1\n", - "https://sbolcanvas.org/RBS_HFQr/1\n", - "https://synbiohub.org/user/Gon/CIDARMoCloParts/E0040m_gfp/1\n", - "https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1\n" + "({'https://sbolcanvas.org/abstract_combinatorial/1': [Plasmid:\n", + " Name: abstract_combinatorial_composite_comb0_1_A_E\n", + " Plasmid Definition: https://SBOL2Build.org/abstract_combinatorial_composite_comb0_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/abstract_combinatorial_composite_comb0_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin\n", + ", Plasmid:\n", + " Name: abstract_combinatorial_composite_comb1_1_A_E\n", + " Plasmid Definition: https://SBOL2Build.org/abstract_combinatorial_composite_comb1_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/abstract_combinatorial_composite_comb1_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin\n", + "]}, )\n" ] } ], "source": [ - "for part in parts:\n", - " print(part)" + "print(list)" ] }, { "cell_type": "code", "execution_count": 6, - "id": "ecd909f4", - "metadata": {}, - "outputs": [], - "source": [ - "def extract_combinatorial_design_parts(\n", - " design: sbol2.ComponentDefinition, doc: sbol2.Document, plasmid_doc\n", - ") -> Dict[str, List[sbol2.ComponentDefinition]]:\n", - " \"\"\"\n", - " Extracts and returns a mapping of component definitions from a combinatorial design, in order.\n", - "\n", - " Retrieves the components in sequential order from the given design.\n", - " Variants of combinatinatorial components are entered in a list corresponding to the URI of the component in the abstract design.\n", - "\n", - " Args:\n", - " design:\n", - " The :class:`sbol2.ComponentDefinition` representing the top-level design\n", - " from which to extract parts.\n", - " doc:\n", - " The primary :class:`sbol2.Document` containing the base component definitions\n", - " and combinatorial derivations.\n", - " plasmid_doc:\n", - " An additional :class:`sbol2.Document` used to resolve component variants\n", - " (plasmid-specific variants referenced by combinatorial derivations).\n", - "\n", - " Returns:\n", - " Dict[str, List[sbol2.ComponentDefinition]]:\n", - " A dictionary mapping component identities to lists\n", - " of corresponding component definitions.\n", - "\n", - " - Sequential design components map to lists containing a single definition.\n", - " - Combinatorial variable components map to lists of variant definitions.\n", - " \"\"\"\n", - " component_list = [c for c in design.getInSequentialOrder()]\n", - " component_dict = {\n", - " component.identity: [doc.getComponentDefinition(component.definition)]\n", - " for component in component_list\n", - " }\n", - "\n", - " for deriv in doc.combinatorialderivations:\n", - " for component in deriv.variableComponents:\n", - " component_dict[component.variable] = [\n", - " plasmid_doc.getComponentDefinition(var) for var in component.variants\n", - " ]\n", - "\n", - " return component_dict" - ] - }, - { - "cell_type": "markdown", - "id": "0ee4be8f", - "metadata": {}, - "source": [ - "### Problem: variant definitions are not included in sbolCanvas abstract combinatorial design document, only references\n", - "#### Solution: references can be resolved by pulling from plasmid collection" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "4fef4173", - "metadata": {}, - "outputs": [], - "source": [ - "plasmid_collection = sbol2.Document()\n", - "sbh = sbol2.PartShop(\"https://synbiohub.org\")\n", - "sbh.pull(\n", - " \"https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/CIDARMoCloPlasmidsKit_collection/1/b7fdc21c6601a61d3166073a9e50f2c3843e1df5/share\",\n", - " plasmid_collection,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "5d87a7fd", + "id": "99b474f2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "https://sbolcanvas.org/abstract_combinatorial/J23100_1/1:\n", - "J23100\n", - "https://sbolcanvas.org/abstract_combinatorial/RBS_HFQr_2/1:\n", - "B0032\n", - "B0033\n", - "https://sbolcanvas.org/abstract_combinatorial/E0040m_gfp_3/1:\n", - "E0040m_gfp\n", - "https://sbolcanvas.org/abstract_combinatorial/B0015_4/1:\n", - "B0015\n" + "[Plasmid:\n", + " Name: pE0040_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0040_CD/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pE0040_CD_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0034_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0034_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0034_BC_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_EB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_EB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0033_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0033_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0033_BC_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_FB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_EB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_EB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_GB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pE0030_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0030_CD/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pE0030_CD_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_FB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_AB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0032_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0032_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0032_BC_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DF_D_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DF/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DF_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'F']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DG_D_G\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DG/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DG_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'G']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DH_D_H\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DH/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DH_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'H']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_AB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_GB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_GB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pE1010_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE1010_CD/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pE1010_CD_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DE_D_E\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DE/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DE_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'E']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_FB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_EB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_EB_impl/1']\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: abstract_combinatorial_composite_comb0_1_A_E\n", + " Plasmid Definition: https://SBOL2Build.org/abstract_combinatorial_composite_comb0_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/abstract_combinatorial_composite_comb0_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin\n", + ", Plasmid:\n", + " Name: abstract_combinatorial_composite_comb1_1_A_E\n", + " Plasmid Definition: https://SBOL2Build.org/abstract_combinatorial_composite_comb1_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['https://SBOL2Build.org/abstract_combinatorial_composite_comb1_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'E']\n", + " Antibiotic Resistance: Kanamycin\n", + "]\n" ] } ], "source": [ - "dict = extract_combinatorial_design_parts(toplevel, abstract_doc, plasmid_collection)\n", - "\n", - "for key, value in dict.items():\n", - " print(f\"{key}:\")\n", - " for element in value:\n", - " print(element.displayId)" + "print(buildcompiler.indexed_plasmids)" ] }, { - "cell_type": "code", - "execution_count": 9, - "id": "59be7c31", - "metadata": {}, - "outputs": [], - "source": [ - "import itertools\n", - "\n", - "\n", - "def enumerate_design_variants(component_dict):\n", - " \"\"\"\n", - " Given a dict mapping variable component identities to lists of ComponentDefinitions,\n", - " generate all possible design combinations as lists of ComponentDefinitions\n", - " (in consistent order of keys).\n", - " \"\"\"\n", - " keys = list(component_dict.keys())\n", - " variant_lists = [component_dict[k] for k in keys]\n", - "\n", - " # Cartesian product across all variant lists\n", - " all_variants = list(itertools.product(*variant_lists))\n", - "\n", - " # Convert tuples to lists (optional) for clarity\n", - " all_variants = [list(combo) for combo in all_variants]\n", - "\n", - " return all_variants" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f2c774c9", - "metadata": {}, - "outputs": [], - "source": [ - "from sbol2build.abstract_translator import (\n", - " construct_plasmid_dict,\n", - " extract_toplevel_definition,\n", - " get_compatible_plasmids,\n", - " MocloPlasmid,\n", - ")\n", - "\n", - "enumerated = enumerate_design_variants(dict)\n", - "\n", - "plasmid_dicts = []\n", - "\n", - "# for design in enumerated:\n", - "# plasmid_dicts.append(construct_plasmid_dict(design, plasmid_collection))" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "0470f3f9", + "cell_type": "markdown", + "id": "0ee4be8f", "metadata": {}, - "outputs": [], "source": [ - "def translate_abstract_to_plasmids(\n", - " abstract_design_doc: sbol2.Document,\n", - " plasmid_collection: sbol2.Document,\n", - " backbone_doc: sbol2.Document,\n", - "):\n", - " backbone_def = extract_toplevel_definition(backbone_doc)\n", - " backbone_plasmid = MocloPlasmid(backbone_def.displayId, backbone_def, backbone_doc)\n", - "\n", - " # combinatorial design\n", - " if len(abstract_design_doc.combinatorialderivations) > 0:\n", - " abstract_design_def = abstract_design_doc.getComponentDefinition(\n", - " abstract_design_doc.combinatorialderivations[0].masterTemplate\n", - " )\n", - "\n", - " combinatorial_part_dict = extract_combinatorial_design_parts(\n", - " abstract_design_def, abstract_design_doc, plasmid_collection\n", - " )\n", - " enumerated_part_list = enumerate_design_variants(combinatorial_part_dict)\n", - "\n", - " final_plasmid_list = []\n", - "\n", - " for design in enumerated_part_list:\n", - " plasmid_dict = construct_plasmid_dict(design, plasmid_collection)\n", - " final_plasmid_list += get_compatible_plasmids(\n", - " plasmid_dict, backbone_plasmid\n", - " )\n", - "\n", - " return set(final_plasmid_list)\n", - "\n", - " # generic design\n", - " else:\n", - " abstract_design_def = extract_toplevel_definition(abstract_design_doc)\n", - "\n", - " ordered_part_definitions = extract_design_parts(\n", - " abstract_design_def, abstract_design_doc\n", - " )\n", - "\n", - " plasmid_dict = construct_plasmid_dict(\n", - " ordered_part_definitions, plasmid_collection\n", - " )\n", - "\n", - " return get_compatible_plasmids(plasmid_dict, backbone_plasmid)" + "### Problem: variant definitions are not included in sbolCanvas abstract combinatorial design document, only references\n", + "#### Solution: references can be resolved by pulling from plasmid collection" ] }, { - "cell_type": "code", - "execution_count": 19, - "id": "97ee6712", + "cell_type": "markdown", + "id": "b592c69c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_AB/1 with ['Fusion_Site_B', 'Fusion_Site_A']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_GB/1 with ['Fusion_Site_B', 'Fusion_Site_G']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_FB/1 with ['Fusion_Site_F', 'Fusion_Site_B']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_EB/1 with ['Fusion_Site_B', 'Fusion_Site_E']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0032/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0032_BC/1 with ['Fusion_Site_C', 'Fusion_Site_B']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/E0040m_gfp/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0040_CD/1 with ['Fusion_Site_D', 'Fusion_Site_C']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DG/1 with ['Fusion_Site_G', 'Fusion_Site_D']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DF/1 with ['Fusion_Site_F', 'Fusion_Site_D']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DE/1 with ['Fusion_Site_D', 'Fusion_Site_E']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DH/1 with ['Fusion_Site_D', 'Fusion_Site_H']\n", - "matched J23100_A_B with DVK_AE_A_E on fusion site A!\n", - "matched B0032_B_C with J23100_A_B on fusion site B!\n", - "matched E0040m_gfp_C_D with B0032_B_C on fusion site C!\n", - "matched final component B0015_D_E with E0040m_gfp_C_D and DVK_AE_A_E on fusion sites (D, E)!\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_AB/1 with ['Fusion_Site_B', 'Fusion_Site_A']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_GB/1 with ['Fusion_Site_B', 'Fusion_Site_G']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_FB/1 with ['Fusion_Site_F', 'Fusion_Site_B']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/J23100/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_EB/1 with ['Fusion_Site_B', 'Fusion_Site_E']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0033/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0033_BC/1 with ['Fusion_Site_B', 'Fusion_Site_C']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/E0040m_gfp/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0040_CD/1 with ['Fusion_Site_D', 'Fusion_Site_C']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DG/1 with ['Fusion_Site_G', 'Fusion_Site_D']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DF/1 with ['Fusion_Site_F', 'Fusion_Site_D']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DE/1 with ['Fusion_Site_D', 'Fusion_Site_E']\n", - "found: https://synbiohub.org/user/Gon/CIDARMoCloParts/B0015/1 in https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DH/1 with ['Fusion_Site_D', 'Fusion_Site_H']\n", - "matched J23100_A_B with DVK_AE_A_E on fusion site A!\n", - "matched B0033_B_C with J23100_A_B on fusion site B!\n", - "matched E0040m_gfp_C_D with B0033_B_C on fusion site C!\n", - "matched final component B0015_D_E with E0040m_gfp_C_D and DVK_AE_A_E on fusion sites (D, E)!\n" - ] - }, - { - "data": { - "text/plain": [ - "{MocloPlasmid:\n", - " Name: B0015_D_E\n", - " Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DE/1\n", - " Fusion Sites: ['D', 'E'],\n", - " MocloPlasmid:\n", - " Name: B0032_B_C\n", - " Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0032_BC/1\n", - " Fusion Sites: ['B', 'C'],\n", - " MocloPlasmid:\n", - " Name: B0033_B_C\n", - " Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0033_BC/1\n", - " Fusion Sites: ['B', 'C'],\n", - " MocloPlasmid:\n", - " Name: E0040m_gfp_C_D\n", - " Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0040_CD/1\n", - " Fusion Sites: ['C', 'D'],\n", - " MocloPlasmid:\n", - " Name: J23100_A_B\n", - " Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_AB/1\n", - " Fusion Sites: ['A', 'B']}" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bb_doc = sbol2.Document()\n", - "sbh.pull(\n", - " \"https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_AE/1/647c5b2458567dcce6b0a37178d352b8ffa9a7fe/share\",\n", - " bb_doc,\n", - ")\n", - "\n", - "translate_abstract_to_plasmids(\n", - " abstract_design_doc=abstract_doc,\n", - " plasmid_collection=plasmid_collection,\n", - " backbone_doc=bb_doc,\n", - ")" - ] + "source": [] }, { - "cell_type": "code", - "execution_count": null, - "id": "06f2668c", + "cell_type": "markdown", + "id": "42a819eb", "metadata": {}, - "outputs": [], "source": [] } ], From 4e704fcaf34a9f2e8c17acee42d726dad35a94a0 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Fri, 8 May 2026 11:45:25 -0600 Subject: [PATCH 26/47] type and error handling adjustments --- src/buildcompiler/sbol2build.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 1de23c9..4ad1312 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -20,7 +20,7 @@ THREE_PRIME_OVERHANG, ) -sbol2.Config.setHomespace("https://SBOL2Build.org") +sbol2.Config.setHomespace("http://buildcompiler.org") sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_COMPLIANT_URIS, True) sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, False) @@ -99,9 +99,8 @@ def run( append_extracts_to_doc(extracts_tuple_list, self.final_document) self.extracted_parts.append(extracts_tuple_list[0][0]) - backbone_impl = self.backbone.plasmid_implementations[0] extracts_tuple_list, _ = backbone_digestion( - backbone_impl, + self.backbone, [self.restriction_enzyme], self.assembly_activity, self.source_document, @@ -343,7 +342,7 @@ def part_digestion( restriction_enzymes: List[sbol2.Implementation], assembly_activity: sbol2.Activity, document: sbol2.Document, -) -> Tuple[List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]]: +) -> Tuple[List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]], sbol2.Activity]: """Simulate restriction digestion of a part plasmid and extract the insert. Uses PyDNA to cut the reactant sequence, then constructs SBOL representations @@ -574,11 +573,11 @@ def part_digestion( def backbone_digestion( - reactant: sbol2.Implementation, + reactant: Plasmid, restriction_enzymes: List[sbol2.Implementation], assembly_activity: sbol2.Activity, document: sbol2.Document, -) -> Tuple[List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]]]: +) -> Tuple[List[Tuple[sbol2.ComponentDefinition, sbol2.Sequence]], sbol2.Activity]: """Simulate restriction digestion of a backbone plasmid and extract the linearised vector. Mirrors :func:`part_digestion` but targets the backbone: for a circular reactant @@ -601,7 +600,8 @@ def backbone_digestion( :raises ValueError: If the reactant does not have exactly one sequence, or if the number of digest products is unsupported for the reactant topology. """ - reactant_component_definition = document.get(reactant.built) + reactant_impl = reactant.plasmid_implementations[0] + reactant_component_definition = document.get(reactant_impl.built) reactant_displayId = reactant_component_definition.displayId types = set(reactant_component_definition.types or []) @@ -619,8 +619,8 @@ def backbone_digestion( assembly_activity.usages.add( sbol2.Usage( - uri=f"{reactant.displayId}", - entity=reactant.identity, + uri=f"{reactant_impl.displayId}", + entity=reactant_impl.identity, role="http://sbols.org/v2#build", ) ) @@ -665,7 +665,7 @@ def backbone_digestion( if len(digested_reactant) < 2 or len(digested_reactant) > 3: raise ValueError( - f"Not supported number of products. Found: {len(digested_reactant)}" + f"Not supported number of products. Found: {len(digested_reactant)} after digesting {reactant_displayId}" ) # TODO select them based on content rather than size. elif circular and len(digested_reactant) == 2: From a0bb9e0058470c07af988ca81e38caa05b5ec5e7 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Fri, 8 May 2026 11:47:19 -0600 Subject: [PATCH 27/47] adjusting backbone arg to allow for dictionary mapping design.displayId:backbone in multi-design assemblylvl1 --- src/buildcompiler/buildcompiler.py | 54 +++++++++++++++++++----------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index c8b13b7..b31c64f 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -13,7 +13,6 @@ from .abstract_translator import ( get_or_pull, get_compatible_plasmids, - extract_toplevel_definition, ) from .constants import ( AMP, @@ -292,7 +291,7 @@ def assembly_lvl1( | sbol2.CombinatorialDerivation, final_doc: sbol2.Document = sbol2.Document(), product_name: str = "composite", - backbone: Plasmid = None, + backbone: Plasmid | Dict[str, Plasmid] | None = None, ) -> Tuple[Dict, sbol2.Document]: """Assemble level-1 plasmids for each gene/transcriptional unit. @@ -317,13 +316,18 @@ def assembly_lvl1( enumerated_part_lists = enumerate_design_variants(combinatorial_part_dict) for i, list in enumerate(enumerated_part_lists): - plasmid_dict = self._construct_plasmid_dict(list, "Ampicillin") + plasmid_dict = self._construct_plasmid_dict(list, AMP) - if not backbone: + if isinstance(backbone, dict): + raise ValueError( + "A backbone dictionary cannot be used with a CombinatorialDerivation. " + "All variants share the same template, so supply a single Plasmid or None to auto-select." + ) + elif not backbone: backbone, compatible_plasmids = self._get_backbone( plasmid_dict, antibiotic_resistance=KAN ) - else: + elif type(backbone) is Plasmid: compatible_plasmids = get_compatible_plasmids( plasmid_dict, backbone ) @@ -362,19 +366,30 @@ def assembly_lvl1( assembly_dict.setdefault(abstract_design_def.identity, []).extend( composite_plasmids ) - else: + else: # list of designs for abstract_design in abstract_designs: plasmid_dict = self._get_input_plasmids( - design=abstract_designs, antibiotic_resistance=AMP + design=abstract_design, antibiotic_resistance=AMP ) if not backbone: - backbone, compatible_plasmids = self._get_backbone( + resolved_backbone, compatible_plasmids = self._get_backbone( plasmid_dict, antibiotic_resistance=KAN ) - else: + elif isinstance(backbone, dict): + resolved_backbone = backbone.get(abstract_design.displayId) + if resolved_backbone is None: + raise ValueError( + f"Backbone dict provided but no entry found for design '{abstract_design.displayId}'. " + f"Available keys: {list(backbone.keys())}" + ) compatible_plasmids = get_compatible_plasmids( - plasmid_dict, backbone + plasmid_dict, resolved_backbone + ) + else: + resolved_backbone, compatible_plasmids = ( + backbone, + get_compatible_plasmids(plasmid_dict, backbone), ) if self.BsaI_impl is None: @@ -393,7 +408,7 @@ def assembly_lvl1( assembly = Assembly( compatible_plasmids, - backbone, + resolved_backbone, self.BsaI_impl, self.T4_ligase_impl, self.sbol_doc, @@ -438,6 +453,7 @@ def assembly_lvl2( # send original abstract_design to get a new dictionary # send new dictionary to _get_backbone or get_compatible plasmids with AMP TUs = _extract_lvl2_TUs(abstract_design_doc) + backbone_dict = {} lvl1_plasmids = [] for i, TU in enumerate(TUs): @@ -452,16 +468,16 @@ def assembly_lvl2( and plasmid.antibiotic_resistance == KAN ) - print(backbone) + backbone_dict[TU.displayId] = backbone # TODO insert check here to see if the TU exists already (#43). should not be too expensive, as long as we search only indexed_plasmids where AR=KAN - composite_plasmids, final_doc = self.assembly_lvl1( - TU, backbone=backbone, product_name=f"{TU.displayId}_plas" - ) - simplified_representation, new_defs = self._encapsulate_TU( - composite_plasmids[0] - ) + composite_plasmid_dict, final_doc = self.assembly_lvl1( + TUs, backbone=backbone_dict, product_name=f"{TU.displayId}_plas" + ) + + for key, composites in composite_plasmid_dict.items(): + simplified_representation, new_defs = self._encapsulate_TU(composites[0]) final_doc.add_list(new_defs) lvl1_plasmids.append(simplified_representation) print(simplified_representation) @@ -1091,7 +1107,7 @@ def _extract_lvl2_TUs( # TODO send to misc helper file instead of buildcompiler Returns: A list of TU component definitions in sequential order. """ - top_design = extract_toplevel_definition(design_doc) + top_design = design_doc.componentDefinitions[0] return [ design_doc.get(comp.definition) for comp in top_design.getInSequentialOrder() From cc967fefa83e1d09a184913cc1a26733af2fccf4 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Fri, 8 May 2026 13:20:58 -0600 Subject: [PATCH 28/47] type correction --- src/buildcompiler/plasmid.py | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/src/buildcompiler/plasmid.py b/src/buildcompiler/plasmid.py index 57fada4..3907cc1 100644 --- a/src/buildcompiler/plasmid.py +++ b/src/buildcompiler/plasmid.py @@ -12,8 +12,8 @@ def __init__( self, definition: sbol2.ComponentDefinition, strain_definition: sbol2.ModuleDefinition, - plasmid_implementations: sbol2.Implementation, - strain_implementations: sbol2.Implementation, + plasmid_implementations: List[sbol2.Implementation], + strain_implementations: List[sbol2.Implementation], doc: sbol2.document, ): self.plasmid_definition = definition @@ -93,26 +93,3 @@ def __eq__(self, other): def __hash__(self): return hash(self.plasmid_definition) - - -# def _extract_fusion_sites( -# plasmid: sbol2.ComponentDefinition, -# doc: sbol2.Document, -# sbh: sbol2.PartShop -# ) -> List[sbol2.ComponentDefinition]: -# """ -# Returns all fusion site component definitions from a plasmid. - -# Args: -# plasmid: :class:`sbol2.ComponentDefinition` representing the plasmid. - -# Returns: -# A list of fusion site component definitions. -# """ -# fusion_sites = [] -# for component in plasmid.components: -# definition = get_or_pull(doc, sbh, component.definition) -# if RESTRICTION_ENZYME_ASSEMBLY_SCAR in definition.roles: -# fusion_sites.append(definition) - -# return fusion_sites From 4e9f961e6bb8db2a7ef0900d1b7ab15e4e477e9a Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 12 May 2026 13:00:42 -0600 Subject: [PATCH 29/47] activity tests for digestion and ligation --- README.md | 10 ++- src/buildcompiler/constants.py | 2 +- tests/test_core.py | 113 ++++++++++++++++++++++----------- uv.lock | 2 +- 4 files changed, 86 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index ddaaa1a..962d203 100644 --- a/README.md +++ b/README.md @@ -52,5 +52,13 @@ uv run pre-commit install ``` -#### Running tests: +#### Running tests locally: +Run these bash commands to establish your SynBioHub account for collection access. These are saved in GitHub secrets for the automated test suite. + +`export SBH_USERNAME=your_username` + +`export SBH_PASSWORD=your_password` + +Then run the tests with: + `uv run python -m unittest discover -s tests` diff --git a/src/buildcompiler/constants.py b/src/buildcompiler/constants.py index e455c66..76bc182 100644 --- a/src/buildcompiler/constants.py +++ b/src/buildcompiler/constants.py @@ -26,7 +26,7 @@ LVL2_FUSION_SITE_ORDER = [["A", "E"], ["E", "F"], ["F", "G"], ["G", "H"]] -# TODO CHANGE ALL TO HTTP +# TODO CHANGE ALL TO HTTP (MAKE SURE REFERENCED PARTS ON SBH ARE ALSO HTTP FIRST) ENGINEERED_PLASMID = "http://identifiers.org/so/SO:0000637" ENGINEERED_INSERT = "https://identifiers.org/so/SO:0000915" diff --git a/tests/test_core.py b/tests/test_core.py index 0618626..cc5c51b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -7,8 +7,10 @@ from buildcompiler.constants import ( CIRCULAR, + ENGINEERED_INSERT, ENGINEERED_PLASMID, FIVE_PRIME_OVERHANG, + LINEAR, PLASMID_VECTOR, THREE_PRIME_OVERHANG, ) @@ -85,14 +87,37 @@ def test_part_digestion(self): # TODO test activity relationships product_doc.add(extract) product_doc.add(assembly_activity) + usages = list(assembly_activity.usages) + + # Expect: 1 reactant + at least 1 enzyme + self.assertTrue( + len(usages) >= 2, + "assembly activity should include reactant and enzyme usages", + ) + + entities = [u.entity for u in usages] + + # reactant implementation should be present + self.assertIn( + plasmid.plasmid_implementations[0].identity, + entities, + "Reactant implementation missing from activity usages", + ) + + # restriction enzyme should be present + self.assertIn( + self.re_impl.identity, + entities, + "Restriction enzyme missing from activity usages", + ) + extract = parts_list[0][0] - self.assertEqual( - extract.roles, - ["http://identifiers.org/so/SO:0000915"], + self.assertTrue( + ENGINEERED_INSERT in extract.roles, "Part digestion extracted part missing engineered insert role", ) # engineered insert role self.assertTrue( - "http://identifiers.org/so/SO:0000987" in extract.types, + LINEAR in extract.types, "Part digestion extracted part missing linear DNA type", ) @@ -142,6 +167,30 @@ def test_backbone_digestion(self): product_doc.add(extract) product_doc.add(assembly_activity) + usages = list(assembly_activity.usages) + + # Expect: 1 reactant + at least 1 enzyme + self.assertTrue( + len(usages) >= 2, + "Digestion activity should include reactant and enzyme usages", + ) + + entities = [u.entity for u in usages] + + # reactant implementation should be present + self.assertIn( + plasmid.plasmid_implementations[0].identity, + entities, + "Reactant implementation missing from activity usages", + ) + + # restriction enzyme should be present + self.assertIn( + self.re_impl.identity, + entities, + "Restriction enzyme missing from activity usages", + ) + extract = parts_list[0][0] self.assertEqual( extract.roles, @@ -265,9 +314,31 @@ def test_ligation(self): self.ligase_impl, ) + usages = list(assembly_activity.usages) + entities = [u.entity for u in usages] + + self.assertIn( + self.ligase_impl.identity, + entities, + "Ligase missing from assembly activity usages", + ) + + for part_impl in parts: + self.assertIn( + part_impl.identity, + entities, + f"{part_impl.displayId} missing from assembly activity usages", + ) + for i in composite_impls: obj = final_doc.get(i.built) + self.assertEqual( + i.wasGeneratedBy, + [assembly_activity.identity], + "Composite implementation not linked to assembly activity", + ) + if type(obj) is sbol2.ComponentDefinition: self.assertTrue( CIRCULAR in obj.types, @@ -306,40 +377,6 @@ def test_ligation(self): sbol_validation_result, "Valid.", "Ligation SBOL validation failed" ) - # def test_golden_gate(self): - # pro_doc = sbol2.Document() - # pro_doc.read("tests/test_files/pro_in_bb.xml") - - # rbs_doc = sbol2.Document() - # rbs_doc.read("tests/test_files/rbs_in_bb.xml") - - # cds_doc = sbol2.Document() - # cds_doc.read("tests/test_files/cds_in_bb.xml") - - # ter_doc = sbol2.Document() - # ter_doc.read("tests/test_files/terminator_in_bb.xml") - - # bb_doc = sbol2.Document() - # bb_doc.read("tests/test_files/backbone.xml") - - # part_docs = [pro_doc, rbs_doc, cds_doc, ter_doc] - - # assembly_doc = sbol2.Document() - # assembly_obj = golden_gate_assembly_plan( - # "testassem", part_docs, bb_doc, "BsaI", assembly_doc - # ) - - # composites = assembly_obj.run(plasmids_in_module_definitions=True) - - # self.assertEqual(len(composites), 1) - - # assembly_doc.write("validation_assembly.xml") - - # sbol_validation_result = assembly_doc.validate() - # self.assertEqual( - # sbol_validation_result, "Valid.", "Assembly SBOL validation failed" - # ) - if __name__ == "__main__": unittest.main() diff --git a/uv.lock b/uv.lock index 8309d65..ddf3f2e 100644 --- a/uv.lock +++ b/uv.lock @@ -1993,7 +1993,7 @@ wheels = [ [[package]] name = "sbol2build" -version = "0.0b1" +version = "0.0b3" source = { editable = "." } dependencies = [ { name = "biopython", version = "1.81", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.8'" }, From 3dc25ee26f3d81aaec93ed031d0c8f0f72159a10 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 12 May 2026 14:30:58 -0600 Subject: [PATCH 30/47] test buildcompiler draft --- tests/test_buildcompiler.py | 203 ++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 tests/test_buildcompiler.py diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py new file mode 100644 index 0000000..aa55f5d --- /dev/null +++ b/tests/test_buildcompiler.py @@ -0,0 +1,203 @@ +# test 1: test same abstract design with each possible circuit selection, ensure the promoter and terminator shift accordingly + +# test 2: inaccessible part in abstract design -> should throw informative error message + +# test 3: (FUTURE) abstract design with multiple TUs + +import sbol2 +import unittest +import sys +import os + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) + +from buildcompiler.buildcompiler import BuildCompiler + +from buildcompiler.abstract_translator import extract_toplevel_definition + + +class Test_Abstract_Translation_Functions(unittest.TestCase): + @classmethod + def setUpClass(cls): + username = os.environ.get("SBH_USERNAME") + password = os.environ.get("SBH_PASSWORD") + + if not username or not password: + raise RuntimeError( + "Missing SBH_USERNAME and/or SBH_PASSWORD environment variables" + ) + sbh = sbol2.PartShop("https://synbiohub.org") + sbh.login(username, password) + + auth = sbh.key + + collections = [ + "https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1", + "https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", + ] + + cls.buildcompiler = BuildCompiler( + collections, "https://synbiohub.org", auth, sbol2.Document() + ) + + def test_simple_lvl1_assembly(self): + abstract_design_doc = sbol2.Document() + abstract_design_doc.read("tests/test_files/moclo_parts_circuit.xml") + + design = extract_toplevel_definition(abstract_design_doc) + product_doc = sbol2.Document() + + dict, product_doc = self.buildcompiler.assembly_lvl1([design], product_doc) + + self.assertEqual( + len(dict), + 1, + "There should be 1 composite resulting from the assembly", + ) + + assembly_activity = product_doc.get( + "https://SBOL2Build.org/qlSBuNBL_composite_assembly/1" + ) + + self.assertEqual( + len(assembly_activity.usages), + 7, + "Assembly should have 7 usages: 5 plasmids, 1 ligase, 1 Restriction Enzyme", + ) + + usage_uris = {u.identity for u in assembly_activity.usages} + + expected_usage_uris = { + "https://SBOL2Build.org/qlSBuNBL_composite_assembly/pJ23100_AB_impl/1", + "https://SBOL2Build.org/qlSBuNBL_composite_assembly/pB0034_BC_impl/1", + "https://SBOL2Build.org/qlSBuNBL_composite_assembly/pE0030_CD_impl/1", + "https://SBOL2Build.org/qlSBuNBL_composite_assembly/pB0015_DE_impl/1", + "https://SBOL2Build.org/qlSBuNBL_composite_assembly/DVK_AE_impl/1", + "https://SBOL2Build.org/qlSBuNBL_composite_assembly/BsaI_enzyme/1", + "https://SBOL2Build.org/qlSBuNBL_composite_assembly/T4_Ligase/1", + } + + for expected_uri in expected_usage_uris: + usage = product_doc.get(expected_uri) + impl = product_doc.get(usage.entity) + + self.assertIn( + expected_uri, + usage_uris, + f"Expected usage {expected_uri} was not found in assembly activity usages", + ) + + self.assertIsNotNone( + impl, + f"Entity {impl} should exist in the activity", + ) + + expected_product_uri = "https://SBOL2Build.org/qlSBuNBL_composite_1_impl/1" + + product_impl = product_doc.get(expected_product_uri) + product_def = product_doc.get(product_impl.built) + + self.assertIsNotNone( + product_impl, + f"Implementation {product_impl} should exist in the document", + ) + + self.assertEqual( + product_impl.wasGeneratedBy, + assembly_activity.identity, + f"Implementation {product_impl} must include wasGeneratedBy {assembly_activity.identity}", + ) + + self.assertEqual( + product_def, + "https://SBOL2Build.org/qlSBuNBL_composite_1/1", + f"Implementation {product_impl} must build {product_def}", + ) + + product_doc.write("test_simple_lvl1_assembly.xml") + + # def test_two_rbs_combinatorial_translation(self): + # comb_doc = sbol2.Document() + # comb_doc.read("tests/test_files/combinatorial_1.xml") + + # design = extract_toplevel_definition(comb_doc) + + # self.assertEqual( + # len(comb_plasmid_list), + # 5, + # "There should be 5 plasmids in the abstract translation", + # ) + + # # Run through sbol2build to test composite count + # part_documents = [] + + # for mocloPlasmid in comb_plasmid_list: + # temp_doc = sbol2.Document() + # mocloPlasmid.definition.copy(temp_doc) + # copy_sequences(mocloPlasmid.definition, temp_doc, self.plasmid_collection) + # part_documents.append(temp_doc) + + # assembly_doc = sbol2.Document() + # assembly_obj = golden_gate_assembly_plan( + # "combinatorial_rbs_assembly_plan", + # part_documents, + # self.DVK_AE_doc, + # "BsaI", + # assembly_doc, + # ) + + # composite_list = assembly_obj.run() + # assembly_doc.write("comb_assembly.xml") + + # self.assertEqual( + # len(composite_list), + # 2, + # "Combinatorial assembly failed to produce 2 composites", + # ) + + # def test_complex_combinatorial_translation( + # self, + # ): # testing combinatorial design with 3 variable promoters and RBSs + # complex_comb_doc = sbol2.Document() + # complex_comb_doc.read("tests/test_files/complex_combinatorial_abstract.xml") + + # comb_plasmid_list = translate_abstract_to_plasmids( + # complex_comb_doc, self.plasmid_collection, self.DVK_AE_doc + # ) + + # self.assertEqual( + # len(comb_plasmid_list), + # 8, + # f"There should be 8 plasmids in the abstract translation, found {len(comb_plasmid_list)}", + # ) + + # # Run through sbol2build to test composite count + # part_documents = [] + + # for mocloPlasmid in comb_plasmid_list: + # temp_doc = sbol2.Document() + # mocloPlasmid.definition.copy(temp_doc) + # copy_sequences(mocloPlasmid.definition, temp_doc, self.plasmid_collection) + # part_documents.append(temp_doc) + + # assembly_doc = sbol2.Document() + # assembly_obj = golden_gate_assembly_plan( + # "complex_combinatorial_assembly_plan", + # part_documents, + # self.DVK_AE_doc, + # "BsaI", + # assembly_doc, + # ) + + # composite_list = assembly_obj.run() + # assembly_doc.write("complex_comb_assembly.xml") + + # self.assertEqual( + # len(composite_list), + # 9, + # f"Combinatorial assembly failed to produce 9 composites, found {len(composite_list)}", + # ) + + +if __name__ == "__main__": + unittest.main() From 7d346cfaaceba87b820dea24b3fa0811ef03c73f Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 12 May 2026 15:31:51 -0600 Subject: [PATCH 31/47] level 1 and first comb test complete --- tests/test_buildcompiler.py | 102 ++++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 39 deletions(-) diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index aa55f5d..8bc43d4 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -13,7 +13,7 @@ from buildcompiler.buildcompiler import BuildCompiler -from buildcompiler.abstract_translator import extract_toplevel_definition +from buildcompiler.abstract_translator import extract_toplevel_definition, get_or_pull class Test_Abstract_Translation_Functions(unittest.TestCase): @@ -36,8 +36,12 @@ def setUpClass(cls): "https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", ] + source = sbol2.Document() + + source.read("tests/test_files/combinatorial_1.xml") + cls.buildcompiler = BuildCompiler( - collections, "https://synbiohub.org", auth, sbol2.Document() + collections, "https://synbiohub.org", auth, source ) def test_simple_lvl1_assembly(self): @@ -78,18 +82,32 @@ def test_simple_lvl1_assembly(self): } for expected_uri in expected_usage_uris: - usage = product_doc.get(expected_uri) - impl = product_doc.get(usage.entity) - self.assertIn( expected_uri, usage_uris, f"Expected usage {expected_uri} was not found in assembly activity usages", ) + usage = next( + u for u in assembly_activity.usages if str(u.identity) == expected_uri + ) + + impl = get_or_pull(product_doc, self.buildcompiler.sbh, usage.entity) + self.assertIsNotNone( impl, - f"Entity {impl} should exist in the activity", + f"Entity {usage.entity} should exist in the document or on SynBioHub", + ) + + self.assertIsInstance( + impl, + sbol2.Implementation, + f"Entity {usage.entity} should be an SBOL Implementation", + ) + + self.assertIsNotNone( + impl.built, + f"Implementation {impl.identity} should reference a built object", ) expected_product_uri = "https://SBOL2Build.org/qlSBuNBL_composite_1_impl/1" @@ -104,56 +122,62 @@ def test_simple_lvl1_assembly(self): self.assertEqual( product_impl.wasGeneratedBy, - assembly_activity.identity, + [assembly_activity.identity], f"Implementation {product_impl} must include wasGeneratedBy {assembly_activity.identity}", ) self.assertEqual( - product_def, + product_def.identity, "https://SBOL2Build.org/qlSBuNBL_composite_1/1", f"Implementation {product_impl} must build {product_def}", ) product_doc.write("test_simple_lvl1_assembly.xml") - # def test_two_rbs_combinatorial_translation(self): - # comb_doc = sbol2.Document() - # comb_doc.read("tests/test_files/combinatorial_1.xml") + def test_two_rbs_combinatorial_translation(self): + comb_doc = sbol2.Document() + comb_doc.read("tests/test_files/combinatorial_1.xml") - # design = extract_toplevel_definition(comb_doc) + design = comb_doc.combinatorialderivations[0] - # self.assertEqual( - # len(comb_plasmid_list), - # 5, - # "There should be 5 plasmids in the abstract translation", - # ) + result_dict, assembly_doc = self.buildcompiler.assembly_lvl1(design) - # # Run through sbol2build to test composite count - # part_documents = [] + assembly_doc.write("comb_assembly.xml") - # for mocloPlasmid in comb_plasmid_list: - # temp_doc = sbol2.Document() - # mocloPlasmid.definition.copy(temp_doc) - # copy_sequences(mocloPlasmid.definition, temp_doc, self.plasmid_collection) - # part_documents.append(temp_doc) + self.assertEqual( + len(result_dict), + 1, + "Expected one combinatorial derivation key in result dictionary", + ) - # assembly_doc = sbol2.Document() - # assembly_obj = golden_gate_assembly_plan( - # "combinatorial_rbs_assembly_plan", - # part_documents, - # self.DVK_AE_doc, - # "BsaI", - # assembly_doc, - # ) + derivation_uri = "https://sbolcanvas.org/abstract_combinatorial/1" - # composite_list = assembly_obj.run() - # assembly_doc.write("comb_assembly.xml") + self.assertIn( + derivation_uri, + result_dict, + "Expected combinatorial derivation URI missing from results", + ) - # self.assertEqual( - # len(composite_list), - # 2, - # "Combinatorial assembly failed to produce 2 composites", - # ) + composites = result_dict[derivation_uri] + + self.assertEqual( + len(composites), + 2, + "Combinatorial assembly failed to produce 2 composites", + ) + + # ensure cobinatorial feature is satsified + for composite in composites: + components = composite.plasmid_definition.getInSequentialOrder() + + if len(components) > 3: + if components[3].displayId == "B0033": + has_b0033 = True + elif components[3].displayId == "B0032": + has_b0032 = True + + self.assertTrue(has_b0033, "No composite has B0033") + self.assertTrue(has_b0032, "No composite has B0032") # def test_complex_combinatorial_translation( # self, From da386689fe715c3f71bf2905a3fcd9b2645c1ea2 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 12 May 2026 17:27:11 -0600 Subject: [PATCH 32/47] removed superfluous prints --- src/buildcompiler/abstract_translator.py | 6 ------ src/sbol2build/sbol2build.py | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/buildcompiler/abstract_translator.py b/src/buildcompiler/abstract_translator.py index d82f7f4..3a0d316 100644 --- a/src/buildcompiler/abstract_translator.py +++ b/src/buildcompiler/abstract_translator.py @@ -321,9 +321,6 @@ def get_compatible_plasmids( and plasmid.fusion_sites[0] == match_to.fusion_sites[match_idx] and plasmid.fusion_sites[1] == backbone.fusion_sites[1] ): - print( - f"matched final component {plasmid.name} with {match_to.name} and {backbone.name} on fusion sites ({plasmid.fusion_sites[0]}, {plasmid.fusion_sites[1]})!" - ) selected_plasmids.append(plasmid) found = True break @@ -331,9 +328,6 @@ def get_compatible_plasmids( i < len(plasmid_dict) - 1 and plasmid.fusion_sites[0] == match_to.fusion_sites[match_idx] ): - print( - f"matched {plasmid.name} with {match_to.name} on fusion site {plasmid.fusion_sites[0]}!" - ) selected_plasmids.append(plasmid) found = True match_to = plasmid diff --git a/src/sbol2build/sbol2build.py b/src/sbol2build/sbol2build.py index db6a755..169804e 100644 --- a/src/sbol2build/sbol2build.py +++ b/src/sbol2build/sbol2build.py @@ -6,7 +6,7 @@ from typing import List, Union, Tuple from .constants import DNA_TYPES -sbol2.Config.setHomespace("https://SBOL2Build.org") +sbol2.Config.setHomespace("http://buildcompiler.org") sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_COMPLIANT_URIS, True) sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, False) @@ -1010,7 +1010,6 @@ def append_extracts_to_doc( """ for extract, sequence in extract_tuples: try: - print("adding: " + extract.displayId) add_object_to_doc(extract, doc) add_object_to_doc(sequence, doc) except Exception as e: From d2fe409a24c9005b1fe6a8183ad002e5c76f16f3 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Tue, 12 May 2026 17:29:10 -0600 Subject: [PATCH 33/47] new test suite --- tests/test_abstract.py | 146 ----------- tests/test_buildcompiler.py | 159 +++++++----- tests/test_files/moclo_parts_circuit.xml | 303 +++++++++++------------ tests/test_helpers.py | 107 +++++--- 4 files changed, 319 insertions(+), 396 deletions(-) delete mode 100644 tests/test_abstract.py diff --git a/tests/test_abstract.py b/tests/test_abstract.py deleted file mode 100644 index 00ba486..0000000 --- a/tests/test_abstract.py +++ /dev/null @@ -1,146 +0,0 @@ -# test 1: test same abstract design with each possible circuit selection, ensure the promoter and terminator shift accordingly - -# test 2: inaccessible part in abstract design -> should throw informative error message - -# test 3: (FUTURE) abstract design with multiple TUs - -import sbol2 -import unittest -import sys -import os - -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) - -from sbol2build.abstract_translator import ( - translate_abstract_to_plasmids, - copy_sequences, -) - -from sbol2build import ( - golden_gate_assembly_plan, -) - - -class Test_Abstract_Translation_Functions(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.sbh = sbol2.PartShop("https://synbiohub.org") - - cls.plasmid_collection = sbol2.Document() - cls.sbh.pull( - "https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/CIDARMoCloPlasmidsKit_collection/1/b7fdc21c6601a61d3166073a9e50f2c3843e1df5/share", - cls.plasmid_collection, - ) - - cls.DVK_AE_doc = sbol2.Document() - cls.sbh.pull( - "https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_AE/1/647c5b2458567dcce6b0a37178d352b8ffa9a7fe/share", - cls.DVK_AE_doc, - ) - - def test_simple_abstract_translation(self): - abstract_design_doc = sbol2.Document() - abstract_design_doc.read("tests/test_files/moclo_parts_circuit.xml") - - mocloplasmid_list = translate_abstract_to_plasmids( - abstract_design_doc, self.plasmid_collection, self.DVK_AE_doc - ) - - self.assertEqual( - len(mocloplasmid_list), - 4, - "There should be 4 plasmids in the abstract translation", - ) - - prev_site = "A" - for plas in mocloplasmid_list: - self.assertEqual(prev_site, plas.fusion_sites[0], mocloplasmid_list) - prev_site = plas.fusion_sites[1] - - def test_two_rbs_combinatorial_translation(self): - comb_doc = sbol2.Document() - comb_doc.read("tests/test_files/combinatorial_1.xml") - - comb_plasmid_list = translate_abstract_to_plasmids( - comb_doc, self.plasmid_collection, self.DVK_AE_doc - ) - - self.assertEqual( - len(comb_plasmid_list), - 5, - "There should be 5 plasmids in the abstract translation", - ) - - # Run through sbol2build to test composite count - part_documents = [] - - for mocloPlasmid in comb_plasmid_list: - temp_doc = sbol2.Document() - mocloPlasmid.definition.copy(temp_doc) - copy_sequences(mocloPlasmid.definition, temp_doc, self.plasmid_collection) - part_documents.append(temp_doc) - - assembly_doc = sbol2.Document() - assembly_obj = golden_gate_assembly_plan( - "combinatorial_rbs_assembly_plan", - part_documents, - self.DVK_AE_doc, - "BsaI", - assembly_doc, - ) - - composite_list = assembly_obj.run() - assembly_doc.write("comb_assembly.xml") - - self.assertEqual( - len(composite_list), - 2, - "Combinatorial assembly failed to produce 2 composites", - ) - - def test_complex_combinatorial_translation( - self, - ): # testing combinatorial design with 3 variable promoters and RBSs - complex_comb_doc = sbol2.Document() - complex_comb_doc.read("tests/test_files/complex_combinatorial_abstract.xml") - - comb_plasmid_list = translate_abstract_to_plasmids( - complex_comb_doc, self.plasmid_collection, self.DVK_AE_doc - ) - - self.assertEqual( - len(comb_plasmid_list), - 8, - f"There should be 8 plasmids in the abstract translation, found {len(comb_plasmid_list)}", - ) - - # Run through sbol2build to test composite count - part_documents = [] - - for mocloPlasmid in comb_plasmid_list: - temp_doc = sbol2.Document() - mocloPlasmid.definition.copy(temp_doc) - copy_sequences(mocloPlasmid.definition, temp_doc, self.plasmid_collection) - part_documents.append(temp_doc) - - assembly_doc = sbol2.Document() - assembly_obj = golden_gate_assembly_plan( - "complex_combinatorial_assembly_plan", - part_documents, - self.DVK_AE_doc, - "BsaI", - assembly_doc, - ) - - composite_list = assembly_obj.run() - assembly_doc.write("complex_comb_assembly.xml") - - self.assertEqual( - len(composite_list), - 9, - f"Combinatorial assembly failed to produce 9 composites, found {len(composite_list)}", - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index 8bc43d4..dba20f7 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -1,13 +1,8 @@ -# test 1: test same abstract design with each possible circuit selection, ensure the promoter and terminator shift accordingly - -# test 2: inaccessible part in abstract design -> should throw informative error message - -# test 3: (FUTURE) abstract design with multiple TUs - import sbol2 import unittest import sys import os +from collections import Counter sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) @@ -16,7 +11,7 @@ from buildcompiler.abstract_translator import extract_toplevel_definition, get_or_pull -class Test_Abstract_Translation_Functions(unittest.TestCase): +class Test_Buildcompiler_Functions(unittest.TestCase): @classmethod def setUpClass(cls): username = os.environ.get("SBH_USERNAME") @@ -38,7 +33,9 @@ def setUpClass(cls): source = sbol2.Document() - source.read("tests/test_files/combinatorial_1.xml") + # preload combinatorial designs into buildcompiler context + source.read("tests/test_files/complex_combinatorial_abstract.xml") + source.append("tests/test_files/combinatorial_1.xml", True) cls.buildcompiler = BuildCompiler( collections, "https://synbiohub.org", auth, source @@ -60,7 +57,7 @@ def test_simple_lvl1_assembly(self): ) assembly_activity = product_doc.get( - "https://SBOL2Build.org/qlSBuNBL_composite_assembly/1" + "http://buildcompiler.org/qlSBuNBL_composite_assembly/1" ) self.assertEqual( @@ -72,13 +69,13 @@ def test_simple_lvl1_assembly(self): usage_uris = {u.identity for u in assembly_activity.usages} expected_usage_uris = { - "https://SBOL2Build.org/qlSBuNBL_composite_assembly/pJ23100_AB_impl/1", - "https://SBOL2Build.org/qlSBuNBL_composite_assembly/pB0034_BC_impl/1", - "https://SBOL2Build.org/qlSBuNBL_composite_assembly/pE0030_CD_impl/1", - "https://SBOL2Build.org/qlSBuNBL_composite_assembly/pB0015_DE_impl/1", - "https://SBOL2Build.org/qlSBuNBL_composite_assembly/DVK_AE_impl/1", - "https://SBOL2Build.org/qlSBuNBL_composite_assembly/BsaI_enzyme/1", - "https://SBOL2Build.org/qlSBuNBL_composite_assembly/T4_Ligase/1", + "http://buildcompiler.org/qlSBuNBL_composite_assembly/pJ23100_AB_impl/1", + "http://buildcompiler.org/qlSBuNBL_composite_assembly/pB0034_BC_impl/1", + "http://buildcompiler.org/qlSBuNBL_composite_assembly/pE0030_CD_impl/1", + "http://buildcompiler.org/qlSBuNBL_composite_assembly/pB0015_DE_impl/1", + "http://buildcompiler.org/qlSBuNBL_composite_assembly/DVK_AE_impl/1", + "http://buildcompiler.org/qlSBuNBL_composite_assembly/BsaI_enzyme/1", + "http://buildcompiler.org/qlSBuNBL_composite_assembly/T4_Ligase/1", } for expected_uri in expected_usage_uris: @@ -110,7 +107,7 @@ def test_simple_lvl1_assembly(self): f"Implementation {impl.identity} should reference a built object", ) - expected_product_uri = "https://SBOL2Build.org/qlSBuNBL_composite_1_impl/1" + expected_product_uri = "http://buildcompiler.org/qlSBuNBL_composite_1_impl/1" product_impl = product_doc.get(expected_product_uri) product_def = product_doc.get(product_impl.built) @@ -128,12 +125,10 @@ def test_simple_lvl1_assembly(self): self.assertEqual( product_def.identity, - "https://SBOL2Build.org/qlSBuNBL_composite_1/1", + "http://buildcompiler.org/qlSBuNBL_composite_1/1", f"Implementation {product_impl} must build {product_def}", ) - product_doc.write("test_simple_lvl1_assembly.xml") - def test_two_rbs_combinatorial_translation(self): comb_doc = sbol2.Document() comb_doc.read("tests/test_files/combinatorial_1.xml") @@ -142,8 +137,6 @@ def test_two_rbs_combinatorial_translation(self): result_dict, assembly_doc = self.buildcompiler.assembly_lvl1(design) - assembly_doc.write("comb_assembly.xml") - self.assertEqual( len(result_dict), 1, @@ -179,48 +172,86 @@ def test_two_rbs_combinatorial_translation(self): self.assertTrue(has_b0033, "No composite has B0033") self.assertTrue(has_b0032, "No composite has B0032") - # def test_complex_combinatorial_translation( - # self, - # ): # testing combinatorial design with 3 variable promoters and RBSs - # complex_comb_doc = sbol2.Document() - # complex_comb_doc.read("tests/test_files/complex_combinatorial_abstract.xml") - - # comb_plasmid_list = translate_abstract_to_plasmids( - # complex_comb_doc, self.plasmid_collection, self.DVK_AE_doc - # ) - - # self.assertEqual( - # len(comb_plasmid_list), - # 8, - # f"There should be 8 plasmids in the abstract translation, found {len(comb_plasmid_list)}", - # ) - - # # Run through sbol2build to test composite count - # part_documents = [] - - # for mocloPlasmid in comb_plasmid_list: - # temp_doc = sbol2.Document() - # mocloPlasmid.definition.copy(temp_doc) - # copy_sequences(mocloPlasmid.definition, temp_doc, self.plasmid_collection) - # part_documents.append(temp_doc) - - # assembly_doc = sbol2.Document() - # assembly_obj = golden_gate_assembly_plan( - # "complex_combinatorial_assembly_plan", - # part_documents, - # self.DVK_AE_doc, - # "BsaI", - # assembly_doc, - # ) - - # composite_list = assembly_obj.run() - # assembly_doc.write("complex_comb_assembly.xml") - - # self.assertEqual( - # len(composite_list), - # 9, - # f"Combinatorial assembly failed to produce 9 composites, found {len(composite_list)}", - # ) + def test_complex_combinatorial_translation( + self, + ): # testing combinatorial design with 3 variable promoters and RBSs + complex_comb_doc = sbol2.Document() + complex_comb_doc.read("tests/test_files/complex_combinatorial_abstract.xml") + + design = complex_comb_doc.combinatorialderivations[0] + + result_dict, assembly_doc = self.buildcompiler.assembly_lvl1(design) + + assembly_doc.write("comb_assembly.xml") + + self.assertEqual( + len(result_dict), + 1, + "Expected one combinatorial derivation key in result dictionary", + ) + + derivation_uri = "https://sbolcanvas.org/dEOuAjnj/1" + + self.assertIn( + derivation_uri, + result_dict, + "Expected combinatorial derivation URI missing from results", + ) + + composites = result_dict[derivation_uri] + + self.assertEqual( + len(composites), + 9, + f"Combinatorial assembly failed to produce 9 composites, found {len(composites)}", + ) + + promoter_counts = Counter() + rbs_counts = Counter() + + for composite in composites: + components = composite.plasmid_definition.getInSequentialOrder() + display_ids = [component.displayId for component in components] + print(display_ids) + + promoter_counts[components[1].displayId] += 1 # index 1 = promoter + rbs_counts[components[3].displayId] += 1 # index 3 = RBS + + self.assertEqual( + promoter_counts["J23100"], + 3, + f"Expected J23100 to appear 3 times across the composite dictionary, found {promoter_counts['J23100']}", + ) + + self.assertEqual( + promoter_counts["J23106"], + 3, + f"Expected J23106 to appear 3 times across the composite dictionary, found {promoter_counts['J23106']}", + ) + + self.assertEqual( + promoter_counts["J23116"], + 3, + f"Expected J23116 to appear 3 times across the composite dictionary, found {promoter_counts['J23116']}", + ) + + self.assertEqual( + rbs_counts["B0034"], + 3, + f"Expected B0034 to appear 3 times across the composite dictionary, found {rbs_counts['B0034']}", + ) + + self.assertEqual( + rbs_counts["B0032"], + 3, + f"Expected B0032 to appear 3 times across the composite dictionary, found {rbs_counts['B0032']}", + ) + + self.assertEqual( + rbs_counts["B0033"], + 3, + f"Expected B0033 to appear 3 times across the composite dictionary, found {rbs_counts['B0033']}", + ) if __name__ == "__main__": diff --git a/tests/test_files/moclo_parts_circuit.xml b/tests/test_files/moclo_parts_circuit.xml index a71bac7..2c47e00 100644 --- a/tests/test_files/moclo_parts_circuit.xml +++ b/tests/test_files/moclo_parts_circuit.xml @@ -1,37 +1,35 @@ - - - - - moclo_circuit - - - - i6TQvNp91_28 - - - - - - - - - i6TQvNp91 + + + + + E0030_yfp + 1 + E0030_yfp + MoClo Basic Part: CDS - Fluorescent protein. Yellow. + 26479688 + + + + + + + qlSBuNBL 1 - - - E0030_yfp_3 + + + J23100_1 1 - + - - + + B0034_2 1 @@ -39,117 +37,137 @@ - - - J23100_1 + + + B0015_4 1 - + - - - B0015_4 + + + E0030_yfp_3 1 - + - - - i6TQvNp91Annotation2 + + + qlSBuNBLAnnotation3 1 - - - location2 + + + location3 1 + 780 + 908 - + - + - - - i6TQvNp91Annotation1 + + + qlSBuNBLAnnotation0 1 - - - location1 + + + location0 1 + 1 + 35 - + - + - - - i6TQvNp91Annotation0 + + + qlSBuNBLAnnotation2 1 - - - location0 + + + location2 1 + 57 + 779 - + - + - - - i6TQvNp91Annotation3 + + + qlSBuNBLAnnotation1 1 - - - location3 + + + location1 1 + 36 + 56 - + - + - - - i6TQvNp91Constraint2 + + + qlSBuNBLConstraint2 1 - - + + - - - i6TQvNp91Constraint3 + + + qlSBuNBLConstraint1 1 - - + + - - - i6TQvNp91Constraint1 + + + qlSBuNBLConstraint3 1 - - + + + + + + + B0034 + 1 + B0034 + MoClo Basic Part: RBS - Weiss RBS, high strength. Modified from Bba_B0034 to adjust spacing in MC system. + 26479688 + + + @@ -157,27 +175,10 @@ 1 B0015 MoClo Basic Part: Double terminator (B0010:B0012) - - - - 26479688 - - - - E0030_yfp - 1 - E0030_yfp - MoClo Basic Part: CDS - Fluorescent protein. Yellow. - - - - - 26479688 - - + @@ -185,68 +186,61 @@ 1 J23100 MoClo Basic Part: Constitutive promoter - Anderson series - high strength - - - - 26479688 + - - - B0034 - 1 - B0034 - MoClo Basic Part: RBS - Weiss RBS, high strength. Modified from Bba_B0034 to adjust spacing in MC system. - - - - - 26479688 - - - + + + J23100_sequence + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGC + + + + + E0030_yfp_sequence + ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCTTCGGCTACGGCCTGCAATGCTTCGCCCGCTACCCCGACCACATGAAGCTGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAATAA + + + + + B0015_sequence + CCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + + + B0034_sequence + AGAGAAAGAGGAGAAATACTA + + + + + qlSBuNBL_sequence + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCAGAGAAAGAGGAGAAATACTA + + J23100_Layout - - - moclo_circuit_Layout - + + + qlSBuNBL_Layout + - - 720.0 - 339.0 - 200.0 - 100.0 - i6TQvNp91_28 - - - - - - - B0015_Layout - - - - - i6TQvNp91_Layout - - - - 720.0 - 339.0 + + 412.5 + 337.0 200.0 100.0 container - + 0.0 50.0 200.0 @@ -255,7 +249,7 @@ - + 0.0 0.0 50.0 @@ -263,9 +257,9 @@ J23100_1 - + - + 50.0 0.0 50.0 @@ -273,9 +267,9 @@ B0034_2 - + - + 100.0 0.0 50.0 @@ -283,9 +277,9 @@ E0030_yfp_3 - + - + 150.0 0.0 50.0 @@ -293,7 +287,12 @@ B0015_4 - + + + + + B0015_Layout + @@ -305,4 +304,4 @@ B0034_Layout - + \ No newline at end of file diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 5600e7f..6154f85 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -1,43 +1,81 @@ import sbol2 -import filecmp import os import sys -import tempfile import unittest -from pathlib import Path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) + +from buildcompiler.sbol2build import ( + rebase_restriction_enzyme, + dna_componentdefinition_with_sequence, + number_to_suffix, + is_circular, + append_extracts_to_doc, +) -from sbol2build import rebase_restriction_enzyme, dna_componentdefinition_with_sequence, number_to_suffix, is_circular, append_extracts_to_doc, part_in_backbone_from_sbol class Test_HelperFunctions(unittest.TestCase): def test_restriction_enzyme(self): bsai = rebase_restriction_enzyme(name="BsaI") - constructor_error = 'Constructor Error: ed_restriction_enzyme' + constructor_error = "Constructor Error: ed_restriction_enzyme" test_cases = [ - ('http://rebase.neb.com/rebase/enz/BsaI.html', bsai.wasDerivedFrom, constructor_error), - ('http://www.biopax.org/release/biopax-level3.owl#Protein', bsai.types, constructor_error), - ('BsaI', bsai.name, constructor_error), - ('Restriction enzyme BsaI from REBASE.', bsai.description, constructor_error) + ( + "http://rebase.neb.com/rebase/enz/BsaI.html", + bsai.wasDerivedFrom, + constructor_error, + ), + ( + "http://www.biopax.org/release/biopax-level3.owl#Protein", + bsai.types, + constructor_error, + ), + ("BsaI", bsai.name, constructor_error), + ( + "Restriction enzyme BsaI from REBASE.", + bsai.description, + constructor_error, + ), ] for expected, attribute, error_msg in test_cases: with self.subTest(expected=expected, attribute=attribute): self.assertIn(expected, attribute, error_msg) - def test_dna_component_and_sequence(self): + def test_dna_component_and_sequence(self): # create a test dna component - dna_identity = 'Test_dna_identity' - dna_sequence = 'Test_dna_sequence' - test_dna_component, test_sequence = dna_componentdefinition_with_sequence(dna_identity, dna_sequence) + dna_identity = "Test_dna_identity" + dna_sequence = "Test_dna_sequence" + test_dna_component, test_sequence = dna_componentdefinition_with_sequence( + dna_identity, dna_sequence + ) test_cases = [ - ("", repr(type(test_dna_component)), 'Constructor Error: dna_componentdefinition_with_sequence, Not a ComponentDefinition'), - ("", repr(type(test_sequence)), 'Constructor Error: dna_componentdefinition_with_sequence, Not a Sequence'), - (f"https://SBOL2Build.org/{dna_identity}_seq/1", test_sequence.identity, 'Constructor Error: dna_componentdefinition_with_sequence, Incorrect Identity'), - ([test_sequence.identity], test_dna_component.sequences, 'Constructor Error: dna_componentdefinition_with_sequence, Sequence not in ComponentDefinition Sequences List'), - (['http://www.biopax.org/release/biopax-level3.owl#DnaRegion'], test_dna_component.types, 'Constructor Error: dna_componentdefinition_with_sequence, Missing DNA type') + ( + "", + repr(type(test_dna_component)), + "Constructor Error: dna_componentdefinition_with_sequence, Not a ComponentDefinition", + ), + ( + "", + repr(type(test_sequence)), + "Constructor Error: dna_componentdefinition_with_sequence, Not a Sequence", + ), + ( + f"http://buildcompiler.org/{dna_identity}_seq/1", + test_sequence.identity, + "Constructor Error: dna_componentdefinition_with_sequence, Incorrect Identity", + ), + ( + [test_sequence.identity], + test_dna_component.sequences, + "Constructor Error: dna_componentdefinition_with_sequence, Sequence not in ComponentDefinition Sequences List", + ), + ( + ["http://www.biopax.org/release/biopax-level3.owl#DnaRegion"], + test_dna_component.types, + "Constructor Error: dna_componentdefinition_with_sequence, Missing DNA type", + ), ] for expected, attribute, error_msg in test_cases: @@ -46,7 +84,7 @@ def test_dna_component_and_sequence(self): def test_is_circular(self): comp_def_circ = sbol2.ComponentDefinition("test") - comp_def_circ.types = ['http://identifiers.org/so/SO:0000988'] + comp_def_circ.types = ["http://identifiers.org/so/SO:0000988"] comp_def_not_circ = sbol2.ComponentDefinition("test_not_circ") @@ -54,38 +92,39 @@ def test_is_circular(self): self.assertFalse(is_circular(comp_def_not_circ)) def test_number_to_suffix(self): - #1 letter + # 1 letter self.assertEqual(number_to_suffix(1), "A") self.assertEqual(number_to_suffix(2), "B") self.assertEqual(number_to_suffix(26), "Z") - #2 letters + # 2 letters self.assertEqual(number_to_suffix(27), "AA") self.assertEqual(number_to_suffix(28), "AB") self.assertEqual(number_to_suffix(52), "AZ") self.assertEqual(number_to_suffix(53), "BA") - #3 letters - self.assertEqual(number_to_suffix(702), "ZZ") # 26*27 - 1 + # 3 letters + self.assertEqual(number_to_suffix(702), "ZZ") # 26*27 - 1 self.assertEqual(number_to_suffix(703), "AAA") # 26*27 self.assertEqual(number_to_suffix(704), "AAB") # 26*27 + 1 self.assertEqual(number_to_suffix(0), "") def test_append_extracts_to_doc(self): doc = sbol2.Document() - tup1 = dna_componentdefinition_with_sequence('def1', 'atgcaatg') - tup2 = dna_componentdefinition_with_sequence('def2', 'ggacttaac') + tup1 = dna_componentdefinition_with_sequence("def1", "atgcaatg") + tup2 = dna_componentdefinition_with_sequence("def2", "ggacttaac") append_extracts_to_doc([tup1, tup2, tup1], doc) - #ensure duplicate of tup1 is not being counted + # ensure duplicate of tup1 is not being counted self.assertEqual(len(doc.sequences), 2) self.assertEqual(len(doc.componentDefinitions), 2) - self.assertEqual(doc.sequences[0].elements, 'atgcaatg') - self.assertEqual(doc.componentDefinitions[0].displayId, 'def1') - self.assertEqual(doc.sequences[1].elements, 'ggacttaac') - self.assertEqual(doc.componentDefinitions[1].displayId, 'def2') + self.assertEqual(doc.sequences[0].elements, "atgcaatg") + self.assertEqual(doc.componentDefinitions[0].displayId, "def1") + self.assertEqual(doc.sequences[1].elements, "ggacttaac") + self.assertEqual(doc.componentDefinitions[1].displayId, "def2") + + # TODO test for part in backbone? - #TODO test for part in backbone? -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() From feb64fa6e6a00ad3966e0aaca63342258781630b Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 13 May 2026 12:34:15 -0600 Subject: [PATCH 34/47] synbiohub.org->api.synbiohub.org --- tests/test_buildcompiler.py | 8 ++++---- tests/test_core.py | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index dba20f7..0313db7 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -21,14 +21,14 @@ def setUpClass(cls): raise RuntimeError( "Missing SBH_USERNAME and/or SBH_PASSWORD environment variables" ) - sbh = sbol2.PartShop("https://synbiohub.org") + sbh = sbol2.PartShop("https://api.synbiohub.org") sbh.login(username, password) auth = sbh.key collections = [ - "https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1", - "https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", + "https://api.synbiohub.org/user/Gon/impl_test/impl_test_collection/1", + "https://api.synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", ] source = sbol2.Document() @@ -38,7 +38,7 @@ def setUpClass(cls): source.append("tests/test_files/combinatorial_1.xml", True) cls.buildcompiler = BuildCompiler( - collections, "https://synbiohub.org", auth, source + collections, "https://api.synbiohub.org", auth, source ) def test_simple_lvl1_assembly(self): diff --git a/tests/test_core.py b/tests/test_core.py index cc5c51b..6d6080a 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -27,7 +27,7 @@ class Test_Assembly_Functions(unittest.TestCase): @classmethod def setUpClass(cls): - cls.sbh = sbol2.PartShop("https://synbiohub.org") + cls.sbh = sbol2.PartShop("https://api.synbiohub.org") username = os.environ.get("SBH_USERNAME") password = os.environ.get("SBH_PASSWORD") @@ -43,27 +43,27 @@ def setUpClass(cls): final_doc = sbol2.Document() cls.sbh.pull( - "https://synbiohub.org/user/Gon/CIDARMoCloParts/CIDARMoCloParts_collection/1", + "https://api.synbiohub.org/user/Gon/CIDARMoCloParts/CIDARMoCloParts_collection/1", cls.source_doc, ) cls.sbh.pull( - "https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/CIDARMoCloPlasmidsKit_collection/1", + "https://api.synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/CIDARMoCloPlasmidsKit_collection/1", cls.source_doc, ) cls.sbh.pull( - "https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", + "https://api.synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", cls.source_doc, ) cls.sbh.pull( - "https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1", + "https://api.synbiohub.org/user/Gon/impl_test/impl_test_collection/1", cls.source_doc, ) cls.re_impl = cls.source_doc.get( - "https://synbiohub.org/user/Gon/Enzyme_Implementations/BsaI_impl/1" + "https://api.synbiohub.org/user/Gon/Enzyme_Implementations/BsaI_impl/1" ) cls.ligase_impl = cls.source_doc.get( - "https://synbiohub.org/user/Gon/Enzyme_Implementations/T4_Ligase_impl/1" + "https://api.synbiohub.org/user/Gon/Enzyme_Implementations/T4_Ligase_impl/1" ) cls.assembly = Assembly( @@ -72,7 +72,7 @@ def setUpClass(cls): def test_part_digestion(self): # TODO test activity relationships impl = self.source_doc.get( - "https://synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" + "https://api.synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" ) definition = self.source_doc.get(impl.built) plasmid = Plasmid(definition, None, [impl], None, self.source_doc) @@ -152,7 +152,7 @@ def test_part_digestion(self): # TODO test activity relationships def test_backbone_digestion(self): impl = self.source_doc.get( - "https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" + "https://api.synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" ) definition = self.source_doc.get(impl.built) plasmid = Plasmid(definition, None, [impl], None, self.source_doc) @@ -235,16 +235,16 @@ def test_ligation(self): assembly_activity = self.assembly.initialize_assembly_activity() parts = [ self.source_doc.get( - "https://synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" + "https://api.synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" ), self.source_doc.get( - "https://synbiohub.org/user/Gon/impl_test/pB0034_BC_impl/1" + "https://api.synbiohub.org/user/Gon/impl_test/pB0034_BC_impl/1" ), self.source_doc.get( - "https://synbiohub.org/user/Gon/impl_test/pE0030_CD_impl/1" + "https://api.synbiohub.org/user/Gon/impl_test/pE0030_CD_impl/1" ), self.source_doc.get( - "https://synbiohub.org/user/Gon/impl_test/pB0015_DE_impl/1" + "https://api.synbiohub.org/user/Gon/impl_test/pB0015_DE_impl/1" ), ] @@ -269,14 +269,14 @@ def test_ligation(self): reactants_list.append(extracts_tuple_list[0][0]) backbone_impl = self.source_doc.get( - "https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" + "https://api.synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" ) # run digestion, extract component + sequence, add to ligation_doc, reactants_list definition = self.source_doc.get(backbone_impl.built) self.sbh.pull( - "https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_AE/1", + "https://api.synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_AE/1", self.source_doc, ) From 3384148e5266751d471e3fd6af38e63d06133cb9 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 13 May 2026 12:39:54 -0600 Subject: [PATCH 35/47] secret access for github job --- .github/workflows/python-package.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index e22f5fb..44ce5a5 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -37,5 +37,8 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with unittest + env: + SBH_USERNAME: ${{ secrets.SBH_USERNAME }} + SBH_PASSWORD: ${{ secrets.SBH_PASSWORD }} run: | python -m unittest discover -s tests From cecf73ff054e7658283b10b5b8d3ff5dd997311f Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 13 May 2026 12:58:06 -0600 Subject: [PATCH 36/47] api for pulls only --- tests/test_buildcompiler.py | 6 +++--- tests/test_core.py | 24 ++++++++++++++---------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index 0313db7..2663e22 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -27,8 +27,8 @@ def setUpClass(cls): auth = sbh.key collections = [ - "https://api.synbiohub.org/user/Gon/impl_test/impl_test_collection/1", - "https://api.synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", + "https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1", + "https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", ] source = sbol2.Document() @@ -38,7 +38,7 @@ def setUpClass(cls): source.append("tests/test_files/combinatorial_1.xml", True) cls.buildcompiler = BuildCompiler( - collections, "https://api.synbiohub.org", auth, source + collections, "https://synbiohub.org", auth, source ) def test_simple_lvl1_assembly(self): diff --git a/tests/test_core.py b/tests/test_core.py index 6d6080a..868bd7a 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -60,10 +60,10 @@ def setUpClass(cls): ) cls.re_impl = cls.source_doc.get( - "https://api.synbiohub.org/user/Gon/Enzyme_Implementations/BsaI_impl/1" + "https://synbiohub.org/user/Gon/Enzyme_Implementations/BsaI_impl/1" ) cls.ligase_impl = cls.source_doc.get( - "https://api.synbiohub.org/user/Gon/Enzyme_Implementations/T4_Ligase_impl/1" + "https://synbiohub.org/user/Gon/Enzyme_Implementations/T4_Ligase_impl/1" ) cls.assembly = Assembly( @@ -72,7 +72,7 @@ def setUpClass(cls): def test_part_digestion(self): # TODO test activity relationships impl = self.source_doc.get( - "https://api.synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" + "https://synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" ) definition = self.source_doc.get(impl.built) plasmid = Plasmid(definition, None, [impl], None, self.source_doc) @@ -152,7 +152,7 @@ def test_part_digestion(self): # TODO test activity relationships def test_backbone_digestion(self): impl = self.source_doc.get( - "https://api.synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" + "https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" ) definition = self.source_doc.get(impl.built) plasmid = Plasmid(definition, None, [impl], None, self.source_doc) @@ -235,16 +235,16 @@ def test_ligation(self): assembly_activity = self.assembly.initialize_assembly_activity() parts = [ self.source_doc.get( - "https://api.synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" + "https://synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1" ), self.source_doc.get( - "https://api.synbiohub.org/user/Gon/impl_test/pB0034_BC_impl/1" + "https://synbiohub.org/user/Gon/impl_test/pB0034_BC_impl/1" ), self.source_doc.get( - "https://api.synbiohub.org/user/Gon/impl_test/pE0030_CD_impl/1" + "https://synbiohub.org/user/Gon/impl_test/pE0030_CD_impl/1" ), self.source_doc.get( - "https://api.synbiohub.org/user/Gon/impl_test/pB0015_DE_impl/1" + "https://synbiohub.org/user/Gon/impl_test/pB0015_DE_impl/1" ), ] @@ -269,7 +269,7 @@ def test_ligation(self): reactants_list.append(extracts_tuple_list[0][0]) backbone_impl = self.source_doc.get( - "https://api.synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" + "https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1" ) # run digestion, extract component + sequence, add to ligation_doc, reactants_list @@ -301,7 +301,11 @@ def test_ligation(self): reactants_list.append(extracts_tuple_list[0][0]) ligation_doc.add_list([self.re_impl, self.ligase_impl]) - self.sbh.pull(self.ligase_impl.built, ligation_doc) + + pull_uri = self.ligase_impl.built.replace( + "https://synbiohub.org", "https://api.synbiohub.org" + ) + self.sbh.pull(pull_uri, ligation_doc) final_doc = sbol2.Document() From 26e638d13f3e39d4dd0c1ba6c2ca4d8381bbf4a0 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 13 May 2026 13:00:44 -0600 Subject: [PATCH 37/47] api for collections --- tests/test_buildcompiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index 2663e22..f76e411 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -27,8 +27,8 @@ def setUpClass(cls): auth = sbh.key collections = [ - "https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1", - "https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", + "https://api.synbiohub.org/user/Gon/impl_test/impl_test_collection/1", + "https://api.synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", ] source = sbol2.Document() From f91e4903d258c7fafccedec1287c3d1beff95879 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 13 May 2026 13:11:27 -0600 Subject: [PATCH 38/47] revert api for collections --- tests/test_buildcompiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index f76e411..2663e22 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -27,8 +27,8 @@ def setUpClass(cls): auth = sbh.key collections = [ - "https://api.synbiohub.org/user/Gon/impl_test/impl_test_collection/1", - "https://api.synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", + "https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1", + "https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1", ] source = sbol2.Document() From bb8d3382f171d3189d1f8931d82cdbac05619ca2 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 13 May 2026 14:07:50 -0600 Subject: [PATCH 39/47] added server mode flag to add api. to canonical sbh registry uris --- src/buildcompiler/abstract_translator.py | 15 +++++++++++--- src/buildcompiler/buildcompiler.py | 25 ++++++++++++++++++------ tests/test_buildcompiler.py | 4 ++-- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/buildcompiler/abstract_translator.py b/src/buildcompiler/abstract_translator.py index 3a0d316..327c248 100644 --- a/src/buildcompiler/abstract_translator.py +++ b/src/buildcompiler/abstract_translator.py @@ -135,17 +135,26 @@ def copy_sequences(component_definition, target_doc, collection_doc): seq_obj.copy(target_doc) -def get_or_pull(doc, sbh, uri): +def get_or_pull( + doc: sbol2.Document, sbh: sbol2.PartShop, uri: str, server_mode: bool = False +): """ Get an SBOL object from a Document. If missing, pull it from SynBioHub and retry. """ + try: return doc.get(uri) except Exception as e: - # Treat lookup failure as "not present" - sbh.pull(uri, doc) + pull_uri = uri + + if server_mode: + canonical_resource = sbh.resource.replace("://api.", "://") + + pull_uri = uri.replace(canonical_resource, sbh.resource) + + sbh.pull(pull_uri, doc) try: return doc.get(uri) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index b31c64f..f345ba7 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -50,6 +50,7 @@ def __init__( sbh_registry: str, auth_token: str, sbol_doc: sbol2.Document = None, + server_mode: bool = False, ): self.sbh = sbol2.PartShop(sbh_registry) self.sbh.key = auth_token @@ -59,6 +60,7 @@ def __init__( self.BsaI_impl = None self.BbsI_impl = None self.T4_ligase_impl = None + self.server_mode = server_mode self._index_collections(collections) @@ -75,11 +77,16 @@ def _index_collections(self, collections: List[str]): :rtype: None """ for uri in collections: + if self.server_mode: + canonical_resource = self.sbh.resource.replace("://api.", "://") + uri = uri.replace(canonical_resource, self.sbh.resource) print(f"Indexing collection: {uri}") self.sbh.pull(uri, self.sbol_doc) for implementation in self.sbol_doc.implementations: - built_object = get_or_pull(self.sbol_doc, self.sbh, implementation.built) + built_object = get_or_pull( + self.sbol_doc, self.sbh, implementation.built, self.server_mode + ) if ( type(built_object) is sbol2.ModuleDefinition and ORGANISM_STRAIN in built_object.roles @@ -531,7 +538,9 @@ def _extract_plasmids_from_strain( ): # strain_implementation = optional param for plasmid in strain.functionalComponents: - plasmid_definition = get_or_pull(doc, self.sbh, plasmid.definition) + plasmid_definition = get_or_pull( + doc, self.sbh, plasmid.definition, self.server_mode + ) if ENGINEERED_PLASMID in plasmid_definition.roles: existing = self._get_indexed_plasmid( @@ -664,7 +673,7 @@ def _extract_design_parts( """ component_list = [c for c in design.getInSequentialOrder()] return [ - get_or_pull(self.sbol_doc, self.sbh, component.definition) + get_or_pull(self.sbol_doc, self.sbh, component.definition, self.server_mode) for component in component_list ] @@ -698,7 +707,9 @@ def extract_combinatorial_design_parts( component_list = [c for c in design.getInSequentialOrder()] component_dict = { component.identity: [ - get_or_pull(self.sbol_doc, self.sbh, component.definition) + get_or_pull( + self.sbol_doc, self.sbh, component.definition, self.server_mode + ) ] for component in component_list } @@ -720,7 +731,9 @@ def _get_abstract_design(self) -> sbol2.ComponentDefinition: continue component_definitions = [ - get_or_pull(self.sbol_doc, self.sbh, component.definition) + get_or_pull( + self.sbol_doc, self.sbh, component.definition, self.server_mode + ) for component in definition.getInSequentialOrder() ] if any( @@ -782,7 +795,7 @@ def _is_single_part(self, plasmid: sbol2.ComponentDefinition) -> bool: return False else: component_definitions = [ - get_or_pull(self.sbol_doc, self.sbh, comp.definition) + get_or_pull(self.sbol_doc, self.sbh, comp.definition, self.server_mode) for comp in plasmid.getInSequentialOrder() ] diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index 2663e22..c407c5b 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -38,7 +38,7 @@ def setUpClass(cls): source.append("tests/test_files/combinatorial_1.xml", True) cls.buildcompiler = BuildCompiler( - collections, "https://synbiohub.org", auth, source + collections, "https://api.synbiohub.org", auth, source, server_mode=True ) def test_simple_lvl1_assembly(self): @@ -89,7 +89,7 @@ def test_simple_lvl1_assembly(self): u for u in assembly_activity.usages if str(u.identity) == expected_uri ) - impl = get_or_pull(product_doc, self.buildcompiler.sbh, usage.entity) + impl = get_or_pull(product_doc, self.buildcompiler.sbh, usage.entity, True) self.assertIsNotNone( impl, From 776bc73e2ff5629025c17b048a4044dc45d436fc Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Wed, 13 May 2026 17:46:15 -0600 Subject: [PATCH 40/47] notebooks for creating various implementations used in buildcompiler --- notebooks/enzyme_creation.ipynb | 111 +++++++ notebooks/impl_creation.ipynb | 511 ++++++++++++++++++++++++++++++++ 2 files changed, 622 insertions(+) create mode 100644 notebooks/enzyme_creation.ipynb create mode 100644 notebooks/impl_creation.ipynb diff --git a/notebooks/enzyme_creation.ipynb b/notebooks/enzyme_creation.ipynb new file mode 100644 index 0000000..d26eec2 --- /dev/null +++ b/notebooks/enzyme_creation.ipynb @@ -0,0 +1,111 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "87bdb42e", + "metadata": {}, + "outputs": [], + "source": [ + "import sbol2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "00a6e6c9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Valid.'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from buildcompiler.sbol2build import rebase_restriction_enzyme, add_object_to_doc\n", + "\n", + "sbol2.Config.setHomespace(\"https://SBOL2Build.org\")\n", + "sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_COMPLIANT_URIS, True)\n", + "sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, False)\n", + "\n", + "protein_doc = sbol2.Document()\n", + "\n", + "\n", + "BsaI_def = rebase_restriction_enzyme(\"BsaI\")\n", + "\n", + "RE_sourcing = sbol2.Activity(\"restriction_enzyme_purchase\")\n", + "RE_sourcing.name = \"Restriction Enzyme Purchase\"\n", + "\n", + "BsaI_imp = sbol2.Implementation(f\"{BsaI_def.displayId}_impl\")\n", + "\n", + "BsaI_imp.built = BsaI_def.identity\n", + "BsaI_imp.wasGeneratedBy = RE_sourcing.identity\n", + "\n", + "BbsI_def = rebase_restriction_enzyme(\"BbsI\")\n", + "\n", + "BbsI_imp = sbol2.Implementation(f\"{BbsI_def.displayId}_impl\")\n", + "\n", + "BbsI_imp.built = BbsI_def.identity\n", + "BbsI_imp.wasGeneratedBy = RE_sourcing.identity\n", + "\n", + "\n", + "ligase = sbol2.ComponentDefinition(\"T4_Ligase\")\n", + "ligase.name = \"T4_Ligase\"\n", + "ligase.types = [sbol2.BIOPAX_PROTEIN]\n", + "ligase.roles = [\"http://identifiers.org/ncit/NCIT:C16796\"]\n", + "\n", + "ligase_sourcing = sbol2.Activity(\"ligase_purchase\")\n", + "ligase_sourcing.name = \"Ligase Purchase\"\n", + "\n", + "T4_imp = sbol2.Implementation(f\"{ligase.displayId}_impl\")\n", + "T4_imp.built = ligase.identity\n", + "T4_imp.wasGeneratedBy = ligase_sourcing.identity\n", + "\n", + "add_object_to_doc(ligase, protein_doc)\n", + "add_object_to_doc(T4_imp, protein_doc)\n", + "add_object_to_doc(BsaI_def, protein_doc)\n", + "add_object_to_doc(BsaI_imp, protein_doc)\n", + "add_object_to_doc(BbsI_def, protein_doc)\n", + "add_object_to_doc(BbsI_imp, protein_doc)\n", + "add_object_to_doc(RE_sourcing, protein_doc)\n", + "\n", + "protein_doc.write(\"proteins.xml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f7433c3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (buildplanner)", + "language": "python", + "name": "buildplanner" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/impl_creation.ipynb b/notebooks/impl_creation.ipynb new file mode 100644 index 0000000..a87de5e --- /dev/null +++ b/notebooks/impl_creation.ipynb @@ -0,0 +1,511 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "id": "87bdb42e", + "metadata": {}, + "outputs": [], + "source": [ + "import sbol2\n", + "from buildcompiler.buildcompiler import BuildCompiler" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "90648527", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Indexing collection: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/CIDARMoCloPlasmidsKit_collection/1\n" + ] + } + ], + "source": [ + "auth = \"0b2dc76c-4c1d-4ee8-b339-6a3e15e3faf9\"\n", + "buildcompiler = BuildCompiler(\n", + " [\n", + " \"https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/CIDARMoCloPlasmidsKit_collection/1\"\n", + " ],\n", + " \"https://synbiohub.org\",\n", + " auth,\n", + " sbol2.Document(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "99d093a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Plasmid:\n", + " Name: pB0033_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0033_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: DVA_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_CD/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: DVA_DH_D_H\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_DH/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'H']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DE_D_E\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DE/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'E']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DH_D_H\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DH/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'H']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: DVA_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DG_D_G\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DG/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'G']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pE1010_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE1010_CD/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pE0040_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0040_CD/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_EB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0032_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0032_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: DVA_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: DVA_DG_D_G\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_DG/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'G']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pE0030_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0030_CD/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['C', 'D']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23100_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: DVA_DF_D_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_DF/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'F']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_EB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0015_DF_D_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DF/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['D', 'F']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_EB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['E', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: DVA_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pB0034_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0034_BC/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['B', 'C']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23116_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_AB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['A', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: DVA_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_GB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['G', 'B']\n", + " Antibiotic Resistance: Ampicillin\n", + "]\n" + ] + } + ], + "source": [ + "print(buildcompiler.indexed_plasmids)" + ] + }, + { + "cell_type": "markdown", + "id": "eb4a9ab0", + "metadata": {}, + "source": [ + "# Create Collection of Implementations To Test" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9cdd18d0", + "metadata": {}, + "outputs": [], + "source": [ + "implementation_collection = sbol2.Document()\n", + "plas_doc = sbol2.Document()\n", + "\n", + "buildcompiler.sbh.pull(\n", + " \"https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/CIDARMoCloPlasmidsKit_collection/1\",\n", + " plas_doc,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d76dbdeb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "amp_region\n", + "B0033\n", + "J23106\n", + "DVK_AE\n", + "E0030_yfp\n", + "kan_region\n", + "E0040m_gfp\n", + "B0015\n", + "J23116\n", + "DVA_AF\n", + "J23100\n", + "DVA_AF2\n", + "DVK_GH\n", + "Fusion_Site_E\n", + "Fusion_Site_F\n", + "Fusion_Site_D\n", + "Fusion_Site_G\n", + "E1010m_rfp\n", + "Fusion_Site_A\n", + "DVK_FG\n", + "LacZ_cassette\n", + "Fusion_Site_B\n", + "Fusion_Site_C\n", + "LacZ_cassette2\n", + "Fusion_Site_H\n", + "B0032\n", + "B0034\n", + "dva_backbone_core\n", + "origin_of_replication_pSB1A2\n", + "DVK_EF\n", + "dvk_backbone_core\n", + "Design........................0\n", + "Build.........................0\n", + "Test..........................0\n", + "Analysis......................0\n", + "ComponentDefinition...........0\n", + "ModuleDefinition..............0\n", + "Model.........................0\n", + "Sequence......................0\n", + "Collection....................0\n", + "Activity......................0\n", + "Plan..........................0\n", + "Agent.........................0\n", + "Attachment....................0\n", + "CombinatorialDerivation.......0\n", + "Implementation................30\n", + "SampleRoster..................0\n", + "Experiment....................0\n", + "ExperimentalData..............0\n", + "Annotation Objects............0\n", + "---\n", + "Total: .........................30\n", + "\n" + ] + } + ], + "source": [ + "implementation_collection.default_namespace = (\n", + " \"http://buildcompiler.org/implementations/\"\n", + ")\n", + "\n", + "dummy_activity = sbol2.Activity(\"plasmid_dna_extraction\")\n", + "dummy_activity.name = \"DNA extraction\"\n", + "dummy_activity.types = \"http://sbols.org/v2#build\"\n", + "\n", + "for plasmid in plas_doc.componentDefinitions:\n", + " if \"http://identifiers.org/so/SO:0000637\" in plasmid.roles:\n", + " implementation = sbol2.Implementation(f\"{plasmid.displayId}_impl\")\n", + " implementation.built = plasmid.identity\n", + " implementation.wasGeneratedBy = dummy_activity\n", + "\n", + " implementation_collection.add(implementation)\n", + " else:\n", + " print(plasmid.displayId)\n", + "\n", + "print(implementation_collection)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3bcf02fb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Valid.'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "implementation_collection.write(\"moclo_implementations.xml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "9bf9c396", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['pB0033_BC_impl',\n", + " 'DVA_CD_impl',\n", + " 'pJ23100_AB_impl',\n", + " 'pJ23106_AB_impl',\n", + " 'pJ23116_GB_impl',\n", + " 'DVA_DH_impl',\n", + " 'pB0015_DE_impl',\n", + " 'pJ23100_GB_impl',\n", + " 'pB0015_DH_impl',\n", + " 'DVA_AB_impl',\n", + " 'pJ23116_FB_impl',\n", + " 'pB0015_DG_impl',\n", + " 'pE1010_CD_impl',\n", + " 'pE0040_CD_impl',\n", + " 'pJ23100_EB_impl',\n", + " 'pB0032_BC_impl',\n", + " 'DVA_FB_impl',\n", + " 'DVA_DG_impl',\n", + " 'pE0030_CD_impl',\n", + " 'pJ23100_FB_impl',\n", + " 'DVA_DF_impl',\n", + " 'pJ23106_EB_impl',\n", + " 'pB0015_DF_impl',\n", + " 'pJ23116_EB_impl',\n", + " 'pJ23106_FB_impl',\n", + " 'DVA_BC_impl',\n", + " 'pB0034_BC_impl',\n", + " 'pJ23116_AB_impl',\n", + " 'pJ23106_GB_impl',\n", + " 'DVA_GB_impl']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[cd.displayId for cd in implementation_collection.implementations]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7b8369e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (buildplanner)", + "language": "python", + "name": "buildplanner" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 767fa6bdfecd8e7409e2f753969dfb16e504381b Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Fri, 15 May 2026 23:12:49 -0600 Subject: [PATCH 41/47] decouple forward and reverse fusion site matching to fix skipped backbone match + composite numbering adjustment --- src/buildcompiler/sbol2build.py | 40 +++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/buildcompiler/sbol2build.py b/src/buildcompiler/sbol2build.py index 4ad1312..8bc8266 100644 --- a/src/buildcompiler/sbol2build.py +++ b/src/buildcompiler/sbol2build.py @@ -904,6 +904,14 @@ def ligation( for part in remaining_parts: # match insert sequence 5' to part 3' part_sequence_uri = part.sequences[0] + # check reverse match + if ( + source_document.getSequence(part_sequence_uri) + .elements[-fusion_site_length:] + .lower() + == insert_sequence[:fusion_site_length].lower() + ): + insert_3prime_match_id = part.identity if ( source_document.getSequence(part_sequence_uri) .elements[:fusion_site_length] @@ -928,13 +936,6 @@ def ligation( list_of_parts_per_composite.append(part) remaining_parts.remove(part) # match backbone 5' to insert sequence 3', set flag - elif ( - source_document.getSequence(part_sequence_uri) - .elements[-fusion_site_length:] - .lower() - == insert_sequence[:fusion_site_length].lower() - ): - insert_3prime_match_id = part.identity remaining_parts_after = len(remaining_parts) if remaining_parts_before == remaining_parts_after: @@ -948,9 +949,8 @@ def ligation( # transform list_of_parts_per_assembly into list of composites product_impl_list = [] - composite_number = 1 + composite_number = 0 - # TODO: use componentinstances to append "subcomponents" to each definition that is a composite component. all composites share the "subcomponents" for composite in list_of_composites_per_assembly: # a composite of the form [A,B,C] # calculate sequence composite_sequence_str = "" @@ -1027,15 +1027,25 @@ def ligation( source_document.getComponentDefinition(prev_three_prime) ) else: + anno_prefix = comp.displayId + + matching_anno_prefix = [ + a.displayId + for a in anno_list + if a.displayId.startswith(f"{comp.displayId}_") + ] + if matching_anno_prefix: + anno_prefix = f"{comp.displayId}_{len(matching_anno_prefix)}" + temp_extract_components.append(comp.definition) comp_location = sbol2.Range( - uri=f"{comp.displayId}_location", + uri=f"{anno_prefix}_location", start=len(composite_sequence_str) + fusion_site_length + 1, end=len(composite_sequence_str) + len(part_extract_sequence[:-4]), - ) # TODO check if seq len is correct + ) comp_anno = sbol2.SequenceAnnotation( - uri=f"{comp.displayId}_annotation" + uri=f"{anno_prefix}_annotation" ) comp_anno.locations.add(comp_location) anno_list.append(comp_anno) @@ -1046,15 +1056,17 @@ def ligation( composite_sequence_str + part_extract_sequence[:-fusion_site_length] ) # needs a version for linear + suffix = f"_{composite_number}" if composite_number > 0 else "" + # create dna component and sequence composite_component_definition, composite_seq = ( dna_componentdefinition_with_sequence( - f"{composite_prefix}_{composite_number}", + f"{composite_prefix}{suffix}", composite_sequence_str, molecule=True, ) ) - composite_component_definition.name = f"{composite_prefix}_{composite_number}" + composite_component_definition.name = f"{composite_prefix}{suffix}" composite_component_definition.addRole(ENGINEERED_PLASMID) composite_component_definition.addType(CIRCULAR) From 0454cfe4cc3cde910b5adbb9c704e3c73d0dbb9d Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Fri, 15 May 2026 23:13:47 -0600 Subject: [PATCH 42/47] lvl2 working --- notebooks/build_compiler_test.ipynb | 355 ++++++---------------------- 1 file changed, 68 insertions(+), 287 deletions(-) diff --git a/notebooks/build_compiler_test.ipynb b/notebooks/build_compiler_test.ipynb index 62a0cbe..f1e5a8f 100644 --- a/notebooks/build_compiler_test.ipynb +++ b/notebooks/build_compiler_test.ipynb @@ -30,7 +30,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['i6TQvNp91', 'i0mwvNcgH']\n" + "['qlSBuNBL', 'i0mwvNcgH']\n" ] } ], @@ -64,16 +64,12 @@ "output_type": "stream", "text": [ "Indexing collection: https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\n", - "Indexing collection: https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\n", - "['http://rebase.neb.com/rebase/enz/BsaI.html'] ['http://identifiers.org/obi/OBI_0000732']\n", - "[] ['http://identifiers.org/ncit/NCIT:C16796']\n", - "['http://rebase.neb.com/rebase/enz/BbsI.html'] ['http://identifiers.org/obi/OBI_0000732']\n", - "['http://rebase.neb.com/rebase/enz/SapI.html'] ['http://identifiers.org/obi/OBI_0000732']\n" + "Indexing collection: https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\n" ] } ], "source": [ - "auth = \"ca97f26e-9d33-4e38-810d-04d99f36e47c\"\n", + "auth = \"c6f22554-f057-4b4f-a597-67041d9002e5\"\n", "collections = [\n", " \"https://synbiohub.org/user/Gon/impl_test/impl_test_collection/1\",\n", " \"https://synbiohub.org/user/Gon/Enzyme_Implementations/Enzyme_Implementations_collection/1\",\n", @@ -101,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "6ec9e2fe", "metadata": {}, "outputs": [ @@ -109,54 +105,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "matched pJ23100_AB_A_B with DVK_AE_A_E on fusion site A!\n", - "matched pB0034_BC_B_C with pJ23100_AB_A_B on fusion site B!\n", - "matched pE0030_CD_C_D with pB0034_BC_B_C on fusion site C!\n", - "matched final component pB0015_DE_D_E with pE0030_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n", - "Success with backbone: DVK_AE_A_E and plasmids: ['pJ23100_AB_A_B', 'pB0034_BC_B_C', 'pE0030_CD_C_D', 'pB0015_DE_D_E']\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/ryan/GitRepo/SBOL2Build/src/buildcompiler/buildcompiler.py:321: RuntimeWarning: BsaI Restriction enzyme not found in provided collection(s). Domestication via purchase will be added to protocol.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "matched pJ23116_AB_A_B with DVK_AE_A_E on fusion site A!\n", - "matched pB0034_BC_B_C with pJ23116_AB_A_B on fusion site B!\n", - "matched pE0030_CD_C_D with pB0034_BC_B_C on fusion site C!\n", - "matched final component pB0015_DE_D_E with pE0030_CD_C_D and DVK_AE_A_E on fusion sites (D, E)!\n" + "Success with backbone: DVK_AE_A_E and plasmids: ['pJ23100_AB_A_B', 'pB0034_BC_B_C', 'pE0030_CD_C_D', 'pB0015_DE_D_E']\n", + "Success with backbone: DVK_AE_A_E and plasmids: ['pJ23116_AB_A_B', 'pB0034_BC_B_C', 'pE0030_CD_C_D', 'pB0015_DE_D_E']\n" ] }, { "data": { "text/plain": [ - "({'https://sbolcanvas.org/i6TQvNp91/1': [Plasmid:\n", - " Name: i6TQvNp91_composite_1_A_E\n", - " Plasmid Definition: https://SBOL2Build.org/i6TQvNp91_composite_1/1\n", + "({'https://sbolcanvas.org/qlSBuNBL/1': [Plasmid:\n", + " Name: qlSBuNBL_composite_1_A_E\n", + " Plasmid Definition: http://buildcompiler.org/qlSBuNBL_composite_1/1\n", " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://SBOL2Build.org/i6TQvNp91_composite_1_impl/1']\n", + " Plasmid Implementations: ['http://buildcompiler.org/qlSBuNBL_composite_1_impl/1']\n", " Strain Implementations: [None]\n", " Fusion Sites: ['A', 'E']\n", " Antibiotic Resistance: Kanamycin],\n", " 'https://sbolcanvas.org/i0mwvNcgH/1': [Plasmid:\n", " Name: i0mwvNcgH_composite_1_A_E\n", - " Plasmid Definition: https://SBOL2Build.org/i0mwvNcgH_composite_1/1\n", + " Plasmid Definition: http://buildcompiler.org/i0mwvNcgH_composite_1/1\n", " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://SBOL2Build.org/i0mwvNcgH_composite_1_impl/1']\n", + " Plasmid Implementations: ['http://buildcompiler.org/i0mwvNcgH_composite_1_impl/1']\n", " Strain Implementations: [None]\n", " Fusion Sites: ['A', 'E']\n", " Antibiotic Resistance: Kanamycin]},\n", - " )" + " )" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -180,290 +155,96 @@ "execution_count": null, "id": "3e2272ff", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'assembly_lvl2' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m composite_plasmids, final_doc = \u001b[43massembly_lvl2\u001b[49m(buildcompiler, design_doc)\n", - "\u001b[31mNameError\u001b[39m: name 'assembly_lvl2' is not defined" - ] - } - ], - "source": [ - "composite_plasmids, final_doc = buildcompiler.assembly_lvl2(None)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2ee945ec", - "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[Plasmid:\n", - " Name: pE0040_CD_C_D\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0040_CD/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pE0040_CD_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['C', 'D']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pB0034_BC_B_C\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0034_BC/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0034_BC_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['B', 'C']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23100_EB_E_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_EB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_EB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['E', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23100_AB_A_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_AB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_AB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['A', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pB0033_BC_B_C\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0033_BC/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0033_BC_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['B', 'C']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23106_FB_F_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_FB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_FB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['F', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23116_EB_E_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_EB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_EB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['E', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23116_GB_G_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_GB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_GB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['G', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pE0030_CD_C_D\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0030_CD/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pE0030_CD_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['C', 'D']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23100_FB_F_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_FB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_FB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['F', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23116_AB_A_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_AB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_AB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['A', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pB0032_BC_B_C\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0032_BC/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0032_BC_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['B', 'C']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pB0015_DF_D_F\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DF/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DF_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['D', 'F']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pB0015_DG_D_G\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DG/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DG_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['D', 'G']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pB0015_DH_D_H\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DH/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DH_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['D', 'H']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23106_AB_A_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_AB/1\n", + "Gen\n", + "Gen1\n", + "Plasmid:\n", + " Name: Gen_Gen1_plas_1_simple_A_E\n", + " Plasmid Definition: http://buildcompiler.org/Gen_Gen1_plas_1_simple/1\n", " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_AB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['A', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23106_GB_G_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_GB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_GB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['G', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23100_GB_G_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_GB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23100_GB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['G', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pE1010_CD_C_D\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE1010_CD/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pE1010_CD_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['C', 'D']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pB0015_DE_D_E\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DE/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pB0015_DE_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['D', 'E']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23116_FB_F_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_FB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23116_FB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['F', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23106_EB_E_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_EB/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/pJ23106_EB_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['E', 'B']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: Gen_plas_1_A_E\n", - " Plasmid Definition: https://SBOL2Build.org/Gen_plas_1/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://SBOL2Build.org/Gen_plas_1_impl/1']\n", + " Plasmid Implementations: ['http://buildcompiler.org/Gen_Gen1_plas_1_impl/1']\n", " Strain Implementations: [None]\n", " Fusion Sites: ['A', 'E']\n", " Antibiotic Resistance: Kanamycin\n", - ", Plasmid:\n", - " Name: Gen1_plas_1_E_F\n", - " Plasmid Definition: https://SBOL2Build.org/Gen1_plas_1/1\n", + "\n", + "Plasmid:\n", + " Name: Gen1_Gen1_plas_1_simple_E_F\n", + " Plasmid Definition: http://buildcompiler.org/Gen1_Gen1_plas_1_simple/1\n", " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://SBOL2Build.org/Gen1_plas_1_impl/1']\n", + " Plasmid Implementations: ['http://buildcompiler.org/Gen1_Gen1_plas_1_impl/1']\n", " Strain Implementations: [None]\n", " Fusion Sites: ['E', 'F']\n", " Antibiotic Resistance: Kanamycin\n", - "] [Plasmid:\n", - " Name: DVK_EF_E_F\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_EF/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_EF_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['E', 'F']\n", - " Antibiotic Resistance: Kanamycin\n", - ", Plasmid:\n", - " Name: DVK_AE_A_E\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_AE/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_AE_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['A', 'E']\n", - " Antibiotic Resistance: Kanamycin\n", - ", Plasmid:\n", - " Name: DVK_FG_F_G\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_FG/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_FG_impl/1']\n", - " Strain Implementations: None\n", - " Fusion Sites: ['F', 'G']\n", - " Antibiotic Resistance: Kanamycin\n", - ", Plasmid:\n", - " Name: DVK_GH_G_H\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVK_GH/1\n", + "\n", + "Success with backbone: DVA_AF2_A_F and plasmids: ['Gen_Gen1_plas_1_simple_A_E', 'Gen1_Gen1_plas_1_simple_E_F']\n", + "Plasmid:\n", + " Name: DVA_AF2_A_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_AF2/1\n", " Strain Definitions: [None]\n", - " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVK_GH_impl/1']\n", + " Plasmid Implementations: ['https://synbiohub.org/user/Gon/impl_test/DVA_AF2_impl/1']\n", " Strain Implementations: None\n", - " Fusion Sites: ['G', 'H']\n", - " Antibiotic Resistance: Kanamycin\n", - "]\n" + " Fusion Sites: ['A', 'F']\n", + " Antibiotic Resistance: Ampicillin\n", + "\n", + "Comparing gctt (Gen1_Gen1_plas_1_simple_extracted_part)to gcttand reverse: cgct (Gen1_Gen1_plas_1_simple_extracted_part)to ggag\n", + "Comparing cgct (DVA_AF2_extracted_backbone)to cgctand reverse: ggag (DVA_AF2_extracted_backbone)to ggag\n" ] } ], "source": [ - "print(buildcompiler.indexed_plasmids, buildcompiler.indexed_backbones)" + "lvl2_design_doc = sbol2.Document()\n", + "lvl2_design_doc.read(\"../tests/test_files/ExampleLvl2_design.xml\")\n", + "\n", + "\n", + "composite_plasmids, final_doc = buildcompiler.assembly_lvl2(\n", + " lvl2_design_doc, product_name=\"lvl2\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "7c12e504", + "execution_count": 7, + "id": "ba31c4c6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[, ]\n", - "[]\n" + "[Plasmid:\n", + " Name: lvl2_1_A_F\n", + " Plasmid Definition: http://buildcompiler.org/lvl2_1/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: ['http://buildcompiler.org/lvl2_1_impl/1']\n", + " Strain Implementations: [None]\n", + " Fusion Sites: ['A', 'F']\n", + " Antibiotic Resistance: Ampicillin\n", + "]\n" ] + }, + { + "data": { + "text/plain": [ + "'Valid.'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(buildcompiler.restriction_enzyme_implementations)\n", - "print(buildcompiler.ligase_implementations)\n", - "\n", - "# composite_plasmids = buildcompiler.assembly_lvl1(design, None)" + "print(composite_plasmids)\n", + "final_doc.write(\"lvl2_assembly.xml\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "79fd0cb5", "metadata": {}, "outputs": [], @@ -495,7 +276,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "8f4ea67c", "metadata": {}, "outputs": [], From 307f61c48b988f40be97cb1ae0f3312f8fa59f53 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Sun, 17 May 2026 17:55:56 -0600 Subject: [PATCH 43/47] lvl2 test draft --- tests/test_buildcompiler.py | 173 +++++++++++++++++++++++++++++++++++- 1 file changed, 171 insertions(+), 2 deletions(-) diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index c407c5b..b843754 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -107,7 +107,7 @@ def test_simple_lvl1_assembly(self): f"Implementation {impl.identity} should reference a built object", ) - expected_product_uri = "http://buildcompiler.org/qlSBuNBL_composite_1_impl/1" + expected_product_uri = "http://buildcompiler.org/qlSBuNBL_composite_impl/1" product_impl = product_doc.get(expected_product_uri) product_def = product_doc.get(product_impl.built) @@ -125,7 +125,7 @@ def test_simple_lvl1_assembly(self): self.assertEqual( product_def.identity, - "http://buildcompiler.org/qlSBuNBL_composite_1/1", + "http://buildcompiler.org/qlSBuNBL_composite/1", f"Implementation {product_impl} must build {product_def}", ) @@ -253,6 +253,175 @@ def test_complex_combinatorial_translation( f"Expected B0033 to appear 3 times across the composite dictionary, found {rbs_counts['B0033']}", ) + def test_simple_lvl2_assembly(self): + """ + High-level integration test for lvl2 assembly. + + Validates: + - lvl2 plasmid generation succeeds + - lvl1 intermediates are used as assembly inputs + - correct enzymes are included + - correct backbone selection occurs + - TU ordering is preserved + - SBOL provenance relationships are intact + """ + + abstract_design_doc = sbol2.Document() + abstract_design_doc.read("tests/test_files/ExampleLvl2_design.xml") + + # ------------------------------------------------------------ + # Run lvl2 assembly + # ------------------------------------------------------------ + lvl2_plasmids, final_doc = self.buildcompiler.assembly_lvl2( + abstract_design_doc, + product_name="lvl2", + ) + + self.assertEqual( + len(lvl2_plasmids), + 1, + "Expected exactly one lvl2 plasmid to be produced", + ) + + lvl2_plasmid = lvl2_plasmids[0] + + # ------------------------------------------------------------ + # Validate provenance / implementation relationships + # ------------------------------------------------------------ + product_impl = lvl2_plasmid.plasmid_implementations[0] + product_def = lvl2_plasmid.plasmid_definition + + self.assertIsNotNone( + product_impl, + "Lvl2 plasmid implementation should exist", + ) + + self.assertIsNotNone( + product_def, + "Lvl2 plasmid definition should exist", + ) + + self.assertEqual( + product_impl.built, + product_def.identity, + "Implementation should reference the built lvl2 construct", + ) + + # test level 1 assembly activities + lvl1_activities = [ + "http://buildcompiler.org/Gen1_Gen1_plas_assembly/1", + "http://buildcompiler.org/Gen_Gen1_plas_assembly/1", + ] + + lvl1_activities + + # TODO add same tests for lvl1 as with the level 2 activities + + # test level2 assembly activity + lvl2_assembly_activity = final_doc.get( + "http://buildcompiler.org/lvl2_assembly/1" + ) + + self.assertIsNotNone( + lvl2_assembly_activity, + "Assembly activity should exist in final document", + ) + + self.assertEqual( + product_impl.wasGeneratedBy, + [lvl2_assembly_activity.identity], + "Lvl2 implementation should reference generating assembly activity", + ) + + # ------------------------------------------------------------ + # Validate expected assembly usages + # ------------------------------------------------------------ + usage_entities = [ + get_or_pull(final_doc, self.buildcompiler.sbh, u.entity, True) + for u in lvl2_assembly_activity.usages + ] + + usage_display_ids = { + entity.displayId for entity in usage_entities if entity is not None + } + + self.assertIn( + "BbsI_impl", + usage_display_ids, + "Lvl2 assembly should use BbsI", + ) + + self.assertIn( + "T4_Ligase_impl", + usage_display_ids, + "Lvl2 assembly should use T4 ligase", + ) + + # ------------------------------------------------------------ + # Ensure lvl1 plasmids were used as inputs + # ------------------------------------------------------------ + lvl1_inputs = [ + entity + for entity in usage_entities + if entity is not None and "_plas" in entity.displayId.lower() + ] + + self.assertEqual( + len(lvl1_inputs), + 2, + "Lvl2 assembly should consume 2 lvl1 plasmids as inputs", + ) + + # ------------------------------------------------------------ + # Validate final construct ordering + # ------------------------------------------------------------ + components = product_def.getInSequentialOrder() + display_ids = [component.displayId for component in components] + + expected_order = [ + "Ligation_Scar_A", + "Gen_Gen1_plas_TU", + "Ligation_Scar_E", + "Gen1_Gen1_plas_TU", + "Ligation_Scar_F", + "dva_backbone_core", + ] + + self.assertEqual( + display_ids, + expected_order, + "Lvl2 construct does not preserve expected TU ordering", + ) + + # ------------------------------------------------------------ + # Validate all usage entities are valid implementations + # ------------------------------------------------------------ + for entity in usage_entities: + self.assertIsNotNone( + entity, + "All assembly usage entities should resolve correctly", + ) + + self.assertIsInstance( + entity, + sbol2.Implementation, + "Assembly usages should reference SBOL implementations", + ) + + self.assertIsNotNone( + entity.built, + f"Implementation {entity.identity} should reference a built object", + ) + + # Ensure no duplicate lvl1 intermediates were generated + lvl1_ids = [entity.displayId for entity in lvl1_inputs] + + self.assertEqual( + len(lvl1_ids), + len(set(lvl1_ids)), + "Duplicate lvl1 intermediates detected", + ) + if __name__ == "__main__": unittest.main() From 9cebf0ddc5957d6796058a3954060df5e57e7e93 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Sun, 17 May 2026 18:39:49 -0600 Subject: [PATCH 44/47] lvl1 TU tests for lvl2 assembly --- tests/test_buildcompiler.py | 73 +++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 4 deletions(-) diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index b843754..3aca313 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -6,7 +6,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) -from buildcompiler.buildcompiler import BuildCompiler +from buildcompiler.buildcompiler import BuildCompiler, _extract_lvl2_TUs from buildcompiler.abstract_translator import extract_toplevel_definition, get_or_pull @@ -269,6 +269,8 @@ def test_simple_lvl2_assembly(self): abstract_design_doc = sbol2.Document() abstract_design_doc.read("tests/test_files/ExampleLvl2_design.xml") + design_TUs = _extract_lvl2_TUs(abstract_design_doc) + # ------------------------------------------------------------ # Run lvl2 assembly # ------------------------------------------------------------ @@ -309,13 +311,76 @@ def test_simple_lvl2_assembly(self): # test level 1 assembly activities lvl1_activities = [ - "http://buildcompiler.org/Gen1_Gen1_plas_assembly/1", "http://buildcompiler.org/Gen_Gen1_plas_assembly/1", + "http://buildcompiler.org/Gen1_Gen1_plas_assembly/1", ] - lvl1_activities + for index, activity_id in enumerate(lvl1_activities): + lvl1_activity = final_doc.get(activity_id) + + self.assertIsNotNone( + lvl1_activity, + f"Lvl1 assembly activity {activity_id} should exist", + ) + + # ------------------------------------------------------------ + # Validate lvl1 activity usages + # ------------------------------------------------------------ + lvl1_usage_entities = [ + get_or_pull(final_doc, self.buildcompiler.sbh, u.entity, True) + for u in lvl1_activity.usages + ] + + lvl1_usage_display_ids = { + entity.displayId for entity in lvl1_usage_entities if entity is not None + } + + lvl1_usage_built_CDs = { + final_doc.get(entity.built) + for entity in lvl1_usage_entities + if entity is not None + } + + for comp in design_TUs[index].components: + self.assertIn( + comp.definition, + [ + subcomp.definition + for plas in lvl1_usage_built_CDs + for subcomp in plas.components + ], + f"Level 1 assembly activity {activity_id} missing {comp.definition} from design TU {design_TUs[index]}", + ) - # TODO add same tests for lvl1 as with the level 2 activities + self.assertIn( + "BsaI_impl", + lvl1_usage_display_ids, + "Lvl1 assembly should use BsaI", + ) + + self.assertIn( + "T4_Ligase_impl", + lvl1_usage_display_ids, + "Lvl1 assembly should use T4 ligase", + ) + + for entity in lvl1_usage_entities: + self.assertIsNotNone( + entity, + "All lvl1 assembly usage entities should resolve correctly", + ) + + self.assertIsInstance( + entity, + sbol2.Implementation, + "Lvl1 assembly usages should reference SBOL implementations", + ) + + self.assertGreaterEqual( + len(lvl1_usage_entities), + 3, + "Lvl1 assembly should contain enzyme and plasmid usages", + ) # test level2 assembly activity lvl2_assembly_activity = final_doc.get( From 207b68c3f2547e450e022b8ab9549c8c543edd6a Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Mon, 18 May 2026 11:59:52 -0600 Subject: [PATCH 45/47] transformation test --- tests/test_buildcompiler.py | 190 ++ tests/test_files/DVK_AE.xml | 443 ++++ tests/test_files/ExampleLvl2_design.xml | 593 ++++++ tests/test_files/comp_tu.xml | 343 +++ tests/test_files/mocloparts116.xml | 307 +++ tests/test_files/pB0015_DE.xml | 440 ++++ tests/test_files/pB0032_BC.xml | 440 ++++ tests/test_files/pE0040_CD.xml | 440 ++++ tests/test_files/pJ23100_AB.xml | 44 +- tests/test_files/transformation_activity.xml | 1884 +++++++++++++++++ .../transformation_activity_new.xml | 1884 +++++++++++++++++ 11 files changed, 7005 insertions(+), 3 deletions(-) create mode 100644 tests/test_files/DVK_AE.xml create mode 100644 tests/test_files/ExampleLvl2_design.xml create mode 100644 tests/test_files/comp_tu.xml create mode 100644 tests/test_files/mocloparts116.xml create mode 100644 tests/test_files/pB0015_DE.xml create mode 100644 tests/test_files/pB0032_BC.xml create mode 100644 tests/test_files/pE0040_CD.xml create mode 100644 tests/test_files/transformation_activity.xml create mode 100644 tests/test_files/transformation_activity_new.xml diff --git a/tests/test_buildcompiler.py b/tests/test_buildcompiler.py index 3aca313..4075e2e 100644 --- a/tests/test_buildcompiler.py +++ b/tests/test_buildcompiler.py @@ -2,8 +2,10 @@ import unittest import sys import os +import copy from collections import Counter + sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) from buildcompiler.buildcompiler import BuildCompiler, _extract_lvl2_TUs @@ -487,6 +489,194 @@ def test_simple_lvl2_assembly(self): "Duplicate lvl1 intermediates detected", ) + def test_transformation(self): + transformation_doc = sbol2.Document() + + chassis_md = sbol2.ModuleDefinition("E_coli_DH5alpha") + chassis_impl = sbol2.Implementation("E_coli_DH5alpha_impl") + chassis_impl.built = chassis_md.identity + + transformation_doc.add(chassis_md) + transformation_doc.add(chassis_impl) + + result = self.buildcompiler.transformation( + assembly_products=self.buildcompiler.indexed_plasmids[:2], + chassis_name="E_coli_DH5alpha", + transformation_doc=transformation_doc, + ) + + # Top-level output structure + self.assertEqual(result["stage"], "transformation") + + plasmid_displayIds = [ + plasmid.plasmid_definition.displayId + for plasmid in self.buildcompiler.indexed_plasmids[:2] + ] + + self.assertEqual( + result["inputs"], + plasmid_displayIds, + ) + + self.assertEqual( + result["chassis"], + "E_coli_DH5alpha", + ) + + self.assertIn("sbol_artifacts", result) + self.assertIn("json_intermediate", result) + self.assertIn("protocol_artifacts", result) + + # Robot JSON intermediate + json_intermediate = result["json_intermediate"] + + self.assertEqual( + json_intermediate["protocol"], + "chemical_transformation", + ) + + self.assertEqual( + json_intermediate["version"], + "0.1", + ) + + self.assertEqual(len(json_intermediate["steps"]), 2) + + first_step = json_intermediate["steps"][0] + + self.assertEqual(first_step["step"], 1) + + self.assertEqual(first_step["plasmid"], plasmid_displayIds[0]) + + self.assertEqual( + first_step["heat_shock"], + { + "temperature_c": 42, + "duration_seconds": 45, + }, + ) + + # SBOL artifact outputs + sbol_artifacts = result["sbol_artifacts"] + + self.assertEqual(len(sbol_artifacts), 2) + + first_artifact = sbol_artifacts[0] + + self.assertIn("transformation_activity", first_artifact) + + self.assertIn( + "transformed_strain_module", + first_artifact, + ) + + self.assertIn( + "transformed_strain_implementation", + first_artifact, + ) + + # Verify generated SBOL objects exist in document + transform_activity = transformation_doc.get( + f"http://buildcompiler.org/transform_{plasmid_displayIds[0]}_1/1" + ) + + self.assertIsInstance( + transform_activity, + sbol2.Activity, + ) + + self.assertEqual( + len(transform_activity.usages), + 2, + ) + + self.assertEqual( + len(transform_activity.associations), + 1, + ) + + association = transform_activity.associations[0] + + self.assertIsNotNone(association.plan) + + self.assertIsNotNone(association.agent) + + # Verify transformed strain + transformed_strain = transformation_doc.get( + f"http://buildcompiler.org/E_coli_DH5alpha_with_{plasmid_displayIds[0]}/1" + ) + + self.assertIsInstance( + transformed_strain, + sbol2.ModuleDefinition, + ) + + self.assertEqual( + len(transformed_strain.modules), + 1, + ) + + self.assertEqual( + len(transformed_strain.functionalComponents), + 1, + ) + + # Verify transformed implementation + transformed_impl = transformation_doc.get( + f"http://buildcompiler.org/E_coli_DH5alpha_with_{plasmid_displayIds[0]}_impl/1" + ) + + self.assertIsInstance( + transformed_impl, + sbol2.Implementation, + ) + + self.assertEqual( + transformed_impl.built, + transformed_strain.identity, + ) + + self.assertEqual( + transformed_impl.wasGeneratedBy, + [transform_activity.identity], + ) + + # Verify protocol artifacts/logging + protocol_artifacts = result["protocol_artifacts"] + + self.assertIn( + "ot2_script", + protocol_artifacts, + ) + + self.assertIn( + "human_instructions", + protocol_artifacts, + ) + + self.assertIn( + "logs", + protocol_artifacts, + ) + + self.assertEqual( + len(protocol_artifacts["logs"]), + 2, + ) + + # Error handling + invalid_plasmid = copy.deepcopy(self.buildcompiler.indexed_plasmids[4]) + + invalid_plasmid.plasmid_implementations = [] + + with self.assertRaises( + ValueError, msg="Plasmid object with no implementations should throw error" + ): + self.buildcompiler.transformation( + assembly_products=[invalid_plasmid], + transformation_doc=transformation_doc, + ) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_files/DVK_AE.xml b/tests/test_files/DVK_AE.xml new file mode 100644 index 0000000..d8ab2b6 --- /dev/null +++ b/tests/test_files/DVK_AE.xml @@ -0,0 +1,443 @@ + + + + + DVK_AE + 1 + DVK_AE + 2026-02-05T23:49:40 + + + + + + + + + + + Fusion_Site_E_4 + 1 + + + + + + + + + + + Fusion_Site_A_2 + 1 + + + + + + + + + + + dvk_backbone_core_5 + 1 + + + + + + + + + + + LacZ_cassette_3 + 1 + + + + + + + + + + + DVK_AEAnnotation0 + 1 + + + + + + + location0 + 1 + + + + 1 + 4 + + + + + + + + + + DVK_AEAnnotation1 + 1 + + + + + + + location1 + 1 + + + + 5 + 500 + + + + + + + + + + DVK_AEAnnotation2 + 1 + + + + + + + location2 + 1 + + + + 501 + 504 + + + + + + + + + + DVK_AEAnnotation3 + 1 + + + + + + + location3 + 1 + + + + 505 + 2731 + + + + + + + + + + DVK_AEConstraint1 + 1 + + + + + + + + + + + + DVK_AEConstraint2 + 1 + + + + + + + + + + + + DVK_AEConstraint3 + 1 + + + + + + + + + + + + + Fusion_Site_A + 1 + Fusion_Site_A + MoClo standard fusion site A + + + + + 26479688 + + + + + + + Fusion_Site_E + 1 + Fusion_Site_E + MoClo standard fusion site E + + + + + 26479688 + + + + + + + LacZ_cassette + 1 + LacZ_cassette + MoClo IPTG inducible LacZ alpha subunit + 2026-04-30T18:50:31 + + + + + 26479688 + + + + + + + dvk_backbone_core + 1 + This is the backbone core for Destination Vector Kanamycin (DVK) plasmids. Based on the pSB1K3 plasmid and contains kanamycin gene and a high copy number origin of replication. + + + + 26479688 + + + + + + Component_ori + 1 + + + + 26479688 + + + + + + + + Component_kan + 1 + + + + 26479688 + + + + + + + + origin_of_replication_pSB1A2_annotation + 1 + + + + 26479688 + + + + ori + 1 + + + + 26479688 + 282 + 870 + + + + + + + + + + kan_region_annotation + 1 + + + + 26479688 + + + + kan + 1 + + + + 26479688 + 1103 + 1929 + + + + + + + + + + + kan_region + 1 + Kanamycin resistance gene from the pSB1K3 plasmid. + 2026-04-16T22:44:18 + + + + 26479688 + + + + + + + + origin_of_replication_pSB1A2 + 1 + Origin of replication from the pSB1A2/pSB1K3 plasmid. pSB1A2 is a high copy number plasmid. The replication origin is a pUC19-derived pMB1 (copy number of 100-300 per cell + 2026-04-16T22:42:31 + + + + 26479688 + + + + + + + DVK_AE_sequence + 1 + + + + GGAGAGAGACCTGCACCATATGCGGTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTGGCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCACGACGTTGTAAAACGACGGCCAGTGAATTCGAGCTCGGTACCCGGGGATCCTCTAGAGTCGACCTGCAGGCATGCAAGCTTGGCGTAATCATGGTCATAGCTGTTTCCTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGAAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGGTCTCTGCTTatgtcttctactagtagcggccgctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtggaagacat + + + + + LacZ_cassette_sequence + 1 + LacZ_cassette Sequence + + + + AGAGACCTGCACCATATGCGGTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTGGCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCACGACGTTGTAAAACGACGGCCAGTGAATTCGAGCTCGGTACCCGGGGATCCTCTAGAGTCGACCTGCAGGCATGCAAGCTTGGCGTAATCATGGTCATAGCTGTTTCCTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGGAAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGGTCTCT + + + + + Fusion_Site_A_sequence + 1 + Fusion_Site_A Sequence + + + + GGAG + + + + + Fusion_Site_E_sequence + 1 + Fusion_Site_E Sequence + + + + GCTT + + + + + dvk_backbone_core_seq + 1 + + + + 26479688 + atgtcttctactagtagcggccgctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtggaagacat + + + + + kan_region_seq + 1 + + + + 26479688 + caaggggtgttatgagccatattcaacgggaaacgtcttgctccaggccgcgattaaattccaacatggatgctgatttatatgggtataaatgggctcgcgataatgtcgggcaatcaggtgcgacaatctatcgattgtatgggaagcccgatgcgccagagttgtttctgaaacatggcaaaggtagcgttgccaatgatgttacagatgagatggtcagactaaactggctgacggaatttatgcctcttccgaccatcaagcattttatccgtactcctgatgatgcatggttactcaccactgcgatccccgggaaaacagcattccaggtattagaagaatatcctgattcaggtgaaaatattgttgatgcgctggcagtgttcctgcgccggttgcattcgattcctgtttgtaattgtccttttaacagcgatcgcgtatttcgtctcgctcaggcgcaatcacgaatgaataacggtttggttgatgcgagtgattttgatgacgagcgtaatggctggcctgttgaacaagtctggaaagaaatgcataagcttttgccattctcaccggattcagtcgtcactcatggtgatttctcacttgataaccttatttttgacgaggggaaattaataggttgtattgatgttggacgagtcggaatcgcagaccgataccaggatcttgccatcctatggaactgcctcggtgagttttctccttcattacagaaacggctttttcaaaaatatggtattgataatcctgatatgaataaattgcagtttcatttgatgctcgatgagtttttctaa + + + + + origin_of_replication_seq + 1 + + + + 26479688 + ttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctgtggaaa + + + \ No newline at end of file diff --git a/tests/test_files/ExampleLvl2_design.xml b/tests/test_files/ExampleLvl2_design.xml new file mode 100644 index 0000000..968ccd3 --- /dev/null +++ b/tests/test_files/ExampleLvl2_design.xml @@ -0,0 +1,593 @@ + + + + + Two_genes + 1 + + + + + + + + Gen1_Component + 1 + + + + + + + + Gen_Component + 1 + + + + + + + + Two_genes_SequenceAnnotation + 1 + + + + Two_genes_SequenceAnnotation_Range + 1 + 1 + 906 + + + + + + + + + + Two_genes_SequenceAnnotation1 + 1 + + + + Two_genes_SequenceAnnotation1_Range + 1 + 907 + 1771 + + + + + + + + + + Two_genes_SequenceConstraint + 1 + + + + + + + + + + Gen1 + 1 + + + + + + + RBS1_Component + 1 + + + + + + + + Pro1_Component + 1 + + + + + + + + Ter1_Component + 1 + + + + + + + + CDS1_Component + 1 + + + + + + + + Gen1_SequenceAnnotation2 + 1 + + + + Gen1_SequenceAnnotation2_Range + 1 + 56 + 736 + + + + + + + + + + Gen1_SequenceAnnotation3 + 1 + + + + Gen1_SequenceAnnotation3_Range + 1 + 737 + 865 + + + + + + + + + + Gen1_SequenceAnnotation1 + 1 + + + + Gen1_SequenceAnnotation1_Range + 1 + 36 + 55 + + + + + + + + + + Gen1_SequenceAnnotation + 1 + + + + Gen1_SequenceAnnotation_Range + 1 + 1 + 35 + + + + + + + + + + Gen1_SequenceConstraint1 + 1 + + + + + + + + + Gen1_SequenceConstraint2 + 1 + + + + + + + + + Gen1_SequenceConstraint + 1 + + + + + + + + + + B0015 + 1 + B0015 + MoClo Basic Part: Double terminator (B0010:B0012) + + + + + 26479688 + + + + + + + B0032 + 1 + B0032 + MoClo Basic Part: RBS - Weiss RBS, medium strength. Modified from Bba_B0032 to adjust spacing in MC system. + + + + + 26479688 + + + + + + + J23116 + 1 + J23116 + MoClo Basic Part: Constitutive promoter - Anderson series - low strength + + + + + 26479688 + + + + + + + E1010m_rfp + 1 + E1010m_rfp + MoClo Basic Part: CDS - Fluorescent protein. Red. Modified from Bba_E1010 to fix illegal sites. + + + + + 26479688 + + + + + + + B0033 + 1 + B0033 + MoClo Basic Part: RBS - Weiss RBS, low strength. Modified from Bba_B0033 to adjust spacing in MC system. + + + + + 26479688 + + + + + + + E0040m_gfp + 1 + E0040m_gfp + MoClo Basic Part: CDS - Fluorescent protein. Green. Modified from Bba_E0040 to fix illegal site. + + + + + 26479688 + + + + + + + Gen + 1 + + + + + + + CDS_Component + 1 + + + + + + + + Pro_Component + 1 + + + + + + + + RBS_Component + 1 + + + + + + + + Ter_Component + 1 + + + + + + + + Gen_SequenceAnnotation2 + 1 + + + + Gen_SequenceAnnotation2_Range + 1 + 58 + 777 + + + + + + + + + + Gen_SequenceAnnotation + 1 + + + + Gen_SequenceAnnotation_Range + 1 + 1 + 35 + + + + + + + + + + Gen_SequenceAnnotation3 + 1 + + + + Gen_SequenceAnnotation3_Range + 1 + 778 + 906 + + + + + + + + + + Gen_SequenceAnnotation1 + 1 + + + + Gen_SequenceAnnotation1_Range + 1 + 36 + 57 + + + + + + + + + + Gen_SequenceConstraint2 + 1 + + + + + + + + + Gen_SequenceConstraint1 + 1 + + + + + + + + + Gen_SequenceConstraint + 1 + + + + + + + + + + J23100 + 1 + J23100 + MoClo Basic Part: Constitutive promoter - Anderson series - high strength + + + + + 26479688 + + + + + + + Two_genesSequence + 1 + + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCAGAGTCACACAGGAAAGTACTAATGCGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCGGTTATGGTGTTCAATGCTTTGCGAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTATAACTCACACAATGTATACATCATGGCAGACAAACAAAAGAATGGAATCAAAGTTAACTTCAAAATTAGACACAACATTGAAGATGGAAGCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGAGAGATCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAATAACCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATATTGACAGCTAGCTCAGTCCTAGGGACTATGCTAGCAGAGTCACACAGGACTACTAATGGCTTCCTCCGAGGATGTTATCAAAGAGTTCATGCGTTTCAAAGTTCGTATGGAAGGTTCCGTTAACGGTCACGAGTTCGAAATCGAAGGTGAAGGTGAAGGTCGTCCGTACGAAGGTACCCAGACCGCTAAACTGAAAGTTACCAAAGGTGGTCCGCTGCCGTTCGCTTGGGACATCCTGTCCCCGCAGTTCCAGTACGGTTCCAAAGCTTACGTTAAACACCCGGCTGACATCCCGGACTACCTGAAACTGTCCTTCCCGGAAGGTTTCAAATGGGAACGTGTTATGAACTTCGAAGATGGTGGTGTTGTTACCGTTACCCAGGACTCCTCCCTGCAAGACGGTGAGTTCATCTACAAAGTTAAACTGCGTGGTACCAACTTCCCGTCCGACGGTCCGGTTATGCAGAAAAAAACCATGGGTTGGGAAGCTTCCACCGAACGTATGTACCCGGAGGATGGTGCTCTGAAAGGTGAAATCAAAATGCGTCTGAAACTGAAAGACGGTGGTCACTACGACGCTGAAGTTAAAACCACCTACATGGCTAAAAAACCGGTTCAGCTGCCGGGTGCTTACAAAACCGACATCAAACTGGACATCACCTCCCACAACGAGGACTACACCATCGTTGAACAGTACGAACGTGCTGAAGGTCGTCACTCCACCGGTGCTTAATAACCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + + + Gen1Sequence + 1 + + TTGACAGCTAGCTCAGTCCTAGGGACTATGCTAGCAGAGTCACACAGGACTACTAATGGCTTCCTCCGAGGATGTTATCAAAGAGTTCATGCGTTTCAAAGTTCGTATGGAAGGTTCCGTTAACGGTCACGAGTTCGAAATCGAAGGTGAAGGTGAAGGTCGTCCGTACGAAGGTACCCAGACCGCTAAACTGAAAGTTACCAAAGGTGGTCCGCTGCCGTTCGCTTGGGACATCCTGTCCCCGCAGTTCCAGTACGGTTCCAAAGCTTACGTTAAACACCCGGCTGACATCCCGGACTACCTGAAACTGTCCTTCCCGGAAGGTTTCAAATGGGAACGTGTTATGAACTTCGAAGATGGTGGTGTTGTTACCGTTACCCAGGACTCCTCCCTGCAAGACGGTGAGTTCATCTACAAAGTTAAACTGCGTGGTACCAACTTCCCGTCCGACGGTCCGGTTATGCAGAAAAAAACCATGGGTTGGGAAGCTTCCACCGAACGTATGTACCCGGAGGATGGTGCTCTGAAAGGTGAAATCAAAATGCGTCTGAAACTGAAAGACGGTGGTCACTACGACGCTGAAGTTAAAACCACCTACATGGCTAAAAAACCGGTTCAGCTGCCGGGTGCTTACAAAACCGACATCAAACTGGACATCACCTCCCACAACGAGGACTACACCATCGTTGAACAGTACGAACGTGCTGAAGGTCGTCACTCCACCGGTGCTTAATAACCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + + + GenSequence + 1 + + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCAGAGTCACACAGGAAAGTACTAATGCGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCGGTTATGGTGTTCAATGCTTTGCGAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTATAACTCACACAATGTATACATCATGGCAGACAAACAAAAGAATGGAATCAAAGTTAACTTCAAAATTAGACACAACATTGAAGATGGAAGCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGAGAGATCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAATAACCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + + + B0015_sequence + 1 + B0015 Sequence + + + + CCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + + + B0032_sequence + 1 + B0032 Sequence + + + + AGAGTCACACAGGAAAGTACTA + + + + + J23100_sequence + 1 + J23100 Sequence + + + + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGC + + + + + J23116_sequence + 1 + J23116 Sequence + + + + TTGACAGCTAGCTCAGTCCTAGGGACTATGCTAGC + + + + + E0040m_gfp_sequence + 1 + E0040m_gfp Sequence + + + + ATGCGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCGGTTATGGTGTTCAATGCTTTGCGAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTATAACTCACACAATGTATACATCATGGCAGACAAACAAAAGAATGGAATCAAAGTTAACTTCAAAATTAGACACAACATTGAAGATGGAAGCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGAGAGATCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAATAA + + + + + E1010m_rfp_sequence + 1 + E1010m_rfp Sequence + + + + ATGGCTTCCTCCGAGGATGTTATCAAAGAGTTCATGCGTTTCAAAGTTCGTATGGAAGGTTCCGTTAACGGTCACGAGTTCGAAATCGAAGGTGAAGGTGAAGGTCGTCCGTACGAAGGTACCCAGACCGCTAAACTGAAAGTTACCAAAGGTGGTCCGCTGCCGTTCGCTTGGGACATCCTGTCCCCGCAGTTCCAGTACGGTTCCAAAGCTTACGTTAAACACCCGGCTGACATCCCGGACTACCTGAAACTGTCCTTCCCGGAAGGTTTCAAATGGGAACGTGTTATGAACTTCGAAGATGGTGGTGTTGTTACCGTTACCCAGGACTCCTCCCTGCAAGACGGTGAGTTCATCTACAAAGTTAAACTGCGTGGTACCAACTTCCCGTCCGACGGTCCGGTTATGCAGAAAAAAACCATGGGTTGGGAAGCTTCCACCGAACGTATGTACCCGGAGGATGGTGCTCTGAAAGGTGAAATCAAAATGCGTCTGAAACTGAAAGACGGTGGTCACTACGACGCTGAAGTTAAAACCACCTACATGGCTAAAAAACCGGTTCAGCTGCCGGGTGCTTACAAAACCGACATCAAACTGGACATCACCTCCCACAACGAGGACTACACCATCGTTGAACAGTACGAACGTGCTGAAGGTCGTCACTCCACCGGTGCTTAATAA + + + + + B0033_sequence + 1 + B0033 Sequence + + + + AGAGTCACACAGGACTACTA + + + + + Two_genes_SBOLDesignerActivity + 1 + Gonzalo Vidal + 2026-03-24T16:18:54.110-06:00 + + + + Association + 1 + + + + + + diff --git a/tests/test_files/comp_tu.xml b/tests/test_files/comp_tu.xml new file mode 100644 index 0000000..8a5a343 --- /dev/null +++ b/tests/test_files/comp_tu.xml @@ -0,0 +1,343 @@ + + + + + B0015 + 1 + B0015 + MoClo Basic Part: Double terminator (B0010:B0012) + + + + + 26479688 + + + + + + + i2mLg6N9A + 1 + + + + + + + B0015_4 + 1 + + + + + + + + C0062_luxR_3 + 1 + + + + + + + + J23100_1 + 1 + + + + + + + + B0033_2 + 1 + + + + + + + + i2mLg6N9AAnnotation1 + 1 + + + + location1 + 1 + 36 + 55 + + + + + + + + + + i2mLg6N9AAnnotation2 + 1 + + + + location2 + 1 + 56 + 811 + + + + + + + + + + i2mLg6N9AAnnotation3 + 1 + + + + location3 + 1 + 812 + 940 + + + + + + + + + + i2mLg6N9AAnnotation0 + 1 + + + + location0 + 1 + 1 + 35 + + + + + + + + + + i2mLg6N9AConstraint3 + 1 + + + + + + + + + i2mLg6N9AConstraint1 + 1 + + + + + + + + + i2mLg6N9AConstraint2 + 1 + + + + + + + + + + B0033 + 1 + B0033 + MoClo Basic Part: RBS - Weiss RBS, low strength. Modified from Bba_B0033 to adjust spacing in MC system. + + + + + 26479688 + + + + + + + J23100 + 1 + J23100 + MoClo Basic Part: Constitutive promoter - Anderson series - high strength + + + + + 26479688 + + + + + + + C0062_luxR + 1 + C0062_luxR + MoClo Basic Part: CDS - Controller protein, luxR repressor/activator (in concert with HSL, represses pLuxR(pR) R0063. Also up-regulates pLuxR(pL) R0062) + + + + + 26479688 + + + + + + + B0015_sequence + 1 + B0015 Sequence + + + + CCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + + + i2mLg6N9A_sequence + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGC + + + + + J23100_sequence + 1 + J23100 Sequence + + + + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGC + + + + + C0062_luxR_sequence + 1 + C0062_luxR Sequence + + + + ATGAAAAACATAAATGCCGACGACACATACAGAATAATTAATAAAATTAAAGCTTGTAGAAGCAATAATGATATTAATCAATGCTTATCTGATATGACTAAAATGGTACATTGTGAATATTATTTACTCGCGATCATTTATCCTCATTCTATGGTTAAATCTGATATTTCAATCCTAGATAATTACCCTAAAAAATGGAGGCAATATTATGATGACGCTAATTTAATAAAATATGATCCTATAGTAGATTATTCTAACTCCAATCATTCACCAATTAATTGGAATATATTTGAAAACAATGCTGTAAATAAAAAATCTCCAAATGTAATTAAAGAAGCGAAAACATCAGGTCTTATCACTGGGTTTAGTTTCCCTATTCATACGGCTAACAATGGCTTCGGAATGCTTAGTTTTGCACATTCAGAAAAAGACAACTATATAGATAGTTTATTTTTACATGCGTGTATGAACATACCATTAATTGTTCCTTCTCTAGTTGATAATTATCGAAAAATAAATATAGCAAATAATAAATCAAACAACGATTTAACCAAAAGAGAAAAAGAATGTTTAGCGTGGGCATGCGAAGGAAAAAGCTCTTGGGATATTTCAAAAATATTAGGTTGCAGTGAGCGTACTGTCACTTTCCATTTAACCAATGCGCAAATGAAACTCAATACAACAAACCGCTGCCAAAGTATTTCTAAAGCAATTTTAACAGGAGCAATTGATTGCCCATACTTTAAAAATTAATAA + + + + + B0033_sequence + 1 + B0033 Sequence + + + + AGAGTCACACAGGACTACTA + + + + + J23100_Layout + + + + + B0015_Layout + + + + + C0062_luxR_Layout + + + + + B0033_Layout + + + + + i2mLg6N9A_Layout + + + + 455.0 + 334.5 + 200.0 + 100.0 + container + + + + + 0.0 + 50.0 + 200.0 + 1.0 + backbone + + + + + 0.0 + 0.0 + 50.0 + 100.0 + J23100_1 + + + + + + 50.0 + 0.0 + 50.0 + 100.0 + B0033_2 + + + + + + 100.0 + 0.0 + 50.0 + 100.0 + C0062_luxR_3 + + + + + + 150.0 + 0.0 + 50.0 + 100.0 + B0015_4 + + + + + diff --git a/tests/test_files/mocloparts116.xml b/tests/test_files/mocloparts116.xml new file mode 100644 index 0000000..d683a12 --- /dev/null +++ b/tests/test_files/mocloparts116.xml @@ -0,0 +1,307 @@ + + + + + E0030_yfp + 1 + E0030_yfp + MoClo Basic Part: CDS - Fluorescent protein. Yellow. + 26479688 + + + + + + + J23116 + 1 + J23116 + MoClo Basic Part: Constitutive promoter - Anderson series - low strength + 26479688 + + + + + + + B0034 + 1 + B0034 + MoClo Basic Part: RBS - Weiss RBS, high strength. Modified from Bba_B0034 to adjust spacing in MC system. + 26479688 + + + + + + + i0mwvNcgH + 1 + + + + + + + J23116_1 + 1 + + + + + + + + B0034_2 + 1 + + + + + + + + E0030_yfp_3 + 1 + + + + + + + + B0015_4 + 1 + + + + + + + + i0mwvNcgHAnnotation3 + 1 + + + + location3 + 1 + 780 + 908 + + + + + + + + + + i0mwvNcgHAnnotation1 + 1 + + + + location1 + 1 + 36 + 56 + + + + + + + + + + i0mwvNcgHAnnotation2 + 1 + + + + location2 + 1 + 57 + 779 + + + + + + + + + + i0mwvNcgHAnnotation0 + 1 + + + + location0 + 1 + 1 + 35 + + + + + + + + + + i0mwvNcgHConstraint3 + 1 + + + + + + + + + i0mwvNcgHConstraint1 + 1 + + + + + + + + + i0mwvNcgHConstraint2 + 1 + + + + + + + + + + B0015 + 1 + B0015 + MoClo Basic Part: Double terminator (B0010:B0012) + 26479688 + + + + + + + E0030_yfp_sequence + ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCTTCGGCTACGGCCTGCAATGCTTCGCCCGCTACCCCGACCACATGAAGCTGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAATAA + + + + + J23116_sequence + TTGACAGCTAGCTCAGTCCTAGGGACTATGCTAGC + + + + + B0015_sequence + CCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + + + i0mwvNcgH_sequence + TTGACAGCTAGCTCAGTCCTAGGGACTATGCTAGC + + + + + B0034_sequence + AGAGAAAGAGGAGAAATACTA + + + + + J23116_Layout + + + + + B0015_Layout + + + + + i0mwvNcgH_Layout + + + + 437.5 + 337.0 + 200.0 + 100.0 + container + + + + + 0.0 + 50.0 + 200.0 + 1.0 + backbone + + + + + 0.0 + 0.0 + 50.0 + 100.0 + J23116_1 + + + + + + 50.0 + 0.0 + 50.0 + 100.0 + B0034_2 + + + + + + 100.0 + 0.0 + 50.0 + 100.0 + E0030_yfp_3 + + + + + + 150.0 + 0.0 + 50.0 + 100.0 + B0015_4 + + + + + + + E0030_yfp_Layout + + + + + B0034_Layout + + + \ No newline at end of file diff --git a/tests/test_files/pB0015_DE.xml b/tests/test_files/pB0015_DE.xml new file mode 100644 index 0000000..7379b8a --- /dev/null +++ b/tests/test_files/pB0015_DE.xml @@ -0,0 +1,440 @@ + + + + + pB0015_DE + 1 + pB0015_DE + + + + + + + + + + Fusion_Site_D_2 + 1 + + + + + + + + + + + B0015_3 + 1 + + + + + + + + + + + dva_backbone_core_5 + 1 + + + + + + + + + + + Fusion_Site_E_4 + 1 + + + + + + + + + + + pB0015_DEAnnotation0 + 1 + + + + + + + location0 + 1 + + + + 1 + 4 + + + + + + + + + + pB0015_DEAnnotation1 + 1 + + + + + + + location1 + 1 + + + + 5 + 133 + + + + + + + + + + pB0015_DEAnnotation2 + 1 + + + + + + + location2 + 1 + + + + 134 + 137 + + + + + + + + + + pB0015_DEAnnotation3 + 1 + + + + + + + location3 + 1 + + + + 138 + 2237 + + + + + + + + + + pB0015_DEConstraint1 + 1 + + + + + + + + + + + + pB0015_DEConstraint2 + 1 + + + + + + + + + + + + pB0015_DEConstraint3 + 1 + + + + + + + + + + + + + B0015 + 1 + B0015 + MoClo Basic Part: Double terminator (B0010:B0012) + + + + + 26479688 + + + + + + + Fusion_Site_D + 1 + Fusion_Site_D + MoClo standard fusion site D + + + + + 26479688 + + + + + + + Fusion_Site_E + 1 + Fusion_Site_E + MoClo standard fusion site E + + + + + 26479688 + + + + + + + dva_backbone_core + 1 + This is the backbone core for Destination Vector Ampicillin (DVA) plasmids. Based on the pSB1A2 plasmid and contains an ampicillin resistance gene and a high copy number origin of replication. + + + + 26479688 + + + + + + Component_ori + 1 + + + + 26479688 + + + + + + + + Component_amp + 1 + + + + 26479688 + + + + + + + + origin_of_replication_pSB1A2_annotation + 1 + + + + 26479688 + + + + ori + 1 + + + + 26479688 + 272 + 860 + + + + + + + + + + amp_region_annotation + 1 + + + + 26479688 + + + + amp + 1 + + + + 26479688 + 1031 + 1996 + + + + + + + + + + + amp_region + 1 + Ampicillin resistance gene from the pSB1A2 plasmid. + 2026-02-05T23:21:12 + + + + 26479688 + + + + + + + + origin_of_replication_pSB1A2 + 1 + Origin of replication from the pSB1A2/pSB1K3 plasmid. pSB1A2 is a high copy number plasmid. The replication origin is a pUC19-derived pMB1 (copy number of 100-300 per cell + 2026-04-16T22:42:31 + + + + 26479688 + + + + + + + pB0015_DE_sequence + 1 + + + + AGGTCCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATAGCTTagagacctactagtagcggccgctgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgcgacccacgctcaccggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcctccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctgttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtgggtctca + + + + + B0015_sequence + 1 + B0015 Sequence + + + + CCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + + + Fusion_Site_D_sequence + 1 + Fusion_Site_D Sequence + + + + AGGT + + + + + Fusion_Site_E_sequence + 1 + Fusion_Site_E Sequence + + + + GCTT + + + + + dva_backbone_core_seq + 1 + + + + 26479688 + agagacctactagtagcggccgctgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgcgacccacgctcaccggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcctccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctgttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtgggtctca + + + + + amp_region_seq + 1 + + + + 26479688 + cgcggaacccctatttgtttatttttctaaatacattcaaatatgtatccgctcatgagacaataaccctgataaatgcttcaataatattgaaaaaggaagagtatgagtattcaacatttccgtgtcgcccttattcccttttttgcggcattttgccttcctgtttttgctcacccagaaacgctggtgaaagtaaaagatgctgaagatcagttgggtgcacgagtgggttacatcgaactggatctcaacagcggtaagatccttgagagttttcgccccgaagaacgttttccaatgatgagcacttttaaagttctgctatgtggcgcggtattatcccgtattgacgccgggcaagagcaactcggtcgccgcatacactattctcagaatgacttggttgagtactcaccagtcacagaaaagcatcttacggatggcatgacagtaagagaattatgcagtgctgccataaccatgagtgataacactgcggccaacttacttctgacaacgatcggaggaccgaaggagctaaccgcttttttgcacaacatgggggatcatgtaactcgccttgatcgttgggaaccggagctgaatgaagccataccaaacgacgagcgtgacaccacgatgcctgtagcaatggcaacaacgttgcgcaaactattaactggcgaactacttactctagcttcccggcaacaattaatagactggatggaggcggataaagttgcaggaccacttctgcgctcggcccttccggctggctggtttattgctgataaatctggagccggtgagcgtgggtcgcgcggtatcattgcagcactggggccagatggtaagccctcccgtatcgtagttatctacacgacggggagtcaggcaactatggatgaacgaaatagacagatcgctgagataggtgcctcactgattaagcattggtaa + + + + + origin_of_replication_seq + 1 + + + + 26479688 + ttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctgtggaaa + + + \ No newline at end of file diff --git a/tests/test_files/pB0032_BC.xml b/tests/test_files/pB0032_BC.xml new file mode 100644 index 0000000..53770bd --- /dev/null +++ b/tests/test_files/pB0032_BC.xml @@ -0,0 +1,440 @@ + + + + + pB0032_BC + 1 + pB0032_BC + + + + + + + + + + Fusion_Site_C_4 + 1 + + + + + + + + + + + Fusion_Site_B_2 + 1 + + + + + + + + + + + dva_backbone_core_5 + 1 + + + + + + + + + + + B0032_3 + 1 + + + + + + + + + + + pB0032_BCAnnotation0 + 1 + + + + + + + location0 + 1 + + + + 1 + 4 + + + + + + + + + + pB0032_BCAnnotation1 + 1 + + + + + + + location1 + 1 + + + + 5 + 26 + + + + + + + + + + pB0032_BCAnnotation2 + 1 + + + + + + + location2 + 1 + + + + 27 + 30 + + + + + + + + + + pB0032_BCAnnotation3 + 1 + + + + + + + location3 + 1 + + + + 31 + 2130 + + + + + + + + + + pB0032_BCConstraint1 + 1 + + + + + + + + + + + + pB0032_BCConstraint2 + 1 + + + + + + + + + + + + pB0032_BCConstraint3 + 1 + + + + + + + + + + + + + B0032 + 1 + B0032 + MoClo Basic Part: RBS - Weiss RBS, medium strength. Modified from Bba_B0032 to adjust spacing in MC system. + + + + + 26479688 + + + + + + + Fusion_Site_B + 1 + Fusion_Site_B + MoClo standard fusion site B + + + + + 26479688 + + + + + + + Fusion_Site_C + 1 + Fusion_Site_C + MoClo standard fusion site C + + + + + 26479688 + + + + + + + dva_backbone_core + 1 + This is the backbone core for Destination Vector Ampicillin (DVA) plasmids. Based on the pSB1A2 plasmid and contains an ampicillin resistance gene and a high copy number origin of replication. + + + + 26479688 + + + + + + Component_ori + 1 + + + + 26479688 + + + + + + + + Component_amp + 1 + + + + 26479688 + + + + + + + + origin_of_replication_pSB1A2_annotation + 1 + + + + 26479688 + + + + ori + 1 + + + + 26479688 + 272 + 860 + + + + + + + + + + amp_region_annotation + 1 + + + + 26479688 + + + + amp + 1 + + + + 26479688 + 1031 + 1996 + + + + + + + + + + + amp_region + 1 + Ampicillin resistance gene from the pSB1A2 plasmid. + 2026-02-05T23:21:12 + + + + 26479688 + + + + + + + + origin_of_replication_pSB1A2 + 1 + Origin of replication from the pSB1A2/pSB1K3 plasmid. pSB1A2 is a high copy number plasmid. The replication origin is a pUC19-derived pMB1 (copy number of 100-300 per cell + 2026-04-16T22:42:31 + + + + 26479688 + + + + + + + pB0032_BC_sequence + 1 + + + + TACTAGAGTCACACAGGAAAGTACTAAATGagagacctactagtagcggccgctgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgcgacccacgctcaccggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcctccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctgttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtgggtctca + + + + + B0032_sequence + 1 + B0032 Sequence + + + + AGAGTCACACAGGAAAGTACTA + + + + + Fusion_Site_B_sequence + 1 + Fusion_Site_B Sequence + + + + TACT + + + + + Fusion_Site_C_sequence + 1 + Fusion_Site_C Sequence + + + + AATG + + + + + dva_backbone_core_seq + 1 + + + + 26479688 + agagacctactagtagcggccgctgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgcgacccacgctcaccggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcctccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctgttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtgggtctca + + + + + amp_region_seq + 1 + + + + 26479688 + cgcggaacccctatttgtttatttttctaaatacattcaaatatgtatccgctcatgagacaataaccctgataaatgcttcaataatattgaaaaaggaagagtatgagtattcaacatttccgtgtcgcccttattcccttttttgcggcattttgccttcctgtttttgctcacccagaaacgctggtgaaagtaaaagatgctgaagatcagttgggtgcacgagtgggttacatcgaactggatctcaacagcggtaagatccttgagagttttcgccccgaagaacgttttccaatgatgagcacttttaaagttctgctatgtggcgcggtattatcccgtattgacgccgggcaagagcaactcggtcgccgcatacactattctcagaatgacttggttgagtactcaccagtcacagaaaagcatcttacggatggcatgacagtaagagaattatgcagtgctgccataaccatgagtgataacactgcggccaacttacttctgacaacgatcggaggaccgaaggagctaaccgcttttttgcacaacatgggggatcatgtaactcgccttgatcgttgggaaccggagctgaatgaagccataccaaacgacgagcgtgacaccacgatgcctgtagcaatggcaacaacgttgcgcaaactattaactggcgaactacttactctagcttcccggcaacaattaatagactggatggaggcggataaagttgcaggaccacttctgcgctcggcccttccggctggctggtttattgctgataaatctggagccggtgagcgtgggtcgcgcggtatcattgcagcactggggccagatggtaagccctcccgtatcgtagttatctacacgacggggagtcaggcaactatggatgaacgaaatagacagatcgctgagataggtgcctcactgattaagcattggtaa + + + + + origin_of_replication_seq + 1 + + + + 26479688 + ttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctgtggaaa + + + \ No newline at end of file diff --git a/tests/test_files/pE0040_CD.xml b/tests/test_files/pE0040_CD.xml new file mode 100644 index 0000000..af9be9e --- /dev/null +++ b/tests/test_files/pE0040_CD.xml @@ -0,0 +1,440 @@ + + + + + pE0040_CD + 1 + pE0040_CD + + + + + + + + + + dva_backbone_core_5 + 1 + + + + + + + + + + + Fusion_Site_D_4 + 1 + + + + + + + + + + + Fusion_Site_C_2 + 1 + + + + + + + + + + + E0040m_gfp_3 + 1 + + + + + + + + + + + pE0040_CDAnnotation0 + 1 + + + + + + + location0 + 1 + + + + 1 + 4 + + + + + + + + + + pE0040_CDAnnotation1 + 1 + + + + + + + location1 + 1 + + + + 5 + 724 + + + + + + + + + + pE0040_CDAnnotation2 + 1 + + + + + + + location2 + 1 + + + + 725 + 728 + + + + + + + + + + pE0040_CDAnnotation3 + 1 + + + + + + + location3 + 1 + + + + 729 + 2828 + + + + + + + + + + pE0040_CDConstraint3 + 1 + + + + + + + + + + + + pE0040_CDConstraint1 + 1 + + + + + + + + + + + + pE0040_CDConstraint2 + 1 + + + + + + + + + + + + + E0040m_gfp + 1 + E0040m_gfp + MoClo Basic Part: CDS - Fluorescent protein. Green. Modified from Bba_E0040 to fix illegal site. + + + + + 26479688 + + + + + + + Fusion_Site_C + 1 + Fusion_Site_C + MoClo standard fusion site C + + + + + 26479688 + + + + + + + Fusion_Site_D + 1 + Fusion_Site_D + MoClo standard fusion site D + + + + + 26479688 + + + + + + + dva_backbone_core + 1 + This is the backbone core for Destination Vector Ampicillin (DVA) plasmids. Based on the pSB1A2 plasmid and contains an ampicillin resistance gene and a high copy number origin of replication. + + + + 26479688 + + + + + + Component_ori + 1 + + + + 26479688 + + + + + + + + Component_amp + 1 + + + + 26479688 + + + + + + + + origin_of_replication_pSB1A2_annotation + 1 + + + + 26479688 + + + + ori + 1 + + + + 26479688 + 272 + 860 + + + + + + + + + + amp_region_annotation + 1 + + + + 26479688 + + + + amp + 1 + + + + 26479688 + 1031 + 1996 + + + + + + + + + + + amp_region + 1 + Ampicillin resistance gene from the pSB1A2 plasmid. + 2026-02-05T23:21:12 + + + + 26479688 + + + + + + + + origin_of_replication_pSB1A2 + 1 + Origin of replication from the pSB1A2/pSB1K3 plasmid. pSB1A2 is a high copy number plasmid. The replication origin is a pUC19-derived pMB1 (copy number of 100-300 per cell + 2026-04-16T22:42:31 + + + + 26479688 + + + + + + + pE0040_CD_sequence + 1 + + + + AATGATGCGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCGGTTATGGTGTTCAATGCTTTGCGAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTATAACTCACACAATGTATACATCATGGCAGACAAACAAAAGAATGGAATCAAAGTTAACTTCAAAATTAGACACAACATTGAAGATGGAAGCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGAGAGATCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAATAAAGGTagagacctactagtagcggccgctgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgcgacccacgctcaccggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcctccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctgttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtgggtctca + + + + + E0040m_gfp_sequence + 1 + E0040m_gfp Sequence + + + + ATGCGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCGGTTATGGTGTTCAATGCTTTGCGAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTATAACTCACACAATGTATACATCATGGCAGACAAACAAAAGAATGGAATCAAAGTTAACTTCAAAATTAGACACAACATTGAAGATGGAAGCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGAGAGATCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAATAA + + + + + Fusion_Site_C_sequence + 1 + Fusion_Site_C Sequence + + + + AATG + + + + + Fusion_Site_D_sequence + 1 + Fusion_Site_D Sequence + + + + AGGT + + + + + dva_backbone_core_seq + 1 + + + + 26479688 + agagacctactagtagcggccgctgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgcgacccacgctcaccggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcctccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctgttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtgggtctca + + + + + amp_region_seq + 1 + + + + 26479688 + cgcggaacccctatttgtttatttttctaaatacattcaaatatgtatccgctcatgagacaataaccctgataaatgcttcaataatattgaaaaaggaagagtatgagtattcaacatttccgtgtcgcccttattcccttttttgcggcattttgccttcctgtttttgctcacccagaaacgctggtgaaagtaaaagatgctgaagatcagttgggtgcacgagtgggttacatcgaactggatctcaacagcggtaagatccttgagagttttcgccccgaagaacgttttccaatgatgagcacttttaaagttctgctatgtggcgcggtattatcccgtattgacgccgggcaagagcaactcggtcgccgcatacactattctcagaatgacttggttgagtactcaccagtcacagaaaagcatcttacggatggcatgacagtaagagaattatgcagtgctgccataaccatgagtgataacactgcggccaacttacttctgacaacgatcggaggaccgaaggagctaaccgcttttttgcacaacatgggggatcatgtaactcgccttgatcgttgggaaccggagctgaatgaagccataccaaacgacgagcgtgacaccacgatgcctgtagcaatggcaacaacgttgcgcaaactattaactggcgaactacttactctagcttcccggcaacaattaatagactggatggaggcggataaagttgcaggaccacttctgcgctcggcccttccggctggctggtttattgctgataaatctggagccggtgagcgtgggtcgcgcggtatcattgcagcactggggccagatggtaagccctcccgtatcgtagttatctacacgacggggagtcaggcaactatggatgaacgaaatagacagatcgctgagataggtgcctcactgattaagcattggtaa + + + + + origin_of_replication_seq + 1 + + + + 26479688 + ttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctgtggaaa + + + \ No newline at end of file diff --git a/tests/test_files/pJ23100_AB.xml b/tests/test_files/pJ23100_AB.xml index d43bd38..f6174e8 100644 --- a/tests/test_files/pJ23100_AB.xml +++ b/tests/test_files/pJ23100_AB.xml @@ -4,9 +4,10 @@ pJ23100_AB 1 - pJ23100_AB + pJ23100_AB + @@ -17,6 +18,7 @@ 1 + @@ -28,6 +30,7 @@ 1 + @@ -39,6 +42,7 @@ 1 + @@ -50,6 +54,7 @@ 1 + @@ -61,6 +66,7 @@ 1 + @@ -68,6 +74,7 @@ 1 + 1 4 @@ -83,12 +90,14 @@ 1 + location1 1 + 5 39 @@ -104,6 +113,7 @@ pJ23100_ABAnnotation2 1 + @@ -111,6 +121,7 @@ location2 1 + 40 43 @@ -126,6 +137,7 @@ pJ23100_ABAnnotation3 1 + @@ -133,9 +145,10 @@ location3 1 + 44 - 2210 + 2143 @@ -149,6 +162,7 @@ 1 + @@ -161,6 +175,7 @@ 1 + @@ -173,6 +188,7 @@ 1 + @@ -188,6 +204,7 @@ MoClo standard fusion site A + 26479688 @@ -203,6 +220,7 @@ + 26479688 @@ -216,6 +234,7 @@ MoClo Basic Part: Constitutive promoter - Anderson series - high strength + 26479688 @@ -229,6 +248,7 @@ This is the backbone core for Destination Vector Ampicillin (DVA) plasmids. Based on the pSB1A2 plasmid and contains an ampicillin resistance gene and a high copy number origin of replication. + 26479688 @@ -239,6 +259,7 @@ 1 + 26479688 @@ -251,6 +272,7 @@ 1 + 26479688 @@ -263,6 +285,7 @@ 1 + 26479688 @@ -271,6 +294,7 @@ 1 + 26479688 272 860 @@ -287,6 +311,7 @@ 1 + 26479688 @@ -295,6 +320,7 @@ 1 + 26479688 1031 1996 @@ -311,10 +337,13 @@ amp_region 1 Ampicillin resistance gene from the pSB1A2 plasmid. + 2026-02-05T23:21:12 + 26479688 + @@ -323,7 +352,9 @@ origin_of_replication_pSB1A2 1 Origin of replication from the pSB1A2/pSB1K3 plasmid. pSB1A2 is a high copy number plasmid. The replication origin is a pUC19-derived pMB1 (copy number of 100-300 per cell + 2026-04-16T22:42:31 + 26479688 @@ -336,7 +367,8 @@ 1 - GGAGTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCTACTtccagtcgggaaacctgtcgtgccagctgcattaatgaatcggccaacgcgcggggaagacgtgcttagagacctactagtagcggccgctgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgcgacccacgctcaccggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcctccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctgttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtgggtctca + + GGAGTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCTACTagagacctactagtagcggccgctgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgcgacccacgctcaccggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcctccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctgttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtgggtctca @@ -345,6 +377,7 @@ 1 Fusion_Site_A Sequence + GGAG @@ -355,6 +388,7 @@ 1 Fusion_Site_B Sequence + TACT @@ -365,6 +399,7 @@ 1 J23100 Sequence + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGC @@ -375,6 +410,7 @@ 1 + 26479688 agagacctactagtagcggccgctgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgcgacccacgctcaccggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcctccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctgttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtgggtctca @@ -384,6 +420,7 @@ amp_region_seq 1 + 26479688 cgcggaacccctatttgtttatttttctaaatacattcaaatatgtatccgctcatgagacaataaccctgataaatgcttcaataatattgaaaaaggaagagtatgagtattcaacatttccgtgtcgcccttattcccttttttgcggcattttgccttcctgtttttgctcacccagaaacgctggtgaaagtaaaagatgctgaagatcagttgggtgcacgagtgggttacatcgaactggatctcaacagcggtaagatccttgagagttttcgccccgaagaacgttttccaatgatgagcacttttaaagttctgctatgtggcgcggtattatcccgtattgacgccgggcaagagcaactcggtcgccgcatacactattctcagaatgacttggttgagtactcaccagtcacagaaaagcatcttacggatggcatgacagtaagagaattatgcagtgctgccataaccatgagtgataacactgcggccaacttacttctgacaacgatcggaggaccgaaggagctaaccgcttttttgcacaacatgggggatcatgtaactcgccttgatcgttgggaaccggagctgaatgaagccataccaaacgacgagcgtgacaccacgatgcctgtagcaatggcaacaacgttgcgcaaactattaactggcgaactacttactctagcttcccggcaacaattaatagactggatggaggcggataaagttgcaggaccacttctgcgctcggcccttccggctggctggtttattgctgataaatctggagccggtgagcgtgggtcgcgcggtatcattgcagcactggggccagatggtaagccctcccgtatcgtagttatctacacgacggggagtcaggcaactatggatgaacgaaatagacagatcgctgagataggtgcctcactgattaagcattggtaa @@ -395,6 +432,7 @@ 1 + 26479688 ttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctgtggaaa diff --git a/tests/test_files/transformation_activity.xml b/tests/test_files/transformation_activity.xml new file mode 100644 index 0000000..d04a835 --- /dev/null +++ b/tests/test_files/transformation_activity.xml @@ -0,0 +1,1884 @@ + + + + + + + + + + 1 + pB0015_DE_digestion_product + + + 1 + + + + + 1 + pB0015_DE_reactant + + + + + + + + + + + 1 + pE0030_CD_digestion_product + + + + + + + + pJ23100_AB_reactant + 1 + + + + + + + pB0034_BC_digestion_product + + 1 + + + + + + + + + 1 + pB0034_BC_extracted_part_ligation + + + + + + composite_1_ligation + + + + + + + pE0030_CD_extracted_part_ligation + 1 + + + 1 + + + 1 + + + + composite_1_product + + + + + + pB0015_DE_extracted_part_ligation + + + 1 + + + + + 1 + + pJ23100_AB_extracted_part_ligation + + + + + + + + + 1 + + DVK_AE_extracted_backbone_ligation + + + + + + + + + 1 + + + composite_1 + + + + + + 1 + + + + + pB0034_BC_reactant + + + + + + 1 + + + + DVK_AE_backbone_reactant + + + + + + + + 1 + + BsaI_enzyme + + + + + + 1 + + + + 1 + + + DVK_AE_backbone_product + + + + + + + + restriction + 1 + + + + DVK_AE_digestion + + + + 1 + + + DVK_AE_backbone_reactant + + + + + + + + T4_Ligase + + + 1 + + + + + + + + restriction + 1 + + + + + + + + + + 1 + pB0034_BC_product + + + + 1 + pB0034_BC_digestion + + + + + 1 + pB0034_BC_reactant + + + + + + + + + + DVK_AE_backbone_digestion_product + + + 1 + + + + + + + + 1 + pJ23100_AB_digestion_product + + + + + + + + 1 + pJ23100_AB_digestion + + + + 1 + pJ23100_AB_product + + + + + + + + + + + 1 + pJ23100_AB_reactant + + + + + + + + 1 + + restriction + + + + + + + + pE0030_CD_reactant + + + 1 + + + + assembly_plan + + + pE0030_CD_digestion + + + + + + 1 + pE0030_CD_product + + + + + + + 1 + + + pE0030_CD_reactant + + + + + + + + restriction + 1 + + + + 1 + + + + + 1 + + + + 1 + + + pB0015_DE_product + + + pB0015_DE_digestion + + + + + restriction + 1 + + + + + + + + + + 1 + + pB0015_DE_reactant + + + + + + + + Escherichia coli, strain DH5alpha + + 1 + Ecoli_DH5a + + + Ecoli_DH5a + + + + + 1 + + + + composite_1_engineered_plasmid + + + + + 1 + + + + + Ecoli_DH5a_chassis + + + 1 + + + Ecoli_DH5a_with_composite_1 + + + + + BsaI + BsaI + Restriction enzyme BsaI from REBASE. + + + 1 + + + + + + pJ23100_AB_five_prime_oh_component + + 1 + + + + + + + + + + + 1 + + three_prime_oh_location + 40 + 43 + + + three_prime_overhang + + 1 + + + + 1 + + pJ23100_AB_extracted_part + + + + + 1 + J23100_3 + + + + + + + + + + + 1 + pJ23100_AB_part + + + 5 + 39 + + 1 + + pJ23100_AB_part_location + + + + + + + + + 1 + + + + pJ23100_AB_three_prime_oh_component + + + + + + + 1 + + + 1 + 1 + + 4 + + five_prime_oh_location + + + + five_prime_overhang + + + + + + 1 + + pJ23100_AB_five_prime_oh + + + + + + + + + 1 + Fusion_Site_A + MoClo standard fusion site A + + Fusion_Site_A + 26479688 + + + + + + + MoClo Basic Part: Constitutive promoter - Anderson series - high strength + + + J23100 + + + + + J23100 + + + 26479688 + 1 + + + pJ23100_AB_three_prime_oh + + + + + 1 + + + + + + + + 26479688 + Fusion_Site_B + Fusion_Site_B + + MoClo standard fusion site B + + + + 1 + + + + + + + 1 + + + B0034_3 + + + + + + + + 1 + five_prime_overhang + + + + 1 + 1 + five_prime_oh_location + + + 4 + + + + + + + + + + 1 + + 26 + three_prime_oh_location + 29 + + + + three_prime_overhang + 1 + + + + + + + + + + 1 + + + + 1 + 25 + 5 + + pB0034_BC_part_location + + + pB0034_BC_part + + + + + + + 1 + + pB0034_BC_five_prime_oh_component + + + + + + 1 + pB0034_BC_three_prime_oh_component + + + + + + 1 + pB0034_BC_extracted_part + + + + + + + + + + 1 + + pB0034_BC_five_prime_oh + + + + + + + B0034 + + B0034 + MoClo Basic Part: RBS - Weiss RBS, high strength. Modified from Bba_B0034 to adjust spacing in MC system. + 26479688 + 1 + + + + + + + + 1 + pB0034_BC_three_prime_oh + + + + + + Fusion_Site_C + 1 + Fusion_Site_C + MoClo standard fusion site C + + + 26479688 + + + + + + + + + + + + + 728 + three_prime_oh_location + + + 731 + 1 + + + 1 + three_prime_overhang + + + + + 1 + + + + + + 1 + + + + 727 + pE0030_CD_part_location + 1 + + 5 + + + pE0030_CD_part + + + + + + + + pE0030_CD_five_prime_oh_component + + 1 + + + + pE0030_CD_extracted_part + + + 1 + + + E0030_yfp_3 + + + + + + + + + + pE0030_CD_three_prime_oh_component + + + + 1 + + + + + + 1 + + + + + 4 + 1 + five_prime_oh_location + + 1 + + + + five_prime_overhang + + + + + + pE0030_CD_five_prime_oh + + + 1 + + + + + + 26479688 + + + MoClo Basic Part: CDS - Fluorescent protein. Yellow. + + + + E0030_yfp + + + E0030_yfp + 1 + + + + + + + pE0030_CD_three_prime_oh + + + 1 + + + + + + MoClo standard fusion site D + + 1 + + Fusion_Site_D + + 26479688 + + Fusion_Site_D + + + + 1 + + + three_prime_overhang + + + 1 + + + 137 + + + 1 + 134 + three_prime_oh_location + + + + + + + + pB0015_DE_five_prime_oh_component + + + 1 + + + + + pB0015_DE_three_prime_oh_component + + 1 + + + + + + + + + + 1 + + B0015_3 + + + + + + + + + 1 + + + 4 + + + 1 + five_prime_oh_location + 1 + + + five_prime_overhang + + + + + + + + + pB0015_DE_extracted_part + + + pB0015_DE_part + + + + + 5 + pB0015_DE_part_location + 133 + + 1 + + + + 1 + + + + + + + + 1 + + + pB0015_DE_five_prime_oh + + + B0015 + + 26479688 + MoClo Basic Part: Double terminator (B0010:B0012) + + + + + + + 1 + B0015 + + + + + + 1 + + pB0015_DE_three_prime_oh + + + + + + Fusion_Site_E + + + 26479688 + + + 1 + + MoClo standard fusion site E + + + Fusion_Site_E + + + DVK_AE_extracted_backbone + + 1 + + + + DVK_AE_five_prime_oh_component + 1 + + + + + + + + + + + + + 1 + three_prime_oh_location + 2232 + 2235 + + + three_prime_overhang + 1 + + + + + + + + + + + 1 + + + dvk_backbone_core_5 + + + + + DVK_AE_backbone + + + + + 5 + 1 + DVK_AE_backbone_location + 2231 + + + + + 1 + + + + + DVK_AE_three_prime_oh_component + + 1 + + + + + + + + five_prime_overhang + + + 1 + five_prime_oh_location + + 1 + + 4 + + + + 1 + + + + + + + + DVK_AE_three_prime_oh + + + + + 1 + + + 1 + + DVK_AE_five_prime_oh + + + + + + + This is the backbone core for Destination Vector Kanamycin (DVK) plasmids. Based on the pSB1K3 plasmid and contains kanamycin gene and a high copy number origin of replication. + dvk_backbone_core + 26479688 + + 1 + + + kan_region_annotation + + + 1 + + + kan + 1929 + + 1103 + 26479688 + + + + + + + + 1 + + + 26479688 + + + + + 26479688 + + + + + + 1 + Component_ori + + + + + + + + + + + 870 + + + 1 + + + ori + + 282 + 26479688 + + + + + + 1 + origin_of_replication_pSB1A2_annotation + + + 26479688 + + + + + + 1 + Component_kan + + + 26479688 + + + + + + + + + + + + T4_Ligase + 1 + + T4_Ligase + + + Ligation_Scar_A + + + + + 1 + + + + + + 1 + Ligation_Scar_B + + + + + + + + + + + Ligation_Scar_C + + 1 + + + + + + + + 1 + + Ligation_Scar_D + + + + + + + 1 + + Ligation_Scar_E + + + + + + + + + + + 1 + + Ligation_Scar_E + + + + + 1 + Ligation_Scar_D + + + + + + + + dvk_backbone_core + + + + 1 + + + + + + + + 1 + + Ligation_Scar_A + + + + + + + 1 + + + 2270 + J23100_3_location + + + 1 + 2236 + + + J23100_3_annotation + + + + + Ligation_Scar_E_annotation + + + + + + 1 + 3026 + 3023 + Ligation_Scar_E_location + + + + 1 + + + + + + Ligation_Scar_C + + + 1 + + + composite_1 + + + + + E0030_yfp_3_annotation + + + 3022 + 1 + E0030_yfp_3_location + + + 2300 + + + 1 + + + + + 1 + + + B0015_3_annotation + + + 3155 + + 3027 + 1 + B0015_3_location + + + + + + + + 1 + + + E0030_yfp + + + + + composite_1 + + + + + Ligation_Scar_A_annotation + 1 + + + + + 1 + Ligation_Scar_A_location + 1 + 4 + + + + + + + 1 + + dvk_backbone_core_5_annotation + + + + 1 + + dvk_backbone_core_5_location + 5 + 2231 + + + + + + + + + + + + Ligation_Scar_D_location + 2299 + 2296 + 1 + + + 1 + + Ligation_Scar_D_annotation + + + + + + + + + J23100 + 1 + + + + + + + + B0034 + 1 + + + + 1 + + + B0034_3_annotation + + + B0034_3_location + + 2275 + 1 + 2295 + + + + + + 1 + + + + + + + Ligation_Scar_B_annotation + + + 2235 + + + Ligation_Scar_B_location + 2232 + 1 + + + 1 + + + + + + + + + B0015 + + 1 + + + + + + 1 + Ligation_Scar_B + + + + + + + + + Ligation_Scar_C_annotation + 1 + + + + 2274 + + 1 + Ligation_Scar_C_location + 2271 + + + + + + + + + GGAGTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCTACT + 1 + pJ23100_AB_extracted_part_seq + + + 1 + + + pJ23100_AB_five_prime_oh_sequence + GGAG + + + + GGAG + + + + 1 + Fusion_Site_A Sequence + + Fusion_Site_A_sequence + + + + J23100_sequence + + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGC + + + J23100 Sequence + + 1 + + + + + + + 1 + pJ23100_AB_three_prime_oh_sequence + TACT + + + + Fusion_Site_B Sequence + + 1 + TACT + + + Fusion_Site_B_sequence + + + + 1 + pB0034_BC_extracted_part_seq + TACTAGAGAAAGAGGAGAAATACTAAATG + + + + + 1 + TACT + + + pB0034_BC_five_prime_oh_sequence + + + + + B0034_sequence + + AGAGAAAGAGGAGAAATACTA + + + B0034 Sequence + 1 + + + + 1 + + pB0034_BC_three_prime_oh_sequence + + AATG + + + + Fusion_Site_C Sequence + AATG + + 1 + Fusion_Site_C_sequence + + + + + + + 1 + AATGATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCTTCGGCTACGGCCTGCAATGCTTCGCCCGCTACCCCGACCACATGAAGCTGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAATAAAGGT + + pE0030_CD_extracted_part_seq + + + + + pE0030_CD_five_prime_oh_sequence + AATG + + 1 + + + + + 1 + + E0030_yfp_sequence + + + ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCTTCGGCTACGGCCTGCAATGCTTCGCCCGCTACCCCGACCACATGAAGCTGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAATAA + + E0030_yfp Sequence + + + + + pE0030_CD_three_prime_oh_sequence + AGGT + 1 + + + + AGGT + 1 + Fusion_Site_D_sequence + + Fusion_Site_D Sequence + + + + + + + + pB0015_DE_extracted_part_seq + 1 + AGGTCCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATAGCTT + + + + + + 1 + + AGGT + pB0015_DE_five_prime_oh_sequence + + + + B0015 Sequence + CCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + 1 + + B0015_sequence + + + + GCTT + + + pB0015_DE_three_prime_oh_sequence + 1 + + + + + + 1 + + + Fusion_Site_E Sequence + GCTT + + Fusion_Site_E_sequence + + + + + GCTTatgtcttctactagtagcggccgctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtggaagacatGGAG + DVK_AE_extracted_backbone_seq + 1 + + + + DVK_AE_three_prime_oh_sequence + GGAG + 1 + + + + + GCTT + + + 1 + + DVK_AE_five_prime_oh_sequence + + + atgtcttctactagtagcggccgctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtggaagacat + 26479688 + 1 + + + + + dvk_backbone_core_seq + + + + 1 + Ligation_Scar_A_sequence + GCTT + + + + + + Ligation_Scar_B_sequence + GGAG + + 1 + + + Ligation_Scar_C_sequence + + + 1 + TACT + + + + AATG + + Ligation_Scar_D_sequence + 1 + + + 1 + + + Ligation_Scar_E_sequence + AGGT + + + 1 + GCTTatgtcttctactagtagcggccgctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtggaagacatGGAGTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCTACTAGAGAAAGAGGAGAAATACTAAATGATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCTTCGGCTACGGCCTGCAATGCTTCGCCCGCTACCCCGACCACATGAAGCTGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAATAAAGGTCCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + composite_1_seq + + + + + composite_1_assembly_plan + 1 + + + + Ecoli_DH5a_with_composite_1_transformation_plan + TODO: generate accurate description of transformation + + 1 + + + buildcompiler + 1 + + + + + + + + + assemble_composite_1_design + 1 + + + + Golden Gate Assembly + assemble_composite_1 + 1 + + + + + 1 + + + assemble_composite_1_association + + + + + Bacterial Tranformation + 1 + transform_Ecoli_DH5a + + + + Ecoli_DH5a_chassis_source + + + 1 + + + + + + 1 + + + + transform_Ecoli_DH5a_association + + + + + + composite_1_plasmid_source + 1 + + + + + + + diff --git a/tests/test_files/transformation_activity_new.xml b/tests/test_files/transformation_activity_new.xml new file mode 100644 index 0000000..8f51315 --- /dev/null +++ b/tests/test_files/transformation_activity_new.xml @@ -0,0 +1,1884 @@ + + + + + + T4_Ligase + 1 + + + + + + + + + pE0030_CD_reactant + + + 1 + + + + + + 1 + + + + + pB0015_DE_reactant + + + + + + + + + 1 + DVK_AE_backbone_digestion_product + + + + + + 1 + pJ23100_AB_digestion_product + + + + + + + + + + + + 1 + pE0030_CD_extracted_part_ligation + + + + + + + + 1 + + composite_1_product + + + + + + + + + 1 + pB0034_BC_extracted_part_ligation + + + + + + pJ23100_AB_extracted_part_ligation + + + 1 + + + 1 + composite_1_ligation + + + + + + 1 + + DVK_AE_extracted_backbone_ligation + + + + + + + pB0015_DE_extracted_part_ligation + + 1 + + + + + + + pB0015_DE_digestion + + + 1 + + pB0015_DE_product + + + + + + + + + + 1 + pB0015_DE_reactant + + + + 1 + + + + 1 + restriction + + + + + + + + + + 1 + + + + DVK_AE_backbone_reactant + + + + + + + + pB0015_DE_digestion_product + + 1 + + + + 1 + + + BsaI_enzyme + + + + + 1 + + + + + + DVK_AE_digestion + + + + DVK_AE_backbone_reactant + + 1 + + + + + + 1 + + restriction + + + + + 1 + + + + DVK_AE_backbone_product + + + + 1 + + + + + + + 1 + + + + pJ23100_AB_reactant + + + + + + + + 1 + pJ23100_AB_reactant + + + + + + 1 + + + + + 1 + restriction + + + + + + + + + pJ23100_AB_product + + 1 + + + + pJ23100_AB_digestion + + + + + pB0034_BC_digestion + + + + 1 + + restriction + + + + + 1 + + + 1 + + + + pB0034_BC_product + + + + + + pB0034_BC_reactant + + + + 1 + + + + + assembly_plan + + + + pE0030_CD_digestion_product + + + 1 + + + + + + + 1 + composite_1 + + + + + + + + + 1 + + + pB0034_BC_digestion_product + + + + + + + + + + restriction + + 1 + + + + 1 + + + + + pE0030_CD_reactant + + 1 + + + + + + + + + pE0030_CD_product + 1 + + + + pE0030_CD_digestion + + + + + + 1 + pB0034_BC_reactant + + + + + + + + 1 + Escherichia coli, strain DH5alpha + + + Ecoli_DH5a + Ecoli_DH5a + + + + + + + + + + + 1 + + composite_1_engineered_plasmid + + + + 1 + + + + + 1 + + Ecoli_DH5a_chassis + + + Ecoli_DH5a_with_composite_1 + + + + 1 + + + BsaI + Restriction enzyme BsaI from REBASE. + BsaI + + + + + + + + pJ23100_AB_five_prime_oh_component + + 1 + + + + + + pJ23100_AB_part + 1 + + + + 5 + + 1 + 39 + + pJ23100_AB_part_location + + + + + + + + + + + + + + J23100_3 + 1 + + + + pJ23100_AB_extracted_part + + + + 1 + pJ23100_AB_three_prime_oh_component + + + + + + + + + five_prime_overhang + + + + 4 + five_prime_oh_location + + + 1 + 1 + + + + 1 + + + + + + + three_prime_overhang + + + 43 + + 1 + + 40 + three_prime_oh_location + + + 1 + + + 1 + + + + + + + + 1 + + pJ23100_AB_five_prime_oh + + + + + + Fusion_Site_A + MoClo standard fusion site A + + Fusion_Site_A + + + + 1 + + 26479688 + + + + + + 1 + + J23100 + + + J23100 + + + + MoClo Basic Part: Constitutive promoter - Anderson series - high strength + 26479688 + + + + pJ23100_AB_three_prime_oh + + + + + 1 + + + + + Fusion_Site_B + + + + 26479688 + + + + 1 + Fusion_Site_B + MoClo standard fusion site B + + + + + + + + + 1 + B0034_3 + + + + + + + + + + 1 + + 26 + 29 + three_prime_oh_location + + + + 1 + + three_prime_overhang + + + + + pB0034_BC_extracted_part + + + + + + + 1 + + 1 + 4 + five_prime_oh_location + + + + five_prime_overhang + + 1 + + + + 1 + + + + 1 + + + 25 + + pB0034_BC_part_location + 5 + + 1 + + + pB0034_BC_part + + + + + + + + pB0034_BC_five_prime_oh_component + 1 + + + + + + + + 1 + pB0034_BC_three_prime_oh_component + + + + + + + + + 1 + + pB0034_BC_five_prime_oh + + + + + + + B0034 + + + + 26479688 + + + + 1 + + MoClo Basic Part: RBS - Weiss RBS, high strength. Modified from Bba_B0034 to adjust spacing in MC system. + B0034 + + + + + pB0034_BC_three_prime_oh + 1 + + + + + + Fusion_Site_C + Fusion_Site_C + 1 + + MoClo standard fusion site C + + + 26479688 + + + + + + + + + + 1 + + + pE0030_CD_part + + + 727 + + 5 + + pE0030_CD_part_location + 1 + + + + + + + + five_prime_overhang + + + + 4 + five_prime_oh_location + + 1 + 1 + + + 1 + + + + + + + + pE0030_CD_five_prime_oh_component + 1 + + + + pE0030_CD_extracted_part + + + + + 1 + + + + pE0030_CD_three_prime_oh_component + + + + 1 + + + + + + + E0030_yfp_3 + 1 + + + + + + + + + 1 + three_prime_overhang + + + 731 + 728 + + + 1 + three_prime_oh_location + + + + + + + + + + + pE0030_CD_five_prime_oh + + + + + 1 + + + E0030_yfp + + E0030_yfp + + + + 26479688 + + + MoClo Basic Part: CDS - Fluorescent protein. Yellow. + 1 + + + + + pE0030_CD_three_prime_oh + 1 + + + + + + + + + 26479688 + + Fusion_Site_D + Fusion_Site_D + + + + 1 + MoClo standard fusion site D + + + + + + + + + + three_prime_overhang + + + + three_prime_oh_location + + 134 + 1 + 137 + + + + 1 + + + + + + 1 + + + + pB0015_DE_three_prime_oh_component + + + 1 + pB0015_DE_extracted_part + + + 1 + + pB0015_DE_five_prime_oh_component + + + + + + + + 1 + + five_prime_overhang + + + + five_prime_oh_location + 1 + 1 + + 4 + + + + + + + + pB0015_DE_part + + + + + pB0015_DE_part_location + + + 133 + 5 + 1 + + + 1 + + + + + + 1 + + + + + + + B0015_3 + + + + + + + + + pB0015_DE_five_prime_oh + + + 1 + + + + 26479688 + 1 + + + B0015 + + MoClo Basic Part: Double terminator (B0010:B0012) + + + + + B0015 + + + + + + + + + 1 + pB0015_DE_three_prime_oh + + + Fusion_Site_E + 26479688 + + Fusion_Site_E + + + + 1 + MoClo standard fusion site E + + + + + + + + + + 1 + + + + 5 + DVK_AE_backbone_location + + 2231 + 1 + + + + DVK_AE_backbone + + + + + + + DVK_AE_five_prime_oh_component + + + 1 + + + + + + + + three_prime_oh_location + 2235 + + 2232 + 1 + + + + three_prime_overhang + + + 1 + + + + + 1 + + + + + + 4 + 1 + five_prime_oh_location + 1 + + + five_prime_overhang + + + + + + 1 + + + + + + dvk_backbone_core_5 + + + + + DVK_AE_extracted_backbone + + + + + + DVK_AE_three_prime_oh_component + + + 1 + + + 1 + + + + + 1 + + + DVK_AE_three_prime_oh + + + + + + + DVK_AE_five_prime_oh + + 1 + + + + + + kan_region_annotation + 26479688 + + + + + 1103 + 26479688 + + + kan + + 1929 + 1 + + + + + + + + 1 + + + + + + + + 1 + + + 870 + + 282 + + + + ori + 26479688 + 1 + + + + + origin_of_replication_pSB1A2_annotation + + 26479688 + + + dvk_backbone_core + + + + 1 + 26479688 + + + + + + + Component_ori + + + + + 26479688 + 1 + + + + 26479688 + + + Component_kan + + 1 + + + + + + This is the backbone core for Destination Vector Kanamycin (DVK) plasmids. Based on the pSB1K3 plasmid and contains kanamycin gene and a high copy number origin of replication. + + + + + + T4_Ligase + 1 + + T4_Ligase + + + + + + + + + Ligation_Scar_A + 1 + + + + + + + + Ligation_Scar_B + 1 + + + + + + + + 1 + + + Ligation_Scar_C + + + + + + + + + Ligation_Scar_D + 1 + + + + + 1 + + + + + + Ligation_Scar_E + + + + + + + Ligation_Scar_E + + 1 + + + + + + + B0015 + + 1 + + + + + 1 + + Ligation_Scar_B_annotation + + + + 2232 + + Ligation_Scar_B_location + + 1 + 2235 + + + + + + + 1 + + Ligation_Scar_B + + + + + + + + + Ligation_Scar_A + 1 + + + + + + + + + + Ligation_Scar_D + 1 + + + 1 + + + B0034 + + + 1 + + + + composite_1 + + + + 1 + Ligation_Scar_C + + + + + + + 1 + + + + 1 + 2231 + 5 + + + dvk_backbone_core_5_location + + + + dvk_backbone_core_5_annotation + + + + + Ligation_Scar_A_annotation + + + 1 + Ligation_Scar_A_location + 4 + + + 1 + + + + + 1 + + + + + + + + 3155 + B0015_3_location + + 3027 + 1 + + + + B0015_3_annotation + + 1 + + + + + + 1 + + + + 1 + + 2270 + 2236 + + J23100_3_location + + + J23100_3_annotation + + + + + + Ligation_Scar_C_annotation + + + 1 + + + + 1 + 2274 + 2271 + + Ligation_Scar_C_location + + + + + + + + 1 + + E0030_yfp + + + + + + + + + Ligation_Scar_E_annotation + + + 3023 + Ligation_Scar_E_location + + 3026 + 1 + + + + 1 + + + + + + + J23100 + 1 + + + + + + composite_1 + + + dvk_backbone_core + + 1 + + + + + + + + + 1 + Ligation_Scar_D_location + + + 2296 + 2299 + + + + Ligation_Scar_D_annotation + + 1 + + + + + + + + + + 1 + E0030_yfp_3_location + 3022 + 2300 + + + 1 + + E0030_yfp_3_annotation + + + + + + B0034_3_annotation + + + 2275 + 2295 + + B0034_3_location + + 1 + + + + 1 + + + + + + + 1 + pJ23100_AB_extracted_part_seq + GGAGTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCTACT + + + + + 1 + + pJ23100_AB_five_prime_oh_sequence + GGAG + + + + Fusion_Site_A_sequence + Fusion_Site_A Sequence + + + + GGAG + + + 1 + + + + + 1 + J23100_sequence + + J23100 Sequence + + + TTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGC + + + + pJ23100_AB_three_prime_oh_sequence + TACT + + + 1 + + + Fusion_Site_B_sequence + + Fusion_Site_B Sequence + + TACT + + 1 + + + + + + 1 + pB0034_BC_extracted_part_seq + + TACTAGAGAAAGAGGAGAAATACTAAATG + + + pB0034_BC_five_prime_oh_sequence + + + TACT + + 1 + + + + + B0034_sequence + + 1 + AGAGAAAGAGGAGAAATACTA + B0034 Sequence + + + + + 1 + AATG + + pB0034_BC_three_prime_oh_sequence + + + + + Fusion_Site_C Sequence + + + AATG + Fusion_Site_C_sequence + + + + 1 + + + + + AATGATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCTTCGGCTACGGCCTGCAATGCTTCGCCCGCTACCCCGACCACATGAAGCTGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAATAAAGGT + 1 + pE0030_CD_extracted_part_seq + + + AATG + + pE0030_CD_five_prime_oh_sequence + + + 1 + + + + E0030_yfp Sequence + + 1 + + ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCTTCGGCTACGGCCTGCAATGCTTCGCCCGCTACCCCGACCACATGAAGCTGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAATAA + + + E0030_yfp_sequence + + + + pE0030_CD_three_prime_oh_sequence + 1 + + AGGT + + + + + + 1 + + Fusion_Site_D Sequence + Fusion_Site_D_sequence + + AGGT + + + + + 1 + + AGGTCCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATAGCTT + pB0015_DE_extracted_part_seq + + + pB0015_DE_five_prime_oh_sequence + 1 + + + AGGT + + + + + + + + + 1 + B0015_sequence + B0015 Sequence + CCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + + + 1 + + GCTT + + + pB0015_DE_three_prime_oh_sequence + + + 1 + GCTT + + + + + Fusion_Site_E_sequence + Fusion_Site_E Sequence + + + + 1 + DVK_AE_extracted_backbone_seq + GCTTatgtcttctactagtagcggccgctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtggaagacatGGAG + + + + + + 1 + + + DVK_AE_three_prime_oh_sequence + GGAG + + + 1 + DVK_AE_five_prime_oh_sequence + + + GCTT + + + + + 26479688 + 1 + + + dvk_backbone_core_seq + atgtcttctactagtagcggccgctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtggaagacat + + + + + + 1 + GCTT + + Ligation_Scar_A_sequence + + + + Ligation_Scar_B_sequence + GGAG + + 1 + + + + TACT + + 1 + Ligation_Scar_C_sequence + + + Ligation_Scar_D_sequence + 1 + + + AATG + + + Ligation_Scar_E_sequence + 1 + + AGGT + + + + + + composite_1_seq + GCTTatgtcttctactagtagcggccgctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgcggccgcttctagagactagtggaagacatGGAGTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCTACTAGAGAAAGAGGAGAAATACTAAATGATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCTTCGGCTACGGCCTGCAATGCTTCGCCCGCTACCCCGACCACATGAAGCTGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAATAAAGGTCCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATA + 1 + + + 1 + composite_1_assembly_plan + + + + Ecoli_DH5a_with_composite_1_transformation_plan + + 1 + TODO: generate accurate description of transformation + + + 1 + + buildcompiler + + + + + + + + assemble_composite_1_design + + + 1 + + + Golden Gate Assembly + assemble_composite_1 + 1 + + + + 1 + + assemble_composite_1_association + + + + + + transform_Ecoli_DH5a + + + + + + 1 + composite_1_plasmid_source + + + + + + + transform_Ecoli_DH5a_association + + 1 + + + + + + + + + Ecoli_DH5a_chassis_source + 1 + + + + Bacterial Tranformation + 1 + + From 1c67d56b32ef382ff8b1a4dc040613f871e0ba10 Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Mon, 18 May 2026 12:01:43 -0600 Subject: [PATCH 46/47] notebook updates --- notebooks/comb_design.ipynb | 10 +- notebooks/impl_creation.ipynb | 261 +++++++++++++++++----------------- 2 files changed, 138 insertions(+), 133 deletions(-) diff --git a/notebooks/comb_design.ipynb b/notebooks/comb_design.ipynb index 493be94..dcc14c7 100644 --- a/notebooks/comb_design.ipynb +++ b/notebooks/comb_design.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "id": "814099af", "metadata": {}, "outputs": [], @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "id": "2f1ab216", "metadata": {}, "outputs": [ @@ -21,7 +21,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n" + "https://sbolcanvas.org/combinatorialRBSs\n" ] } ], @@ -29,7 +29,7 @@ "abstract_doc = sbol2.Document()\n", "abstract_doc.read(\"tests/test_files/combinatorial_1.xml\")\n", "comb_design = abstract_doc.combinatorialderivations[0]\n", - "print(type(comb_design))" + "print(comb_design.identity)" ] }, { @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "c3bbf89a", "metadata": {}, "outputs": [ diff --git a/notebooks/impl_creation.ipynb b/notebooks/impl_creation.ipynb index a87de5e..6105579 100644 --- a/notebooks/impl_creation.ipynb +++ b/notebooks/impl_creation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 15, "id": "87bdb42e", "metadata": {}, "outputs": [], @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 16, "id": "90648527", "metadata": {}, "outputs": [ @@ -26,7 +26,7 @@ } ], "source": [ - "auth = \"0b2dc76c-4c1d-4ee8-b339-6a3e15e3faf9\"\n", + "auth = \"839935f0-7f6a-4c83-b1f0-91ddc0eb9c9a\"\n", "buildcompiler = BuildCompiler(\n", " [\n", " \"https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/CIDARMoCloPlasmidsKit_collection/1\"\n", @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 17, "id": "99d093a8", "metadata": {}, "outputs": [ @@ -48,84 +48,76 @@ "output_type": "stream", "text": [ "[Plasmid:\n", - " Name: pB0033_BC_B_C\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0033_BC/1\n", + " Name: pB0034_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0034_BC/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", " Fusion Sites: ['B', 'C']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: DVA_CD_C_D\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_CD/1\n", - " Strain Definitions: [None]\n", - " Plasmid Implementations: None\n", - " Strain Implementations: None\n", - " Fusion Sites: ['C', 'D']\n", - " Antibiotic Resistance: Ampicillin\n", - ", Plasmid:\n", - " Name: pJ23100_AB_A_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_AB/1\n", + " Name: pJ23100_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_GB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['A', 'B']\n", + " Fusion Sites: ['G', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23106_AB_A_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_AB/1\n", + " Name: DVA_DF_D_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_DF/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['A', 'B']\n", + " Fusion Sites: ['D', 'F']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23116_GB_G_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_GB/1\n", + " Name: DVA_DG_D_G\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_DG/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['G', 'B']\n", + " Fusion Sites: ['D', 'G']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: DVA_DH_D_H\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_DH/1\n", + " Name: pB0015_DH_D_H\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DH/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", " Fusion Sites: ['D', 'H']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pB0015_DE_D_E\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DE/1\n", + " Name: pB0032_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0032_BC/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['D', 'E']\n", + " Fusion Sites: ['B', 'C']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23100_GB_G_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_GB/1\n", + " Name: pE1010_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE1010_CD/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['G', 'B']\n", + " Fusion Sites: ['C', 'D']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pB0015_DH_D_H\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DH/1\n", + " Name: DVA_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_CD/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['D', 'H']\n", + " Fusion Sites: ['C', 'D']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: DVA_AB_A_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_AB/1\n", + " Name: DVA_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_GB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['A', 'B']\n", + " Fusion Sites: ['G', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", " Name: pJ23116_FB_F_B\n", @@ -136,20 +128,20 @@ " Fusion Sites: ['F', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pB0015_DG_D_G\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DG/1\n", + " Name: pB0015_DE_D_E\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DE/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['D', 'G']\n", + " Fusion Sites: ['D', 'E']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pE1010_CD_C_D\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE1010_CD/1\n", + " Name: pJ23100_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_EB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['C', 'D']\n", + " Fusion Sites: ['E', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", " Name: pE0040_CD_C_D\n", @@ -160,132 +152,140 @@ " Fusion Sites: ['C', 'D']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23100_EB_E_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_EB/1\n", + " Name: DVA_DH_D_H\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_DH/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['E', 'B']\n", + " Fusion Sites: ['D', 'H']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pB0032_BC_B_C\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0032_BC/1\n", + " Name: pJ23116_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_EB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['B', 'C']\n", + " Fusion Sites: ['E', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: DVA_FB_F_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_FB/1\n", + " Name: pJ23106_EB_E_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_EB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['F', 'B']\n", + " Fusion Sites: ['E', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: DVA_DG_D_G\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_DG/1\n", + " Name: pJ23116_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_GB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['D', 'G']\n", + " Fusion Sites: ['G', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pE0030_CD_C_D\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0030_CD/1\n", + " Name: pJ23116_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_AB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['C', 'D']\n", + " Fusion Sites: ['A', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23100_FB_F_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_FB/1\n", + " Name: DVA_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_BC/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['F', 'B']\n", + " Fusion Sites: ['B', 'C']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: DVA_DF_D_F\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_DF/1\n", + " Name: pB0033_BC_B_C\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0033_BC/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['D', 'F']\n", + " Fusion Sites: ['B', 'C']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23106_EB_E_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_EB/1\n", + " Name: pJ23106_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_AB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['E', 'B']\n", + " Fusion Sites: ['A', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pB0015_DF_D_F\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DF/1\n", + " Name: pB0015_DG_D_G\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DG/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['D', 'F']\n", + " Fusion Sites: ['D', 'G']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23116_EB_E_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_EB/1\n", + " Name: pE0030_CD_C_D\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pE0030_CD/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['E', 'B']\n", + " Fusion Sites: ['C', 'D']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23106_FB_F_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_FB/1\n", + " Name: DVA_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_FB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", " Fusion Sites: ['F', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: DVA_BC_B_C\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_BC/1\n", + " Name: pJ23106_GB_G_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_GB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['B', 'C']\n", + " Fusion Sites: ['G', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pB0034_BC_B_C\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0034_BC/1\n", + " Name: DVA_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_AB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['B', 'C']\n", + " Fusion Sites: ['A', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23116_AB_A_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23116_AB/1\n", + " Name: pJ23100_AB_A_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_AB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", " Fusion Sites: ['A', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: pJ23106_GB_G_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_GB/1\n", + " Name: pJ23100_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23100_FB/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['G', 'B']\n", + " Fusion Sites: ['F', 'B']\n", " Antibiotic Resistance: Ampicillin\n", ", Plasmid:\n", - " Name: DVA_GB_G_B\n", - " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/DVA_GB/1\n", + " Name: pB0015_DF_D_F\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pB0015_DF/1\n", " Strain Definitions: [None]\n", " Plasmid Implementations: None\n", " Strain Implementations: None\n", - " Fusion Sites: ['G', 'B']\n", + " Fusion Sites: ['D', 'F']\n", + " Antibiotic Resistance: Ampicillin\n", + ", Plasmid:\n", + " Name: pJ23106_FB_F_B\n", + " Plasmid Definition: https://synbiohub.org/user/Gon/CIDARMoCloPlasmidsKit/pJ23106_FB/1\n", + " Strain Definitions: [None]\n", + " Plasmid Implementations: None\n", + " Strain Implementations: None\n", + " Fusion Sites: ['F', 'B']\n", " Antibiotic Resistance: Ampicillin\n", "]\n" ] @@ -305,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 18, "id": "9cdd18d0", "metadata": {}, "outputs": [], @@ -321,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 19, "id": "d76dbdeb", "metadata": {}, "outputs": [ @@ -331,24 +331,20 @@ "text": [ "amp_region\n", "B0033\n", + "dvk_backbone_core\n", "J23106\n", - "DVK_AE\n", "E0030_yfp\n", "kan_region\n", "E0040m_gfp\n", "B0015\n", "J23116\n", - "DVA_AF\n", "J23100\n", - "DVA_AF2\n", - "DVK_GH\n", "Fusion_Site_E\n", "Fusion_Site_F\n", "Fusion_Site_D\n", "Fusion_Site_G\n", "E1010m_rfp\n", "Fusion_Site_A\n", - "DVK_FG\n", "LacZ_cassette\n", "Fusion_Site_B\n", "Fusion_Site_C\n", @@ -358,8 +354,6 @@ "B0034\n", "dva_backbone_core\n", "origin_of_replication_pSB1A2\n", - "DVK_EF\n", - "dvk_backbone_core\n", "Design........................0\n", "Build.........................0\n", "Test..........................0\n", @@ -374,18 +368,21 @@ "Agent.........................0\n", "Attachment....................0\n", "CombinatorialDerivation.......0\n", - "Implementation................30\n", + "Implementation................36\n", "SampleRoster..................0\n", "Experiment....................0\n", "ExperimentalData..............0\n", "Annotation Objects............0\n", "---\n", - "Total: .........................30\n", + "Total: .........................36\n", "\n" ] } ], "source": [ + "from buildcompiler.constants import ENGINEERED_PLASMID, CIRCULAR, PLASMID_CLONING_VECTOR\n", + "\n", + "\n", "implementation_collection.default_namespace = (\n", " \"http://buildcompiler.org/implementations/\"\n", ")\n", @@ -395,7 +392,9 @@ "dummy_activity.types = \"http://sbols.org/v2#build\"\n", "\n", "for plasmid in plas_doc.componentDefinitions:\n", - " if \"http://identifiers.org/so/SO:0000637\" in plasmid.roles:\n", + " if (\n", + " ENGINEERED_PLASMID or PLASMID_CLONING_VECTOR in plasmid.roles\n", + " ) and CIRCULAR in plasmid.types:\n", " implementation = sbol2.Implementation(f\"{plasmid.displayId}_impl\")\n", " implementation.built = plasmid.identity\n", " implementation.wasGeneratedBy = dummy_activity\n", @@ -409,7 +408,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 20, "id": "3bcf02fb", "metadata": {}, "outputs": [ @@ -419,7 +418,7 @@ "'Valid.'" ] }, - "execution_count": 11, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -430,46 +429,52 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 21, "id": "9bf9c396", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['pB0033_BC_impl',\n", - " 'DVA_CD_impl',\n", - " 'pJ23100_AB_impl',\n", - " 'pJ23106_AB_impl',\n", - " 'pJ23116_GB_impl',\n", - " 'DVA_DH_impl',\n", - " 'pB0015_DE_impl',\n", + "['pB0034_BC_impl',\n", " 'pJ23100_GB_impl',\n", + " 'DVA_DF_impl',\n", + " 'DVA_DG_impl',\n", + " 'DVA_AF_impl',\n", " 'pB0015_DH_impl',\n", - " 'DVA_AB_impl',\n", - " 'pJ23116_FB_impl',\n", - " 'pB0015_DG_impl',\n", + " 'pB0032_BC_impl',\n", " 'pE1010_CD_impl',\n", - " 'pE0040_CD_impl',\n", + " 'DVA_CD_impl',\n", + " 'DVA_GB_impl',\n", + " 'DVA_AF2_impl',\n", + " 'pJ23116_FB_impl',\n", + " 'pB0015_DE_impl',\n", " 'pJ23100_EB_impl',\n", - " 'pB0032_BC_impl',\n", - " 'DVA_FB_impl',\n", - " 'DVA_DG_impl',\n", - " 'pE0030_CD_impl',\n", - " 'pJ23100_FB_impl',\n", - " 'DVA_DF_impl',\n", - " 'pJ23106_EB_impl',\n", - " 'pB0015_DF_impl',\n", + " 'pE0040_CD_impl',\n", + " 'DVK_GH_impl',\n", + " 'DVA_DH_impl',\n", " 'pJ23116_EB_impl',\n", - " 'pJ23106_FB_impl',\n", - " 'DVA_BC_impl',\n", - " 'pB0034_BC_impl',\n", + " 'pJ23106_EB_impl',\n", + " 'pJ23116_GB_impl',\n", + " 'DVK_AE_impl',\n", " 'pJ23116_AB_impl',\n", + " 'DVA_BC_impl',\n", + " 'pB0033_BC_impl',\n", + " 'pJ23106_AB_impl',\n", + " 'pB0015_DG_impl',\n", + " 'pE0030_CD_impl',\n", + " 'DVK_EF_impl',\n", + " 'DVA_FB_impl',\n", " 'pJ23106_GB_impl',\n", - " 'DVA_GB_impl']" + " 'DVA_AB_impl',\n", + " 'DVK_FG_impl',\n", + " 'pJ23100_AB_impl',\n", + " 'pJ23100_FB_impl',\n", + " 'pB0015_DF_impl',\n", + " 'pJ23106_FB_impl']" ] }, - "execution_count": 12, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } From 223712359576ec74f5743289e2980b0f82e71d8d Mon Sep 17 00:00:00 2001 From: Ryan Greer Date: Mon, 18 May 2026 12:02:11 -0600 Subject: [PATCH 47/47] added transformation --- src/buildcompiler/buildcompiler.py | 181 ++++++++++++++++++++++++++++- 1 file changed, 180 insertions(+), 1 deletion(-) diff --git a/src/buildcompiler/buildcompiler.py b/src/buildcompiler/buildcompiler.py index f345ba7..abc41f9 100644 --- a/src/buildcompiler/buildcompiler.py +++ b/src/buildcompiler/buildcompiler.py @@ -1,7 +1,7 @@ import sbol2 import random import warnings -from typing import List, Dict, Tuple +from typing import Any, List, Dict, Tuple from buildcompiler.plasmid import Plasmid from buildcompiler.sbol2build import ( @@ -530,6 +530,165 @@ def assembly_lvl2( return lvl2_plasmids, final_doc + def transformation( + self, + assembly_products: List[Plasmid], + chassis_name: str = "E_coli_DH5alpha", + transformation_doc: sbol2.Document = None, + ) -> Dict[str, Any]: + """Generate deterministic transformation artifacts from assembly outputs. + + :param assembly_products: Structured inputs produced by an assembly stage + :type assembly_products: List[Plasmid] + :param chassis_name: Display id used for the chassis module and implementation. + :type chassis_name: str + :param transformation_doc: Optional SBOL document to write outputs into. + :type transformation_doc: sbol2.Document | None + :returns: Structured transformation outputs including SBOL references, + robot JSON intermediate, protocol placeholders, and logs. + :rtype: dict + :raises ValueError: If no valid plasmid inputs can be extracted. + """ + if transformation_doc is None: + transformation_doc = self.sbol_doc + + chassis_module, chassis_impl = self._get_or_create_chassis( + transformation_doc, chassis_name + ) + + sbol_outputs = [] + robot_steps = [] + logs = [] + + for index, plasmid_obj in enumerate(assembly_products, start=1): + plasmid = plasmid_obj.plasmid_definition + + if not plasmid_obj.plasmid_implementations: + raise ValueError( + f"No plasmid implementations found for {plasmid.displayId}" + ) + + plasmid_impl = plasmid_obj.plasmid_implementations[0] + + transform_id = f"transform_{plasmid.displayId}_{index}" + + transformation_activity = sbol2.Activity(transform_id) + transformation_activity.name = ( + f"Transform {chassis_name} with {plasmid.displayId}" + ) + transformation_activity.types = "http://sbols.org/v2#build" + + chassis_usage = sbol2.Usage( + uri=f"{transform_id}_chassis", + entity=chassis_impl.identity, + role="http://sbols.org/v2#build", + ) + plasmid_usage = sbol2.Usage( + uri=f"{transform_id}_plasmid", + entity=plasmid_impl.identity, + role="http://sbols.org/v2#build", + ) + transformation_activity.usages = [chassis_usage, plasmid_usage] + + transformed_strain = sbol2.ModuleDefinition( + f"{chassis_name}_with_{plasmid.displayId}" + ) + transformed_strain.roles = [ORGANISM_STRAIN] + transformed_strain.name = ( + f"{chassis_name} transformed with {plasmid.displayId}" + ) + + chassis_module_ref = sbol2.Module( + uri=f"{transformed_strain.displayId}_chassis" + ) + chassis_module_ref.definition = chassis_module.identity + plasmid_fc = sbol2.FunctionalComponent( + uri=f"{transformed_strain.displayId}_plasmid" + ) + plasmid_fc.definition = plasmid.identity + + transformed_strain.modules = [chassis_module_ref] + transformed_strain.functionalComponents = [plasmid_fc] + + transformation_activity_association = sbol2.Association( + f"transform_{chassis_module_ref.name}" + ) + + transformation_activity_plan = sbol2.Plan( + f"{transformed_strain.displayId}_transformation_plan" + ) + transformation_activity_plan.description = ( + "TODO: generate accurate description of transformation" + ) + transformation_activity_association.plan = transformation_activity_plan + + transformation_activity_agent = sbol2.Agent("BuildCompiler") + transformation_activity_association.agent = transformation_activity_agent + + transformation_activity.associations = [transformation_activity_association] + + transformed_impl = sbol2.Implementation( + f"{transformed_strain.displayId}_impl" + ) + + transformed_impl.built = transformed_strain.identity + transformed_impl.wasGeneratedBy = transformation_activity.identity + + for obj in ( + transformation_activity, + chassis_usage, + plasmid_usage, + transformed_strain, + chassis_module_ref, + plasmid_fc, + transformed_impl, + ): + self._add_if_absent(transformation_doc, obj) + + sbol_outputs.append( + { + "transformation_activity": transformation_activity.identity, + "transformed_strain_module": transformed_strain.identity, + "transformed_strain_implementation": transformed_impl.identity, + } + ) + robot_steps.append( + { + "step": index, + "plasmid": plasmid.displayId, + "chassis": chassis_name, + "mix_ul": {"competent_cells": 50, "assembly_product": 5}, + "heat_shock": {"temperature_c": 42, "duration_seconds": 45}, + "recovery": {"medium": "SOC", "volume_ul": 950, "duration_min": 60}, + } + ) + logs.append( + f"Prepared transformation input for plasmid {plasmid.displayId} into chassis {chassis_name}." + ) + + return { + "stage": "transformation", + "inputs": [ + plasmid.plasmid_definition.displayId for plasmid in assembly_products + ], + "chassis": chassis_name, + "sbol_artifacts": sbol_outputs, + "json_intermediate": { + "protocol": "chemical_transformation", + "version": "0.1", + "steps": robot_steps, + }, + "protocol_artifacts": { + "ot2_script": "TODO: adapter to protocol generator", + "human_instructions": [ + "Thaw competent cells on ice.", + "Combine assembly product with competent cells as specified.", + "Run heat shock and recovery according to generated parameters.", + ], + "logs": logs, + }, + } + def _extract_plasmids_from_strain( self, strain: sbol2.ModuleDefinition, @@ -1106,6 +1265,26 @@ def _create_ligase_implementation(self): self.sbol_doc.add_list([T4_impl, ligase_def]) self.T4_ligase_impl = T4_impl + def _add_if_absent(self, doc: sbol2.Document, obj: Any): + if doc.find(obj.identity) is None: + doc.add(obj) + + def _get_or_create_chassis( + self, doc: sbol2.Document, chassis_name: str + ) -> tuple[sbol2.ModuleDefinition, sbol2.Implementation]: + chassis_module = doc.find(chassis_name) or sbol2.ModuleDefinition(chassis_name) + chassis_module.roles = [ORGANISM_STRAIN] + chassis_module.name = chassis_name + self._add_if_absent(doc, chassis_module) + + chassis_impl_id = f"{chassis_name}_impl" + chassis_impl = doc.find(chassis_impl_id) or sbol2.Implementation( + chassis_impl_id + ) + chassis_impl.built = chassis_module.identity + self._add_if_absent(doc, chassis_impl) + return chassis_module, chassis_impl + def _extract_lvl2_TUs( # TODO send to misc helper file instead of buildcompiler.py? design_doc: sbol2.Document,