From d47d2a51dca822a6b86920ecd5d596083e02f9b4 Mon Sep 17 00:00:00 2001
From: Jan Range <30547301+JR-1991@users.noreply.github.com>
Date: Thu, 9 Oct 2025 12:20:30 +0200
Subject: [PATCH 1/3] Add assertions for data_unit presence in tabular tests

Additional assertions verify that data_unit is not None for species_data[0] and species_data[1] in TestTabularImport, improving test coverage for unit presence.
---
 tests/unit/test_tabular.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/unit/test_tabular.py b/tests/unit/test_tabular.py
index 13d786c..1635fb5 100644
--- a/tests/unit/test_tabular.py
+++ b/tests/unit/test_tabular.py
@@ -80,6 +80,10 @@ def test_csv_import(self):
             assert len(m.species_data[1].time) == 11, (
                 f"Expected 10 time points. Got {len(m.species_data[1].time)}"
             )
+
+            assert m.species_data[0].data_unit is not None, (
+                f"Expected data unit. Got {m.species_data[0].data_unit}"
+            )
             assert m.species_data[0].data_unit.name == "mmol / l", (
                 f"Expected mM. Got {m.species_data[0].data_unit}"
             )
@@ -93,6 +97,9 @@ def test_csv_import(self):
                     f"Expected None. Got {m.species_data[0].time_unit}"
                 )
 
+            assert m.species_data[1].data_unit is not None, (
+                f"Expected data unit. Got {m.species_data[1].data_unit}"
+            )
             assert m.species_data[1].data_unit.name == "mmol / l", (
                 f"Expected mM. Got {m.species_data[1].data_unit.name}"
             )

From 114e1f00c34b9ee0ae8fd0644555371ae493d0f9 Mon Sep 17 00:00:00 2001
From: Jan Range <30547301+JR-1991@users.noreply.github.com>
Date: Thu, 9 Oct 2025 12:21:52 +0200
Subject: [PATCH 2/3] Preserve species referenced in equations during filtering

Updates BaseThinLayer to retain species that are referenced in ODE, assignment, initial assignment, and kinetic law equations, even if they are not part of reactions or defined as ODEs. Adds tests to verify that such species are not removed during optimization.
---
 pyenzyme/thinlayers/base.py  |  36 ++++++-
 tests/unit/test_thinlayer.py | 203 ++++++++++++++++++++++++++++++++++-
 2 files changed, 233 insertions(+), 6 deletions(-)

diff --git a/pyenzyme/thinlayers/base.py b/pyenzyme/thinlayers/base.py
index a814cd1..9eb6346 100644
--- a/pyenzyme/thinlayers/base.py
+++ b/pyenzyme/thinlayers/base.py
@@ -1,8 +1,9 @@
 from abc import ABC, abstractmethod
 from functools import cached_property
-from typing import Dict, List, Optional, Tuple, TypeAlias
+from typing import Dict, List, Optional, Set, Tuple, TypeAlias
 
 import pandas as pd
+from sympy import Symbol, sympify
 
 import pyenzyme as pe
 from pyenzyme.versions import v2
@@ -79,7 +80,9 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum
         enzmldoc = enzmldoc.model_copy(deep=True)
 
         # Collect all species that are explicitly modeled
+        all_species = BaseThinLayer._get_all_species(enzmldoc)
         modeled_species = set()
+        equations = []
 
         # Add species from reactions (reactants and products)
         for reaction in enzmldoc.reactions:
@@ -88,11 +91,24 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum
             )
             modeled_species.update(product.species_id for product in reaction.products)
 
+            if reaction.kinetic_law:
+                equations.append(sympify(reaction.kinetic_law.equation))
+
         # Add species from ODE equations
+        for equation in enzmldoc.equations:
+            if equation.equation_type == v2.EquationType.ODE:
+                equations.append(sympify(equation.equation))
+                modeled_species.add(equation.species_id)
+            elif (
+                equation.equation_type == v2.EquationType.ASSIGNMENT
+                or equation.equation_type == v2.EquationType.INITIAL_ASSIGNMENT
+            ):
+                equations.append(sympify(equation.equation))
+
+        # Find species referenced in equations
+        equation_symbols = {symbol for eq in equations for symbol in eq.free_symbols}
         modeled_species.update(
-            equation.species_id
-            for equation in enzmldoc.equations
-            if equation.equation_type == v2.EquationType.ODE
+            species for species in all_species if Symbol(species) in equation_symbols
         )
 
         if not modeled_species:
@@ -132,6 +148,18 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum
 
         return enzmldoc
 
+    @staticmethod
+    def _get_all_species(enzmldoc: v2.EnzymeMLDocument) -> Set[str]:
+        """
+        Gets all species from the EnzymeML document.
+        """
+        return set(
+            species.id
+            for species in enzmldoc.small_molecules
+            + enzmldoc.proteins
+            + enzmldoc.complexes
+        )
+
     @abstractmethod
     def integrate(
         self,
diff --git a/tests/unit/test_thinlayer.py b/tests/unit/test_thinlayer.py
index e232234..94d2c12 100644
--- a/tests/unit/test_thinlayer.py
+++ b/tests/unit/test_thinlayer.py
@@ -1,5 +1,5 @@
 from pyenzyme.thinlayers.base import BaseThinLayer
-from pyenzyme.versions.v2 import EnzymeMLDocument, EquationType
+from pyenzyme.versions.v2 import EnzymeMLDocument, Equation, EquationType
 
 # Mock data for creating test species measurements
 MOCK_DATA = {
@@ -96,6 +96,205 @@ def test_remove_unmodeled_species_odes(self):
             f"Unmodeled species should be removed, but appears in measurements {measurement_has_unmodeled}."
         )
 
+    def test_includes_species_in_ode_equation(self):
+        """
+        Test that species referenced in equations are kept in the document even when
+        they are not defined as ODEs or part of reactions.
+
+        This test verifies that:
+        - A species that appears in an equation (but not in reactions or ODEs) is kept
+        - The species can have initial values in measurements without time course data
+        - The filtering function correctly identifies equation-referenced species as modeled
+        """
+        enzmldoc = self._create_enzmldoc()
+
+        # Add a new species that will only appear in equations (not reactions/ODEs)
+        equation_only_species = enzmldoc.add_to_small_molecules(
+            id="p1",
+            name="Equation Only Species",
+        )
+
+        # Add an initial for the species to each measurement
+        for i, measurement in enumerate(enzmldoc.measurements):
+            measurement.add_to_species_data(
+                species_id=equation_only_species.id,
+                initial=i * 2.0,
+                data=[],
+                time=[],
+            )
+
+        # Add an assignment equation that references this species
+        # This species is NOT an ODE and NOT part of any reaction
+        enzmldoc.add_to_equations(
+            species_id="Product",  # Left side of equation
+            equation_type=EquationType.ODE,
+            equation="p1 * 2.0",  # p1 appears in the equation
+        )
+
+        # Verify the species exists before filtering
+        assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
+
+        # Remove unmodeled species
+        thinlayer = MockThinLayer(enzmldoc)
+        tl_enzmldoc = thinlayer.optimize()
+
+        assert equation_only_species.id in [
+            s.id for s in tl_enzmldoc.small_molecules
+        ], (
+            f"Species '{equation_only_species.id}' should be kept because it appears in an equation, "
+            f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
+        )
+
+    def test_includes_species_in_assignment_equation(self):
+        """
+        Test that species referenced in equations are kept in the document even when
+        they are not defined as ODEs or part of reactions.
+
+        This test verifies that:
+        - A species that appears in an equation (but not in reactions or ODEs) is kept
+        - The species can have initial values in measurements without time course data
+        - The filtering function correctly identifies equation-referenced species as modeled
+        """
+        enzmldoc = self._create_enzmldoc()
+
+        # Add a new species that will only appear in equations (not reactions/ODEs)
+        equation_only_species = enzmldoc.add_to_small_molecules(
+            id="p1",
+            name="Equation Only Species",
+        )
+
+        # Add an initial for the species to each measurement
+        for i, measurement in enumerate(enzmldoc.measurements):
+            measurement.add_to_species_data(
+                species_id=equation_only_species.id,
+                initial=i * 2.0,
+                data=[],
+                time=[],
+            )
+
+        # Add an assignment equation that references this species
+        # This species is NOT an ODE and NOT part of any reaction
+        enzmldoc.add_to_equations(
+            species_id="Product",  # Left side of equation
+            equation_type=EquationType.ASSIGNMENT,
+            equation="p1 * 2.0",  # p1 appears in the equation
+        )
+
+        # Verify the species exists before filtering
+        assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
+
+        # Remove unmodeled species
+        thinlayer = MockThinLayer(enzmldoc)
+        tl_enzmldoc = thinlayer.optimize()
+
+        assert equation_only_species.id in [
+            s.id for s in tl_enzmldoc.small_molecules
+        ], (
+            f"Species '{equation_only_species.id}' should be kept because it appears in an equation, "
+            f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
+        )
+
+    def test_includes_species_in_initial_assignment_equation(self):
+        """
+        Test that species referenced in equations are kept in the document even when
+        they are not defined as ODEs or part of reactions.
+
+        This test verifies that:
+        - A species that appears in an equation (but not in reactions or ODEs) is kept
+        - The species can have initial values in measurements without time course data
+        - The filtering function correctly identifies equation-referenced species as modeled
+        """
+        enzmldoc = self._create_enzmldoc()
+
+        # Add a new species that will only appear in equations (not reactions/ODEs)
+        equation_only_species = enzmldoc.add_to_small_molecules(
+            id="p1",
+            name="Equation Only Species",
+        )
+
+        # Add an initial for the species to each measurement
+        for i, measurement in enumerate(enzmldoc.measurements):
+            measurement.add_to_species_data(
+                species_id=equation_only_species.id,
+                initial=i * 2.0,
+                data=[],
+                time=[],
+            )
+
+        # Add an assignment equation that references this species
+        # This species is NOT an ODE and NOT part of any reaction
+        enzmldoc.add_to_equations(
+            species_id="Product",  # Left side of equation
+            equation_type=EquationType.INITIAL_ASSIGNMENT,
+            equation="p1 * 2.0",  # p1 appears in the equation
+        )
+
+        # Verify the species exists before filtering
+        assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
+
+        # Remove unmodeled species
+        thinlayer = MockThinLayer(enzmldoc)
+        tl_enzmldoc = thinlayer.optimize()
+
+        assert equation_only_species.id in [
+            s.id for s in tl_enzmldoc.small_molecules
+        ], (
+            f"Species '{equation_only_species.id}' should be kept because it appears in an equation, "
+            f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
+        )
+
+    def test_includes_species_in_kinetic_law_equation(self):
+        """
+        Test that species referenced in equations are kept in the document even when
+        they are not defined as ODEs or part of reactions.
+
+        This test verifies that:
+        - A species that appears in an equation (but not in reactions or ODEs) is kept
+        - The species can have initial values in measurements without time course data
+        - The filtering function correctly identifies equation-referenced species as modeled
+        """
+        enzmldoc = self._create_enzmldoc()
+
+        # Add a new species that will only appear in equations (not reactions/ODEs)
+        equation_only_species = enzmldoc.add_to_small_molecules(
+            id="p1",
+            name="Equation Only Species",
+        )
+
+        # Add an initial for the species to each measurement
+        for i, measurement in enumerate(enzmldoc.measurements):
+            measurement.add_to_species_data(
+                species_id=equation_only_species.id,
+                initial=i * 2.0,
+                data=[],
+                time=[],
+            )
+
+        # Add a reaction that references this species
+        # This species is NOT an ODE and NOT part of any reaction
+        reaction = enzmldoc.add_to_reactions(id="R1", name="R1")
+        reaction.add_to_reactants(species_id="Substrate", stoichiometry=1)
+        reaction.add_to_products(species_id="Product", stoichiometry=1)
+        reaction.kinetic_law = Equation(
+            equation="p1 * 2.0",
+            equation_type=EquationType.RATE_LAW,
+            species_id="v",
+        )
+
+        # Verify the species exists before filtering
+        assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
+
+        # Remove unmodeled species
+        thinlayer = MockThinLayer(enzmldoc)
+        tl_enzmldoc = thinlayer.optimize()
+
+        assert equation_only_species.id in [
+            s.id for s in tl_enzmldoc.small_molecules
+        ], (
+            f"Species '{equation_only_species.id}' should be kept because it appears in a reaction, "
+            f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
+        )
+
     def _create_enzmldoc(self) -> EnzymeMLDocument:
         """
         Create a test EnzymeML document with various measurement scenarios.
@@ -116,7 +315,7 @@ def _create_enzmldoc(self) -> EnzymeMLDocument:
         # Add small molecules
         substrate = enzmldoc.add_to_small_molecules(id="Substrate", name="Substrate")
         product = enzmldoc.add_to_small_molecules(id="Product", name="Product")
-        unmodeled = enzmldoc.add_to_small_molecules(id="Unmodeled", name="Unmodeled")
+        unmodeled = enzmldoc.add_to_small_molecules(id="s1", name="Unmodeled")
 
         # Add a measurement with unmodeled species
         measurement = enzmldoc.add_to_measurements(id="M1", name="M1")

From 7095705de6ffdd1df0485d9ca1664863b02d0192 Mon Sep 17 00:00:00 2001
From: Jan Range <30547301+JR-1991@users.noreply.github.com>
Date: Tue, 14 Oct 2025 12:21:37 +0200
Subject: [PATCH 3/3] Refactor species initialization in InitMap

Improves species value assignment in InitMap by checking for both '<species>_init' and '<species>' attributes in the model, raising an error if neither is found. Also updates import order and minor formatting for consistency.
---
 pyenzyme/thinlayers/psyces.py | 36 ++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/pyenzyme/thinlayers/psyces.py b/pyenzyme/thinlayers/psyces.py
index f7d2115..caacf79 100644
--- a/pyenzyme/thinlayers/psyces.py
+++ b/pyenzyme/thinlayers/psyces.py
@@ -6,19 +6,19 @@
 
 from __future__ import annotations
 
+import contextlib
+import io
+import os
 from dataclasses import dataclass
 from pathlib import Path
-from joblib import Parallel, delayed
+from typing import Dict, List, Optional, Tuple
+
 import dill
 import numpy as np
 import pandas as pd
-import os
-import contextlib
-import io
-
-from typing import Dict, List, Optional, Tuple
+from joblib import Parallel, delayed
 
-from pyenzyme.thinlayers.base import BaseThinLayer, SimResult, Time, InitCondDict
+from pyenzyme.thinlayers.base import BaseThinLayer, InitCondDict, SimResult, Time
 from pyenzyme.versions import v2
 
 try:
@@ -242,7 +242,6 @@ def optimize(self, method="leastsq"):
 
         return result
 
-
     def write(self) -> v2.EnzymeMLDocument:
         """
         Creates a new EnzymeML document with optimized parameter values.
@@ -297,9 +296,9 @@ def _initialize_parameters(self):
         for param in self.enzmldoc.parameters:
             # Build kwargs dictionary with conditional assignments
             kwargs = {
-                **({'min': param.lower_bound} if param.lower_bound is not None else {}),
-                **({'max': param.upper_bound} if param.upper_bound is not None else {}),
-                **({'vary': param.fit}),
+                **({"min": param.lower_bound} if param.lower_bound is not None else {}),
+                **({"max": param.upper_bound} if param.upper_bound is not None else {}),
+                **({"vary": param.fit}),
             }
 
             # Determine parameter value
@@ -542,10 +541,13 @@ def to_pysces_model(self, model: pysces.model):
         """
         model = dill.loads(dill.dumps(model))
         model.sim_time = np.array(self.time)
-        model.__dict__.update(
-            {
-                f"{species_id}_init": value if value != 0 else 1.0e-9
-                for species_id, value in self.species.items()
-            }
-        )
+
+        for species, value in self.species.items():
+            if hasattr(model, f"{species}_init"):
+                setattr(model, f"{species}_init", value)
+            elif hasattr(model, species):
+                setattr(model, species, value)
+            else:
+                raise ValueError(f"Species {species} not found in model")
+
         return model