HelikarLab · JoshLoecker · Oct 7, 2025 · Dec 17, 2024 · Dec 17, 2024 · Dec 17, 2024
diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ "3.10", "3.11", "3.12", "3.13" ]
+        python-version: [ "3.11", "3.12", "3.13" ]
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -27,7 +27,7 @@ jobs:
         run: uv sync --python "${{ matrix.python-version }}" --all-extras --dev
 
       - name: Run tests
-        run: uv run --python "${{ matrix.python-version }}" pytest --cov --junitxml=junit.xml -o junit_family=legacy
+        run: uv run --python "${{ matrix.python-version }}" pytest
 
       - name: Cache Clear
         run: uv cache prune --ci
diff --git a/main/COMO.ipynb b/main/COMO.ipynb
diff --git a/main/como/__init__.py b/main/como/__init__.py
@@ -1,3 +1,4 @@
+from como import plot
 from como.data_types import AdjustmentMethod, Algorithm, CobraCompartments, FilteringTechnique, LogLevel, Solver
 from como.utils import stringlist_to_list
 

diff --git a/main/como/combine_distributions.py b/main/como/combine_distributions.py
@@ -23,7 +23,7 @@
 )
 
 
-async def _combine_z_distribution_for_batch(
+def _combine_z_distribution_for_batch(
     context_name: str,
     batch: _BatchEntry,
     matrix: pd.DataFrame,
@@ -33,6 +33,21 @@ async def _combine_z_distribution_for_batch(
     weighted_z_floor: int,
     weighted_z_ceiling: int,
 ) -> pd.DataFrame:
+    """Combine z-score distributions across samples for a single batch.
+
+    Args:
+        context_name: Name of the context (e.g., tissue or condition).
+        batch: Batch entry containing batch number and sample names.
+        matrix: DataFrame with 'ensembl_gene_id' and sample columns.
+        source: Source type (e.g., trna, mrna, scrna, proteomics).
+        output_combined_matrix_filepath: Path to save the combined z-score matrix.
+        output_figure_dirpath: Directory in which to save the z-score distribution figure.
+        weighted_z_floor: Minimum z-score value after combining.
+        weighted_z_ceiling: Maximum z-score value after combining.
+
+    Returns:
+        A pandas DataFrame of the weighted z-distributions.
+    """
     output_combined_matrix_filepath.parent.mkdir(parents=True, exist_ok=True)
     output_figure_dirpath.mkdir(parents=True, exist_ok=True)
 
@@ -80,15 +95,29 @@ async def _combine_z_distribution_for_batch(
     return weighted_matrix
 
 
-async def _combine_z_distribution_for_source(
+def _combine_z_distribution_for_source(
     merged_source_data: pd.DataFrame,
     context_name: str,
     num_replicates: int,
     output_combined_matrix_filepath: Path,
     output_figure_filepath: Path,
     weighted_z_floor: int = -6,
     weighted_z_ceiling: int = 6,
-):
+) -> pd.DataFrame:
+    """Combine z-score distributions across batches for a single source.
+
+    Args:
+        merged_source_data: DataFrame with 'ensembl_gene_id' and batch columns.
+        context_name: Name of the context (e.g., tissue or condition).
+        num_replicates: Number of replicates (samples) for weighting.
+        output_combined_matrix_filepath: Path to save the combined z-score matrix.
+        output_figure_filepath: Path to save the z-score distribution figure.
+        weighted_z_floor: Minimum z-score value after combining.
+        weighted_z_ceiling: Maximum z-score value after combining.
+
+    Returns:
+        A pandas DataFrame of the weighted z-distributions.
+    """
     if _num_columns(merged_source_data) <= 2:
         logger.warning("A single source exists, returning matrix as-is because no additional combining can be done")
         merged_source_data.columns = ["ensembl_gene_id", "combine_z"]
@@ -144,14 +173,10 @@ def _combine_z_distribution_for_context(
         return pd.DataFrame({"ensembl_gene_id": [], "combine_z": []})
 
     z_matrices = [
-        result.z_score_matrix.set_index("ensembl_gene_id").rename(columns=dict.fromkeys(result.z_score_matrix.columns[1:], result.type.value))
-        for result in zscore_results
+        res.z_score_matrix.set_index("ensembl_gene_id").rename(columns=dict.fromkeys(res.z_score_matrix.columns[1:], res.type.value))
+        for res in zscore_results
     ]
-    z_matrix = pd.DataFrame()
-    for matrix in z_matrices:
-        z_matrix = z_matrix.merge(right=matrix, left_index=True, right_index=True, how="outer") if not z_matrix.empty else matrix
-    z_matrix = z_matrix.reset_index(drop=False)
-    # z_matrix = pd.concat(z_matrices, axis=1, join="outer").reset_index()
+    z_matrix = pd.concat(z_matrices, axis=1, join="outer").reset_index()
     if _num_columns(z_matrix) <= 1:
         logger.trace(f"Only 1 source exists for '{context}', returning dataframe as-is becuase no data exists to combine")
         z_matrix.columns = ["ensembl_gene_id", "combine_z"]
@@ -229,7 +254,7 @@ async def _begin_combining_distributions(
                     matrix=matrix[[GeneIdentifier.ENSEMBL_GENE_ID.value, *batch.sample_names]],
                     source=source,
                     output_combined_matrix_filepath=(
-                        output_filepaths[source.value].parent / f"{context_name}_{source.value}_batch{batch.batch_num}_combined_z_distribution_.csv"
+                        output_filepaths[source.value].parent / f"{context_name}_{source.value}_batch{batch.batch_num}_combined_z_distribution.csv"
                     ),
                     output_figure_dirpath=output_figure_dirpath,
                     weighted_z_floor=weighted_z_floor,
@@ -243,7 +268,7 @@ async def _begin_combining_distributions(
         for df in batch_results:
             merged_batch_results = df if merged_batch_results.empty else merged_batch_results.merge(df, on="ensembl_gene_id", how="outer")
 
-        merged_source_results: pd.DataFrame = await _combine_z_distribution_for_source(
+        merged_source_results: pd.DataFrame = _combine_z_distribution_for_source(
             merged_source_data=merged_batch_results,
             context_name=context_name,
             num_replicates=sum(batch.num_samples for batch in batch_names[source.value]),