Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 1 addition & 46 deletions .github/workflows/continuous_integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,56 +7,11 @@ on:
workflow_dispatch:

jobs:
format:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5

- name: Strip Notebook Output
run: uv tool run --from nbconvert jupyter-nbconvert --clear-output --inplace "main/COMO.ipynb"

- name: Format Python Imports
uses: astral-sh/ruff-action@v3
with:
args: "check --fix --select I"

- name: Format code
uses: astral-sh/ruff-action@v3
with:
args: "format"

- name: Format Notebook
uses: astral-sh/ruff-action@v3
with:
args: "format main/COMO.ipynb"

- name: Commit Changes
uses: stefanzweifel/git-auto-commit-action@v5
with:
commit_message: "style: format code, Jupyter Notebook(s), and Python imports with `ruff`"
file_pattern: "main/como/*.py"


lint:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Check Lint
uses: astral-sh/ruff-action@v3
with:
args: "check --no-fix --verbose"

test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ "3.10", "3.11", "3.12" ]
python-version: [ "3.11", "3.12", "3.13" ]
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down
12 changes: 7 additions & 5 deletions main/como/combine_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,13 @@ def _combine_z_distribution_for_context(
var_name="source",
value_name="zscore",
)
combined_df = pd.DataFrame({
"ensembl_gene_id": z_matrix["ensembl_gene_id"],
"zscore": combined_z_matrix,
"source": "combined",
})
combined_df = pd.DataFrame(
{
"ensembl_gene_id": z_matrix["ensembl_gene_id"],
"zscore": combined_z_matrix,
"source": "combined",
}
)
stack_df = pd.concat([stack_df, combined_df])
# graph_zscore_distribution(
# df=stack_df,
Expand Down
32 changes: 18 additions & 14 deletions main/como/rnaseq_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,12 +477,14 @@ def zfpkm_plot(results, *, output_png_filepath: Path, plot_xfloor: int = -4):
scale_fitted = fitted * (max_fpkm / max_fitted)

to_concat.append(
pd.DataFrame({
"sample_name": [name] * len(x),
"log2fpkm": x,
"fpkm_density": y,
"fitted_density_scaled": scale_fitted,
})
pd.DataFrame(
{
"sample_name": [name] * len(x),
"log2fpkm": x,
"fpkm_density": y,
"fitted_density_scaled": scale_fitted,
}
)
)
mega_df = pd.concat(to_concat, ignore_index=True)
mega_df.columns = pd.Series(data=["sample_name", "log2fpkm", "fpkm_density", "fitted_density_scaled"])
Expand Down Expand Up @@ -1002,14 +1004,16 @@ async def rnaseq_gen( # noqa: C901
)

metadata_df["fragment_length"] = metadata_df["fragment_length"].astype(np.float32)
metadata_df = metadata_df.groupby("sample_name", as_index=False).agg({
"sample_name": "first",
"fragment_length": "mean",
"layout": "first",
"strand": "first",
"study": "first",
"library_prep": "first",
})
metadata_df = metadata_df.groupby("sample_name", as_index=False).agg(
{
"sample_name": "first",
"fragment_length": "mean",
"layout": "first",
"strand": "first",
"study": "first",
"library_prep": "first",
}
)
logger.debug(f"Starting '{context_name}'")
await _process(
context_name=context_name,
Expand Down
8 changes: 4 additions & 4 deletions main/como/rnaseq_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ async def build_from_tab(cls, filepath: Path) -> _STARinformation:
num_ambiguous = [int(i) for i in ambiguous.removesuffix("\n").split("\t")[1:]]

df: pd.DataFrame = await read_file(
fiepath,
filepath,
h5ad_as_df=True,
sep="\t",
header=None,
Expand Down Expand Up @@ -349,9 +349,9 @@ async def _write_counts_matrix(
Returns:
A pandas DataFrame representing the final counts matrix.
"""
counts: list[pd.DataFrame] = await asyncio.gather(*[
_create_sample_counts_matrix(metric) for metric in _organize_gene_counts_files(data_dir=como_context_dir)
])
counts: list[pd.DataFrame] = await asyncio.gather(
*[_create_sample_counts_matrix(metric) for metric in _organize_gene_counts_files(data_dir=como_context_dir)]
)
rna_specific_sample_names = set(config_df.loc[config_df["library_prep"] == rna.value, "sample_name"].tolist())

final_matrix: pd.DataFrame = functools.reduce(lambda left, right: pd.merge(left, right, on="ensembl_gene_id", how="outer"), counts)
Expand Down
Binary file not shown.
30 changes: 22 additions & 8 deletions tests/unit/test_fisher_stats.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
import contextlib
from pathlib import Path

import cobra
import numpy as np
from como.stats.fisher_exact_test import FisherExactTest


def test_fisher_stats():
reference_model = cobra.io.load_matlab_model("main/data/reference_models/GeneralModelUpdatedV3.mat")
scenario_model = cobra.io.read_sbml_model("tests/inputs/naiveB_model.xml")
with (
contextlib.redirect_stderr(Path("/dev/null").open("w", encoding="utf-8")),
contextlib.redirect_stdout(Path("/dev/null").open("w", encoding="utf-8")),
):
reference_model = cobra.io.load_matlab_model("main/data/reference_models/GeneralModelUpdatedV3.mat")
scenario_model = cobra.io.read_sbml_model("tests/inputs/naiveB_model.xml")
real = FisherExactTest.run(reference=reference_model, scenario=scenario_model, pathway="Glycolysis/gluconeogenesis")
expected = FisherExactTest(
pathway="Glycolysis/gluconeogenesis",
statistic=np.float64(4.321708185053381),
pvalue=np.float64(1.2883495211648955e-05),
a=32,
b=10,
c=4496,
d=6072,
)

assert real == expected


assert real.statistic == np.float64(4.321708185053381)
assert real.pvalue == np.float64(1.2883495211648955e-05)
assert real.a == 32
assert real.b == 10
assert real.c == 4496
assert real.d == 6082
test_fisher_stats()
Loading