Skip to content

perf: welford's algorithm for mean-var aggregation (#4147)

Open
ilan-gold wants to merge 2 commits into `main` from `ig/welford`
Open

perf: welford's algorithm for mean-var aggregation (#4147)
ilan-gold wants to merge 2 commits into `main` from `ig/welford`

Conversation

@ilan-gold

Copy link
Copy Markdown
Contributor

From discussions with @zboldyga.

In theory, this implementation should then be reused by #4143 in place of its own custom moments calculation.

  • Closes #
  • Tests included or not required because:

@ilan-gold ilan-gold added this to the 1.12.2 milestone Jun 8, 2026
@ilan-gold ilan-gold changed the title perf: welford's algorithm for mean-var perf: welford's algorithm for mean-var aggregation Jun 8, 2026
out[cat, col] += data.data[j]


@njit

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should make these `nogil`, or provide an option for fau to supply a `nogil` `njit`.

@codecov

codecov Bot commented Jun 8, 2026

Copy link
Copy Markdown

❌ 2 Tests Failed:

Tests completed Failed Passed Skipped
2628 2 2626 150
View the top 2 failed test(s) by shortest run time
tests/test_metrics.py::test_consistency[morans_i-multi-threaded]
Stack Traces | 0.102s run time
metric = <function morans_i at 0x7fae9eda6140>

    @pytest.mark.usefixtures("_threading")
    def test_consistency(metric) -> None:
        pbmc = pbmc68k_reduced()
        pbmc.layers["raw"] = pbmc.raw.X.copy()
        g = pbmc.obsp["connectivities"]
        equality_check = partial(np.testing.assert_allclose, atol=1e-11)

        # This can fail
        equality_check(
            metric(g, pbmc.obs["percent_mito"]),
            metric(g, pbmc.obs["percent_mito"]),
        )
        equality_check(
            metric(g, pbmc.obs["percent_mito"]),
            metric(pbmc, vals=pbmc.obs["percent_mito"]),
        )

        equality_check(  # Test that series and vectors return same value
            metric(g, pbmc.obs["percent_mito"]),
            metric(g, pbmc.obs["percent_mito"].values),
        )

        equality_check(
            metric(pbmc, obsm="X_pca"),
            metric(g, pbmc.obsm["X_pca"].T),
        )

        all_genes = metric(pbmc, layer="raw")
        first_gene = metric(
            pbmc, vals=pbmc[:, pbmc.var_names[0]].layers["raw"].toarray().ravel()
        )

>       np.testing.assert_allclose(all_genes[0], first_gene, rtol=1e-9)
E       AssertionError:
E       Not equal to tolerance rtol=1e-09, atol=0
E
E       Mismatched elements: 1 / 1 (100%)
E       Max absolute difference among violations: 7.9750179849e-08
E       Max relative difference among violations: 1.8702209022e-07
E        ACTUAL: array(0.426421)
E        DESIRED: array(0.426421)

tests/test_metrics.py:72: AssertionError
tests/test_metrics.py::test_consistency[morans_i-single-threaded]
Stack Traces | 1.76s run time
metric = <function morans_i at 0x7fae9eda6140>

    @pytest.mark.usefixtures("_threading")
    def test_consistency(metric) -> None:
        pbmc = pbmc68k_reduced()
        pbmc.layers["raw"] = pbmc.raw.X.copy()
        g = pbmc.obsp["connectivities"]
        equality_check = partial(np.testing.assert_allclose, atol=1e-11)

        # This can fail
        equality_check(
            metric(g, pbmc.obs["percent_mito"]),
            metric(g, pbmc.obs["percent_mito"]),
        )
        equality_check(
            metric(g, pbmc.obs["percent_mito"]),
            metric(pbmc, vals=pbmc.obs["percent_mito"]),
        )

        equality_check(  # Test that series and vectors return same value
            metric(g, pbmc.obs["percent_mito"]),
            metric(g, pbmc.obs["percent_mito"].values),
        )

        equality_check(
            metric(pbmc, obsm="X_pca"),
            metric(g, pbmc.obsm["X_pca"].T),
        )

        all_genes = metric(pbmc, layer="raw")
        first_gene = metric(
            pbmc, vals=pbmc[:, pbmc.var_names[0]].layers["raw"].toarray().ravel()
        )

>       np.testing.assert_allclose(all_genes[0], first_gene, rtol=1e-9)
E       AssertionError:
E       Not equal to tolerance rtol=1e-09, atol=0
E
E       Mismatched elements: 1 / 1 (100%)
E       Max absolute difference among violations: 7.9750179849e-08
E       Max relative difference among violations: 1.8702209022e-07
E        ACTUAL: array(0.426421)
E        DESIRED: array(0.426421)

tests/test_metrics.py:72: AssertionError

To view more test analytics, go to the Test Analytics Dashboard
📋 Got 3 mins? Take this short survey to help us improve Test Analytics.

@ilan-gold ilan-gold requested a review from flying-sheep June 8, 2026 12:24
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant