From b3aee158538909224ecaaa3b132fa6e9594c561d Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 2 Feb 2026 13:58:35 -0500 Subject: [PATCH 1/4] Initial add 11618 [skip ci] --- .../experimental_abinitio_pipeline_11618.py | 139 ++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 gallery/experiments/experimental_abinitio_pipeline_11618.py diff --git a/gallery/experiments/experimental_abinitio_pipeline_11618.py b/gallery/experiments/experimental_abinitio_pipeline_11618.py new file mode 100644 index 000000000..0a793fcbb --- /dev/null +++ b/gallery/experiments/experimental_abinitio_pipeline_11618.py @@ -0,0 +1,139 @@ +""" +Abinitio Pipeline - Experimental Data EMPIAR 11618 +================================================== + +This notebook introduces a selection of +components corresponding to loading real Relion picked +particle cryo-EM data and running key ASPIRE-Python +ab initio model components as a pipeline. + +Specifically this pipeline uses the +EMPIAR 11618 picked particles data, available here: + +https://www.ebi.ac.uk/empiar/EMPIAR-11618 +""" + +# %% +# Imports +# ------- +# Import packages that will be used throughout this experiment. + +import logging +from pathlib import Path + +from aspire.classification import RandomClassSelector +from aspire.denoising import LegacyClassAvgSource +from aspire.reconstruction import MeanEstimator +from aspire.source import OrientedSource, RelionSource + +logger = logging.getLogger(__name__) + + +# %% +# Parameters +# --------------- +# +# Use of GPU is expected for a large configuration. + +# Inputs +# User is responsible for downloading `11618` raw data from EMPIAR, +# and assigning correct path. +input_dir = "11618/data/particles/" +starfile_in = f"{input_dir}/J43_particles.star" +data_folder = f"{input_dir}" + +# Config +n_imgs = None # Set to None for all images in starfile, can set smaller for tests +img_size = 129 # Downsample the images/reconstruction to a desired resolution +n_classes = 1000 # How many class averages to compute and use for reconstruction +n_nbor = 100 # How many neighbors to stack for each class average + +# Outputs +preprocessed_fn = f"11618_preprocessed_pf_lds{img_size}px_lnbg_lwt_inv.star" +class_avg_fn = f"11618_rand{n_classes}_cls_avgs_m{n_nbor}_{img_size}px.star" +volume_output_filename = f"11618_abinitio_c{n_classes}_m{n_nbor}_{img_size}px.mrc" + +# %% +# Source data and Preprocessing +# ----------------------------- +# +# ``RelionSource`` is used to access the experimental data via a `STAR` file. +# Begin by preprocessing to correct for CTF, then downsample to ``img_size`` +# and apply noise correction. + + +# Create a source object for the experimental images +src = RelionSource( + starfile_in, max_rows=n_imgs, data_folder=data_folder +) + +# Use phase_flip to attempt correcting for CTF. +# Caching is used throughout for speeding up large datasets on high memory machines. +logger.info("Perform phase flip to input images.") +src = src.phase_flip().cache() + + +# Downsample the images. +logger.info(f"Set the resolution to {img_size} X {img_size}") +src = src.downsample(img_size).cache() + +# Normalize the background of the images. +src = src.legacy_normalize_background().cache() + +# Estimate the noise and whiten based on the estimated noise. +src = src.legacy_whiten().cache() + +# Optionally invert image contrast. +logger.info("Invert the global density contrast") +src = src.invert_contrast().cache() + +# Save the preprocessed images. +# These can be reused to experiment with later stages of the pipeline +# without repeating the preprocessing computations. +src.save(preprocessed_fn, save_mode="single", overwrite=True) + +# %% +# Class Averaging +# ---------------------- +# +# Now perform classification and averaging for each class. +# `RandomClassSelector` will randomize the class averages chosen for reconstruction. +# This avoids having to compute all class averages and sort them by some notion of quality, which is computationally intensive. + +logger.info("Begin Class Averaging") +avgs = LegacyClassAvgSource(src, class_selector=RandomClassSelector(), n_nbor=n_nbor) + +# Compute and cache the random set of `n_classes` from `src`. +avgs = avgs[:n_classes].cache() + +# Save the random set of class averages to disk so they can be re-used. +avgs.save(class_avg_fn, save_mode="single", overwrite=True) + + +# %% +# Common Line Estimation +# ---------------------- +# +# Estimate orientation of projections and assign to source by +# applying ``OrientedSource`` to the class averages from the prior +# step. By default this applies the Common Line with Synchronization +# Voting ``CLSync3N`` method. + +logger.info("Begin Orientation Estimation") +oriented_src = OrientedSource(avgs) + +# %% +# Volume Reconstruction +# ---------------------- +# +# Using the oriented source, attempt to reconstruct a volume. + +logger.info("Begin Volume reconstruction") + +# Set up an estimator to perform the backprojection. +estimator = MeanEstimator(oriented_src) + +# Perform the estimation and save the volume. +estimated_volume = estimator.estimate() +estimated_volume.save(volume_output_filename, overwrite=True) +logger.info(f"Saved Volume to {str(Path(volume_output_filename).resolve())}") From bf248330937bbdf10a2522f1e5ff4dc047829744 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 2 Feb 2026 13:59:46 -0500 Subject: [PATCH 2/4] style cleanup [skip ci] --- gallery/experiments/experimental_abinitio_pipeline_11618.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gallery/experiments/experimental_abinitio_pipeline_11618.py b/gallery/experiments/experimental_abinitio_pipeline_11618.py index 0a793fcbb..3051e45ad 100644 --- a/gallery/experiments/experimental_abinitio_pipeline_11618.py +++ b/gallery/experiments/experimental_abinitio_pipeline_11618.py @@ -63,9 +63,7 @@ # Create a source object for the experimental images -src = RelionSource( - starfile_in, max_rows=n_imgs, data_folder=data_folder -) +src = RelionSource(starfile_in, max_rows=n_imgs, data_folder=data_folder) # Use phase_flip to attempt correcting for CTF. # Caching is used throughout for speeding up large datasets on high memory machines. From dba0592beb0557f79652237ea8c54ba8c4cef41a Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 5 Feb 2026 14:48:05 -0500 Subject: [PATCH 3/4] Change to non-legacy preproc and 32 nbors --- .../experiments/experimental_abinitio_pipeline_11618.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gallery/experiments/experimental_abinitio_pipeline_11618.py b/gallery/experiments/experimental_abinitio_pipeline_11618.py index 3051e45ad..f69ae53b8 100644 --- a/gallery/experiments/experimental_abinitio_pipeline_11618.py +++ b/gallery/experiments/experimental_abinitio_pipeline_11618.py @@ -46,10 +46,10 @@ n_imgs = None # Set to None for all images in starfile, can set smaller for tests img_size = 129 # Downsample the images/reconstruction to a desired resolution n_classes = 1000 # How many class averages to compute and use for reconstruction -n_nbor = 100 # How many neighbors to stack for each class average +n_nbor = 32 # How many neighbors to stack for each class average # Outputs -preprocessed_fn = f"11618_preprocessed_pf_lds{img_size}px_lnbg_lwt_inv.star" +preprocessed_fn = f"11618_preprocessed_pf_ds{img_size}px_nbg_wt_inv.star" class_avg_fn = f"11618_rand{n_classes}_cls_avgs_m{n_nbor}_{img_size}px.star" volume_output_filename = f"11618_abinitio_c{n_classes}_m{n_nbor}_{img_size}px.mrc" @@ -76,10 +76,10 @@ src = src.downsample(img_size).cache() # Normalize the background of the images. -src = src.legacy_normalize_background().cache() +src = src.normalize_background().cache() # Estimate the noise and whiten based on the estimated noise. -src = src.legacy_whiten().cache() +src = src.whiten().cache() # Optionally invert image contrast. logger.info("Invert the global density contrast") From 34006d18e26c08fa8b897614350baed7d8b3ea15 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 9 Feb 2026 10:07:02 -0500 Subject: [PATCH 4/4] Minor cleanup --- .../experimental_abinitio_pipeline_11618.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/gallery/experiments/experimental_abinitio_pipeline_11618.py b/gallery/experiments/experimental_abinitio_pipeline_11618.py index f69ae53b8..0982adb21 100644 --- a/gallery/experiments/experimental_abinitio_pipeline_11618.py +++ b/gallery/experiments/experimental_abinitio_pipeline_11618.py @@ -43,9 +43,9 @@ data_folder = f"{input_dir}" # Config -n_imgs = None # Set to None for all images in starfile, can set smaller for tests +n_imgs = None # Set to None for all images in starfile; set smaller for tests img_size = 129 # Downsample the images/reconstruction to a desired resolution -n_classes = 1000 # How many class averages to compute and use for reconstruction +n_classes = 1000 # Number of class averages to use for reconstruction n_nbor = 32 # How many neighbors to stack for each class average # Outputs @@ -70,11 +70,11 @@ logger.info("Perform phase flip to input images.") src = src.phase_flip().cache() - # Downsample the images. logger.info(f"Set the resolution to {img_size} X {img_size}") src = src.downsample(img_size).cache() +logger.info("Apply noise correction to images") # Normalize the background of the images. src = src.normalize_background().cache() @@ -82,7 +82,7 @@ src = src.whiten().cache() # Optionally invert image contrast. -logger.info("Invert the global density contrast") +logger.info("Invert the global density contrast if needed") src = src.invert_contrast().cache() # Save the preprocessed images. @@ -95,8 +95,11 @@ # ---------------------- # # Now perform classification and averaging for each class. -# `RandomClassSelector` will randomize the class averages chosen for reconstruction. -# This avoids having to compute all class averages and sort them by some notion of quality, which is computationally intensive. +# `RandomClassSelector` will randomize the class averages chosen for +# reconstruction. This avoids having to compute all class averages +# and sort them by some notion of quality, which is computationally +# intensive. Instead this example computes a small random sample of +# classes from the entire data set. logger.info("Begin Class Averaging") avgs = LegacyClassAvgSource(src, class_selector=RandomClassSelector(), n_nbor=n_nbor) @@ -127,7 +130,6 @@ # Using the oriented source, attempt to reconstruct a volume. logger.info("Begin Volume reconstruction") - # Set up an estimator to perform the backprojection. estimator = MeanEstimator(oriented_src)