Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ recursive-include docs Makefile
recursive-include docs *.sh
recursive-include src *.conf
recursive-include src *.cu
recursive-include src *.h
recursive-include src *.yaml
prune docs/build
prune docs/source
Expand Down
95 changes: 68 additions & 27 deletions gallery/experiments/experimental_abinitio_pipeline_10081.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,18 @@
This notebook introduces a selection of
components corresponding to loading real Relion picked
particle cryo-EM data and running key ASPIRE-Python
Abinitio model components as a pipeline.
Abinitio model components as a pipeline. Some of the
components are tailored specifically to handle the C4
cyclic symmetry exhibited by this dataset.

This demonstrates reproducing results similar to those found in:

.. admonition:: Publication

| Gabi Pragier and Yoel Shkolnisky
| A common lines approach for ab-initio modeling of cyclically-symmetric molecules
| Inverse Problems, 35(12), p.124005, 2019.
| DOI 10.1088/1361-6420/ab2fb2

Specifically this pipeline uses the
EMPIAR 10081 picked particles data, available here:
Expand All @@ -27,7 +38,6 @@

from aspire.abinitio import CLSymmetryC3C4
from aspire.denoising import LegacyClassAvgSource
from aspire.noise import AnisotropicNoiseEstimator
from aspire.reconstruction import MeanEstimator
from aspire.source import OrientedSource, RelionSource

Expand All @@ -37,17 +47,29 @@
# %%
# Parameters
# ---------------
# Example simulation configuration.

n_imgs = None # Set to None for all images in starfile, can set smaller for tests.
img_size = 32 # Downsample the images/reconstruction to a desired resolution
n_classes = 500 # How many class averages to compute.
n_nbor = 100 # How many neighbors to stack
# Use of GPU is expected for a large configuration.
# If running on a less capable machine, or simply experimenting, it is
# strongly recommended to reduce ``img_size``, ``n_imgs``, and
# ``n_nbor``.

# Inputs
starfile_in = "10081/data/particle_stacks/data.star"
data_folder = "." # This depends on the specific starfile entries.
volume_output_filename = f"10081_abinitio_c{n_classes}_m{n_nbor}_{img_size}.mrc"
pixel_size = 1.3

# Config
n_imgs = None # Set to None for all images in starfile, can set smaller for tests.
img_size = 129 # Downsample the images/reconstruction to a desired resolution
n_classes = 5000 # How many class averages to compute.
n_nbor = 50 # How many neighbors to stack

# Outputs
preprocessed_fn = f"10081_preprocessed_{img_size}px.star"
class_avg_fn = f"10081_var_sorted_cls_avgs_m{n_nbor}_{img_size}px.star"
oriented_fn = f"10081_oriented_class_averages_{img_size}px.star"
volume_output_filename = f"10081_abinitio_c{n_classes}_m{n_nbor}_{img_size}.mrc"


# %%
# Source data and Preprocessing
Expand All @@ -66,20 +88,31 @@
symmetry_group="C4",
)

# Downsample the images
logger.info(f"Set the resolution to {img_size} X {img_size}")
src = src.downsample(img_size)

# Use phase_flip to attempt correcting for CTF.
logger.info("Perform phase flip to input images.")
src = src.phase_flip()
src = src.phase_flip().cache()

# Legacy MATLAB cropped the images to an odd resolution.
src = src.crop_pad(src.L - 1).cache()

# Downsample the images.
logger.info(f"Set the resolution to {img_size} X {img_size}")
src = src.legacy_downsample(img_size).cache()

# Normalize the background of the images.
src = src.legacy_normalize_background().cache()

# Estimate the noise and `Whiten` based on the estimated noise
aiso_noise_estimator = AnisotropicNoiseEstimator(src)
src = src.whiten(aiso_noise_estimator.filter)
# Estimate the noise and whiten based on the estimated noise.
src = src.legacy_whiten().cache()

# Caching is used for speeding up large datasets on high memory machines.
src = src.cache()
# Optionally invert image contrast.
logger.info("Invert the global density contrast")
src = src.invert_contrast().cache()

# Save the preprocessed images.
# These can be reused to experiment with later stages of the pipeline
# without repeating the preprocessing computations.
src.save(preprocessed_fn, save_mode="single", overwrite=True)

# %%
# Class Averaging
Expand All @@ -93,28 +126,36 @@
# Automatically configure parallel processing
avgs = LegacyClassAvgSource(src, n_nbor=n_nbor)

# We'll continue our pipeline with the first ``n_classes`` from ``avgs``.
avgs = avgs[:n_classes]
# Save the entire set of class averages to disk so they can be re-used.
avgs.save(class_avg_fn, save_mode="single", overwrite=True)

# Save off the set of class average images.
avgs.save("experimental_10081_class_averages.star")
# We'll continue our pipeline with the first ``n_classes`` from
# ``avgs``. The classes will be selected by the ``class_selector`` of a
# ``ClassAvgSource``, which in this case will be the class averages
# having the largest variance. Note global sorting requires computing
# all class averages, which is computationally intensive.
avgs = avgs[:n_classes].cache()

# %%
# Common Line Estimation
# ----------------------
#
# Next create a CL instance for estimating orientation of projections
# using the Common Line with Synchronization Voting method.
# Create a custom orientation estimation object for ``avgs``.
# Here we use the ``CLSymmetryC3C4`` algorithm, which is
# designed for molecules with C3 or C4 symmetry.

logger.info("Begin Orientation Estimation")

# Create a custom orientation estimation object for ``avgs``.
orient_est = CLSymmetryC3C4(avgs, symmetry="C4", n_theta=72, max_shift=0)
orient_est = CLSymmetryC3C4(avgs, symmetry="C4")

# Create an ``OrientedSource`` class instance that performs orientation
# estimation in a lazy fashion upon request of images or rotations.
oriented_src = OrientedSource(avgs, orient_est)

# Save off the selected set of class average images, along with the
# estimated orientations and shifts. These can be reused to
# experiment with alternative volume reconstructions.
oriented_src.save(oriented_fn, save_mode="single", overwrite=True)

# %%
# Volume Reconstruction
# ----------------------
Expand Down
122 changes: 122 additions & 0 deletions src/aspire/abinitio/J_sync.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#include <stdint.h>
#include <math.h>
#include "commonline_utils.h"

extern "C" __global__
void signs_times_v(int n, double* Rijs, const double* vec, double* new_vec, bool J_weighting)
{
/* thread index (1d), represents "i" index */
unsigned int i = blockDim.x * blockIdx.x + threadIdx.x;

/* no-op when out of bounds */
if(i >= n) return;

double c[4];
unsigned int j;
unsigned int k;
for(k=0;k<4;k++){c[k]=0;}
unsigned long ij, jk, ik;
int best_i;
double best_val;
double s_ij_jk, s_ik_jk, s_ij_ik;
double alt_ij_jk, alt_ij_ik, alt_ik_jk;

double *Rij, *Rjk, *Rik;
double JRijJ[9], JRjkJ[9], JRikJ[9];
double tmp[9];

int signs_confs[4][3];
for(int a=0; a<4; a++) { for(k=0; k<3; k++) { signs_confs[a][k]=1; } }
signs_confs[1][0]=-1; signs_confs[1][2]=-1;
signs_confs[2][0]=-1; signs_confs[2][1]=-1;
signs_confs[3][1]=-1; signs_confs[3][2]=-1;

/* initialize alternatives */
/* when we find the best J-configuration, we also compare it to the alternative 2nd best one.
* this comparison is done for every pair in the triplete independently. to make sure that the
* alternative is indeed different in relation to the pair, we document the differences between
* the configurations in advance:
* ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from
* best_conf in relation to pair */

int ALTS[2][4][3];
ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1;
ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2;
ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0;
ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1;
ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0;
ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2;


for(j=i+1; j< (n - 1); j++){
ij = PAIR_IDX(n, i, j);
for(k=j+1; k< n; k++){
ik = PAIR_IDX(n, i, k);
jk = PAIR_IDX(n, j, k);

/* compute configurations matches scores */
Rij = Rijs + 9*ij;
Rjk = Rijs + 9*jk;
Rik = Rijs + 9*ik;

JRJ(Rij, JRijJ);
JRJ(Rjk, JRjkJ);
JRJ(Rik, JRikJ);

mult_3x3(tmp, Rij, Rjk);
c[0] = diff_norm_3x3(tmp, Rik);

mult_3x3(tmp, JRijJ, Rjk);
c[1] = diff_norm_3x3(tmp, Rik);

mult_3x3(tmp, Rij, JRjkJ);
c[2] = diff_norm_3x3(tmp, Rik);

mult_3x3(tmp, Rij, Rjk);
c[3] = diff_norm_3x3(tmp, JRikJ);

/* find best match */
best_i=0; best_val=c[0];
if (c[1]<best_val) {best_i=1; best_val=c[1];}
if (c[2]<best_val) {best_i=2; best_val=c[2];}
if (c[3]<best_val) {best_i=3; best_val=c[3];}

/* set triangles entries to be signs */
s_ij_jk = signs_confs[best_i][0];
s_ik_jk = signs_confs[best_i][1];
s_ij_ik = signs_confs[best_i][2];

/* J weighting */
if(J_weighting){
/* for each triangle side, find the best alternative */
alt_ij_jk = c[ALTS[0][best_i][0]];
if (c[ALTS[1][best_i][0]] < alt_ij_jk){
alt_ij_jk = c[ALTS[1][best_i][0]];
}

alt_ik_jk = c[ALTS[0][best_i][1]];
if (c[ALTS[1][best_i][1]] < alt_ik_jk){
alt_ik_jk = c[ALTS[1][best_i][1]];
}
alt_ij_ik = c[ALTS[0][best_i][2]];
if (c[ALTS[1][best_i][2]] < alt_ij_ik){
alt_ij_ik = c[ALTS[1][best_i][2]];
}

/* Update scores */
s_ij_jk *= 1 - sqrt(best_val / alt_ij_jk);
s_ik_jk *= 1 - sqrt(best_val / alt_ik_jk);
s_ij_ik *= 1 - sqrt(best_val / alt_ij_ik);
}


/* update multiplication */
atomicAdd(&(new_vec[ij]), s_ij_jk*vec[jk] + s_ij_ik*vec[ik]);
atomicAdd(&(new_vec[jk]), s_ij_jk*vec[ij] + s_ik_jk*vec[ik]);
atomicAdd(&(new_vec[ik]), s_ij_ik*vec[ij] + s_ik_jk*vec[jk]);

} /* k */
} /* j */

return;
};
Loading
Loading