Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ee52090
Modify tentivirus to lentivirus in alembic file.
EstelleDa Mar 3, 2025
acc7b7a
Merge pull request #393 from VariantEffect/estelle/modifyTentivirus
EstelleDa Mar 3, 2025
ae9f2ff
Debug UnicodeDecodeError problem and add a related test. Remove unnec…
EstelleDa Mar 4, 2025
f55d8d4
Add statistics router for counts of published records
bencap Mar 4, 2025
b8b2b06
Remove extraneous `f` prefix
EstelleDa Mar 4, 2025
77cf4f7
Merge pull request #396 from VariantEffect/estelle/debugUploadFilesUn…
EstelleDa Mar 5, 2025
8076cf5
Refactor statistics endpoints for simplicity
bencap Mar 5, 2025
e407383
Script for populating mapped gene targets via ClinGen
bencap Mar 6, 2025
9acd2ab
Statistics count endpoints for variants, mapped variants, and mapped …
bencap Mar 6, 2025
c6a53ba
Test utility for inserting mapped variants to DB
bencap Mar 6, 2025
9aedb21
Add test for statistics mapped target gene counts
bencap Mar 6, 2025
5153c34
Merge pull request #397 from VariantEffect/feature/bencap/375/ave-sta…
bencap Mar 13, 2025
9ad50d8
Fixes #358: Literal 'null' inserted for NULL score ranges.
bencap Mar 7, 2025
46b7877
Merge pull request #399 from VariantEffect/bugfix/bencap/358/null-sco…
bencap Mar 14, 2025
d1f097d
(Materialized) view utilities
bencap Mar 14, 2025
f8c6b73
Add 'alembic-utils' to help manage materialized view migrations
bencap Mar 14, 2025
4afc810
Require refresh requests to provide an active session
bencap Mar 14, 2025
4f53e68
Create materialized view for variant publication dates
bencap Mar 14, 2025
44a25e5
Use published variants materialized view for statistics dashboard
bencap Mar 14, 2025
52e5e05
Add async materialized view refresh jobs
bencap Mar 18, 2025
d140d50
Bump python-jose to appease dependabot
bencap Mar 18, 2025
4deebf0
Add Additional Statistics Routes
bencap Mar 14, 2025
5bbd315
Merge pull request #402 from VariantEffect/feature/bencap/sa-mat-views
bencap Mar 18, 2025
00953ec
Merge pull request #404 from VariantEffect/feature/bencap/additional-…
bencap Mar 18, 2025
418762f
Cron timer in USA AM via UTC
bencap Mar 19, 2025
7c809fb
Bump version number
bencap Mar 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion alembic/versions/9702d32bacb3_controlled_keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def upgrade():
"""INSERT INTO controlled_keywords (key, value, vocabulary, special, description, creation_date, modification_date) VALUES ('Delivery method', 'Adeno-associated virus transduction', NULL, False, 'How the variant library was delivered to the model system for phenotype evaluation.', NOW(), NOW())"""
)
op.execute(
"""INSERT INTO controlled_keywords (key, value, vocabulary, special, description, creation_date, modification_date) VALUES ('Delivery method', 'Tentivirus transduction', NULL, False, 'How the variant library was delivered to the model system for phenotype evaluation.', NOW(), NOW())"""
"""INSERT INTO controlled_keywords (key, value, vocabulary, special, description, creation_date, modification_date) VALUES ('Delivery method', 'Lentivirus transduction', NULL, False, 'How the variant library was delivered to the model system for phenotype evaluation.', NOW(), NOW())"""
)
op.execute(
"""INSERT INTO controlled_keywords (key, value, vocabulary, special, description, creation_date, modification_date) VALUES ('Delivery method', 'Chemical or heat shock transformation', NULL, False, 'How the variant library was delivered to the model system for phenotype evaluation.', NOW(), NOW())"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""materialized view for variant statistics
Revision ID: b85bc7b1bec7
Revises: c404b6719110
Create Date: 2025-03-14 01:53:19.898198
"""

from alembic import op
from alembic_utils.pg_materialized_view import PGMaterializedView
from sqlalchemy.dialects import postgresql

from mavedb.models.published_variant import signature, definition


# revision identifiers, used by Alembic.
revision = "b85bc7b1bec7"
down_revision = "c404b6719110"
branch_labels = None
depends_on = None


def upgrade():
    """Create the materialized view named by ``signature`` and index its columns."""
    materialized_view = PGMaterializedView(
        schema="public",
        signature=signature,
        definition=definition.compile(dialect=postgresql.dialect()).string,
        with_data=True,
    )
    op.create_entity(materialized_view)

    # (indexed column, unique?) pairs, listed in creation order. Only the
    # index on mapped_variant_id is declared unique.
    index_specs = (
        ("variant_id", False),
        ("variant_urn", False),
        ("score_set_id", False),
        ("score_set_urn", False),
        ("mapped_variant_id", True),
    )
    for column_name, is_unique in index_specs:
        op.create_index(
            f"idx_{signature}_{column_name}",
            signature,
            [column_name],
            unique=is_unique,
        )


def downgrade():
    """Drop the view's indexes, then drop the materialized view itself."""
    indexed_columns = (
        "variant_id",
        "variant_urn",
        "mapped_variant_id",
        "score_set_id",
        "score_set_urn",
    )
    for column_name in indexed_columns:
        op.drop_index(f"idx_{signature}_{column_name}", signature)

    # The entity handed to drop_entity is built from the same signature and
    # compiled definition that upgrade() uses to create it.
    materialized_view = PGMaterializedView(
        schema="public",
        signature=signature,
        definition=definition.compile(dialect=postgresql.dialect()).string,
        with_data=True,
    )
    op.drop_entity(materialized_view)
2,524 changes: 1,293 additions & 1,231 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "mavedb"
version = "2025.1.0"
version = "2025.1.1"
description = "API for MaveDB, the database of Multiplexed Assays of Variant Effect."
license = "AGPL-3.0-only"
readme = "README.md"
Expand Down Expand Up @@ -41,6 +41,7 @@ SQLAlchemy = "~2.0.0"

# Optional dependencies for running this application as a server
alembic = { version = "~1.7.6", optional = true }
alembic-utils = { version = "0.8.1", optional = true }
arq = { version = "~0.25.0", optional = true }
authlib = { version = "~1.3.1", optional = true }
boto3 = { version = "~1.34.97", optional = true }
Expand All @@ -51,7 +52,7 @@ fastapi = { version = "~0.95.0", optional = true }
hgvs = { version = "~1.5.4", optional = true }
orcid = { version = "~1.0.3", optional = true }
psycopg2 = { version = "~2.9.3", optional = true }
python-jose = { extras = ["cryptography"], version = "~3.3.0", optional = true }
python-jose = { extras = ["cryptography"], version = "~3.4.0", optional = true }
python-multipart = { version = "~0.0.5", optional = true }
requests = { version = "~2.32.0", optional = true }
starlette = { version = "~0.27.0", optional = true }
Expand Down Expand Up @@ -85,7 +86,7 @@ SQLAlchemy = { extras = ["mypy"], version = "~2.0.0" }


[tool.poetry.extras]
server = ["alembic", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "python-jose", "python-multipart", "requests", "starlette", "starlette-context", "slack-sdk", "uvicorn", "watchtower"]
server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", "cdot", "cryptography", "fastapi", "hgvs", "orcid", "psycopg2", "python-jose", "python-multipart", "requests", "starlette", "starlette-context", "slack-sdk", "uvicorn", "watchtower"]


[tool.mypy]
Expand Down
2 changes: 1 addition & 1 deletion src/mavedb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
logger = module_logging.getLogger(__name__)

__project__ = "mavedb-api"
__version__ = "2025.1.0"
__version__ = "2025.1.1"

logger.info(f"MaveDB {__version__}")
146 changes: 146 additions & 0 deletions src/mavedb/db/view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""
Utilities for managing views via SQLAlchemy.
"""

from functools import partial

import sqlalchemy as sa
from sqlalchemy.ext import compiler
from sqlalchemy.schema import DDLElement, MetaData
from sqlalchemy.orm import Session

from mavedb.db.base import Base

# See: https://github.com/sqlalchemy/sqlalchemy/wiki/Views, https://github.com/jeffwidman/sqlalchemy-postgresql-materialized-views?tab=readme-ov-file


class CreateView(DDLElement):
    """DDL element describing a view (or materialized view) to be created.

    Holds the view's name, the selectable that defines its contents, and a
    flag saying whether the view is materialized.
    """

    def __init__(self, name: str, selectable: sa.Select, materialized: bool):
        self.materialized = materialized
        self.selectable = selectable
        self.name = name


class DropView(DDLElement):
    """DDL element describing a view (or materialized view) to be dropped.

    Holds the view's name and a flag saying whether the view is materialized.
    """

    def __init__(self, name: str, materialized: bool):
        self.materialized = materialized
        self.name = name


class MaterializedView(Base):
    """Abstract declarative base for classes mapped onto a materialized view."""

    __abstract__ = True

    @classmethod
    def refresh(cls, connection, concurrently=True):
        """Refresh the materialized view backing this class.

        ``connection`` is forwarded as the ``session`` argument of
        ``refresh_mat_view``, which calls ``flush()`` and ``execute()`` on it,
        so despite its name it presumably needs to be an active ORM session
        (verify against callers). ``concurrently`` is forwarded unchanged.
        """
        view_name = cls.__table__.fullname
        refresh_mat_view(connection, view_name, concurrently)


@compiler.compiles(CreateView)
def _create_view(element: CreateView, compiler, **kw):
    """Render a ``CreateView`` element as ``CREATE [MATERIALIZED] VIEW <name> AS <select>``."""
    view_kind = "MATERIALIZED VIEW" if element.materialized else "VIEW"
    # literal_binds=True asks the SQL compiler to render bound parameters
    # inline as literals in the view's SELECT.
    select_sql = compiler.sql_compiler.process(element.selectable, literal_binds=True)
    return f"CREATE {view_kind} {element.name} AS {select_sql}"


@compiler.compiles(DropView)
def _drop_view(element: DropView, compiler, **kw):
    """Render a ``DropView`` element as ``DROP [MATERIALIZED] VIEW <name>``."""
    view_kind = "MATERIALIZED VIEW" if element.materialized else "VIEW"
    return f"DROP {view_kind} {element.name}"


def view_exists(ddl: CreateView, target, connection: sa.Connection, materialized: bool, **kw):
    """Return True when a view named ``ddl.name`` is visible on ``connection``.

    Written to be used as an ``execute_if`` callable. Whether materialized or
    plain views are listed is decided by ``ddl.materialized``; the
    ``materialized`` parameter is accepted but not consulted. Returns False
    when no inspector can be obtained for the connection.
    """
    inspector = sa.inspect(connection)
    if inspector is None:
        return False

    if ddl.materialized:
        existing_names = inspector.get_materialized_view_names()
    else:
        existing_names = inspector.get_view_names()
    return ddl.name in existing_names


def view_doesnt_exist(ddl: CreateView, target, connection: sa.Connection, materialized: bool, **kw):
    """Return the negation of ``view_exists`` called with the same arguments."""
    already_present = view_exists(ddl, target, connection, materialized, **kw)
    return not already_present


def view(name: str, selectable: sa.Select, metadata: MetaData = Base.metadata, materialized: bool = False):
    """
    Register a view or materialized view to SQLAlchemy. Use this function to define a view on some arbitrary
    model class.

    ```
    class MyView(Base):
        __table__ = view(
            "my_view",
            select(
                MyModel.id.label("id"),
                MyModel.name.label("name"),
            ),
            materialized=False,
        )
    ```

    When registered in this manner, the view is created after `metadata`'s tables are created and dropped
    before they are dropped, in both cases only when needed (creation is skipped if the view already exists,
    dropping is skipped if it does not). You can then query the view as if it were an ORM object.

    ```
    results = db.execute(select(MyView.name).where(MyView.id == 1)).all()
    ```

    :param name: Name of the view to create.
    :param selectable: The SELECT statement defining the view's contents. Its selected columns become
        the columns of the returned table.
    :param metadata: The metadata whose create/drop events trigger creation and removal of the view.
    :param materialized: Whether to create a materialized view rather than a plain view.
    :return: A table construct named `name` whose columns mirror `selectable`'s selected columns.
    """
    t = sa.table(
        name,
        *(sa.Column(c.name, c.type, primary_key=c.primary_key) for c in selectable.selected_columns),
    )
    t.primary_key.update(c for c in t.c if c.primary_key)  # type: ignore

    # Normalise once so the DDL elements and their guards always receive a real bool.
    is_materialized = bool(materialized)

    # TODO: Figure out indices.
    # Plain and materialized views are registered identically; only the flag differs.
    sa.event.listen(
        metadata,
        "after_create",
        CreateView(name, selectable, is_materialized).execute_if(
            callable_=partial(view_doesnt_exist, materialized=is_materialized)
        ),
    )
    sa.event.listen(
        metadata,
        "before_drop",
        DropView(name, is_materialized).execute_if(callable_=partial(view_exists, materialized=is_materialized)),
    )

    return t


def refresh_mat_view(session: Session, name: str, concurrently=True):
    """
    Refresh the single materialized view called `name`, issuing the statement through `session`.
    When `concurrently` is truthy the CONCURRENTLY modifier is added to the statement.
    """
    # session.execute() bypasses autoflush, so flush explicitly first; otherwise
    # newly-created/modified objects would be left out of the refresh.
    session.flush()

    modifier = "CONCURRENTLY " if concurrently else ""
    statement = f"REFRESH MATERIALIZED VIEW {modifier}{name}"
    session.execute(sa.text(statement))


def refresh_all_mat_views(session: Session, concurrently=True):
    """
    Refresh every materialized view reported by the inspector for `session`'s connection.
    Views are refreshed in whatever order the inspector lists them, so view definitions
    can't depend on each other. Does nothing when no inspector is available.
    """
    inspector = sa.inspect(session.connection())
    view_names = inspector.get_materialized_view_names() if inspector else ()

    for view_name in view_names:
        refresh_mat_view(session, view_name, concurrently)
1 change: 1 addition & 0 deletions src/mavedb/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"license",
"mapped_variant",
"publication_identifier",
"published_variant",
"raw_read_identifier",
"refseq_identifier",
"refseq_offset",
Expand Down
45 changes: 45 additions & 0 deletions src/mavedb/models/published_variant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from sqlalchemy import select, join

from mavedb.db.view import MaterializedView, view

from mavedb.models.score_set import ScoreSet
from mavedb.models.variant import Variant
from mavedb.models.mapped_variant import MappedVariant


# Name under which the view defined below is registered.
signature = "published_variants_materialized_view"
# SELECT statement defining the view's contents: one row per
# (variant, mapped variant) pairing, restricted to published score sets.
definition = (
select(
Variant.id.label("variant_id"),
Variant.urn.label("variant_urn"),
MappedVariant.id.label("mapped_variant_id"),
ScoreSet.id.label("score_set_id"),
ScoreSet.urn.label("score_set_urn"),
ScoreSet.published_date.label("published_date"),
MappedVariant.current.label("current_mapped_variant"),
)
.select_from(
# Outer join: variants with no mapped variant are still included, with
# NULL mapped-variant columns. The join to ScoreSet is an inner join.
join(Variant, MappedVariant, Variant.id == MappedVariant.variant_id, isouter=True).join(
ScoreSet, ScoreSet.id == Variant.score_set_id
)
)
.where(
# Keep only rows whose score set has a publication date.
ScoreSet.published_date.is_not(None),
)
)


class PublishedVariantsMV(MaterializedView):
    """ORM class over the materialized view built from ``definition`` and named by ``signature``."""

    __table__ = view(signature, definition, materialized=True)

    # Expose each of the view's columns as a class attribute.
    variant_id = __table__.c.variant_id
    variant_urn = __table__.c.variant_urn
    mapped_variant_id = __table__.c.mapped_variant_id
    score_set_id = __table__.c.score_set_id
    score_set_urn = __table__.c.score_set_urn
    published_date = __table__.c.published_date
    current_mapped_variant = __table__.c.current_mapped_variant
Loading
Loading