From 466e1256efaae032ea25a7a9dfd8db5e35832ea1 Mon Sep 17 00:00:00 2001
From: Chen
Date: Sun, 15 Mar 2026 01:08:37 -0500
Subject: [PATCH] cluster bash script, yaml for conda env, docs

---
 clustersetup.md          | 41 +++++++++++++++++++
 pyhealth_cluster_env.yml | 64 ++++++++++++++++++++++++++++++
 setup.sh                 | 86 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 191 insertions(+)
 create mode 100644 clustersetup.md
 create mode 100644 pyhealth_cluster_env.yml
 create mode 100644 setup.sh

diff --git a/clustersetup.md b/clustersetup.md
new file mode 100644
index 000000000..93c3af445
--- /dev/null
+++ b/clustersetup.md
@@ -0,0 +1,41 @@
+# Running PyHealth on the Campus Cluster
+
+## Quick Start
+
+```bash
+git clone https://github.com/Multimodal-PyHealth/PyHealth.git
+cd PyHealth
+chmod +x setup.sh
+./setup.sh
+```
+
+## Data Paths
+
+All MIMIC-4 data is under `/projects/illinois/eng/cs/jimeng/physionet.org/files/`:
+
+| Data | Path |
+|------|------|
+| EHR | `/projects/illinois/eng/cs/jimeng/physionet.org/files/mimiciv/2.2` |
+| Clinical Notes | `/projects/illinois/eng/cs/jimeng/physionet.org/files/mimic-note` |
+| Chest X-rays | `/projects/illinois/eng/cs/jimeng/physionet.org/files/mimic-cxr-jpg/2.1.0` |
+
+**Important:** `NOTE_ROOT` should be `.../mimic-note` (not `.../mimic-note/note`). The config YAML appends `note/` automatically.
+
+Set `CACHE_DIR` to your own writable directory, e.g. `/u/YOUR_NETID/pyhealth_cache`.
+
+## Running on a Compute Node
+
+Request an interactive session on a compute node, for example with Slurm:
+
+```bash
+srun --account=jimeng-cs-eng --partition=eng-research-gpu --time=00:10:00 --gres=gpu:1 --pty bash
+```
+
+Once on the compute node, re-activate and run:
+
+```bash
+module load miniconda3/24.9.2
+conda activate pyhealth2
+cd ~/PyHealth
+python examples/mortality_prediction/multimodal_mimic4.py
+```
diff --git a/pyhealth_cluster_env.yml b/pyhealth_cluster_env.yml
new file mode 100644
index 000000000..5ea5d60a1
--- /dev/null
+++ b/pyhealth_cluster_env.yml
@@ -0,0 +1,64 @@
+name: pyhealth2
+channels:
+  - pytorch
+  - nvidia
+  - conda-forge
+  - defaults
+
+dependencies:
+  # ── Core Python (PyHealth 2.0 requires >=3.12, <3.14) ──
+  - python=3.12
+
+  # ── PyTorch + CUDA ──────────────────────────────────────
+  # Run `nvidia-smi` on the cluster to check your CUDA version.
+  # Cluster has CUDA driver 13.0, so cu126 runtime is compatible.
+  # Pinning both to same minor version avoids mismatch errors.
+  # NOTE(review): verify that a pytorch-cuda=12.6 build exists on these channels.
+  - pytorch=2.7.1
+  - torchvision=0.22.1
+  - pytorch-cuda=12.6
+
+  # ── Scientific stack ────────────────────────────────────
+  - numpy>=2.0
+  - scipy
+  - scikit-learn
+  - pandas
+  - pyarrow>=15.0
+  - dask>=2024.1.0
+
+  # ── Build / utility ─────────────────────────────────────
+  - pip
+  - tqdm
+  - networkx
+  - packaging
+  - pydantic>=2.0
+  - urllib3>=2.0
+
+  # ── Jupyter (for notebook dev on cluster) ───────────────
+  - jupyterlab
+  - ipykernel
+  - ipywidgets
+
+  # ── pip-only packages ───────────────────────────────────
+  - pip:
+    # PyHealth: editable install from the forked repo clone
+    # Run this AFTER creating the env:
+    #   conda activate pyhealth2
+    #   cd ~/PyHealth
+    #   pip install -e .
+    #
+    # Dependencies that pyproject.toml pulls in:
+    - polars>=1.30
+    - litdata>=0.2
+    - narwhals>=1.0
+    - more-itertools>=10.0
+    - einops>=0.8.0
+    - linear-attention-transformer>=0.19.1
+    - pandarallel>=1.6
+    - mne>=1.8
+    - transformers>=4.40
+    - accelerate
+    - peft
+    - rdkit
+    - ogb>=1.3.5
+    - xgboost
diff --git a/setup.sh b/setup.sh
new file mode 100644
index 000000000..18b59eafa
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+# PyHealth Cluster Environment Setup
+#
+# Creates the conda environment defined in pyhealth_cluster_env.yml (unless an
+# environment with that exact name already exists), activates it, installs
+# PyHealth in editable mode if it is not importable, and verifies the result.
+#
+# Usage: ./setup.sh   (run from the PyHealth repo root)
+
+# No -u: conda's activation scripts may reference unset variables.
+set -e
+
+ENV_NAME="pyhealth2"
+ENV_FILE="pyhealth_cluster_env.yml"
+
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+# Print a green status message.
+info() { printf '%b\n' "${GREEN}$*${NC}"; }
+
+# Print a yellow progress message.
+warn() { printf '%b\n' "${YELLOW}$*${NC}"; }
+
+# Print an error message to stderr and abort.
+die() {
+  printf 'Error: %s\n' "$*" >&2
+  exit 1
+}
+
+# Succeed only if a conda env is named exactly $1. Comparing the first column
+# avoids substring hits such as "pyhealth2-old" or a path containing the name.
+env_exists() {
+  conda info --envs 2>/dev/null \
+    | awk -v name="$1" '$1 == name { found = 1 } END { exit !found }'
+}
+
+info " PyHealth Cluster Setup"
+
+# Load miniconda
+if ! command -v conda >/dev/null 2>&1; then
+  warn "Loading miniconda module..."
+  command -v module >/dev/null 2>&1 \
+    || die "'conda' is not on PATH and the 'module' command is unavailable."
+  module load miniconda3/24.9.2
+  # Best effort: a failing command inside ~/.bashrc must not abort the setup.
+  if [[ -f ~/.bashrc ]]; then
+    # shellcheck disable=SC1090
+    source ~/.bashrc || true
+  fi
+  command -v conda >/dev/null 2>&1 \
+    || die "conda is still unavailable after loading miniconda3/24.9.2."
+fi
+
+# Create or skip conda env
+if env_exists "${ENV_NAME}"; then
+  info "Conda environment '${ENV_NAME}' already exists. Skipping creation."
+else
+  warn "Creating conda environment '${ENV_NAME}'..."
+  [[ -f "${ENV_FILE}" ]] \
+    || die "${ENV_FILE} not found. Make sure you're in the PyHealth repo root."
+  conda env create -f "${ENV_FILE}"
+fi
+
+# Activate env. The hook defines the conda shell function that
+# "conda activate" needs in a non-interactive shell.
+eval "$(conda shell.bash hook)"
+conda activate "${ENV_NAME}"
+info "Activated ${ENV_NAME} (Python: $(python --version))"
+
+# Install PyHealth editable if needed
+if python -c "import pyhealth" 2>/dev/null; then
+  info "PyHealth already installed. Skipping."
+else
+  warn "Installing PyHealth in editable mode..."
+  python -m pip install -e .
+fi
+
+# Verify
+echo ""
+info "Verification:"
+python -c "import pyhealth; print(' PyHealth: OK')"
+python -c "import torch; print(f' PyTorch {torch.__version__}, CUDA available: {torch.cuda.is_available()}')"
+echo ""
+info "Setup complete!"