diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml index d15284d5e..fed58023e 100644 --- a/.github/workflows/pipeline.yaml +++ b/.github/workflows/pipeline.yaml @@ -19,7 +19,7 @@ on: type: string num_workers: description: "Number of parallel H5 workers" - default: "8" + default: "50" type: string skip_national: description: "Skip national calibration/H5" @@ -49,7 +49,7 @@ jobs: GPU="${{ inputs.gpu || 'T4' }}" EPOCHS="${{ inputs.epochs || '1000' }}" NATIONAL_EPOCHS="${{ inputs.national_epochs || '4000' }}" - NUM_WORKERS="${{ inputs.num_workers || '8' }}" + NUM_WORKERS="${{ inputs.num_workers || '50' }}" SKIP_NATIONAL="${{ inputs.skip_national || 'false' }}" python -c " diff --git a/modal_app/local_area.py b/modal_app/local_area.py index 4d5d847bf..5fee7e73f 100644 --- a/modal_app/local_area.py +++ b/modal_app/local_area.py @@ -293,8 +293,9 @@ def run_phase( "/pipeline": pipeline_volume, }, memory=16384, - cpu=4.0, + cpu=1.0, timeout=28800, + max_containers=50, nonpreemptible=True, ) def build_areas_worker( @@ -618,7 +619,7 @@ def promote_publish(branch: str = "main", version: str = "", run_id: str = "") - ) def coordinate_publish( branch: str = "main", - num_workers: int = 8, + num_workers: int = 50, skip_upload: bool = False, n_clones: int = 430, validate: bool = True, @@ -877,7 +878,7 @@ def coordinate_publish( @app.local_entrypoint() def main( branch: str = "main", - num_workers: int = 8, + num_workers: int = 50, skip_upload: bool = False, n_clones: int = 430, run_id: str = "", diff --git a/modal_app/pipeline.py b/modal_app/pipeline.py index 95d293d81..62ca398e4 100644 --- a/modal_app/pipeline.py +++ b/modal_app/pipeline.py @@ -606,7 +606,7 @@ def run_pipeline( epochs: int = 1000, national_gpu: str = "T4", national_epochs: int = 4000, - num_workers: int = 8, + num_workers: int = 50, n_clones: int = 430, skip_national: bool = False, resume_run_id: str = None, @@ -1268,7 +1268,7 @@ def main( epochs: int = 1000, national_gpu: str = "T4", national_epochs: int = 4000, - num_workers: int = 8, + num_workers: int = 50, n_clones: int = 430, skip_national: bool = False, clear_checkpoints: bool = False,