Skip to content

Commit 7652ce4

Browse files
committed
feat(vmm): add OCI registry image discovery and pull support
Add the ability for VMM to discover available guest images from an OCI registry and pull them on demand through the web UI. Images are pulled in the background, with status tracked server-side so that it survives page refreshes. The UI auto-refreshes every 3s while the registry panel is open.

- New `image_registry` config field (e.g., "cr.kvin.wang/dstack/guest-image")
- New RPCs: ListRegistryImages, PullRegistryImage
- Registry module: list tags via Docker Registry HTTP API v2, pull and extract via `docker export`
- Background pull with pulling state in App memory
- UI: Image Registry button + dialog with pull/status per tag
1 parent d46ba28 commit 7652ce4

File tree

12 files changed

+790
-16
lines changed

12 files changed

+790
-16
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vmm/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ fatfs.workspace = true
5555
fscommon.workspace = true
5656
or-panic.workspace = true
5757
url.workspace = true
58+
reqwest.workspace = true
59+
flate2.workspace = true
60+
tar.workspace = true
5861

5962
[dev-dependencies]
6063
insta.workspace = true

vmm/rpc/proto/vmm_rpc.proto

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,11 @@ service Vmm {
350350
rpc SvStop(Id) returns (google.protobuf.Empty);
351351
// Remove a stopped supervisor process by ID.
352352
rpc SvRemove(Id) returns (google.protobuf.Empty);
353+
354+
// List images available in the configured OCI registry.
355+
rpc ListRegistryImages(google.protobuf.Empty) returns (RegistryImageListResponse);
356+
// Pull an image from the OCI registry to local storage.
357+
rpc PullRegistryImage(PullRegistryImageRequest) returns (google.protobuf.Empty);
353358
}
354359

355360
// DHCP lease event reported by the host DHCP server.
@@ -365,6 +370,29 @@ message SvListResponse {
365370
repeated SvProcessInfo processes = 1;
366371
}
367372

373+
// Available images discovered from the OCI registry.
374+
message RegistryImageListResponse {
375+
repeated RegistryImageInfo images = 1;
376+
}
377+
378+
// Metadata for an image tag in the OCI registry.
379+
message RegistryImageInfo {
380+
// Tag name (e.g., "0.5.8", "nvidia-0.5.8")
381+
string tag = 1;
382+
// Whether this image is already downloaded locally
383+
bool local = 2;
384+
// Whether this image is currently being pulled
385+
bool pulling = 3;
386+
// Error message from the last failed pull attempt (empty if no error)
387+
string error = 4;
388+
}
389+
390+
// Request to pull an image from the OCI registry.
391+
message PullRegistryImageRequest {
392+
// Tag to pull (e.g., "0.5.8")
393+
string tag = 1;
394+
}
395+
368396
// Information about a single supervisor process.
369397
message SvProcessInfo {
370398
string id = 1;

vmm/src/app.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ pub use qemu::{VmConfig, VmWorkDir};
3535
mod id_pool;
3636
mod image;
3737
mod qemu;
38+
pub(crate) mod registry;
3839

3940
#[derive(Deserialize, Serialize, Debug, Clone)]
4041
pub struct PortMapping {
@@ -118,12 +119,20 @@ pub struct GpuSpec {
118119
pub slot: String,
119120
}
120121

122+
#[derive(Clone, Debug)]
123+
pub(crate) enum PullStatus {
124+
Pulling,
125+
Failed(String),
126+
}
127+
121128
#[derive(Clone)]
122129
pub struct App {
123130
pub config: Arc<Config>,
124131
pub supervisor: SupervisorClient,
125132
state: Arc<Mutex<AppState>>,
126133
forward_service: Arc<tokio::sync::Mutex<ForwardService>>,
134+
/// Pull status for registry images: tag → status.
135+
pub(crate) pull_status: Arc<Mutex<std::collections::HashMap<String, PullStatus>>>,
127136
}
128137

129138
impl App {
@@ -152,6 +161,7 @@ impl App {
152161
})),
153162
config: Arc::new(config),
154163
forward_service: Arc::new(tokio::sync::Mutex::new(ForwardService::new())),
164+
pull_status: Arc::new(Mutex::new(std::collections::HashMap::new())),
155165
}
156166
}
157167

@@ -172,7 +182,7 @@ impl App {
172182
{
173183
bail!("Invalid image name");
174184
}
175-
let image_path = self.config.image_path.join(&manifest.image);
185+
let image_path = self.config.image.path.join(&manifest.image);
176186
let image = Image::load(&image_path).context("Failed to load image")?;
177187
let vm_id = manifest.id.clone();
178188
let app_compose = vm_work_dir
@@ -739,7 +749,7 @@ impl App {
739749
{
740750
bail!("Invalid image name");
741751
}
742-
let image_path = self.config.image_path.join(&manifest.image);
752+
let image_path = self.config.image.path.join(&manifest.image);
743753
let image = Image::load(&image_path).context("Failed to load image")?;
744754
let vm_id = manifest.id.clone();
745755
let already_running = cids_assigned.contains_key(&vm_id);
@@ -854,7 +864,7 @@ impl App {
854864
}
855865

856866
pub fn list_images(&self) -> Result<Vec<(String, ImageInfo)>> {
857-
let image_path = self.config.image_path.clone();
867+
let image_path = self.config.image.path.clone();
858868
let images = fs::read_dir(image_path).context("Failed to read image directory")?;
859869
Ok(images
860870
.flat_map(|entry| {
@@ -1115,7 +1125,7 @@ fn rotate_serial_log(work_dir: &VmWorkDir, max_bytes: u64) {
11151125
}
11161126

11171127
pub(crate) fn make_sys_config(cfg: &Config, manifest: &Manifest) -> Result<String> {
1118-
let image_path = cfg.image_path.join(&manifest.image);
1128+
let image_path = cfg.image.path.join(&manifest.image);
11191129
let image = Image::load(image_path).context("Failed to load image info")?;
11201130
let img_ver = image.info.version_tuple().unwrap_or((0, 0, 0));
11211131
let kms_urls = if manifest.kms_urls.is_empty() {

0 commit comments

Comments
 (0)