From 81437647c46601a44c2109111aaea9e9da4e9200 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Mon, 8 Dec 2025 17:03:34 -0800 Subject: [PATCH 01/17] Add int_concurrent_frames_limit to host This field limits the number of concurrent frames allowed to run on a specific host. --- .../ddl/postgres/migrations/V35__Add_host_frame_limit.sql | 4 ++++ rust/crates/scheduler/src/dao/host_dao.rs | 6 +++++- rust/crates/scheduler/src/host_cache/cache.rs | 1 + rust/crates/scheduler/src/host_cache/store.rs | 1 + rust/crates/scheduler/src/models/host.rs | 3 +++ rust/crates/scheduler/src/pipeline/dispatcher/actor.rs | 5 +++++ 6 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql new file mode 100644 index 000000000..0734d530e --- /dev/null +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql @@ -0,0 +1,4 @@ +-- Add a field to limit the max amount of concurrent frames a host can run +-- -1 means no limit +alter table host + add int_concurrent_frames_limit INT NOT NULL DEFAULT -1; diff --git a/rust/crates/scheduler/src/dao/host_dao.rs b/rust/crates/scheduler/src/dao/host_dao.rs index fe77a4990..b842564c9 100644 --- a/rust/crates/scheduler/src/dao/host_dao.rs +++ b/rust/crates/scheduler/src/dao/host_dao.rs @@ -59,6 +59,7 @@ pub struct HostModel { // Number of cores available at the subscription of the show this host has been queried on int_alloc_available_cores: i64, ts_ping: DateTime, + int_concurrent_frames_limit: i64, } impl From for Host { @@ -93,6 +94,8 @@ impl From for Host { alloc_id: parse_uuid(&val.pk_alloc), alloc_name: val.str_alloc_name, last_updated: val.ts_ping, + concurrent_frames_limit: (val.int_concurrent_frames_limit >= 0) + 
.then_some(val.int_concurrent_frames_limit as u32), } } } @@ -118,7 +121,8 @@ SELECT DISTINCT s.int_burst - s.int_cores as int_alloc_available_cores, a.pk_alloc, a.str_name as str_alloc_name, - hs.ts_ping + hs.ts_ping, + h.int_concurrent_frames_limit FROM host h INNER JOIN host_stat hs ON h.pk_host = hs.pk_host INNER JOIN alloc a ON h.pk_alloc = a.pk_alloc diff --git a/rust/crates/scheduler/src/host_cache/cache.rs b/rust/crates/scheduler/src/host_cache/cache.rs index 487e75281..63adf6211 100644 --- a/rust/crates/scheduler/src/host_cache/cache.rs +++ b/rust/crates/scheduler/src/host_cache/cache.rs @@ -355,6 +355,7 @@ mod tests { alloc_id: Uuid::new_v4(), alloc_name: "test".to_string(), last_updated: Utc::now(), + concurrent_frames_limit: None, } } diff --git a/rust/crates/scheduler/src/host_cache/store.rs b/rust/crates/scheduler/src/host_cache/store.rs index 9653e71e1..8076c8ade 100644 --- a/rust/crates/scheduler/src/host_cache/store.rs +++ b/rust/crates/scheduler/src/host_cache/store.rs @@ -388,6 +388,7 @@ mod tests { alloc_id: Uuid::new_v4(), alloc_name: "test".to_string(), last_updated, + concurrent_frames_limit: None, } } diff --git a/rust/crates/scheduler/src/models/host.rs b/rust/crates/scheduler/src/models/host.rs index 24da1e222..7fd1bdbf6 100644 --- a/rust/crates/scheduler/src/models/host.rs +++ b/rust/crates/scheduler/src/models/host.rs @@ -24,6 +24,7 @@ pub struct Host { pub(crate) alloc_id: Uuid, pub(crate) alloc_name: String, pub(crate) last_updated: DateTime, + pub(crate) concurrent_frames_limit: Option, } impl Host { @@ -63,6 +64,7 @@ impl Host { alloc_available_cores: CoreSize, alloc_id: Uuid, alloc_name: String, + concurrent_frames_limit: Option, ) -> Self { Self { id, @@ -79,6 +81,7 @@ impl Host { alloc_id, alloc_name, last_updated: Local::now().with_timezone(&Utc), + concurrent_frames_limit, } } } diff --git a/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs b/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs index 2222ae4bf..38e0a3683 
100644 --- a/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs +++ b/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs @@ -1028,6 +1028,7 @@ mod tests { CoreSize(4), Uuid::new_v4(), "test-alloc".to_string(), + None, ) } @@ -1160,6 +1161,7 @@ mod tests { CoreSize(4), Uuid::new_v4(), "test-alloc".to_string(), + None, ); let mut frame = create_test_dispatch_frame(); @@ -1196,6 +1198,7 @@ mod tests { CoreSize(4), Uuid::new_v4(), "test-alloc".to_string(), + None, ); let mut frame = create_test_dispatch_frame(); @@ -1232,6 +1235,7 @@ mod tests { CoreSize(8), Uuid::new_v4(), "test-alloc".to_string(), + None, ); let mut frame = create_test_dispatch_frame(); @@ -1267,6 +1271,7 @@ mod tests { CoreSize(8), Uuid::new_v4(), "test-alloc".to_string(), + None, ); let mut frame = create_test_dispatch_frame(); From b30c886b5fa498914cb5ec095b8e1e6e86e006f8 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 17 Dec 2025 10:55:45 -0800 Subject: [PATCH 02/17] [scheduler/cuebot] Introduce booking by slot to scheduler This commit is the first step towards the goal of allowing a new booking mode that doesn't take cores and memory into consideration, but a predefined limit on how many concurrent frames a host is allowed to run. Rationale: Booking by slot is useful for pipelines where frames are small and limited not by their cpu/memory consumption but by other resources like storage bandwith or network availability. In these scenarios, limiting the concurrency is more important than the resource consumption. 
--- .../com/imageworks/spcue/dao/HostDao.java | 2 +- .../spcue/dao/postgres/HostDaoJdbc.java | 7 +- .../spcue/dispatcher/HostReportHandler.java | 2 +- .../imageworks/spcue/service/HostManager.java | 2 +- .../spcue/service/HostManagerService.java | 4 +- .../migrations/V35__Add_host_frame_limit.sql | 7 +- .../spcue/test/dao/postgres/HostDaoTests.java | 2 +- rust/crates/scheduler/src/dao/host_dao.rs | 17 ++- rust/crates/scheduler/src/host_cache/actor.rs | 17 +-- rust/crates/scheduler/src/host_cache/cache.rs | 132 ++++++++++++++---- .../scheduler/src/host_cache/messages.rs | 13 +- rust/crates/scheduler/src/host_cache/store.rs | 3 +- rust/crates/scheduler/src/models/host.rs | 6 +- rust/crates/scheduler/src/pipeline/matcher.rs | 15 +- 14 files changed, 162 insertions(+), 67 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java index dfd0397a2..5354dd9b8 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java @@ -260,7 +260,7 @@ public interface HostDao { */ void updateHostStats(HostInterface host, long totalMemory, long freeMemory, long totalSwap, long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory, long freeGpuMemory, - int load, Timestamp bootTime, String os); + int load, Timestamp bootTime, String os, int runningProcs); /** * Return true if the HardwareState is Up, false if it is anything else. 
diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java index 862e1c459..e220d98ed 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java @@ -396,13 +396,14 @@ public CallableStatement createCallableStatement(Connection con) throws SQLExcep + " ts_booted = ?, " + " ts_ping = current_timestamp, " + " str_os = ? " + + " int_running_procs = ? " + "WHERE " + " pk_host = ?"; @Override public void updateHostStats(HostInterface host, long totalMemory, long freeMemory, long totalSwap, long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory, - long freeGpuMemory, int load, Timestamp bootTime, String os) { + long freeGpuMemory, int load, Timestamp bootTime, String os, int runningProcs) { if (os == null) { os = Dispatcher.OS_DEFAULT; @@ -410,7 +411,7 @@ public void updateHostStats(HostInterface host, long totalMemory, long freeMemor getJdbcTemplate().update(UPDATE_RENDER_HOST, totalMemory, freeMemory, totalSwap, freeSwap, totalMcp, freeMcp, totalGpuMemory, freeGpuMemory, load, bootTime, os, - host.getHostId()); + runningProcs, host.getHostId()); } @Override @@ -631,7 +632,7 @@ public boolean isNimbyHost(HostInterface h) { /** * Checks if the passed in name looks like a fully qualified domain name. If so, returns the * hostname without the domain. Otherwise returns the passed in name unchanged. 
- * + * * @param fqdn - String * @return String - hostname */ diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java index 7bd73ef3d..86951fbe1 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java @@ -191,7 +191,7 @@ public void handleHostReport(HostReport report, boolean isBoot) { rhost.getTotalSwap(), rhost.getFreeSwap(), rhost.getTotalMcp(), rhost.getFreeMcp(), rhost.getTotalGpuMem(), rhost.getFreeGpuMem(), rhost.getLoad(), new Timestamp(rhost.getBootTime() * 1000l), - rhost.getAttributesMap().get("SP_OS")); + rhost.getAttributesMap().get("SP_OS"), report.getFramesCount()); // Both logics are conflicting, only change hardware state if // there was no need for a tempDirStorage state change diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java index 76d5282b2..8568f66a6 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java @@ -120,7 +120,7 @@ public interface HostManager { */ void setHostStatistics(HostInterface host, long totalMemory, long freeMemory, long totalSwap, long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory, long freeGpuMemory, - int load, Timestamp bootTime, String os); + int load, Timestamp bootTime, String os, int runningProcs); void deleteHost(HostInterface host); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java index 1432f7169..d58cd86ad 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java @@ 
-116,10 +116,10 @@ public void rebootNow(HostInterface host) { @Override public void setHostStatistics(HostInterface host, long totalMemory, long freeMemory, long totalSwap, long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory, - long freeGpuMemory, int load, Timestamp bootTime, String os) { + long freeGpuMemory, int load, Timestamp bootTime, String os, int runningProcs) { hostDao.updateHostStats(host, totalMemory, freeMemory, totalSwap, freeSwap, totalMcp, - freeMcp, totalGpuMemory, freeGpuMemory, load, bootTime, os); + freeMcp, totalGpuMemory, freeGpuMemory, load, bootTime, os, runningProcs); } @Transactional(propagation = Propagation.SUPPORTS, readOnly = true) diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql index 0734d530e..bc5ea2b18 100644 --- a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql @@ -1,4 +1,7 @@ --- Add a field to limit the max amount of concurrent frames a host can run +-- Add a field to limit the max amount of concurrent procs a host can run -- -1 means no limit alter table host - add int_concurrent_frames_limit INT NOT NULL DEFAULT -1; + add int_concurrent_procs_limit INT NOT NULL DEFAULT -1; + +alter table host_stat + add int_running_procs INT NOT NULL DEFAULT 0; diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java index 77f0b2799..2afa59961 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java @@ -384,7 +384,7 @@ public void updateHostStats() { DispatchHost dispatchHost = hostDao.findDispatchHost(TEST_HOST); 
hostDao.updateHostStats(dispatchHost, CueUtil.GB8, CueUtil.GB8, CueUtil.GB8, CueUtil.GB8, - CueUtil.GB8, CueUtil.GB8, 1, 1, 100, new Timestamp(1247526000 * 1000l), "spinux1"); + CueUtil.GB8, CueUtil.GB8, 1, 1, 100, new Timestamp(1247526000 * 1000l), "spinux1", 2); Map result = jdbcTemplate .queryForMap("SELECT * FROM host_stat WHERE pk_host=?", dispatchHost.getHostId()); diff --git a/rust/crates/scheduler/src/dao/host_dao.rs b/rust/crates/scheduler/src/dao/host_dao.rs index b842564c9..cf30f50e6 100644 --- a/rust/crates/scheduler/src/dao/host_dao.rs +++ b/rust/crates/scheduler/src/dao/host_dao.rs @@ -59,7 +59,8 @@ pub struct HostModel { // Number of cores available at the subscription of the show this host has been queried on int_alloc_available_cores: i64, ts_ping: DateTime, - int_concurrent_frames_limit: i64, + int_concurrent_procs_limit: i64, + int_running_procs: i64, } impl From for Host { @@ -94,8 +95,9 @@ impl From for Host { alloc_id: parse_uuid(&val.pk_alloc), alloc_name: val.str_alloc_name, last_updated: val.ts_ping, - concurrent_frames_limit: (val.int_concurrent_frames_limit >= 0) - .then_some(val.int_concurrent_frames_limit as u32), + concurrent_procs_limit: (val.int_concurrent_procs_limit >= 0) + .then_some(val.int_concurrent_procs_limit as u32), + running_procs_count: val.int_running_procs as u32, } } } @@ -122,7 +124,8 @@ SELECT DISTINCT a.pk_alloc, a.str_name as str_alloc_name, hs.ts_ping, - h.int_concurrent_frames_limit + h.int_concurrent_procs_limit, + hs.int_running_procs FROM host h INNER JOIN host_stat hs ON h.pk_host = hs.pk_host INNER JOIN alloc a ON h.pk_alloc = a.pk_alloc @@ -144,12 +147,14 @@ WHERE pk_host = $5 RETURNING int_cores_idle, int_mem_idle, int_gpus_idle, int_gpu_mem_idle, NOW() "#; -// This update is meant for testing environments where rqd is not constantly reporting -// host reports to Cuebot to get host_stats properly updated. 
+// ATTENTION: This update is meant for testing environments where rqd is not constantly reporting +// host reports to Cuebot to get host_stats properly updated. This is turned of by default and +// can be turned on by `host_cache.update_stat_on_book=true` static UPDATE_HOST_STAT: &str = r#" UPDATE host_stat SET int_mem_free = int_mem_free - $1, int_gpu_mem_free = int_gpu_mem_free - $2 + int_running_procs = int_running_procs + 1 WHERE pk_host = $3 "#; diff --git a/rust/crates/scheduler/src/host_cache/actor.rs b/rust/crates/scheduler/src/host_cache/actor.rs index d8e273607..b6a324e5f 100644 --- a/rust/crates/scheduler/src/host_cache/actor.rs +++ b/rust/crates/scheduler/src/host_cache/actor.rs @@ -1,6 +1,5 @@ use actix::{Actor, ActorFutureExt, AsyncContext, Handler, ResponseActFuture, WrapFuture}; -use bytesize::ByteSize; use itertools::Itertools; use miette::IntoDiagnostic; use scc::{hash_map::OccupiedEntry, HashMap, HashSet}; @@ -24,7 +23,7 @@ use crate::{ dao::HostDao, host_cache::*, host_cache::{messages::*, store}, - models::{CoreSize, Host}, + models::Host, }; #[derive(Clone)] @@ -94,8 +93,7 @@ where facility_id, show_id, tags, - cores, - memory, + resource_request, validation, } = msg; @@ -104,7 +102,7 @@ where Box::pin( async move { let out = service - .check_out(facility_id, show_id, tags, cores, memory, validation) + .check_out(facility_id, show_id, tags, resource_request, validation) .await; if let Ok(host) = &out { debug!("Checked out {}", host.1); @@ -197,8 +195,7 @@ impl HostCacheService { facility_id: Uuid, show_id: Uuid, tags: Vec, - cores: CoreSize, - memory: ByteSize, + resource_request: ResourceRequest, validation: F, ) -> Result where @@ -224,9 +221,9 @@ impl HostCacheService { // fight for the same rows. 
.read_async(&cache_key, |_, cached_group| { if !cached_group.expired() { + // Checkout host from a group cached_group - // Checkout host from a group - .check_out(cores, memory, validation) + .check_out(resource_request, validation) .map(|host| (cache_key.clone(), host.clone())) .ok() } else { @@ -249,7 +246,7 @@ impl HostCacheService { .map_err(|err| HostCacheError::FailedToQueryHostCache(err.to_string()))?; let checked_out_host = group // Checkout host from a group - .check_out(cores, memory, validation) + .check_out(resource_request, validation) .map(|host| CheckedOutHost(cache_key.clone(), host.clone())); if let Ok(checked_out_host) = checked_out_host { diff --git a/rust/crates/scheduler/src/host_cache/cache.rs b/rust/crates/scheduler/src/host_cache/cache.rs index 63adf6211..4969e58ad 100644 --- a/rust/crates/scheduler/src/host_cache/cache.rs +++ b/rust/crates/scheduler/src/host_cache/cache.rs @@ -26,6 +26,7 @@ use std::{ rc::Rc, sync::RwLock, time::{Duration, SystemTime}, + u32, }; use bytesize::ByteSize; @@ -34,7 +35,7 @@ use uuid::Uuid; use crate::{ config::{HostBookingStrategy, CONFIG}, - host_cache::{store::HOST_STORE, HostCacheError, HostId}, + host_cache::{messages::ResourceRequest, store::HOST_STORE, HostCacheError, HostId}, models::{CoreSize, Host}, }; @@ -133,8 +134,7 @@ impl HostCache { /// /// # Arguments /// - /// * `cores` - Minimum number of cores required - /// * `memory` - Minimum memory required + /// * `resource_request` - The resource requirements (cores and memory, GPU, or unit) /// * `validation` - Function to validate additional host requirements /// /// # Returns @@ -143,8 +143,7 @@ impl HostCache { /// * `Err(HostCacheError)` - No suitable host available pub fn check_out( &self, - cores: CoreSize, - memory: ByteSize, + resource_request: ResourceRequest, validation: F, ) -> Result where @@ -152,9 +151,17 @@ impl HostCache { { self.ping_query(); - let host = self - .remove_host(cores, memory, validation) - 
.ok_or(HostCacheError::NoCandidateAvailable)?; + let host = match resource_request { + ResourceRequest::CoresAndMemory { cores, memory } => self + .remove_host(cores, memory, 1, validation) + .ok_or(HostCacheError::NoCandidateAvailable)?, + ResourceRequest::Gpu(_core_size) => todo!("GPU host search is not yet implemented"), + ResourceRequest::Slots(slots) => self + // Request a host with minimum requirements as the remove logic already accounts for + // limiting slots + .remove_host(CoreSize(1), ByteSize::mib(256), slots, validation) + .ok_or(HostCacheError::NoCandidateAvailable)?, + }; Ok(host) } @@ -175,7 +182,13 @@ impl HostCache { /// /// * `Some(Host)` - Host that meets all requirements /// * `None` - No suitable host found - fn remove_host(&self, cores: CoreSize, memory: ByteSize, validation: F) -> Option + fn remove_host( + &self, + cores: CoreSize, + memory: ByteSize, + slots: u32, + validation: F, + ) -> Option where F: Fn(&Host) -> bool, { @@ -189,6 +202,8 @@ impl HostCache { // Check memory and core requirements just in case host.idle_memory >= memory && host.idle_cores >= cores && + host.running_procs_count + slots + <= host.concurrent_procs_limit.unwrap_or(u32::MAX) && // Ensure we're not retrying the same host as last attempts !failed_candidates.borrow().contains(&host.id) }; @@ -355,7 +370,8 @@ mod tests { alloc_id: Uuid::new_v4(), alloc_name: "test".to_string(), last_updated: Utc::now(), - concurrent_frames_limit: None, + concurrent_procs_limit: None, + running_procs_count: 0, } } @@ -450,8 +466,10 @@ mod tests { cache.check_in(host, false); let result = cache.check_out( - CoreSize(2), - ByteSize::gb(4), + ResourceRequest::CoresAndMemory { + cores: CoreSize(2), + memory: ByteSize::gb(4), + }, |_| true, // Always validate true ); @@ -475,7 +493,13 @@ mod tests { fn test_checkout_no_candidate_available() { let cache = HostCache::default(); - let result = cache.check_out(CoreSize(4), ByteSize::gb(8), |_| true); + let result = cache.check_out( + 
ResourceRequest::CoresAndMemory { + cores: CoreSize(4), + memory: ByteSize::gb(8), + }, + |_| true, + ); assert!(result.is_err()); assert!(matches!(result, Err(HostCacheError::NoCandidateAvailable))); @@ -490,8 +514,10 @@ mod tests { cache.check_in(host, false); let result = cache.check_out( - CoreSize(4), // Request more cores than available - ByteSize::gb(4), + ResourceRequest::CoresAndMemory { + cores: CoreSize(4), // Request more cores than available + memory: ByteSize::gb(4), + }, |_| true, ); @@ -507,8 +533,10 @@ mod tests { cache.check_in(host, false); let result = cache.check_out( - CoreSize(2), - ByteSize::gb(8), // Request more memory than available + ResourceRequest::CoresAndMemory { + cores: CoreSize(2), + memory: ByteSize::gb(8), // Request more memory than available + }, |_| true, ); @@ -524,8 +552,10 @@ mod tests { cache.check_in(host, false); let result = cache.check_out( - CoreSize(2), - ByteSize::gb(4), + ResourceRequest::CoresAndMemory { + cores: CoreSize(2), + memory: ByteSize::gb(4), + }, |_| false, // Always fail validation ); @@ -541,11 +571,23 @@ mod tests { cache.check_in(host, false); // First checkout should succeed - let result1 = cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true); + let result1 = cache.check_out( + ResourceRequest::CoresAndMemory { + cores: CoreSize(2), + memory: ByteSize::gb(4), + }, + |_| true, + ); assert!(result1.is_ok()); // Second checkout should fail because host is already checked out - let result2 = cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true); + let result2 = cache.check_out( + ResourceRequest::CoresAndMemory { + cores: CoreSize(2), + memory: ByteSize::gb(4), + }, + |_| true, + ); assert!(result2.is_err()); } @@ -558,7 +600,13 @@ mod tests { cache.check_in(host.clone(), false); // Checkout the host - let mut checked_host = assert_ok!(cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true)); + let mut checked_host = assert_ok!(cache.check_out( + ResourceRequest::CoresAndMemory { + cores: 
CoreSize(2), + memory: ByteSize::gb(4), + }, + |_| true + )); assert_eq!(checked_host.idle_cores.value(), 4); // Reduce the number of cores and checkin to ensure cache is updated @@ -566,8 +614,20 @@ mod tests { // Check it back in cache.check_in(checked_host, false); - assert_err!(cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true)); - assert_ok!(cache.check_out(CoreSize(1), ByteSize::gb(4), |_| true)); + assert_err!(cache.check_out( + ResourceRequest::CoresAndMemory { + cores: CoreSize(2), + memory: ByteSize::gb(4), + }, + |_| true + )); + assert_ok!(cache.check_out( + ResourceRequest::CoresAndMemory { + cores: CoreSize(1), + memory: ByteSize::gb(4), + }, + |_| true + )); } #[test] @@ -589,7 +649,13 @@ mod tests { cache.check_in(host3, false); // Request 3 cores, 6GB - should get host2 (4 cores, 8GB) or host3 (8 cores, 16GB) - let result = cache.check_out(CoreSize(3), ByteSize::gb(6), |_| true); + let result = cache.check_out( + ResourceRequest::CoresAndMemory { + cores: CoreSize(3), + memory: ByteSize::gb(6), + }, + |_| true, + ); assert!(result.is_ok()); let chosen_host = result.unwrap(); @@ -633,11 +699,23 @@ mod tests { cache.check_in(host2, false); // First checkout should succeed - let result1 = cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true); + let result1 = cache.check_out( + ResourceRequest::CoresAndMemory { + cores: CoreSize(2), + memory: ByteSize::gb(4), + }, + |_| true, + ); assert!(result1.is_ok()); // Second checkout should also succeed (different host) - let result2 = cache.check_out(CoreSize(2), ByteSize::gb(4), |_| true); + let result2 = cache.check_out( + ResourceRequest::CoresAndMemory { + cores: CoreSize(2), + memory: ByteSize::gb(4), + }, + |_| true, + ); assert!(result2.is_ok()); // The hosts should be different diff --git a/rust/crates/scheduler/src/host_cache/messages.rs b/rust/crates/scheduler/src/host_cache/messages.rs index 20e2dad24..44f14c8b6 100644 --- a/rust/crates/scheduler/src/host_cache/messages.rs +++ 
b/rust/crates/scheduler/src/host_cache/messages.rs @@ -54,11 +54,20 @@ where pub facility_id: Uuid, pub show_id: Uuid, pub tags: Vec, - pub cores: CoreSize, - pub memory: ByteSize, + pub resource_request: ResourceRequest, pub validation: F, } +#[derive(Clone, Copy)] +pub enum ResourceRequest { + /// Request a machine with at least this amount of cores and memory idle + CoresAndMemory { cores: CoreSize, memory: ByteSize }, + /// Request a machine with this amount of gpu cores idle + Gpu(CoreSize), + /// Request a machine with this amount of frame slots available + Slots(u32), +} + /// Payload for checking in a host or invalidating a host in the cache. /// /// Allows either returning a host with updated resources to the cache or diff --git a/rust/crates/scheduler/src/host_cache/store.rs b/rust/crates/scheduler/src/host_cache/store.rs index 8076c8ade..3f7c45cee 100644 --- a/rust/crates/scheduler/src/host_cache/store.rs +++ b/rust/crates/scheduler/src/host_cache/store.rs @@ -388,7 +388,8 @@ mod tests { alloc_id: Uuid::new_v4(), alloc_name: "test".to_string(), last_updated, - concurrent_frames_limit: None, + concurrent_procs_limit: None, + running_procs_count: 0, } } diff --git a/rust/crates/scheduler/src/models/host.rs b/rust/crates/scheduler/src/models/host.rs index 7fd1bdbf6..01fbd3e60 100644 --- a/rust/crates/scheduler/src/models/host.rs +++ b/rust/crates/scheduler/src/models/host.rs @@ -24,7 +24,8 @@ pub struct Host { pub(crate) alloc_id: Uuid, pub(crate) alloc_name: String, pub(crate) last_updated: DateTime, - pub(crate) concurrent_frames_limit: Option, + pub(crate) concurrent_procs_limit: Option, + pub(crate) running_procs_count: u32, } impl Host { @@ -81,7 +82,8 @@ impl Host { alloc_id, alloc_name, last_updated: Local::now().with_timezone(&Utc), - concurrent_frames_limit, + concurrent_procs_limit: concurrent_frames_limit, + running_procs_count: 0, } } } diff --git a/rust/crates/scheduler/src/pipeline/matcher.rs b/rust/crates/scheduler/src/pipeline/matcher.rs 
index 1be0c8f5a..a6eca86d1 100644 --- a/rust/crates/scheduler/src/pipeline/matcher.rs +++ b/rust/crates/scheduler/src/pipeline/matcher.rs @@ -279,27 +279,26 @@ impl MatchingService { layer.show_id ); - // Clone only the minimal data needed for the validation closure - // These are needed because the closure must have 'static lifetime for actor messaging - let layer_id = layer.id; - let show_id = layer.show_id; let cores_requested = layer.cores_min; let allocation_service = self.allocation_service.clone(); let os = layer.str_os.clone(); + // Get a matching candidate let host_candidate = self .host_service .send(CheckOut { facility_id: layer.facility_id, show_id: layer.show_id, tags, - cores: cores_requested, - memory: layer.mem_min, + resource_request: ResourceRequest::CoresAndMemory { + cores: cores_requested, + memory: layer.mem_min, + }, validation: move |host| { Self::validate_match( host, - &layer_id, - &show_id, + &layer.id, + &layer.show_id, cores_requested, &allocation_service, os.as_deref(), From cfe2366554466cea87dc73852d554ea1ff766e43 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Thu, 11 Dec 2025 10:47:57 -0800 Subject: [PATCH 03/17] Add Layer Slots Required Field --- .../V36__Add_layer_slots_required.sql | 4 +++ rust/crates/scheduler/src/dao/layer_dao.rs | 18 ++++++++--- rust/crates/scheduler/src/host_cache/actor.rs | 5 ++-- rust/crates/scheduler/src/host_cache/cache.rs | 8 +++-- .../scheduler/src/host_cache/messages.rs | 12 +------- rust/crates/scheduler/src/models/layer.rs | 30 ++++++++++++++++++- rust/crates/scheduler/src/models/mod.rs | 2 +- rust/crates/scheduler/src/pipeline/matcher.rs | 5 +--- 8 files changed, 57 insertions(+), 27 deletions(-) create mode 100644 cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql new 
file mode 100644 index 000000000..1905fbdcd --- /dev/null +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql @@ -0,0 +1,4 @@ +-- Add a field to mark a layer as requiring at least a specific number of slots +-- <=0 means slots are not required +alter table layer + add int_slots_required INT NOT NULL DEFAULT 0; diff --git a/rust/crates/scheduler/src/dao/layer_dao.rs b/rust/crates/scheduler/src/dao/layer_dao.rs index c92cad09a..522b661d4 100644 --- a/rust/crates/scheduler/src/dao/layer_dao.rs +++ b/rust/crates/scheduler/src/dao/layer_dao.rs @@ -44,6 +44,7 @@ pub struct DispatchLayerModel { pub b_threadable: bool, pub int_gpus_min: i64, pub int_gpu_mem_min: i64, + pub int_slots_required: i64, pub str_tags: String, } @@ -67,6 +68,7 @@ pub struct LayerWithFramesModel { pub b_threadable: bool, pub int_gpus_min: i64, pub int_gpu_mem_min: i64, + pub int_slots_required: i64, pub str_tags: String, // Frame fields (Optional - NULL when no frames match) @@ -120,13 +122,19 @@ impl DispatchLayer { ), mem_min: ByteSize::kb(layer.int_mem_min as u64), threadable: layer.b_threadable, - gpus_min: layer - .int_gpus_min - .try_into() - .expect("gpus_min should fit on a i32"), + gpus_min: CoreSize( + layer + .int_gpus_min + .try_into() + .expect("gpus_min should fit on a i32"), + ), gpu_mem_min: ByteSize::kb(layer.int_gpu_mem_min as u64), tags: layer.str_tags.split(" | ").map(|t| t.to_string()).collect(), frames: frames.into_iter().map(|f| f.into()).collect(), + slots_required: layer + .int_slots_required + .try_into() + .expect("int_slots_required should fit on a i32"), } } } @@ -206,6 +214,7 @@ SELECT DISTINCT l.b_threadable, l.int_gpus_min, l.int_gpu_mem_min, + l.int_slots_required, l.str_tags, l.int_dispatch_order, @@ -327,6 +336,7 @@ impl LayerDao { int_gpus_min: model.int_gpus_min, int_gpu_mem_min: model.int_gpu_mem_min, str_tags: model.str_tags.clone(), + int_slots_required: model.int_slots_required, }; // Extract frame data (if present) 
diff --git a/rust/crates/scheduler/src/host_cache/actor.rs b/rust/crates/scheduler/src/host_cache/actor.rs index b6a324e5f..dbf2f8201 100644 --- a/rust/crates/scheduler/src/host_cache/actor.rs +++ b/rust/crates/scheduler/src/host_cache/actor.rs @@ -21,9 +21,8 @@ use crate::{ cluster_key::{ClusterKey, Tag, TagType}, config::CONFIG, dao::HostDao, - host_cache::*, - host_cache::{messages::*, store}, - models::Host, + host_cache::{messages::*, store, *}, + models::{Host, ResourceRequest}, }; #[derive(Clone)] diff --git a/rust/crates/scheduler/src/host_cache/cache.rs b/rust/crates/scheduler/src/host_cache/cache.rs index 4969e58ad..4d76b2ddc 100644 --- a/rust/crates/scheduler/src/host_cache/cache.rs +++ b/rust/crates/scheduler/src/host_cache/cache.rs @@ -35,8 +35,8 @@ use uuid::Uuid; use crate::{ config::{HostBookingStrategy, CONFIG}, - host_cache::{messages::ResourceRequest, store::HOST_STORE, HostCacheError, HostId}, - models::{CoreSize, Host}, + host_cache::{store::HOST_STORE, HostCacheError, HostId}, + models::{CoreSize, Host, ResourceRequest}, }; type CoreKey = u32; @@ -155,7 +155,9 @@ impl HostCache { ResourceRequest::CoresAndMemory { cores, memory } => self .remove_host(cores, memory, 1, validation) .ok_or(HostCacheError::NoCandidateAvailable)?, - ResourceRequest::Gpu(_core_size) => todo!("GPU host search is not yet implemented"), + ResourceRequest::Gpu { cores, memory } => { + todo!("GPU host search is not yet implemented. 
Request: {cores}, {memory}") + } ResourceRequest::Slots(slots) => self // Request a host with minimum requirements as the remove logic already accounts for // limiting slots diff --git a/rust/crates/scheduler/src/host_cache/messages.rs b/rust/crates/scheduler/src/host_cache/messages.rs index 44f14c8b6..ce85d5c5a 100644 --- a/rust/crates/scheduler/src/host_cache/messages.rs +++ b/rust/crates/scheduler/src/host_cache/messages.rs @@ -7,7 +7,7 @@ use uuid::Uuid; use crate::{ cluster_key::{ClusterKey, Tag}, host_cache::HostCacheError, - models::{CoreSize, Host}, + models::{CoreSize, Host, ResourceRequest}, }; /// Response containing a checked-out host and its associated cluster key. @@ -58,16 +58,6 @@ where pub validation: F, } -#[derive(Clone, Copy)] -pub enum ResourceRequest { - /// Request a machine with at least this amount of cores and memory idle - CoresAndMemory { cores: CoreSize, memory: ByteSize }, - /// Request a machine with this amount of gpu cores idle - Gpu(CoreSize), - /// Request a machine with this amount of frame slots available - Slots(u32), -} - /// Payload for checking in a host or invalidating a host in the cache. 
/// /// Allows either returning a host with updated resources to the cache or diff --git a/rust/crates/scheduler/src/models/layer.rs b/rust/crates/scheduler/src/models/layer.rs index 443bfb95b..2046817b2 100644 --- a/rust/crates/scheduler/src/models/layer.rs +++ b/rust/crates/scheduler/src/models/layer.rs @@ -19,8 +19,9 @@ pub struct DispatchLayer { pub cores_min: CoreSize, pub mem_min: ByteSize, pub threadable: bool, - pub gpus_min: i32, + pub gpus_min: CoreSize, pub gpu_mem_min: ByteSize, + pub slots_required: u32, pub tags: HashSet, pub frames: Vec, } @@ -37,6 +38,17 @@ impl fmt::Display for DispatchLayer { } } +/// Describes what resources are required to run a frame from this layer +#[derive(Clone, Copy)] +pub enum ResourceRequest { + /// Request a machine with at least this amount of cores and memory idle + CoresAndMemory { cores: CoreSize, memory: ByteSize }, + /// Request a machine with this amount of gpu cores idle + Gpu { cores: CoreSize, memory: ByteSize }, + /// Request a machine with this amount of frame slots available + Slots(u32), +} + impl DispatchLayer { /// Removes frames with matching IDs from this layer's frame list. 
/// @@ -49,4 +61,20 @@ impl DispatchLayer { pub fn drain_frames(&mut self, frame_ids: Vec) { self.frames.retain(|f| !frame_ids.contains(&f.id)) } + + pub fn resource_request(&self) -> ResourceRequest { + if self.slots_required > 0 { + ResourceRequest::Slots(self.slots_required) + } else if self.gpus_min.value() > 0 { + ResourceRequest::Gpu { + cores: self.gpus_min, + memory: self.gpu_mem_min, + } + } else { + ResourceRequest::CoresAndMemory { + cores: self.cores_min, + memory: self.mem_min, + } + } + } } diff --git a/rust/crates/scheduler/src/models/mod.rs b/rust/crates/scheduler/src/models/mod.rs index f1f723911..a115d4578 100644 --- a/rust/crates/scheduler/src/models/mod.rs +++ b/rust/crates/scheduler/src/models/mod.rs @@ -10,7 +10,7 @@ pub use core_size::{CoreSize, CoreSizeWithMultiplier}; pub use frame::DispatchFrame; pub use host::Host; pub use job::DispatchJob; -pub use layer::DispatchLayer; +pub use layer::{DispatchLayer, ResourceRequest}; pub use subscription::{Allocation, Subscription}; pub use virtual_proc::VirtualProc; diff --git a/rust/crates/scheduler/src/pipeline/matcher.rs b/rust/crates/scheduler/src/pipeline/matcher.rs index a6eca86d1..82acafbae 100644 --- a/rust/crates/scheduler/src/pipeline/matcher.rs +++ b/rust/crates/scheduler/src/pipeline/matcher.rs @@ -290,10 +290,7 @@ impl MatchingService { facility_id: layer.facility_id, show_id: layer.show_id, tags, - resource_request: ResourceRequest::CoresAndMemory { - cores: cores_requested, - memory: layer.mem_min, - }, + resource_request: layer.resource_request(), validation: move |host| { Self::validate_match( host, From 769d8ef892342945e52cf5a8c0ea3425fc606564 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 17 Dec 2025 10:56:20 -0800 Subject: [PATCH 04/17] Add slots_required field to layer in OpenCue DTD and PyOutline layer Add slots_required attribute to layer for slot-based booking --- .../com/imageworks/spcue/LayerDetail.java | 1 + .../com/imageworks/spcue/dao/LayerDao.java | 8 + 
.../spcue/dao/postgres/HostDaoJdbc.java | 2 +- .../spcue/dao/postgres/LayerDaoJdbc.java | 12 +- .../spcue/dao/postgres/WhiteboardDaoJdbc.java | 3 +- .../imageworks/spcue/servant/ManageLayer.java | 11 + .../imageworks/spcue/service/JobManager.java | 8 + .../spcue/service/JobManagerService.java | 5 + .../com/imageworks/spcue/service/JobSpec.java | 4 + .../main/resources/public/dtd/cjsl-1.16.dtd | 106 ++++++ .../spcue/test/dao/postgres/HostDaoTests.java | 3 +- .../spcue/test/service/JobSpecTests.java | 22 ++ .../src/test/resources/conf/dtd/cjsl-1.16.dtd | 106 ++++++ .../resources/conf/jobspec/jobspec_1_16.xml | 58 ++++ proto/src/job.proto | 15 + pycue/opencue/wrappers/layer.py | 216 ++++++++---- pycue/tests/wrappers/test_layer.py | 317 ++++++++++-------- pyoutline/outline/backend/cue.py | 124 ++++--- pyoutline/outline/layer.py | 33 +- 19 files changed, 775 insertions(+), 279 deletions(-) create mode 100644 cuebot/src/main/resources/public/dtd/cjsl-1.16.dtd create mode 100644 cuebot/src/test/resources/conf/dtd/cjsl-1.16.dtd create mode 100644 cuebot/src/test/resources/conf/jobspec/jobspec_1_16.xml diff --git a/cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java b/cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java index 572139039..da0e830f6 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LayerDetail.java @@ -38,6 +38,7 @@ public class LayerDetail extends LayerEntity implements LayerInterface { public int timeout_llu; public int dispatchOrder; public int totalFrameCount; + public int slotsRequired; public Set tags = new LinkedHashSet(); public Set services = new LinkedHashSet(); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java index 5d5433ada..15449a0f5 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java @@ -311,6 
+311,14 @@ public interface LayerDao { */ void updateTimeoutLLU(LayerInterface layer, int timeout_llu); + /** + * Updates the slots required for a layer. + * + * @param layer the layer to update + * @param slots the number of slots required (<=0 means not slot-based) + */ + void updateLayerSlotsRequired(LayerInterface layer, int slots); + /** * Lowers the minimum memory on a layer if the layer is using less memory and the currnet min * memory is the dispatcher default. diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java index e220d98ed..a83256f6a 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java @@ -395,7 +395,7 @@ public CallableStatement createCallableStatement(Connection con) throws SQLExcep + " int_load = ?, " + " ts_booted = ?, " + " ts_ping = current_timestamp, " - + " str_os = ? " + + " str_os = ?, " + " int_running_procs = ? 
" + "WHERE " + " pk_host = ?"; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java index d9ef93e2b..83abf0a60 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java @@ -158,6 +158,7 @@ public LayerDetail mapRow(ResultSet rs, int rowNum) throws SQLException { layer.services.addAll(Lists.newArrayList(rs.getString("str_services").split(","))); layer.timeout = rs.getInt("int_timeout"); layer.timeout_llu = rs.getInt("int_timeout_llu"); + layer.slotsRequired = rs.getInt("int_slots_required"); return layer; } }; @@ -241,7 +242,8 @@ public LayerInterface getLayer(String id) { + "int_dispatch_order, " + "str_tags, " + "str_type," + "int_cores_min, " + "int_cores_max, " + "b_threadable, " + "int_mem_min, " + "int_gpus_min, " + "int_gpus_max, " + "int_gpu_mem_min, " + "str_services, " + "int_timeout," - + "int_timeout_llu " + ") " + "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; + + "int_timeout_llu, " + "int_slots_required " + ") " + + "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; @Override public void insertLayerDetail(LayerDetail l) { @@ -250,7 +252,7 @@ public void insertLayerDetail(LayerDetail l) { l.chunkSize, l.dispatchOrder, StringUtils.join(l.tags, " | "), l.type.toString(), l.minimumCores, l.maximumCores, l.isThreadable, l.minimumMemory, l.minimumGpus, l.maximumGpus, l.minimumGpuMemory, StringUtils.join(l.services, ","), l.timeout, - l.timeout_llu); + l.timeout_llu, l.slotsRequired); } @Override @@ -555,6 +557,12 @@ public void updateTimeoutLLU(LayerInterface layer, int timeout_llu) { layer.getLayerId()); } + @Override + public void updateLayerSlotsRequired(LayerInterface layer, int slots) { + getJdbcTemplate().update("UPDATE layer SET int_slots_required=? 
WHERE pk_layer=?", slots, + layer.getLayerId()); + } + @Override public void enableMemoryOptimizer(LayerInterface layer, boolean value) { getJdbcTemplate().update("UPDATE layer SET b_optimize=? WHERE pk_layer=?", value, diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java index 9e7b8ee95..c8260f2a2 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java @@ -1183,7 +1183,8 @@ public Layer mapRow(ResultSet rs, int rowNum) throws SQLException { Arrays.asList(SqlUtil.getString(rs, "str_limit_names").split(","))) .setMemoryOptimizerEnabled(rs.getBoolean("b_optimize")) .setTimeout(rs.getInt("int_timeout")) - .setTimeoutLlu(rs.getInt("int_timeout_llu")); + .setTimeoutLlu(rs.getInt("int_timeout_llu")) + .setSlotsRequired(rs.getInt("int_slots_required")); LayerStats.Builder statsBuilder = LayerStats.newBuilder() .setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores"))) diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageLayer.java b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageLayer.java index 0e19a2e43..21f64d9b3 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageLayer.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageLayer.java @@ -110,6 +110,8 @@ import com.imageworks.spcue.grpc.job.LayerSetTimeoutResponse; import com.imageworks.spcue.grpc.job.LayerSetTimeoutLLURequest; import com.imageworks.spcue.grpc.job.LayerSetTimeoutLLUResponse; +import com.imageworks.spcue.grpc.job.LayerSetSlotsRequiredRequest; +import com.imageworks.spcue.grpc.job.LayerSetSlotsRequiredResponse; import com.imageworks.spcue.grpc.job.LayerStaggerFramesRequest; import com.imageworks.spcue.grpc.job.LayerStaggerFramesResponse; import com.imageworks.spcue.grpc.limit.Limit; @@ -432,6 +434,15 @@ public 
void setTimeoutLLU(LayerSetTimeoutLLURequest request, } } + @Override + public void setSlotsRequired(LayerSetSlotsRequiredRequest request, + StreamObserver responseObserver) { + updateLayer(request.getLayer()); + jobManager.setLayerSlotsRequired(layer, request.getSlots()); + responseObserver.onNext(LayerSetSlotsRequiredResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + @Override public void addLimit(LayerAddLimitRequest request, StreamObserver responseObserver) { diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobManager.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobManager.java index 4641b8e82..51d6e9548 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobManager.java @@ -455,6 +455,14 @@ public interface JobManager { */ void setLayerMinGpus(LayerInterface layer, int gpuUnits); + /** + * Sets the slots required for a layer. + * + * @param layer the layer to update + * @param slots the number of slots required + */ + void setLayerSlotsRequired(LayerInterface layer, int slots); + /** * Add a limit to the given layer. 
* diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java index 03bc765b4..0904689fb 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java @@ -456,6 +456,11 @@ public void setLayerMinGpus(LayerInterface layer, int gpu) { layerDao.updateLayerMinGpus(layer, gpu); } + @Override + public void setLayerSlotsRequired(LayerInterface layer, int slots) { + layerDao.updateLayerSlotsRequired(layer, slots); + } + @Override public void setLayerMaxGpus(LayerInterface layer, int gpu) { layerDao.updateLayerMaxGpus(layer, gpu); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index d4ff4e6b5..bed7fa903 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -444,6 +444,10 @@ private void handleLayerTags(BuildableJob buildableJob, Element jobTag) { layer.timeout_llu = Integer.parseInt(layerTag.getChildTextTrim("timeout_llu")); } + if (layerTag.getChildTextTrim("slots_required") != null) { + layer.slotsRequired = Integer.parseInt(layerTag.getChildTextTrim("slots_required")); + } + /* * Handle the layer environment */ diff --git a/cuebot/src/main/resources/public/dtd/cjsl-1.16.dtd b/cuebot/src/main/resources/public/dtd/cjsl-1.16.dtd new file mode 100644 index 000000000..20ebf19c2 --- /dev/null +++ b/cuebot/src/main/resources/public/dtd/cjsl-1.16.dtd @@ -0,0 +1,106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java 
b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java index 2afa59961..6c66e26a6 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/dao/postgres/HostDaoTests.java @@ -384,7 +384,8 @@ public void updateHostStats() { DispatchHost dispatchHost = hostDao.findDispatchHost(TEST_HOST); hostDao.updateHostStats(dispatchHost, CueUtil.GB8, CueUtil.GB8, CueUtil.GB8, CueUtil.GB8, - CueUtil.GB8, CueUtil.GB8, 1, 1, 100, new Timestamp(1247526000 * 1000l), "spinux1", 2); + CueUtil.GB8, CueUtil.GB8, 1, 1, 100, new Timestamp(1247526000 * 1000l), "spinux1", + 2); Map result = jdbcTemplate .queryForMap("SELECT * FROM host_stat WHERE pk_host=?", dispatchHost.getHostId()); diff --git a/cuebot/src/test/java/com/imageworks/spcue/test/service/JobSpecTests.java b/cuebot/src/test/java/com/imageworks/spcue/test/service/JobSpecTests.java index 69057bb83..533feb1c5 100644 --- a/cuebot/src/test/java/com/imageworks/spcue/test/service/JobSpecTests.java +++ b/cuebot/src/test/java/com/imageworks/spcue/test/service/JobSpecTests.java @@ -120,4 +120,26 @@ public void testParseMaxCoresAndMaxGpus() { assertEquals(job.maxGpusOverride, Integer.valueOf(42)); } + @Test + public void testParseSlotsRequired() { + String xml = readJobSpec("jobspec_1_16.xml"); + JobSpec spec = jobLauncher.parse(xml); + assertEquals(spec.getDoc().getDocType().getPublicID(), "SPI Cue Specification Language"); + assertEquals(spec.getDoc().getDocType().getSystemID(), + "http://localhost:8080/spcue/dtd/cjsl-1.16.dtd"); + assertEquals(spec.getJobs().size(), 1); + BuildableJob job = spec.getJobs().get(0); + assertEquals(job.getBuildableLayers().size(), 2); + + // First layer uses slot-based booking + LayerDetail slotBasedLayer = job.getBuildableLayers().get(0).layerDetail; + assertEquals(slotBasedLayer.name, "slot_based_layer"); + assertEquals(slotBasedLayer.slotsRequired, 4); + + // Second layer uses regular resource 
booking (default slots_required = 0) + LayerDetail regularLayer = job.getBuildableLayers().get(1).layerDetail; + assertEquals(regularLayer.name, "regular_layer"); + assertEquals(regularLayer.slotsRequired, 0); + } + } diff --git a/cuebot/src/test/resources/conf/dtd/cjsl-1.16.dtd b/cuebot/src/test/resources/conf/dtd/cjsl-1.16.dtd new file mode 100644 index 000000000..20ebf19c2 --- /dev/null +++ b/cuebot/src/test/resources/conf/dtd/cjsl-1.16.dtd @@ -0,0 +1,106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cuebot/src/test/resources/conf/jobspec/jobspec_1_16.xml b/cuebot/src/test/resources/conf/jobspec/jobspec_1_16.xml new file mode 100644 index 000000000..6e05da5bb --- /dev/null +++ b/cuebot/src/test/resources/conf/jobspec/jobspec_1_16.xml @@ -0,0 +1,58 @@ + + + + + local + testing + default + testuser + 9860 + + + False + 2 + 420 + 42 + False + + + + echo "Using slot-based booking" + 1-10 + 1 + 4 + + + shell + + + + echo "Using regular resource booking" + 1-5 + 1 + 100 + 2048 + + + shell + + + + + + diff --git a/proto/src/job.proto b/proto/src/job.proto index 4c76308fa..e78f9a89b 100644 --- a/proto/src/job.proto +++ b/proto/src/job.proto @@ -392,6 +392,9 @@ service LayerInterface { // Set whether the LLU timeout for frames in the layer rpc SetTimeoutLLU(LayerSetTimeoutLLURequest) returns (LayerSetTimeoutLLUResponse); + // Set the number of slots required per frame for this layer + rpc SetSlotsRequired(LayerSetSlotsRequiredRequest) returns (LayerSetSlotsRequiredResponse); + // Staggers the specified frame range. 
rpc StaggerFrames(LayerStaggerFramesRequest) returns (LayerStaggerFramesResponse); } @@ -714,6 +717,8 @@ message Layer { float min_gpus = 20; float max_gpus = 21; string command = 22; + // Number of slots required per frame (<=0 means not slot-based) + int32 slots_required = 23; } message LayerSeq { @@ -1795,6 +1800,16 @@ message LayerSetTimeoutLLURequest { message LayerSetTimeoutLLUResponse {} // Empty +// SetSlotsRequired +// +message LayerSetSlotsRequiredRequest { + Layer layer = 1; + int32 slots = 2; +} + +message LayerSetSlotsRequiredResponse {} // Empty + + // StaggerFrames message LayerStaggerFramesRequest { Layer layer = 1; diff --git a/pycue/opencue/wrappers/layer.py b/pycue/opencue/wrappers/layer.py index 8b3dfdf1b..605b2cb65 100644 --- a/pycue/opencue/wrappers/layer.py +++ b/pycue/opencue/wrappers/layer.py @@ -20,12 +20,13 @@ import platform from opencue_proto import job_pb2 + import opencue.api -from opencue.cuebot import Cuebot import opencue.search import opencue.wrappers.depend import opencue.wrappers.frame import opencue.wrappers.limit +from opencue.cuebot import Cuebot class Layer(object): @@ -33,6 +34,7 @@ class Layer(object): class LayerType(enum.IntEnum): """Represents the type of layer.""" + PRE = job_pb2.PRE POST = job_pb2.POST RENDER = job_pb2.RENDER @@ -40,51 +42,62 @@ class LayerType(enum.IntEnum): class Order(enum.IntEnum): """Represents the order of a layer.""" + FIRST = job_pb2.FIRST LAST = job_pb2.LAST REVERSE = job_pb2.REVERSE def __init__(self, layer=None): self.data = layer - self.stub = Cuebot.getStub('layer') + self.stub = Cuebot.getStub("layer") def kill(self, username=None, pid=None, host_kill=None, reason=None): """Kills the entire layer.""" username = username if username else getpass.getuser() pid = pid if pid else os.getpid() host_kill = host_kill if host_kill else platform.uname()[1] - return self.stub.KillFrames(job_pb2.LayerKillFramesRequest(layer=self.data, - username=username, - pid=str(pid), - host_kill=host_kill, - 
reason=reason), - timeout=Cuebot.Timeout) + return self.stub.KillFrames( + job_pb2.LayerKillFramesRequest( + layer=self.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason, + ), + timeout=Cuebot.Timeout, + ) def eat(self): """Eats the entire layer.""" - return self.stub.EatFrames(job_pb2.LayerEatFramesRequest(layer=self.data), - timeout=Cuebot.Timeout) + return self.stub.EatFrames( + job_pb2.LayerEatFramesRequest(layer=self.data), timeout=Cuebot.Timeout + ) def retry(self): """Retries the entire layer.""" - return self.stub.RetryFrames(job_pb2.LayerRetryFramesRequest(layer=self.data), - timeout=Cuebot.Timeout) + return self.stub.RetryFrames( + job_pb2.LayerRetryFramesRequest(layer=self.data), timeout=Cuebot.Timeout + ) def markdone(self): """Drops any dependency that requires this layer or requires any frame in the layer.""" - return self.stub.MarkdoneFrames(job_pb2.LayerMarkdoneFramesRequest(layer=self.data), - timeout=Cuebot.Timeout) + return self.stub.MarkdoneFrames( + job_pb2.LayerMarkdoneFramesRequest(layer=self.data), timeout=Cuebot.Timeout + ) def addLimit(self, limit_id): """Adds a limit to the current layer.""" - return self.stub.AddLimit(job_pb2.LayerAddLimitRequest(layer=self.data, limit_id=limit_id), - timeout=Cuebot.Timeout) + return self.stub.AddLimit( + job_pb2.LayerAddLimitRequest(layer=self.data, limit_id=limit_id), + timeout=Cuebot.Timeout, + ) def dropLimit(self, limit_id): """Removes a limit on the current layer.""" return self.stub.DropLimit( job_pb2.LayerDropLimitRequest(layer=self.data, limit_id=limit_id), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) def enableMemoryOptimizer(self, value): """Enables or disables the memory optimizer. 
@@ -92,9 +105,10 @@ def enableMemoryOptimizer(self, value): :type value: bool :param value: whether memory optimizer is enabled """ - return self.stub.EnableMemoryOptimizer(job_pb2.LayerEnableMemoryOptimizerRequest( - layer=self.data, value=value), - timeout=Cuebot.Timeout) + return self.stub.EnableMemoryOptimizer( + job_pb2.LayerEnableMemoryOptimizerRequest(layer=self.data, value=value), + timeout=Cuebot.Timeout, + ) def getFrames(self, **options): """Returns a list of up to 1000 frames from within the layer. @@ -105,9 +119,14 @@ def getFrames(self, **options): :return: sequence of matching frames """ criteria = opencue.search.FrameSearch.criteriaFromOptions(**options) - response = self.stub.GetFrames(job_pb2.LayerGetFramesRequest(layer=self.data, s=criteria), - timeout=Cuebot.Timeout) - return [opencue.wrappers.frame.Frame(frameData) for frameData in response.frames.frames] + response = self.stub.GetFrames( + job_pb2.LayerGetFramesRequest(layer=self.data, s=criteria), + timeout=Cuebot.Timeout, + ) + return [ + opencue.wrappers.frame.Frame(frameData) + for frameData in response.frames.frames + ] def getOutputPaths(self): """Return the output paths for this layer. @@ -115,8 +134,9 @@ def getOutputPaths(self): :rtype: list :return: list of output paths """ - return self.stub.GetOutputPaths(job_pb2.LayerGetOutputPathsRequest(layer=self.data), - timeout=Cuebot.Timeout).output_paths + return self.stub.GetOutputPaths( + job_pb2.LayerGetOutputPathsRequest(layer=self.data), timeout=Cuebot.Timeout + ).output_paths def setTags(self, tags): """Sets the layer tags. @@ -124,8 +144,10 @@ def setTags(self, tags): :type tags: list :param tags: layer tags """ - return self.stub.SetTags(job_pb2.LayerSetTagsRequest(layer=self.data, tags=tags), - timeout=Cuebot.Timeout) + return self.stub.SetTags( + job_pb2.LayerSetTagsRequest(layer=self.data, tags=tags), + timeout=Cuebot.Timeout, + ) def setMaxCores(self, cores): """Sets the maximum number of cores that this layer requires. 
@@ -134,8 +156,9 @@ def setMaxCores(self, cores): :param cores: Core units, 100 reserves 1 core """ return self.stub.SetMaxCores( - job_pb2.LayerSetMaxCoresRequest(layer=self.data, cores=cores/100.0), - timeout=Cuebot.Timeout) + job_pb2.LayerSetMaxCoresRequest(layer=self.data, cores=cores / 100.0), + timeout=Cuebot.Timeout, + ) def setMinCores(self, cores): """Sets the minimum number of cores that this layer requires. @@ -146,8 +169,9 @@ def setMinCores(self, cores): :param cores: core units, 100 reserves 1 core """ return self.stub.SetMinCores( - job_pb2.LayerSetMinCoresRequest(layer=self.data, cores=cores/100.0), - timeout=Cuebot.Timeout) + job_pb2.LayerSetMinCoresRequest(layer=self.data, cores=cores / 100.0), + timeout=Cuebot.Timeout, + ) def setMaxGpus(self, max_gpus): """Sets the maximum number of gpus that this layer requires. @@ -155,7 +179,8 @@ def setMaxGpus(self, max_gpus): :param max_gpus: gpu cores""" return self.stub.SetMaxGpus( job_pb2.LayerSetMaxGpusRequest(layer=self.data, max_gpus=max_gpus), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) def setMinGpus(self, min_gpus): """Sets the minimum number of gpus that this layer requires. @@ -163,7 +188,17 @@ def setMinGpus(self, min_gpus): :param min_gpus: gou cores""" return self.stub.SetMinGpus( job_pb2.LayerSetMinGpusRequest(layer=self.data, min_gpus=min_gpus), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) + + def setSlotsRequired(self, slots): + """Sets the number of slots required per frame for this layer. + :type slots: int + :param slots: Number of slots required (<=0 disables slot-based booking)""" + return self.stub.SetSlotsRequired( + job_pb2.LayerSetSlotsRequiredRequest(layer=self.data, slots=slots), + timeout=Cuebot.Timeout, + ) def setMinGpuMemory(self, gpu_memory): """Sets the minimum number of gpu memory that this layer requires. 
@@ -173,7 +208,8 @@ def setMinGpuMemory(self, gpu_memory): """ return self.stub.SetMinGpuMemory( job_pb2.LayerSetMinGpuMemoryRequest(layer=self.data, gpu_memory=gpu_memory), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) def setMinMemory(self, memory): """Sets the minimum amount of memory that this layer requires. @@ -183,7 +219,8 @@ def setMinMemory(self, memory): """ return self.stub.SetMinMemory( job_pb2.LayerSetMinMemoryRequest(layer=self.data, memory=memory), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) def setThreadable(self, threadable): """Sets the threadable field. @@ -191,27 +228,32 @@ def setThreadable(self, threadable): :type threadable: bool :param threadable: boolean to enable/disable threadable """ - return self.stub.SetThreadable(job_pb2.LayerSetThreadableRequest( - layer=self.data, threadable=threadable), - timeout=Cuebot.Timeout) + return self.stub.SetThreadable( + job_pb2.LayerSetThreadableRequest(layer=self.data, threadable=threadable), + timeout=Cuebot.Timeout, + ) def setTimeout(self, timeout): """Set time out to the value. :type timeout: int :param timeout: value for timeout in minutes""" - return self.stub.SetTimeout(job_pb2.LayerSetTimeoutRequest( - layer=self.data, timeout=timeout), - timeout=Cuebot.Timeout) + return self.stub.SetTimeout( + job_pb2.LayerSetTimeoutRequest(layer=self.data, timeout=timeout), + timeout=Cuebot.Timeout, + ) def setTimeoutLLU(self, timeout_llu): """Set LLU time out to the value. 
:type timeout: int :param timeout: value for timeout in minutes""" - return self.stub.SetTimeoutLLU(job_pb2.LayerSetTimeoutLLURequest( - layer=self.data, timeout_llu=timeout_llu), - timeout=Cuebot.Timeout) - - def addRenderPartition(self, hostname, threads, max_cores, max_mem, max_gpu_memory, max_gpus): + return self.stub.SetTimeoutLLU( + job_pb2.LayerSetTimeoutLLURequest(layer=self.data, timeout_llu=timeout_llu), + timeout=Cuebot.Timeout, + ) + + def addRenderPartition( + self, hostname, threads, max_cores, max_mem, max_gpu_memory, max_gpus + ): """Adds a render partition to the layer. :type hostname: str @@ -228,14 +270,17 @@ def addRenderPartition(self, hostname, threads, max_cores, max_mem, max_gpu_memo :param max_gpus: max gpus enabled for the partition """ self.stub.AddRenderPartition( - job_pb2.LayerAddRenderPartitionRequest(layer=self.data, - host=hostname, - threads=threads, - max_cores=max_cores, - max_memory=max_mem, - max_gpu_memory=max_gpu_memory, - username=os.getenv("USER", "unknown"), - max_gpus=max_gpus)) + job_pb2.LayerAddRenderPartitionRequest( + layer=self.data, + host=hostname, + threads=threads, + max_cores=max_cores, + max_memory=max_mem, + max_gpu_memory=max_gpu_memory, + username=os.getenv("USER", "unknown"), + max_gpus=max_gpus, + ) + ) def getWhatDependsOnThis(self): """Gets a list of dependencies that depend directly on this layer. 
@@ -245,7 +290,8 @@ def getWhatDependsOnThis(self): """ response = self.stub.GetWhatDependsOnThis( job_pb2.LayerGetWhatDependsOnThisRequest(layer=self.data), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) dependSeq = response.depends return [opencue.wrappers.depend.Depend(dep) for dep in dependSeq.depends] @@ -257,7 +303,8 @@ def getWhatThisDependsOn(self): """ response = self.stub.GetWhatThisDependsOn( job_pb2.LayerGetWhatThisDependsOnRequest(layer=self.data), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) dependSeq = response.depends return [opencue.wrappers.depend.Depend(dep) for dep in dependSeq.depends] @@ -271,7 +318,8 @@ def createDependencyOnJob(self, job): """ response = self.stub.CreateDependencyOnJob( job_pb2.LayerCreateDependOnJobRequest(layer=self.data, job=job.data), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) return opencue.wrappers.depend.Depend(response.depend) def createDependencyOnLayer(self, layer): @@ -283,8 +331,11 @@ def createDependencyOnLayer(self, layer): :return: the new dependency """ response = self.stub.CreateDependencyOnLayer( - job_pb2.LayerCreateDependOnLayerRequest(layer=self.data, depend_on_layer=layer.data), - timeout=Cuebot.Timeout) + job_pb2.LayerCreateDependOnLayerRequest( + layer=self.data, depend_on_layer=layer.data + ), + timeout=Cuebot.Timeout, + ) return opencue.wrappers.depend.Depend(response.depend) def createDependencyOnFrame(self, frame): @@ -297,7 +348,8 @@ def createDependencyOnFrame(self, frame): """ response = self.stub.CreateDependencyOnFrame( job_pb2.LayerCreateDependOnFrameRequest(layer=self.data, frame=frame.data), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) return opencue.wrappers.depend.Depend(response.depend) def createFrameByFrameDependency(self, layer): @@ -312,8 +364,10 @@ def createFrameByFrameDependency(self, layer): # to LayerOnLayer for better efficiency. 
response = self.stub.CreateFrameByFrameDependency( job_pb2.LayerCreateFrameByFrameDependRequest( - layer=self.data, depend_layer=layer.data, any_frame=False), - timeout=Cuebot.Timeout) + layer=self.data, depend_layer=layer.data, any_frame=False + ), + timeout=Cuebot.Timeout, + ) return opencue.wrappers.depend.Depend(response.depend) # TODO(gregdenton) Determine if this is needed. (Issue #71) @@ -337,7 +391,8 @@ def registerOutputPath(self, outputPath): """ self.stub.RegisterOutputPath( job_pb2.LayerRegisterOutputPathRequest(layer=self.data, spec=outputPath), - timeout=Cuebot.Timeout) + timeout=Cuebot.Timeout, + ) def reorderFrames(self, frameRange, order): """Reorders the specified frame range on this layer. @@ -348,8 +403,11 @@ def reorderFrames(self, frameRange, order): :param order: First, Last or Reverse """ self.stub.ReorderFrames( - job_pb2.LayerReorderFramesRequest(layer=self.data, range=frameRange, order=order), - timeout=Cuebot.Timeout) + job_pb2.LayerReorderFramesRequest( + layer=self.data, range=frameRange, order=order + ), + timeout=Cuebot.Timeout, + ) def staggerFrames(self, frameRange, stagger): """Staggers the specified frame range on this layer. @@ -360,8 +418,11 @@ def staggerFrames(self, frameRange, stagger): :param stagger: the amount to stagger by """ self.stub.StaggerFrames( - job_pb2.LayerStaggerFramesRequest(layer=self.data, range=frameRange, stagger=stagger), - timeout=Cuebot.Timeout) + job_pb2.LayerStaggerFramesRequest( + layer=self.data, range=frameRange, stagger=stagger + ), + timeout=Cuebot.Timeout, + ) def getLimitDetails(self): """Returns the Limit objects for the given layer. 
@@ -369,8 +430,12 @@ def getLimitDetails(self): :rtype: list :return: list of limits on this layer """ - return [opencue.wrappers.limit.Limit(limit) for limit in self.stub.GetLimits( - job_pb2.LayerGetLimitsRequest(layer=self.data), timeout=Cuebot.Timeout).limits] + return [ + opencue.wrappers.limit.Limit(limit) + for limit in self.stub.GetLimits( + job_pb2.LayerGetLimitsRequest(layer=self.data), timeout=Cuebot.Timeout + ).limits + ] def id(self): """Returns the id of the layer. @@ -449,6 +514,12 @@ def minGpus(self): :return: Minimum number of gpus required""" return self.data.min_gpus + def slotsRequired(self): + """Returns the number of slots required per frame. + :rtype: int + :return: Number of slots required (<=0 means not slot-based)""" + return self.data.slots_required + def minMemory(self): """Returns the minimum amount of memory that frames in this layer require. @@ -555,8 +626,11 @@ def percentCompleted(self): :return: percentage of frame completion """ try: - return self.data.layer_stats.succeeded_frames / \ - float(self.data.layer_stats.total_frames) * 100.0 + return ( + self.data.layer_stats.succeeded_frames + / float(self.data.layer_stats.total_frames) + * 100.0 + ) except ZeroDivisionError: return 0 diff --git a/pycue/tests/wrappers/test_layer.py b/pycue/tests/wrappers/test_layer.py index 7adb7b11b..033e38e79 100644 --- a/pycue/tests/wrappers/test_layer.py +++ b/pycue/tests/wrappers/test_layer.py @@ -16,28 +16,25 @@ """Tests for `opencue.wrappers.layer`""" -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import +from __future__ import absolute_import, division, print_function + import getpass import os import platform import unittest import mock +from opencue_proto import depend_pb2, job_pb2 -from opencue_proto import depend_pb2 -from opencue_proto import job_pb2 import opencue.wrappers.frame -import opencue.wrappers.layer import opencue.wrappers.job +import opencue.wrappers.layer - 
-TEST_LAYER_NAME = 'testLayer' -TEST_OUTPUT_PATH = '/path/to/file.txt' +TEST_LAYER_NAME = "testLayer" +TEST_OUTPUT_PATH = "/path/to/file.txt" -@mock.patch('opencue.cuebot.Cuebot.getStub') +@mock.patch("opencue.cuebot.Cuebot.getStub") class LayerTests(unittest.TestCase): """Tests for `opencue.wrappers.layer.Layer`.""" @@ -46,8 +43,7 @@ def testKill(self, getStubMock): stubMock.KillFrames.return_value = job_pb2.LayerKillFramesResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) username = getpass.getuser() pid = os.getpid() host_kill = platform.uname()[1] @@ -55,147 +51,157 @@ def testKill(self, getStubMock): layer.kill(username=username, pid=pid, host_kill=host_kill, reason=reason) stubMock.KillFrames.assert_called_with( - job_pb2.LayerKillFramesRequest(layer=layer.data, - username=username, - pid=str(pid), - host_kill=host_kill, - reason=reason), timeout=mock.ANY) + job_pb2.LayerKillFramesRequest( + layer=layer.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason, + ), + timeout=mock.ANY, + ) def testEat(self, getStubMock): stubMock = mock.Mock() stubMock.EatFrames.return_value = job_pb2.LayerEatFramesResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.eat() stubMock.EatFrames.assert_called_with( - job_pb2.LayerEatFramesRequest(layer=layer.data), timeout=mock.ANY) + job_pb2.LayerEatFramesRequest(layer=layer.data), timeout=mock.ANY + ) def testRetry(self, getStubMock): stubMock = mock.Mock() stubMock.RetryFrames.return_value = job_pb2.LayerRetryFramesResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = 
opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.retry() stubMock.RetryFrames.assert_called_with( - job_pb2.LayerRetryFramesRequest(layer=layer.data), timeout=mock.ANY) + job_pb2.LayerRetryFramesRequest(layer=layer.data), timeout=mock.ANY + ) def testMarkdone(self, getStubMock): stubMock = mock.Mock() stubMock.MarkdoneFrames.return_value = job_pb2.LayerMarkdoneFramesResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.markdone() stubMock.MarkdoneFrames.assert_called_with( - job_pb2.LayerMarkdoneFramesRequest(layer=layer.data), timeout=mock.ANY) + job_pb2.LayerMarkdoneFramesRequest(layer=layer.data), timeout=mock.ANY + ) def testAddLimit(self, getStubMock): - test_limit_id = 'lll-llll-lll' + test_limit_id = "lll-llll-lll" stubMock = mock.Mock() stubMock.AddLimit.return_value = job_pb2.LayerAddLimitResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.addLimit(test_limit_id) stubMock.AddLimit.assert_called_with( job_pb2.LayerAddLimitRequest(layer=layer.data, limit_id=test_limit_id), - timeout=mock.ANY) + timeout=mock.ANY, + ) def testDropLimit(self, getStubMock): - test_limit_id = 'lll-llll-lll' + test_limit_id = "lll-llll-lll" stubMock = mock.Mock() stubMock.DropLimit.return_value = job_pb2.LayerDropLimitResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.dropLimit(test_limit_id) stubMock.DropLimit.assert_called_with( job_pb2.LayerDropLimitRequest(layer=layer.data, limit_id=test_limit_id), - timeout=mock.ANY) + timeout=mock.ANY, + ) def testEnableMemoryOptimizerTrue(self, getStubMock): 
stubMock = mock.Mock() - stubMock.EnableMemoryOptimizer.return_value = job_pb2.LayerEnableMemoryOptimizerResponse() + stubMock.EnableMemoryOptimizer.return_value = ( + job_pb2.LayerEnableMemoryOptimizerResponse() + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.enableMemoryOptimizer(True) stubMock.EnableMemoryOptimizer.assert_called_with( job_pb2.LayerEnableMemoryOptimizerRequest(layer=layer.data, value=True), - timeout=mock.ANY) + timeout=mock.ANY, + ) def testEnableMemoryOptimizerFalse(self, getStubMock): stubMock = mock.Mock() - stubMock.EnableMemoryOptimizer.return_value = job_pb2.LayerEnableMemoryOptimizerResponse() + stubMock.EnableMemoryOptimizer.return_value = ( + job_pb2.LayerEnableMemoryOptimizerResponse() + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.enableMemoryOptimizer(False) stubMock.EnableMemoryOptimizer.assert_called_with( job_pb2.LayerEnableMemoryOptimizerRequest(layer=layer.data, value=False), - timeout=mock.ANY) + timeout=mock.ANY, + ) def testGetFrames(self, getStubMock): stubMock = mock.Mock() stubMock.GetFrames.return_value = job_pb2.LayerGetFramesResponse( - frames=job_pb2.FrameSeq(frames=[job_pb2.Frame(layer_name=TEST_LAYER_NAME)])) + frames=job_pb2.FrameSeq(frames=[job_pb2.Frame(layer_name=TEST_LAYER_NAME)]) + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) frames = layer.getFrames() stubMock.GetFrames.assert_called_with( job_pb2.LayerGetFramesRequest( - layer=layer.data, - s=opencue.search.FrameSearch.criteriaFromOptions()), - timeout=mock.ANY) + layer=layer.data, 
s=opencue.search.FrameSearch.criteriaFromOptions() + ), + timeout=mock.ANY, + ) self.assertEqual(len(frames), 1) self.assertEqual(frames[0].data.layer_name, TEST_LAYER_NAME) def testGetOutputPaths(self, getStubMock): stubMock = mock.Mock() stubMock.GetOutputPaths.return_value = job_pb2.LayerGetOutputPathsResponse( - output_paths=[TEST_OUTPUT_PATH]) + output_paths=[TEST_OUTPUT_PATH] + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) outputPaths = layer.getOutputPaths() stubMock.GetOutputPaths.assert_called_with( - job_pb2.LayerGetOutputPathsRequest(layer=layer.data), timeout=mock.ANY) + job_pb2.LayerGetOutputPathsRequest(layer=layer.data), timeout=mock.ANY + ) self.assertEqual(len(outputPaths), 1) self.assertEqual(outputPaths[0], TEST_OUTPUT_PATH) def testSetTags(self, getStubMock): - tags = ['cloud', 'local'] + tags = ["cloud", "local"] stubMock = mock.Mock() stubMock.SetTags.return_value = job_pb2.LayerSetTagsResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.setTags(tags) stubMock.SetTags.assert_called_with( - job_pb2.LayerSetTagsRequest(layer=layer.data, tags=tags), timeout=mock.ANY) + job_pb2.LayerSetTagsRequest(layer=layer.data, tags=tags), timeout=mock.ANY + ) def testSetMaxCores(self, getStubMock): stubMock = mock.Mock() @@ -203,14 +209,14 @@ def testSetMaxCores(self, getStubMock): getStubMock.return_value = stubMock testCores = 100 - testCoresActual = testCores/100.0 - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + testCoresActual = testCores / 100.0 + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.setMaxCores(testCores) stubMock.SetMaxCores.assert_called_with( 
job_pb2.LayerSetMaxCoresRequest(layer=layer.data, cores=testCoresActual), - timeout=mock.ANY) + timeout=mock.ANY, + ) def testSetMinGpuMemory(self, getStubMock): stubMock = mock.Mock() @@ -218,13 +224,27 @@ def testSetMinGpuMemory(self, getStubMock): getStubMock.return_value = stubMock testCores = 100 - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.setMinGpuMemory(testCores) stubMock.SetMinGpuMemory.assert_called_with( job_pb2.LayerSetMinGpuMemoryRequest(layer=layer.data, gpu_memory=testCores), - timeout=mock.ANY) + timeout=mock.ANY, + ) + + def testSetSlotsRequired(self, getStubMock): + stubMock = mock.Mock() + stubMock.SetSlotsRequired.return_value = job_pb2.LayerSetSlotsRequiredResponse() + getStubMock.return_value = stubMock + + slots = 4 + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer.setSlotsRequired(slots) + + stubMock.SetSlotsRequired.assert_called_with( + job_pb2.LayerSetSlotsRequiredRequest(layer=layer.data, slots=slots), + timeout=mock.ANY, + ) def testSetMinMemory(self, getStubMock): stubMock = mock.Mock() @@ -232,13 +252,13 @@ def testSetMinMemory(self, getStubMock): getStubMock.return_value = stubMock memory = 2048 - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.setMinMemory(memory) stubMock.SetMinMemory.assert_called_with( job_pb2.LayerSetMinMemoryRequest(layer=layer.data, memory=memory), - timeout=mock.ANY) + timeout=mock.ANY, + ) def testSetThreadable(self, getStubMock): stubMock = mock.Mock() @@ -246,173 +266,192 @@ def testSetThreadable(self, getStubMock): getStubMock.return_value = stubMock value = True - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.setThreadable(value) 
stubMock.SetThreadable.assert_called_with( job_pb2.LayerSetThreadableRequest(layer=layer.data, threadable=value), - timeout=mock.ANY) + timeout=mock.ANY, + ) def testGetWhatDependsOnThis(self, getStubMock): - dependId = 'dddd-ddd-dddd' + dependId = "dddd-ddd-dddd" stubMock = mock.Mock() - stubMock.GetWhatDependsOnThis.return_value = job_pb2.LayerGetWhatDependsOnThisResponse( - depends=depend_pb2.DependSeq(depends=[depend_pb2.Depend(id=dependId)])) + stubMock.GetWhatDependsOnThis.return_value = ( + job_pb2.LayerGetWhatDependsOnThisResponse( + depends=depend_pb2.DependSeq(depends=[depend_pb2.Depend(id=dependId)]) + ) + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) depends = layer.getWhatDependsOnThis() stubMock.GetWhatDependsOnThis.assert_called_with( - job_pb2.LayerGetWhatDependsOnThisRequest(layer=layer.data), - timeout=mock.ANY) + job_pb2.LayerGetWhatDependsOnThisRequest(layer=layer.data), timeout=mock.ANY + ) self.assertEqual(len(depends), 1) self.assertEqual(depends[0].id(), dependId) def testGetWhatThisDependsOn(self, getStubMock): - dependId = 'dddd-ddd-dddd' + dependId = "dddd-ddd-dddd" stubMock = mock.Mock() - stubMock.GetWhatThisDependsOn.return_value = job_pb2.LayerGetWhatThisDependsOnResponse( - depends=depend_pb2.DependSeq(depends=[depend_pb2.Depend(id=dependId)])) + stubMock.GetWhatThisDependsOn.return_value = ( + job_pb2.LayerGetWhatThisDependsOnResponse( + depends=depend_pb2.DependSeq(depends=[depend_pb2.Depend(id=dependId)]) + ) + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) depends = layer.getWhatThisDependsOn() stubMock.GetWhatThisDependsOn.assert_called_with( - job_pb2.LayerGetWhatThisDependsOnRequest(layer=layer.data), - timeout=mock.ANY) + 
job_pb2.LayerGetWhatThisDependsOnRequest(layer=layer.data), timeout=mock.ANY + ) self.assertEqual(len(depends), 1) self.assertEqual(depends[0].id(), dependId) def testCreateDependencyOnJob(self, getStubMock): - dependId = 'dddd-ddd-dddd' - jobId = 'jjjj-jjj-jjjj' + dependId = "dddd-ddd-dddd" + jobId = "jjjj-jjj-jjjj" stubMock = mock.Mock() - stubMock.CreateDependencyOnJob.return_value = job_pb2.LayerCreateDependOnJobResponse( - depend=depend_pb2.Depend(id=dependId)) + stubMock.CreateDependencyOnJob.return_value = ( + job_pb2.LayerCreateDependOnJobResponse( + depend=depend_pb2.Depend(id=dependId) + ) + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) - job = opencue.wrappers.job.Job( - job_pb2.Job(id=jobId)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + job = opencue.wrappers.job.Job(job_pb2.Job(id=jobId)) depend = layer.createDependencyOnJob(job) stubMock.CreateDependencyOnJob.assert_called_with( job_pb2.LayerCreateDependOnJobRequest(layer=layer.data, job=job.data), - timeout=mock.ANY) + timeout=mock.ANY, + ) self.assertEqual(depend.id(), dependId) def testCreateDependencyOnLayer(self, getStubMock): - dependId = 'dddd-ddd-dddd' - layerId = 'llll-lll-llll' + dependId = "dddd-ddd-dddd" + layerId = "llll-lll-llll" stubMock = mock.Mock() - stubMock.CreateDependencyOnLayer.return_value = job_pb2.LayerCreateDependOnLayerResponse( - depend=depend_pb2.Depend(id=dependId)) + stubMock.CreateDependencyOnLayer.return_value = ( + job_pb2.LayerCreateDependOnLayerResponse( + depend=depend_pb2.Depend(id=dependId) + ) + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) - dependLayer = opencue.wrappers.layer.Layer( - job_pb2.Layer(id=layerId)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + dependLayer = opencue.wrappers.layer.Layer(job_pb2.Layer(id=layerId)) depend = 
layer.createDependencyOnLayer(dependLayer) stubMock.CreateDependencyOnLayer.assert_called_with( - job_pb2.LayerCreateDependOnLayerRequest(layer=layer.data, - depend_on_layer=dependLayer.data), - timeout=mock.ANY) + job_pb2.LayerCreateDependOnLayerRequest( + layer=layer.data, depend_on_layer=dependLayer.data + ), + timeout=mock.ANY, + ) self.assertEqual(depend.id(), dependId) def testCreateDependencyOnFrame(self, getStubMock): - dependId = 'dddd-ddd-dddd' - frameId = 'ffff-fff-ffff' + dependId = "dddd-ddd-dddd" + frameId = "ffff-fff-ffff" stubMock = mock.Mock() - stubMock.CreateDependencyOnFrame.return_value = job_pb2.LayerCreateDependOnFrameResponse( - depend=depend_pb2.Depend(id=dependId)) + stubMock.CreateDependencyOnFrame.return_value = ( + job_pb2.LayerCreateDependOnFrameResponse( + depend=depend_pb2.Depend(id=dependId) + ) + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) - frame = opencue.wrappers.frame.Frame( - job_pb2.Frame(id=frameId)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + frame = opencue.wrappers.frame.Frame(job_pb2.Frame(id=frameId)) depend = layer.createDependencyOnFrame(frame) stubMock.CreateDependencyOnFrame.assert_called_with( job_pb2.LayerCreateDependOnFrameRequest(layer=layer.data, frame=frame.data), - timeout=mock.ANY) + timeout=mock.ANY, + ) self.assertEqual(depend.id(), dependId) def testCreateFrameByFrameDependency(self, getStubMock): - dependId = 'dddd-ddd-dddd' - layerId = 'llll-lll-llll' + dependId = "dddd-ddd-dddd" + layerId = "llll-lll-llll" stubMock = mock.Mock() - stubMock.CreateFrameByFrameDependency.return_value = \ - job_pb2.LayerCreateFrameByFrameDependResponse(depend=depend_pb2.Depend(id=dependId)) + stubMock.CreateFrameByFrameDependency.return_value = ( + job_pb2.LayerCreateFrameByFrameDependResponse( + depend=depend_pb2.Depend(id=dependId) + ) + ) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer( - 
job_pb2.Layer(name=TEST_LAYER_NAME)) - dependLayer = opencue.wrappers.layer.Layer( - job_pb2.Layer(id=layerId)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + dependLayer = opencue.wrappers.layer.Layer(job_pb2.Layer(id=layerId)) depend = layer.createFrameByFrameDependency(dependLayer) stubMock.CreateFrameByFrameDependency.assert_called_with( - job_pb2.LayerCreateFrameByFrameDependRequest(layer=layer.data, - depend_layer=dependLayer.data, - any_frame=False), - timeout=mock.ANY) + job_pb2.LayerCreateFrameByFrameDependRequest( + layer=layer.data, depend_layer=dependLayer.data, any_frame=False + ), + timeout=mock.ANY, + ) self.assertEqual(depend.id(), dependId) def testRegisterOutputPath(self, getStubMock): stubMock = mock.Mock() - stubMock.RegisterOutputPath.return_value = job_pb2.LayerRegisterOutputPathResponse() + stubMock.RegisterOutputPath.return_value = ( + job_pb2.LayerRegisterOutputPathResponse() + ) getStubMock.return_value = stubMock - outputPath = '/test/output/path' - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + outputPath = "/test/output/path" + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.registerOutputPath(outputPath) stubMock.RegisterOutputPath.assert_called_with( job_pb2.LayerRegisterOutputPathRequest(layer=layer.data, spec=outputPath), - timeout=mock.ANY) + timeout=mock.ANY, + ) def testReorderFrames(self, getStubMock): stubMock = mock.Mock() stubMock.ReorderFrames.return_value = job_pb2.LayerReorderFramesResponse() getStubMock.return_value = stubMock - frameRange = '1-10' + frameRange = "1-10" order = job_pb2.REVERSE layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.reorderFrames(frameRange, order) stubMock.ReorderFrames.assert_called_with( - job_pb2.LayerReorderFramesRequest(layer=layer.data, range=frameRange, order=order), - timeout=mock.ANY) + job_pb2.LayerReorderFramesRequest( + layer=layer.data, range=frameRange, 
order=order + ), + timeout=mock.ANY, + ) def testStaggerFrames(self, getStubMock): stubMock = mock.Mock() stubMock.StaggerFrames.return_value = job_pb2.LayerStaggerFramesResponse() getStubMock.return_value = stubMock - frameRange = '1-10' + frameRange = "1-10" stagger = 4 - layer = opencue.wrappers.layer.Layer( - job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.staggerFrames(frameRange, stagger) stubMock.StaggerFrames.assert_called_with( - job_pb2.LayerStaggerFramesRequest(layer=layer.data, range=frameRange, stagger=stagger), - timeout=mock.ANY) + job_pb2.LayerStaggerFramesRequest( + layer=layer.data, range=frameRange, stagger=stagger + ), + timeout=mock.ANY, + ) class LayerEnumTests(unittest.TestCase): - def testLayerType(self): self.assertEqual(opencue.api.Layer.LayerType.PRE, job_pb2.PRE) self.assertEqual(opencue.api.Layer.LayerType.PRE, 0) @@ -422,5 +461,5 @@ def testOrder(self): self.assertEqual(opencue.api.Layer.Order.LAST, 1) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pyoutline/outline/backend/cue.py b/pyoutline/outline/backend/cue.py index ce88f3692..5d05d4c91 100644 --- a/pyoutline/outline/backend/cue.py +++ b/pyoutline/outline/backend/cue.py @@ -20,22 +20,19 @@ See outline.backend.__init__.py for a description of the PyOutline backend system. 
""" -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import +from __future__ import absolute_import, division, print_function -from builtins import str import logging import os import sys import time +from builtins import str from xml.dom.minidom import parseString from xml.etree import ElementTree as Et -from packaging.version import Version - import FileSequence import opencue +from packaging.version import Version import outline import outline.depend @@ -43,12 +40,9 @@ import outline.util import outline.versions.main - logger = logging.getLogger("outline.backend.cue") -__all__ = ["launch", - "serialize", - "serialize_simple"] +__all__ = ["launch", "serialize", "serialize_simple"] JOB_WAIT_PERIOD_SEC = 5 @@ -84,13 +78,15 @@ def build_command(launcher, layer): wrapper = "%s/opencue_wrap_frame" % outline.config.get("outline", "wrapper_dir") else: wrapper = "%s/opencue_wrap_frame_no_ss" % outline.config.get( - "outline", "wrapper_dir") + "outline", "wrapper_dir" + ) command.append(wrapper) command.append(outline.config.get("outline", "user_dir")) command.append("%s/pycuerun" % outline.config.get("outline", "bin_dir")) - command.append("%s -e #IFRAME#-%s" % (launcher.get_outline().get_path(), - layer.get_name())) + command.append( + "%s -e #IFRAME#-%s" % (launcher.get_outline().get_path(), layer.get_name()) + ) command.append("--version %s" % outline.versions.get_version("outline")) command.append("--repos %s" % outline.versions.get_repos()) command.append("--debug") @@ -152,15 +148,20 @@ def test(job): job = opencue.api.getJob(job.name()) if job.data.job_stats.dead_frames + job.data.job_stats.eaten_frames > 0: raise outline.exception.OutlineException( - "Job test failed, dead or eaten frames on: %s" % job.data.name) + "Job test failed, dead or eaten frames on: %s" % job.data.name + ) if job.data.state == opencue.api.job_pb2.FINISHED: break logger.debug( - "waiting on %s job to complete: %d/%d", job.data.name, - 
job.data.job_stats.succeeded_frames, job.data.job_stats.total_frames) + "waiting on %s job to complete: %d/%d", + job.data.name, + job.data.job_stats.succeeded_frames, + job.data.job_stats.total_frames, + ) except opencue.CueException as ie: raise outline.exception.OutlineException( - "test for job %s failed: %s" % (job.data.name, ie)) + "test for job %s failed: %s" % (job.data.name, ie) + ) time.sleep(5) finally: job.kill() @@ -178,13 +179,17 @@ def wait(job): if not opencue.api.isJobPending(job.data.name): break logger.debug( - "waiting on %s job to complete: %d/%d", job.data.name, - job.data.job_stats.succeeded_frames, job.data.job_stats.total_frames) + "waiting on %s job to complete: %d/%d", + job.data.name, + job.data.job_stats.succeeded_frames, + job.data.job_stats.total_frames, + ) except opencue.CueException as ie: print( - "opencue error waiting on job: %s, %s. Will continue to wait." % ( - job.data.name, ie), - file=sys.stderr) + "opencue error waiting on job: %s, %s. Will continue to wait." + % (job.data.name, ie), + file=sys.stderr, + ) time.sleep(JOB_WAIT_PERIOD_SEC) @@ -246,8 +251,9 @@ def _serialize(launcher, use_pycuerun): user = outline.util.get_user() sub_element(root, "user", user) if not launcher.get("nomail"): - sub_element(root, "email", "%s@%s" % (user, - outline.config.get("outline", "domain"))) + sub_element( + root, "email", "%s@%s" % (user, outline.config.get("outline", "domain")) + ) sub_element(root, "uid", str(outline.util.get_uid())) j = Et.SubElement(root, "job", {"name": ol.get_name()}) @@ -287,7 +293,6 @@ def _serialize(launcher, use_pycuerun): layers = Et.SubElement(j, "layers") for layer in ol.get_layers(): - # Unregistered layers are in the job but don't show up on the cue. if not layer.get_arg("register"): continue @@ -301,16 +306,19 @@ def _serialize(launcher, use_pycuerun): # that layer. 
frame_range = layer.get_frame_range() if not frame_range: - logger.info("Skipping layer %s, its range (%s) does not intersect " - "with ol range %s", layer, layer.get_arg("range"), ol.get_frame_range()) + logger.info( + "Skipping layer %s, its range (%s) does not intersect with ol range %s", + layer, + layer.get_arg("range"), + ol.get_frame_range(), + ) continue - spec_layer = Et.SubElement(layers, "layer", - {"name": layer.get_name(), - "type": layer.get_type()}) + spec_layer = Et.SubElement( + layers, "layer", {"name": layer.get_name(), "type": layer.get_type()} + ) if use_pycuerun: - sub_element(spec_layer, "cmd", - " ".join(build_command(launcher, layer))) + sub_element(spec_layer, "cmd", " ".join(build_command(launcher, layer))) else: sub_element(spec_layer, "cmd", " ".join(layer.get_arg("command"))) sub_element(spec_layer, "range", str(frame_range)) @@ -328,12 +336,15 @@ def _serialize(launcher, use_pycuerun): else: logger.debug("%s is set to override service cores.", layer.get_name()) if layer.is_arg_set("cores") and layer.is_arg_set("threads"): - logger.warning("%s has both cores and threads. Use cores.", layer.get_name()) + logger.warning( + "%s has both cores and threads. 
Use cores.", layer.get_name() + ) sub_element(spec_layer, "cores", "%0.1f" % float(cores)) if layer.is_arg_set("threadable"): - sub_element(spec_layer, "threadable", - bool_to_str(layer.get_arg("threadable"))) + sub_element( + spec_layer, "threadable", bool_to_str(layer.get_arg("threadable")) + ) if layer.get_arg("memory"): sub_element(spec_layer, "memory", "%s" % (layer.get_arg("memory"))) @@ -370,13 +381,24 @@ def _serialize(launcher, use_pycuerun): if layer.get_arg("timeout_llu"): if spec_version >= Version("1.10"): - sub_element(spec_layer, "timeout_llu", "%s" % (layer.get_arg("timeout_llu"))) + sub_element( + spec_layer, "timeout_llu", "%s" % (layer.get_arg("timeout_llu")) + ) else: _warning_spec_version(spec_version, "timeout_llu") + if layer.get_arg("slots_required"): + if spec_version >= Version("1.16"): + sub_element( + spec_layer, + "slots_required", + "%s" % (layer.get_arg("slots_required")), + ) + else: + _warning_spec_version(spec_version, "slots_required") + if os.environ.get("OL_TAG_OVERRIDE", False): - sub_element(spec_layer, "tags", - scrub_tags(os.environ["OL_TAG_OVERRIDE"])) + sub_element(spec_layer, "tags", scrub_tags(os.environ["OL_TAG_OVERRIDE"])) elif layer.get_arg("tags"): sub_element(spec_layer, "tags", scrub_tags(layer.get_arg("tags"))) @@ -412,7 +434,8 @@ def _serialize(launcher, use_pycuerun): if not layers: raise outline.exception.OutlineException( "Failed to launch job. There are no layers with frame " - "ranges that intersect the job's frame range: %s" % ol.get_frame_range()) + "ranges that intersect the job's frame range: %s" % ol.get_frame_range() + ) # Dependencies go after all of the layers root.append(depends) @@ -421,7 +444,7 @@ def _serialize(launcher, use_pycuerun): '', '' % spec_version, - Et.tostring(root).decode() + Et.tostring(root).decode(), ] result = "".join(xml) @@ -434,8 +457,7 @@ def scrub_tags(tags): Ensure that layer tags pass in as a string are formatted properly. 
""" if isinstance(tags, str): - tags = [tag.strip() for tag in tags.split("|") - if tag.strip().isalnum()] + tags = [tag.strip() for tag in tags.split("|") if tag.strip().isalnum()] return " | ".join(tags) @@ -455,21 +477,25 @@ def build_dependencies(ol, layer, all_depends): add them to the job spec. """ for dep in layer.get_depends(): - - depend = Et.SubElement(all_depends, "depend", - type=dep.get_type(), - anyframe=bool_to_str(dep.is_any_frame())) + depend = Et.SubElement( + all_depends, + "depend", + type=dep.get_type(), + anyframe=bool_to_str(dep.is_any_frame()), + ) if dep.get_type() == outline.depend.DependType.LayerOnSimFrame: - frame_range = dep.get_depend_on_layer().get_frame_range() first_frame = FileSequence.FrameSet(frame_range)[0] sub_element(depend, "depjob", ol.get_name()) sub_element(depend, "deplayer", layer.get_name()) sub_element(depend, "onjob", ol.get_name()) - sub_element(depend, "onframe", "%04d-%s" - % (first_frame, dep.get_depend_on_layer().get_name())) + sub_element( + depend, + "onframe", + "%04d-%s" % (first_frame, dep.get_depend_on_layer().get_name()), + ) else: sub_element(depend, "depjob", ol.get_name()) sub_element(depend, "deplayer", layer.get_name()) diff --git a/pyoutline/outline/layer.py b/pyoutline/outline/layer.py index 856850ba7..fc41872a4 100644 --- a/pyoutline/outline/layer.py +++ b/pyoutline/outline/layer.py @@ -15,25 +15,22 @@ """Base classes for all outline modules.""" -from __future__ import annotations -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division +from __future__ import absolute_import, annotations, division, print_function +import logging import os import sys -import logging import tempfile from typing import ( - TypedDict, - List, - Optional, + Any, Callable, Dict, - Any, - Union, - Tuple, + List, + Optional, Set, + Tuple, + TypedDict, + Union, ) import FileSequence @@ -47,9 +44,9 @@ import outline.util if sys.version_info >= (3, 12): - from typing 
import override, Unpack + from typing import Unpack, override else: - from typing_extensions import override, Unpack + from typing_extensions import Unpack, override __all__ = [ "Layer", @@ -110,6 +107,8 @@ class _LayerArgs(TypedDict, total=False): # timeout_llu: Timeout for long last update in seconds # before considering a frame hung timeout_llu: int + # slots_required: Number of slots required per frame (<=0 means not slot-based) + slots_required: int type: outline.constants.LayerType # The layer type (Render, Util, Post) @@ -1294,11 +1293,15 @@ class LayerPostProcess(Frame): the parent and the post process. """ - def __init__(self, creator: Layer, propigate: bool = True, **args: Unpack[_LayerArgs]) -> None: + def __init__( + self, creator: Layer, propigate: bool = True, **args: Unpack[_LayerArgs] + ) -> None: super().__init__(f"{creator.get_name()}_postprocess", **args) self.__creator = creator - self.depend_on(creator, outline.depend.DependType.LayerOnLayer, propigate=propigate) + self.depend_on( + creator, outline.depend.DependType.LayerOnLayer, propigate=propigate + ) self.set_type(outline.constants.LayerType.UTIL) From b3b90cfe8fb354018787362086ac93c86b63d9cb Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Fri, 12 Dec 2025 10:23:39 -0800 Subject: [PATCH 05/17] [rework] Handle hardcoded values --- rust/crates/scheduler/src/host_cache/cache.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/crates/scheduler/src/host_cache/cache.rs b/rust/crates/scheduler/src/host_cache/cache.rs index 4d76b2ddc..4dcbb5d5b 100644 --- a/rust/crates/scheduler/src/host_cache/cache.rs +++ b/rust/crates/scheduler/src/host_cache/cache.rs @@ -161,6 +161,7 @@ impl HostCache { ResourceRequest::Slots(slots) => self // Request a host with minimum requirements as the remove logic already accounts for // limiting slots + // TODO: Replace and consider hardcoded values .remove_host(CoreSize(1), ByteSize::mib(256), slots, validation) .ok_or(HostCacheError::NoCandidateAvailable)?, }; 
From 4cee5d07e4c9569b283bf7787c5d185b876e7df4 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 17 Dec 2025 11:02:23 -0800 Subject: [PATCH 06/17] Clean up warning from rust modules --- rust/crates/rqd/src/system/linux.rs | 6 +++--- rust/crates/scheduler/src/host_cache/cache.rs | 2 -- rust/crates/scheduler/src/host_cache/messages.rs | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/rust/crates/rqd/src/system/linux.rs b/rust/crates/rqd/src/system/linux.rs index 678ce2b01..721a3569d 100644 --- a/rust/crates/rqd/src/system/linux.rs +++ b/rust/crates/rqd/src/system/linux.rs @@ -14,8 +14,8 @@ use libc::{_SC_CLK_TCK, _SC_PAGESIZE}; use chrono::{DateTime, Local}; use dashmap::{DashMap, DashSet}; use itertools::Itertools; -use miette::{Context, IntoDiagnostic, Result, miette}; -use nix::sys::signal::{Signal, kill, killpg}; +use miette::{miette, Context, IntoDiagnostic, Result}; +use nix::sys::signal::{kill, killpg, Signal}; use opencue_proto::{ host::HardwareState, report::{ChildrenProcStats, ProcStats, Stat}, @@ -954,7 +954,7 @@ impl SystemManager for LinuxSystem { #[cfg(test)] mod tests { - use crate::config::{MachineConfig, MemoryMetric}; + use crate::config::MachineConfig; use std::fs; use std::{collections::HashMap, sync::Mutex}; diff --git a/rust/crates/scheduler/src/host_cache/cache.rs b/rust/crates/scheduler/src/host_cache/cache.rs index 4dcbb5d5b..bd580536f 100644 --- a/rust/crates/scheduler/src/host_cache/cache.rs +++ b/rust/crates/scheduler/src/host_cache/cache.rs @@ -23,10 +23,8 @@ use std::{ cell::RefCell, collections::{BTreeMap, HashSet}, - rc::Rc, sync::RwLock, time::{Duration, SystemTime}, - u32, }; use bytesize::ByteSize; diff --git a/rust/crates/scheduler/src/host_cache/messages.rs b/rust/crates/scheduler/src/host_cache/messages.rs index ce85d5c5a..56c74aad8 100644 --- a/rust/crates/scheduler/src/host_cache/messages.rs +++ b/rust/crates/scheduler/src/host_cache/messages.rs @@ -1,13 +1,12 @@ use actix::{Message, MessageResponse}; 
-use bytesize::ByteSize; use miette::Result; use uuid::Uuid; use crate::{ cluster_key::{ClusterKey, Tag}, host_cache::HostCacheError, - models::{CoreSize, Host, ResourceRequest}, + models::{Host, ResourceRequest}, }; /// Response containing a checked-out host and its associated cluster key. From 02b1f20ce436bf73b1d18e4dc9847bcae4a5a912 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 17 Dec 2025 15:47:38 -0800 Subject: [PATCH 07/17] [pycue/cuegui/cuebot] Add concurrent_procs_limit Add a menu action for setting a host's slot limit. When a limit is defined, booking will only allocate layers with slots_required > 0 to be executed on this host. Which means regular booking by cores/memory/gpu becomes disabled. (0 for no limit, >0 for specific limit) Changes: - Add new proto field to Host and NestedHost - Change pycue to allow setting concurrent_procs_limit - Change cuegui action menu to add an option to update the new field - Update Cuebot to receive the request and update the database --- .../com/imageworks/spcue/dao/HostDao.java | 8 +++++ .../spcue/dao/postgres/HostDaoJdbc.java | 6 ++++ .../imageworks/spcue/servant/ManageHost.java | 11 ++++++ .../imageworks/spcue/service/HostManager.java | 8 +++++ .../spcue/service/HostManagerService.java | 5 +++ .../migrations/V35__Add_host_frame_limit.sql | 2 +- cuegui/cuegui/HostMonitorTree.py | 1 + cuegui/cuegui/MenuActions.py | 36 +++++++++++++++++++ proto/src/host.proto | 17 +++++++++ pycue/opencue/wrappers/host.py | 9 +++++ 10 files changed, 102 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java index 5354dd9b8..e2269e3a2 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java @@ -243,6 +243,14 @@ public interface HostDao { */ void updateThreadMode(HostInterface host, ThreadMode mode); + /** + * Update the host's concurrent procs 
limit. + * + * @param host HostInterface + * @param limit int (0 for no limit) + */ + void updateConcurrentSlotsLimit(HostInterface host, int limit); + /** * Update the specified host's hardware information. * diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java index a83256f6a..32a2d822d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java @@ -563,6 +563,12 @@ public void updateThreadMode(HostInterface host, ThreadMode mode) { mode.getNumber(), host.getHostId()); } + @Override + public void updateConcurrentSlotsLimit(HostInterface host, int limit) { + getJdbcTemplate().update("UPDATE host SET int_concurrent_slots_limit=? WHERE pk_host=?", + limit, host.getHostId()); + } + @Override public void updateHostOs(HostInterface host, String os) { getJdbcTemplate().update("UPDATE host_stat SET str_os=? 
WHERE pk_host=?", os, diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageHost.java b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageHost.java index 5732af62d..9ed1a25a2 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/servant/ManageHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/ManageHost.java @@ -74,6 +74,8 @@ import com.imageworks.spcue.grpc.host.HostSetHardwareStateRequest; import com.imageworks.spcue.grpc.host.HostSetHardwareStateResponse; import com.imageworks.spcue.grpc.host.HostSetOsRequest; +import com.imageworks.spcue.grpc.host.HostSetConcurrentSlotsLimitRequest; +import com.imageworks.spcue.grpc.host.HostSetConcurrentSlotsLimitResponse; import com.imageworks.spcue.grpc.host.HostSetOsResponse; import com.imageworks.spcue.grpc.host.HostSetThreadModeRequest; import com.imageworks.spcue.grpc.host.HostSetThreadModeResponse; @@ -323,6 +325,15 @@ public void setOs(HostSetOsRequest request, responseObserver.onCompleted(); } + @Override + public void setConcurrentSlotsLimit(HostSetConcurrentSlotsLimitRequest request, + StreamObserver responseObserver) { + HostInterface host = getHostInterface(request.getHost()); + hostManager.setConcurrentSlotsLimit(host, request.getLimit()); + responseObserver.onNext(HostSetConcurrentSlotsLimitResponse.newBuilder().build()); + responseObserver.onCompleted(); + } + public HostManager getHostManager() { return hostManager; } diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java index 8568f66a6..47ab6c508 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java @@ -67,6 +67,14 @@ public interface HostManager { */ void setHostFreeTempDir(HostInterface host, Long freeTempDir); + /** + * Updates the concurrent procs limit of a host. 
+ * + * @param host HostInterface + * @param limit int + */ + void setConcurrentSlotsLimit(HostInterface host, int limit); + DispatchHost createHost(HostReport report); DispatchHost createHost(RenderHost host); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java index d58cd86ad..094dba5f8 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java @@ -94,6 +94,11 @@ public void setHostFreeTempDir(HostInterface host, Long freeTempDir) { hostDao.updateHostFreeTempDir(host, freeTempDir); } + @Override + public void setConcurrentSlotsLimit(HostInterface host, int limit) { + hostDao.updateConcurrentSlotsLimit(host, limit); + } + public void rebootWhenIdle(HostInterface host) { try { hostDao.updateHostState(host, HardwareState.REBOOT_WHEN_IDLE); diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql index bc5ea2b18..7ef883933 100644 --- a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql @@ -1,7 +1,7 @@ -- Add a field to limit the max amount of concurrent procs a host can run -- -1 means no limit alter table host - add int_concurrent_procs_limit INT NOT NULL DEFAULT -1; + add int_concurrent_slots_limit INT NOT NULL DEFAULT -1; alter table host_stat add int_running_procs INT NOT NULL DEFAULT 0; diff --git a/cuegui/cuegui/HostMonitorTree.py b/cuegui/cuegui/HostMonitorTree.py index 207126214..1aa33761b 100644 --- a/cuegui/cuegui/HostMonitorTree.py +++ b/cuegui/cuegui/HostMonitorTree.py @@ -290,6 +290,7 @@ def contextMenuEvent(self, e): self.__menuActions.hosts().addAction(menu, "removeTags") 
self.__menuActions.hosts().addAction(menu, "renameTag") self.__menuActions.hosts().addAction(menu, "changeAllocation") + self.__menuActions.hosts().addAction(menu, "setConcurrentSlotsLimit") self.__menuActions.hosts().addAction(menu, "delete") self.__menuActions.hosts().addAction(menu, "rebootWhenIdle") self.__menuActions.hosts().addAction(menu, "setRepair") diff --git a/cuegui/cuegui/MenuActions.py b/cuegui/cuegui/MenuActions.py index fcdae5d41..bc11c444e 100644 --- a/cuegui/cuegui/MenuActions.py +++ b/cuegui/cuegui/MenuActions.py @@ -1936,6 +1936,42 @@ def setThreadModeVariable(self, rpcObjects=None): host.setThreadMode("VARIABLE") self._update() + setConcurrentSlotsLimit_info = ["Update Slot Limit...", None, "configure"] + def setConcurrentSlotsLimit(self, rpcObjects=None): + """Set the concurrent slots limit for selected hosts.""" + hosts = self._getOnlyHostObjects(rpcObjects) + if not hosts: + return + + # Get current value from first selected host + current = hosts[0].concurrentSlotsLimit() if len(hosts) == 1 else 0 + + title = "Set Concurrent Slots Limit" + body = "Enter maximum concurrent slots (usually a frame consumes 1 slot, " \ + "the value can be configured on its layer's slot_required field)\n" \ + "When a limit is defined, booking will only allocate layers with " \ + "slots_required > 0 to be executed on this host. 
Which means regular booking by " \ + "cores/memory/gpu becomes disabled.\n" \ + "(0 for no limit, >0 for specific limit):" + + (value, choice) = QtWidgets.QInputDialog.getInt( + self._caller, + title, + body, + current, # current value + 0, # minimum value + 10000, # maximum value + 1, # step + ) + + if choice: + for host in hosts: + self.cuebotCall( + host.setConcurrentSlotsLimit, + "Set Concurrent Procs Limit on %s Failed" % host.data.name, + int(value), + ) + self._update() class ProcActions(AbstractActions): """Actions for procs.""" diff --git a/proto/src/host.proto b/proto/src/host.proto index b321fdec1..1fa1f9281 100644 --- a/proto/src/host.proto +++ b/proto/src/host.proto @@ -96,6 +96,9 @@ service HostInterface { // Changes the host's [ThreadMode] rpc SetThreadMode(HostSetThreadModeRequest) returns (HostSetThreadModeResponse); + // Set the maximum concurrent procs limit for the host. + rpc SetConcurrentSlotsLimit(HostSetConcurrentSlotsLimitRequest) returns (HostSetConcurrentSlotsLimitResponse); + // Unlocks the host for booking if the proc is in the Locked state. You cannot unlock a NimbyLocked proc. rpc Unlock(HostUnlockRequest) returns (HostUnlockResponse); } @@ -274,6 +277,11 @@ message Host { ThreadMode thread_mode = 27; float gpus = 28; float idle_gpus = 29; + + // When a limit is defined, booking will only allocate layers with slots_required > 0 to be + // executed on this host. Which means regular booking by cores/memory/gpu becomes disabled. 
+ // (0 for no limit, >0 for specific limit) + int32 concurrent_procs_limit = 30; } message HostSearchCriteria { @@ -321,6 +329,7 @@ message NestedHost { NestedProcSeq procs = 28; float gpus = 29; float idle_gpus = 30; + int32 concurrent_procs_limit = 31; } message NestedHostSeq { @@ -636,6 +645,14 @@ message HostSetThreadModeRequest { message HostSetThreadModeResponse {} // Empty +// SetConcurrentSlotsLimit +message HostSetConcurrentSlotsLimitRequest { + Host host = 1; + int32 limit = 2; +} + +message HostSetConcurrentSlotsLimitResponse {} // Empty + // Unlock message HostUnlockRequest { Host host = 1; diff --git a/pycue/opencue/wrappers/host.py b/pycue/opencue/wrappers/host.py index c6fda1e87..8c2d6ad7e 100644 --- a/pycue/opencue/wrappers/host.py +++ b/pycue/opencue/wrappers/host.py @@ -633,6 +633,15 @@ def os(self): """ return self.data.os + def concurrentSlotsLimit(self): + """Returns the concurrent procs limit. + + :rtype: int + :return: the concurrent procs limit (0 = no limit) + """ + return self.data.concurrent_procs_limit + + class NestedHost(Host): """This class contains information and actions related to a nested host.""" From ed316e1bd3adec5d40d8425c02e987997b75cd85 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Thu, 18 Dec 2025 09:39:41 -0800 Subject: [PATCH 08/17] Rename concurrent_procs to concurrent slots --- .gitignore | 1 + cuebot/build.gradle | 1 + cuegui/cuegui/MenuActions.py | 2 +- proto/src/host.proto | 764 +++++++++--------- pycue/opencue/wrappers/host.py | 11 +- pycue/opencue/wrappers/layer.py | 229 ++---- pyoutline/outline/backend/cue.py | 114 ++- rust/crates/scheduler/src/dao/host_dao.rs | 8 +- rust/crates/scheduler/src/host_cache/cache.rs | 4 +- rust/crates/scheduler/src/host_cache/store.rs | 2 +- rust/crates/scheduler/src/models/host.rs | 4 +- 11 files changed, 535 insertions(+), 605 deletions(-) diff --git a/.gitignore b/.gitignore index a1f68081a..4c8dfbf22 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ 
sandbox/kafka-data sandbox/zookeeper-data sandbox/zookeeper-logs sandbox/rqd/shots/ +sandbox/pgadmin-data docs/_data/version.yml target/* diff --git a/cuebot/build.gradle b/cuebot/build.gradle index e944402a7..bbbef89f2 100644 --- a/cuebot/build.gradle +++ b/cuebot/build.gradle @@ -88,6 +88,7 @@ compileTestJava { options.compilerArgs << "-Xlint:all,-serial" << "-Werror" } + protobuf { protoc { // The protoc compiler diff --git a/cuegui/cuegui/MenuActions.py b/cuegui/cuegui/MenuActions.py index bc11c444e..7f26da00e 100644 --- a/cuegui/cuegui/MenuActions.py +++ b/cuegui/cuegui/MenuActions.py @@ -1968,7 +1968,7 @@ def setConcurrentSlotsLimit(self, rpcObjects=None): for host in hosts: self.cuebotCall( host.setConcurrentSlotsLimit, - "Set Concurrent Procs Limit on %s Failed" % host.data.name, + "Set Concurrent Slots Limit on %s Failed" % host.data.name, int(value), ) self._update() diff --git a/proto/src/host.proto b/proto/src/host.proto index 1fa1f9281..cc6339f25 100644 --- a/proto/src/host.proto +++ b/proto/src/host.proto @@ -1,713 +1,707 @@ - syntax = "proto3"; package host; -option java_package = "com.imageworks.spcue.grpc.host"; -option java_multiple_files = true; - import "comment.proto"; import "criterion.proto"; import "job.proto"; import "renderPartition.proto"; option go_package = "opencue_gateway/gen/go"; +option java_multiple_files = true; +option java_package = "com.imageworks.spcue.grpc.host"; // -------- Services --------] service DeedInterface { - // Remove the deed - rpc Delete(DeedDeleteRequest) returns (DeedDeleteResponse); + // Remove the deed + rpc Delete(DeedDeleteRequest) returns (DeedDeleteResponse); - // Returns the full host for these settings. - rpc GetHost(DeedGetHostRequest) returns (DeedGetHostResponse); + // Returns the full host for these settings. + rpc GetHost(DeedGetHostRequest) returns (DeedGetHostResponse); - // Returns the owner for these settings. 
- rpc GetOwner(DeedGetOwnerRequest) returns (DeedGetOwnerResponse); + // Returns the owner for these settings. + rpc GetOwner(DeedGetOwnerRequest) returns (DeedGetOwnerResponse); } service HostInterface { - // Add a comment on this host. - rpc AddComment(HostAddCommentRequest) returns (HostAddCommentResponse); + // Add a comment on this host. + rpc AddComment(HostAddCommentRequest) returns (HostAddCommentResponse); - // Set a tag on this host. - rpc AddTags(HostAddTagsRequest) returns (HostAddTagsResponse); + // Set a tag on this host. + rpc AddTags(HostAddTagsRequest) returns (HostAddTagsResponse); - // Delete host. - rpc Delete(HostDeleteRequest) returns (HostDeleteResponse); + // Delete host. + rpc Delete(HostDeleteRequest) returns (HostDeleteResponse); - // Find a host by its name - rpc FindHost(HostFindHostRequest) returns (HostFindHostResponse); + // Find a host by its name + rpc FindHost(HostFindHostRequest) returns (HostFindHostResponse); - // Get the comments for this host. - rpc GetComments(HostGetCommentsRequest) returns (HostGetCommentsResponse); + // Get the comments for this host. + rpc GetComments(HostGetCommentsRequest) returns (HostGetCommentsResponse); - // Return the deed for this host. - rpc GetDeed(HostGetDeedRequest) returns (HostGetDeedResponse); + // Return the deed for this host. 
+ rpc GetDeed(HostGetDeedRequest) returns (HostGetDeedResponse); - // Return a host by its id - rpc GetHost(HostGetHostRequest) returns (HostGetHostResponse); + // Return a host by its id + rpc GetHost(HostGetHostRequest) returns (HostGetHostResponse); - // Search for a number of hosts - rpc GetHosts(HostGetHostsRequest) returns (HostGetHostsResponse); + // Search for a number of hosts + rpc GetHosts(HostGetHostsRequest) returns (HostGetHostsResponse); - // Return the host whiteboard - rpc GetHostWhiteboard(HostGetHostWhiteboardRequest) returns (HostGetHostWhiteboardResponse); + // Return the host whiteboard + rpc GetHostWhiteboard(HostGetHostWhiteboardRequest) returns (HostGetHostWhiteboardResponse); - // Get the owner settings of this particular host. - rpc GetOwner(HostGetOwnerRequest) returns (HostGetOwnerResponse); + // Get the owner settings of this particular host. + rpc GetOwner(HostGetOwnerRequest) returns (HostGetOwnerResponse); - // Returns the list of proc resources allocated from this host. - rpc GetProcs(HostGetProcsRequest) returns (HostGetProcsResponse); + // Returns the list of proc resources allocated from this host. + rpc GetProcs(HostGetProcsRequest) returns (HostGetProcsResponse); - // Return any render partitions that are setup on this host. - rpc GetRenderPartitions(HostGetRenderPartitionsRequest) returns (HostGetRenderPartitionsResponse); + // Return any render partitions that are setup on this host. + rpc GetRenderPartitions(HostGetRenderPartitionsRequest) returns (HostGetRenderPartitionsResponse); - // Locks the host. Its possible we'll need to pass in a show name here in the future - rpc Lock(HostLockRequest) returns (HostLockResponse); + // Locks the host. Its possible we'll need to pass in a show name here in the future + rpc Lock(HostLockRequest) returns (HostLockResponse); - // Issues an immediate reboot. - rpc Reboot(HostRebootRequest) returns (HostRebootResponse); + // Issues an immediate reboot. 
+ rpc Reboot(HostRebootRequest) returns (HostRebootResponse); - // Sets the reboot when idle state, nothing has to be locked to set this. When the host pings in idle a reboot - // command is sent to the host and the host will be put into the Rebooting state. If any locks are set they will - // be removed upon reboot. - rpc RebootWhenIdle(HostRebootWhenIdleRequest) returns (HostRebootWhenIdleResponse); + // Sets the reboot when idle state, nothing has to be locked to set this. When the host pings in idle a reboot + // command is sent to the host and the host will be put into the Rebooting state. If any locks are set they will + // be removed upon reboot. + rpc RebootWhenIdle(HostRebootWhenIdleRequest) returns (HostRebootWhenIdleResponse); - // Redirect the given procs to the specified job. - rpc RedirectToJob(HostRedirectToJobRequest) returns (HostRedirectToJobResponse); + // Redirect the given procs to the specified job. + rpc RedirectToJob(HostRedirectToJobRequest) returns (HostRedirectToJobResponse); - // Remove a tag from this host. - rpc RemoveTags(HostRemoveTagsRequest) returns (HostRemoveTagsResponse); + // Remove a tag from this host. + rpc RemoveTags(HostRemoveTagsRequest) returns (HostRemoveTagsResponse); - // Rename tag. - rpc RenameTag(HostRenameTagRequest) returns (HostRenameTagResponse); + // Rename tag. + rpc RenameTag(HostRenameTagRequest) returns (HostRenameTagResponse); - // Assign a host to an allocation. - rpc SetAllocation(HostSetAllocationRequest) returns (HostSetAllocationResponse); + // Assign a host to an allocation. + rpc SetAllocation(HostSetAllocationRequest) returns (HostSetAllocationResponse); - // Manually set the hardware state for the host. The hardware state may be changed automatically if the host pings - // in. If the hardware state is set to "Reimage", the state will not automatically change with a host ping, and - // must be manually set back to Up. 
- rpc SetHardwareState(HostSetHardwareStateRequest) returns (HostSetHardwareStateResponse); + // Manually set the hardware state for the host. The hardware state may be changed automatically if the host pings + // in. If the hardware state is set to "Reimage", the state will not automatically change with a host ping, and + // must be manually set back to Up. + rpc SetHardwareState(HostSetHardwareStateRequest) returns (HostSetHardwareStateResponse); - // Set the name of the host operating system. - rpc SetOs(HostSetOsRequest) returns (HostSetOsResponse); + // Set the name of the host operating system. + rpc SetOs(HostSetOsRequest) returns (HostSetOsResponse); - // Changes the host's [ThreadMode] - rpc SetThreadMode(HostSetThreadModeRequest) returns (HostSetThreadModeResponse); + // Changes the host's [ThreadMode] + rpc SetThreadMode(HostSetThreadModeRequest) returns (HostSetThreadModeResponse); - // Set the maximum concurrent procs limit for the host. - rpc SetConcurrentSlotsLimit(HostSetConcurrentSlotsLimitRequest) returns (HostSetConcurrentSlotsLimitResponse); + // Set the maximum concurrent procs limit for the host. + rpc SetConcurrentSlotsLimit(HostSetConcurrentSlotsLimitRequest) returns (HostSetConcurrentSlotsLimitResponse); - // Unlocks the host for booking if the proc is in the Locked state. You cannot unlock a NimbyLocked proc. - rpc Unlock(HostUnlockRequest) returns (HostUnlockResponse); + // Unlocks the host for booking if the proc is in the Locked state. You cannot unlock a NimbyLocked proc. + rpc Unlock(HostUnlockRequest) returns (HostUnlockResponse); } service OwnerInterface { - // Deletes the owner record. - rpc Delete(OwnerDeleteRequest) returns (OwnerDeleteResponse); + // Deletes the owner record. + rpc Delete(OwnerDeleteRequest) returns (OwnerDeleteResponse); - // Get a list of all deeds this owner has. - rpc GetDeeds(OwnerGetDeedsRequest) returns (OwnerGetDeedsResponse); + // Get a list of all deeds this owner has. 
+ rpc GetDeeds(OwnerGetDeedsRequest) returns (OwnerGetDeedsResponse); - // Get a list of all hosts this owner is responsible for. - rpc GetHosts(OwnerGetHostsRequest) returns (OwnerGetHostsResponse); + // Get a list of all hosts this owner is responsible for. + rpc GetHosts(OwnerGetHostsRequest) returns (OwnerGetHostsResponse); - // Return an Owner record by name, id, or email. - rpc GetOwner(OwnerGetOwnerRequest) returns (OwnerGetOwnerResponse); + // Return an Owner record by name, id, or email. + rpc GetOwner(OwnerGetOwnerRequest) returns (OwnerGetOwnerResponse); - //Sets the owners show. - rpc SetShow(OwnerSetShowRequest) returns (OwnerSetShowResponse); + //Sets the owners show. + rpc SetShow(OwnerSetShowRequest) returns (OwnerSetShowResponse); - // Set the hosts new owner settings. Any host may have an owner, not just desktops. This allows direct control of - // the cores. By default hosts have no owner settings. - rpc TakeOwnership(OwnerTakeOwnershipRequest) returns (OwnerTakeOwnershipResponse); + // Set the hosts new owner settings. Any host may have an owner, not just desktops. This allows direct control of + // the cores. By default hosts have no owner settings. + rpc TakeOwnership(OwnerTakeOwnershipRequest) returns (OwnerTakeOwnershipResponse); } service ProcInterface { - // Clears the redirect off of the proc so it dispatches naturally. - rpc ClearRedirect(ProcClearRedirectRequest) returns (ProcClearRedirectResponse); + // Clears the redirect off of the proc so it dispatches naturally. + rpc ClearRedirect(ProcClearRedirectRequest) returns (ProcClearRedirectResponse); - // Returns the [Frame] running on the [Proc] - rpc GetFrame(ProcGetFrameRequest) returns (ProcGetFrameResponse); + // Returns the [Frame] running on the [Proc] + rpc GetFrame(ProcGetFrameRequest) returns (ProcGetFrameResponse); - // Returns the [Host] this [Proc] was allocated from. 
- rpc GetHost(ProcGetHostRequest) returns (ProcGetHostResponse); + // Returns the [Host] this [Proc] was allocated from. + rpc GetHost(ProcGetHostRequest) returns (ProcGetHostResponse); - // Returns the [Job] the [Proc] has been assigned to. - rpc GetJob(ProcGetJobRequest) returns (ProcGetJobResponse); + // Returns the [Job] the [Proc] has been assigned to. + rpc GetJob(ProcGetJobRequest) returns (ProcGetJobResponse); - // Returns the [Layer] the [Proc] has been assigned to. - rpc GetLayer(ProcGetLayerRequest) returns (ProcGetLayerResponse); + // Returns the [Layer] the [Proc] has been assigned to. + rpc GetLayer(ProcGetLayerRequest) returns (ProcGetLayerResponse); - // Return a list of procs matching the search - rpc GetProcs(ProcGetProcsRequest) returns (ProcGetProcsResponse); + // Return a list of procs matching the search + rpc GetProcs(ProcGetProcsRequest) returns (ProcGetProcsResponse); - // Sends a kill signal to the running process. - rpc Kill(ProcKillRequest) returns (ProcKillResponse); + // Sends a kill signal to the running process. + rpc Kill(ProcKillRequest) returns (ProcKillResponse); - // Unbooks and redriects the proc to the specified group. Optionally kills the proc immediately. Will overwrite an - // existing redirect. Return true if the redirect was a success. The redirect would fail in the event that the - // specified group does not have a suitable frame for the proc. - rpc RedirectToGroup(ProcRedirectToGroupRequest) returns (ProcRedirectToGroupResponse); + // Unbooks and redriects the proc to the specified group. Optionally kills the proc immediately. Will overwrite an + // existing redirect. Return true if the redirect was a success. The redirect would fail in the event that the + // specified group does not have a suitable frame for the proc. + rpc RedirectToGroup(ProcRedirectToGroupRequest) returns (ProcRedirectToGroupResponse); - // Unbooks and redriects the proc to the specified job. Optionally kills the proc immediately. 
Will overwrite an - // existing redirect. Return true if the redirect was a success. The redirect would fail in the event th - rpc RedirectToJob(ProcRedirectToJobRequest) returns (ProcRedirectToJobResponse); + // Unbooks and redriects the proc to the specified job. Optionally kills the proc immediately. Will overwrite an + // existing redirect. Return true if the redirect was a success. The redirect would fail in the event th + rpc RedirectToJob(ProcRedirectToJobRequest) returns (ProcRedirectToJobResponse); - // Unbooks this [Proc]. Unbooking means the [Proc] will automatically seek out a new [Job] when the current - // [Frame] is complete. - rpc Unbook(ProcUnbookRequest) returns (ProcUnbookResponse); + // Unbooks this [Proc]. Unbooking means the [Proc] will automatically seek out a new [Job] when the current + // [Frame] is complete. + rpc Unbook(ProcUnbookRequest) returns (ProcUnbookResponse); - //Unbooks procs that match the ProcSearchCriteria. This request can span jobs, shows, allocations, hosts etc. - // Set kill to true if the running frames should immediately be killed. - rpc UnbookProcs(ProcUnbookProcsRequest) returns (ProcUnbookProcsResponse); + //Unbooks procs that match the ProcSearchCriteria. This request can span jobs, shows, allocations, hosts etc. + // Set kill to true if the running frames should immediately be killed. + rpc UnbookProcs(ProcUnbookProcsRequest) returns (ProcUnbookProcsResponse); - // Unbooks procs that match the ProcSearchCriteria and books them on the specified group, assuming the group has - // layers that can take the procs. If the kill boolean is set to true, the operation happens immediately. If false, - // the proc will move after it finishes its current frame. - rpc UnbookToGroup(ProcUnbookToGroupRequest) returns (ProcUnbookToGroupResponse); + // Unbooks procs that match the ProcSearchCriteria and books them on the specified group, assuming the group has + // layers that can take the procs. 
If the kill boolean is set to true, the operation happens immediately. If false, + // the proc will move after it finishes its current frame. + rpc UnbookToGroup(ProcUnbookToGroupRequest) returns (ProcUnbookToGroupResponse); - // Unbooks procs that match the ProcSearchCriteria and books them on the specified list of jobs, assuming those jobs - // have layers that can take the procs. If the kill boolean is set to true, the operation happens immediately. If - // false, the proc will move after it finishes its current frame. - rpc UnbookToJob(ProcUnbookToJobRequest) returns (ProcUnbookToJobResponse); + // Unbooks procs that match the ProcSearchCriteria and books them on the specified list of jobs, assuming those jobs + // have layers that can take the procs. If the kill boolean is set to true, the operation happens immediately. If + // false, the proc will move after it finishes its current frame. + rpc UnbookToJob(ProcUnbookToJobRequest) returns (ProcUnbookToJobResponse); } - // -------- Enums -------- enum HardwareState { - UP = 0; - DOWN = 1; - REBOOTING = 2; - REBOOT_WHEN_IDLE = 3; - REPAIR = 4; + UP = 0; + DOWN = 1; + REBOOTING = 2; + REBOOT_WHEN_IDLE = 3; + REPAIR = 4; } enum HostTagType { - MANUAL = 0; - HARDWARE = 1; - ALLOC = 2; - HOSTNAME = 3; + MANUAL = 0; + HARDWARE = 1; + ALLOC = 2; + HOSTNAME = 3; } enum LockState { - OPEN = 0; - LOCKED = 1; - NIMBY_LOCKED = 2; + OPEN = 0; + LOCKED = 1; + NIMBY_LOCKED = 2; } // Proc redirects can have two different types of destinations, jobs and groups. enum RedirectType { - JOB_REDIRECT = 0; - GROUP_REDIRECT = 1; + JOB_REDIRECT = 0; + GROUP_REDIRECT = 1; } // Defines the possible states for a core or proc enum RunState { - // Entity is idle, which means it can be booked. - IDLE = 0; - // Entity is booked, which means its in use on a render proc - BOOKED = 1; + // Entity is idle, which means it can be booked. 
+ IDLE = 0; + // Entity is booked, which means its in use on a render proc + BOOKED = 1; } enum ThreadMode { - // Auto determines the number of threads to use automatically - // based on the amount of memory used by the frame. - AUTO = 0; + // Auto determines the number of threads to use automatically + // based on the amount of memory used by the frame. + AUTO = 0; - // All always uses all of the cores available on the proc. - // These hosts are always booked on threadable layers. - ALL = 1; + // All always uses all of the cores available on the proc. + // These hosts are always booked on threadable layers. + ALL = 1; - // All mode during the day, auto-mode at night. - VARIABLE = 2; + // All mode during the day, auto-mode at night. + VARIABLE = 2; } - // -------- Primary Message Types --------] message Deed { - string id = 1; - string host = 2; - string owner = 3; - string show = 4; + string id = 1; + string host = 2; + string owner = 3; + string show = 4; } message DeedSeq { - repeated Deed deeds = 1; + repeated Deed deeds = 1; } message HardwareStateSeq { - repeated HardwareState state = 1; + repeated HardwareState state = 1; } message LockStateSeq { - repeated LockState state = 1; + repeated LockState state = 1; } message Host { - string id = 1; - string name = 2; - string alloc_name = 3; - bool nimby_enabled = 4; - bool has_comment = 5; - float cores = 6; - float idle_cores = 7; - int64 memory = 8; - int64 idle_memory = 9; - int64 gpu_memory = 10; - int64 idle_gpu_memory = 11; - int64 total_swap = 12; - int64 total_memory = 13; - int64 total_gpu_memory = 14; - int64 total_mcp = 15; - int64 free_swap = 16; - int64 free_memory = 17; - int64 free_mcp = 18; - int64 free_gpu_memory = 19; - int32 load = 20; - int32 boot_time = 21; - int32 ping_time = 22; - string os = 23; - repeated string tags = 24; - HardwareState state = 25; - LockState lock_state = 26; - ThreadMode thread_mode = 27; - float gpus = 28; - float idle_gpus = 29; - - // When a limit is defined, 
booking will only allocate layers with slots_required > 0 to be - // executed on this host. Which means regular booking by cores/memory/gpu becomes disabled. - // (0 for no limit, >0 for specific limit) - int32 concurrent_procs_limit = 30; + string id = 1; + string name = 2; + string alloc_name = 3; + bool nimby_enabled = 4; + bool has_comment = 5; + float cores = 6; + float idle_cores = 7; + int64 memory = 8; + int64 idle_memory = 9; + int64 gpu_memory = 10; + int64 idle_gpu_memory = 11; + int64 total_swap = 12; + int64 total_memory = 13; + int64 total_gpu_memory = 14; + int64 total_mcp = 15; + int64 free_swap = 16; + int64 free_memory = 17; + int64 free_mcp = 18; + int64 free_gpu_memory = 19; + int32 load = 20; + int32 boot_time = 21; + int32 ping_time = 22; + string os = 23; + repeated string tags = 24; + HardwareState state = 25; + LockState lock_state = 26; + ThreadMode thread_mode = 27; + float gpus = 28; + float idle_gpus = 29; + + // When a limit is defined, booking will only allocate layers with slots_required > 0 to be + // executed on this host. Which means regular booking by cores/memory/gpu becomes disabled. 
+ // (0 for no limit, >0 for specific limit) + int32 concurrent_slots_limit = 30; } message HostSearchCriteria { - repeated string hosts = 1; - repeated string regex = 2; - repeated string substr = 3; - repeated string ids = 4; - repeated string allocs = 5; - HardwareStateSeq states = 6; - LockStateSeq lock_states = 7; + repeated string hosts = 1; + repeated string regex = 2; + repeated string substr = 3; + repeated string ids = 4; + repeated string allocs = 5; + HardwareStateSeq states = 6; + LockStateSeq lock_states = 7; } message HostSeq { - repeated Host hosts = 1; + repeated Host hosts = 1; } message NestedHost { - string id = 1; - string name = 2; - string alloc_name = 3; - bool nimby_enabled = 4; - bool has_comment = 5; - float cores = 6; - float idle_cores = 7; - int64 memory = 8; - int64 idle_memory = 9; - int64 gpu_memory = 10; - int64 idle_gpu_memory = 11; - int64 total_swap = 12; - int64 total_memory = 13; - int64 total_gpu_memory = 14; - int64 total_mcp = 15; - int64 free_swap = 16; - int64 free_memory = 17; - int64 free_mcp = 18; - int64 free_gpu_memory = 19; - int32 load = 20; - int32 boot_time = 21; - int32 ping_time = 22; - string os = 23; - repeated string tags = 24; - HardwareState state = 25; - LockState lock_state = 26; - ThreadMode thread_mode = 27; - NestedProcSeq procs = 28; - float gpus = 29; - float idle_gpus = 30; - int32 concurrent_procs_limit = 31; + string id = 1; + string name = 2; + string alloc_name = 3; + bool nimby_enabled = 4; + bool has_comment = 5; + float cores = 6; + float idle_cores = 7; + int64 memory = 8; + int64 idle_memory = 9; + int64 gpu_memory = 10; + int64 idle_gpu_memory = 11; + int64 total_swap = 12; + int64 total_memory = 13; + int64 total_gpu_memory = 14; + int64 total_mcp = 15; + int64 free_swap = 16; + int64 free_memory = 17; + int64 free_mcp = 18; + int64 free_gpu_memory = 19; + int32 load = 20; + int32 boot_time = 21; + int32 ping_time = 22; + string os = 23; + repeated string tags = 24; + HardwareState state 
= 25; + LockState lock_state = 26; + ThreadMode thread_mode = 27; + NestedProcSeq procs = 28; + float gpus = 29; + float idle_gpus = 30; + int32 concurrent_slots_limit = 31; } message NestedHostSeq { - repeated NestedHost nested_hosts = 1; + repeated NestedHost nested_hosts = 1; } message NestedProc { - string id = 1; - string name = 2; - string show_name = 3; - string job_name = 4; - string frame_name = 5; - string group_name = 6; - int32 ping_time = 7; - int32 bookedTime = 8; - int32 dispatch_time = 9; - int64 reserved_memory = 10; - int64 reserved_gpu_memory = 11; - int64 used_memory = 12; - float reserved_cores = 13; - bool unbooked = 14; - string log_path = 15; - string redirect_target = 16; - repeated string services = 17; - NestedHost parent = 18; - int64 used_gpu_memory = 19; - float reserved_gpus = 20; + string id = 1; + string name = 2; + string show_name = 3; + string job_name = 4; + string frame_name = 5; + string group_name = 6; + int32 ping_time = 7; + int32 bookedTime = 8; + int32 dispatch_time = 9; + int64 reserved_memory = 10; + int64 reserved_gpu_memory = 11; + int64 used_memory = 12; + float reserved_cores = 13; + bool unbooked = 14; + string log_path = 15; + string redirect_target = 16; + repeated string services = 17; + NestedHost parent = 18; + int64 used_gpu_memory = 19; + float reserved_gpus = 20; } message NestedProcSeq { - repeated NestedProc nested_procs = 1; + repeated NestedProc nested_procs = 1; } message Owner { - string id = 1; - string name = 2; - string show = 3; - int32 host_count = 4; + string id = 1; + string name = 2; + string show = 3; + int32 host_count = 4; } message Proc { - string id = 1; - string name = 2; - string show_name = 3; - string job_name = 4; - string frame_name = 5; - string group_name = 6; - int32 ping_time = 7; - int32 bookedTime = 8; - int32 dispatch_time = 9; - int64 reserved_memory = 10; - int64 reserved_gpu_memory = 11; - int64 used_memory = 12; - float reserved_cores = 13; - bool unbooked = 14; - string 
log_path = 15; - string redirect_target = 16; - repeated string services = 17; - int64 used_gpu_memory = 18; - float reserved_gpus = 19; - bytes child_processes = 20; + string id = 1; + string name = 2; + string show_name = 3; + string job_name = 4; + string frame_name = 5; + string group_name = 6; + int32 ping_time = 7; + int32 bookedTime = 8; + int32 dispatch_time = 9; + int64 reserved_memory = 10; + int64 reserved_gpu_memory = 11; + int64 used_memory = 12; + float reserved_cores = 13; + bool unbooked = 14; + string log_path = 15; + string redirect_target = 16; + repeated string services = 17; + int64 used_gpu_memory = 18; + float reserved_gpus = 19; + bytes child_processes = 20; } message ProcSearchCriteria { - // An array of host names to match. - repeated string hosts = 1; + // An array of host names to match. + repeated string hosts = 1; - // An array of job names to match. - repeated string jobs = 2; + // An array of job names to match. + repeated string jobs = 2; - // An array of layer names to match. - repeated string layers = 3; + // An array of layer names to match. + repeated string layers = 3; - // An array of show names to match. - repeated string shows = 4; + // An array of show names to match. + repeated string shows = 4; - // An array of allocation names to match. - repeated string allocs = 5; + // An array of allocation names to match. + repeated string allocs = 5; - // A range of memory usage. Values are in KB. - repeated criterion.InRangeIntegerSearchCriterion memory_range = 6; + // A range of memory usage. Values are in KB. + repeated criterion.InRangeIntegerSearchCriterion memory_range = 6; - // Less than memory usage. Values are in KB. - repeated criterion.GreaterThanIntegerSearchCriterion memory_greater_than = 10; + // Less than memory usage. Values are in KB. + repeated criterion.GreaterThanIntegerSearchCriterion memory_greater_than = 10; - // Greater than memory usage. Values are in KB. 
- repeated criterion.GreaterThanIntegerSearchCriterion memory_less_than = 11; + // Greater than memory usage. Values are in KB. + repeated criterion.GreaterThanIntegerSearchCriterion memory_less_than = 11; - // A duration range. Values are in seconds. - repeated criterion.InRangeIntegerSearchCriterion duration_range = 7; + // A duration range. Values are in seconds. + repeated criterion.InRangeIntegerSearchCriterion duration_range = 7; - //The maximum number of results. - repeated int32 max_results = 8; + //The maximum number of results. + repeated int32 max_results = 8; - //The offset of the first result. - int32 first_result = 9; + //The offset of the first result. + int32 first_result = 9; } message ProcSeq { - repeated Proc procs = 1; + repeated Proc procs = 1; } - // -------- Requests & Responses --------] // DEED ---- // Delete message DeedDeleteRequest { - Deed deed = 1; + Deed deed = 1; } message DeedDeleteResponse {} // Empty // GetHost message DeedGetHostRequest { - Deed deed = 1; + Deed deed = 1; } message DeedGetHostResponse { - Host host = 1; + Host host = 1; } // GetOwner message DeedGetOwnerRequest { - Deed deed = 1; + Deed deed = 1; } message DeedGetOwnerResponse { - Owner owner = 1; + Owner owner = 1; } // HOST ---- // AddComment message HostAddCommentRequest { - Host host = 1; - comment.Comment new_comment = 2; + Host host = 1; + comment.Comment new_comment = 2; } message HostAddCommentResponse {} // Empty // AddTags message HostAddTagsRequest { - Host host = 1; - repeated string tags = 2; + Host host = 1; + repeated string tags = 2; } message HostAddTagsResponse {} // Empty // Delete message HostDeleteRequest { - Host host = 1; + Host host = 1; } message HostDeleteResponse {} // Empty // FindHost message HostFindHostRequest { - string name = 1; + string name = 1; } message HostFindHostResponse { - Host host = 1; + Host host = 1; } // GetComments message HostGetCommentsRequest { - Host host = 1; + Host host = 1; } message HostGetCommentsResponse { 
- comment.CommentSeq comments = 1; + comment.CommentSeq comments = 1; } // GetDeed message HostGetDeedRequest { - Host host = 1; + Host host = 1; } message HostGetDeedResponse { - Deed deed = 1; + Deed deed = 1; } // GetHost message HostGetHostRequest { - string id = 1; + string id = 1; } message HostGetHostResponse { - Host host = 1; + Host host = 1; } // GetHosts message HostGetHostsRequest { - HostSearchCriteria r = 1; + HostSearchCriteria r = 1; } message HostGetHostsResponse { - HostSeq hosts = 1; + HostSeq hosts = 1; } // GetHostWhiteboard message HostGetHostWhiteboardRequest {} // Empty message HostGetHostWhiteboardResponse { - NestedHostSeq nested_hosts = 1; + NestedHostSeq nested_hosts = 1; } // GetOwner message HostGetOwnerRequest { - Host host = 1; + Host host = 1; } message HostGetOwnerResponse { - Owner owner = 1; + Owner owner = 1; } // GetProcs message HostGetProcsRequest { - Host host = 1; + Host host = 1; } message HostGetProcsResponse { - ProcSeq procs = 1; + ProcSeq procs = 1; } // GetRenderPartitions message HostGetRenderPartitionsRequest { - Host host = 1; + Host host = 1; } message HostGetRenderPartitionsResponse { - renderPartition.RenderPartitionSeq render_partitions = 1; + renderPartition.RenderPartitionSeq render_partitions = 1; } // Lock message HostLockRequest { - Host host = 1; + Host host = 1; } message HostLockResponse {} // Empty // Reboot message HostRebootRequest { - Host host = 1; + Host host = 1; } message HostRebootResponse {} // Empty // RebootWhenIdle message HostRebootWhenIdleRequest { - Host host = 1; + Host host = 1; } message HostRebootWhenIdleResponse {} // Empty // RedirectToJob message HostRedirectToJobRequest { - Host host = 1; - repeated string proc_names = 2; - string job_id = 3; + Host host = 1; + repeated string proc_names = 2; + string job_id = 3; } message HostRedirectToJobResponse { - bool value = 1; + bool value = 1; } // RemoveTags message HostRemoveTagsRequest { - Host host = 1; - repeated string tags = 2; + 
Host host = 1; + repeated string tags = 2; } message HostRemoveTagsResponse {} // Empty // RenameTag message HostRenameTagRequest { - Host host = 1; - string old_tag = 2; - string new_tag = 3; + Host host = 1; + string old_tag = 2; + string new_tag = 3; } message HostRenameTagResponse {} // Empty // SetAllocation message HostSetAllocationRequest { - Host host = 1; - string allocation_id = 2; + Host host = 1; + string allocation_id = 2; } message HostSetAllocationResponse {} // Empty // SetHardwareState message HostSetHardwareStateRequest { - Host host = 1; - HardwareState state = 2; + Host host = 1; + HardwareState state = 2; } -message HostSetHardwareStateResponse {} // Empty +message HostSetHardwareStateResponse {} // Empty // SetOs message HostSetOsRequest { - Host host = 1; - string os = 2; + Host host = 1; + string os = 2; } message HostSetOsResponse {} // Empty // SetThreadMode message HostSetThreadModeRequest { - Host host = 1; - ThreadMode mode = 2; + Host host = 1; + ThreadMode mode = 2; } message HostSetThreadModeResponse {} // Empty // SetConcurrentSlotsLimit message HostSetConcurrentSlotsLimitRequest { - Host host = 1; - int32 limit = 2; + Host host = 1; + int32 limit = 2; } message HostSetConcurrentSlotsLimitResponse {} // Empty // Unlock message HostUnlockRequest { - Host host = 1; + Host host = 1; } message HostUnlockResponse {} // Empty - // OWNER ---- // Delete message OwnerDeleteRequest { - Owner owner = 1; + Owner owner = 1; } message OwnerDeleteResponse {} // Empty // GetDeeds message OwnerGetDeedsRequest { - Owner owner = 1; + Owner owner = 1; } message OwnerGetDeedsResponse { - DeedSeq deeds = 1; + DeedSeq deeds = 1; } // GetHosts message OwnerGetHostsRequest { - Owner owner = 1; + Owner owner = 1; } message OwnerGetHostsResponse { - HostSeq hosts = 1; + HostSeq hosts = 1; } // GetOwner message OwnerGetOwnerRequest { - string name = 1; + string name = 1; } message OwnerGetOwnerResponse { - Owner owner = 1; + Owner owner = 1; } // SetShow 
message OwnerSetShowRequest { - Owner owner = 1; - string show = 2; + Owner owner = 1; + string show = 2; } message OwnerSetShowResponse {} // Empty // TakeOwnership message OwnerTakeOwnershipRequest { - Owner owner = 1; - string host = 2; + Owner owner = 1; + string host = 2; } message OwnerTakeOwnershipResponse {} // Empty @@ -715,123 +709,123 @@ message OwnerTakeOwnershipResponse {} // Empty // PROC --- // ClearRedirect message ProcClearRedirectRequest { - Proc proc = 1; + Proc proc = 1; } message ProcClearRedirectResponse { - bool value = 1; + bool value = 1; } // GetFrame message ProcGetFrameRequest { - Proc proc = 1; + Proc proc = 1; } message ProcGetFrameResponse { - job.Frame frame = 1; + job.Frame frame = 1; } // GetHost message ProcGetHostRequest { - Proc proc = 1; + Proc proc = 1; } message ProcGetHostResponse { - Host host = 1; + Host host = 1; } // GetJob message ProcGetJobRequest { - Proc proc = 1; + Proc proc = 1; } message ProcGetJobResponse { - job.Job job = 1; + job.Job job = 1; } // GetLayer message ProcGetLayerRequest { - Proc proc = 1; + Proc proc = 1; } message ProcGetLayerResponse { - job.Layer layer = 1; + job.Layer layer = 1; } // GetProcs message ProcGetProcsRequest { - ProcSearchCriteria r = 1; + ProcSearchCriteria r = 1; } message ProcGetProcsResponse { - ProcSeq procs = 1; + ProcSeq procs = 1; } // Kill message ProcKillRequest { - Proc proc = 1; + Proc proc = 1; } message ProcKillResponse {} // Empty // RedirectToGroup message ProcRedirectToGroupRequest { - Proc proc = 1; - string group_id = 2; - bool kill = 3; + Proc proc = 1; + string group_id = 2; + bool kill = 3; } message ProcRedirectToGroupResponse { - bool value = 1; + bool value = 1; } // RedirectToJob message ProcRedirectToJobRequest { - Proc proc = 1; - string job_id = 2; - bool kill = 3; + Proc proc = 1; + string job_id = 2; + bool kill = 3; } message ProcRedirectToJobResponse { - bool value = 1; + bool value = 1; } // Unbook message ProcUnbookRequest { - Proc proc = 1; - 
bool kill = 2; + Proc proc = 1; + bool kill = 2; } message ProcUnbookResponse {} // Empty // UnbookProcs message ProcUnbookProcsRequest { - ProcSearchCriteria r = 1; - bool kill = 2; + ProcSearchCriteria r = 1; + bool kill = 2; } message ProcUnbookProcsResponse { - int32 num_procs = 1; + int32 num_procs = 1; } // UnbookToJob message ProcUnbookToJobRequest { - ProcSearchCriteria r = 1; - job.JobSeq jobs = 2; - bool kill = 3; + ProcSearchCriteria r = 1; + job.JobSeq jobs = 2; + bool kill = 3; } message ProcUnbookToJobResponse { - int32 num_procs = 1; + int32 num_procs = 1; } // UnbookToGroup message ProcUnbookToGroupRequest { - ProcSearchCriteria r = 1; - job.Group group = 2; - bool kill = 3; + ProcSearchCriteria r = 1; + job.Group group = 2; + bool kill = 3; } message ProcUnbookToGroupResponse { - int32 num_procs = 1; + int32 num_procs = 1; } diff --git a/pycue/opencue/wrappers/host.py b/pycue/opencue/wrappers/host.py index 8c2d6ad7e..de47308a5 100644 --- a/pycue/opencue/wrappers/host.py +++ b/pycue/opencue/wrappers/host.py @@ -128,6 +128,11 @@ def reboot(self): """Causes the host to kill all running frames and reboot the machine.""" self.stub.Reboot(host_pb2.HostRebootRequest(host=self.data), timeout=Cuebot.Timeout) + def setConcurrentSlotsLimit(self, limit): + """Set the concurrent slots limit for selected hosts.""" + self.stub.SetConcurrentSlotsLimit(host_pb2.HostSetConcurrentSlotsLimitRequest( + host=self.data, limit=limit), timeout=Cuebot.Timeout) + def addTags(self, tags): """Adds tags to a host. @@ -634,12 +639,12 @@ def os(self): return self.data.os def concurrentSlotsLimit(self): - """Returns the concurrent procs limit. + """Returns the limit of slots this host can run concurrently. 
:rtype: int - :return: the concurrent procs limit (0 = no limit) + :return: the concurrent slots limit (0 = no limit) """ - return self.data.concurrent_procs_limit + return self.data.concurrent_slots_limit class NestedHost(Host): diff --git a/pycue/opencue/wrappers/layer.py b/pycue/opencue/wrappers/layer.py index 605b2cb65..e8ea87a69 100644 --- a/pycue/opencue/wrappers/layer.py +++ b/pycue/opencue/wrappers/layer.py @@ -20,13 +20,12 @@ import platform from opencue_proto import job_pb2 - import opencue.api +from opencue.cuebot import Cuebot import opencue.search import opencue.wrappers.depend import opencue.wrappers.frame import opencue.wrappers.limit -from opencue.cuebot import Cuebot class Layer(object): @@ -34,7 +33,6 @@ class Layer(object): class LayerType(enum.IntEnum): """Represents the type of layer.""" - PRE = job_pb2.PRE POST = job_pb2.POST RENDER = job_pb2.RENDER @@ -42,62 +40,51 @@ class LayerType(enum.IntEnum): class Order(enum.IntEnum): """Represents the order of a layer.""" - FIRST = job_pb2.FIRST LAST = job_pb2.LAST REVERSE = job_pb2.REVERSE def __init__(self, layer=None): self.data = layer - self.stub = Cuebot.getStub("layer") + self.stub = Cuebot.getStub('layer') def kill(self, username=None, pid=None, host_kill=None, reason=None): """Kills the entire layer.""" username = username if username else getpass.getuser() pid = pid if pid else os.getpid() host_kill = host_kill if host_kill else platform.uname()[1] - return self.stub.KillFrames( - job_pb2.LayerKillFramesRequest( - layer=self.data, - username=username, - pid=str(pid), - host_kill=host_kill, - reason=reason, - ), - timeout=Cuebot.Timeout, - ) + return self.stub.KillFrames(job_pb2.LayerKillFramesRequest(layer=self.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason), + timeout=Cuebot.Timeout) def eat(self): """Eats the entire layer.""" - return self.stub.EatFrames( - job_pb2.LayerEatFramesRequest(layer=self.data), timeout=Cuebot.Timeout - ) + return 
self.stub.EatFrames(job_pb2.LayerEatFramesRequest(layer=self.data), + timeout=Cuebot.Timeout) def retry(self): """Retries the entire layer.""" - return self.stub.RetryFrames( - job_pb2.LayerRetryFramesRequest(layer=self.data), timeout=Cuebot.Timeout - ) + return self.stub.RetryFrames(job_pb2.LayerRetryFramesRequest(layer=self.data), + timeout=Cuebot.Timeout) def markdone(self): """Drops any dependency that requires this layer or requires any frame in the layer.""" - return self.stub.MarkdoneFrames( - job_pb2.LayerMarkdoneFramesRequest(layer=self.data), timeout=Cuebot.Timeout - ) + return self.stub.MarkdoneFrames(job_pb2.LayerMarkdoneFramesRequest(layer=self.data), + timeout=Cuebot.Timeout) def addLimit(self, limit_id): """Adds a limit to the current layer.""" - return self.stub.AddLimit( - job_pb2.LayerAddLimitRequest(layer=self.data, limit_id=limit_id), - timeout=Cuebot.Timeout, - ) + return self.stub.AddLimit(job_pb2.LayerAddLimitRequest(layer=self.data, limit_id=limit_id), + timeout=Cuebot.Timeout) def dropLimit(self, limit_id): """Removes a limit on the current layer.""" return self.stub.DropLimit( job_pb2.LayerDropLimitRequest(layer=self.data, limit_id=limit_id), - timeout=Cuebot.Timeout, - ) + timeout=Cuebot.Timeout) def enableMemoryOptimizer(self, value): """Enables or disables the memory optimizer. @@ -105,10 +92,9 @@ def enableMemoryOptimizer(self, value): :type value: bool :param value: whether memory optimizer is enabled """ - return self.stub.EnableMemoryOptimizer( - job_pb2.LayerEnableMemoryOptimizerRequest(layer=self.data, value=value), - timeout=Cuebot.Timeout, - ) + return self.stub.EnableMemoryOptimizer(job_pb2.LayerEnableMemoryOptimizerRequest( + layer=self.data, value=value), + timeout=Cuebot.Timeout) def getFrames(self, **options): """Returns a list of up to 1000 frames from within the layer. 
@@ -119,14 +105,9 @@ def getFrames(self, **options): :return: sequence of matching frames """ criteria = opencue.search.FrameSearch.criteriaFromOptions(**options) - response = self.stub.GetFrames( - job_pb2.LayerGetFramesRequest(layer=self.data, s=criteria), - timeout=Cuebot.Timeout, - ) - return [ - opencue.wrappers.frame.Frame(frameData) - for frameData in response.frames.frames - ] + response = self.stub.GetFrames(job_pb2.LayerGetFramesRequest(layer=self.data, s=criteria), + timeout=Cuebot.Timeout) + return [opencue.wrappers.frame.Frame(frameData) for frameData in response.frames.frames] def getOutputPaths(self): """Return the output paths for this layer. @@ -134,9 +115,8 @@ def getOutputPaths(self): :rtype: list :return: list of output paths """ - return self.stub.GetOutputPaths( - job_pb2.LayerGetOutputPathsRequest(layer=self.data), timeout=Cuebot.Timeout - ).output_paths + return self.stub.GetOutputPaths(job_pb2.LayerGetOutputPathsRequest(layer=self.data), + timeout=Cuebot.Timeout).output_paths def setTags(self, tags): """Sets the layer tags. @@ -144,10 +124,8 @@ def setTags(self, tags): :type tags: list :param tags: layer tags """ - return self.stub.SetTags( - job_pb2.LayerSetTagsRequest(layer=self.data, tags=tags), - timeout=Cuebot.Timeout, - ) + return self.stub.SetTags(job_pb2.LayerSetTagsRequest(layer=self.data, tags=tags), + timeout=Cuebot.Timeout) def setMaxCores(self, cores): """Sets the maximum number of cores that this layer requires. @@ -156,9 +134,8 @@ def setMaxCores(self, cores): :param cores: Core units, 100 reserves 1 core """ return self.stub.SetMaxCores( - job_pb2.LayerSetMaxCoresRequest(layer=self.data, cores=cores / 100.0), - timeout=Cuebot.Timeout, - ) + job_pb2.LayerSetMaxCoresRequest(layer=self.data, cores=cores/100.0), + timeout=Cuebot.Timeout) def setMinCores(self, cores): """Sets the minimum number of cores that this layer requires. 
@@ -169,9 +146,8 @@ def setMinCores(self, cores): :param cores: core units, 100 reserves 1 core """ return self.stub.SetMinCores( - job_pb2.LayerSetMinCoresRequest(layer=self.data, cores=cores / 100.0), - timeout=Cuebot.Timeout, - ) + job_pb2.LayerSetMinCoresRequest(layer=self.data, cores=cores/100.0), + timeout=Cuebot.Timeout) def setMaxGpus(self, max_gpus): """Sets the maximum number of gpus that this layer requires. @@ -179,8 +155,7 @@ def setMaxGpus(self, max_gpus): :param max_gpus: gpu cores""" return self.stub.SetMaxGpus( job_pb2.LayerSetMaxGpusRequest(layer=self.data, max_gpus=max_gpus), - timeout=Cuebot.Timeout, - ) + timeout=Cuebot.Timeout) def setMinGpus(self, min_gpus): """Sets the minimum number of gpus that this layer requires. @@ -188,17 +163,7 @@ def setMinGpus(self, min_gpus): :param min_gpus: gou cores""" return self.stub.SetMinGpus( job_pb2.LayerSetMinGpusRequest(layer=self.data, min_gpus=min_gpus), - timeout=Cuebot.Timeout, - ) - - def setSlotsRequired(self, slots): - """Sets the number of slots required per frame for this layer. - :type slots: int - :param slots: Number of slots required (<=0 disables slot-based booking)""" - return self.stub.SetSlotsRequired( - job_pb2.LayerSetSlotsRequiredRequest(layer=self.data, slots=slots), - timeout=Cuebot.Timeout, - ) + timeout=Cuebot.Timeout) def setMinGpuMemory(self, gpu_memory): """Sets the minimum number of gpu memory that this layer requires. @@ -208,8 +173,7 @@ def setMinGpuMemory(self, gpu_memory): """ return self.stub.SetMinGpuMemory( job_pb2.LayerSetMinGpuMemoryRequest(layer=self.data, gpu_memory=gpu_memory), - timeout=Cuebot.Timeout, - ) + timeout=Cuebot.Timeout) def setMinMemory(self, memory): """Sets the minimum amount of memory that this layer requires. 
@@ -219,6 +183,16 @@ def setMinMemory(self, memory): """ return self.stub.SetMinMemory( job_pb2.LayerSetMinMemoryRequest(layer=self.data, memory=memory), + timeout=Cuebot.Timeout) + + def setSlotsRequired(self, slots): + """Sets the number of slots required per frame for this layer. + + :type slots: int + :param slots: Number of slots required (<=0 disables slot-based booking) + """ + return self.stub.SetSlotsRequired( + job_pb2.LayerSetSlotsRequiredRequest(layer=self.data, slots=slots), timeout=Cuebot.Timeout, ) @@ -228,32 +202,27 @@ def setThreadable(self, threadable): :type threadable: bool :param threadable: boolean to enable/disable threadable """ - return self.stub.SetThreadable( - job_pb2.LayerSetThreadableRequest(layer=self.data, threadable=threadable), - timeout=Cuebot.Timeout, - ) + return self.stub.SetThreadable(job_pb2.LayerSetThreadableRequest( + layer=self.data, threadable=threadable), + timeout=Cuebot.Timeout) def setTimeout(self, timeout): """Set time out to the value. :type timeout: int :param timeout: value for timeout in minutes""" - return self.stub.SetTimeout( - job_pb2.LayerSetTimeoutRequest(layer=self.data, timeout=timeout), - timeout=Cuebot.Timeout, - ) + return self.stub.SetTimeout(job_pb2.LayerSetTimeoutRequest( + layer=self.data, timeout=timeout), + timeout=Cuebot.Timeout) def setTimeoutLLU(self, timeout_llu): """Set LLU time out to the value. :type timeout: int :param timeout: value for timeout in minutes""" - return self.stub.SetTimeoutLLU( - job_pb2.LayerSetTimeoutLLURequest(layer=self.data, timeout_llu=timeout_llu), - timeout=Cuebot.Timeout, - ) + return self.stub.SetTimeoutLLU(job_pb2.LayerSetTimeoutLLURequest( + layer=self.data, timeout_llu=timeout_llu), + timeout=Cuebot.Timeout) - def addRenderPartition( - self, hostname, threads, max_cores, max_mem, max_gpu_memory, max_gpus - ): + def addRenderPartition(self, hostname, threads, max_cores, max_mem, max_gpu_memory, max_gpus): """Adds a render partition to the layer. 
:type hostname: str @@ -270,17 +239,14 @@ def addRenderPartition( :param max_gpus: max gpus enabled for the partition """ self.stub.AddRenderPartition( - job_pb2.LayerAddRenderPartitionRequest( - layer=self.data, - host=hostname, - threads=threads, - max_cores=max_cores, - max_memory=max_mem, - max_gpu_memory=max_gpu_memory, - username=os.getenv("USER", "unknown"), - max_gpus=max_gpus, - ) - ) + job_pb2.LayerAddRenderPartitionRequest(layer=self.data, + host=hostname, + threads=threads, + max_cores=max_cores, + max_memory=max_mem, + max_gpu_memory=max_gpu_memory, + username=os.getenv("USER", "unknown"), + max_gpus=max_gpus)) def getWhatDependsOnThis(self): """Gets a list of dependencies that depend directly on this layer. @@ -290,8 +256,7 @@ def getWhatDependsOnThis(self): """ response = self.stub.GetWhatDependsOnThis( job_pb2.LayerGetWhatDependsOnThisRequest(layer=self.data), - timeout=Cuebot.Timeout, - ) + timeout=Cuebot.Timeout) dependSeq = response.depends return [opencue.wrappers.depend.Depend(dep) for dep in dependSeq.depends] @@ -303,8 +268,7 @@ def getWhatThisDependsOn(self): """ response = self.stub.GetWhatThisDependsOn( job_pb2.LayerGetWhatThisDependsOnRequest(layer=self.data), - timeout=Cuebot.Timeout, - ) + timeout=Cuebot.Timeout) dependSeq = response.depends return [opencue.wrappers.depend.Depend(dep) for dep in dependSeq.depends] @@ -318,8 +282,7 @@ def createDependencyOnJob(self, job): """ response = self.stub.CreateDependencyOnJob( job_pb2.LayerCreateDependOnJobRequest(layer=self.data, job=job.data), - timeout=Cuebot.Timeout, - ) + timeout=Cuebot.Timeout) return opencue.wrappers.depend.Depend(response.depend) def createDependencyOnLayer(self, layer): @@ -331,11 +294,8 @@ def createDependencyOnLayer(self, layer): :return: the new dependency """ response = self.stub.CreateDependencyOnLayer( - job_pb2.LayerCreateDependOnLayerRequest( - layer=self.data, depend_on_layer=layer.data - ), - timeout=Cuebot.Timeout, - ) + 
job_pb2.LayerCreateDependOnLayerRequest(layer=self.data, depend_on_layer=layer.data), + timeout=Cuebot.Timeout) return opencue.wrappers.depend.Depend(response.depend) def createDependencyOnFrame(self, frame): @@ -348,8 +308,7 @@ def createDependencyOnFrame(self, frame): """ response = self.stub.CreateDependencyOnFrame( job_pb2.LayerCreateDependOnFrameRequest(layer=self.data, frame=frame.data), - timeout=Cuebot.Timeout, - ) + timeout=Cuebot.Timeout) return opencue.wrappers.depend.Depend(response.depend) def createFrameByFrameDependency(self, layer): @@ -364,10 +323,8 @@ def createFrameByFrameDependency(self, layer): # to LayerOnLayer for better efficiency. response = self.stub.CreateFrameByFrameDependency( job_pb2.LayerCreateFrameByFrameDependRequest( - layer=self.data, depend_layer=layer.data, any_frame=False - ), - timeout=Cuebot.Timeout, - ) + layer=self.data, depend_layer=layer.data, any_frame=False), + timeout=Cuebot.Timeout) return opencue.wrappers.depend.Depend(response.depend) # TODO(gregdenton) Determine if this is needed. (Issue #71) @@ -391,8 +348,7 @@ def registerOutputPath(self, outputPath): """ self.stub.RegisterOutputPath( job_pb2.LayerRegisterOutputPathRequest(layer=self.data, spec=outputPath), - timeout=Cuebot.Timeout, - ) + timeout=Cuebot.Timeout) def reorderFrames(self, frameRange, order): """Reorders the specified frame range on this layer. @@ -403,11 +359,8 @@ def reorderFrames(self, frameRange, order): :param order: First, Last or Reverse """ self.stub.ReorderFrames( - job_pb2.LayerReorderFramesRequest( - layer=self.data, range=frameRange, order=order - ), - timeout=Cuebot.Timeout, - ) + job_pb2.LayerReorderFramesRequest(layer=self.data, range=frameRange, order=order), + timeout=Cuebot.Timeout) def staggerFrames(self, frameRange, stagger): """Staggers the specified frame range on this layer. 
@@ -418,11 +371,8 @@ def staggerFrames(self, frameRange, stagger): :param stagger: the amount to stagger by """ self.stub.StaggerFrames( - job_pb2.LayerStaggerFramesRequest( - layer=self.data, range=frameRange, stagger=stagger - ), - timeout=Cuebot.Timeout, - ) + job_pb2.LayerStaggerFramesRequest(layer=self.data, range=frameRange, stagger=stagger), + timeout=Cuebot.Timeout) def getLimitDetails(self): """Returns the Limit objects for the given layer. @@ -430,12 +380,8 @@ def getLimitDetails(self): :rtype: list :return: list of limits on this layer """ - return [ - opencue.wrappers.limit.Limit(limit) - for limit in self.stub.GetLimits( - job_pb2.LayerGetLimitsRequest(layer=self.data), timeout=Cuebot.Timeout - ).limits - ] + return [opencue.wrappers.limit.Limit(limit) for limit in self.stub.GetLimits( + job_pb2.LayerGetLimitsRequest(layer=self.data), timeout=Cuebot.Timeout).limits] def id(self): """Returns the id of the layer. @@ -514,12 +460,6 @@ def minGpus(self): :return: Minimum number of gpus required""" return self.data.min_gpus - def slotsRequired(self): - """Returns the number of slots required per frame. - :rtype: int - :return: Number of slots required (<=0 means not slot-based)""" - return self.data.slots_required - def minMemory(self): """Returns the minimum amount of memory that frames in this layer require. @@ -528,6 +468,14 @@ def minMemory(self): """ return self.data.min_memory + def slotsRequired(self): + """Returns the number of slots required per frame. + + :rtype: int + :return: Number of slots required (<=0 means not slot-based) + """ + return self.data.slots_required + def limits(self): """Returns the limit names for this layer. 
@@ -626,11 +574,8 @@ def percentCompleted(self): :return: percentage of frame completion """ try: - return ( - self.data.layer_stats.succeeded_frames - / float(self.data.layer_stats.total_frames) - * 100.0 - ) + return self.data.layer_stats.succeeded_frames / \ + float(self.data.layer_stats.total_frames) * 100.0 except ZeroDivisionError: return 0 diff --git a/pyoutline/outline/backend/cue.py b/pyoutline/outline/backend/cue.py index 5d05d4c91..f6d6db991 100644 --- a/pyoutline/outline/backend/cue.py +++ b/pyoutline/outline/backend/cue.py @@ -20,19 +20,22 @@ See outline.backend.__init__.py for a description of the PyOutline backend system. """ -from __future__ import absolute_import, division, print_function +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from builtins import str import logging import os import sys import time -from builtins import str from xml.dom.minidom import parseString from xml.etree import ElementTree as Et +from packaging.version import Version + import FileSequence import opencue -from packaging.version import Version import outline import outline.depend @@ -40,9 +43,12 @@ import outline.util import outline.versions.main + logger = logging.getLogger("outline.backend.cue") -__all__ = ["launch", "serialize", "serialize_simple"] +__all__ = ["launch", + "serialize", + "serialize_simple"] JOB_WAIT_PERIOD_SEC = 5 @@ -78,15 +84,13 @@ def build_command(launcher, layer): wrapper = "%s/opencue_wrap_frame" % outline.config.get("outline", "wrapper_dir") else: wrapper = "%s/opencue_wrap_frame_no_ss" % outline.config.get( - "outline", "wrapper_dir" - ) + "outline", "wrapper_dir") command.append(wrapper) command.append(outline.config.get("outline", "user_dir")) command.append("%s/pycuerun" % outline.config.get("outline", "bin_dir")) - command.append( - "%s -e #IFRAME#-%s" % (launcher.get_outline().get_path(), layer.get_name()) - ) + command.append("%s -e #IFRAME#-%s" % 
(launcher.get_outline().get_path(), + layer.get_name())) command.append("--version %s" % outline.versions.get_version("outline")) command.append("--repos %s" % outline.versions.get_repos()) command.append("--debug") @@ -148,20 +152,15 @@ def test(job): job = opencue.api.getJob(job.name()) if job.data.job_stats.dead_frames + job.data.job_stats.eaten_frames > 0: raise outline.exception.OutlineException( - "Job test failed, dead or eaten frames on: %s" % job.data.name - ) + "Job test failed, dead or eaten frames on: %s" % job.data.name) if job.data.state == opencue.api.job_pb2.FINISHED: break logger.debug( - "waiting on %s job to complete: %d/%d", - job.data.name, - job.data.job_stats.succeeded_frames, - job.data.job_stats.total_frames, - ) + "waiting on %s job to complete: %d/%d", job.data.name, + job.data.job_stats.succeeded_frames, job.data.job_stats.total_frames) except opencue.CueException as ie: raise outline.exception.OutlineException( - "test for job %s failed: %s" % (job.data.name, ie) - ) + "test for job %s failed: %s" % (job.data.name, ie)) time.sleep(5) finally: job.kill() @@ -179,17 +178,13 @@ def wait(job): if not opencue.api.isJobPending(job.data.name): break logger.debug( - "waiting on %s job to complete: %d/%d", - job.data.name, - job.data.job_stats.succeeded_frames, - job.data.job_stats.total_frames, - ) + "waiting on %s job to complete: %d/%d", job.data.name, + job.data.job_stats.succeeded_frames, job.data.job_stats.total_frames) except opencue.CueException as ie: print( - "opencue error waiting on job: %s, %s. Will continue to wait." - % (job.data.name, ie), - file=sys.stderr, - ) + "opencue error waiting on job: %s, %s. Will continue to wait." 
% ( + job.data.name, ie), + file=sys.stderr) time.sleep(JOB_WAIT_PERIOD_SEC) @@ -251,9 +246,8 @@ def _serialize(launcher, use_pycuerun): user = outline.util.get_user() sub_element(root, "user", user) if not launcher.get("nomail"): - sub_element( - root, "email", "%s@%s" % (user, outline.config.get("outline", "domain")) - ) + sub_element(root, "email", "%s@%s" % (user, + outline.config.get("outline", "domain"))) sub_element(root, "uid", str(outline.util.get_uid())) j = Et.SubElement(root, "job", {"name": ol.get_name()}) @@ -293,6 +287,7 @@ def _serialize(launcher, use_pycuerun): layers = Et.SubElement(j, "layers") for layer in ol.get_layers(): + # Unregistered layers are in the job but don't show up on the cue. if not layer.get_arg("register"): continue @@ -306,19 +301,16 @@ def _serialize(launcher, use_pycuerun): # that layer. frame_range = layer.get_frame_range() if not frame_range: - logger.info( - "Skipping layer %s, its range (%s) does not intersect with ol range %s", - layer, - layer.get_arg("range"), - ol.get_frame_range(), - ) + logger.info("Skipping layer %s, its range (%s) does not intersect " + "with ol range %s", layer, layer.get_arg("range"), ol.get_frame_range()) continue - spec_layer = Et.SubElement( - layers, "layer", {"name": layer.get_name(), "type": layer.get_type()} - ) + spec_layer = Et.SubElement(layers, "layer", + {"name": layer.get_name(), + "type": layer.get_type()}) if use_pycuerun: - sub_element(spec_layer, "cmd", " ".join(build_command(launcher, layer))) + sub_element(spec_layer, "cmd", + " ".join(build_command(launcher, layer))) else: sub_element(spec_layer, "cmd", " ".join(layer.get_arg("command"))) sub_element(spec_layer, "range", str(frame_range)) @@ -336,15 +328,12 @@ def _serialize(launcher, use_pycuerun): else: logger.debug("%s is set to override service cores.", layer.get_name()) if layer.is_arg_set("cores") and layer.is_arg_set("threads"): - logger.warning( - "%s has both cores and threads. 
Use cores.", layer.get_name() - ) + logger.warning("%s has both cores and threads. Use cores.", layer.get_name()) sub_element(spec_layer, "cores", "%0.1f" % float(cores)) if layer.is_arg_set("threadable"): - sub_element( - spec_layer, "threadable", bool_to_str(layer.get_arg("threadable")) - ) + sub_element(spec_layer, "threadable", + bool_to_str(layer.get_arg("threadable"))) if layer.get_arg("memory"): sub_element(spec_layer, "memory", "%s" % (layer.get_arg("memory"))) @@ -381,9 +370,7 @@ def _serialize(launcher, use_pycuerun): if layer.get_arg("timeout_llu"): if spec_version >= Version("1.10"): - sub_element( - spec_layer, "timeout_llu", "%s" % (layer.get_arg("timeout_llu")) - ) + sub_element(spec_layer, "timeout_llu", "%s" % (layer.get_arg("timeout_llu"))) else: _warning_spec_version(spec_version, "timeout_llu") @@ -398,7 +385,8 @@ def _serialize(launcher, use_pycuerun): _warning_spec_version(spec_version, "slots_required") if os.environ.get("OL_TAG_OVERRIDE", False): - sub_element(spec_layer, "tags", scrub_tags(os.environ["OL_TAG_OVERRIDE"])) + sub_element(spec_layer, "tags", + scrub_tags(os.environ["OL_TAG_OVERRIDE"])) elif layer.get_arg("tags"): sub_element(spec_layer, "tags", scrub_tags(layer.get_arg("tags"))) @@ -434,8 +422,7 @@ def _serialize(launcher, use_pycuerun): if not layers: raise outline.exception.OutlineException( "Failed to launch job. There are no layers with frame " - "ranges that intersect the job's frame range: %s" % ol.get_frame_range() - ) + "ranges that intersect the job's frame range: %s" % ol.get_frame_range()) # Dependencies go after all of the layers root.append(depends) @@ -444,7 +431,7 @@ def _serialize(launcher, use_pycuerun): '', '' % spec_version, - Et.tostring(root).decode(), + Et.tostring(root).decode() ] result = "".join(xml) @@ -457,7 +444,8 @@ def scrub_tags(tags): Ensure that layer tags pass in as a string are formatted properly. 
""" if isinstance(tags, str): - tags = [tag.strip() for tag in tags.split("|") if tag.strip().isalnum()] + tags = [tag.strip() for tag in tags.split("|") + if tag.strip().isalnum()] return " | ".join(tags) @@ -477,25 +465,21 @@ def build_dependencies(ol, layer, all_depends): add them to the job spec. """ for dep in layer.get_depends(): - depend = Et.SubElement( - all_depends, - "depend", - type=dep.get_type(), - anyframe=bool_to_str(dep.is_any_frame()), - ) + + depend = Et.SubElement(all_depends, "depend", + type=dep.get_type(), + anyframe=bool_to_str(dep.is_any_frame())) if dep.get_type() == outline.depend.DependType.LayerOnSimFrame: + frame_range = dep.get_depend_on_layer().get_frame_range() first_frame = FileSequence.FrameSet(frame_range)[0] sub_element(depend, "depjob", ol.get_name()) sub_element(depend, "deplayer", layer.get_name()) sub_element(depend, "onjob", ol.get_name()) - sub_element( - depend, - "onframe", - "%04d-%s" % (first_frame, dep.get_depend_on_layer().get_name()), - ) + sub_element(depend, "onframe", "%04d-%s" + % (first_frame, dep.get_depend_on_layer().get_name())) else: sub_element(depend, "depjob", ol.get_name()) sub_element(depend, "deplayer", layer.get_name()) diff --git a/rust/crates/scheduler/src/dao/host_dao.rs b/rust/crates/scheduler/src/dao/host_dao.rs index cf30f50e6..98c994ab1 100644 --- a/rust/crates/scheduler/src/dao/host_dao.rs +++ b/rust/crates/scheduler/src/dao/host_dao.rs @@ -59,7 +59,7 @@ pub struct HostModel { // Number of cores available at the subscription of the show this host has been queried on int_alloc_available_cores: i64, ts_ping: DateTime, - int_concurrent_procs_limit: i64, + int_concurrent_slots_limit: i64, int_running_procs: i64, } @@ -95,8 +95,8 @@ impl From for Host { alloc_id: parse_uuid(&val.pk_alloc), alloc_name: val.str_alloc_name, last_updated: val.ts_ping, - concurrent_procs_limit: (val.int_concurrent_procs_limit >= 0) - .then_some(val.int_concurrent_procs_limit as u32), + concurrent_slots_limit: 
(val.int_concurrent_slots_limit >= 0) + .then_some(val.int_concurrent_slots_limit as u32), running_procs_count: val.int_running_procs as u32, } } @@ -124,7 +124,7 @@ SELECT DISTINCT a.pk_alloc, a.str_name as str_alloc_name, hs.ts_ping, - h.int_concurrent_procs_limit, + h.int_concurrent_slots_limit, hs.int_running_procs FROM host h INNER JOIN host_stat hs ON h.pk_host = hs.pk_host diff --git a/rust/crates/scheduler/src/host_cache/cache.rs b/rust/crates/scheduler/src/host_cache/cache.rs index bd580536f..32260ce9b 100644 --- a/rust/crates/scheduler/src/host_cache/cache.rs +++ b/rust/crates/scheduler/src/host_cache/cache.rs @@ -204,7 +204,7 @@ impl HostCache { host.idle_memory >= memory && host.idle_cores >= cores && host.running_procs_count + slots - <= host.concurrent_procs_limit.unwrap_or(u32::MAX) && + <= host.concurrent_slots_limit.unwrap_or(u32::MAX) && // Ensure we're not retrying the same host as last attempts !failed_candidates.borrow().contains(&host.id) }; @@ -371,7 +371,7 @@ mod tests { alloc_id: Uuid::new_v4(), alloc_name: "test".to_string(), last_updated: Utc::now(), - concurrent_procs_limit: None, + concurrent_slots_limit: None, running_procs_count: 0, } } diff --git a/rust/crates/scheduler/src/host_cache/store.rs b/rust/crates/scheduler/src/host_cache/store.rs index 3f7c45cee..f8b5c03e6 100644 --- a/rust/crates/scheduler/src/host_cache/store.rs +++ b/rust/crates/scheduler/src/host_cache/store.rs @@ -388,7 +388,7 @@ mod tests { alloc_id: Uuid::new_v4(), alloc_name: "test".to_string(), last_updated, - concurrent_procs_limit: None, + concurrent_slots_limit: None, running_procs_count: 0, } } diff --git a/rust/crates/scheduler/src/models/host.rs b/rust/crates/scheduler/src/models/host.rs index 01fbd3e60..8a95a0ec1 100644 --- a/rust/crates/scheduler/src/models/host.rs +++ b/rust/crates/scheduler/src/models/host.rs @@ -24,7 +24,7 @@ pub struct Host { pub(crate) alloc_id: Uuid, pub(crate) alloc_name: String, pub(crate) last_updated: DateTime, - pub(crate) 
concurrent_procs_limit: Option, + pub(crate) concurrent_slots_limit: Option, pub(crate) running_procs_count: u32, } @@ -82,7 +82,7 @@ impl Host { alloc_id, alloc_name, last_updated: Local::now().with_timezone(&Utc), - concurrent_procs_limit: concurrent_frames_limit, + concurrent_slots_limit: concurrent_frames_limit, running_procs_count: 0, } } From 33acb5c50b1e200a4add7e2ec4a604da5ba2d67f Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Thu, 18 Dec 2025 10:38:44 -0800 Subject: [PATCH 09/17] Minor fixes --- .../java/com/imageworks/spcue/HostEntity.java | 2 + .../com/imageworks/spcue/dao/LayerDao.java | 2 +- .../spcue/dao/postgres/HostDaoJdbc.java | 2 + .../spcue/dao/postgres/WhiteboardDaoJdbc.java | 8 +- .../migrations/V35__Add_host_frame_limit.sql | 2 +- .../V36__Add_layer_slots_required.sql | 2 +- cuegui/cuegui/HostMonitorTree.py | 9 + cuegui/cuegui/MenuActions.py | 6 +- proto/src/job.proto | 1662 ++++++++--------- pycue/opencue/wrappers/layer.py | 2 +- pyoutline/outline/layer.py | 2 +- rust/crates/scheduler/src/dao/host_dao.rs | 2 +- 12 files changed, 851 insertions(+), 850 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/HostEntity.java b/cuebot/src/main/java/com/imageworks/spcue/HostEntity.java index 548e99cd2..cb196dda1 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/HostEntity.java +++ b/cuebot/src/main/java/com/imageworks/spcue/HostEntity.java @@ -38,6 +38,7 @@ public class HostEntity extends Entity implements HostInterface { public int idleGpus; public long gpuMemory; public long idleGpuMemory; + public int concurrentSlotsLimit; public boolean unlockAtBoot; @@ -61,6 +62,7 @@ public HostEntity(Host grpcHost) { this.idleGpus = (int) grpcHost.getIdleGpus(); this.gpuMemory = grpcHost.getGpuMemory(); this.idleGpuMemory = grpcHost.getIdleGpuMemory(); + this.concurrentSlotsLimit = grpcHost.getConcurrentSlotsLimit(); } public String getHostId() { diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java 
b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java index 15449a0f5..1b5435218 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java @@ -315,7 +315,7 @@ public interface LayerDao { * Updates the slots required for a layer. * * @param layer the layer to update - * @param slots the number of slots required (<=0 means not slot-based) + * @param slots the number of slots required (<0 means the layer is not slot-based) */ void updateLayerSlotsRequired(LayerInterface layer, int slots); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java index 32a2d822d..7fb031f9e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java @@ -78,6 +78,7 @@ public HostEntity mapRow(ResultSet rs, int rowNum) throws SQLException { host.idleGpus = rs.getInt("int_gpus_idle"); host.gpuMemory = rs.getLong("int_gpu_mem"); host.idleGpuMemory = rs.getLong("int_gpu_mem_idle"); + host.concurrentSlotsLimit = rs.getInt("int_concurrent_slots_limit"); host.dateBooted = rs.getDate("ts_booted"); host.dateCreated = rs.getDate("ts_created"); host.datePinged = rs.getDate("ts_ping"); @@ -131,6 +132,7 @@ public String getFacilityId() { + " host.int_gpus_idle, " + " host.int_gpu_mem, " + " host.int_gpu_mem_idle, " + + " host.int_concurrent_slots_limit, " + " host.ts_created, " + " host.str_name, " + " host_stat.str_state, " diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java index c8260f2a2..1be896420 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/WhiteboardDaoJdbc.java @@ -961,6 +961,7 @@ 
public static NestedHost.Builder mapNestedHostBuilder(ResultSet rs) throws SQLEx .setLockState(LockState.valueOf(SqlUtil.getString(rs, "str_lock_state"))) .setHasComment(rs.getBoolean("b_comment")) .setThreadMode(ThreadMode.values()[rs.getInt("int_thread_mode")]) + .setConcurrentSlotsLimit(rs.getInt("int_concurrent_slots_limit")) .setOs(SqlUtil.getString(rs, "str_os")); String tags = SqlUtil.getString(rs, "str_tags"); @@ -998,6 +999,7 @@ public static Host.Builder mapHostBuilder(ResultSet rs) throws SQLException { builder.setLockState(LockState.valueOf(SqlUtil.getString(rs, "str_lock_state"))); builder.setHasComment(rs.getBoolean("b_comment")); builder.setThreadMode(ThreadMode.values()[rs.getInt("int_thread_mode")]); + builder.setConcurrentSlotsLimit(rs.getInt("int_concurrent_slots_limit")); builder.setOs(SqlUtil.getString(rs, "str_os")); String tags = SqlUtil.getString(rs, "str_tags"); @@ -1711,9 +1713,9 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException { + "host.int_cores_idle," + "host.int_mem," + "host.int_mem_idle," + "host.int_gpus," + "host.int_gpus_idle," + "host.int_gpu_mem," + "host.int_gpu_mem_idle," + "host.str_tags," + "host.str_lock_state," + "host.b_comment," - + "host.int_thread_mode," + "host_stat.str_os," + "host_stat.int_mem_total," - + "host_stat.int_mem_free," + "host_stat.int_swap_total," + "host_stat.int_swap_free," - + "host_stat.int_mcp_total," + "host_stat.int_mcp_free," + + "host.int_thread_mode," + "host.int_concurrent_slots_limit," + "host_stat.str_os," + + "host_stat.int_mem_total," + "host_stat.int_mem_free," + "host_stat.int_swap_total," + + "host_stat.int_swap_free," + "host_stat.int_mcp_total," + "host_stat.int_mcp_free," + "host_stat.int_gpu_mem_total," + "host_stat.int_gpu_mem_free," + "host_stat.int_load, " + "alloc.str_name AS alloc_name " + "FROM " + "alloc," + "facility, " + "host_stat," + "host " + "WHERE " + "host.pk_alloc = alloc.pk_alloc " diff --git 
a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql index 7ef883933..e42d873f8 100644 --- a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V35__Add_host_frame_limit.sql @@ -1,4 +1,4 @@ --- Add a field to limit the max amount of concurrent procs a host can run +-- Add a field to limit the max amount of concurrent frames a host can run -- -1 means no limit alter table host add int_concurrent_slots_limit INT NOT NULL DEFAULT -1; diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql index 1905fbdcd..0b9ea3bd1 100644 --- a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql @@ -1,4 +1,4 @@ -- Add a field to mark a layer as requiring at least a specific number of slots --- <=0 means slots are not required +-- <0 means slots are not required alter table layer add int_slots_required INT NOT NULL DEFAULT 0; diff --git a/cuegui/cuegui/HostMonitorTree.py b/cuegui/cuegui/HostMonitorTree.py index 1aa33761b..c75790e2b 100644 --- a/cuegui/cuegui/HostMonitorTree.py +++ b/cuegui/cuegui/HostMonitorTree.py @@ -159,6 +159,15 @@ def __init__(self, parent): data=lambda host: ",".join(host.data.tags), tip="The tags applied to the host.\n\n" "On a frame it is the name of the job.") + self.addColumn("Concurrent Slots", 50, id=23, + data=lambda host: \ + host.data.concurrent_slots_limit \ + if host.data.concurrent_slots_limit >= 0 \ + else "-", + tip="When >0 the host is configured to be slot based.\n" + "The host can only run this amount of slots at the same time " + "(Usually: 1 frame = 1 slot)\n\n" + 
"This host will only run layers with a slots_required field configured.") self.hostSearch = opencue.search.HostSearch() diff --git a/cuegui/cuegui/MenuActions.py b/cuegui/cuegui/MenuActions.py index 7f26da00e..e2c0e5fc9 100644 --- a/cuegui/cuegui/MenuActions.py +++ b/cuegui/cuegui/MenuActions.py @@ -1947,11 +1947,11 @@ def setConcurrentSlotsLimit(self, rpcObjects=None): current = hosts[0].concurrentSlotsLimit() if len(hosts) == 1 else 0 title = "Set Concurrent Slots Limit" - body = "Enter maximum concurrent slots (usually a frame consumes 1 slot, " \ + body = "Enter maximum concurrent slots \n(usually a frame consumes 1 slot, " \ "the value can be configured on its layer's slot_required field)\n" \ "When a limit is defined, booking will only allocate layers with " \ - "slots_required > 0 to be executed on this host. Which means regular booking by " \ - "cores/memory/gpu becomes disabled.\n" \ + "slots_required > 0 to be executed on this host. \n" \ + "Which means regular booking by cores/memory/gpu becomes disabled.\n\n" \ "(0 for no limit, >0 for specific limit):" (value, choice) = QtWidgets.QInputDialog.getInt( diff --git a/proto/src/job.proto b/proto/src/job.proto index e78f9a89b..61fd6cd3e 100644 --- a/proto/src/job.proto +++ b/proto/src/job.proto @@ -1,458 +1,454 @@ - syntax = "proto3"; package job; -option java_package = "com.imageworks.spcue.grpc.job"; -option java_multiple_files = true; - -option go_package = "opencue_gateway/gen/go"; - import "comment.proto"; import "depend.proto"; import "limit.proto"; import "renderPartition.proto"; +option go_package = "opencue_gateway/gen/go"; +option java_multiple_files = true; +option java_package = "com.imageworks.spcue.grpc.job"; + // Job related messages and services // This includes Job, Layer, Frame, and Group objects - // -------- Services --------] service FrameInterface { - // Add a render partition to the frame. 
- rpc AddRenderPartition(FrameAddRenderPartitionRequest) returns (FrameAddRenderPartitionResponse); + // Add a render partition to the frame. + rpc AddRenderPartition(FrameAddRenderPartitionRequest) returns (FrameAddRenderPartitionResponse); - // Sets up and returns a FrameOnFrame dependency. - rpc CreateDependencyOnFrame(FrameCreateDependencyOnFrameRequest) returns (FrameCreateDependencyOnFrameResponse); + // Sets up and returns a FrameOnFrame dependency. + rpc CreateDependencyOnFrame(FrameCreateDependencyOnFrameRequest) returns (FrameCreateDependencyOnFrameResponse); - // Sets up and returns a FrameOnJob dependency. - rpc CreateDependencyOnJob(FrameCreateDependencyOnJobRequest) returns (FrameCreateDependencyOnJobResponse); + // Sets up and returns a FrameOnJob dependency. + rpc CreateDependencyOnJob(FrameCreateDependencyOnJobRequest) returns (FrameCreateDependencyOnJobResponse); - // Sets up and returns a FrameOnLayer dependency. - rpc CreateDependencyOnLayer(FrameCreateDependencyOnLayerRequest) returns (FrameCreateDependencyOnLayerResponse); + // Sets up and returns a FrameOnLayer dependency. + rpc CreateDependencyOnLayer(FrameCreateDependencyOnLayerRequest) returns (FrameCreateDependencyOnLayerResponse); - // Drops every dependendy that is causing this frame not to run. - rpc DropDepends(FrameDropDependsRequest) returns (FrameDropDependsResponse); + // Drops every dependency that is causing this frame not to run. + rpc DropDepends(FrameDropDependsRequest) returns (FrameDropDependsResponse); - // Eating a frame will stop rendering and will not try to coninue processing the frame. - rpc Eat(FrameEatRequest) returns (FrameEatResponse); + // Eating a frame will stop rendering and will not try to continue processing the frame. + rpc Eat(FrameEatRequest) returns (FrameEatResponse); - // Finds a frame in a pending job based on the job, layer, and frame number. 
- rpc FindFrame(FrameFindFrameRequest) returns (FrameFindFrameResponse); + // Finds a frame in a pending job based on the job, layer, and frame number. + rpc FindFrame(FrameFindFrameRequest) returns (FrameFindFrameResponse); - // Get a frame from its unique id - rpc GetFrame(FrameGetFrameRequest) returns (FrameGetFrameResponse); + // Get a frame from its unique id + rpc GetFrame(FrameGetFrameRequest) returns (FrameGetFrameResponse); - // Get a frame from search criteria - rpc GetFrames(FrameGetFramesRequest) returns (FrameGetFramesResponse); + // Get a frame from search criteria + rpc GetFrames(FrameGetFramesRequest) returns (FrameGetFramesResponse); - // Returns a list of dependencies setup to depend on this frame. - rpc GetWhatDependsOnThis(FrameGetWhatDependsOnThisRequest) returns (FrameGetWhatDependsOnThisResponse); + // Returns a list of dependencies setup to depend on this frame. + rpc GetWhatDependsOnThis(FrameGetWhatDependsOnThisRequest) returns (FrameGetWhatDependsOnThisResponse); - // Returns a list of dependencies that this frame depends on. - rpc GetWhatThisDependsOn(FrameGetWhatThisDependsOnRequest) returns (FrameGetWhatThisDependsOnResponse); + // Returns a list of dependencies that this frame depends on. + rpc GetWhatThisDependsOn(FrameGetWhatThisDependsOnRequest) returns (FrameGetWhatThisDependsOnResponse); - // Kills the frame if it is running - rpc Kill(FrameKillRequest) returns (FrameKillResponse); + // Kills the frame if it is running + rpc Kill(FrameKillRequest) returns (FrameKillResponse); - // Will recount the number of active dependencies on the frame and put it back into the Depend state if that count - // is greater than 0. - rpc MarkAsDepend(FrameMarkAsDependRequest) returns (FrameMarkAsDependResponse); + // Will recount the number of active dependencies on the frame and put it back into the Depend state if that count + // is greater than 0. 
+ rpc MarkAsDepend(FrameMarkAsDependRequest) returns (FrameMarkAsDependResponse); - // Changes the frame's dependency count to 0, which will put the frame into the waiting state. Retrying the frame - // will put it back into the waiting state. - rpc MarkAsWaiting(FrameMarkAsWaitingRequest) returns (FrameMarkAsWaitingResponse); + // Changes the frame's dependency count to 0, which will put the frame into the waiting state. Retrying the frame + // will put it back into the waiting state. + rpc MarkAsWaiting(FrameMarkAsWaitingRequest) returns (FrameMarkAsWaitingResponse); - // Retries the frame by setting it as waiting - rpc Retry(FrameRetryRequest) returns (FrameRetryResponse); + // Retries the frame by setting it as waiting + rpc Retry(FrameRetryRequest) returns (FrameRetryResponse); - // Updates the state of the frame's checkpoint status. If the checkpoint status is complete, then the frame's - // checkpointCoreSeconds is updated with the amount of render time that was checkpointed. - rpc SetCheckpointState(FrameSetCheckpointStateRequest) returns (FrameSetCheckpointStateResponse); + // Updates the state of the frame's checkpoint status. If the checkpoint status is complete, then the frame's + // checkpointCoreSeconds is updated with the amount of render time that was checkpointed. 
+ rpc SetCheckpointState(FrameSetCheckpointStateRequest) returns (FrameSetCheckpointStateResponse); - // Set a frame's state display override - rpc SetFrameStateDisplayOverride(FrameStateDisplayOverrideRequest) returns (FrameStateDisplayOverrideResponse); + // Set a frame's state display override + rpc SetFrameStateDisplayOverride(FrameStateDisplayOverrideRequest) returns (FrameStateDisplayOverrideResponse); - // Get a frame's state display overrides - rpc GetFrameStateDisplayOverrides(GetFrameStateDisplayOverridesRequest) returns (GetFrameStateDisplayOverridesResponse); + // Get a frame's state display overrides + rpc GetFrameStateDisplayOverrides(GetFrameStateDisplayOverridesRequest) returns (GetFrameStateDisplayOverridesResponse); } service GroupInterface { - // Create a new sub group of the given name - rpc CreateSubGroup(GroupCreateSubGroupRequest) returns (GroupCreateSubGroupResponse); + // Create a new sub group of the given name + rpc CreateSubGroup(GroupCreateSubGroupRequest) returns (GroupCreateSubGroupResponse); - // Delete the provided group - rpc Delete(GroupDeleteRequest) returns (GroupDeleteResponse); + // Delete the provided group + rpc Delete(GroupDeleteRequest) returns (GroupDeleteResponse); - // Finds a group by show name and group - rpc FindGroup(GroupFindGroupRequest) returns (GroupFindGroupResponse); + // Finds a group by show name and group + rpc FindGroup(GroupFindGroupRequest) returns (GroupFindGroupResponse); - // Gets a group by its id - rpc GetGroup(GroupGetGroupRequest) returns (GroupGetGroupResponse); + // Gets a group by its id + rpc GetGroup(GroupGetGroupRequest) returns (GroupGetGroupResponse); - // Get child groups of the provided group - rpc GetGroups(GroupGetGroupsRequest) returns (GroupGetGroupsResponse); + // Get child groups of the provided group + rpc GetGroups(GroupGetGroupsRequest) returns (GroupGetGroupsResponse); - // Get Jobs of the provided group - rpc GetJobs(GroupGetJobsRequest) returns (GroupGetJobsResponse); + // 
Get Jobs of the provided group + rpc GetJobs(GroupGetJobsRequest) returns (GroupGetJobsResponse); - // Reparent the provided groups to the provided group - rpc ReparentGroups(GroupReparentGroupsRequest) returns (GroupReparentGroupsResponse); + // Reparent the provided groups to the provided group + rpc ReparentGroups(GroupReparentGroupsRequest) returns (GroupReparentGroupsResponse); - // Reparent the provided jobs to the provided group - rpc ReparentJobs(GroupReparentJobsRequest) returns (GroupReparentJobsResponse); + // Reparent the provided jobs to the provided group + rpc ReparentJobs(GroupReparentJobsRequest) returns (GroupReparentJobsResponse); - // Set the Default Job Max Core values to all in the provided group - rpc SetDefaultJobMaxCores(GroupSetDefJobMaxCoresRequest) returns (GroupSetDefJobMaxCoresResponse); + // Set the Default Job Max Core values to all in the provided group + rpc SetDefaultJobMaxCores(GroupSetDefJobMaxCoresRequest) returns (GroupSetDefJobMaxCoresResponse); - // Set the Default Job Min Core values to all in the provided group - rpc SetDefaultJobMinCores(GroupSetDefJobMinCoresRequest) returns (GroupSetDefJobMinCoresResponse); + // Set the Default Job Min Core values to all in the provided group + rpc SetDefaultJobMinCores(GroupSetDefJobMinCoresRequest) returns (GroupSetDefJobMinCoresResponse); - // Set the Default Job Max Gpu values to all in the provided group - rpc SetDefaultJobMaxGpus(GroupSetDefJobMaxGpusRequest) returns (GroupSetDefJobMaxGpusResponse); + // Set the Default Job Max Gpu values to all in the provided group + rpc SetDefaultJobMaxGpus(GroupSetDefJobMaxGpusRequest) returns (GroupSetDefJobMaxGpusResponse); - // Set the Default Job Min Gpu values to all in the provided group - rpc SetDefaultJobMinGpus(GroupSetDefJobMinGpusRequest) returns (GroupSetDefJobMinGpusResponse); + // Set the Default Job Min Gpu values to all in the provided group + rpc SetDefaultJobMinGpus(GroupSetDefJobMinGpusRequest) returns 
(GroupSetDefJobMinGpusResponse); - // Set the Default Job Priority values to all in the provided group - rpc SetDefaultJobPriority(GroupSetDefJobPriorityRequest) returns (GroupSetDefJobPriorityResponse); + // Set the Default Job Priority values to all in the provided group + rpc SetDefaultJobPriority(GroupSetDefJobPriorityRequest) returns (GroupSetDefJobPriorityResponse); - // Set the department associated with the provided group - rpc SetDepartment(GroupSetDeptRequest) returns (GroupSetDeptResponse); + // Set the department associated with the provided group + rpc SetDepartment(GroupSetDeptRequest) returns (GroupSetDeptResponse); - // Set the provided groups parent group - rpc SetGroup(GroupSetGroupRequest) returns (GroupSetGroupResponse); + // Set the provided groups parent group + rpc SetGroup(GroupSetGroupRequest) returns (GroupSetGroupResponse); - // Set the group's Max Cores value - rpc SetMaxCores(GroupSetMaxCoresRequest) returns (GroupSetMaxCoresResponse); + // Set the group's Max Cores value + rpc SetMaxCores(GroupSetMaxCoresRequest) returns (GroupSetMaxCoresResponse); - // Set the groups Min Cores values - rpc SetMinCores(GroupSetMinCoresRequest) returns (GroupSetMinCoresResponse); + // Set the groups Min Cores values + rpc SetMinCores(GroupSetMinCoresRequest) returns (GroupSetMinCoresResponse); - // Set the group's Max Gpu value - rpc SetMaxGpus(GroupSetMaxGpusRequest) returns (GroupSetMaxGpusResponse); + // Set the group's Max Gpu value + rpc SetMaxGpus(GroupSetMaxGpusRequest) returns (GroupSetMaxGpusResponse); - // Set the groups Min Gpu values - rpc SetMinGpus(GroupSetMinGpusRequest) returns (GroupSetMinGpusResponse); + // Set the groups Min Gpu values + rpc SetMinGpus(GroupSetMinGpusRequest) returns (GroupSetMinGpusResponse); - // Set the groups name - rpc SetName(GroupSetNameRequest) returns (GroupSetNameResponse); + // Set the groups name + rpc SetName(GroupSetNameRequest) returns (GroupSetNameResponse); } service JobInterface { - // Add a comment 
on this job - rpc AddComment(JobAddCommentRequest) returns (JobAddCommentResponse); + // Add a comment on this job + rpc AddComment(JobAddCommentRequest) returns (JobAddCommentResponse); - // Add a render partition to the local host. This partition will - // run frames on the specified job. - rpc AddRenderPartition(JobAddRenderPartRequest) returns (JobAddRenderPartResponse); + // Add a render partition to the local host. This partition will + // run frames on the specified job. + rpc AddRenderPartition(JobAddRenderPartRequest) returns (JobAddRenderPartResponse); - // Adds a subscriber to a job. When the job is finished, subscriber - // receives email with notification - rpc AddSubscriber(JobAddSubscriberRequest) returns (JobAddSubscriberResponse); + // Adds a subscriber to a job. When the job is finished, subscriber + // receives email with notification + rpc AddSubscriber(JobAddSubscriberRequest) returns (JobAddSubscriberResponse); - // Setup and retunrn a JobOnFrame dependency - rpc CreateDependencyOnFrame(JobCreateDependencyOnFrameRequest) returns (JobCreateDependencyOnFrameResponse); + // Setup and retunrn a JobOnFrame dependency + rpc CreateDependencyOnFrame(JobCreateDependencyOnFrameRequest) returns (JobCreateDependencyOnFrameResponse); - // Setup and return a JobOnJob dependency - rpc CreateDependencyOnJob(JobCreateDependencyOnJobRequest) returns (JobCreateDependencyOnJobResponse); + // Setup and return a JobOnJob dependency + rpc CreateDependencyOnJob(JobCreateDependencyOnJobRequest) returns (JobCreateDependencyOnJobResponse); - // Setup and retunrn a JobOnLayer dependency - rpc CreateDependencyOnLayer(JobCreateDependencyOnLayerRequest) returns (JobCreateDependencyOnLayerResponse); + // Setup and retunrn a JobOnLayer dependency + rpc CreateDependencyOnLayer(JobCreateDependencyOnLayerRequest) returns (JobCreateDependencyOnLayerResponse); - // Drops all external dependencies for the job. 
This means that - // the internal depend structure will be maintained, but everything - // that depends on another job will be dropped. - rpc DropDepends(JobDropDependsRequest) returns (JobDropDependsResponse); + // Drops all external dependencies for the job. This means that + // the internal depend structure will be maintained, but everything + // that depends on another job will be dropped. + rpc DropDepends(JobDropDependsRequest) returns (JobDropDependsResponse); - // Eats all frames that match the FrameSearchCriteria - rpc EatFrames(JobEatFramesRequest) returns (JobEatFramesResponse); + // Eats all frames that match the FrameSearchCriteria + rpc EatFrames(JobEatFramesRequest) returns (JobEatFramesResponse); - // Finds a pending job using the job name - rpc FindJob(JobFindJobRequest) returns (JobFindJobResponse); + // Finds a pending job using the job name + rpc FindJob(JobFindJobRequest) returns (JobFindJobResponse); - // Get the comments for this job - rpc GetComments(JobGetCommentsRequest) returns (JobGetCommentsResponse); + // Get the comments for this job + rpc GetComments(JobGetCommentsRequest) returns (JobGetCommentsResponse); - // Get the job details - rpc GetCurrent(JobGetCurrentRequest) returns (JobGetCurrentResponse); + // Get the job details + rpc GetCurrent(JobGetCurrentRequest) returns (JobGetCurrentResponse); - // Returns a list of all dependencies that this job is involved with - rpc GetDepends(JobGetDependsRequest) returns (JobGetDependsResponse); + // Returns a list of all dependencies that this job is involved with + rpc GetDepends(JobGetDependsRequest) returns (JobGetDependsResponse); - // Returns all frame objects that match FrameSearchCriteria - rpc GetFrames(JobGetFramesRequest) returns (JobGetFramesResponse); + // Returns all frame objects that match FrameSearchCriteria + rpc GetFrames(JobGetFramesRequest) returns (JobGetFramesResponse); - // Finds a pending job using the job name - rpc GetJob(JobGetJobRequest) returns 
(JobGetJobResponse); + // Finds a pending job using the job name + rpc GetJob(JobGetJobRequest) returns (JobGetJobResponse); - // Returns a list of jobs based on specified criteria - rpc GetJobs(JobGetJobsRequest) returns (JobGetJobsResponse); + // Returns a list of jobs based on specified criteria + rpc GetJobs(JobGetJobsRequest) returns (JobGetJobsResponse); - // Returns a sequence of job names using search criteria - rpc GetJobNames(JobGetJobNamesRequest) returns (JobGetJobNamesResponse); + // Returns a sequence of job names using search criteria + rpc GetJobNames(JobGetJobNamesRequest) returns (JobGetJobNamesResponse); - // Returns all layer objects - rpc GetLayers(JobGetLayersRequest) returns (JobGetLayersResponse); + // Returns all layer objects + rpc GetLayers(JobGetLayersRequest) returns (JobGetLayersResponse); - // Returns a UpdatedFrameCheckResult which contains - // updated state information for frames that have changed since the - // last update time as well as the current state of the job. - // - // If the user is filtering by layer, passing an array of layer - // proxies will limit the updates to specific layers. - // - // At most, your going to get 1 update per running frame every minute - // due to memory usage. - rpc GetUpdatedFrames(JobGetUpdatedFramesRequest) returns (JobGetUpdatedFramesResponse); + // Returns a UpdatedFrameCheckResult which contains + // updated state information for frames that have changed since the + // last update time as well as the current state of the job. + // + // If the user is filtering by layer, passing an array of layer + // proxies will limit the updates to specific layers. + // + // At most, your going to get 1 update per running frame every minute + // due to memory usage. + rpc GetUpdatedFrames(JobGetUpdatedFramesRequest) returns (JobGetUpdatedFramesResponse); - // Returns a list of dependencies setup to depend on - // this job. This includes all types of depends, not just - // OnJob dependencies. 
This will not return any frame on frame - // dependencies that are part of a FrameByFrame depend. It will - // return a single element that represents the entire dependency. - rpc GetWhatDependsOnThis(JobGetWhatDependsOnThisRequest) returns (JobGetWhatDependsOnThisResponse); + // Returns a list of dependencies setup to depend on + // this job. This includes all types of depends, not just + // OnJob dependencies. This will not return any frame on frame + // dependencies that are part of a FrameByFrame depend. It will + // return a single element that represents the entire dependency. + rpc GetWhatDependsOnThis(JobGetWhatDependsOnThisRequest) returns (JobGetWhatDependsOnThisResponse); - // Returns a list of dependencies that this frame depends on. - rpc GetWhatThisDependsOn(JobGetWhatThisDependsOnRequest) returns (JobGetWhatThisDependsOnResponse); + // Returns a list of dependencies that this frame depends on. + rpc GetWhatThisDependsOn(JobGetWhatThisDependsOnRequest) returns (JobGetWhatThisDependsOnResponse); - // Returns true if the job is in the pending state the cue. - rpc IsJobPending(JobIsJobPendingRequest) returns (JobIsJobPendingResponse); + // Returns true if the job is in the pending state the cue. + rpc IsJobPending(JobIsJobPendingRequest) returns (JobIsJobPendingResponse); - // Kill the job. This puts the job into the Finished State - // All running frames are killed, all depends satisfied. - rpc Kill(JobKillRequest) returns (JobKillResponse); + // Kill the job. This puts the job into the Finished State + // All running frames are killed, all depends satisfied. + rpc Kill(JobKillRequest) returns (JobKillResponse); - // Kills all frames that match the FrameSearchCriteria - rpc KillFrames(JobKillFramesRequest) returns (JobKillFramesResponse); + // Kills all frames that match the FrameSearchCriteria + rpc KillFrames(JobKillFramesRequest) returns (JobKillFramesResponse); - // Launches a job spec and returns an array of launched jobs. 
Waits for jobs to be committed to DB. This might time - // out before jobs are launched. - rpc LaunchSpecAndWait(JobLaunchSpecAndWaitRequest) returns (JobLaunchSpecAndWaitResponse); + // Launches a job spec and returns an array of launched jobs. Waits for jobs to be committed to DB. This might time + // out before jobs are launched. + rpc LaunchSpecAndWait(JobLaunchSpecAndWaitRequest) returns (JobLaunchSpecAndWaitResponse); - // Launches as a job spec and returns an array of job names that are being launched. This method returns immediately - // after basic checks. The job could fail to launch of a DB error occurs but that is rare. - rpc LaunchSpec(JobLaunchSpecRequest) returns (JobLaunchSpecResponse); + // Launches as a job spec and returns an array of job names that are being launched. This method returns immediately + // after basic checks. The job could fail to launch of a DB error occurs but that is rare. + rpc LaunchSpec(JobLaunchSpecRequest) returns (JobLaunchSpecResponse); - // Updates the matching frames from the Depend state to the waiting state - rpc MarkAsWaiting(JobMarkAsWaitingRequest) returns (JobMarkAsWaitingResponse); + // Updates the matching frames from the Depend state to the waiting state + rpc MarkAsWaiting(JobMarkAsWaitingRequest) returns (JobMarkAsWaitingResponse); - // Drops any dependency that requires any frame that matches the FrameSearchCriteria - rpc MarkDoneFrames(JobMarkDoneFramesRequest) returns (JobMarkDoneFramesResponse); + // Drops any dependency that requires any frame that matches the FrameSearchCriteria + rpc MarkDoneFrames(JobMarkDoneFramesRequest) returns (JobMarkDoneFramesResponse); - // Pauses the job, which means it no longer gets procs - rpc Pause(JobPauseRequest) returns (JobPauseResponse); + // Pauses the job, which means it no longer gets procs + rpc Pause(JobPauseRequest) returns (JobPauseResponse); - // Reorders the specified frame range on this job - rpc ReorderFrames(JobReorderFramesRequest) returns 
(JobReorderFramesResponse); + // Reorders the specified frame range on this job + rpc ReorderFrames(JobReorderFramesRequest) returns (JobReorderFramesResponse); - // Resumes a paused job - rpc Resume(JobResumeRequest) returns (JobResumeResponse); + // Resumes a paused job + rpc Resume(JobResumeRequest) returns (JobResumeResponse); - // Retries all frames that match the FrameSearchCriteria - rpc RetryFrames(JobRetryFramesRequest) returns (JobRetryFramesResponse); + // Retries all frames that match the FrameSearchCriteria + rpc RetryFrames(JobRetryFramesRequest) returns (JobRetryFramesResponse); - // Rerun filters for this job - rpc RunFilters(JobRunFiltersRequest) returns (JobRunFiltersResponse); + // Rerun filters for this job + rpc RunFilters(JobRunFiltersRequest) returns (JobRunFiltersResponse); - // If set to true, a frame that would have turned dead, will become eaten - rpc SetAutoEat(JobSetAutoEatRequest) returns (JobSetAutoEatResponse); + // If set to true, a frame that would have turned dead, will become eaten + rpc SetAutoEat(JobSetAutoEatRequest) returns (JobSetAutoEatResponse); - // Move the job into the specified group - rpc SetGroup(JobSetGroupRequest) returns (JobSetGroupResponse); + // Move the job into the specified group + rpc SetGroup(JobSetGroupRequest) returns (JobSetGroupResponse); - // Sets the maximum number of procs that can run on this job - rpc SetMaxCores(JobSetMaxCoresRequest) returns (JobSetMaxCoresResponse); + // Sets the maximum number of procs that can run on this job + rpc SetMaxCores(JobSetMaxCoresRequest) returns (JobSetMaxCoresResponse); - // Sets the default maximum number of frame retries for the job. One - // a frame has retried this many times it will automatically go - // to the dead state. The default upper limit on this is 16 retries. - rpc SetMaxRetries(JobSetMaxRetriesRequest) returns (JobSetMaxRetriesResponse); + // Sets the default maximum number of frame retries for the job. 
One + // a frame has retried this many times it will automatically go + // to the dead state. The default upper limit on this is 16 retries. + rpc SetMaxRetries(JobSetMaxRetriesRequest) returns (JobSetMaxRetriesResponse); - // Sets the minimum number of procs that can run on this job - rpc SetMinCores(JobSetMinCoresRequest) returns (JobSetMinCoresResponse); + // Sets the minimum number of procs that can run on this job + rpc SetMinCores(JobSetMinCoresRequest) returns (JobSetMinCoresResponse); - // Sets the maximum number of Gpu that can run on this job - rpc SetMaxGpus(JobSetMaxGpusRequest) returns (JobSetMaxGpusResponse); + // Sets the maximum number of Gpu that can run on this job + rpc SetMaxGpus(JobSetMaxGpusRequest) returns (JobSetMaxGpusResponse); - // Sets the minimum number of Gpu that can run on this job - rpc SetMinGpus(JobSetMinGpusRequest) returns (JobSetMinGpusResponse); + // Sets the minimum number of Gpu that can run on this job + rpc SetMinGpus(JobSetMinGpusRequest) returns (JobSetMinGpusResponse); - // Sets the job priority - rpc SetPriority(JobSetPriorityRequest) returns (JobSetPriorityResponse); + // Sets the job priority + rpc SetPriority(JobSetPriorityRequest) returns (JobSetPriorityResponse); - // Shutdown the job if it is completed. This is a workaround for when - // Cuebot failed to shutdown a job due to database access error. - rpc ShutdownIfCompleted(JobShutdownIfCompletedRequest) returns (JobShutdownIfCompletedResponse); + // Shutdown the job if it is completed. This is a workaround for when + // Cuebot failed to shutdown a job due to database access error. 
+ rpc ShutdownIfCompleted(JobShutdownIfCompletedRequest) returns (JobShutdownIfCompletedResponse); - // Staggers the specified frame range - rpc StaggerFrames(JobStaggerFramesRequest) returns (JobStaggerFramesResponse); + // Staggers the specified frame range + rpc StaggerFrames(JobStaggerFramesRequest) returns (JobStaggerFramesResponse); } service LayerInterface { - // Add Limit to the Layer - rpc AddLimit(LayerAddLimitRequest) returns (LayerAddLimitResponse); + // Add Limit to the Layer + rpc AddLimit(LayerAddLimitRequest) returns (LayerAddLimitResponse); - // Add RenderPartition to Layer - rpc AddRenderPartition(LayerAddRenderPartitionRequest) returns (LayerAddRenderPartitionResponse); + // Add RenderPartition to Layer + rpc AddRenderPartition(LayerAddRenderPartitionRequest) returns (LayerAddRenderPartitionResponse); - // Setup and return a LayerOnFrame dependency - rpc CreateDependencyOnFrame(LayerCreateDependOnFrameRequest) returns (LayerCreateDependOnFrameResponse); + // Setup and return a LayerOnFrame dependency + rpc CreateDependencyOnFrame(LayerCreateDependOnFrameRequest) returns (LayerCreateDependOnFrameResponse); - // Setup and return a LayerOnJob dependency - rpc CreateDependencyOnJob(LayerCreateDependOnJobRequest) returns (LayerCreateDependOnJobResponse); + // Setup and return a LayerOnJob dependency + rpc CreateDependencyOnJob(LayerCreateDependOnJobRequest) returns (LayerCreateDependOnJobResponse); - // Setup and return a LayerOnLayer dependency - rpc CreateDependencyOnLayer(LayerCreateDependOnLayerRequest) returns (LayerCreateDependOnLayerResponse); + // Setup and return a LayerOnLayer dependency + rpc CreateDependencyOnLayer(LayerCreateDependOnLayerRequest) returns (LayerCreateDependOnLayerResponse); - // Setup and return a FrameByFrame dependency - rpc CreateFrameByFrameDependency(LayerCreateFrameByFrameDependRequest) returns (LayerCreateFrameByFrameDependResponse); + // Setup and return a FrameByFrame dependency + rpc 
CreateFrameByFrameDependency(LayerCreateFrameByFrameDependRequest) returns (LayerCreateFrameByFrameDependResponse); - // Drops every dependency that is causing this layer not to run. - rpc DropDepends(LayerDropDependsRequest) returns (LayerDropDependsResponse); + // Drops every dependency that is causing this layer not to run. + rpc DropDepends(LayerDropDependsRequest) returns (LayerDropDependsResponse); - // Drop the limit from the specified layer. - rpc DropLimit(LayerDropLimitRequest) returns (LayerDropLimitResponse); + // Drop the limit from the specified layer. + rpc DropLimit(LayerDropLimitRequest) returns (LayerDropLimitResponse); - // Eat the Frames of this Layer - rpc EatFrames(LayerEatFramesRequest) returns (LayerEatFramesResponse); + // Eat the Frames of this Layer + rpc EatFrames(LayerEatFramesRequest) returns (LayerEatFramesResponse); - // When disabled, This will stop Cuebot from lowering the amount of memory required for a given layer. - rpc EnableMemoryOptimizer(LayerEnableMemoryOptimizerRequest) returns (LayerEnableMemoryOptimizerResponse); + // When disabled, This will stop Cuebot from lowering the amount of memory required for a given layer. 
+ rpc EnableMemoryOptimizer(LayerEnableMemoryOptimizerRequest) returns (LayerEnableMemoryOptimizerResponse); - // Finds a layer in a pending job based the job and layer name - rpc FindLayer(LayerFindLayerRequest) returns (LayerFindLayerResponse); + // Finds a layer in a pending job based the job and layer name + rpc FindLayer(LayerFindLayerRequest) returns (LayerFindLayerResponse); - // Get the frames that match the FrameSearchCriteria - rpc GetFrames(LayerGetFramesRequest) returns (LayerGetFramesResponse); + // Get the frames that match the FrameSearchCriteria + rpc GetFrames(LayerGetFramesRequest) returns (LayerGetFramesResponse); - // Finds a layer in a pending job from its unique ID - rpc GetLayer(LayerGetLayerRequest) returns (LayerGetLayerResponse); + // Finds a layer in a pending job from its unique ID + rpc GetLayer(LayerGetLayerRequest) returns (LayerGetLayerResponse); - // Return a list of the limits on the specified layer. - rpc GetLimits(LayerGetLimitsRequest) returns (LayerGetLimitsResponse); + // Return a list of the limits on the specified layer. + rpc GetLimits(LayerGetLimitsRequest) returns (LayerGetLimitsResponse); - // Return a list of all registered output paths. - rpc GetOutputPaths(LayerGetOutputPathsRequest) returns (LayerGetOutputPathsResponse); + // Return a list of all registered output paths. + rpc GetOutputPaths(LayerGetOutputPathsRequest) returns (LayerGetOutputPathsResponse); - // Returns a list of dependencies that this frame depends on. - rpc GetWhatThisDependsOn(LayerGetWhatThisDependsOnRequest) returns (LayerGetWhatThisDependsOnResponse); + // Returns a list of dependencies that this frame depends on. + rpc GetWhatThisDependsOn(LayerGetWhatThisDependsOnRequest) returns (LayerGetWhatThisDependsOnResponse); - // Returns a list of dependencies setup to depend on this layer. This includes all types of depends, not just - // OnLayer dependencies. 
This will not return any frame on frame dependencies that are part of a FrameByFrame - // depend. It will return a single element that represents the entire dependency. - rpc GetWhatDependsOnThis(LayerGetWhatDependsOnThisRequest) returns (LayerGetWhatDependsOnThisResponse); + // Returns a list of dependencies setup to depend on this layer. This includes all types of depends, not just + // OnLayer dependencies. This will not return any frame on frame dependencies that are part of a FrameByFrame + // depend. It will return a single element that represents the entire dependency. + rpc GetWhatDependsOnThis(LayerGetWhatDependsOnThisRequest) returns (LayerGetWhatDependsOnThisResponse); - // Kill Frames associated with this layer - rpc KillFrames(LayerKillFramesRequest) returns (LayerKillFramesResponse); + // Kill Frames associated with this layer + rpc KillFrames(LayerKillFramesRequest) returns (LayerKillFramesResponse); - // Mark the frames of this layer done - rpc MarkdoneFrames(LayerMarkdoneFramesRequest) returns (LayerMarkdoneFramesResponse); + // Mark the frames of this layer done + rpc MarkdoneFrames(LayerMarkdoneFramesRequest) returns (LayerMarkdoneFramesResponse); - // Register an output with the given layer. The output paths are sent in the opencue email. - rpc RegisterOutputPath(LayerRegisterOutputPathRequest) returns (LayerRegisterOutputPathResponse); + // Register an output with the given layer. The output paths are sent in the opencue email. + rpc RegisterOutputPath(LayerRegisterOutputPathRequest) returns (LayerRegisterOutputPathResponse); - //Reorders the specified frame range on this job. - rpc ReorderFrames(LayerReorderFramesRequest) returns (LayerReorderFramesResponse); + //Reorders the specified frame range on this job. 
+ rpc ReorderFrames(LayerReorderFramesRequest) returns (LayerReorderFramesResponse); - // Retry the Frames of this Layer - rpc RetryFrames(LayerRetryFramesRequest) returns (LayerRetryFramesResponse); + // Retry the Frames of this Layer + rpc RetryFrames(LayerRetryFramesRequest) returns (LayerRetryFramesResponse); - // The maximum number of cores to run on a given frame within this layer. Fractional core values are not allowed - // with this setting. - rpc SetMaxCores(LayerSetMaxCoresRequest) returns (LayerSetMaxCoresResponse); + // The maximum number of cores to run on a given frame within this layer. Fractional core values are not allowed + // with this setting. + rpc SetMaxCores(LayerSetMaxCoresRequest) returns (LayerSetMaxCoresResponse); - // Set the Min Cores for this layer - rpc SetMinCores(LayerSetMinCoresRequest) returns (LayerSetMinCoresResponse); + // Set the Min Cores for this layer + rpc SetMinCores(LayerSetMinCoresRequest) returns (LayerSetMinCoresResponse); - // The maximum number of Gpu to run on a given frame within this layer. - rpc SetMaxGpus(LayerSetMaxGpusRequest) returns (LayerSetMaxGpusResponse); + // The maximum number of Gpu to run on a given frame within this layer. 
+ rpc SetMaxGpus(LayerSetMaxGpusRequest) returns (LayerSetMaxGpusResponse); - // Set the Min Gpus for this layer - rpc SetMinGpus(LayerSetMinGpusRequest) returns (LayerSetMinGpusResponse); + // Set the Min Gpus for this layer + rpc SetMinGpus(LayerSetMinGpusRequest) returns (LayerSetMinGpusResponse); - // Set the Min gpu memory value for the layer - rpc SetMinGpuMemory(LayerSetMinGpuMemoryRequest) returns (LayerSetMinGpuMemoryResponse); + // Set the Min gpu memory value for the layer + rpc SetMinGpuMemory(LayerSetMinGpuMemoryRequest) returns (LayerSetMinGpuMemoryResponse); - // [Deprecated] Set the Min gpu memory value for the layer - rpc SetMinGpu(LayerSetMinGpuRequest) returns (LayerSetMinGpuResponse); + // [Deprecated] Set the Min gpu memory value for the layer + rpc SetMinGpu(LayerSetMinGpuRequest) returns (LayerSetMinGpuResponse); - // Set the Min Memory value for the layer - rpc SetMinMemory(LayerSetMinMemoryRequest) returns (LayerSetMinMemoryResponse); + // Set the Min Memory value for the layer + rpc SetMinMemory(LayerSetMinMemoryRequest) returns (LayerSetMinMemoryResponse); - // Set tags on the layer - rpc SetTags(LayerSetTagsRequest) returns (LayerSetTagsResponse); + // Set tags on the layer + rpc SetTags(LayerSetTagsRequest) returns (LayerSetTagsResponse); - // Set whether the layer is threadable or not - rpc SetThreadable(LayerSetThreadableRequest) returns (LayerSetThreadableResponse); + // Set whether the layer is threadable or not + rpc SetThreadable(LayerSetThreadableRequest) returns (LayerSetThreadableResponse); - // Set whether the timeout for frames in the layer - rpc SetTimeout(LayerSetTimeoutRequest) returns (LayerSetTimeoutResponse); + // Set whether the timeout for frames in the layer + rpc SetTimeout(LayerSetTimeoutRequest) returns (LayerSetTimeoutResponse); - // Set whether the LLU timeout for frames in the layer - rpc SetTimeoutLLU(LayerSetTimeoutLLURequest) returns (LayerSetTimeoutLLUResponse); + // Set whether the LLU timeout for frames 
in the layer + rpc SetTimeoutLLU(LayerSetTimeoutLLURequest) returns (LayerSetTimeoutLLUResponse); - // Set the number of slots required per frame for this layer - rpc SetSlotsRequired(LayerSetSlotsRequiredRequest) returns (LayerSetSlotsRequiredResponse); + // Set the number of slots required per frame for this layer + rpc SetSlotsRequired(LayerSetSlotsRequiredRequest) returns (LayerSetSlotsRequiredResponse); - // Staggers the specified frame range. - rpc StaggerFrames(LayerStaggerFramesRequest) returns (LayerStaggerFramesResponse); + // Staggers the specified frame range. + rpc StaggerFrames(LayerStaggerFramesRequest) returns (LayerStaggerFramesResponse); } - // -------- Enums --------] // Define the possible checkpoint states for a frame. enum CheckpointState { - DISABLED = 0; - ENABLED = 1; - COPYING = 2; - COMPLETE = 3; + DISABLED = 0; + ENABLED = 1; + COPYING = 2; + COMPLETE = 3; } // These frame exit status values are used to trigger special dispatcher behavior. They are greater than 255 // so they don't collide with any real exit status values used by applications running on the cue. enum FrameExitStatus { - // The frame was a success - SUCCESS = 0; - // The frame should be automatically retried - FAILED_LAUNCH = 256; - // Retries should not be incremented - SKIP_RETRY = 286; + // The frame was a success + SUCCESS = 0; + // The frame should be automatically retried + FAILED_LAUNCH = 256; + // Retries should not be incremented + SKIP_RETRY = 286; } // Defines the possible states of a frame. 
enum FrameState { - // Ok to be dispatched - WAITING = 0; - // Reserved to be dispatched - SETUP = 1; - // Running on a render proc - RUNNING = 2; - // Frame completed successfully - SUCCEEDED = 3; - // Frame is waiting on a dependency - DEPEND = 4; - // Frame is dead,which means it has died N times - DEAD = 5; - // Frame is eaten, acts like the frame has succeeded - EATEN = 6; - // Frame is checkpointing - CHECKPOINT = 7; + // Ok to be dispatched + WAITING = 0; + // Reserved to be dispatched + SETUP = 1; + // Running on a render proc + RUNNING = 2; + // Frame completed successfully + SUCCEEDED = 3; + // Frame is waiting on a dependency + DEPEND = 4; + // Frame is dead,which means it has died N times + DEAD = 5; + // Frame is eaten, acts like the frame has succeeded + EATEN = 6; + // Frame is checkpointing + CHECKPOINT = 7; } // Defines the possible states of a job. enum JobState { - // Job is running - PENDING = 0; - // The job has completed - FINISHED = 1; - // The job is in the process of starting up - STARTUP = 2; - // The job is in the process of shutting down - SHUTDOWN = 3; - // The job is a post job and is waiting to go pending - POSTED = 4; + // Job is running + PENDING = 0; + // The job has completed + FINISHED = 1; + // The job is in the process of starting up + STARTUP = 2; + // The job is in the process of shutting down + SHUTDOWN = 3; + // The job is a post job and is waiting to go pending + POSTED = 4; } // The LayerType determines the type of the layer. A proc will not run @@ -462,668 +458,663 @@ enum JobState { // There is no specific dispatch order for layer types. You will need // to setup dependencies. enum LayerType { - PRE = 0; - POST = 1; - RENDER = 2; - UTIL = 3; + PRE = 0; + POST = 1; + RENDER = 2; + UTIL = 3; } //Used for reordering frames. 
enum Order { - // Moves frames to the lowest dispatch order - FIRST = 0; - // Moves frames to the last dispatch order - LAST = 1; - // Reverses the dispatch order - REVERSE = 2; + // Moves frames to the lowest dispatch order + FIRST = 0; + // Moves frames to the last dispatch order + LAST = 1; + // Reverses the dispatch order + REVERSE = 2; } - // -------- Primary Message Types --------] // Object to contain an override of the frame's display settings message FrameStateDisplayOverride { - FrameState state = 1; // the FrameState to override - string text = 2; // the text to use + FrameState state = 1; // the FrameState to override + string text = 2; // the text to use - message RGB { - int32 red = 1; - int32 green = 2; - int32 blue = 3; - } - RGB color = 3; // RGB color to use e.g. 255,0,0 + message RGB { + int32 red = 1; + int32 green = 2; + int32 blue = 3; + } + RGB color = 3; // RGB color to use e.g. 255,0,0 } message FrameStateDisplayOverrideSeq { - repeated FrameStateDisplayOverride overrides = 1; + repeated FrameStateDisplayOverride overrides = 1; } // FRAME ---- message Frame { - string id = 1; - string name = 2; - string layer_name = 3; - int32 number = 4; - FrameState state = 5; - int32 retry_count = 6; - int32 exit_status = 7; - int32 dispatch_order = 8; - int32 start_time = 9; - int32 stop_time = 10; - int64 max_rss = 11; - int64 used_memory = 12; - int64 reserved_memory = 13; - int64 reserved_gpu_memory = 14; - string last_resource = 15; - CheckpointState checkpoint_state = 16; - int32 checkpoint_count = 17; - int32 total_core_time = 18; - int32 llu_time = 19; - int32 total_gpu_time = 20; - int64 max_gpu_memory = 21; - int64 used_gpu_memory = 22; - FrameStateDisplayOverride frame_state_display_override = 23; + string id = 1; + string name = 2; + string layer_name = 3; + int32 number = 4; + FrameState state = 5; + int32 retry_count = 6; + int32 exit_status = 7; + int32 dispatch_order = 8; + int32 start_time = 9; + int32 stop_time = 10; + int64 max_rss = 
11; + int64 used_memory = 12; + int64 reserved_memory = 13; + int64 reserved_gpu_memory = 14; + string last_resource = 15; + CheckpointState checkpoint_state = 16; + int32 checkpoint_count = 17; + int32 total_core_time = 18; + int32 llu_time = 19; + int32 total_gpu_time = 20; + int64 max_gpu_memory = 21; + int64 used_gpu_memory = 22; + FrameStateDisplayOverride frame_state_display_override = 23; } // Object for frame searching message FrameSearchCriteria { - repeated string ids = 1; - repeated string frames = 2; - repeated string layers = 3; - FrameStateSeq states = 4; - string frame_range = 5; - string memory_range = 6; - string duration_range = 7; - int32 page = 8; - int32 limit = 9; - int32 change_date = 10; - int32 max_results = 11; - int32 offset = 12; - bool include_finished = 13; + repeated string ids = 1; + repeated string frames = 2; + repeated string layers = 3; + FrameStateSeq states = 4; + string frame_range = 5; + string memory_range = 6; + string duration_range = 7; + int32 page = 8; + int32 limit = 9; + int32 change_date = 10; + int32 max_results = 11; + int32 offset = 12; + bool include_finished = 13; } // A sequence of Frames message FrameSeq { - repeated Frame frames = 1; + repeated Frame frames = 1; } // A sequence of FrameStates message FrameStateSeq { - repeated FrameState frame_states = 1; + repeated FrameState frame_states = 1; } - // A struct containing properties for all the elements of a frame that // can change except for the ID which is there for indexing purposes. 
message UpdatedFrame { - string id = 1; - FrameState state = 2; - int32 retry_count = 3; - int32 exit_status = 4; - int32 start_time = 5; - int32 stop_time = 6; - int64 max_rss = 7; - int64 used_memory = 8; - string last_resource = 9; - int32 llu_time = 10; - int64 max_gpu_memory = 11; - int64 used_gpu_memory = 12; - FrameStateDisplayOverride frame_state_display_override = 13; + string id = 1; + FrameState state = 2; + int32 retry_count = 3; + int32 exit_status = 4; + int32 start_time = 5; + int32 stop_time = 6; + int64 max_rss = 7; + int64 used_memory = 8; + string last_resource = 9; + int32 llu_time = 10; + int64 max_gpu_memory = 11; + int64 used_gpu_memory = 12; + FrameStateDisplayOverride frame_state_display_override = 13; } message UpdatedFrameSeq { - repeated UpdatedFrame updated_frames = 1; + repeated UpdatedFrame updated_frames = 1; } - // The result of an updated frame check. The job state is included // so tools that are just monitoring frames can stop monitoring them // once the job state changes to Finished. 
message UpdatedFrameCheckResult { - JobState state = 1; - int32 server_time = 2; - UpdatedFrameSeq updated_frames = 3; + JobState state = 1; + int32 server_time = 2; + UpdatedFrameSeq updated_frames = 3; } // GROUP ---- message Group { - string id = 1; - string name = 2; - string department = 3; - int32 default_job_priority = 4; - float default_job_min_cores = 5; - float default_job_max_cores = 6; - float min_cores = 7; - float max_cores = 8; - int32 level = 9; - string parent_id = 10; - GroupStats group_stats = 11; - float default_job_min_gpus = 12; - float default_job_max_gpus = 13; - float min_gpus = 14; - float max_gpus = 15; + string id = 1; + string name = 2; + string department = 3; + int32 default_job_priority = 4; + float default_job_min_cores = 5; + float default_job_max_cores = 6; + float min_cores = 7; + float max_cores = 8; + int32 level = 9; + string parent_id = 10; + GroupStats group_stats = 11; + float default_job_min_gpus = 12; + float default_job_max_gpus = 13; + float min_gpus = 14; + float max_gpus = 15; } message GroupSeq { - repeated Group groups = 1; + repeated Group groups = 1; } message GroupStats { - int32 running_frames = 1; - int32 dead_frames = 2; - int32 depend_frames = 3; - int32 waiting_frames = 4; - int32 pending_jobs = 5; - float reserved_cores = 6; - float reserved_gpus = 7; + int32 running_frames = 1; + int32 dead_frames = 2; + int32 depend_frames = 3; + int32 waiting_frames = 4; + int32 pending_jobs = 5; + float reserved_cores = 6; + float reserved_gpus = 7; } // JOB ---- message Job { - string id = 1; - JobState state = 2; - string name = 3; - string shot = 4; - string show = 5; - string user = 6; - string group = 7; - string facility = 8; - string os = 9; - oneof uid_optional { - int32 uid = 10; - } - int32 priority = 11; - float min_cores = 12; - float max_cores = 13; - string log_dir = 14; - bool is_paused = 15; - bool has_comment = 16; - bool auto_eat = 17; - int32 start_time = 18; - int32 stop_time = 19; - JobStats 
job_stats = 20; - float min_gpus = 21; - float max_gpus = 22; - string loki_url = 23; + string id = 1; + JobState state = 2; + string name = 3; + string shot = 4; + string show = 5; + string user = 6; + string group = 7; + string facility = 8; + string os = 9; + oneof uid_optional { + int32 uid = 10; + } + int32 priority = 11; + float min_cores = 12; + float max_cores = 13; + string log_dir = 14; + bool is_paused = 15; + bool has_comment = 16; + bool auto_eat = 17; + int32 start_time = 18; + int32 stop_time = 19; + JobStats job_stats = 20; + float min_gpus = 21; + float max_gpus = 22; + string loki_url = 23; } // Use to filter the job search. Please note that by searching for non-pending jobs, the output is limited to 200 jobs message JobSearchCriteria { - repeated string ids = 1; - repeated string jobs = 2; - repeated string regex = 3; - repeated string substr = 4; - repeated string users = 5; - repeated string shots = 6; - repeated string shows = 7; - bool include_finished = 8; + repeated string ids = 1; + repeated string jobs = 2; + repeated string regex = 3; + repeated string substr = 4; + repeated string users = 5; + repeated string shots = 6; + repeated string shows = 7; + bool include_finished = 8; } message JobSeq { - repeated Job jobs = 1; + repeated Job jobs = 1; } message JobStats { - int32 total_layers = 1; - int32 total_frames = 2; - int32 waiting_frames = 3; - int32 running_frames = 4; - int32 dead_frames = 5; - int32 eaten_frames = 6; - int32 depend_frames = 7; - int32 succeeded_frames = 8; - int32 pending_frames = 9; - int32 avg_frame_sec = 10; - int32 high_frame_sec = 11; - int32 avg_core_sec = 12; - int64 rendered_frame_count = 13; - int64 failed_frame_count = 14; - int64 remaining_core_sec = 15; - int64 total_core_sec = 16; - int64 rendered_core_sec = 17; - int64 failed_core_sec = 18; - int64 max_rss = 19; - float reserved_cores = 20; - int64 total_gpu_sec = 21; - int64 rendered_gpu_sec = 22; - int64 failed_gpu_sec = 23; - float reserved_gpus = 
24; - int64 max_gpu_memory = 25; + int32 total_layers = 1; + int32 total_frames = 2; + int32 waiting_frames = 3; + int32 running_frames = 4; + int32 dead_frames = 5; + int32 eaten_frames = 6; + int32 depend_frames = 7; + int32 succeeded_frames = 8; + int32 pending_frames = 9; + int32 avg_frame_sec = 10; + int32 high_frame_sec = 11; + int32 avg_core_sec = 12; + int64 rendered_frame_count = 13; + int64 failed_frame_count = 14; + int64 remaining_core_sec = 15; + int64 total_core_sec = 16; + int64 rendered_core_sec = 17; + int64 failed_core_sec = 18; + int64 max_rss = 19; + float reserved_cores = 20; + int64 total_gpu_sec = 21; + int64 rendered_gpu_sec = 22; + int64 failed_gpu_sec = 23; + float reserved_gpus = 24; + int64 max_gpu_memory = 25; } // LAYER ---- message Layer { - string id = 1; - string name = 2; - string range = 3; - repeated string tags = 4; - float min_cores = 5; - float max_cores = 6; - bool is_threadable = 7; - int64 min_memory = 8; - int64 min_gpu_memory = 9; - int32 chunk_size = 10; - int32 dispatch_order = 11; - LayerType type = 12; - // An array of services that are being run on all frames within this layer. - repeated string services = 13; - // True if the memory optimizer is enabled. Disabling the optimizer will stop Cuebot from lowering memory. - bool memory_optimizer_enabled = 14; - LayerStats layer_stats = 15; - string parent_id = 16; - repeated string limits = 17; - int32 timeout = 18; - int32 timeout_llu = 19; - float min_gpus = 20; - float max_gpus = 21; - string command = 22; - // Number of slots required per frame (<=0 means not slot-based) - int32 slots_required = 23; + string id = 1; + string name = 2; + string range = 3; + repeated string tags = 4; + float min_cores = 5; + float max_cores = 6; + bool is_threadable = 7; + int64 min_memory = 8; + int64 min_gpu_memory = 9; + int32 chunk_size = 10; + int32 dispatch_order = 11; + LayerType type = 12; + // An array of services that are being run on all frames within this layer. 
+ repeated string services = 13; + // True if the memory optimizer is enabled. Disabling the optimizer will stop Cuebot from lowering memory. + bool memory_optimizer_enabled = 14; + LayerStats layer_stats = 15; + string parent_id = 16; + repeated string limits = 17; + int32 timeout = 18; + int32 timeout_llu = 19; + float min_gpus = 20; + float max_gpus = 21; + string command = 22; + // Number of slots required per frame (<0 means not slot-based) + int32 slots_required = 23; } message LayerSeq { - repeated Layer layers = 1; + repeated Layer layers = 1; } message LayerStats { - int32 total_frames = 1; - int32 waiting_frames = 2; - int32 running_frames = 3; - int32 dead_frames = 4; - int32 eaten_frames = 5; - int32 depend_frames = 6; - int32 succeeded_frames = 7; - int32 pending_frames = 8; - int32 avg_frame_sec = 9; - int32 low_frame_sec = 10; - int32 high_frame_sec = 11; - int32 avg_core_sec = 12; - int64 rendered_frame_count = 13; - int64 failed_frame_count = 14; - int64 remaining_core_sec = 15; - int64 total_core_sec = 16; - int64 rendered_core_sec = 17; - int64 failed_core_sec = 18; - int64 max_rss = 19; - float reserved_cores = 20; - int64 total_gpu_sec = 21; - int64 rendered_gpu_sec = 22; - int64 failed_gpu_sec = 23; - float reserved_gpus = 24; - int64 max_gpu_memory = 25; + int32 total_frames = 1; + int32 waiting_frames = 2; + int32 running_frames = 3; + int32 dead_frames = 4; + int32 eaten_frames = 5; + int32 depend_frames = 6; + int32 succeeded_frames = 7; + int32 pending_frames = 8; + int32 avg_frame_sec = 9; + int32 low_frame_sec = 10; + int32 high_frame_sec = 11; + int32 avg_core_sec = 12; + int64 rendered_frame_count = 13; + int64 failed_frame_count = 14; + int64 remaining_core_sec = 15; + int64 total_core_sec = 16; + int64 rendered_core_sec = 17; + int64 failed_core_sec = 18; + int64 max_rss = 19; + float reserved_cores = 20; + int64 total_gpu_sec = 21; + int64 rendered_gpu_sec = 22; + int64 failed_gpu_sec = 23; + float reserved_gpus = 24; + int64 
max_gpu_memory = 25; } // NestedGroup --- message NestedGroup { - string id = 1; - string name = 2; - string department = 3; - int32 default_job_priority = 4; - float default_job_min_cores = 5; - float default_job_max_cores = 6; - float min_cores = 7; - float max_cores = 8; - int32 level = 9; - NestedGroup parent = 10; - NestedGroupSeq groups = 11; - repeated string jobs = 12; - GroupStats stats = 13; - float default_job_min_gpus = 14; - float default_job_max_gpus = 15; - float min_gpus = 16; - float max_gpus = 17; + string id = 1; + string name = 2; + string department = 3; + int32 default_job_priority = 4; + float default_job_min_cores = 5; + float default_job_max_cores = 6; + float min_cores = 7; + float max_cores = 8; + int32 level = 9; + NestedGroup parent = 10; + NestedGroupSeq groups = 11; + repeated string jobs = 12; + GroupStats stats = 13; + float default_job_min_gpus = 14; + float default_job_max_gpus = 15; + float min_gpus = 16; + float max_gpus = 17; } message NestedGroupSeq { - repeated NestedGroup nested_groups = 1; + repeated NestedGroup nested_groups = 1; } // NESTED JOB ---- message NestedJob { - string id = 1; - JobState state = 2; - string name = 3; - string shot = 4; - string show = 5; - string user = 6; - string group = 7; - string facility = 8; - string os = 9; - oneof uid_optional { - int32 uid = 10; - } - int32 priority = 11; - float min_cores = 12; - float max_cores = 13; - string log_dir = 14; - bool is_paused = 15; - bool has_comment = 16; - bool auto_eat = 17; - int32 start_time = 18; - int32 stop_time = 19; - NestedGroup parent = 20; - JobStats stats = 21; - float min_gpus = 22; - float max_gpus = 23; + string id = 1; + JobState state = 2; + string name = 3; + string shot = 4; + string show = 5; + string user = 6; + string group = 7; + string facility = 8; + string os = 9; + oneof uid_optional { + int32 uid = 10; + } + int32 priority = 11; + float min_cores = 12; + float max_cores = 13; + string log_dir = 14; + bool is_paused = 15; + 
bool has_comment = 16; + bool auto_eat = 17; + int32 start_time = 18; + int32 stop_time = 19; + NestedGroup parent = 20; + JobStats stats = 21; + float min_gpus = 22; + float max_gpus = 23; } - // -------- Requests & Responses --------] // FRAME ---- // AddRenderPartition message FrameAddRenderPartitionRequest { - Frame frame = 1; - string host = 2; - int32 threads = 3; - int32 max_cores = 4; - int64 max_memory = 5; - int64 max_gpu_memory = 6; - string username = 7; - int32 max_gpus = 8; + Frame frame = 1; + string host = 2; + int32 threads = 3; + int32 max_cores = 4; + int64 max_memory = 5; + int64 max_gpu_memory = 6; + string username = 7; + int32 max_gpus = 8; } message FrameAddRenderPartitionResponse { - renderPartition.RenderPartition render_partition = 1; + renderPartition.RenderPartition render_partition = 1; } // CreateDependencyOnFrame message FrameCreateDependencyOnFrameRequest { - Frame frame = 1; - Frame depend_on_frame = 2; + Frame frame = 1; + Frame depend_on_frame = 2; } message FrameCreateDependencyOnFrameResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnJob message FrameCreateDependencyOnJobRequest { - Frame frame = 1; - Job job = 2; + Frame frame = 1; + Job job = 2; } message FrameCreateDependencyOnJobResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnLayer message FrameCreateDependencyOnLayerRequest { - Frame frame = 1; - Layer layer = 2; + Frame frame = 1; + Layer layer = 2; } message FrameCreateDependencyOnLayerResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // DropDepends message FrameDropDependsRequest { - Frame frame = 1; - depend.DependTarget target = 2; + Frame frame = 1; + depend.DependTarget target = 2; } message FrameDropDependsResponse {} // Empty // Eat message FrameEatRequest { - Frame frame = 1; + Frame frame = 1; } message FrameEatResponse {} // Empty // FindFrame message FrameFindFrameRequest { - string job = 1; - string layer = 2; 
- int32 frame = 3; + string job = 1; + string layer = 2; + int32 frame = 3; } message FrameFindFrameResponse { - Frame frame = 1; + Frame frame = 1; } // GetFrame message FrameGetFrameRequest { - string id = 1; + string id = 1; } message FrameGetFrameResponse { - Frame frame = 1; + Frame frame = 1; } // GetFrames message FrameGetFramesRequest { - string job = 1; - job.FrameSearchCriteria r = 2; + string job = 1; + job.FrameSearchCriteria r = 2; } message FrameGetFramesResponse { - FrameSeq frames = 1; + FrameSeq frames = 1; } // GetWhatDependsOnThis message FrameGetWhatDependsOnThisRequest { - Frame frame = 1; + Frame frame = 1; } message FrameGetWhatDependsOnThisResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // GetWhatThisDependsOn message FrameGetWhatThisDependsOnRequest { - Frame frame = 1; + Frame frame = 1; } message FrameGetWhatThisDependsOnResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // Kill message FrameKillRequest { - Frame frame = 1; - string username = 2; - string pid = 3; - string host_kill = 4; - string reason = 5; + Frame frame = 1; + string username = 2; + string pid = 3; + string host_kill = 4; + string reason = 5; } message FrameKillResponse {} // Empty // MarkAsDepend message FrameMarkAsDependRequest { - Frame frame = 1; + Frame frame = 1; } message FrameMarkAsDependResponse {} // Empty // MarkAsWaiting message FrameMarkAsWaitingRequest { - Frame frame = 1; + Frame frame = 1; } message FrameMarkAsWaitingResponse {} // Empty // Retry message FrameRetryRequest { - Frame frame = 1; + Frame frame = 1; } message FrameRetryResponse {} // Empty // SetCheckpointState message FrameSetCheckpointStateRequest { - Frame frame = 1; - CheckpointState state = 2; + Frame frame = 1; + CheckpointState state = 2; } message FrameSetCheckpointStateResponse {} // Empty - // GROUP ---- // CreateSubGroup message GroupCreateSubGroupRequest { - Group group = 1; - string name = 2; + Group group = 1; + string name 
= 2; } message GroupCreateSubGroupResponse { - Group group = 1; + Group group = 1; } // Delete message GroupDeleteRequest { - Group group = 1; + Group group = 1; } message GroupDeleteResponse {} // Empty // FindGroup message GroupFindGroupRequest { - string show = 1; - string name = 2; + string show = 1; + string name = 2; } message GroupFindGroupResponse { - Group group = 1; + Group group = 1; } // GetGroup message GroupGetGroupRequest { - string id = 1; + string id = 1; } message GroupGetGroupResponse { - Group group = 1; + Group group = 1; } // GetGroups message GroupGetGroupsRequest { - Group group = 1; + Group group = 1; } message GroupGetGroupsResponse { - GroupSeq groups = 1; + GroupSeq groups = 1; } // GetJobs message GroupGetJobsRequest { - Group group = 1; + Group group = 1; } message GroupGetJobsResponse { - JobSeq jobs = 1; + JobSeq jobs = 1; } // ReparentGroups message GroupReparentGroupsRequest { - Group group = 1; - GroupSeq groups = 2; + Group group = 1; + GroupSeq groups = 2; } message GroupReparentGroupsResponse {} // Empty // ReparentJobs message GroupReparentJobsRequest { - Group group = 1; - JobSeq jobs = 2; + Group group = 1; + JobSeq jobs = 2; } message GroupReparentJobsResponse {} // Empty // SetDefJobMaxCores message GroupSetDefJobMaxCoresRequest { - Group group = 1; - float max_cores = 2; + Group group = 1; + float max_cores = 2; } message GroupSetDefJobMaxCoresResponse {} // Empty // SetDefJobMinCores message GroupSetDefJobMinCoresRequest { - Group group = 1; - float min_cores = 2; + Group group = 1; + float min_cores = 2; } message GroupSetDefJobMinCoresResponse {} // Empty // SetDefaultJobMaxGpus message GroupSetDefJobMaxGpusRequest { - Group group = 1; - int32 max_gpus = 2; + Group group = 1; + int32 max_gpus = 2; } message GroupSetDefJobMaxGpusResponse {} // Empty // SetDefaultJobMinGpus message GroupSetDefJobMinGpusRequest { - Group group = 1; - int32 min_gpus = 2; + Group group = 1; + int32 min_gpus = 2; } message 
GroupSetDefJobMinGpusResponse {} // Empty // SetDefJobPriority message GroupSetDefJobPriorityRequest { - Group group = 1; - int32 priority = 2; + Group group = 1; + int32 priority = 2; } message GroupSetDefJobPriorityResponse {} // Empty // SetDept message GroupSetDeptRequest { - Group group = 1; - string dept = 2; + Group group = 1; + string dept = 2; } message GroupSetDeptResponse {} // Empty // SetGroup message GroupSetGroupRequest { - Group group = 1; - Group parent_group = 2; + Group group = 1; + Group parent_group = 2; } message GroupSetGroupResponse {} // Empty // SetMaxCores message GroupSetMaxCoresRequest { - Group group = 1; - float max_cores = 2; + Group group = 1; + float max_cores = 2; } message GroupSetMaxCoresResponse {} // Empty // SetMinCores message GroupSetMinCoresRequest { - Group group = 1; - float min_cores = 2; + Group group = 1; + float min_cores = 2; } message GroupSetMinCoresResponse {} // Empty // SetMaxGpus message GroupSetMaxGpusRequest { - Group group = 1; - int32 max_gpus = 2; + Group group = 1; + int32 max_gpus = 2; } message GroupSetMaxGpusResponse {} // Empty // SetMinGpus message GroupSetMinGpusRequest { - Group group = 1; - int32 min_gpus = 2; + Group group = 1; + int32 min_gpus = 2; } message GroupSetMinGpusResponse {} // Empty // SetName message GroupSetNameRequest { - Group group = 1; - string name = 2; + Group group = 1; + string name = 2; } message GroupSetNameResponse {} // Empty @@ -1131,605 +1122,603 @@ message GroupSetNameResponse {} // Empty // JOB ---- // AddComment message JobAddCommentRequest { - Job job = 1; - comment.Comment new_comment = 2; + Job job = 1; + comment.Comment new_comment = 2; } message JobAddCommentResponse {} // Empty // AddRenderPartition message JobAddRenderPartRequest { - Job job = 1; - string host = 2; - int32 threads = 3; - int32 max_cores = 4; - int64 max_memory = 5; - int64 max_gpu_memory = 6; - string username = 7; - int32 max_gpus = 8; + Job job = 1; + string host = 2; + int32 threads = 3; 
+ int32 max_cores = 4; + int64 max_memory = 5; + int64 max_gpu_memory = 6; + string username = 7; + int32 max_gpus = 8; } message JobAddRenderPartResponse { - renderPartition.RenderPartition render_partition = 1; + renderPartition.RenderPartition render_partition = 1; } // AddSubscriber message JobAddSubscriberRequest { - Job job = 1; - string subscriber = 2; + Job job = 1; + string subscriber = 2; } message JobAddSubscriberResponse {} // CreateDependencyOnFrame message JobCreateDependencyOnFrameRequest { - Job job = 1; - Frame frame = 2; + Job job = 1; + Frame frame = 2; } message JobCreateDependencyOnFrameResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnJob message JobCreateDependencyOnJobRequest { - Job job = 1; - Job on_job = 2; + Job job = 1; + Job on_job = 2; } message JobCreateDependencyOnJobResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnLayer message JobCreateDependencyOnLayerRequest { - Job job = 1; - Layer layer = 2; + Job job = 1; + Layer layer = 2; } message JobCreateDependencyOnLayerResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // DropDepends message JobDropDependsRequest { - Job job = 1; - depend.DependTarget target = 2; + Job job = 1; + depend.DependTarget target = 2; } message JobDropDependsResponse {} // Empty // EatFrames message JobEatFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobEatFramesResponse {} // Empty // FindJob message JobFindJobRequest { - string name = 1; + string name = 1; } message JobFindJobResponse { - Job job = 1; + Job job = 1; } // GetComments message JobGetCommentsRequest { - Job job = 1; + Job job = 1; } message JobGetCommentsResponse { - comment.CommentSeq comments = 1; + comment.CommentSeq comments = 1; } // GetCurrent message JobGetCurrentRequest { - Job job = 1; + Job job = 1; } message JobGetCurrentResponse { - Job job = 1; + Job job = 1; } // 
GetDepends message JobGetDependsRequest { - Job job = 1; + Job job = 1; } message JobGetDependsResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // GetFrames message JobGetFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobGetFramesResponse { - FrameSeq frames = 1; + FrameSeq frames = 1; } // GetJob message JobGetJobRequest { - string id = 1; + string id = 1; } message JobGetJobResponse { - Job job = 1; + Job job = 1; } // GetJobs message JobGetJobsRequest { - JobSearchCriteria r = 1; + JobSearchCriteria r = 1; } message JobGetJobsResponse { - JobSeq jobs = 1; + JobSeq jobs = 1; } // GetJobNames message JobGetJobNamesRequest { - job.JobSearchCriteria r = 1; + job.JobSearchCriteria r = 1; } - message JobGetJobNamesResponse { - repeated string names = 1; + repeated string names = 1; } // GetLayers message JobGetLayersRequest { - Job job = 1; + Job job = 1; } message JobGetLayersResponse { - LayerSeq layers = 1; + LayerSeq layers = 1; } // GetUpdatedFrames message JobGetUpdatedFramesRequest { - Job job = 1; - int32 last_check = 2; - LayerSeq layer_filter = 3; + Job job = 1; + int32 last_check = 2; + LayerSeq layer_filter = 3; } message JobGetUpdatedFramesResponse { - JobState state = 1; - int32 server_time = 2; - UpdatedFrameSeq updated_frames = 3; + JobState state = 1; + int32 server_time = 2; + UpdatedFrameSeq updated_frames = 3; } // GetWhatDependsOnThis message JobGetWhatDependsOnThisRequest { - Job job = 1; + Job job = 1; } message JobGetWhatDependsOnThisResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } //GetWhatThisDependsOn message JobGetWhatThisDependsOnRequest { - Job job = 1; + Job job = 1; } message JobGetWhatThisDependsOnResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // IsJobPending message JobIsJobPendingRequest { - string name = 1; + string name = 1; } message JobIsJobPendingResponse { - bool value = 1; 
+ bool value = 1; } // Kill message JobKillRequest { - Job job = 1; - string username = 2; - string pid = 3; - string host_kill = 4; - string reason = 5; + Job job = 1; + string username = 2; + string pid = 3; + string host_kill = 4; + string reason = 5; } message JobKillResponse {} // Empty // KillFrames message JobKillFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; - string username = 3; - string pid = 4; - string host_kill = 5; - string reason = 6; + Job job = 1; + FrameSearchCriteria req = 2; + string username = 3; + string pid = 4; + string host_kill = 5; + string reason = 6; } message JobKillFramesResponse {} // Empty // LaunchSpec message JobLaunchSpecRequest { - string spec = 1; + string spec = 1; } message JobLaunchSpecResponse { - repeated string names = 1; + repeated string names = 1; } // LaunchSpecAndWait message JobLaunchSpecAndWaitRequest { - string spec = 1; + string spec = 1; } message JobLaunchSpecAndWaitResponse { - JobSeq jobs = 1; + JobSeq jobs = 1; } //MarkAsWaiting message JobMarkAsWaitingRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobMarkAsWaitingResponse {} // Empty // MarkDoneFrames message JobMarkDoneFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobMarkDoneFramesResponse {} // Empty // Pause message JobPauseRequest { - Job job = 1; + Job job = 1; } message JobPauseResponse {} // Empty // ReorderFrames message JobReorderFramesRequest { - Job job = 1; - string range = 2; - Order order = 3; + Job job = 1; + string range = 2; + Order order = 3; } message JobReorderFramesResponse {} // Empty // Resume message JobResumeRequest { - Job job = 1; + Job job = 1; } message JobResumeResponse {} // Empty // RetryFrames message JobRetryFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobRetryFramesResponse {} // Empty // RunFilters 
message JobRunFiltersRequest { - Job job = 1; + Job job = 1; } message JobRunFiltersResponse {} // Empty // SetAutoEat message JobSetAutoEatRequest { - Job job = 1; - bool value = 2; + Job job = 1; + bool value = 2; } message JobSetAutoEatResponse {} // Empty // SetGroup message JobSetGroupRequest { - Job job = 1; - string group_id = 2; + Job job = 1; + string group_id = 2; } message JobSetGroupResponse {} // Empty // SetMaxCores message JobSetMaxCoresRequest { - Job job = 1; - float val = 2; + Job job = 1; + float val = 2; } message JobSetMaxCoresResponse {} // Empty // SetMaxGpus message JobSetMaxGpusRequest { - Job job = 1; - int32 val = 2; + Job job = 1; + int32 val = 2; } message JobSetMaxGpusResponse {} // Empty // SetMaxRetries message JobSetMaxRetriesRequest { - Job job = 1; - int32 max_retries = 2; + Job job = 1; + int32 max_retries = 2; } message JobSetMaxRetriesResponse {} // Empty // SetMinCores message JobSetMinCoresRequest { - Job job = 1; - float val = 2; + Job job = 1; + float val = 2; } message JobSetMinCoresResponse {} // Empty // SetMinGpus message JobSetMinGpusRequest { - Job job = 1; - int32 val = 2; + Job job = 1; + int32 val = 2; } message JobSetMinGpusResponse {} // Empty // SetPriority message JobSetPriorityRequest { - Job job = 1; - int32 val = 2; + Job job = 1; + int32 val = 2; } message JobSetPriorityResponse {} // Empty // ShutdownIfCompleted message JobShutdownIfCompletedRequest { - Job job = 1; + Job job = 1; } message JobShutdownIfCompletedResponse {} // Empty // StaggerFrames message JobStaggerFramesRequest { - Job job = 1; - string range = 2; - int32 stagger = 3; + Job job = 1; + string range = 2; + int32 stagger = 3; } message JobStaggerFramesResponse {} // Empty - // LAYER ---- // AddLimit message LayerAddLimitRequest { - Layer layer = 1; - string limit_id = 2; + Layer layer = 1; + string limit_id = 2; } message LayerAddLimitResponse {} // Empty // AddRenderPartion message LayerAddRenderPartitionRequest { - Layer layer = 1; - 
string host = 2; - int32 threads = 3; - int32 max_cores = 4; - int64 max_memory = 5; - int64 max_gpu_memory = 6; - string username = 7; - int32 max_gpus = 8; + Layer layer = 1; + string host = 2; + int32 threads = 3; + int32 max_cores = 4; + int64 max_memory = 5; + int64 max_gpu_memory = 6; + string username = 7; + int32 max_gpus = 8; } message LayerAddRenderPartitionResponse { - renderPartition.RenderPartition render_partition = 1; + renderPartition.RenderPartition render_partition = 1; } // CreateDependencyOnFrame message LayerCreateDependOnFrameRequest { - Layer layer = 1; - Frame frame = 2; + Layer layer = 1; + Frame frame = 2; } message LayerCreateDependOnFrameResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnJob message LayerCreateDependOnJobRequest { - Layer layer = 1; - Job job = 2; + Layer layer = 1; + Job job = 2; } message LayerCreateDependOnJobResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnLayer message LayerCreateDependOnLayerRequest { - Layer layer = 1; - Layer depend_on_layer = 2; + Layer layer = 1; + Layer depend_on_layer = 2; } message LayerCreateDependOnLayerResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateFrameByFrameDependency message LayerCreateFrameByFrameDependRequest { - Layer layer = 1; - Layer depend_layer = 2; - bool any_frame = 3; + Layer layer = 1; + Layer depend_layer = 2; + bool any_frame = 3; } message LayerCreateFrameByFrameDependResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // DropDepends message LayerDropDependsRequest { - Layer layer = 1; - depend.DependTarget target = 2; + Layer layer = 1; + depend.DependTarget target = 2; } message LayerDropDependsResponse {} // Empty // DropLimit message LayerDropLimitRequest { - Layer layer = 1; - string limit_id = 2; + Layer layer = 1; + string limit_id = 2; } message LayerDropLimitResponse {} // Empty // EatFrames message LayerEatFramesRequest { - Layer 
layer = 1; + Layer layer = 1; } message LayerEatFramesResponse {} // Empty // EnableMemoryOptimizer message LayerEnableMemoryOptimizerRequest { - Layer layer = 1; - bool value = 2; + Layer layer = 1; + bool value = 2; } message LayerEnableMemoryOptimizerResponse {} // Empty // FindLayer message LayerFindLayerRequest { - string job = 1; - string layer = 2; + string job = 1; + string layer = 2; } message LayerFindLayerResponse { - Layer layer = 1; + Layer layer = 1; } // GetFrames message LayerGetFramesRequest { - Layer layer = 1; - FrameSearchCriteria s = 2; + Layer layer = 1; + FrameSearchCriteria s = 2; } message LayerGetFramesResponse { - FrameSeq frames = 1; + FrameSeq frames = 1; } // GetLayer message LayerGetLayerRequest { - string id = 1; + string id = 1; } message LayerGetLayerResponse { - Layer layer = 1; + Layer layer = 1; } // GetLayer message LayerGetLimitsRequest { - Layer layer = 1; + Layer layer = 1; } message LayerGetLimitsResponse { - repeated limit.Limit limits = 1; + repeated limit.Limit limits = 1; } // GetOutputPaths message LayerGetOutputPathsRequest { - Layer layer = 1; + Layer layer = 1; } message LayerGetOutputPathsResponse { - repeated string output_paths = 1; + repeated string output_paths = 1; } // GetWhatDependsOnThis message LayerGetWhatDependsOnThisRequest { - Layer layer = 1; + Layer layer = 1; } message LayerGetWhatDependsOnThisResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // GetWhatThisDependsOn message LayerGetWhatThisDependsOnRequest { - Layer layer = 1; + Layer layer = 1; } message LayerGetWhatThisDependsOnResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // KillFrames message LayerKillFramesRequest { - Layer layer = 1; - string username = 2; - string pid = 3; - string host_kill = 4; - string reason = 5; + Layer layer = 1; + string username = 2; + string pid = 3; + string host_kill = 4; + string reason = 5; } message LayerKillFramesResponse {} // Empty // MarkdoneFrames 
message LayerMarkdoneFramesRequest { - Layer layer = 1; + Layer layer = 1; } message LayerMarkdoneFramesResponse {} // Empty // RetryFrames message LayerRetryFramesRequest { - Layer layer = 1; + Layer layer = 1; } message LayerRetryFramesResponse {} // Empty // RegisterOutputPath message LayerRegisterOutputPathRequest { - Layer layer = 1; - string spec = 2; + Layer layer = 1; + string spec = 2; } message LayerRegisterOutputPathResponse {} // Empty // ReorderFrames message LayerReorderFramesRequest { - Layer layer = 1; - string range = 2; - Order order = 3; + Layer layer = 1; + string range = 2; + Order order = 3; } message LayerReorderFramesResponse {} // Empty // SetMaxCores message LayerSetMaxCoresRequest { - Layer layer = 1; - float cores = 2; + Layer layer = 1; + float cores = 2; } message LayerSetMaxCoresResponse {} // Empty // SetMinCores message LayerSetMinCoresRequest { - Layer layer = 1; - float cores = 2; + Layer layer = 1; + float cores = 2; } message LayerSetMinCoresResponse {} // Empty // [Deprecated] SetMinGpu message LayerSetMinGpuRequest { - Layer layer = 1 [deprecated=true]; - int64 gpu = 2 [deprecated=true]; + Layer layer = 1 [deprecated = true]; + int64 gpu = 2 [deprecated = true]; } // [Deprecated] @@ -1737,89 +1726,86 @@ message LayerSetMinGpuResponse {} // Empty // SetMaxGpus message LayerSetMaxGpusRequest { - Layer layer = 1; - int32 max_gpus = 2; + Layer layer = 1; + int32 max_gpus = 2; } message LayerSetMaxGpusResponse {} // Empty // SetMinGpus message LayerSetMinGpusRequest { - Layer layer = 1; - int32 min_gpus = 2; + Layer layer = 1; + int32 min_gpus = 2; } message LayerSetMinGpusResponse {} // Empty // SetMinGpuMemory message LayerSetMinGpuMemoryRequest { - Layer layer = 1; - int64 gpu_memory = 2; + Layer layer = 1; + int64 gpu_memory = 2; } message LayerSetMinGpuMemoryResponse {} // Empty // SetMinMemory message LayerSetMinMemoryRequest { - Layer layer = 1; - int64 memory = 2; + Layer layer = 1; + int64 memory = 2; } message 
LayerSetMinMemoryResponse {} // Empty // SetTags message LayerSetTagsRequest { - Layer layer = 1; - repeated string tags = 2; + Layer layer = 1; + repeated string tags = 2; } message LayerSetTagsResponse {} // Empty // SetThreadable message LayerSetThreadableRequest { - Layer layer = 1; - bool threadable = 2; + Layer layer = 1; + bool threadable = 2; } message LayerSetThreadableResponse {} // Empty // SetTimeout message LayerSetTimeoutRequest { - Layer layer = 1; - int32 timeout = 2; + Layer layer = 1; + int32 timeout = 2; } message LayerSetTimeoutResponse {} // Empty // SetTimeoutLLU message LayerSetTimeoutLLURequest { - Layer layer = 1; - int32 timeout_llu = 2; + Layer layer = 1; + int32 timeout_llu = 2; } message LayerSetTimeoutLLUResponse {} // Empty - // SetSlotsRequired // message LayerSetSlotsRequiredRequest { - Layer layer = 1; - int32 slots = 2; + Layer layer = 1; + int32 slots = 2; } message LayerSetSlotsRequiredResponse {} // Empty - // StaggerFrames message LayerStaggerFramesRequest { - Layer layer = 1; - string range = 2; - int32 stagger = 3; + Layer layer = 1; + string range = 2; + int32 stagger = 3; } message LayerStaggerFramesResponse {} // Empty - message FrameStateDisplayOverrideRequest { Frame frame = 1; FrameStateDisplayOverride override = 2; diff --git a/pycue/opencue/wrappers/layer.py b/pycue/opencue/wrappers/layer.py index e8ea87a69..ba6e61bb2 100644 --- a/pycue/opencue/wrappers/layer.py +++ b/pycue/opencue/wrappers/layer.py @@ -472,7 +472,7 @@ def slotsRequired(self): """Returns the number of slots required per frame. 
:rtype: int - :return: Number of slots required (<=0 means not slot-based) + :return: Number of slots required (<0 means not slot-based) """ return self.data.slots_required diff --git a/pyoutline/outline/layer.py b/pyoutline/outline/layer.py index fc41872a4..d2eb23d31 100644 --- a/pyoutline/outline/layer.py +++ b/pyoutline/outline/layer.py @@ -107,7 +107,7 @@ class _LayerArgs(TypedDict, total=False): # timeout_llu: Timeout for long last update in seconds # before considering a frame hung timeout_llu: int - # slots_required: Number of slots required per frame (<=0 means not slot-based) + # slots_required: Number of slots required per frame (<0 means not slot-based) slots_required: int type: outline.constants.LayerType # The layer type (Render, Util, Post) diff --git a/rust/crates/scheduler/src/dao/host_dao.rs b/rust/crates/scheduler/src/dao/host_dao.rs index e9737cc17..6663cd307 100644 --- a/rust/crates/scheduler/src/dao/host_dao.rs +++ b/rust/crates/scheduler/src/dao/host_dao.rs @@ -107,7 +107,7 @@ impl From for Host { alloc_id: parse_uuid(&val.pk_alloc), alloc_name: val.str_alloc_name, last_updated: val.ts_ping, - concurrent_slots_limit: (val.int_concurrent_slots_limit >= 0) + concurrent_slots_limit: (val.int_concurrent_slots_limit > 0) .then_some(val.int_concurrent_slots_limit as u32), running_procs_count: val.int_running_procs as u32, } From 2ad42b45a022cfa6b132f709b87de4d6720eff56 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 7 Jan 2026 08:30:36 -0800 Subject: [PATCH 10/17] Revert re-formatting --- proto/src/host.proto | 762 ++++++++++++++++++++++--------------------- 1 file changed, 384 insertions(+), 378 deletions(-) diff --git a/proto/src/host.proto b/proto/src/host.proto index cc6339f25..296b34cfe 100644 --- a/proto/src/host.proto +++ b/proto/src/host.proto @@ -1,641 +1,646 @@ + syntax = "proto3"; package host; +option java_package = "com.imageworks.spcue.grpc.host"; +option java_multiple_files = true; + import "comment.proto"; import 
"criterion.proto"; import "job.proto"; import "renderPartition.proto"; option go_package = "opencue_gateway/gen/go"; -option java_multiple_files = true; -option java_package = "com.imageworks.spcue.grpc.host"; // -------- Services --------] service DeedInterface { - // Remove the deed - rpc Delete(DeedDeleteRequest) returns (DeedDeleteResponse); + // Remove the deed + rpc Delete(DeedDeleteRequest) returns (DeedDeleteResponse); - // Returns the full host for these settings. - rpc GetHost(DeedGetHostRequest) returns (DeedGetHostResponse); + // Returns the full host for these settings. + rpc GetHost(DeedGetHostRequest) returns (DeedGetHostResponse); - // Returns the owner for these settings. - rpc GetOwner(DeedGetOwnerRequest) returns (DeedGetOwnerResponse); + // Returns the owner for these settings. + rpc GetOwner(DeedGetOwnerRequest) returns (DeedGetOwnerResponse); } service HostInterface { - // Add a comment on this host. - rpc AddComment(HostAddCommentRequest) returns (HostAddCommentResponse); - - // Set a tag on this host. - rpc AddTags(HostAddTagsRequest) returns (HostAddTagsResponse); + // Add a comment on this host. + rpc AddComment(HostAddCommentRequest) returns (HostAddCommentResponse); - // Delete host. - rpc Delete(HostDeleteRequest) returns (HostDeleteResponse); + // Set a tag on this host. + rpc AddTags(HostAddTagsRequest) returns (HostAddTagsResponse); - // Find a host by its name - rpc FindHost(HostFindHostRequest) returns (HostFindHostResponse); + // Delete host. + rpc Delete(HostDeleteRequest) returns (HostDeleteResponse); - // Get the comments for this host. - rpc GetComments(HostGetCommentsRequest) returns (HostGetCommentsResponse); + // Find a host by its name + rpc FindHost(HostFindHostRequest) returns (HostFindHostResponse); - // Return the deed for this host. - rpc GetDeed(HostGetDeedRequest) returns (HostGetDeedResponse); + // Get the comments for this host. 
+ rpc GetComments(HostGetCommentsRequest) returns (HostGetCommentsResponse); - // Return a host by its id - rpc GetHost(HostGetHostRequest) returns (HostGetHostResponse); + // Return the deed for this host. + rpc GetDeed(HostGetDeedRequest) returns (HostGetDeedResponse); - // Search for a number of hosts - rpc GetHosts(HostGetHostsRequest) returns (HostGetHostsResponse); + // Return a host by its id + rpc GetHost(HostGetHostRequest) returns (HostGetHostResponse); - // Return the host whiteboard - rpc GetHostWhiteboard(HostGetHostWhiteboardRequest) returns (HostGetHostWhiteboardResponse); + // Search for a number of hosts + rpc GetHosts(HostGetHostsRequest) returns (HostGetHostsResponse); - // Get the owner settings of this particular host. - rpc GetOwner(HostGetOwnerRequest) returns (HostGetOwnerResponse); + // Return the host whiteboard + rpc GetHostWhiteboard(HostGetHostWhiteboardRequest) returns (HostGetHostWhiteboardResponse); - // Returns the list of proc resources allocated from this host. - rpc GetProcs(HostGetProcsRequest) returns (HostGetProcsResponse); + // Get the owner settings of this particular host. + rpc GetOwner(HostGetOwnerRequest) returns (HostGetOwnerResponse); - // Return any render partitions that are setup on this host. - rpc GetRenderPartitions(HostGetRenderPartitionsRequest) returns (HostGetRenderPartitionsResponse); + // Returns the list of proc resources allocated from this host. + rpc GetProcs(HostGetProcsRequest) returns (HostGetProcsResponse); - // Locks the host. Its possible we'll need to pass in a show name here in the future - rpc Lock(HostLockRequest) returns (HostLockResponse); + // Return any render partitions that are setup on this host. + rpc GetRenderPartitions(HostGetRenderPartitionsRequest) returns (HostGetRenderPartitionsResponse); - // Issues an immediate reboot. - rpc Reboot(HostRebootRequest) returns (HostRebootResponse); + // Locks the host. 
Its possible we'll need to pass in a show name here in the future + rpc Lock(HostLockRequest) returns (HostLockResponse); - // Sets the reboot when idle state, nothing has to be locked to set this. When the host pings in idle a reboot - // command is sent to the host and the host will be put into the Rebooting state. If any locks are set they will - // be removed upon reboot. - rpc RebootWhenIdle(HostRebootWhenIdleRequest) returns (HostRebootWhenIdleResponse); + // Issues an immediate reboot. + rpc Reboot(HostRebootRequest) returns (HostRebootResponse); - // Redirect the given procs to the specified job. - rpc RedirectToJob(HostRedirectToJobRequest) returns (HostRedirectToJobResponse); + // Sets the reboot when idle state, nothing has to be locked to set this. When the host pings in idle a reboot + // command is sent to the host and the host will be put into the Rebooting state. If any locks are set they will + // be removed upon reboot. + rpc RebootWhenIdle(HostRebootWhenIdleRequest) returns (HostRebootWhenIdleResponse); - // Remove a tag from this host. - rpc RemoveTags(HostRemoveTagsRequest) returns (HostRemoveTagsResponse); + // Redirect the given procs to the specified job. + rpc RedirectToJob(HostRedirectToJobRequest) returns (HostRedirectToJobResponse); - // Rename tag. - rpc RenameTag(HostRenameTagRequest) returns (HostRenameTagResponse); + // Remove a tag from this host. + rpc RemoveTags(HostRemoveTagsRequest) returns (HostRemoveTagsResponse); - // Assign a host to an allocation. - rpc SetAllocation(HostSetAllocationRequest) returns (HostSetAllocationResponse); + // Rename tag. + rpc RenameTag(HostRenameTagRequest) returns (HostRenameTagResponse); - // Manually set the hardware state for the host. The hardware state may be changed automatically if the host pings - // in. If the hardware state is set to "Reimage", the state will not automatically change with a host ping, and - // must be manually set back to Up. 
- rpc SetHardwareState(HostSetHardwareStateRequest) returns (HostSetHardwareStateResponse); + // Assign a host to an allocation. + rpc SetAllocation(HostSetAllocationRequest) returns (HostSetAllocationResponse); - // Set the name of the host operating system. - rpc SetOs(HostSetOsRequest) returns (HostSetOsResponse); + // Manually set the hardware state for the host. The hardware state may be changed automatically if the host pings + // in. If the hardware state is set to "Reimage", the state will not automatically change with a host ping, and + // must be manually set back to Up. + rpc SetHardwareState(HostSetHardwareStateRequest) returns (HostSetHardwareStateResponse); - // Changes the host's [ThreadMode] - rpc SetThreadMode(HostSetThreadModeRequest) returns (HostSetThreadModeResponse); + // Set the name of the host operating system. + rpc SetOs(HostSetOsRequest) returns (HostSetOsResponse); - // Set the maximum concurrent procs limit for the host. - rpc SetConcurrentSlotsLimit(HostSetConcurrentSlotsLimitRequest) returns (HostSetConcurrentSlotsLimitResponse); + // Changes the host's [ThreadMode] + rpc SetThreadMode(HostSetThreadModeRequest) returns (HostSetThreadModeResponse); + + // Set the maximum concurrent procs limit for the host. + rpc SetConcurrentSlotsLimit(HostSetConcurrentSlotsLimitRequest) returns (HostSetConcurrentSlotsLimitResponse); - // Unlocks the host for booking if the proc is in the Locked state. You cannot unlock a NimbyLocked proc. - rpc Unlock(HostUnlockRequest) returns (HostUnlockResponse); + // Unlocks the host for booking if the proc is in the Locked state. You cannot unlock a NimbyLocked proc. + rpc Unlock(HostUnlockRequest) returns (HostUnlockResponse); } service OwnerInterface { - // Deletes the owner record. - rpc Delete(OwnerDeleteRequest) returns (OwnerDeleteResponse); + // Deletes the owner record. + rpc Delete(OwnerDeleteRequest) returns (OwnerDeleteResponse); - // Get a list of all deeds this owner has. 
- rpc GetDeeds(OwnerGetDeedsRequest) returns (OwnerGetDeedsResponse); + // Get a list of all deeds this owner has. + rpc GetDeeds(OwnerGetDeedsRequest) returns (OwnerGetDeedsResponse); - // Get a list of all hosts this owner is responsible for. - rpc GetHosts(OwnerGetHostsRequest) returns (OwnerGetHostsResponse); + // Get a list of all hosts this owner is responsible for. + rpc GetHosts(OwnerGetHostsRequest) returns (OwnerGetHostsResponse); - // Return an Owner record by name, id, or email. - rpc GetOwner(OwnerGetOwnerRequest) returns (OwnerGetOwnerResponse); + // Return an Owner record by name, id, or email. + rpc GetOwner(OwnerGetOwnerRequest) returns (OwnerGetOwnerResponse); - //Sets the owners show. - rpc SetShow(OwnerSetShowRequest) returns (OwnerSetShowResponse); + //Sets the owners show. + rpc SetShow(OwnerSetShowRequest) returns (OwnerSetShowResponse); - // Set the hosts new owner settings. Any host may have an owner, not just desktops. This allows direct control of - // the cores. By default hosts have no owner settings. - rpc TakeOwnership(OwnerTakeOwnershipRequest) returns (OwnerTakeOwnershipResponse); + // Set the hosts new owner settings. Any host may have an owner, not just desktops. This allows direct control of + // the cores. By default hosts have no owner settings. + rpc TakeOwnership(OwnerTakeOwnershipRequest) returns (OwnerTakeOwnershipResponse); } service ProcInterface { - // Clears the redirect off of the proc so it dispatches naturally. - rpc ClearRedirect(ProcClearRedirectRequest) returns (ProcClearRedirectResponse); + // Clears the redirect off of the proc so it dispatches naturally. 
+ rpc ClearRedirect(ProcClearRedirectRequest) returns (ProcClearRedirectResponse); - // Returns the [Frame] running on the [Proc] - rpc GetFrame(ProcGetFrameRequest) returns (ProcGetFrameResponse); + // Returns the [Frame] running on the [Proc] + rpc GetFrame(ProcGetFrameRequest) returns (ProcGetFrameResponse); - // Returns the [Host] this [Proc] was allocated from. - rpc GetHost(ProcGetHostRequest) returns (ProcGetHostResponse); + // Returns the [Host] this [Proc] was allocated from. + rpc GetHost(ProcGetHostRequest) returns (ProcGetHostResponse); - // Returns the [Job] the [Proc] has been assigned to. - rpc GetJob(ProcGetJobRequest) returns (ProcGetJobResponse); + // Returns the [Job] the [Proc] has been assigned to. + rpc GetJob(ProcGetJobRequest) returns (ProcGetJobResponse); - // Returns the [Layer] the [Proc] has been assigned to. - rpc GetLayer(ProcGetLayerRequest) returns (ProcGetLayerResponse); + // Returns the [Layer] the [Proc] has been assigned to. + rpc GetLayer(ProcGetLayerRequest) returns (ProcGetLayerResponse); - // Return a list of procs matching the search - rpc GetProcs(ProcGetProcsRequest) returns (ProcGetProcsResponse); + // Return a list of procs matching the search + rpc GetProcs(ProcGetProcsRequest) returns (ProcGetProcsResponse); - // Sends a kill signal to the running process. - rpc Kill(ProcKillRequest) returns (ProcKillResponse); + // Sends a kill signal to the running process. + rpc Kill(ProcKillRequest) returns (ProcKillResponse); - // Unbooks and redriects the proc to the specified group. Optionally kills the proc immediately. Will overwrite an - // existing redirect. Return true if the redirect was a success. The redirect would fail in the event that the - // specified group does not have a suitable frame for the proc. - rpc RedirectToGroup(ProcRedirectToGroupRequest) returns (ProcRedirectToGroupResponse); + // Unbooks and redriects the proc to the specified group. Optionally kills the proc immediately. 
Will overwrite an + // existing redirect. Return true if the redirect was a success. The redirect would fail in the event that the + // specified group does not have a suitable frame for the proc. + rpc RedirectToGroup(ProcRedirectToGroupRequest) returns (ProcRedirectToGroupResponse); - // Unbooks and redriects the proc to the specified job. Optionally kills the proc immediately. Will overwrite an - // existing redirect. Return true if the redirect was a success. The redirect would fail in the event th - rpc RedirectToJob(ProcRedirectToJobRequest) returns (ProcRedirectToJobResponse); + // Unbooks and redriects the proc to the specified job. Optionally kills the proc immediately. Will overwrite an + // existing redirect. Return true if the redirect was a success. The redirect would fail in the event th + rpc RedirectToJob(ProcRedirectToJobRequest) returns (ProcRedirectToJobResponse); - // Unbooks this [Proc]. Unbooking means the [Proc] will automatically seek out a new [Job] when the current - // [Frame] is complete. - rpc Unbook(ProcUnbookRequest) returns (ProcUnbookResponse); + // Unbooks this [Proc]. Unbooking means the [Proc] will automatically seek out a new [Job] when the current + // [Frame] is complete. + rpc Unbook(ProcUnbookRequest) returns (ProcUnbookResponse); - //Unbooks procs that match the ProcSearchCriteria. This request can span jobs, shows, allocations, hosts etc. - // Set kill to true if the running frames should immediately be killed. - rpc UnbookProcs(ProcUnbookProcsRequest) returns (ProcUnbookProcsResponse); + //Unbooks procs that match the ProcSearchCriteria. This request can span jobs, shows, allocations, hosts etc. + // Set kill to true if the running frames should immediately be killed. + rpc UnbookProcs(ProcUnbookProcsRequest) returns (ProcUnbookProcsResponse); - // Unbooks procs that match the ProcSearchCriteria and books them on the specified group, assuming the group has - // layers that can take the procs. 
If the kill boolean is set to true, the operation happens immediately. If false, - // the proc will move after it finishes its current frame. - rpc UnbookToGroup(ProcUnbookToGroupRequest) returns (ProcUnbookToGroupResponse); + // Unbooks procs that match the ProcSearchCriteria and books them on the specified group, assuming the group has + // layers that can take the procs. If the kill boolean is set to true, the operation happens immediately. If false, + // the proc will move after it finishes its current frame. + rpc UnbookToGroup(ProcUnbookToGroupRequest) returns (ProcUnbookToGroupResponse); - // Unbooks procs that match the ProcSearchCriteria and books them on the specified list of jobs, assuming those jobs - // have layers that can take the procs. If the kill boolean is set to true, the operation happens immediately. If - // false, the proc will move after it finishes its current frame. - rpc UnbookToJob(ProcUnbookToJobRequest) returns (ProcUnbookToJobResponse); + // Unbooks procs that match the ProcSearchCriteria and books them on the specified list of jobs, assuming those jobs + // have layers that can take the procs. If the kill boolean is set to true, the operation happens immediately. If + // false, the proc will move after it finishes its current frame. + rpc UnbookToJob(ProcUnbookToJobRequest) returns (ProcUnbookToJobResponse); } + // -------- Enums -------- enum HardwareState { - UP = 0; - DOWN = 1; - REBOOTING = 2; - REBOOT_WHEN_IDLE = 3; - REPAIR = 4; + UP = 0; + DOWN = 1; + REBOOTING = 2; + REBOOT_WHEN_IDLE = 3; + REPAIR = 4; } enum HostTagType { - MANUAL = 0; - HARDWARE = 1; - ALLOC = 2; - HOSTNAME = 3; + MANUAL = 0; + HARDWARE = 1; + ALLOC = 2; + HOSTNAME = 3; } enum LockState { - OPEN = 0; - LOCKED = 1; - NIMBY_LOCKED = 2; + OPEN = 0; + LOCKED = 1; + NIMBY_LOCKED = 2; } // Proc redirects can have two different types of destinations, jobs and groups. 
enum RedirectType { - JOB_REDIRECT = 0; - GROUP_REDIRECT = 1; + JOB_REDIRECT = 0; + GROUP_REDIRECT = 1; } // Defines the possible states for a core or proc enum RunState { - // Entity is idle, which means it can be booked. - IDLE = 0; - // Entity is booked, which means its in use on a render proc - BOOKED = 1; + // Entity is idle, which means it can be booked. + IDLE = 0; + // Entity is booked, which means its in use on a render proc + BOOKED = 1; } enum ThreadMode { - // Auto determines the number of threads to use automatically - // based on the amount of memory used by the frame. - AUTO = 0; + // Auto determines the number of threads to use automatically + // based on the amount of memory used by the frame. + AUTO = 0; - // All always uses all of the cores available on the proc. - // These hosts are always booked on threadable layers. - ALL = 1; + // All always uses all of the cores available on the proc. + // These hosts are always booked on threadable layers. + ALL = 1; - // All mode during the day, auto-mode at night. - VARIABLE = 2; + // All mode during the day, auto-mode at night. 
+ VARIABLE = 2; } + // -------- Primary Message Types --------] message Deed { - string id = 1; - string host = 2; - string owner = 3; - string show = 4; + string id = 1; + string host = 2; + string owner = 3; + string show = 4; } message DeedSeq { - repeated Deed deeds = 1; + repeated Deed deeds = 1; } message HardwareStateSeq { - repeated HardwareState state = 1; + repeated HardwareState state = 1; } message LockStateSeq { - repeated LockState state = 1; + repeated LockState state = 1; } message Host { - string id = 1; - string name = 2; - string alloc_name = 3; - bool nimby_enabled = 4; - bool has_comment = 5; - float cores = 6; - float idle_cores = 7; - int64 memory = 8; - int64 idle_memory = 9; - int64 gpu_memory = 10; - int64 idle_gpu_memory = 11; - int64 total_swap = 12; - int64 total_memory = 13; - int64 total_gpu_memory = 14; - int64 total_mcp = 15; - int64 free_swap = 16; - int64 free_memory = 17; - int64 free_mcp = 18; - int64 free_gpu_memory = 19; - int32 load = 20; - int32 boot_time = 21; - int32 ping_time = 22; - string os = 23; - repeated string tags = 24; - HardwareState state = 25; - LockState lock_state = 26; - ThreadMode thread_mode = 27; - float gpus = 28; - float idle_gpus = 29; - - // When a limit is defined, booking will only allocate layers with slots_required > 0 to be - // executed on this host. Which means regular booking by cores/memory/gpu becomes disabled. 
- // (0 for no limit, >0 for specific limit) - int32 concurrent_slots_limit = 30; + string id = 1; + string name = 2; + string alloc_name = 3; + bool nimby_enabled = 4; + bool has_comment = 5; + float cores = 6; + float idle_cores = 7; + int64 memory = 8; + int64 idle_memory = 9; + int64 gpu_memory = 10; + int64 idle_gpu_memory = 11; + int64 total_swap = 12; + int64 total_memory = 13; + int64 total_gpu_memory = 14; + int64 total_mcp = 15; + int64 free_swap = 16; + int64 free_memory = 17; + int64 free_mcp = 18; + int64 free_gpu_memory = 19; + int32 load = 20; + int32 boot_time = 21; + int32 ping_time = 22; + string os = 23; + repeated string tags = 24; + HardwareState state = 25; + LockState lock_state = 26; + ThreadMode thread_mode = 27; + float gpus = 28; + float idle_gpus = 29; + + // When a limit is defined, booking will only allocate layers with slots_required > 0 to be + // executed on this host. Which means regular booking by cores/memory/gpu becomes disabled. + // (0 for no limit, >0 for specific limit) + int32 concurrent_slots_limit = 30; } message HostSearchCriteria { - repeated string hosts = 1; - repeated string regex = 2; - repeated string substr = 3; - repeated string ids = 4; - repeated string allocs = 5; - HardwareStateSeq states = 6; - LockStateSeq lock_states = 7; + repeated string hosts = 1; + repeated string regex = 2; + repeated string substr = 3; + repeated string ids = 4; + repeated string allocs = 5; + HardwareStateSeq states = 6; + LockStateSeq lock_states = 7; } message HostSeq { - repeated Host hosts = 1; + repeated Host hosts = 1; } message NestedHost { - string id = 1; - string name = 2; - string alloc_name = 3; - bool nimby_enabled = 4; - bool has_comment = 5; - float cores = 6; - float idle_cores = 7; - int64 memory = 8; - int64 idle_memory = 9; - int64 gpu_memory = 10; - int64 idle_gpu_memory = 11; - int64 total_swap = 12; - int64 total_memory = 13; - int64 total_gpu_memory = 14; - int64 total_mcp = 15; - int64 free_swap = 16; - int64 
free_memory = 17; - int64 free_mcp = 18; - int64 free_gpu_memory = 19; - int32 load = 20; - int32 boot_time = 21; - int32 ping_time = 22; - string os = 23; - repeated string tags = 24; - HardwareState state = 25; - LockState lock_state = 26; - ThreadMode thread_mode = 27; - NestedProcSeq procs = 28; - float gpus = 29; - float idle_gpus = 30; - int32 concurrent_slots_limit = 31; + string id = 1; + string name = 2; + string alloc_name = 3; + bool nimby_enabled = 4; + bool has_comment = 5; + float cores = 6; + float idle_cores = 7; + int64 memory = 8; + int64 idle_memory = 9; + int64 gpu_memory = 10; + int64 idle_gpu_memory = 11; + int64 total_swap = 12; + int64 total_memory = 13; + int64 total_gpu_memory = 14; + int64 total_mcp = 15; + int64 free_swap = 16; + int64 free_memory = 17; + int64 free_mcp = 18; + int64 free_gpu_memory = 19; + int32 load = 20; + int32 boot_time = 21; + int32 ping_time = 22; + string os = 23; + repeated string tags = 24; + HardwareState state = 25; + LockState lock_state = 26; + ThreadMode thread_mode = 27; + NestedProcSeq procs = 28; + float gpus = 29; + float idle_gpus = 30; + int32 concurrent_slots_limit = 31; } message NestedHostSeq { - repeated NestedHost nested_hosts = 1; + repeated NestedHost nested_hosts = 1; } message NestedProc { - string id = 1; - string name = 2; - string show_name = 3; - string job_name = 4; - string frame_name = 5; - string group_name = 6; - int32 ping_time = 7; - int32 bookedTime = 8; - int32 dispatch_time = 9; - int64 reserved_memory = 10; - int64 reserved_gpu_memory = 11; - int64 used_memory = 12; - float reserved_cores = 13; - bool unbooked = 14; - string log_path = 15; - string redirect_target = 16; - repeated string services = 17; - NestedHost parent = 18; - int64 used_gpu_memory = 19; - float reserved_gpus = 20; + string id = 1; + string name = 2; + string show_name = 3; + string job_name = 4; + string frame_name = 5; + string group_name = 6; + int32 ping_time = 7; + int32 bookedTime = 8; + int32 
dispatch_time = 9; + int64 reserved_memory = 10; + int64 reserved_gpu_memory = 11; + int64 used_memory = 12; + float reserved_cores = 13; + bool unbooked = 14; + string log_path = 15; + string redirect_target = 16; + repeated string services = 17; + NestedHost parent = 18; + int64 used_gpu_memory = 19; + float reserved_gpus = 20; } message NestedProcSeq { - repeated NestedProc nested_procs = 1; + repeated NestedProc nested_procs = 1; } message Owner { - string id = 1; - string name = 2; - string show = 3; - int32 host_count = 4; + string id = 1; + string name = 2; + string show = 3; + int32 host_count = 4; } message Proc { - string id = 1; - string name = 2; - string show_name = 3; - string job_name = 4; - string frame_name = 5; - string group_name = 6; - int32 ping_time = 7; - int32 bookedTime = 8; - int32 dispatch_time = 9; - int64 reserved_memory = 10; - int64 reserved_gpu_memory = 11; - int64 used_memory = 12; - float reserved_cores = 13; - bool unbooked = 14; - string log_path = 15; - string redirect_target = 16; - repeated string services = 17; - int64 used_gpu_memory = 18; - float reserved_gpus = 19; - bytes child_processes = 20; + string id = 1; + string name = 2; + string show_name = 3; + string job_name = 4; + string frame_name = 5; + string group_name = 6; + int32 ping_time = 7; + int32 bookedTime = 8; + int32 dispatch_time = 9; + int64 reserved_memory = 10; + int64 reserved_gpu_memory = 11; + int64 used_memory = 12; + float reserved_cores = 13; + bool unbooked = 14; + string log_path = 15; + string redirect_target = 16; + repeated string services = 17; + int64 used_gpu_memory = 18; + float reserved_gpus = 19; + bytes child_processes = 20; } message ProcSearchCriteria { - // An array of host names to match. - repeated string hosts = 1; + // An array of host names to match. + repeated string hosts = 1; - // An array of job names to match. - repeated string jobs = 2; + // An array of job names to match. 
+ repeated string jobs = 2; - // An array of layer names to match. - repeated string layers = 3; + // An array of layer names to match. + repeated string layers = 3; - // An array of show names to match. - repeated string shows = 4; + // An array of show names to match. + repeated string shows = 4; - // An array of allocation names to match. - repeated string allocs = 5; + // An array of allocation names to match. + repeated string allocs = 5; - // A range of memory usage. Values are in KB. - repeated criterion.InRangeIntegerSearchCriterion memory_range = 6; + // A range of memory usage. Values are in KB. + repeated criterion.InRangeIntegerSearchCriterion memory_range = 6; - // Less than memory usage. Values are in KB. - repeated criterion.GreaterThanIntegerSearchCriterion memory_greater_than = 10; + // Less than memory usage. Values are in KB. + repeated criterion.GreaterThanIntegerSearchCriterion memory_greater_than = 10; - // Greater than memory usage. Values are in KB. - repeated criterion.GreaterThanIntegerSearchCriterion memory_less_than = 11; + // Greater than memory usage. Values are in KB. + repeated criterion.GreaterThanIntegerSearchCriterion memory_less_than = 11; - // A duration range. Values are in seconds. - repeated criterion.InRangeIntegerSearchCriterion duration_range = 7; + // A duration range. Values are in seconds. + repeated criterion.InRangeIntegerSearchCriterion duration_range = 7; - //The maximum number of results. - repeated int32 max_results = 8; + //The maximum number of results. + repeated int32 max_results = 8; - //The offset of the first result. - int32 first_result = 9; + //The offset of the first result. 
+ int32 first_result = 9; } message ProcSeq { - repeated Proc procs = 1; + repeated Proc procs = 1; } + // -------- Requests & Responses --------] // DEED ---- // Delete message DeedDeleteRequest { - Deed deed = 1; + Deed deed = 1; } message DeedDeleteResponse {} // Empty // GetHost message DeedGetHostRequest { - Deed deed = 1; + Deed deed = 1; } message DeedGetHostResponse { - Host host = 1; + Host host = 1; } // GetOwner message DeedGetOwnerRequest { - Deed deed = 1; + Deed deed = 1; } message DeedGetOwnerResponse { - Owner owner = 1; + Owner owner = 1; } // HOST ---- // AddComment message HostAddCommentRequest { - Host host = 1; - comment.Comment new_comment = 2; + Host host = 1; + comment.Comment new_comment = 2; } message HostAddCommentResponse {} // Empty // AddTags message HostAddTagsRequest { - Host host = 1; - repeated string tags = 2; + Host host = 1; + repeated string tags = 2; } message HostAddTagsResponse {} // Empty // Delete message HostDeleteRequest { - Host host = 1; + Host host = 1; } message HostDeleteResponse {} // Empty // FindHost message HostFindHostRequest { - string name = 1; + string name = 1; } message HostFindHostResponse { - Host host = 1; + Host host = 1; } // GetComments message HostGetCommentsRequest { - Host host = 1; + Host host = 1; } message HostGetCommentsResponse { - comment.CommentSeq comments = 1; + comment.CommentSeq comments = 1; } // GetDeed message HostGetDeedRequest { - Host host = 1; + Host host = 1; } message HostGetDeedResponse { - Deed deed = 1; + Deed deed = 1; } // GetHost message HostGetHostRequest { - string id = 1; + string id = 1; } message HostGetHostResponse { - Host host = 1; + Host host = 1; } // GetHosts message HostGetHostsRequest { - HostSearchCriteria r = 1; + HostSearchCriteria r = 1; } message HostGetHostsResponse { - HostSeq hosts = 1; + HostSeq hosts = 1; } // GetHostWhiteboard message HostGetHostWhiteboardRequest {} // Empty message HostGetHostWhiteboardResponse { - NestedHostSeq nested_hosts = 1; 
+ NestedHostSeq nested_hosts = 1; } // GetOwner message HostGetOwnerRequest { - Host host = 1; + Host host = 1; } message HostGetOwnerResponse { - Owner owner = 1; + Owner owner = 1; } // GetProcs message HostGetProcsRequest { - Host host = 1; + Host host = 1; } message HostGetProcsResponse { - ProcSeq procs = 1; + ProcSeq procs = 1; } // GetRenderPartitions message HostGetRenderPartitionsRequest { - Host host = 1; + Host host = 1; } message HostGetRenderPartitionsResponse { - renderPartition.RenderPartitionSeq render_partitions = 1; + renderPartition.RenderPartitionSeq render_partitions = 1; } // Lock message HostLockRequest { - Host host = 1; + Host host = 1; } message HostLockResponse {} // Empty // Reboot message HostRebootRequest { - Host host = 1; + Host host = 1; } message HostRebootResponse {} // Empty // RebootWhenIdle message HostRebootWhenIdleRequest { - Host host = 1; + Host host = 1; } message HostRebootWhenIdleResponse {} // Empty // RedirectToJob message HostRedirectToJobRequest { - Host host = 1; - repeated string proc_names = 2; - string job_id = 3; + Host host = 1; + repeated string proc_names = 2; + string job_id = 3; } message HostRedirectToJobResponse { - bool value = 1; + bool value = 1; } // RemoveTags message HostRemoveTagsRequest { - Host host = 1; - repeated string tags = 2; + Host host = 1; + repeated string tags = 2; } message HostRemoveTagsResponse {} // Empty // RenameTag message HostRenameTagRequest { - Host host = 1; - string old_tag = 2; - string new_tag = 3; + Host host = 1; + string old_tag = 2; + string new_tag = 3; } message HostRenameTagResponse {} // Empty // SetAllocation message HostSetAllocationRequest { - Host host = 1; - string allocation_id = 2; + Host host = 1; + string allocation_id = 2; } message HostSetAllocationResponse {} // Empty // SetHardwareState message HostSetHardwareStateRequest { - Host host = 1; - HardwareState state = 2; + Host host = 1; + HardwareState state = 2; } -message HostSetHardwareStateResponse 
{} // Empty +message HostSetHardwareStateResponse {} // Empty // SetOs message HostSetOsRequest { - Host host = 1; - string os = 2; + Host host = 1; + string os = 2; } message HostSetOsResponse {} // Empty // SetThreadMode message HostSetThreadModeRequest { - Host host = 1; - ThreadMode mode = 2; + Host host = 1; + ThreadMode mode = 2; } message HostSetThreadModeResponse {} // Empty @@ -650,58 +655,59 @@ message HostSetConcurrentSlotsLimitResponse {} // Empty // Unlock message HostUnlockRequest { - Host host = 1; + Host host = 1; } message HostUnlockResponse {} // Empty + // OWNER ---- // Delete message OwnerDeleteRequest { - Owner owner = 1; + Owner owner = 1; } message OwnerDeleteResponse {} // Empty // GetDeeds message OwnerGetDeedsRequest { - Owner owner = 1; + Owner owner = 1; } message OwnerGetDeedsResponse { - DeedSeq deeds = 1; + DeedSeq deeds = 1; } // GetHosts message OwnerGetHostsRequest { - Owner owner = 1; + Owner owner = 1; } message OwnerGetHostsResponse { - HostSeq hosts = 1; + HostSeq hosts = 1; } // GetOwner message OwnerGetOwnerRequest { - string name = 1; + string name = 1; } message OwnerGetOwnerResponse { - Owner owner = 1; + Owner owner = 1; } // SetShow message OwnerSetShowRequest { - Owner owner = 1; - string show = 2; + Owner owner = 1; + string show = 2; } message OwnerSetShowResponse {} // Empty // TakeOwnership message OwnerTakeOwnershipRequest { - Owner owner = 1; - string host = 2; + Owner owner = 1; + string host = 2; } message OwnerTakeOwnershipResponse {} // Empty @@ -709,123 +715,123 @@ message OwnerTakeOwnershipResponse {} // Empty // PROC --- // ClearRedirect message ProcClearRedirectRequest { - Proc proc = 1; + Proc proc = 1; } message ProcClearRedirectResponse { - bool value = 1; + bool value = 1; } // GetFrame message ProcGetFrameRequest { - Proc proc = 1; + Proc proc = 1; } message ProcGetFrameResponse { - job.Frame frame = 1; + job.Frame frame = 1; } // GetHost message ProcGetHostRequest { - Proc proc = 1; + Proc proc = 1; 
} message ProcGetHostResponse { - Host host = 1; + Host host = 1; } // GetJob message ProcGetJobRequest { - Proc proc = 1; + Proc proc = 1; } message ProcGetJobResponse { - job.Job job = 1; + job.Job job = 1; } // GetLayer message ProcGetLayerRequest { - Proc proc = 1; + Proc proc = 1; } message ProcGetLayerResponse { - job.Layer layer = 1; + job.Layer layer = 1; } // GetProcs message ProcGetProcsRequest { - ProcSearchCriteria r = 1; + ProcSearchCriteria r = 1; } message ProcGetProcsResponse { - ProcSeq procs = 1; + ProcSeq procs = 1; } // Kill message ProcKillRequest { - Proc proc = 1; + Proc proc = 1; } message ProcKillResponse {} // Empty // RedirectToGroup message ProcRedirectToGroupRequest { - Proc proc = 1; - string group_id = 2; - bool kill = 3; + Proc proc = 1; + string group_id = 2; + bool kill = 3; } message ProcRedirectToGroupResponse { - bool value = 1; + bool value = 1; } // RedirectToJob message ProcRedirectToJobRequest { - Proc proc = 1; - string job_id = 2; - bool kill = 3; + Proc proc = 1; + string job_id = 2; + bool kill = 3; } message ProcRedirectToJobResponse { - bool value = 1; + bool value = 1; } // Unbook message ProcUnbookRequest { - Proc proc = 1; - bool kill = 2; + Proc proc = 1; + bool kill = 2; } message ProcUnbookResponse {} // Empty // UnbookProcs message ProcUnbookProcsRequest { - ProcSearchCriteria r = 1; - bool kill = 2; + ProcSearchCriteria r = 1; + bool kill = 2; } message ProcUnbookProcsResponse { - int32 num_procs = 1; + int32 num_procs = 1; } // UnbookToJob message ProcUnbookToJobRequest { - ProcSearchCriteria r = 1; - job.JobSeq jobs = 2; - bool kill = 3; + ProcSearchCriteria r = 1; + job.JobSeq jobs = 2; + bool kill = 3; } message ProcUnbookToJobResponse { - int32 num_procs = 1; + int32 num_procs = 1; } // UnbookToGroup message ProcUnbookToGroupRequest { - ProcSearchCriteria r = 1; - job.Group group = 2; - bool kill = 3; + ProcSearchCriteria r = 1; + job.Group group = 2; + bool kill = 3; } message ProcUnbookToGroupResponse { 
- int32 num_procs = 1; + int32 num_procs = 1; } From db5315261b73d41e299d02a00c9a58542b0bccdc Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 7 Jan 2026 08:35:33 -0800 Subject: [PATCH 11/17] Fix formatting --- pycue/tests/wrappers/test_layer.py | 325 +++++++++++++---------------- 1 file changed, 150 insertions(+), 175 deletions(-) diff --git a/pycue/tests/wrappers/test_layer.py b/pycue/tests/wrappers/test_layer.py index 033e38e79..97c04d410 100644 --- a/pycue/tests/wrappers/test_layer.py +++ b/pycue/tests/wrappers/test_layer.py @@ -16,25 +16,28 @@ """Tests for `opencue.wrappers.layer`""" -from __future__ import absolute_import, division, print_function - +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import import getpass import os import platform import unittest import mock -from opencue_proto import depend_pb2, job_pb2 +from opencue_proto import depend_pb2 +from opencue_proto import job_pb2 import opencue.wrappers.frame -import opencue.wrappers.job import opencue.wrappers.layer +import opencue.wrappers.job + -TEST_LAYER_NAME = "testLayer" -TEST_OUTPUT_PATH = "/path/to/file.txt" +TEST_LAYER_NAME = 'testLayer' +TEST_OUTPUT_PATH = '/path/to/file.txt' -@mock.patch("opencue.cuebot.Cuebot.getStub") +@mock.patch('opencue.cuebot.Cuebot.getStub') class LayerTests(unittest.TestCase): """Tests for `opencue.wrappers.layer.Layer`.""" @@ -43,7 +46,8 @@ def testKill(self, getStubMock): stubMock.KillFrames.return_value = job_pb2.LayerKillFramesResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) username = getpass.getuser() pid = os.getpid() host_kill = platform.uname()[1] @@ -51,157 +55,147 @@ def testKill(self, getStubMock): layer.kill(username=username, pid=pid, host_kill=host_kill, reason=reason) stubMock.KillFrames.assert_called_with( - 
job_pb2.LayerKillFramesRequest( - layer=layer.data, - username=username, - pid=str(pid), - host_kill=host_kill, - reason=reason, - ), - timeout=mock.ANY, - ) + job_pb2.LayerKillFramesRequest(layer=layer.data, + username=username, + pid=str(pid), + host_kill=host_kill, + reason=reason), timeout=mock.ANY) def testEat(self, getStubMock): stubMock = mock.Mock() stubMock.EatFrames.return_value = job_pb2.LayerEatFramesResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.eat() stubMock.EatFrames.assert_called_with( - job_pb2.LayerEatFramesRequest(layer=layer.data), timeout=mock.ANY - ) + job_pb2.LayerEatFramesRequest(layer=layer.data), timeout=mock.ANY) def testRetry(self, getStubMock): stubMock = mock.Mock() stubMock.RetryFrames.return_value = job_pb2.LayerRetryFramesResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.retry() stubMock.RetryFrames.assert_called_with( - job_pb2.LayerRetryFramesRequest(layer=layer.data), timeout=mock.ANY - ) + job_pb2.LayerRetryFramesRequest(layer=layer.data), timeout=mock.ANY) def testMarkdone(self, getStubMock): stubMock = mock.Mock() stubMock.MarkdoneFrames.return_value = job_pb2.LayerMarkdoneFramesResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.markdone() stubMock.MarkdoneFrames.assert_called_with( - job_pb2.LayerMarkdoneFramesRequest(layer=layer.data), timeout=mock.ANY - ) + job_pb2.LayerMarkdoneFramesRequest(layer=layer.data), timeout=mock.ANY) def testAddLimit(self, getStubMock): - test_limit_id = "lll-llll-lll" + test_limit_id = 'lll-llll-lll' stubMock = mock.Mock() 
stubMock.AddLimit.return_value = job_pb2.LayerAddLimitResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.addLimit(test_limit_id) stubMock.AddLimit.assert_called_with( job_pb2.LayerAddLimitRequest(layer=layer.data, limit_id=test_limit_id), - timeout=mock.ANY, - ) + timeout=mock.ANY) def testDropLimit(self, getStubMock): - test_limit_id = "lll-llll-lll" + test_limit_id = 'lll-llll-lll' stubMock = mock.Mock() stubMock.DropLimit.return_value = job_pb2.LayerDropLimitResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.dropLimit(test_limit_id) stubMock.DropLimit.assert_called_with( job_pb2.LayerDropLimitRequest(layer=layer.data, limit_id=test_limit_id), - timeout=mock.ANY, - ) + timeout=mock.ANY) def testEnableMemoryOptimizerTrue(self, getStubMock): stubMock = mock.Mock() - stubMock.EnableMemoryOptimizer.return_value = ( - job_pb2.LayerEnableMemoryOptimizerResponse() - ) + stubMock.EnableMemoryOptimizer.return_value = job_pb2.LayerEnableMemoryOptimizerResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.enableMemoryOptimizer(True) stubMock.EnableMemoryOptimizer.assert_called_with( job_pb2.LayerEnableMemoryOptimizerRequest(layer=layer.data, value=True), - timeout=mock.ANY, - ) + timeout=mock.ANY) def testEnableMemoryOptimizerFalse(self, getStubMock): stubMock = mock.Mock() - stubMock.EnableMemoryOptimizer.return_value = ( - job_pb2.LayerEnableMemoryOptimizerResponse() - ) + stubMock.EnableMemoryOptimizer.return_value = job_pb2.LayerEnableMemoryOptimizerResponse() getStubMock.return_value = stubMock - layer = 
opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.enableMemoryOptimizer(False) stubMock.EnableMemoryOptimizer.assert_called_with( job_pb2.LayerEnableMemoryOptimizerRequest(layer=layer.data, value=False), - timeout=mock.ANY, - ) + timeout=mock.ANY) def testGetFrames(self, getStubMock): stubMock = mock.Mock() stubMock.GetFrames.return_value = job_pb2.LayerGetFramesResponse( - frames=job_pb2.FrameSeq(frames=[job_pb2.Frame(layer_name=TEST_LAYER_NAME)]) - ) + frames=job_pb2.FrameSeq(frames=[job_pb2.Frame(layer_name=TEST_LAYER_NAME)])) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) frames = layer.getFrames() stubMock.GetFrames.assert_called_with( job_pb2.LayerGetFramesRequest( - layer=layer.data, s=opencue.search.FrameSearch.criteriaFromOptions() - ), - timeout=mock.ANY, - ) + layer=layer.data, + s=opencue.search.FrameSearch.criteriaFromOptions()), + timeout=mock.ANY) self.assertEqual(len(frames), 1) self.assertEqual(frames[0].data.layer_name, TEST_LAYER_NAME) def testGetOutputPaths(self, getStubMock): stubMock = mock.Mock() stubMock.GetOutputPaths.return_value = job_pb2.LayerGetOutputPathsResponse( - output_paths=[TEST_OUTPUT_PATH] - ) + output_paths=[TEST_OUTPUT_PATH]) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) outputPaths = layer.getOutputPaths() stubMock.GetOutputPaths.assert_called_with( - job_pb2.LayerGetOutputPathsRequest(layer=layer.data), timeout=mock.ANY - ) + job_pb2.LayerGetOutputPathsRequest(layer=layer.data), timeout=mock.ANY) self.assertEqual(len(outputPaths), 1) self.assertEqual(outputPaths[0], TEST_OUTPUT_PATH) def testSetTags(self, getStubMock): - tags = ["cloud", 
"local"] + tags = ['cloud', 'local'] stubMock = mock.Mock() stubMock.SetTags.return_value = job_pb2.LayerSetTagsResponse() getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.setTags(tags) stubMock.SetTags.assert_called_with( - job_pb2.LayerSetTagsRequest(layer=layer.data, tags=tags), timeout=mock.ANY - ) + job_pb2.LayerSetTagsRequest(layer=layer.data, tags=tags), timeout=mock.ANY) def testSetMaxCores(self, getStubMock): stubMock = mock.Mock() @@ -209,14 +203,14 @@ def testSetMaxCores(self, getStubMock): getStubMock.return_value = stubMock testCores = 100 - testCoresActual = testCores / 100.0 - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + testCoresActual = testCores/100.0 + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.setMaxCores(testCores) stubMock.SetMaxCores.assert_called_with( job_pb2.LayerSetMaxCoresRequest(layer=layer.data, cores=testCoresActual), - timeout=mock.ANY, - ) + timeout=mock.ANY) def testSetMinGpuMemory(self, getStubMock): stubMock = mock.Mock() @@ -224,13 +218,27 @@ def testSetMinGpuMemory(self, getStubMock): getStubMock.return_value = stubMock testCores = 100 - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.setMinGpuMemory(testCores) stubMock.SetMinGpuMemory.assert_called_with( job_pb2.LayerSetMinGpuMemoryRequest(layer=layer.data, gpu_memory=testCores), - timeout=mock.ANY, - ) + timeout=mock.ANY) + + def testSetMinMemory(self, getStubMock): + stubMock = mock.Mock() + stubMock.SetMinMemory.return_value = job_pb2.LayerSetMinMemoryResponse() + getStubMock.return_value = stubMock + + memory = 2048 + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) + layer.setMinMemory(memory) + + 
stubMock.SetMinMemory.assert_called_with( + job_pb2.LayerSetMinMemoryRequest(layer=layer.data, memory=memory), + timeout=mock.ANY) def testSetSlotsRequired(self, getStubMock): stubMock = mock.Mock() @@ -246,212 +254,179 @@ def testSetSlotsRequired(self, getStubMock): timeout=mock.ANY, ) - def testSetMinMemory(self, getStubMock): - stubMock = mock.Mock() - stubMock.SetMinMemory.return_value = job_pb2.LayerSetMinMemoryResponse() - getStubMock.return_value = stubMock - - memory = 2048 - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) - layer.setMinMemory(memory) - - stubMock.SetMinMemory.assert_called_with( - job_pb2.LayerSetMinMemoryRequest(layer=layer.data, memory=memory), - timeout=mock.ANY, - ) - def testSetThreadable(self, getStubMock): stubMock = mock.Mock() stubMock.SetThreadable.return_value = job_pb2.LayerSetThreadableResponse() getStubMock.return_value = stubMock value = True - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.setThreadable(value) stubMock.SetThreadable.assert_called_with( job_pb2.LayerSetThreadableRequest(layer=layer.data, threadable=value), - timeout=mock.ANY, - ) + timeout=mock.ANY) def testGetWhatDependsOnThis(self, getStubMock): - dependId = "dddd-ddd-dddd" + dependId = 'dddd-ddd-dddd' stubMock = mock.Mock() - stubMock.GetWhatDependsOnThis.return_value = ( - job_pb2.LayerGetWhatDependsOnThisResponse( - depends=depend_pb2.DependSeq(depends=[depend_pb2.Depend(id=dependId)]) - ) - ) + stubMock.GetWhatDependsOnThis.return_value = job_pb2.LayerGetWhatDependsOnThisResponse( + depends=depend_pb2.DependSeq(depends=[depend_pb2.Depend(id=dependId)])) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) depends = layer.getWhatDependsOnThis() 
stubMock.GetWhatDependsOnThis.assert_called_with( - job_pb2.LayerGetWhatDependsOnThisRequest(layer=layer.data), timeout=mock.ANY - ) + job_pb2.LayerGetWhatDependsOnThisRequest(layer=layer.data), + timeout=mock.ANY) self.assertEqual(len(depends), 1) self.assertEqual(depends[0].id(), dependId) def testGetWhatThisDependsOn(self, getStubMock): - dependId = "dddd-ddd-dddd" + dependId = 'dddd-ddd-dddd' stubMock = mock.Mock() - stubMock.GetWhatThisDependsOn.return_value = ( - job_pb2.LayerGetWhatThisDependsOnResponse( - depends=depend_pb2.DependSeq(depends=[depend_pb2.Depend(id=dependId)]) - ) - ) + stubMock.GetWhatThisDependsOn.return_value = job_pb2.LayerGetWhatThisDependsOnResponse( + depends=depend_pb2.DependSeq(depends=[depend_pb2.Depend(id=dependId)])) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) depends = layer.getWhatThisDependsOn() stubMock.GetWhatThisDependsOn.assert_called_with( - job_pb2.LayerGetWhatThisDependsOnRequest(layer=layer.data), timeout=mock.ANY - ) + job_pb2.LayerGetWhatThisDependsOnRequest(layer=layer.data), + timeout=mock.ANY) self.assertEqual(len(depends), 1) self.assertEqual(depends[0].id(), dependId) def testCreateDependencyOnJob(self, getStubMock): - dependId = "dddd-ddd-dddd" - jobId = "jjjj-jjj-jjjj" + dependId = 'dddd-ddd-dddd' + jobId = 'jjjj-jjj-jjjj' stubMock = mock.Mock() - stubMock.CreateDependencyOnJob.return_value = ( - job_pb2.LayerCreateDependOnJobResponse( - depend=depend_pb2.Depend(id=dependId) - ) - ) + stubMock.CreateDependencyOnJob.return_value = job_pb2.LayerCreateDependOnJobResponse( + depend=depend_pb2.Depend(id=dependId)) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) - job = opencue.wrappers.job.Job(job_pb2.Job(id=jobId)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) + job = 
opencue.wrappers.job.Job( + job_pb2.Job(id=jobId)) depend = layer.createDependencyOnJob(job) stubMock.CreateDependencyOnJob.assert_called_with( job_pb2.LayerCreateDependOnJobRequest(layer=layer.data, job=job.data), - timeout=mock.ANY, - ) + timeout=mock.ANY) self.assertEqual(depend.id(), dependId) def testCreateDependencyOnLayer(self, getStubMock): - dependId = "dddd-ddd-dddd" - layerId = "llll-lll-llll" + dependId = 'dddd-ddd-dddd' + layerId = 'llll-lll-llll' stubMock = mock.Mock() - stubMock.CreateDependencyOnLayer.return_value = ( - job_pb2.LayerCreateDependOnLayerResponse( - depend=depend_pb2.Depend(id=dependId) - ) - ) + stubMock.CreateDependencyOnLayer.return_value = job_pb2.LayerCreateDependOnLayerResponse( + depend=depend_pb2.Depend(id=dependId)) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) - dependLayer = opencue.wrappers.layer.Layer(job_pb2.Layer(id=layerId)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) + dependLayer = opencue.wrappers.layer.Layer( + job_pb2.Layer(id=layerId)) depend = layer.createDependencyOnLayer(dependLayer) stubMock.CreateDependencyOnLayer.assert_called_with( - job_pb2.LayerCreateDependOnLayerRequest( - layer=layer.data, depend_on_layer=dependLayer.data - ), - timeout=mock.ANY, - ) + job_pb2.LayerCreateDependOnLayerRequest(layer=layer.data, + depend_on_layer=dependLayer.data), + timeout=mock.ANY) self.assertEqual(depend.id(), dependId) def testCreateDependencyOnFrame(self, getStubMock): - dependId = "dddd-ddd-dddd" - frameId = "ffff-fff-ffff" + dependId = 'dddd-ddd-dddd' + frameId = 'ffff-fff-ffff' stubMock = mock.Mock() - stubMock.CreateDependencyOnFrame.return_value = ( - job_pb2.LayerCreateDependOnFrameResponse( - depend=depend_pb2.Depend(id=dependId) - ) - ) + stubMock.CreateDependencyOnFrame.return_value = job_pb2.LayerCreateDependOnFrameResponse( + depend=depend_pb2.Depend(id=dependId)) getStubMock.return_value = stubMock - layer 
= opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) - frame = opencue.wrappers.frame.Frame(job_pb2.Frame(id=frameId)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) + frame = opencue.wrappers.frame.Frame( + job_pb2.Frame(id=frameId)) depend = layer.createDependencyOnFrame(frame) stubMock.CreateDependencyOnFrame.assert_called_with( job_pb2.LayerCreateDependOnFrameRequest(layer=layer.data, frame=frame.data), - timeout=mock.ANY, - ) + timeout=mock.ANY) self.assertEqual(depend.id(), dependId) def testCreateFrameByFrameDependency(self, getStubMock): - dependId = "dddd-ddd-dddd" - layerId = "llll-lll-llll" + dependId = 'dddd-ddd-dddd' + layerId = 'llll-lll-llll' stubMock = mock.Mock() - stubMock.CreateFrameByFrameDependency.return_value = ( - job_pb2.LayerCreateFrameByFrameDependResponse( - depend=depend_pb2.Depend(id=dependId) - ) - ) + stubMock.CreateFrameByFrameDependency.return_value = \ + job_pb2.LayerCreateFrameByFrameDependResponse(depend=depend_pb2.Depend(id=dependId)) getStubMock.return_value = stubMock - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) - dependLayer = opencue.wrappers.layer.Layer(job_pb2.Layer(id=layerId)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) + dependLayer = opencue.wrappers.layer.Layer( + job_pb2.Layer(id=layerId)) depend = layer.createFrameByFrameDependency(dependLayer) stubMock.CreateFrameByFrameDependency.assert_called_with( - job_pb2.LayerCreateFrameByFrameDependRequest( - layer=layer.data, depend_layer=dependLayer.data, any_frame=False - ), - timeout=mock.ANY, - ) + job_pb2.LayerCreateFrameByFrameDependRequest(layer=layer.data, + depend_layer=dependLayer.data, + any_frame=False), + timeout=mock.ANY) self.assertEqual(depend.id(), dependId) def testRegisterOutputPath(self, getStubMock): stubMock = mock.Mock() - stubMock.RegisterOutputPath.return_value = ( - job_pb2.LayerRegisterOutputPathResponse() - ) + 
stubMock.RegisterOutputPath.return_value = job_pb2.LayerRegisterOutputPathResponse() getStubMock.return_value = stubMock - outputPath = "/test/output/path" - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + outputPath = '/test/output/path' + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.registerOutputPath(outputPath) stubMock.RegisterOutputPath.assert_called_with( job_pb2.LayerRegisterOutputPathRequest(layer=layer.data, spec=outputPath), - timeout=mock.ANY, - ) + timeout=mock.ANY) def testReorderFrames(self, getStubMock): stubMock = mock.Mock() stubMock.ReorderFrames.return_value = job_pb2.LayerReorderFramesResponse() getStubMock.return_value = stubMock - frameRange = "1-10" + frameRange = '1-10' order = job_pb2.REVERSE layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) layer.reorderFrames(frameRange, order) stubMock.ReorderFrames.assert_called_with( - job_pb2.LayerReorderFramesRequest( - layer=layer.data, range=frameRange, order=order - ), - timeout=mock.ANY, - ) + job_pb2.LayerReorderFramesRequest(layer=layer.data, range=frameRange, order=order), + timeout=mock.ANY) def testStaggerFrames(self, getStubMock): stubMock = mock.Mock() stubMock.StaggerFrames.return_value = job_pb2.LayerStaggerFramesResponse() getStubMock.return_value = stubMock - frameRange = "1-10" + frameRange = '1-10' stagger = 4 - layer = opencue.wrappers.layer.Layer(job_pb2.Layer(name=TEST_LAYER_NAME)) + layer = opencue.wrappers.layer.Layer( + job_pb2.Layer(name=TEST_LAYER_NAME)) layer.staggerFrames(frameRange, stagger) stubMock.StaggerFrames.assert_called_with( - job_pb2.LayerStaggerFramesRequest( - layer=layer.data, range=frameRange, stagger=stagger - ), - timeout=mock.ANY, - ) + job_pb2.LayerStaggerFramesRequest(layer=layer.data, range=frameRange, stagger=stagger), + timeout=mock.ANY) class LayerEnumTests(unittest.TestCase): + def testLayerType(self): self.assertEqual(opencue.api.Layer.LayerType.PRE, 
job_pb2.PRE) self.assertEqual(opencue.api.Layer.LayerType.PRE, 0) @@ -461,5 +436,5 @@ def testOrder(self): self.assertEqual(opencue.api.Layer.Order.LAST, 1) -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() From de7b26ced4672a03e39797b186dffe8af43be22a Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 7 Jan 2026 08:37:50 -0800 Subject: [PATCH 12/17] Fix formatting --- proto/src/job.proto | 1656 ++++++++++++++++++++++--------------------- 1 file changed, 834 insertions(+), 822 deletions(-) diff --git a/proto/src/job.proto b/proto/src/job.proto index 61fd6cd3e..559b5d9b2 100644 --- a/proto/src/job.proto +++ b/proto/src/job.proto @@ -1,454 +1,458 @@ + syntax = "proto3"; package job; +option java_package = "com.imageworks.spcue.grpc.job"; +option java_multiple_files = true; + +option go_package = "opencue_gateway/gen/go"; + import "comment.proto"; import "depend.proto"; import "limit.proto"; import "renderPartition.proto"; -option go_package = "opencue_gateway/gen/go"; -option java_multiple_files = true; -option java_package = "com.imageworks.spcue.grpc.job"; - // Job related messages and services // This includes Job, Layer, Frame, and Group objects + // -------- Services --------] service FrameInterface { - // Add a render partition to the frame. - rpc AddRenderPartition(FrameAddRenderPartitionRequest) returns (FrameAddRenderPartitionResponse); + // Add a render partition to the frame. + rpc AddRenderPartition(FrameAddRenderPartitionRequest) returns (FrameAddRenderPartitionResponse); - // Sets up and returns a FrameOnFrame dependency. - rpc CreateDependencyOnFrame(FrameCreateDependencyOnFrameRequest) returns (FrameCreateDependencyOnFrameResponse); + // Sets up and returns a FrameOnFrame dependency. + rpc CreateDependencyOnFrame(FrameCreateDependencyOnFrameRequest) returns (FrameCreateDependencyOnFrameResponse); - // Sets up and returns a FrameOnJob dependency. 
- rpc CreateDependencyOnJob(FrameCreateDependencyOnJobRequest) returns (FrameCreateDependencyOnJobResponse); + // Sets up and returns a FrameOnJob dependency. + rpc CreateDependencyOnJob(FrameCreateDependencyOnJobRequest) returns (FrameCreateDependencyOnJobResponse); - // Sets up and returns a FrameOnLayer dependency. - rpc CreateDependencyOnLayer(FrameCreateDependencyOnLayerRequest) returns (FrameCreateDependencyOnLayerResponse); + // Sets up and returns a FrameOnLayer dependency. + rpc CreateDependencyOnLayer(FrameCreateDependencyOnLayerRequest) returns (FrameCreateDependencyOnLayerResponse); - // Drops every dependendy that is causing this frame not to run. - rpc DropDepends(FrameDropDependsRequest) returns (FrameDropDependsResponse); + // Drops every dependendy that is causing this frame not to run. + rpc DropDepends(FrameDropDependsRequest) returns (FrameDropDependsResponse); - // Eating a frame will stop rendering and will not try to coninue processing the frame. - rpc Eat(FrameEatRequest) returns (FrameEatResponse); + // Eating a frame will stop rendering and will not try to coninue processing the frame. + rpc Eat(FrameEatRequest) returns (FrameEatResponse); - // Finds a frame in a pending job based on the job, layer, and frame number. - rpc FindFrame(FrameFindFrameRequest) returns (FrameFindFrameResponse); + // Finds a frame in a pending job based on the job, layer, and frame number. + rpc FindFrame(FrameFindFrameRequest) returns (FrameFindFrameResponse); - // Get a frame from its unique id - rpc GetFrame(FrameGetFrameRequest) returns (FrameGetFrameResponse); + // Get a frame from its unique id + rpc GetFrame(FrameGetFrameRequest) returns (FrameGetFrameResponse); - // Get a frame from search criteria - rpc GetFrames(FrameGetFramesRequest) returns (FrameGetFramesResponse); + // Get a frame from search criteria + rpc GetFrames(FrameGetFramesRequest) returns (FrameGetFramesResponse); - // Returns a list of dependencies setup to depend on this frame. 
- rpc GetWhatDependsOnThis(FrameGetWhatDependsOnThisRequest) returns (FrameGetWhatDependsOnThisResponse); + // Returns a list of dependencies setup to depend on this frame. + rpc GetWhatDependsOnThis(FrameGetWhatDependsOnThisRequest) returns (FrameGetWhatDependsOnThisResponse); - // Returns a list of dependencies that this frame depends on. - rpc GetWhatThisDependsOn(FrameGetWhatThisDependsOnRequest) returns (FrameGetWhatThisDependsOnResponse); + // Returns a list of dependencies that this frame depends on. + rpc GetWhatThisDependsOn(FrameGetWhatThisDependsOnRequest) returns (FrameGetWhatThisDependsOnResponse); - // Kills the frame if it is running - rpc Kill(FrameKillRequest) returns (FrameKillResponse); + // Kills the frame if it is running + rpc Kill(FrameKillRequest) returns (FrameKillResponse); - // Will recount the number of active dependencies on the frame and put it back into the Depend state if that count - // is greater than 0. - rpc MarkAsDepend(FrameMarkAsDependRequest) returns (FrameMarkAsDependResponse); + // Will recount the number of active dependencies on the frame and put it back into the Depend state if that count + // is greater than 0. + rpc MarkAsDepend(FrameMarkAsDependRequest) returns (FrameMarkAsDependResponse); - // Changes the frame's dependency count to 0, which will put the frame into the waiting state. Retrying the frame - // will put it back into the waiting state. - rpc MarkAsWaiting(FrameMarkAsWaitingRequest) returns (FrameMarkAsWaitingResponse); + // Changes the frame's dependency count to 0, which will put the frame into the waiting state. Retrying the frame + // will put it back into the waiting state. 
+ rpc MarkAsWaiting(FrameMarkAsWaitingRequest) returns (FrameMarkAsWaitingResponse); - // Retries the frame by setting it as waiting - rpc Retry(FrameRetryRequest) returns (FrameRetryResponse); + // Retries the frame by setting it as waiting + rpc Retry(FrameRetryRequest) returns (FrameRetryResponse); - // Updates the state of the frame's checkpoint status. If the checkpoint status is complete, then the frame's - // checkpointCoreSeconds is updated with the amount of render time that was checkpointed. - rpc SetCheckpointState(FrameSetCheckpointStateRequest) returns (FrameSetCheckpointStateResponse); + // Updates the state of the frame's checkpoint status. If the checkpoint status is complete, then the frame's + // checkpointCoreSeconds is updated with the amount of render time that was checkpointed. + rpc SetCheckpointState(FrameSetCheckpointStateRequest) returns (FrameSetCheckpointStateResponse); - // Set a frame's state display override - rpc SetFrameStateDisplayOverride(FrameStateDisplayOverrideRequest) returns (FrameStateDisplayOverrideResponse); + // Set a frame's state display override + rpc SetFrameStateDisplayOverride(FrameStateDisplayOverrideRequest) returns (FrameStateDisplayOverrideResponse); - // Get a frame's state display overrides - rpc GetFrameStateDisplayOverrides(GetFrameStateDisplayOverridesRequest) returns (GetFrameStateDisplayOverridesResponse); + // Get a frame's state display overrides + rpc GetFrameStateDisplayOverrides(GetFrameStateDisplayOverridesRequest) returns (GetFrameStateDisplayOverridesResponse); } service GroupInterface { - // Create a new sub group of the given name - rpc CreateSubGroup(GroupCreateSubGroupRequest) returns (GroupCreateSubGroupResponse); + // Create a new sub group of the given name + rpc CreateSubGroup(GroupCreateSubGroupRequest) returns (GroupCreateSubGroupResponse); - // Delete the provided group - rpc Delete(GroupDeleteRequest) returns (GroupDeleteResponse); + // Delete the provided group + rpc 
Delete(GroupDeleteRequest) returns (GroupDeleteResponse); - // Finds a group by show name and group - rpc FindGroup(GroupFindGroupRequest) returns (GroupFindGroupResponse); + // Finds a group by show name and group + rpc FindGroup(GroupFindGroupRequest) returns (GroupFindGroupResponse); - // Gets a group by its id - rpc GetGroup(GroupGetGroupRequest) returns (GroupGetGroupResponse); + // Gets a group by its id + rpc GetGroup(GroupGetGroupRequest) returns (GroupGetGroupResponse); - // Get child groups of the provided group - rpc GetGroups(GroupGetGroupsRequest) returns (GroupGetGroupsResponse); + // Get child groups of the provided group + rpc GetGroups(GroupGetGroupsRequest) returns (GroupGetGroupsResponse); - // Get Jobs of the provided group - rpc GetJobs(GroupGetJobsRequest) returns (GroupGetJobsResponse); + // Get Jobs of the provided group + rpc GetJobs(GroupGetJobsRequest) returns (GroupGetJobsResponse); - // Reparent the provided groups to the provided group - rpc ReparentGroups(GroupReparentGroupsRequest) returns (GroupReparentGroupsResponse); + // Reparent the provided groups to the provided group + rpc ReparentGroups(GroupReparentGroupsRequest) returns (GroupReparentGroupsResponse); - // Reparent the provided jobs to the provided group - rpc ReparentJobs(GroupReparentJobsRequest) returns (GroupReparentJobsResponse); + // Reparent the provided jobs to the provided group + rpc ReparentJobs(GroupReparentJobsRequest) returns (GroupReparentJobsResponse); - // Set the Default Job Max Core values to all in the provided group - rpc SetDefaultJobMaxCores(GroupSetDefJobMaxCoresRequest) returns (GroupSetDefJobMaxCoresResponse); + // Set the Default Job Max Core values to all in the provided group + rpc SetDefaultJobMaxCores(GroupSetDefJobMaxCoresRequest) returns (GroupSetDefJobMaxCoresResponse); - // Set the Default Job Min Core values to all in the provided group - rpc SetDefaultJobMinCores(GroupSetDefJobMinCoresRequest) returns (GroupSetDefJobMinCoresResponse); + 
// Set the Default Job Min Core values to all in the provided group + rpc SetDefaultJobMinCores(GroupSetDefJobMinCoresRequest) returns (GroupSetDefJobMinCoresResponse); - // Set the Default Job Max Gpu values to all in the provided group - rpc SetDefaultJobMaxGpus(GroupSetDefJobMaxGpusRequest) returns (GroupSetDefJobMaxGpusResponse); + // Set the Default Job Max Gpu values to all in the provided group + rpc SetDefaultJobMaxGpus(GroupSetDefJobMaxGpusRequest) returns (GroupSetDefJobMaxGpusResponse); - // Set the Default Job Min Gpu values to all in the provided group - rpc SetDefaultJobMinGpus(GroupSetDefJobMinGpusRequest) returns (GroupSetDefJobMinGpusResponse); + // Set the Default Job Min Gpu values to all in the provided group + rpc SetDefaultJobMinGpus(GroupSetDefJobMinGpusRequest) returns (GroupSetDefJobMinGpusResponse); - // Set the Default Job Priority values to all in the provided group - rpc SetDefaultJobPriority(GroupSetDefJobPriorityRequest) returns (GroupSetDefJobPriorityResponse); + // Set the Default Job Priority values to all in the provided group + rpc SetDefaultJobPriority(GroupSetDefJobPriorityRequest) returns (GroupSetDefJobPriorityResponse); - // Set the department associated with the provided group - rpc SetDepartment(GroupSetDeptRequest) returns (GroupSetDeptResponse); + // Set the department associated with the provided group + rpc SetDepartment(GroupSetDeptRequest) returns (GroupSetDeptResponse); - // Set the provided groups parent group - rpc SetGroup(GroupSetGroupRequest) returns (GroupSetGroupResponse); + // Set the provided groups parent group + rpc SetGroup(GroupSetGroupRequest) returns (GroupSetGroupResponse); - // Set the group's Max Cores value - rpc SetMaxCores(GroupSetMaxCoresRequest) returns (GroupSetMaxCoresResponse); + // Set the group's Max Cores value + rpc SetMaxCores(GroupSetMaxCoresRequest) returns (GroupSetMaxCoresResponse); - // Set the groups Min Cores values - rpc SetMinCores(GroupSetMinCoresRequest) returns 
(GroupSetMinCoresResponse); + // Set the groups Min Cores values + rpc SetMinCores(GroupSetMinCoresRequest) returns (GroupSetMinCoresResponse); - // Set the group's Max Gpu value - rpc SetMaxGpus(GroupSetMaxGpusRequest) returns (GroupSetMaxGpusResponse); + // Set the group's Max Gpu value + rpc SetMaxGpus(GroupSetMaxGpusRequest) returns (GroupSetMaxGpusResponse); - // Set the groups Min Gpu values - rpc SetMinGpus(GroupSetMinGpusRequest) returns (GroupSetMinGpusResponse); + // Set the groups Min Gpu values + rpc SetMinGpus(GroupSetMinGpusRequest) returns (GroupSetMinGpusResponse); - // Set the groups name - rpc SetName(GroupSetNameRequest) returns (GroupSetNameResponse); + // Set the groups name + rpc SetName(GroupSetNameRequest) returns (GroupSetNameResponse); } service JobInterface { - // Add a comment on this job - rpc AddComment(JobAddCommentRequest) returns (JobAddCommentResponse); + // Add a comment on this job + rpc AddComment(JobAddCommentRequest) returns (JobAddCommentResponse); - // Add a render partition to the local host. This partition will - // run frames on the specified job. - rpc AddRenderPartition(JobAddRenderPartRequest) returns (JobAddRenderPartResponse); + // Add a render partition to the local host. This partition will + // run frames on the specified job. + rpc AddRenderPartition(JobAddRenderPartRequest) returns (JobAddRenderPartResponse); - // Adds a subscriber to a job. When the job is finished, subscriber - // receives email with notification - rpc AddSubscriber(JobAddSubscriberRequest) returns (JobAddSubscriberResponse); + // Adds a subscriber to a job. 
When the job is finished, subscriber + // receives email with notification + rpc AddSubscriber(JobAddSubscriberRequest) returns (JobAddSubscriberResponse); - // Setup and retunrn a JobOnFrame dependency - rpc CreateDependencyOnFrame(JobCreateDependencyOnFrameRequest) returns (JobCreateDependencyOnFrameResponse); + // Setup and retunrn a JobOnFrame dependency + rpc CreateDependencyOnFrame(JobCreateDependencyOnFrameRequest) returns (JobCreateDependencyOnFrameResponse); - // Setup and return a JobOnJob dependency - rpc CreateDependencyOnJob(JobCreateDependencyOnJobRequest) returns (JobCreateDependencyOnJobResponse); + // Setup and return a JobOnJob dependency + rpc CreateDependencyOnJob(JobCreateDependencyOnJobRequest) returns (JobCreateDependencyOnJobResponse); - // Setup and retunrn a JobOnLayer dependency - rpc CreateDependencyOnLayer(JobCreateDependencyOnLayerRequest) returns (JobCreateDependencyOnLayerResponse); + // Setup and retunrn a JobOnLayer dependency + rpc CreateDependencyOnLayer(JobCreateDependencyOnLayerRequest) returns (JobCreateDependencyOnLayerResponse); - // Drops all external dependencies for the job. This means that - // the internal depend structure will be maintained, but everything - // that depends on another job will be dropped. - rpc DropDepends(JobDropDependsRequest) returns (JobDropDependsResponse); + // Drops all external dependencies for the job. This means that + // the internal depend structure will be maintained, but everything + // that depends on another job will be dropped. 
+ rpc DropDepends(JobDropDependsRequest) returns (JobDropDependsResponse); - // Eats all frames that match the FrameSearchCriteria - rpc EatFrames(JobEatFramesRequest) returns (JobEatFramesResponse); + // Eats all frames that match the FrameSearchCriteria + rpc EatFrames(JobEatFramesRequest) returns (JobEatFramesResponse); - // Finds a pending job using the job name - rpc FindJob(JobFindJobRequest) returns (JobFindJobResponse); + // Finds a pending job using the job name + rpc FindJob(JobFindJobRequest) returns (JobFindJobResponse); - // Get the comments for this job - rpc GetComments(JobGetCommentsRequest) returns (JobGetCommentsResponse); + // Get the comments for this job + rpc GetComments(JobGetCommentsRequest) returns (JobGetCommentsResponse); - // Get the job details - rpc GetCurrent(JobGetCurrentRequest) returns (JobGetCurrentResponse); + // Get the job details + rpc GetCurrent(JobGetCurrentRequest) returns (JobGetCurrentResponse); - // Returns a list of all dependencies that this job is involved with - rpc GetDepends(JobGetDependsRequest) returns (JobGetDependsResponse); + // Returns a list of all dependencies that this job is involved with + rpc GetDepends(JobGetDependsRequest) returns (JobGetDependsResponse); - // Returns all frame objects that match FrameSearchCriteria - rpc GetFrames(JobGetFramesRequest) returns (JobGetFramesResponse); + // Returns all frame objects that match FrameSearchCriteria + rpc GetFrames(JobGetFramesRequest) returns (JobGetFramesResponse); - // Finds a pending job using the job name - rpc GetJob(JobGetJobRequest) returns (JobGetJobResponse); + // Finds a pending job using the job name + rpc GetJob(JobGetJobRequest) returns (JobGetJobResponse); - // Returns a list of jobs based on specified criteria - rpc GetJobs(JobGetJobsRequest) returns (JobGetJobsResponse); + // Returns a list of jobs based on specified criteria + rpc GetJobs(JobGetJobsRequest) returns (JobGetJobsResponse); - // Returns a sequence of job names using search 
criteria - rpc GetJobNames(JobGetJobNamesRequest) returns (JobGetJobNamesResponse); + // Returns a sequence of job names using search criteria + rpc GetJobNames(JobGetJobNamesRequest) returns (JobGetJobNamesResponse); - // Returns all layer objects - rpc GetLayers(JobGetLayersRequest) returns (JobGetLayersResponse); + // Returns all layer objects + rpc GetLayers(JobGetLayersRequest) returns (JobGetLayersResponse); - // Returns a UpdatedFrameCheckResult which contains - // updated state information for frames that have changed since the - // last update time as well as the current state of the job. - // - // If the user is filtering by layer, passing an array of layer - // proxies will limit the updates to specific layers. - // - // At most, your going to get 1 update per running frame every minute - // due to memory usage. - rpc GetUpdatedFrames(JobGetUpdatedFramesRequest) returns (JobGetUpdatedFramesResponse); + // Returns a UpdatedFrameCheckResult which contains + // updated state information for frames that have changed since the + // last update time as well as the current state of the job. + // + // If the user is filtering by layer, passing an array of layer + // proxies will limit the updates to specific layers. + // + // At most, your going to get 1 update per running frame every minute + // due to memory usage. + rpc GetUpdatedFrames(JobGetUpdatedFramesRequest) returns (JobGetUpdatedFramesResponse); - // Returns a list of dependencies setup to depend on - // this job. This includes all types of depends, not just - // OnJob dependencies. This will not return any frame on frame - // dependencies that are part of a FrameByFrame depend. It will - // return a single element that represents the entire dependency. - rpc GetWhatDependsOnThis(JobGetWhatDependsOnThisRequest) returns (JobGetWhatDependsOnThisResponse); + // Returns a list of dependencies setup to depend on + // this job. This includes all types of depends, not just + // OnJob dependencies. 
This will not return any frame on frame + // dependencies that are part of a FrameByFrame depend. It will + // return a single element that represents the entire dependency. + rpc GetWhatDependsOnThis(JobGetWhatDependsOnThisRequest) returns (JobGetWhatDependsOnThisResponse); - // Returns a list of dependencies that this frame depends on. - rpc GetWhatThisDependsOn(JobGetWhatThisDependsOnRequest) returns (JobGetWhatThisDependsOnResponse); + // Returns a list of dependencies that this frame depends on. + rpc GetWhatThisDependsOn(JobGetWhatThisDependsOnRequest) returns (JobGetWhatThisDependsOnResponse); - // Returns true if the job is in the pending state the cue. - rpc IsJobPending(JobIsJobPendingRequest) returns (JobIsJobPendingResponse); + // Returns true if the job is in the pending state the cue. + rpc IsJobPending(JobIsJobPendingRequest) returns (JobIsJobPendingResponse); - // Kill the job. This puts the job into the Finished State - // All running frames are killed, all depends satisfied. - rpc Kill(JobKillRequest) returns (JobKillResponse); + // Kill the job. This puts the job into the Finished State + // All running frames are killed, all depends satisfied. + rpc Kill(JobKillRequest) returns (JobKillResponse); - // Kills all frames that match the FrameSearchCriteria - rpc KillFrames(JobKillFramesRequest) returns (JobKillFramesResponse); + // Kills all frames that match the FrameSearchCriteria + rpc KillFrames(JobKillFramesRequest) returns (JobKillFramesResponse); - // Launches a job spec and returns an array of launched jobs. Waits for jobs to be committed to DB. This might time - // out before jobs are launched. - rpc LaunchSpecAndWait(JobLaunchSpecAndWaitRequest) returns (JobLaunchSpecAndWaitResponse); + // Launches a job spec and returns an array of launched jobs. Waits for jobs to be committed to DB. This might time + // out before jobs are launched. 
+ rpc LaunchSpecAndWait(JobLaunchSpecAndWaitRequest) returns (JobLaunchSpecAndWaitResponse); - // Launches as a job spec and returns an array of job names that are being launched. This method returns immediately - // after basic checks. The job could fail to launch of a DB error occurs but that is rare. - rpc LaunchSpec(JobLaunchSpecRequest) returns (JobLaunchSpecResponse); + // Launches as a job spec and returns an array of job names that are being launched. This method returns immediately + // after basic checks. The job could fail to launch of a DB error occurs but that is rare. + rpc LaunchSpec(JobLaunchSpecRequest) returns (JobLaunchSpecResponse); - // Updates the matching frames from the Depend state to the waiting state - rpc MarkAsWaiting(JobMarkAsWaitingRequest) returns (JobMarkAsWaitingResponse); + // Updates the matching frames from the Depend state to the waiting state + rpc MarkAsWaiting(JobMarkAsWaitingRequest) returns (JobMarkAsWaitingResponse); - // Drops any dependency that requires any frame that matches the FrameSearchCriteria - rpc MarkDoneFrames(JobMarkDoneFramesRequest) returns (JobMarkDoneFramesResponse); + // Drops any dependency that requires any frame that matches the FrameSearchCriteria + rpc MarkDoneFrames(JobMarkDoneFramesRequest) returns (JobMarkDoneFramesResponse); - // Pauses the job, which means it no longer gets procs - rpc Pause(JobPauseRequest) returns (JobPauseResponse); + // Pauses the job, which means it no longer gets procs + rpc Pause(JobPauseRequest) returns (JobPauseResponse); - // Reorders the specified frame range on this job - rpc ReorderFrames(JobReorderFramesRequest) returns (JobReorderFramesResponse); + // Reorders the specified frame range on this job + rpc ReorderFrames(JobReorderFramesRequest) returns (JobReorderFramesResponse); - // Resumes a paused job - rpc Resume(JobResumeRequest) returns (JobResumeResponse); + // Resumes a paused job + rpc Resume(JobResumeRequest) returns (JobResumeResponse); - // Retries all 
frames that match the FrameSearchCriteria - rpc RetryFrames(JobRetryFramesRequest) returns (JobRetryFramesResponse); + // Retries all frames that match the FrameSearchCriteria + rpc RetryFrames(JobRetryFramesRequest) returns (JobRetryFramesResponse); - // Rerun filters for this job - rpc RunFilters(JobRunFiltersRequest) returns (JobRunFiltersResponse); + // Rerun filters for this job + rpc RunFilters(JobRunFiltersRequest) returns (JobRunFiltersResponse); - // If set to true, a frame that would have turned dead, will become eaten - rpc SetAutoEat(JobSetAutoEatRequest) returns (JobSetAutoEatResponse); + // If set to true, a frame that would have turned dead, will become eaten + rpc SetAutoEat(JobSetAutoEatRequest) returns (JobSetAutoEatResponse); - // Move the job into the specified group - rpc SetGroup(JobSetGroupRequest) returns (JobSetGroupResponse); + // Move the job into the specified group + rpc SetGroup(JobSetGroupRequest) returns (JobSetGroupResponse); - // Sets the maximum number of procs that can run on this job - rpc SetMaxCores(JobSetMaxCoresRequest) returns (JobSetMaxCoresResponse); + // Sets the maximum number of procs that can run on this job + rpc SetMaxCores(JobSetMaxCoresRequest) returns (JobSetMaxCoresResponse); - // Sets the default maximum number of frame retries for the job. One - // a frame has retried this many times it will automatically go - // to the dead state. The default upper limit on this is 16 retries. - rpc SetMaxRetries(JobSetMaxRetriesRequest) returns (JobSetMaxRetriesResponse); + // Sets the default maximum number of frame retries for the job. One + // a frame has retried this many times it will automatically go + // to the dead state. The default upper limit on this is 16 retries. 
+ rpc SetMaxRetries(JobSetMaxRetriesRequest) returns (JobSetMaxRetriesResponse); - // Sets the minimum number of procs that can run on this job - rpc SetMinCores(JobSetMinCoresRequest) returns (JobSetMinCoresResponse); + // Sets the minimum number of procs that can run on this job + rpc SetMinCores(JobSetMinCoresRequest) returns (JobSetMinCoresResponse); - // Sets the maximum number of Gpu that can run on this job - rpc SetMaxGpus(JobSetMaxGpusRequest) returns (JobSetMaxGpusResponse); + // Sets the maximum number of Gpu that can run on this job + rpc SetMaxGpus(JobSetMaxGpusRequest) returns (JobSetMaxGpusResponse); - // Sets the minimum number of Gpu that can run on this job - rpc SetMinGpus(JobSetMinGpusRequest) returns (JobSetMinGpusResponse); + // Sets the minimum number of Gpu that can run on this job + rpc SetMinGpus(JobSetMinGpusRequest) returns (JobSetMinGpusResponse); - // Sets the job priority - rpc SetPriority(JobSetPriorityRequest) returns (JobSetPriorityResponse); + // Sets the job priority + rpc SetPriority(JobSetPriorityRequest) returns (JobSetPriorityResponse); - // Shutdown the job if it is completed. This is a workaround for when - // Cuebot failed to shutdown a job due to database access error. - rpc ShutdownIfCompleted(JobShutdownIfCompletedRequest) returns (JobShutdownIfCompletedResponse); + // Shutdown the job if it is completed. This is a workaround for when + // Cuebot failed to shutdown a job due to database access error. 
+ rpc ShutdownIfCompleted(JobShutdownIfCompletedRequest) returns (JobShutdownIfCompletedResponse); - // Staggers the specified frame range - rpc StaggerFrames(JobStaggerFramesRequest) returns (JobStaggerFramesResponse); + // Staggers the specified frame range + rpc StaggerFrames(JobStaggerFramesRequest) returns (JobStaggerFramesResponse); } service LayerInterface { - // Add Limit to the Layer - rpc AddLimit(LayerAddLimitRequest) returns (LayerAddLimitResponse); + // Add Limit to the Layer + rpc AddLimit(LayerAddLimitRequest) returns (LayerAddLimitResponse); - // Add RenderPartition to Layer - rpc AddRenderPartition(LayerAddRenderPartitionRequest) returns (LayerAddRenderPartitionResponse); + // Add RenderPartition to Layer + rpc AddRenderPartition(LayerAddRenderPartitionRequest) returns (LayerAddRenderPartitionResponse); - // Setup and return a LayerOnFrame dependency - rpc CreateDependencyOnFrame(LayerCreateDependOnFrameRequest) returns (LayerCreateDependOnFrameResponse); + // Setup and return a LayerOnFrame dependency + rpc CreateDependencyOnFrame(LayerCreateDependOnFrameRequest) returns (LayerCreateDependOnFrameResponse); - // Setup and return a LayerOnJob dependency - rpc CreateDependencyOnJob(LayerCreateDependOnJobRequest) returns (LayerCreateDependOnJobResponse); + // Setup and return a LayerOnJob dependency + rpc CreateDependencyOnJob(LayerCreateDependOnJobRequest) returns (LayerCreateDependOnJobResponse); - // Setup and return a LayerOnLayer dependency - rpc CreateDependencyOnLayer(LayerCreateDependOnLayerRequest) returns (LayerCreateDependOnLayerResponse); + // Setup and return a LayerOnLayer dependency + rpc CreateDependencyOnLayer(LayerCreateDependOnLayerRequest) returns (LayerCreateDependOnLayerResponse); - // Setup and return a FrameByFrame dependency - rpc CreateFrameByFrameDependency(LayerCreateFrameByFrameDependRequest) returns (LayerCreateFrameByFrameDependResponse); + // Setup and return a FrameByFrame dependency + rpc 
CreateFrameByFrameDependency(LayerCreateFrameByFrameDependRequest) returns (LayerCreateFrameByFrameDependResponse); - // Drops every dependency that is causing this layer not to run. - rpc DropDepends(LayerDropDependsRequest) returns (LayerDropDependsResponse); + // Drops every dependency that is causing this layer not to run. + rpc DropDepends(LayerDropDependsRequest) returns (LayerDropDependsResponse); - // Drop the limit from the specified layer. - rpc DropLimit(LayerDropLimitRequest) returns (LayerDropLimitResponse); + // Drop the limit from the specified layer. + rpc DropLimit(LayerDropLimitRequest) returns (LayerDropLimitResponse); - // Eat the Frames of this Layer - rpc EatFrames(LayerEatFramesRequest) returns (LayerEatFramesResponse); + // Eat the Frames of this Layer + rpc EatFrames(LayerEatFramesRequest) returns (LayerEatFramesResponse); - // When disabled, This will stop Cuebot from lowering the amount of memory required for a given layer. - rpc EnableMemoryOptimizer(LayerEnableMemoryOptimizerRequest) returns (LayerEnableMemoryOptimizerResponse); + // When disabled, This will stop Cuebot from lowering the amount of memory required for a given layer. 
+ rpc EnableMemoryOptimizer(LayerEnableMemoryOptimizerRequest) returns (LayerEnableMemoryOptimizerResponse); - // Finds a layer in a pending job based the job and layer name - rpc FindLayer(LayerFindLayerRequest) returns (LayerFindLayerResponse); + // Finds a layer in a pending job based the job and layer name + rpc FindLayer(LayerFindLayerRequest) returns (LayerFindLayerResponse); - // Get the frames that match the FrameSearchCriteria - rpc GetFrames(LayerGetFramesRequest) returns (LayerGetFramesResponse); + // Get the frames that match the FrameSearchCriteria + rpc GetFrames(LayerGetFramesRequest) returns (LayerGetFramesResponse); - // Finds a layer in a pending job from its unique ID - rpc GetLayer(LayerGetLayerRequest) returns (LayerGetLayerResponse); + // Finds a layer in a pending job from its unique ID + rpc GetLayer(LayerGetLayerRequest) returns (LayerGetLayerResponse); - // Return a list of the limits on the specified layer. - rpc GetLimits(LayerGetLimitsRequest) returns (LayerGetLimitsResponse); + // Return a list of the limits on the specified layer. + rpc GetLimits(LayerGetLimitsRequest) returns (LayerGetLimitsResponse); - // Return a list of all registered output paths. - rpc GetOutputPaths(LayerGetOutputPathsRequest) returns (LayerGetOutputPathsResponse); + // Return a list of all registered output paths. + rpc GetOutputPaths(LayerGetOutputPathsRequest) returns (LayerGetOutputPathsResponse); - // Returns a list of dependencies that this frame depends on. - rpc GetWhatThisDependsOn(LayerGetWhatThisDependsOnRequest) returns (LayerGetWhatThisDependsOnResponse); + // Returns a list of dependencies that this frame depends on. + rpc GetWhatThisDependsOn(LayerGetWhatThisDependsOnRequest) returns (LayerGetWhatThisDependsOnResponse); - // Returns a list of dependencies setup to depend on this layer. This includes all types of depends, not just - // OnLayer dependencies. 
This will not return any frame on frame dependencies that are part of a FrameByFrame - // depend. It will return a single element that represents the entire dependency. - rpc GetWhatDependsOnThis(LayerGetWhatDependsOnThisRequest) returns (LayerGetWhatDependsOnThisResponse); + // Returns a list of dependencies setup to depend on this layer. This includes all types of depends, not just + // OnLayer dependencies. This will not return any frame on frame dependencies that are part of a FrameByFrame + // depend. It will return a single element that represents the entire dependency. + rpc GetWhatDependsOnThis(LayerGetWhatDependsOnThisRequest) returns (LayerGetWhatDependsOnThisResponse); - // Kill Frames associated with this layer - rpc KillFrames(LayerKillFramesRequest) returns (LayerKillFramesResponse); + // Kill Frames associated with this layer + rpc KillFrames(LayerKillFramesRequest) returns (LayerKillFramesResponse); - // Mark the frames of this layer done - rpc MarkdoneFrames(LayerMarkdoneFramesRequest) returns (LayerMarkdoneFramesResponse); + // Mark the frames of this layer done + rpc MarkdoneFrames(LayerMarkdoneFramesRequest) returns (LayerMarkdoneFramesResponse); - // Register an output with the given layer. The output paths are sent in the opencue email. - rpc RegisterOutputPath(LayerRegisterOutputPathRequest) returns (LayerRegisterOutputPathResponse); + // Register an output with the given layer. The output paths are sent in the opencue email. + rpc RegisterOutputPath(LayerRegisterOutputPathRequest) returns (LayerRegisterOutputPathResponse); - //Reorders the specified frame range on this job. - rpc ReorderFrames(LayerReorderFramesRequest) returns (LayerReorderFramesResponse); + //Reorders the specified frame range on this job. 
+ rpc ReorderFrames(LayerReorderFramesRequest) returns (LayerReorderFramesResponse); - // Retry the Frames of this Layer - rpc RetryFrames(LayerRetryFramesRequest) returns (LayerRetryFramesResponse); + // Retry the Frames of this Layer + rpc RetryFrames(LayerRetryFramesRequest) returns (LayerRetryFramesResponse); - // The maximum number of cores to run on a given frame within this layer. Fractional core values are not allowed - // with this setting. - rpc SetMaxCores(LayerSetMaxCoresRequest) returns (LayerSetMaxCoresResponse); + // The maximum number of cores to run on a given frame within this layer. Fractional core values are not allowed + // with this setting. + rpc SetMaxCores(LayerSetMaxCoresRequest) returns (LayerSetMaxCoresResponse); - // Set the Min Cores for this layer - rpc SetMinCores(LayerSetMinCoresRequest) returns (LayerSetMinCoresResponse); + // Set the Min Cores for this layer + rpc SetMinCores(LayerSetMinCoresRequest) returns (LayerSetMinCoresResponse); - // The maximum number of Gpu to run on a given frame within this layer. - rpc SetMaxGpus(LayerSetMaxGpusRequest) returns (LayerSetMaxGpusResponse); + // The maximum number of Gpu to run on a given frame within this layer. 
+ rpc SetMaxGpus(LayerSetMaxGpusRequest) returns (LayerSetMaxGpusResponse); - // Set the Min Gpus for this layer - rpc SetMinGpus(LayerSetMinGpusRequest) returns (LayerSetMinGpusResponse); + // Set the Min Gpus for this layer + rpc SetMinGpus(LayerSetMinGpusRequest) returns (LayerSetMinGpusResponse); - // Set the Min gpu memory value for the layer - rpc SetMinGpuMemory(LayerSetMinGpuMemoryRequest) returns (LayerSetMinGpuMemoryResponse); + // Set the Min gpu memory value for the layer + rpc SetMinGpuMemory(LayerSetMinGpuMemoryRequest) returns (LayerSetMinGpuMemoryResponse); - // [Deprecated] Set the Min gpu memory value for the layer - rpc SetMinGpu(LayerSetMinGpuRequest) returns (LayerSetMinGpuResponse); + // [Deprecated] Set the Min gpu memory value for the layer + rpc SetMinGpu(LayerSetMinGpuRequest) returns (LayerSetMinGpuResponse); - // Set the Min Memory value for the layer - rpc SetMinMemory(LayerSetMinMemoryRequest) returns (LayerSetMinMemoryResponse); + // Set the Min Memory value for the layer + rpc SetMinMemory(LayerSetMinMemoryRequest) returns (LayerSetMinMemoryResponse); - // Set tags on the layer - rpc SetTags(LayerSetTagsRequest) returns (LayerSetTagsResponse); + // Set tags on the layer + rpc SetTags(LayerSetTagsRequest) returns (LayerSetTagsResponse); - // Set whether the layer is threadable or not - rpc SetThreadable(LayerSetThreadableRequest) returns (LayerSetThreadableResponse); + // Set whether the layer is threadable or not + rpc SetThreadable(LayerSetThreadableRequest) returns (LayerSetThreadableResponse); - // Set whether the timeout for frames in the layer - rpc SetTimeout(LayerSetTimeoutRequest) returns (LayerSetTimeoutResponse); + // Set whether the timeout for frames in the layer + rpc SetTimeout(LayerSetTimeoutRequest) returns (LayerSetTimeoutResponse); - // Set whether the LLU timeout for frames in the layer - rpc SetTimeoutLLU(LayerSetTimeoutLLURequest) returns (LayerSetTimeoutLLUResponse); + // Set whether the LLU timeout for frames 
in the layer + rpc SetTimeoutLLU(LayerSetTimeoutLLURequest) returns (LayerSetTimeoutLLUResponse); - // Set the number of slots required per frame for this layer - rpc SetSlotsRequired(LayerSetSlotsRequiredRequest) returns (LayerSetSlotsRequiredResponse); + // Set the number of slots required per frame for this layer + rpc SetSlotsRequired(LayerSetSlotsRequiredRequest) returns (LayerSetSlotsRequiredResponse); - // Staggers the specified frame range. - rpc StaggerFrames(LayerStaggerFramesRequest) returns (LayerStaggerFramesResponse); + // Staggers the specified frame range. + rpc StaggerFrames(LayerStaggerFramesRequest) returns (LayerStaggerFramesResponse); } + // -------- Enums --------] // Define the possible checkpoint states for a frame. enum CheckpointState { - DISABLED = 0; - ENABLED = 1; - COPYING = 2; - COMPLETE = 3; + DISABLED = 0; + ENABLED = 1; + COPYING = 2; + COMPLETE = 3; } // These frame exit status values are used to trigger special dispatcher behavior. They are greater than 255 // so they don't collide with any real exit status values used by applications running on the cue. enum FrameExitStatus { - // The frame was a success - SUCCESS = 0; - // The frame should be automatically retried - FAILED_LAUNCH = 256; - // Retries should not be incremented - SKIP_RETRY = 286; + // The frame was a success + SUCCESS = 0; + // The frame should be automatically retried + FAILED_LAUNCH = 256; + // Retries should not be incremented + SKIP_RETRY = 286; } // Defines the possible states of a frame. 
enum FrameState { - // Ok to be dispatched - WAITING = 0; - // Reserved to be dispatched - SETUP = 1; - // Running on a render proc - RUNNING = 2; - // Frame completed successfully - SUCCEEDED = 3; - // Frame is waiting on a dependency - DEPEND = 4; - // Frame is dead,which means it has died N times - DEAD = 5; - // Frame is eaten, acts like the frame has succeeded - EATEN = 6; - // Frame is checkpointing - CHECKPOINT = 7; + // Ok to be dispatched + WAITING = 0; + // Reserved to be dispatched + SETUP = 1; + // Running on a render proc + RUNNING = 2; + // Frame completed successfully + SUCCEEDED = 3; + // Frame is waiting on a dependency + DEPEND = 4; + // Frame is dead,which means it has died N times + DEAD = 5; + // Frame is eaten, acts like the frame has succeeded + EATEN = 6; + // Frame is checkpointing + CHECKPOINT = 7; } // Defines the possible states of a job. enum JobState { - // Job is running - PENDING = 0; - // The job has completed - FINISHED = 1; - // The job is in the process of starting up - STARTUP = 2; - // The job is in the process of shutting down - SHUTDOWN = 3; - // The job is a post job and is waiting to go pending - POSTED = 4; + // Job is running + PENDING = 0; + // The job has completed + FINISHED = 1; + // The job is in the process of starting up + STARTUP = 2; + // The job is in the process of shutting down + SHUTDOWN = 3; + // The job is a post job and is waiting to go pending + POSTED = 4; } // The LayerType determines the type of the layer. A proc will not run @@ -458,663 +462,668 @@ enum JobState { // There is no specific dispatch order for layer types. You will need // to setup dependencies. enum LayerType { - PRE = 0; - POST = 1; - RENDER = 2; - UTIL = 3; + PRE = 0; + POST = 1; + RENDER = 2; + UTIL = 3; } //Used for reordering frames. 
enum Order { - // Moves frames to the lowest dispatch order - FIRST = 0; - // Moves frames to the last dispatch order - LAST = 1; - // Reverses the dispatch order - REVERSE = 2; + // Moves frames to the lowest dispatch order + FIRST = 0; + // Moves frames to the last dispatch order + LAST = 1; + // Reverses the dispatch order + REVERSE = 2; } + // -------- Primary Message Types --------] // Object to contain an override of the frame's display settings message FrameStateDisplayOverride { - FrameState state = 1; // the FrameState to override - string text = 2; // the text to use + FrameState state = 1; // the FrameState to override + string text = 2; // the text to use - message RGB { - int32 red = 1; - int32 green = 2; - int32 blue = 3; - } - RGB color = 3; // RGB color to use e.g. 255,0,0 + message RGB { + int32 red = 1; + int32 green = 2; + int32 blue = 3; + } + RGB color = 3; // RGB color to use e.g. 255,0,0 } message FrameStateDisplayOverrideSeq { - repeated FrameStateDisplayOverride overrides = 1; + repeated FrameStateDisplayOverride overrides = 1; } // FRAME ---- message Frame { - string id = 1; - string name = 2; - string layer_name = 3; - int32 number = 4; - FrameState state = 5; - int32 retry_count = 6; - int32 exit_status = 7; - int32 dispatch_order = 8; - int32 start_time = 9; - int32 stop_time = 10; - int64 max_rss = 11; - int64 used_memory = 12; - int64 reserved_memory = 13; - int64 reserved_gpu_memory = 14; - string last_resource = 15; - CheckpointState checkpoint_state = 16; - int32 checkpoint_count = 17; - int32 total_core_time = 18; - int32 llu_time = 19; - int32 total_gpu_time = 20; - int64 max_gpu_memory = 21; - int64 used_gpu_memory = 22; - FrameStateDisplayOverride frame_state_display_override = 23; + string id = 1; + string name = 2; + string layer_name = 3; + int32 number = 4; + FrameState state = 5; + int32 retry_count = 6; + int32 exit_status = 7; + int32 dispatch_order = 8; + int32 start_time = 9; + int32 stop_time = 10; + int64 max_rss = 
11; + int64 used_memory = 12; + int64 reserved_memory = 13; + int64 reserved_gpu_memory = 14; + string last_resource = 15; + CheckpointState checkpoint_state = 16; + int32 checkpoint_count = 17; + int32 total_core_time = 18; + int32 llu_time = 19; + int32 total_gpu_time = 20; + int64 max_gpu_memory = 21; + int64 used_gpu_memory = 22; + FrameStateDisplayOverride frame_state_display_override = 23; } // Object for frame searching message FrameSearchCriteria { - repeated string ids = 1; - repeated string frames = 2; - repeated string layers = 3; - FrameStateSeq states = 4; - string frame_range = 5; - string memory_range = 6; - string duration_range = 7; - int32 page = 8; - int32 limit = 9; - int32 change_date = 10; - int32 max_results = 11; - int32 offset = 12; - bool include_finished = 13; + repeated string ids = 1; + repeated string frames = 2; + repeated string layers = 3; + FrameStateSeq states = 4; + string frame_range = 5; + string memory_range = 6; + string duration_range = 7; + int32 page = 8; + int32 limit = 9; + int32 change_date = 10; + int32 max_results = 11; + int32 offset = 12; + bool include_finished = 13; } // A sequence of Frames message FrameSeq { - repeated Frame frames = 1; + repeated Frame frames = 1; } // A sequence of FrameStates message FrameStateSeq { - repeated FrameState frame_states = 1; + repeated FrameState frame_states = 1; } + // A struct containing properties for all the elements of a frame that // can change except for the ID which is there for indexing purposes. 
message UpdatedFrame { - string id = 1; - FrameState state = 2; - int32 retry_count = 3; - int32 exit_status = 4; - int32 start_time = 5; - int32 stop_time = 6; - int64 max_rss = 7; - int64 used_memory = 8; - string last_resource = 9; - int32 llu_time = 10; - int64 max_gpu_memory = 11; - int64 used_gpu_memory = 12; - FrameStateDisplayOverride frame_state_display_override = 13; + string id = 1; + FrameState state = 2; + int32 retry_count = 3; + int32 exit_status = 4; + int32 start_time = 5; + int32 stop_time = 6; + int64 max_rss = 7; + int64 used_memory = 8; + string last_resource = 9; + int32 llu_time = 10; + int64 max_gpu_memory = 11; + int64 used_gpu_memory = 12; + FrameStateDisplayOverride frame_state_display_override = 13; } message UpdatedFrameSeq { - repeated UpdatedFrame updated_frames = 1; + repeated UpdatedFrame updated_frames = 1; } + // The result of an updated frame check. The job state is included // so tools that are just monitoring frames can stop monitoring them // once the job state changes to Finished. 
message UpdatedFrameCheckResult { - JobState state = 1; - int32 server_time = 2; - UpdatedFrameSeq updated_frames = 3; + JobState state = 1; + int32 server_time = 2; + UpdatedFrameSeq updated_frames = 3; } // GROUP ---- message Group { - string id = 1; - string name = 2; - string department = 3; - int32 default_job_priority = 4; - float default_job_min_cores = 5; - float default_job_max_cores = 6; - float min_cores = 7; - float max_cores = 8; - int32 level = 9; - string parent_id = 10; - GroupStats group_stats = 11; - float default_job_min_gpus = 12; - float default_job_max_gpus = 13; - float min_gpus = 14; - float max_gpus = 15; + string id = 1; + string name = 2; + string department = 3; + int32 default_job_priority = 4; + float default_job_min_cores = 5; + float default_job_max_cores = 6; + float min_cores = 7; + float max_cores = 8; + int32 level = 9; + string parent_id = 10; + GroupStats group_stats = 11; + float default_job_min_gpus = 12; + float default_job_max_gpus = 13; + float min_gpus = 14; + float max_gpus = 15; } message GroupSeq { - repeated Group groups = 1; + repeated Group groups = 1; } message GroupStats { - int32 running_frames = 1; - int32 dead_frames = 2; - int32 depend_frames = 3; - int32 waiting_frames = 4; - int32 pending_jobs = 5; - float reserved_cores = 6; - float reserved_gpus = 7; + int32 running_frames = 1; + int32 dead_frames = 2; + int32 depend_frames = 3; + int32 waiting_frames = 4; + int32 pending_jobs = 5; + float reserved_cores = 6; + float reserved_gpus = 7; } // JOB ---- message Job { - string id = 1; - JobState state = 2; - string name = 3; - string shot = 4; - string show = 5; - string user = 6; - string group = 7; - string facility = 8; - string os = 9; - oneof uid_optional { - int32 uid = 10; - } - int32 priority = 11; - float min_cores = 12; - float max_cores = 13; - string log_dir = 14; - bool is_paused = 15; - bool has_comment = 16; - bool auto_eat = 17; - int32 start_time = 18; - int32 stop_time = 19; - JobStats 
job_stats = 20; - float min_gpus = 21; - float max_gpus = 22; - string loki_url = 23; + string id = 1; + JobState state = 2; + string name = 3; + string shot = 4; + string show = 5; + string user = 6; + string group = 7; + string facility = 8; + string os = 9; + oneof uid_optional { + int32 uid = 10; + } + int32 priority = 11; + float min_cores = 12; + float max_cores = 13; + string log_dir = 14; + bool is_paused = 15; + bool has_comment = 16; + bool auto_eat = 17; + int32 start_time = 18; + int32 stop_time = 19; + JobStats job_stats = 20; + float min_gpus = 21; + float max_gpus = 22; + string loki_url = 23; } // Use to filter the job search. Please note that by searching for non-pending jobs, the output is limited to 200 jobs message JobSearchCriteria { - repeated string ids = 1; - repeated string jobs = 2; - repeated string regex = 3; - repeated string substr = 4; - repeated string users = 5; - repeated string shots = 6; - repeated string shows = 7; - bool include_finished = 8; + repeated string ids = 1; + repeated string jobs = 2; + repeated string regex = 3; + repeated string substr = 4; + repeated string users = 5; + repeated string shots = 6; + repeated string shows = 7; + bool include_finished = 8; } message JobSeq { - repeated Job jobs = 1; + repeated Job jobs = 1; } message JobStats { - int32 total_layers = 1; - int32 total_frames = 2; - int32 waiting_frames = 3; - int32 running_frames = 4; - int32 dead_frames = 5; - int32 eaten_frames = 6; - int32 depend_frames = 7; - int32 succeeded_frames = 8; - int32 pending_frames = 9; - int32 avg_frame_sec = 10; - int32 high_frame_sec = 11; - int32 avg_core_sec = 12; - int64 rendered_frame_count = 13; - int64 failed_frame_count = 14; - int64 remaining_core_sec = 15; - int64 total_core_sec = 16; - int64 rendered_core_sec = 17; - int64 failed_core_sec = 18; - int64 max_rss = 19; - float reserved_cores = 20; - int64 total_gpu_sec = 21; - int64 rendered_gpu_sec = 22; - int64 failed_gpu_sec = 23; - float reserved_gpus = 
24; - int64 max_gpu_memory = 25; + int32 total_layers = 1; + int32 total_frames = 2; + int32 waiting_frames = 3; + int32 running_frames = 4; + int32 dead_frames = 5; + int32 eaten_frames = 6; + int32 depend_frames = 7; + int32 succeeded_frames = 8; + int32 pending_frames = 9; + int32 avg_frame_sec = 10; + int32 high_frame_sec = 11; + int32 avg_core_sec = 12; + int64 rendered_frame_count = 13; + int64 failed_frame_count = 14; + int64 remaining_core_sec = 15; + int64 total_core_sec = 16; + int64 rendered_core_sec = 17; + int64 failed_core_sec = 18; + int64 max_rss = 19; + float reserved_cores = 20; + int64 total_gpu_sec = 21; + int64 rendered_gpu_sec = 22; + int64 failed_gpu_sec = 23; + float reserved_gpus = 24; + int64 max_gpu_memory = 25; } // LAYER ---- message Layer { - string id = 1; - string name = 2; - string range = 3; - repeated string tags = 4; - float min_cores = 5; - float max_cores = 6; - bool is_threadable = 7; - int64 min_memory = 8; - int64 min_gpu_memory = 9; - int32 chunk_size = 10; - int32 dispatch_order = 11; - LayerType type = 12; - // An array of services that are being run on all frames within this layer. - repeated string services = 13; - // True if the memory optimizer is enabled. Disabling the optimizer will stop Cuebot from lowering memory. - bool memory_optimizer_enabled = 14; - LayerStats layer_stats = 15; - string parent_id = 16; - repeated string limits = 17; - int32 timeout = 18; - int32 timeout_llu = 19; - float min_gpus = 20; - float max_gpus = 21; - string command = 22; - // Number of slots required per frame (<0 means not slot-based) - int32 slots_required = 23; + string id = 1; + string name = 2; + string range = 3; + repeated string tags = 4; + float min_cores = 5; + float max_cores = 6; + bool is_threadable = 7; + int64 min_memory = 8; + int64 min_gpu_memory = 9; + int32 chunk_size = 10; + int32 dispatch_order = 11; + LayerType type = 12; + // An array of services that are being run on all frames within this layer. 
+ repeated string services = 13; + // True if the memory optimizer is enabled. Disabling the optimizer will stop Cuebot from lowering memory. + bool memory_optimizer_enabled = 14; + LayerStats layer_stats = 15; + string parent_id = 16; + repeated string limits = 17; + int32 timeout = 18; + int32 timeout_llu = 19; + float min_gpus = 20; + float max_gpus = 21; + string command = 22; + // Number of slots required per frame (<0 means not slot-based) + int32 slots_required = 23; } message LayerSeq { - repeated Layer layers = 1; + repeated Layer layers = 1; } message LayerStats { - int32 total_frames = 1; - int32 waiting_frames = 2; - int32 running_frames = 3; - int32 dead_frames = 4; - int32 eaten_frames = 5; - int32 depend_frames = 6; - int32 succeeded_frames = 7; - int32 pending_frames = 8; - int32 avg_frame_sec = 9; - int32 low_frame_sec = 10; - int32 high_frame_sec = 11; - int32 avg_core_sec = 12; - int64 rendered_frame_count = 13; - int64 failed_frame_count = 14; - int64 remaining_core_sec = 15; - int64 total_core_sec = 16; - int64 rendered_core_sec = 17; - int64 failed_core_sec = 18; - int64 max_rss = 19; - float reserved_cores = 20; - int64 total_gpu_sec = 21; - int64 rendered_gpu_sec = 22; - int64 failed_gpu_sec = 23; - float reserved_gpus = 24; - int64 max_gpu_memory = 25; + int32 total_frames = 1; + int32 waiting_frames = 2; + int32 running_frames = 3; + int32 dead_frames = 4; + int32 eaten_frames = 5; + int32 depend_frames = 6; + int32 succeeded_frames = 7; + int32 pending_frames = 8; + int32 avg_frame_sec = 9; + int32 low_frame_sec = 10; + int32 high_frame_sec = 11; + int32 avg_core_sec = 12; + int64 rendered_frame_count = 13; + int64 failed_frame_count = 14; + int64 remaining_core_sec = 15; + int64 total_core_sec = 16; + int64 rendered_core_sec = 17; + int64 failed_core_sec = 18; + int64 max_rss = 19; + float reserved_cores = 20; + int64 total_gpu_sec = 21; + int64 rendered_gpu_sec = 22; + int64 failed_gpu_sec = 23; + float reserved_gpus = 24; + int64 
max_gpu_memory = 25; } // NestedGroup --- message NestedGroup { - string id = 1; - string name = 2; - string department = 3; - int32 default_job_priority = 4; - float default_job_min_cores = 5; - float default_job_max_cores = 6; - float min_cores = 7; - float max_cores = 8; - int32 level = 9; - NestedGroup parent = 10; - NestedGroupSeq groups = 11; - repeated string jobs = 12; - GroupStats stats = 13; - float default_job_min_gpus = 14; - float default_job_max_gpus = 15; - float min_gpus = 16; - float max_gpus = 17; + string id = 1; + string name = 2; + string department = 3; + int32 default_job_priority = 4; + float default_job_min_cores = 5; + float default_job_max_cores = 6; + float min_cores = 7; + float max_cores = 8; + int32 level = 9; + NestedGroup parent = 10; + NestedGroupSeq groups = 11; + repeated string jobs = 12; + GroupStats stats = 13; + float default_job_min_gpus = 14; + float default_job_max_gpus = 15; + float min_gpus = 16; + float max_gpus = 17; } message NestedGroupSeq { - repeated NestedGroup nested_groups = 1; + repeated NestedGroup nested_groups = 1; } // NESTED JOB ---- message NestedJob { - string id = 1; - JobState state = 2; - string name = 3; - string shot = 4; - string show = 5; - string user = 6; - string group = 7; - string facility = 8; - string os = 9; - oneof uid_optional { - int32 uid = 10; - } - int32 priority = 11; - float min_cores = 12; - float max_cores = 13; - string log_dir = 14; - bool is_paused = 15; - bool has_comment = 16; - bool auto_eat = 17; - int32 start_time = 18; - int32 stop_time = 19; - NestedGroup parent = 20; - JobStats stats = 21; - float min_gpus = 22; - float max_gpus = 23; + string id = 1; + JobState state = 2; + string name = 3; + string shot = 4; + string show = 5; + string user = 6; + string group = 7; + string facility = 8; + string os = 9; + oneof uid_optional { + int32 uid = 10; + } + int32 priority = 11; + float min_cores = 12; + float max_cores = 13; + string log_dir = 14; + bool is_paused = 15; + 
bool has_comment = 16; + bool auto_eat = 17; + int32 start_time = 18; + int32 stop_time = 19; + NestedGroup parent = 20; + JobStats stats = 21; + float min_gpus = 22; + float max_gpus = 23; } + // -------- Requests & Responses --------] // FRAME ---- // AddRenderPartition message FrameAddRenderPartitionRequest { - Frame frame = 1; - string host = 2; - int32 threads = 3; - int32 max_cores = 4; - int64 max_memory = 5; - int64 max_gpu_memory = 6; - string username = 7; - int32 max_gpus = 8; + Frame frame = 1; + string host = 2; + int32 threads = 3; + int32 max_cores = 4; + int64 max_memory = 5; + int64 max_gpu_memory = 6; + string username = 7; + int32 max_gpus = 8; } message FrameAddRenderPartitionResponse { - renderPartition.RenderPartition render_partition = 1; + renderPartition.RenderPartition render_partition = 1; } // CreateDependencyOnFrame message FrameCreateDependencyOnFrameRequest { - Frame frame = 1; - Frame depend_on_frame = 2; + Frame frame = 1; + Frame depend_on_frame = 2; } message FrameCreateDependencyOnFrameResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnJob message FrameCreateDependencyOnJobRequest { - Frame frame = 1; - Job job = 2; + Frame frame = 1; + Job job = 2; } message FrameCreateDependencyOnJobResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnLayer message FrameCreateDependencyOnLayerRequest { - Frame frame = 1; - Layer layer = 2; + Frame frame = 1; + Layer layer = 2; } message FrameCreateDependencyOnLayerResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // DropDepends message FrameDropDependsRequest { - Frame frame = 1; - depend.DependTarget target = 2; + Frame frame = 1; + depend.DependTarget target = 2; } message FrameDropDependsResponse {} // Empty // Eat message FrameEatRequest { - Frame frame = 1; + Frame frame = 1; } message FrameEatResponse {} // Empty // FindFrame message FrameFindFrameRequest { - string job = 1; - string layer = 2; 
- int32 frame = 3; + string job = 1; + string layer = 2; + int32 frame = 3; } message FrameFindFrameResponse { - Frame frame = 1; + Frame frame = 1; } // GetFrame message FrameGetFrameRequest { - string id = 1; + string id = 1; } message FrameGetFrameResponse { - Frame frame = 1; + Frame frame = 1; } // GetFrames message FrameGetFramesRequest { - string job = 1; - job.FrameSearchCriteria r = 2; + string job = 1; + job.FrameSearchCriteria r = 2; } message FrameGetFramesResponse { - FrameSeq frames = 1; + FrameSeq frames = 1; } // GetWhatDependsOnThis message FrameGetWhatDependsOnThisRequest { - Frame frame = 1; + Frame frame = 1; } message FrameGetWhatDependsOnThisResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // GetWhatThisDependsOn message FrameGetWhatThisDependsOnRequest { - Frame frame = 1; + Frame frame = 1; } message FrameGetWhatThisDependsOnResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // Kill message FrameKillRequest { - Frame frame = 1; - string username = 2; - string pid = 3; - string host_kill = 4; - string reason = 5; + Frame frame = 1; + string username = 2; + string pid = 3; + string host_kill = 4; + string reason = 5; } message FrameKillResponse {} // Empty // MarkAsDepend message FrameMarkAsDependRequest { - Frame frame = 1; + Frame frame = 1; } message FrameMarkAsDependResponse {} // Empty // MarkAsWaiting message FrameMarkAsWaitingRequest { - Frame frame = 1; + Frame frame = 1; } message FrameMarkAsWaitingResponse {} // Empty // Retry message FrameRetryRequest { - Frame frame = 1; + Frame frame = 1; } message FrameRetryResponse {} // Empty // SetCheckpointState message FrameSetCheckpointStateRequest { - Frame frame = 1; - CheckpointState state = 2; + Frame frame = 1; + CheckpointState state = 2; } message FrameSetCheckpointStateResponse {} // Empty + // GROUP ---- // CreateSubGroup message GroupCreateSubGroupRequest { - Group group = 1; - string name = 2; + Group group = 1; + string name 
= 2; } message GroupCreateSubGroupResponse { - Group group = 1; + Group group = 1; } // Delete message GroupDeleteRequest { - Group group = 1; + Group group = 1; } message GroupDeleteResponse {} // Empty // FindGroup message GroupFindGroupRequest { - string show = 1; - string name = 2; + string show = 1; + string name = 2; } message GroupFindGroupResponse { - Group group = 1; + Group group = 1; } // GetGroup message GroupGetGroupRequest { - string id = 1; + string id = 1; } message GroupGetGroupResponse { - Group group = 1; + Group group = 1; } // GetGroups message GroupGetGroupsRequest { - Group group = 1; + Group group = 1; } message GroupGetGroupsResponse { - GroupSeq groups = 1; + GroupSeq groups = 1; } // GetJobs message GroupGetJobsRequest { - Group group = 1; + Group group = 1; } message GroupGetJobsResponse { - JobSeq jobs = 1; + JobSeq jobs = 1; } // ReparentGroups message GroupReparentGroupsRequest { - Group group = 1; - GroupSeq groups = 2; + Group group = 1; + GroupSeq groups = 2; } message GroupReparentGroupsResponse {} // Empty // ReparentJobs message GroupReparentJobsRequest { - Group group = 1; - JobSeq jobs = 2; + Group group = 1; + JobSeq jobs = 2; } message GroupReparentJobsResponse {} // Empty // SetDefJobMaxCores message GroupSetDefJobMaxCoresRequest { - Group group = 1; - float max_cores = 2; + Group group = 1; + float max_cores = 2; } message GroupSetDefJobMaxCoresResponse {} // Empty // SetDefJobMinCores message GroupSetDefJobMinCoresRequest { - Group group = 1; - float min_cores = 2; + Group group = 1; + float min_cores = 2; } message GroupSetDefJobMinCoresResponse {} // Empty // SetDefaultJobMaxGpus message GroupSetDefJobMaxGpusRequest { - Group group = 1; - int32 max_gpus = 2; + Group group = 1; + int32 max_gpus = 2; } message GroupSetDefJobMaxGpusResponse {} // Empty // SetDefaultJobMinGpus message GroupSetDefJobMinGpusRequest { - Group group = 1; - int32 min_gpus = 2; + Group group = 1; + int32 min_gpus = 2; } message 
GroupSetDefJobMinGpusResponse {} // Empty // SetDefJobPriority message GroupSetDefJobPriorityRequest { - Group group = 1; - int32 priority = 2; + Group group = 1; + int32 priority = 2; } message GroupSetDefJobPriorityResponse {} // Empty // SetDept message GroupSetDeptRequest { - Group group = 1; - string dept = 2; + Group group = 1; + string dept = 2; } message GroupSetDeptResponse {} // Empty // SetGroup message GroupSetGroupRequest { - Group group = 1; - Group parent_group = 2; + Group group = 1; + Group parent_group = 2; } message GroupSetGroupResponse {} // Empty // SetMaxCores message GroupSetMaxCoresRequest { - Group group = 1; - float max_cores = 2; + Group group = 1; + float max_cores = 2; } message GroupSetMaxCoresResponse {} // Empty // SetMinCores message GroupSetMinCoresRequest { - Group group = 1; - float min_cores = 2; + Group group = 1; + float min_cores = 2; } message GroupSetMinCoresResponse {} // Empty // SetMaxGpus message GroupSetMaxGpusRequest { - Group group = 1; - int32 max_gpus = 2; + Group group = 1; + int32 max_gpus = 2; } message GroupSetMaxGpusResponse {} // Empty // SetMinGpus message GroupSetMinGpusRequest { - Group group = 1; - int32 min_gpus = 2; + Group group = 1; + int32 min_gpus = 2; } message GroupSetMinGpusResponse {} // Empty // SetName message GroupSetNameRequest { - Group group = 1; - string name = 2; + Group group = 1; + string name = 2; } message GroupSetNameResponse {} // Empty @@ -1122,603 +1131,605 @@ message GroupSetNameResponse {} // Empty // JOB ---- // AddComment message JobAddCommentRequest { - Job job = 1; - comment.Comment new_comment = 2; + Job job = 1; + comment.Comment new_comment = 2; } message JobAddCommentResponse {} // Empty // AddRenderPartition message JobAddRenderPartRequest { - Job job = 1; - string host = 2; - int32 threads = 3; - int32 max_cores = 4; - int64 max_memory = 5; - int64 max_gpu_memory = 6; - string username = 7; - int32 max_gpus = 8; + Job job = 1; + string host = 2; + int32 threads = 3; 
+ int32 max_cores = 4; + int64 max_memory = 5; + int64 max_gpu_memory = 6; + string username = 7; + int32 max_gpus = 8; } message JobAddRenderPartResponse { - renderPartition.RenderPartition render_partition = 1; + renderPartition.RenderPartition render_partition = 1; } // AddSubscriber message JobAddSubscriberRequest { - Job job = 1; - string subscriber = 2; + Job job = 1; + string subscriber = 2; } message JobAddSubscriberResponse {} // CreateDependencyOnFrame message JobCreateDependencyOnFrameRequest { - Job job = 1; - Frame frame = 2; + Job job = 1; + Frame frame = 2; } message JobCreateDependencyOnFrameResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnJob message JobCreateDependencyOnJobRequest { - Job job = 1; - Job on_job = 2; + Job job = 1; + Job on_job = 2; } message JobCreateDependencyOnJobResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnLayer message JobCreateDependencyOnLayerRequest { - Job job = 1; - Layer layer = 2; + Job job = 1; + Layer layer = 2; } message JobCreateDependencyOnLayerResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // DropDepends message JobDropDependsRequest { - Job job = 1; - depend.DependTarget target = 2; + Job job = 1; + depend.DependTarget target = 2; } message JobDropDependsResponse {} // Empty // EatFrames message JobEatFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobEatFramesResponse {} // Empty // FindJob message JobFindJobRequest { - string name = 1; + string name = 1; } message JobFindJobResponse { - Job job = 1; + Job job = 1; } // GetComments message JobGetCommentsRequest { - Job job = 1; + Job job = 1; } message JobGetCommentsResponse { - comment.CommentSeq comments = 1; + comment.CommentSeq comments = 1; } // GetCurrent message JobGetCurrentRequest { - Job job = 1; + Job job = 1; } message JobGetCurrentResponse { - Job job = 1; + Job job = 1; } // 
GetDepends message JobGetDependsRequest { - Job job = 1; + Job job = 1; } message JobGetDependsResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // GetFrames message JobGetFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobGetFramesResponse { - FrameSeq frames = 1; + FrameSeq frames = 1; } // GetJob message JobGetJobRequest { - string id = 1; + string id = 1; } message JobGetJobResponse { - Job job = 1; + Job job = 1; } // GetJobs message JobGetJobsRequest { - JobSearchCriteria r = 1; + JobSearchCriteria r = 1; } message JobGetJobsResponse { - JobSeq jobs = 1; + JobSeq jobs = 1; } // GetJobNames message JobGetJobNamesRequest { - job.JobSearchCriteria r = 1; + job.JobSearchCriteria r = 1; } + message JobGetJobNamesResponse { - repeated string names = 1; + repeated string names = 1; } // GetLayers message JobGetLayersRequest { - Job job = 1; + Job job = 1; } message JobGetLayersResponse { - LayerSeq layers = 1; + LayerSeq layers = 1; } // GetUpdatedFrames message JobGetUpdatedFramesRequest { - Job job = 1; - int32 last_check = 2; - LayerSeq layer_filter = 3; + Job job = 1; + int32 last_check = 2; + LayerSeq layer_filter = 3; } message JobGetUpdatedFramesResponse { - JobState state = 1; - int32 server_time = 2; - UpdatedFrameSeq updated_frames = 3; + JobState state = 1; + int32 server_time = 2; + UpdatedFrameSeq updated_frames = 3; } // GetWhatDependsOnThis message JobGetWhatDependsOnThisRequest { - Job job = 1; + Job job = 1; } message JobGetWhatDependsOnThisResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } //GetWhatThisDependsOn message JobGetWhatThisDependsOnRequest { - Job job = 1; + Job job = 1; } message JobGetWhatThisDependsOnResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // IsJobPending message JobIsJobPendingRequest { - string name = 1; + string name = 1; } message JobIsJobPendingResponse { - bool value = 1; 
+ bool value = 1; } // Kill message JobKillRequest { - Job job = 1; - string username = 2; - string pid = 3; - string host_kill = 4; - string reason = 5; + Job job = 1; + string username = 2; + string pid = 3; + string host_kill = 4; + string reason = 5; } message JobKillResponse {} // Empty // KillFrames message JobKillFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; - string username = 3; - string pid = 4; - string host_kill = 5; - string reason = 6; + Job job = 1; + FrameSearchCriteria req = 2; + string username = 3; + string pid = 4; + string host_kill = 5; + string reason = 6; } message JobKillFramesResponse {} // Empty // LaunchSpec message JobLaunchSpecRequest { - string spec = 1; + string spec = 1; } message JobLaunchSpecResponse { - repeated string names = 1; + repeated string names = 1; } // LaunchSpecAndWait message JobLaunchSpecAndWaitRequest { - string spec = 1; + string spec = 1; } message JobLaunchSpecAndWaitResponse { - JobSeq jobs = 1; + JobSeq jobs = 1; } //MarkAsWaiting message JobMarkAsWaitingRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobMarkAsWaitingResponse {} // Empty // MarkDoneFrames message JobMarkDoneFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobMarkDoneFramesResponse {} // Empty // Pause message JobPauseRequest { - Job job = 1; + Job job = 1; } message JobPauseResponse {} // Empty // ReorderFrames message JobReorderFramesRequest { - Job job = 1; - string range = 2; - Order order = 3; + Job job = 1; + string range = 2; + Order order = 3; } message JobReorderFramesResponse {} // Empty // Resume message JobResumeRequest { - Job job = 1; + Job job = 1; } message JobResumeResponse {} // Empty // RetryFrames message JobRetryFramesRequest { - Job job = 1; - FrameSearchCriteria req = 2; + Job job = 1; + FrameSearchCriteria req = 2; } message JobRetryFramesResponse {} // Empty // RunFilters 
message JobRunFiltersRequest { - Job job = 1; + Job job = 1; } message JobRunFiltersResponse {} // Empty // SetAutoEat message JobSetAutoEatRequest { - Job job = 1; - bool value = 2; + Job job = 1; + bool value = 2; } message JobSetAutoEatResponse {} // Empty // SetGroup message JobSetGroupRequest { - Job job = 1; - string group_id = 2; + Job job = 1; + string group_id = 2; } message JobSetGroupResponse {} // Empty // SetMaxCores message JobSetMaxCoresRequest { - Job job = 1; - float val = 2; + Job job = 1; + float val = 2; } message JobSetMaxCoresResponse {} // Empty // SetMaxGpus message JobSetMaxGpusRequest { - Job job = 1; - int32 val = 2; + Job job = 1; + int32 val = 2; } message JobSetMaxGpusResponse {} // Empty // SetMaxRetries message JobSetMaxRetriesRequest { - Job job = 1; - int32 max_retries = 2; + Job job = 1; + int32 max_retries = 2; } message JobSetMaxRetriesResponse {} // Empty // SetMinCores message JobSetMinCoresRequest { - Job job = 1; - float val = 2; + Job job = 1; + float val = 2; } message JobSetMinCoresResponse {} // Empty // SetMinGpus message JobSetMinGpusRequest { - Job job = 1; - int32 val = 2; + Job job = 1; + int32 val = 2; } message JobSetMinGpusResponse {} // Empty // SetPriority message JobSetPriorityRequest { - Job job = 1; - int32 val = 2; + Job job = 1; + int32 val = 2; } message JobSetPriorityResponse {} // Empty // ShutdownIfCompleted message JobShutdownIfCompletedRequest { - Job job = 1; + Job job = 1; } message JobShutdownIfCompletedResponse {} // Empty // StaggerFrames message JobStaggerFramesRequest { - Job job = 1; - string range = 2; - int32 stagger = 3; + Job job = 1; + string range = 2; + int32 stagger = 3; } message JobStaggerFramesResponse {} // Empty + // LAYER ---- // AddLimit message LayerAddLimitRequest { - Layer layer = 1; - string limit_id = 2; + Layer layer = 1; + string limit_id = 2; } message LayerAddLimitResponse {} // Empty // AddRenderPartion message LayerAddRenderPartitionRequest { - Layer layer = 1; - 
string host = 2; - int32 threads = 3; - int32 max_cores = 4; - int64 max_memory = 5; - int64 max_gpu_memory = 6; - string username = 7; - int32 max_gpus = 8; + Layer layer = 1; + string host = 2; + int32 threads = 3; + int32 max_cores = 4; + int64 max_memory = 5; + int64 max_gpu_memory = 6; + string username = 7; + int32 max_gpus = 8; } message LayerAddRenderPartitionResponse { - renderPartition.RenderPartition render_partition = 1; + renderPartition.RenderPartition render_partition = 1; } // CreateDependencyOnFrame message LayerCreateDependOnFrameRequest { - Layer layer = 1; - Frame frame = 2; + Layer layer = 1; + Frame frame = 2; } message LayerCreateDependOnFrameResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnJob message LayerCreateDependOnJobRequest { - Layer layer = 1; - Job job = 2; + Layer layer = 1; + Job job = 2; } message LayerCreateDependOnJobResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateDependencyOnLayer message LayerCreateDependOnLayerRequest { - Layer layer = 1; - Layer depend_on_layer = 2; + Layer layer = 1; + Layer depend_on_layer = 2; } message LayerCreateDependOnLayerResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // CreateFrameByFrameDependency message LayerCreateFrameByFrameDependRequest { - Layer layer = 1; - Layer depend_layer = 2; - bool any_frame = 3; + Layer layer = 1; + Layer depend_layer = 2; + bool any_frame = 3; } message LayerCreateFrameByFrameDependResponse { - depend.Depend depend = 1; + depend.Depend depend = 1; } // DropDepends message LayerDropDependsRequest { - Layer layer = 1; - depend.DependTarget target = 2; + Layer layer = 1; + depend.DependTarget target = 2; } message LayerDropDependsResponse {} // Empty // DropLimit message LayerDropLimitRequest { - Layer layer = 1; - string limit_id = 2; + Layer layer = 1; + string limit_id = 2; } message LayerDropLimitResponse {} // Empty // EatFrames message LayerEatFramesRequest { - Layer 
layer = 1; + Layer layer = 1; } message LayerEatFramesResponse {} // Empty // EnableMemoryOptimizer message LayerEnableMemoryOptimizerRequest { - Layer layer = 1; - bool value = 2; + Layer layer = 1; + bool value = 2; } message LayerEnableMemoryOptimizerResponse {} // Empty // FindLayer message LayerFindLayerRequest { - string job = 1; - string layer = 2; + string job = 1; + string layer = 2; } message LayerFindLayerResponse { - Layer layer = 1; + Layer layer = 1; } // GetFrames message LayerGetFramesRequest { - Layer layer = 1; - FrameSearchCriteria s = 2; + Layer layer = 1; + FrameSearchCriteria s = 2; } message LayerGetFramesResponse { - FrameSeq frames = 1; + FrameSeq frames = 1; } // GetLayer message LayerGetLayerRequest { - string id = 1; + string id = 1; } message LayerGetLayerResponse { - Layer layer = 1; + Layer layer = 1; } // GetLayer message LayerGetLimitsRequest { - Layer layer = 1; + Layer layer = 1; } message LayerGetLimitsResponse { - repeated limit.Limit limits = 1; + repeated limit.Limit limits = 1; } // GetOutputPaths message LayerGetOutputPathsRequest { - Layer layer = 1; + Layer layer = 1; } message LayerGetOutputPathsResponse { - repeated string output_paths = 1; + repeated string output_paths = 1; } // GetWhatDependsOnThis message LayerGetWhatDependsOnThisRequest { - Layer layer = 1; + Layer layer = 1; } message LayerGetWhatDependsOnThisResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // GetWhatThisDependsOn message LayerGetWhatThisDependsOnRequest { - Layer layer = 1; + Layer layer = 1; } message LayerGetWhatThisDependsOnResponse { - depend.DependSeq depends = 1; + depend.DependSeq depends = 1; } // KillFrames message LayerKillFramesRequest { - Layer layer = 1; - string username = 2; - string pid = 3; - string host_kill = 4; - string reason = 5; + Layer layer = 1; + string username = 2; + string pid = 3; + string host_kill = 4; + string reason = 5; } message LayerKillFramesResponse {} // Empty // MarkdoneFrames 
message LayerMarkdoneFramesRequest { - Layer layer = 1; + Layer layer = 1; } message LayerMarkdoneFramesResponse {} // Empty // RetryFrames message LayerRetryFramesRequest { - Layer layer = 1; + Layer layer = 1; } message LayerRetryFramesResponse {} // Empty // RegisterOutputPath message LayerRegisterOutputPathRequest { - Layer layer = 1; - string spec = 2; + Layer layer = 1; + string spec = 2; } message LayerRegisterOutputPathResponse {} // Empty // ReorderFrames message LayerReorderFramesRequest { - Layer layer = 1; - string range = 2; - Order order = 3; + Layer layer = 1; + string range = 2; + Order order = 3; } message LayerReorderFramesResponse {} // Empty // SetMaxCores message LayerSetMaxCoresRequest { - Layer layer = 1; - float cores = 2; + Layer layer = 1; + float cores = 2; } message LayerSetMaxCoresResponse {} // Empty // SetMinCores message LayerSetMinCoresRequest { - Layer layer = 1; - float cores = 2; + Layer layer = 1; + float cores = 2; } message LayerSetMinCoresResponse {} // Empty // [Deprecated] SetMinGpu message LayerSetMinGpuRequest { - Layer layer = 1 [deprecated = true]; - int64 gpu = 2 [deprecated = true]; + Layer layer = 1 [deprecated=true]; + int64 gpu = 2 [deprecated=true]; } // [Deprecated] @@ -1726,64 +1737,64 @@ message LayerSetMinGpuResponse {} // Empty // SetMaxGpus message LayerSetMaxGpusRequest { - Layer layer = 1; - int32 max_gpus = 2; + Layer layer = 1; + int32 max_gpus = 2; } message LayerSetMaxGpusResponse {} // Empty // SetMinGpus message LayerSetMinGpusRequest { - Layer layer = 1; - int32 min_gpus = 2; + Layer layer = 1; + int32 min_gpus = 2; } message LayerSetMinGpusResponse {} // Empty // SetMinGpuMemory message LayerSetMinGpuMemoryRequest { - Layer layer = 1; - int64 gpu_memory = 2; + Layer layer = 1; + int64 gpu_memory = 2; } message LayerSetMinGpuMemoryResponse {} // Empty // SetMinMemory message LayerSetMinMemoryRequest { - Layer layer = 1; - int64 memory = 2; + Layer layer = 1; + int64 memory = 2; } message 
LayerSetMinMemoryResponse {} // Empty // SetTags message LayerSetTagsRequest { - Layer layer = 1; - repeated string tags = 2; + Layer layer = 1; + repeated string tags = 2; } message LayerSetTagsResponse {} // Empty // SetThreadable message LayerSetThreadableRequest { - Layer layer = 1; - bool threadable = 2; + Layer layer = 1; + bool threadable = 2; } message LayerSetThreadableResponse {} // Empty // SetTimeout message LayerSetTimeoutRequest { - Layer layer = 1; - int32 timeout = 2; + Layer layer = 1; + int32 timeout = 2; } message LayerSetTimeoutResponse {} // Empty // SetTimeoutLLU message LayerSetTimeoutLLURequest { - Layer layer = 1; - int32 timeout_llu = 2; + Layer layer = 1; + int32 timeout_llu = 2; } message LayerSetTimeoutLLUResponse {} // Empty @@ -1799,13 +1810,14 @@ message LayerSetSlotsRequiredResponse {} // Empty // StaggerFrames message LayerStaggerFramesRequest { - Layer layer = 1; - string range = 2; - int32 stagger = 3; + Layer layer = 1; + string range = 2; + int32 stagger = 3; } message LayerStaggerFramesResponse {} // Empty + message FrameStateDisplayOverrideRequest { Frame frame = 1; FrameStateDisplayOverride override = 2; From d6888eeb02492cfa50601cddd5d12e771125b175 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 7 Jan 2026 09:45:20 -0800 Subject: [PATCH 13/17] Add slots_required to rqd.RunFrame --- .../main/java/com/imageworks/spcue/DispatchFrame.java | 1 + .../src/main/java/com/imageworks/spcue/VirtualProc.java | 4 ++++ .../com/imageworks/spcue/dao/postgres/DispatchQuery.java | 9 +++++++++ .../com/imageworks/spcue/dao/postgres/FrameDaoJdbc.java | 5 +++-- .../spcue/dispatcher/DispatchSupportService.java | 2 +- .../migrations/V36__Add_layer_slots_required.sql | 2 +- proto/src/job.proto | 2 +- proto/src/rqd.proto | 2 ++ rust/crates/dummy-cuebot/src/rqd_client.rs | 2 ++ rust/crates/rqd/src/frame/running_frame.rs | 1 + rust/crates/rqd/src/system/oom.rs | 1 + rust/crates/scheduler/src/pipeline/dispatcher/actor.rs | 1 + 12 files changed, 
27 insertions(+), 5 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchFrame.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchFrame.java index b73cd75c3..a006a7a79 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchFrame.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchFrame.java @@ -43,6 +43,7 @@ public class DispatchFrame extends FrameEntity implements FrameInterface { public int minGpus; public int maxGpus; public long minGpuMemory; + public int slotsRequired; // A comma separated list of services public String services; diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 8c92ad016..4f953ee5e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -45,6 +45,8 @@ public class VirtualProc extends FrameEntity implements ProcInterface { public long gpuMemoryUsed; public long gpuMemoryMax; + public int slotsRequired; + public boolean unbooked; public boolean usageRecorded = false; public boolean isLocalDispatch = false; @@ -101,6 +103,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame, proc.memoryReserved = frame.getMinMemory(); proc.gpusReserved = frame.minGpus; proc.gpuMemoryReserved = frame.minGpuMemory; + proc.slotsRequired = frame.slotsRequired; /* * Frames that are announcing cores less than 100 are not multi-threaded so there is no @@ -237,6 +240,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame, proc.memoryReserved = frame.getMinMemory(); proc.gpusReserved = frame.minGpus; proc.gpuMemoryReserved = frame.minGpuMemory; + proc.slotsRequired = frame.slotsRequired; int wholeCores = (int) (Math.floor(host.idleCores / 100.0)); if (wholeCores == 0) { diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatchQuery.java 
b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatchQuery.java index 2daa00335..4f75d6708 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatchQuery.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/DispatchQuery.java @@ -546,6 +546,7 @@ private static final String replaceQueryForFifo(String query) { "int_gpus_min, " + "int_gpus_max, " + "int_gpu_mem_min, " + + "int_slots_required, " + "str_cmd, " + "str_range, " + "int_chunk_size, " + @@ -588,6 +589,7 @@ private static final String replaceQueryForFifo(String query) { "layer.int_gpus_min, " + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + + "layer.int_slots_required, " + "layer.str_cmd, " + "layer.str_range, " + "layer.int_chunk_size, " + @@ -676,6 +678,7 @@ private static final String replaceQueryForFifo(String query) { "layer.b_threadable, " + "layer.int_mem_min, " + "layer.int_gpu_mem_min, " + + "layer.int_slots_required, " + "layer.str_cmd, " + "layer.str_range, " + "layer.int_chunk_size, " + @@ -765,6 +768,7 @@ private static final String replaceQueryForFifo(String query) { "layer.int_gpus_min, " + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + + "layer.int_slots_required, " + "layer.str_cmd, " + "layer.str_range, " + "layer.int_chunk_size, " + @@ -847,6 +851,7 @@ private static final String replaceQueryForFifo(String query) { "layer.int_gpus_min, " + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + + "layer.int_slots_required, " + "layer.str_cmd, " + "layer.str_range, " + "layer.int_chunk_size, " + @@ -932,6 +937,7 @@ private static final String replaceQueryForFifo(String query) { "layer.int_gpus_min, " + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + + "layer.int_slots_required, " + "layer.str_cmd, " + "layer.str_range, " + "layer.int_chunk_size, " + @@ -1020,6 +1026,7 @@ private static final String replaceQueryForFifo(String query) { "layer.int_gpus_min, " + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + + 
"layer.int_slots_required, " + "layer.str_cmd, " + "layer.str_range, " + "layer.int_chunk_size, " + @@ -1108,6 +1115,7 @@ private static final String replaceQueryForFifo(String query) { "layer.int_gpus_min, " + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + + "layer.int_slots_required, " + "layer.int_cores_max, " + "layer.str_cmd, " + "layer.str_range, " + @@ -1191,6 +1199,7 @@ private static final String replaceQueryForFifo(String query) { "layer.int_gpus_min, " + "layer.int_gpus_max, " + "layer.int_gpu_mem_min, " + + "layer.int_slots_required, " + "layer.str_cmd, " + "layer.str_range, " + "layer.int_chunk_size, " + diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/FrameDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/FrameDaoJdbc.java index 2ccaef16c..15ec06a17 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/FrameDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/FrameDaoJdbc.java @@ -234,6 +234,7 @@ public DispatchFrame mapRow(ResultSet rs, int rowNum) throws SQLException { frame.minGpus = rs.getInt("int_gpus_min"); frame.maxGpus = rs.getInt("int_gpus_max"); frame.minGpuMemory = rs.getLong("int_gpu_mem_min"); + frame.slotsRequired = rs.getInt("int_slots_required"); frame.version = rs.getInt("int_version"); frame.services = rs.getString("str_services"); frame.os = rs.getString("str_os"); @@ -252,8 +253,8 @@ public DispatchFrame mapRow(ResultSet rs, int rowNum) throws SQLException { + "layer.str_type AS layer_type, " + "layer.str_cmd, " + "layer.int_cores_min," + "layer.int_cores_max," + "layer.b_threadable," + "layer.int_mem_min, " + "layer.int_gpus_min," + "layer.int_gpus_max," + "layer.int_gpu_mem_min, " - + "layer.str_range, " + "layer.int_chunk_size, " + "layer.str_services " + "FROM " - + "layer, " + "job, " + "show, " + + "layer.int_slots_required, " + "layer.str_range, " + "layer.int_chunk_size, " + + "layer.str_services " + "FROM " + "layer, " + "job, " + "show, " + 
"frame LEFT JOIN proc ON (proc.pk_frame = frame.pk_frame) " + "WHERE " + "job.pk_show = show.pk_show " + "AND " + "frame.pk_job = job.pk_job " + "AND " + "frame.pk_layer = layer.pk_layer " + "AND " + "frame.pk_frame = ?"; diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java index 3eb1ad3f2..a32b0e8fc 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/DispatchSupportService.java @@ -392,7 +392,7 @@ public RunFrame prepareRqdRunFrame(VirtualProc proc, DispatchFrame frame) { .setNumCores(proc.coresReserved).setNumGpus(proc.gpusReserved) .setStartTime(System.currentTimeMillis()).setIgnoreNimby(proc.isLocalDispatch) .setOs(proc.os).setSoftMemoryLimit(frame.softMemoryLimit).setLokiUrl(frame.lokiURL) - .setHardMemoryLimit(frame.hardMemoryLimit) + .setHardMemoryLimit(frame.hardMemoryLimit).setSlotsRequired(proc.slotsRequired) .putAllEnvironment(jobDao.getEnvironment(frame)) .putAllEnvironment(layerDao.getLayerEnvironment(frame)).putEnvironment("CUE3", "1") .putEnvironment("CUE_THREADS", String.valueOf(threads)) diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql index 0b9ea3bd1..2172118dc 100644 --- a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V36__Add_layer_slots_required.sql @@ -1,4 +1,4 @@ -- Add a field to mark a layer as requiring at least a specific number of slots --- <0 means slots are not required +-- <= 0 means slots are not required alter table layer add int_slots_required INT NOT NULL DEFAULT 0; diff --git a/proto/src/job.proto b/proto/src/job.proto index 559b5d9b2..9c5756e78 100644 --- 
a/proto/src/job.proto +++ b/proto/src/job.proto @@ -717,7 +717,7 @@ message Layer { float min_gpus = 20; float max_gpus = 21; string command = 22; - // Number of slots required per frame (<0 means not slot-based) + // Number of slots required per frame (<= 0 means not slot-based) int32 slots_required = 23; } diff --git a/proto/src/rqd.proto b/proto/src/rqd.proto index 621a13212..2151ad9d6 100644 --- a/proto/src/rqd.proto +++ b/proto/src/rqd.proto @@ -126,6 +126,8 @@ message RunFrame { int64 hard_memory_limit = 27; int32 pid = 28; string loki_url = 29; + // Number of slots required per frame (<= 0 means not slot-based) + int32 slots_required = 30; } message RunFrameSeq { diff --git a/rust/crates/dummy-cuebot/src/rqd_client.rs b/rust/crates/dummy-cuebot/src/rqd_client.rs index ab3ce01b4..bd9acdfd4 100644 --- a/rust/crates/dummy-cuebot/src/rqd_client.rs +++ b/rust/crates/dummy-cuebot/src/rqd_client.rs @@ -82,6 +82,8 @@ impl DummyRqdClient { #[allow(deprecated)] start_time: 0, + + slots_required: 0, }; let mut client = self.client.lock().await; diff --git a/rust/crates/rqd/src/frame/running_frame.rs b/rust/crates/rqd/src/frame/running_frame.rs index 396c589fd..79518014e 100644 --- a/rust/crates/rqd/src/frame/running_frame.rs +++ b/rust/crates/rqd/src/frame/running_frame.rs @@ -1359,6 +1359,7 @@ mod tests { #[allow(deprecated)] start_time: 0, + slots_required: 0, }, uid, config, diff --git a/rust/crates/rqd/src/system/oom.rs b/rust/crates/rqd/src/system/oom.rs index 2ca82f69e..d5a91a820 100644 --- a/rust/crates/rqd/src/system/oom.rs +++ b/rust/crates/rqd/src/system/oom.rs @@ -239,6 +239,7 @@ mod tests { log_dir_file: "".to_string(), #[allow(deprecated)] start_time: 0, + slots_required: 0, }, 1000, config, diff --git a/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs b/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs index 1c592b8d7..46343e278 100644 --- a/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs +++ 
b/rust/crates/scheduler/src/pipeline/dispatcher/actor.rs @@ -967,6 +967,7 @@ impl RqdDispatcherService { log_file: "deprecated".to_string(), #[allow(deprecated)] log_dir_file: "deprecated".to_string(), + slots_required: 0, }; Ok(run_frame) From 19791d0fe67bb0fee3338f2f8eb84b1ce12f62f2 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Thu, 8 Jan 2026 16:11:08 -0800 Subject: [PATCH 14/17] Implement slot based booking on rqd --- .../com/imageworks/spcue/dao/HostDao.java | 8 + .../spcue/dao/postgres/HostDaoJdbc.java | 11 + .../spcue/servant/RqdReportStatic.java | 22 ++ .../imageworks/spcue/service/HostManager.java | 8 + .../spcue/service/HostManagerService.java | 6 + .../conf/spring/applicationContext-grpc.xml | 57 +++-- proto/src/report.proto | 12 ++ .../crates/dummy-cuebot/src/report_servant.rs | 20 ++ rust/crates/rqd/src/frame/manager.rs | 195 ++++++++++++------ rust/crates/rqd/src/report/report_client.rs | 20 ++ rust/crates/rqd/src/system/machine.rs | 151 +++++++++++++- rust/crates/rqd/src/system/manager.rs | 3 + 12 files changed, 430 insertions(+), 83 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java index e2269e3a2..8459d62cf 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java @@ -251,6 +251,14 @@ public interface HostDao { */ void updateConcurrentSlotsLimit(HostInterface host, int limit); + /** + * Get the host's concurrent slots limit by hostname. + * + * @param hostname String + * @return int the concurrent slots limit + */ + int getHostConcurrentSlotsLimit(String hostname); + /** * Update the specified host's hardware information. 
* diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java index 7fb031f9e..9365b9f2b 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/HostDaoJdbc.java @@ -571,6 +571,17 @@ public void updateConcurrentSlotsLimit(HostInterface host, int limit) { limit, host.getHostId()); } + @Override + public int getHostConcurrentSlotsLimit(String hostname) { + try { + return getJdbcTemplate().queryForObject( + "SELECT int_concurrent_slots_limit FROM host WHERE str_name = ?", + Integer.class, hostname); + } catch (EmptyResultDataAccessException e) { + return 0; + } + } + @Override public void updateHostOs(HostInterface host, String os) { getJdbcTemplate().update("UPDATE host_stat SET str_os=? WHERE pk_host=?", os, diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/RqdReportStatic.java b/cuebot/src/main/java/com/imageworks/spcue/servant/RqdReportStatic.java index 2a30fb2cd..7913dee74 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/servant/RqdReportStatic.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/RqdReportStatic.java @@ -12,11 +12,15 @@ import com.imageworks.spcue.grpc.report.RqdReportRunningFrameCompletionResponse; import com.imageworks.spcue.grpc.report.RqdReportStatusRequest; import com.imageworks.spcue.grpc.report.RqdReportStatusResponse; +import com.imageworks.spcue.grpc.report.RqdReportGetHostSlotsLimitRequest; +import com.imageworks.spcue.grpc.report.RqdReportGetHostSlotsLimitResponse; +import com.imageworks.spcue.service.HostManager; public class RqdReportStatic extends RqdReportInterfaceGrpc.RqdReportInterfaceImplBase { private FrameCompleteHandler frameCompleteHandler; private HostReportHandler hostReportHandler; + private HostManager hostManager; @SuppressWarnings("unused") @@ -44,6 +48,16 @@ public void reportStatus(RqdReportStatusRequest 
request, responseObserver.onCompleted(); } + @Override + public void getHostSlotsLimit(RqdReportGetHostSlotsLimitRequest request, + StreamObserver responseObserver) { + int slotsLimit = hostManager.getHostConcurrentSlotsLimit(request.getName()); + responseObserver.onNext(RqdReportGetHostSlotsLimitResponse.newBuilder() + .setSlotsLimit(slotsLimit) + .build()); + responseObserver.onCompleted(); + } + public FrameCompleteHandler getFrameCompleteHandler() { return frameCompleteHandler; } @@ -59,4 +73,12 @@ public HostReportHandler getHostReportHandler() { public void setHostReportHandler(HostReportHandler hostReportHandler) { this.hostReportHandler = hostReportHandler; } + + public HostManager getHostManager() { + return hostManager; + } + + public void setHostManager(HostManager hostManager) { + this.hostManager = hostManager; + } } diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java index 47ab6c508..14cdd75a1 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManager.java @@ -75,6 +75,14 @@ public interface HostManager { */ void setConcurrentSlotsLimit(HostInterface host, int limit); + /** + * Gets the concurrent slots limit of a host by hostname. 
+ * + * @param hostname String + * @return int the concurrent slots limit + */ + int getHostConcurrentSlotsLimit(String hostname); + DispatchHost createHost(HostReport report); DispatchHost createHost(RenderHost host); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java index 094dba5f8..3d0dad129 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/HostManagerService.java @@ -99,6 +99,12 @@ public void setConcurrentSlotsLimit(HostInterface host, int limit) { hostDao.updateConcurrentSlotsLimit(host, limit); } + @Override + @Transactional(propagation = Propagation.REQUIRED, readOnly = true) + public int getHostConcurrentSlotsLimit(String hostname) { + return hostDao.getHostConcurrentSlotsLimit(hostname); + } + public void rebootWhenIdle(HostInterface host) { try { hostDao.updateHostState(host, HardwareState.REBOOT_WHEN_IDLE); diff --git a/cuebot/src/main/resources/conf/spring/applicationContext-grpc.xml b/cuebot/src/main/resources/conf/spring/applicationContext-grpc.xml index 612aeaff5..86293e8ac 100644 --- a/cuebot/src/main/resources/conf/spring/applicationContext-grpc.xml +++ b/cuebot/src/main/resources/conf/spring/applicationContext-grpc.xml @@ -1,4 +1,4 @@ - + - - - + @@ -35,7 +35,11 @@ - + @@ -53,7 +57,11 @@ - + @@ -160,7 +168,11 @@ - + @@ -169,7 +181,11 @@ - + @@ -184,7 +200,11 @@ - + @@ -193,12 +213,21 @@ - + + - + diff --git a/proto/src/report.proto b/proto/src/report.proto index 6ace5708e..806ec9973 100644 --- a/proto/src/report.proto +++ b/proto/src/report.proto @@ -23,6 +23,9 @@ service RqdReportInterface { // An incremental status report sent by RQD rpc ReportStatus(RqdReportStatusRequest) returns (RqdReportStatusResponse); + + // Get the host's slot limit + rpc GetHostSlotsLimit(RqdReportGetHostSlotsLimitRequest) returns 
(RqdReportGetHostSlotsLimitResponse); } @@ -180,3 +183,12 @@ message RqdReportStatusRequest { HostReport host_report = 1; } message RqdReportStatusResponse {} // Empty + +// GetHostSlotsLimit +message RqdReportGetHostSlotsLimitRequest { + string name = 1; +} + +message RqdReportGetHostSlotsLimitResponse { + int64 slots_limit = 1; +} diff --git a/rust/crates/dummy-cuebot/src/report_servant.rs b/rust/crates/dummy-cuebot/src/report_servant.rs index 1590dcb79..9eb290338 100644 --- a/rust/crates/dummy-cuebot/src/report_servant.rs +++ b/rust/crates/dummy-cuebot/src/report_servant.rs @@ -19,6 +19,9 @@ use opencue_proto::report::{ RqdReportRqdStartupResponse, RqdReportRunningFrameCompletionRequest, RqdReportRunningFrameCompletionResponse, RqdReportStatusRequest, RqdReportStatusResponse, }; +use opencue_proto::report::{ + RqdReportGetHostSlotsLimitRequest, RqdReportGetHostSlotsLimitResponse, +}; use tonic::transport::Server; use tonic::{async_trait, Request, Response, Status}; @@ -64,6 +67,23 @@ impl RqdReportInterface for ReportServant { Ok(Response::new(RqdReportStatusResponse {})) } + + /// Get the host's slot limit + async fn get_host_slots_limit( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status> + { + let name = request.into_inner().name; + println!( + "RqdReport: Received a get_host_slots_limit request with: {:?}", + name + ); + + Ok(Response::new(RqdReportGetHostSlotsLimitResponse { + slots_limit: -1, + })) + } } pub struct DummyCuebotServer {} diff --git a/rust/crates/rqd/src/frame/manager.rs b/rust/crates/rqd/src/frame/manager.rs index 811efebd1..d4cca5101 100644 --- a/rust/crates/rqd/src/frame/manager.rs +++ b/rust/crates/rqd/src/frame/manager.rs @@ -80,6 +80,8 @@ impl FrameManager { self.validate_grpc_frame(&run_frame)?; self.validate_machine_state(run_frame.ignore_nimby).await?; + let resource_id = run_frame.resource_id(); + // Create user if required. 
uid and gid ranges have already been verified let uid = match run_frame.uid_optional.as_ref().map(|o| match o { run_frame::UidOptional::Uid(v) => *v as u32, @@ -97,39 +99,13 @@ impl FrameManager { None => CONFIG.runner.default_uid, }; - // **Attention**: If an error happens between here and spawning a frame, the resources - // reserved need to be released. - - let num_cores = (run_frame.num_cores as u32).div_ceil(CONFIG.machine.core_multiplier); - - // Reserving cores will always yield a list of reserved thread_ids. If hyperthreading is off, - // the list should be ignored - let thread_ids = self - .machine - .reserve_cores(Either::Left(num_cores as usize), run_frame.resource_id()) - .await - .map_err(|err| { - FrameManagerError::Aborted(format!( - "Not launching, failed to reserve cpu resources {:?}", - err - )) - })?; // Although num_gpus is not required on a frame, the field is not optional on the proto // layer. =0 means None, !=0 means Some let gpu_list = match run_frame.num_gpus { 0 => None, _ => { + // TODO: Release GPUs in case of error when GPU support gets implemented let reserved_res = self.machine.reserve_gpus(run_frame.num_gpus as u32).await; - if reserved_res.is_err() { - // Release cores reserved on the last step - if let Err(err) = self.machine.release_cores(&run_frame.resource_id()).await { - warn!( - "Failed to release cores reserved for {} during gpu reservation failure. 
{}", - &run_frame.resource_id(), - err - ) - }; - } Some(reserved_res.map_err(|err| { FrameManagerError::Aborted(format!( "Not launching, insufficient resources {:?}", @@ -145,27 +121,90 @@ impl FrameManager { .environment .get("CUE_THREADABLE") .is_some_and(|v| v == "1"); - // Ignore the list of allocated threads if hyperthreading is off - let thread_ids = hyperthreaded.then_some(thread_ids); - let resource_id = run_frame.resource_id(); - let running_frame = Arc::new(RunningFrame::init( - run_frame, - uid, - CONFIG.runner.clone(), - thread_ids, - gpu_list, - self.machine.get_host_name().await, - )); + let slot_based_booking = self.machine.is_slot_configured().await; + // Keep track of reserved slots, if any + let mut reserved_slots = 0; + + let running_frame = match slot_based_booking { + // Core based booking + false => { + // **Attention**: If an error happens between here and spawning a frame, the resources + // reserved need to be released. + let num_cores = + (run_frame.num_cores as u32).div_ceil(CONFIG.machine.core_multiplier); + + // Reserving cores will always yield a list of reserved thread_ids. If hyperthreading is off, + // the list should be ignored + let thread_ids = self + .machine + .reserve_cores(Either::Left(num_cores as usize), run_frame.resource_id()) + .await + .map_err(|err| { + FrameManagerError::Aborted(format!( + "Not launching, failed to reserve cpu resources {:?}", + err + )) + })?; + // Ignore the list of allocated threads if hyperthreading is off + let thread_ids = hyperthreaded.then_some(thread_ids); + + Arc::new(RunningFrame::init( + run_frame, + uid, + CONFIG.runner.clone(), + thread_ids, + gpu_list, + self.machine.get_host_name(), + )) + } + // Slot based booking + true => { + reserved_slots = if run_frame.slots_required > 0 { + run_frame.slots_required as u32 + } else { + Err(FrameManagerError::InvalidArgument( + "Core based frame cannot be launched on a slot configured host".to_string(), + ))? 
+ }; + self.machine + .reserve_slots(reserved_slots) + .await + .map_err(|err| { + FrameManagerError::Aborted(format!( + "Not launching, failed to reserve {:} slots {:?}", + run_frame.slots_required, err + )) + })?; + + Arc::new(RunningFrame::init( + run_frame, + uid, + CONFIG.runner.clone(), + // Disable taskset to avoid binding this frame to specific threads + None, + gpu_list, + self.machine.get_host_name(), + )) + } + }; if cfg!(feature = "containerized_frames") && CONFIG.runner.run_on_docker { #[cfg(feature = "containerized_frames")] self.spawn_docker_frame(running_frame, false); } else if self.spawn_running_frame(running_frame, false).is_err() { - // Release cores reserved if spawning the frame failed - if let Err(err) = self.machine.release_cores(&resource_id).await { + let release_res = if slot_based_booking { + // Release slots reserved if spawning the frame failed + self.machine.release_slots(reserved_slots).await + } else { + // Release cores reserved if spawning the frame failed + self.machine.release_cores(&resource_id).await + }; + + // Log failure to release + if let Err(err) = release_res { warn!( - "Failed to release cores reserved for {} during spawn failure. {}", + "Failed to release resources reserved for {} during spawn failure. {}", &resource_id, err ); } @@ -207,43 +246,75 @@ impl FrameManager { }) .collect(); let mut errors = Vec::new(); + let slot_based_booking = self.machine.is_slot_configured().await; + for path in snapshot_dir { let running_frame = RunningFrame::from_snapshot(&path, CONFIG.runner.clone()) .await .map(Arc::new); match running_frame { Ok(running_frame) => { - // Update reservations. 
If a thread_ids list exists, the frame was booked using affinity - if let Err(err) = match &running_frame.thread_ids { - Some(thread_ids) => { - self.machine - .reserve_cores( - Either::Right(thread_ids.clone()), - running_frame.request.resource_id(), - ) - .await + let resource_id = running_frame.request.resource_id(); + let mut reserved_slots = 0; + + // Update reservations based on booking mode + if let Err(err) = match slot_based_booking { + // Core-based booking: If a thread_ids list exists, the frame was booked using affinity + false => { + match &running_frame.thread_ids { + Some(thread_ids) => { + self.machine + .reserve_cores( + Either::Right(thread_ids.clone()), + running_frame.request.resource_id(), + ) + .await + } + None => { + let num_cores = (running_frame.request.num_cores as u32) + .div_ceil(CONFIG.machine.core_multiplier); + self.machine + .reserve_cores( + Either::Left(num_cores as usize), + running_frame.request.resource_id(), + ) + .await + } + } + // Ignore reserved threads as they are no longer necessary + .map(|_| ()) } - None => { - let num_cores = (running_frame.request.num_cores as u32) - .div_ceil(CONFIG.machine.core_multiplier); - self.machine - .reserve_cores( - Either::Left(num_cores as usize), - running_frame.request.resource_id(), - ) - .await + // Slot-based booking + true => { + reserved_slots = if running_frame.request.slots_required > 0 { + running_frame.request.slots_required as u32 + } else { + errors.push(format!( + "Core based frame {} cannot be recovered on a slot configured host", + resource_id + )); + continue; + }; + self.machine.reserve_slots(reserved_slots).await } } { errors.push(err.to_string()); } - let resource_id = running_frame.request.resource_id(); if CONFIG.runner.run_on_docker { todo!("Recovering frames when running on docker is not yet supported") } else if self.spawn_running_frame(running_frame, true).is_err() { - if let Err(err) = self.machine.release_cores(&resource_id).await { + let release_res = if 
slot_based_booking { + // Release slots reserved if spawning the frame failed + self.machine.release_slots(reserved_slots).await + } else { + self.machine.release_cores(&resource_id).await + }; + + // Failed to release + if let Err(err) = release_res { warn!( - "Failed to release cores reserved for {} during recover spawn error. {}", + "Failed to release resources reserved for {} during recover spawn error. {}", &resource_id, err ); } diff --git a/rust/crates/rqd/src/report/report_client.rs b/rust/crates/rqd/src/report/report_client.rs index 31995e894..092cb9944 100644 --- a/rust/crates/rqd/src/report/report_client.rs +++ b/rust/crates/rqd/src/report/report_client.rs @@ -183,6 +183,7 @@ pub trait ReportInterface { run_time: u32, ) -> Result<()>; async fn send_host_report(&self, host_report: pb::HostReport) -> Result<()>; + async fn get_host_slots_limit(&self, name: String) -> Result>; } #[async_trait] @@ -242,4 +243,23 @@ impl ReportInterface for ReportClient { .into_diagnostic() .and(Ok(())) } + + async fn get_host_slots_limit(&self, name: String) -> Result> { + let request = pb::RqdReportGetHostSlotsLimitRequest { name }; + let slots_limit = self + .get_client() + .await? + .get_host_slots_limit(request) + .await + .into_diagnostic()? + .into_inner() + .slots_limit; + + // Host with limit <= 0 are running on core based booking mode, so they don't have a limit + if slots_limit > 0 { + Ok(Some(slots_limit as u32)) + } else { + Ok(None) + } + } } diff --git a/rust/crates/rqd/src/system/machine.rs b/rust/crates/rqd/src/system/machine.rs index 14843c835..ab547b1a0 100644 --- a/rust/crates/rqd/src/system/machine.rs +++ b/rust/crates/rqd/src/system/machine.rs @@ -78,6 +78,11 @@ pub struct MachineMonitor { pub core_manager: Arc>, pub running_frames_cache: Arc, last_host_state: Arc>>, + // Host name is only written once at the beginning of start. After that it is only read. 
+ // This makes it safe to have a sync lock to give the object mutability (it can't be + // initialized at init) but avoid unecessary awaits + host_name: std::sync::RwLock>, + slot_state: RwLock>, interrupt: Mutex>>, reboot_when_idle: Mutex, #[cfg(feature = "nimby")] @@ -86,6 +91,11 @@ pub struct MachineMonitor { nimby_state: RwLock, } +struct SlotState { + slot_limit: u32, + slots_consumed: u32, +} + static MACHINE_MONITOR: OnceCell> = OnceCell::const_new(); pub async fn instance() -> Result> { @@ -153,6 +163,7 @@ impl MachineMonitor { system_manager: Mutex::new(system_manager), running_frames_cache: RunningFrameCache::init(), last_host_state: Arc::new(RwLock::new(None)), + host_name: std::sync::RwLock::new(None), interrupt: Mutex::new(None), reboot_when_idle: Mutex::new(false), #[cfg(feature = "nimby")] @@ -160,6 +171,7 @@ impl MachineMonitor { #[cfg(feature = "nimby")] nimby_state: RwLock::new(LockState::Open), core_manager, + slot_state: RwLock::new(None), }) } @@ -177,6 +189,14 @@ impl MachineMonitor { core_manager.get_core_info_report(self.maching_config.core_multiplier) }; + // Write host_name to the object + { + self.host_name + .write() + .unwrap_or_else(|p| p.into_inner()) + .replace(host_state.name.clone()); + } + self.last_host_state .write() .await @@ -216,6 +236,7 @@ impl MachineMonitor { _ = interval.tick() => { self.collect_and_send_host_report().await?; self.check_reboot_flag().await; + self.check_host_state_on_server().await; #[cfg(feature = "nimby")] if let Some(nimby) = &*self.nimby { @@ -337,6 +358,29 @@ impl MachineMonitor { } } + async fn check_host_state_on_server(&self) { + let client = self.report_client.clone(); + + if let Some(slot_limit) = client + .get_host_slots_limit(self.get_host_name()) + .await + .ok() + .flatten() + { + let mut current_state = self.slot_state.write().await; + let slots_consumed = current_state + .as_ref() + .map(|s| s.slots_consumed) + .unwrap_or(0); + + // Replace limit but keep consumed count + 
current_state.replace(SlotState { + slot_limit, + slots_consumed, + }); + } + } + async fn monitor_running_frames(&self) -> Result<()> { let mut finished_frames: Vec> = Vec::new(); let mut running_frames: Vec<(Arc, RunningState)> = Vec::new(); @@ -499,7 +543,19 @@ impl MachineMonitor { let frame_report = frame.clone_into_running_frame_info(); info!("Sending frame complete report: {}", frame); - if let Err(err) = self.release_cores(&frame.request.resource_id()).await { + // Either release slots or cores, depending on whether it was configured with slots + if frame.request.slots_required > 0 { + if let Err(err) = self + .release_slots(frame.request.slots_required as u32) + .await + { + warn!( + "Failed to release cores reserved by {}: {}", + frame.request.resource_id(), + err + ); + }; + } else if let Err(err) = self.release_cores(&frame.request.resource_id()).await { warn!( "Failed to release cores reserved by {}: {}", frame.request.resource_id(), @@ -567,6 +623,7 @@ pub trait Machine { async fn hardware_state(&self) -> Option; async fn memory_usage(&self) -> Option<(u32, u64)>; async fn nimby_locked(&self) -> bool; + async fn is_slot_configured(&self) -> bool; /// Reserve CPU cores for a resource /// @@ -588,6 +645,40 @@ pub trait Machine { resource_id: Uuid, ) -> Result, ReservationError>; + /// Reserve slot units for a resource + /// + /// # Arguments + /// + /// * `requested_slots` - Number of slots to reserve + /// + /// # Returns + /// + /// Returns `Ok(())` if the slots were successfully reserved + /// + /// # Errors + /// + /// Returns `ReservationError` if: + /// * There are not enough available slots (`NotEnoughResourcesAvailable`) + /// * Slot reservation is not configured on this machine (`InvalidSlotReservationRequest`) + async fn reserve_slots(&self, requested_slots: u32) -> Result<(), ReservationError>; + + /// Release slot units previously reserved by a resource + /// + /// # Arguments + /// + /// * `requested_slots` - Number of slots to release + 
/// + /// # Returns + /// + /// Returns `Ok(())` if the slots were successfully released + /// + /// # Errors + /// + /// Returns `ReservationError` if: + /// * Attempting to release more slots than are currently consumed (`NotEnoughResourcesAvailable`) + /// * Slot reservation is not configured on this machine (`InvalidSlotReservationRequest`) + async fn release_slots(&self, requested_slots: u32) -> Result<(), ReservationError>; + /// Release CPU cores previously reserved by a resource /// /// # Arguments @@ -627,7 +718,17 @@ pub trait Machine { /// The user ID (uid) of the created or existing user async fn create_user_if_unexisting(&self, username: &str, uid: u32, gid: u32) -> Result; - async fn get_host_name(&self) -> String; + /// Returns the hostname of this machine + /// + /// The hostname is determined during the initial startup report and remains + /// constant throughout the machine's lifecycle. If the hostname hasn't been + /// initialized yet (which shouldn't happen in normal operation), returns + /// "noname" as a fallback. 
+ /// + /// # Returns + /// + /// The machine's hostname as a String + fn get_host_name(&self) -> String; /// Send a signal to kill a process /// @@ -693,6 +794,10 @@ impl Machine for MachineMonitor { .unwrap_or(false) } + async fn is_slot_configured(&self) -> bool { + self.slot_state.read().await.as_ref().is_some() + } + async fn reserve_cores( &self, request: Either>, @@ -706,6 +811,38 @@ impl Machine for MachineMonitor { } } + async fn reserve_slots(&self, requested_slots: u32) -> Result<(), ReservationError> { + let mut slot_state = self.slot_state.write().await; + + match slot_state.as_mut() { + Some(slot_state) => { + if slot_state.slots_consumed + requested_slots < slot_state.slot_limit { + slot_state.slots_consumed += requested_slots; + Ok(()) + } else { + Err(ReservationError::NotEnoughResourcesAvailable) + } + } + None => Err(ReservationError::InvalidSlotReservationRequest), + } + } + + async fn release_slots(&self, released_slots: u32) -> Result<(), ReservationError> { + let mut slot_state = self.slot_state.write().await; + + match slot_state.as_mut() { + Some(slot_state) => { + if released_slots <= slot_state.slots_consumed { + slot_state.slots_consumed -= released_slots; + Ok(()) + } else { + Err(ReservationError::NotEnoughResourcesAvailable) + } + } + None => Err(ReservationError::InvalidSlotReservationRequest), + } + } + async fn release_cores(&self, resource_id: &Uuid) -> Result<(), ReservationError> { let mut core_manager = self.core_manager.write().await; core_manager.release_cores(resource_id).map(|_| ()) @@ -720,11 +857,11 @@ impl Machine for MachineMonitor { system.create_user_if_unexisting(username, uid, gid) } - async fn get_host_name(&self) -> String { - let lock = self.last_host_state.read().await; - - lock.as_ref() - .map(|h| h.name.clone()) + fn get_host_name(&self) -> String { + self.host_name + .read() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .clone() .unwrap_or("noname".to_string()) } diff --git 
a/rust/crates/rqd/src/system/manager.rs b/rust/crates/rqd/src/system/manager.rs index 4dda6f06f..b9516fb2e 100644 --- a/rust/crates/rqd/src/system/manager.rs +++ b/rust/crates/rqd/src/system/manager.rs @@ -70,6 +70,9 @@ pub enum ReservationError { #[error("Could not find core owner of this thread id")] CoreNotFoundForThread(Vec), + + #[error("Slot reservation requested when the host is configured to core based booking")] + InvalidSlotReservationRequest, } /// Represents attributes on a machine that should never change without restarting the From fb4172ad7592fddbc3c0698fbc28f2de3b4caed3 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Thu, 8 Jan 2026 16:23:46 -0800 Subject: [PATCH 15/17] spotless Apply --- .../java/com/imageworks/spcue/servant/RqdReportStatic.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/servant/RqdReportStatic.java b/cuebot/src/main/java/com/imageworks/spcue/servant/RqdReportStatic.java index 7913dee74..ef044834d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/servant/RqdReportStatic.java +++ b/cuebot/src/main/java/com/imageworks/spcue/servant/RqdReportStatic.java @@ -52,9 +52,8 @@ public void reportStatus(RqdReportStatusRequest request, public void getHostSlotsLimit(RqdReportGetHostSlotsLimitRequest request, StreamObserver responseObserver) { int slotsLimit = hostManager.getHostConcurrentSlotsLimit(request.getName()); - responseObserver.onNext(RqdReportGetHostSlotsLimitResponse.newBuilder() - .setSlotsLimit(slotsLimit) - .build()); + responseObserver.onNext( + RqdReportGetHostSlotsLimitResponse.newBuilder().setSlotsLimit(slotsLimit).build()); responseObserver.onCompleted(); } From e7bc337ee5c378f2645cf1b3ff7458af5cada7f1 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Fri, 9 Jan 2026 08:53:14 -0800 Subject: [PATCH 16/17] Add slots_required to LayerMonitorTree --- cuegui/cuegui/LayerMonitorTree.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/cuegui/cuegui/LayerMonitorTree.py b/cuegui/cuegui/LayerMonitorTree.py index 1d2ec0250..d5799506d 100644 --- a/cuegui/cuegui/LayerMonitorTree.py +++ b/cuegui/cuegui/LayerMonitorTree.py @@ -150,6 +150,11 @@ def __init__(self, parent): data=lambda layer: cuegui.Utils.secondsToHHHMM(layer.data.timeout_llu*60), sort=lambda layer: layer.data.timeout_llu, tip="Timeout for a frames\' LLU, Hours:Minutes") + self.addColumn("Slots Required", 65, id=23, + data=lambda layer: "-" if layer.data.slots_required <= 0 else str(layer.data.slots_required), + sort=lambda layer: layer.data.slots_required, + tip="Number of slots required per frame\n" + "(- means not slot-based)") cuegui.AbstractTreeWidget.AbstractTreeWidget.__init__(self, parent) # pylint: disable=no-member From 9729954ff327b22b48b3df23d5ca76ad9024cd54 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Fri, 9 Jan 2026 10:22:31 -0800 Subject: [PATCH 17/17] Add slots required configuration to LayerDialog --- cuegui/cuegui/LayerDialog.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/cuegui/cuegui/LayerDialog.py b/cuegui/cuegui/LayerDialog.py index d95cb5367..10fc7545f 100644 --- a/cuegui/cuegui/LayerDialog.py +++ b/cuegui/cuegui/LayerDialog.py @@ -170,6 +170,12 @@ def __init__(self, layers, parent=None): self.__timeout_llu.setSuffix(" minutes") self.__timeout_llu.setSpecialValueText("No timeout") + # Slots Required + self.__slots_required = QtWidgets.QSpinBox(self) + self.__slots_required.setRange(0, int(self._cfg().get('max_cores', 16))) + self.__slots_required.setSingleStep(1) + self.__slots_required.setSpecialValueText("Not slot-based") + # Memory Optimizer self.__mem_opt = QtWidgets.QCheckBox() self.__mem_opt.setChecked(self.getMemoryOptSetting()) @@ -228,6 +234,7 @@ def __init__(self, layers, parent=None): self.__max_gpus.setValue(self.getMaxGpus()) self.__timeout.setValue(self.getTimeout()) self.__timeout_llu.setValue(self.getTimeoutLLU()) + 
self.__slots_required.setValue(self.getSlotsRequired()) QtWidgets.QVBoxLayout(self) @@ -272,6 +279,10 @@ def __init__(self, layers, parent=None): self.__timeout_llu, False), multiSelect)) + layout.addWidget(EnableableItem(LayerPropertiesItem("Slots Required:", + self.__slots_required, + False), + multiSelect)) layout.addStretch() self.__group.setLayout(layout) @@ -336,6 +347,8 @@ def apply(self): layer.setTimeout(self.__timeout.value()) if self.__timeout_llu.isEnabled(): layer.setTimeoutLLU(self.__timeout_llu.value()) + if self.__slots_required.isEnabled(): + layer.setSlotsRequired(self.__slots_required.value()) if self.__tags.isEnabled(): self.__tags.apply() if self.__limits.isEnabled(): @@ -421,6 +434,14 @@ def getMemoryOptSetting(self): break return result + def getSlotsRequired(self): + """Gets the layer slots required.""" + result = 0 + for layer in self.__layers: + if layer.data.slots_required > result: + result = layer.data.slots_required + return result + def __translateToMemSpinbox(self, value): self.__mem.spinner.setValue(float(value) / 1024.0)