Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ sandbox/kafka-data
sandbox/zookeeper-data
sandbox/zookeeper-logs
sandbox/rqd/shots/
sandbox/pgadmin-data
docs/_data/version.yml
target/*

Expand Down
1 change: 1 addition & 0 deletions cuebot/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ compileTestJava {
options.compilerArgs << "-Xlint:all,-serial" << "-Werror"
}


protobuf {
protoc {
// The protoc compiler
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public class DispatchFrame extends FrameEntity implements FrameInterface {
public int minGpus;
public int maxGpus;
public long minGpuMemory;
public int slotsRequired;

// A comma separated list of services
public String services;
Expand Down
2 changes: 2 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/HostEntity.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class HostEntity extends Entity implements HostInterface {
public int idleGpus;
public long gpuMemory;
public long idleGpuMemory;
public int concurrentSlotsLimit;

public boolean unlockAtBoot;

Expand All @@ -61,6 +62,7 @@ public HostEntity(Host grpcHost) {
this.idleGpus = (int) grpcHost.getIdleGpus();
this.gpuMemory = grpcHost.getGpuMemory();
this.idleGpuMemory = grpcHost.getIdleGpuMemory();
this.concurrentSlotsLimit = grpcHost.getConcurrentSlotsLimit();
}

public String getHostId() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class LayerDetail extends LayerEntity implements LayerInterface {
public int timeout_llu;
public int dispatchOrder;
public int totalFrameCount;
public int slotsRequired;

public Set<String> tags = new LinkedHashSet<String>();
public Set<String> services = new LinkedHashSet<String>();
Expand Down
4 changes: 4 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ public class VirtualProc extends FrameEntity implements ProcInterface {
public long gpuMemoryUsed;
public long gpuMemoryMax;

public int slotsRequired;

public boolean unbooked;
public boolean usageRecorded = false;
public boolean isLocalDispatch = false;
Expand Down Expand Up @@ -101,6 +103,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame,
proc.memoryReserved = frame.getMinMemory();
proc.gpusReserved = frame.minGpus;
proc.gpuMemoryReserved = frame.minGpuMemory;
proc.slotsRequired = frame.slotsRequired;

/*
* Frames that are announcing cores less than 100 are not multi-threaded so there is no
Expand Down Expand Up @@ -237,6 +240,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame,
proc.memoryReserved = frame.getMinMemory();
proc.gpusReserved = frame.minGpus;
proc.gpuMemoryReserved = frame.minGpuMemory;
proc.slotsRequired = frame.slotsRequired;

int wholeCores = (int) (Math.floor(host.idleCores / 100.0));
if (wholeCores == 0) {
Expand Down
18 changes: 17 additions & 1 deletion cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,22 @@ public interface HostDao {
*/
void updateThreadMode(HostInterface host, ThreadMode mode);

/**
* Update the host's concurrent procs limit.
*
* @param host HostInterface
* @param limit int (0 for no limit)
*/
void updateConcurrentSlotsLimit(HostInterface host, int limit);

/**
* Get the host's concurrent slots limit by hostname.
*
* @param hostname String
* @return int the concurrent slots limit
*/
int getHostConcurrentSlotsLimit(String hostname);

/**
* Update the specified host's hardware information.
*
Expand All @@ -260,7 +276,7 @@ public interface HostDao {
*/
void updateHostStats(HostInterface host, long totalMemory, long freeMemory, long totalSwap,
long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory, long freeGpuMemory,
int load, Timestamp bootTime, String os);
int load, Timestamp bootTime, String os, int runningProcs);

/**
* Return true if the HardwareState is Up, false if it is anything else.
Expand Down
8 changes: 8 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,14 @@ public interface LayerDao {
*/
void updateTimeoutLLU(LayerInterface layer, int timeout_llu);

/**
* Updates the slots required for a layer.
*
* @param layer the layer to update
* @param slots the number of slots required (<0 means the host is not slot-based)
*/
void updateLayerSlotsRequired(LayerInterface layer, int slots);

/**
* Lowers the minimum memory on a layer if the layer is using less memory and the currnet min
* memory is the dispatcher default.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ private static final String replaceQueryForFifo(String query) {
"int_gpus_min, " +
"int_gpus_max, " +
"int_gpu_mem_min, " +
"int_slots_required, " +
"str_cmd, " +
"str_range, " +
"int_chunk_size, " +
Expand Down Expand Up @@ -588,6 +589,7 @@ private static final String replaceQueryForFifo(String query) {
"layer.int_gpus_min, " +
"layer.int_gpus_max, " +
"layer.int_gpu_mem_min, " +
"layer.int_slots_required, " +
"layer.str_cmd, " +
"layer.str_range, " +
"layer.int_chunk_size, " +
Expand Down Expand Up @@ -676,6 +678,7 @@ private static final String replaceQueryForFifo(String query) {
"layer.b_threadable, " +
"layer.int_mem_min, " +
"layer.int_gpu_mem_min, " +
"layer.int_slots_required, " +
"layer.str_cmd, " +
"layer.str_range, " +
"layer.int_chunk_size, " +
Expand Down Expand Up @@ -765,6 +768,7 @@ private static final String replaceQueryForFifo(String query) {
"layer.int_gpus_min, " +
"layer.int_gpus_max, " +
"layer.int_gpu_mem_min, " +
"layer.int_slots_required, " +
"layer.str_cmd, " +
"layer.str_range, " +
"layer.int_chunk_size, " +
Expand Down Expand Up @@ -847,6 +851,7 @@ private static final String replaceQueryForFifo(String query) {
"layer.int_gpus_min, " +
"layer.int_gpus_max, " +
"layer.int_gpu_mem_min, " +
"layer.int_slots_required, " +
"layer.str_cmd, " +
"layer.str_range, " +
"layer.int_chunk_size, " +
Expand Down Expand Up @@ -932,6 +937,7 @@ private static final String replaceQueryForFifo(String query) {
"layer.int_gpus_min, " +
"layer.int_gpus_max, " +
"layer.int_gpu_mem_min, " +
"layer.int_slots_required, " +
"layer.str_cmd, " +
"layer.str_range, " +
"layer.int_chunk_size, " +
Expand Down Expand Up @@ -1020,6 +1026,7 @@ private static final String replaceQueryForFifo(String query) {
"layer.int_gpus_min, " +
"layer.int_gpus_max, " +
"layer.int_gpu_mem_min, " +
"layer.int_slots_required, " +
"layer.str_cmd, " +
"layer.str_range, " +
"layer.int_chunk_size, " +
Expand Down Expand Up @@ -1108,6 +1115,7 @@ private static final String replaceQueryForFifo(String query) {
"layer.int_gpus_min, " +
"layer.int_gpus_max, " +
"layer.int_gpu_mem_min, " +
"layer.int_slots_required, " +
"layer.int_cores_max, " +
"layer.str_cmd, " +
"layer.str_range, " +
Expand Down Expand Up @@ -1191,6 +1199,7 @@ private static final String replaceQueryForFifo(String query) {
"layer.int_gpus_min, " +
"layer.int_gpus_max, " +
"layer.int_gpu_mem_min, " +
"layer.int_slots_required, " +
"layer.str_cmd, " +
"layer.str_range, " +
"layer.int_chunk_size, " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ public DispatchFrame mapRow(ResultSet rs, int rowNum) throws SQLException {
frame.minGpus = rs.getInt("int_gpus_min");
frame.maxGpus = rs.getInt("int_gpus_max");
frame.minGpuMemory = rs.getLong("int_gpu_mem_min");
frame.slotsRequired = rs.getInt("int_slots_required");
frame.version = rs.getInt("int_version");
frame.services = rs.getString("str_services");
frame.os = rs.getString("str_os");
Expand All @@ -252,8 +253,8 @@ public DispatchFrame mapRow(ResultSet rs, int rowNum) throws SQLException {
+ "layer.str_type AS layer_type, " + "layer.str_cmd, " + "layer.int_cores_min,"
+ "layer.int_cores_max," + "layer.b_threadable," + "layer.int_mem_min, "
+ "layer.int_gpus_min," + "layer.int_gpus_max," + "layer.int_gpu_mem_min, "
+ "layer.str_range, " + "layer.int_chunk_size, " + "layer.str_services " + "FROM "
+ "layer, " + "job, " + "show, "
+ "layer.int_slots_required, " + "layer.str_range, " + "layer.int_chunk_size, "
+ "layer.str_services " + "FROM " + "layer, " + "job, " + "show, "
+ "frame LEFT JOIN proc ON (proc.pk_frame = frame.pk_frame) " + "WHERE "
+ "job.pk_show = show.pk_show " + "AND " + "frame.pk_job = job.pk_job " + "AND "
+ "frame.pk_layer = layer.pk_layer " + "AND " + "frame.pk_frame = ?";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ public HostEntity mapRow(ResultSet rs, int rowNum) throws SQLException {
host.idleGpus = rs.getInt("int_gpus_idle");
host.gpuMemory = rs.getLong("int_gpu_mem");
host.idleGpuMemory = rs.getLong("int_gpu_mem_idle");
host.concurrentSlotsLimit = rs.getInt("int_concurrent_slots_limit");
host.dateBooted = rs.getDate("ts_booted");
host.dateCreated = rs.getDate("ts_created");
host.datePinged = rs.getDate("ts_ping");
Expand Down Expand Up @@ -131,6 +132,7 @@ public String getFacilityId() {
+ " host.int_gpus_idle, "
+ " host.int_gpu_mem, "
+ " host.int_gpu_mem_idle, "
+ " host.int_concurrent_slots_limit, "
+ " host.ts_created, "
+ " host.str_name, "
+ " host_stat.str_state, "
Expand Down Expand Up @@ -395,22 +397,23 @@ public CallableStatement createCallableStatement(Connection con) throws SQLExcep
+ " int_load = ?, "
+ " ts_booted = ?, "
+ " ts_ping = current_timestamp, "
+ " str_os = ? "
+ " str_os = ?, "
+ " int_running_procs = ? "
+ "WHERE "
+ " pk_host = ?";

@Override
public void updateHostStats(HostInterface host, long totalMemory, long freeMemory,
long totalSwap, long freeSwap, long totalMcp, long freeMcp, long totalGpuMemory,
long freeGpuMemory, int load, Timestamp bootTime, String os) {
long freeGpuMemory, int load, Timestamp bootTime, String os, int runningProcs) {

if (os == null) {
os = Dispatcher.OS_DEFAULT;
}

getJdbcTemplate().update(UPDATE_RENDER_HOST, totalMemory, freeMemory, totalSwap, freeSwap,
totalMcp, freeMcp, totalGpuMemory, freeGpuMemory, load, bootTime, os,
host.getHostId());
runningProcs, host.getHostId());
}

@Override
Expand Down Expand Up @@ -562,6 +565,23 @@ public void updateThreadMode(HostInterface host, ThreadMode mode) {
mode.getNumber(), host.getHostId());
}

@Override
public void updateConcurrentSlotsLimit(HostInterface host, int limit) {
getJdbcTemplate().update("UPDATE host SET int_concurrent_slots_limit=? WHERE pk_host=?",
limit, host.getHostId());
}

@Override
public int getHostConcurrentSlotsLimit(String hostname) {
try {
return getJdbcTemplate().queryForObject(
"SELECT int_concurrent_slots_limit FROM host WHERE str_name = ?",
Integer.class, hostname);
} catch (EmptyResultDataAccessException e) {
return 0;
}
}

@Override
public void updateHostOs(HostInterface host, String os) {
getJdbcTemplate().update("UPDATE host_stat SET str_os=? WHERE pk_host=?", os,
Expand Down Expand Up @@ -631,7 +651,7 @@ public boolean isNimbyHost(HostInterface h) {
/**
* Checks if the passed in name looks like a fully qualified domain name. If so, returns the
* hostname without the domain. Otherwise returns the passed in name unchanged.
*
*
* @param fqdn - String
* @return String - hostname
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ public LayerDetail mapRow(ResultSet rs, int rowNum) throws SQLException {
layer.services.addAll(Lists.newArrayList(rs.getString("str_services").split(",")));
layer.timeout = rs.getInt("int_timeout");
layer.timeout_llu = rs.getInt("int_timeout_llu");
layer.slotsRequired = rs.getInt("int_slots_required");
return layer;
}
};
Expand Down Expand Up @@ -241,7 +242,8 @@ public LayerInterface getLayer(String id) {
+ "int_dispatch_order, " + "str_tags, " + "str_type," + "int_cores_min, "
+ "int_cores_max, " + "b_threadable, " + "int_mem_min, " + "int_gpus_min, "
+ "int_gpus_max, " + "int_gpu_mem_min, " + "str_services, " + "int_timeout,"
+ "int_timeout_llu " + ") " + "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
+ "int_timeout_llu, " + "int_slots_required " + ") "
+ "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";

@Override
public void insertLayerDetail(LayerDetail l) {
Expand All @@ -250,7 +252,7 @@ public void insertLayerDetail(LayerDetail l) {
l.chunkSize, l.dispatchOrder, StringUtils.join(l.tags, " | "), l.type.toString(),
l.minimumCores, l.maximumCores, l.isThreadable, l.minimumMemory, l.minimumGpus,
l.maximumGpus, l.minimumGpuMemory, StringUtils.join(l.services, ","), l.timeout,
l.timeout_llu);
l.timeout_llu, l.slotsRequired);
}

@Override
Expand Down Expand Up @@ -555,6 +557,12 @@ public void updateTimeoutLLU(LayerInterface layer, int timeout_llu) {
layer.getLayerId());
}

@Override
public void updateLayerSlotsRequired(LayerInterface layer, int slots) {
getJdbcTemplate().update("UPDATE layer SET int_slots_required=? WHERE pk_layer=?", slots,
layer.getLayerId());
}

@Override
public void enableMemoryOptimizer(LayerInterface layer, boolean value) {
getJdbcTemplate().update("UPDATE layer SET b_optimize=? WHERE pk_layer=?", value,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,7 @@ public static NestedHost.Builder mapNestedHostBuilder(ResultSet rs) throws SQLEx
.setLockState(LockState.valueOf(SqlUtil.getString(rs, "str_lock_state")))
.setHasComment(rs.getBoolean("b_comment"))
.setThreadMode(ThreadMode.values()[rs.getInt("int_thread_mode")])
.setConcurrentSlotsLimit(rs.getInt("int_concurrent_slots_limit"))
.setOs(SqlUtil.getString(rs, "str_os"));

String tags = SqlUtil.getString(rs, "str_tags");
Expand Down Expand Up @@ -998,6 +999,7 @@ public static Host.Builder mapHostBuilder(ResultSet rs) throws SQLException {
builder.setLockState(LockState.valueOf(SqlUtil.getString(rs, "str_lock_state")));
builder.setHasComment(rs.getBoolean("b_comment"));
builder.setThreadMode(ThreadMode.values()[rs.getInt("int_thread_mode")]);
builder.setConcurrentSlotsLimit(rs.getInt("int_concurrent_slots_limit"));
builder.setOs(SqlUtil.getString(rs, "str_os"));

String tags = SqlUtil.getString(rs, "str_tags");
Expand Down Expand Up @@ -1183,7 +1185,8 @@ public Layer mapRow(ResultSet rs, int rowNum) throws SQLException {
Arrays.asList(SqlUtil.getString(rs, "str_limit_names").split(",")))
.setMemoryOptimizerEnabled(rs.getBoolean("b_optimize"))
.setTimeout(rs.getInt("int_timeout"))
.setTimeoutLlu(rs.getInt("int_timeout_llu"));
.setTimeoutLlu(rs.getInt("int_timeout_llu"))
.setSlotsRequired(rs.getInt("int_slots_required"));

LayerStats.Builder statsBuilder = LayerStats.newBuilder()
.setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores")))
Expand Down Expand Up @@ -1710,9 +1713,9 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException {
+ "host.int_cores_idle," + "host.int_mem," + "host.int_mem_idle," + "host.int_gpus,"
+ "host.int_gpus_idle," + "host.int_gpu_mem," + "host.int_gpu_mem_idle,"
+ "host.str_tags," + "host.str_lock_state," + "host.b_comment,"
+ "host.int_thread_mode," + "host_stat.str_os," + "host_stat.int_mem_total,"
+ "host_stat.int_mem_free," + "host_stat.int_swap_total," + "host_stat.int_swap_free,"
+ "host_stat.int_mcp_total," + "host_stat.int_mcp_free,"
+ "host.int_thread_mode," + "host.int_concurrent_slots_limit," + "host_stat.str_os,"
+ "host_stat.int_mem_total," + "host_stat.int_mem_free," + "host_stat.int_swap_total,"
+ "host_stat.int_swap_free," + "host_stat.int_mcp_total," + "host_stat.int_mcp_free,"
+ "host_stat.int_gpu_mem_total," + "host_stat.int_gpu_mem_free,"
+ "host_stat.int_load, " + "alloc.str_name AS alloc_name " + "FROM " + "alloc,"
+ "facility, " + "host_stat," + "host " + "WHERE " + "host.pk_alloc = alloc.pk_alloc "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ public RunFrame prepareRqdRunFrame(VirtualProc proc, DispatchFrame frame) {
.setNumCores(proc.coresReserved).setNumGpus(proc.gpusReserved)
.setStartTime(System.currentTimeMillis()).setIgnoreNimby(proc.isLocalDispatch)
.setOs(proc.os).setSoftMemoryLimit(frame.softMemoryLimit).setLokiUrl(frame.lokiURL)
.setHardMemoryLimit(frame.hardMemoryLimit)
.setHardMemoryLimit(frame.hardMemoryLimit).setSlotsRequired(proc.slotsRequired)
.putAllEnvironment(jobDao.getEnvironment(frame))
.putAllEnvironment(layerDao.getLayerEnvironment(frame)).putEnvironment("CUE3", "1")
.putEnvironment("CUE_THREADS", String.valueOf(threads))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ public void handleHostReport(HostReport report, boolean isBoot) {
rhost.getTotalSwap(), rhost.getFreeSwap(), rhost.getTotalMcp(),
rhost.getFreeMcp(), rhost.getTotalGpuMem(), rhost.getFreeGpuMem(),
rhost.getLoad(), new Timestamp(rhost.getBootTime() * 1000l),
rhost.getAttributesMap().get("SP_OS"));
rhost.getAttributesMap().get("SP_OS"), report.getFramesCount());

// Both logics are conflicting, only change hardware state if
// there was no need for a tempDirStorage state change
Expand Down
Loading
Loading