Skip to content

Commit 867e977

Browse files
committed
update type2 eval
1 parent a0c5b8d commit 867e977

8 files changed

Lines changed: 5673 additions & 0 deletions

File tree

qemu_integration/guest_libcuda/coherency.c

Lines changed: 585 additions & 0 deletions
Large diffs are not rendered by default.

qemu_integration/guest_libcuda/libcuda.c

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,17 @@ static int find_and_map_device(void)
280280
DLOG("Device mapped successfully, magic=0x%x version=0x%x\n",
281281
magic, reg_read32(CXL_GPU_REG_VERSION));
282282

283+
/* Check device ready */
284+
uint32_t status = reg_read32(CXL_GPU_REG_STATUS);
285+
if (!(status & CXL_GPU_STATUS_READY)) {
286+
DLOG("Device not ready, status=0x%x\n", status);
287+
munmap(map, g_bar_size);
288+
close(g_pci_fd);
289+
g_pci_fd = -1;
290+
g_regs = NULL;
291+
continue; /* try next device */
292+
}
293+
283294
closedir(dir);
284295
return 0;
285296
}
@@ -1126,6 +1137,166 @@ int cxl_p2p_get_status(int *num_peers, uint64_t *transfers_completed,
11261137
return CUDA_SUCCESS;
11271138
}
11281139

1140+
/* ========================================================================
1141+
* BAR4 Coherent Memory Support
1142+
* ======================================================================== */
1143+
1144+
static int g_bar4_fd = -1;
1145+
static volatile uint8_t *g_bar4_ptr = NULL;
1146+
static size_t g_bar4_size = 0;
1147+
static uint64_t g_coh_offset = 0; /* bump allocator offset */
1148+
1149+
static volatile uint8_t *ensure_bar4(void)
1150+
{
1151+
if (g_bar4_ptr) return g_bar4_ptr;
1152+
1153+
/* Find BAR4 for the device we already mapped */
1154+
char path[256];
1155+
/* Scan sysfs for the device whose BAR2 we have open */
1156+
DIR *dir = opendir("/sys/bus/pci/devices");
1157+
if (!dir) return NULL;
1158+
struct dirent *ent;
1159+
while ((ent = readdir(dir)) != NULL) {
1160+
if (ent->d_name[0] == '.') continue;
1161+
snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/resource", ent->d_name);
1162+
FILE *fp = fopen(path, "r");
1163+
if (!fp) continue;
1164+
uint64_t start, end, flags;
1165+
/* Skip BAR0..BAR3 (4 lines) */
1166+
for (int i = 0; i < 4; i++) {
1167+
if (fscanf(fp, "0x%lx 0x%lx 0x%lx\n", &start, &end, &flags) != 3) break;
1168+
}
1169+
/* Read BAR4 */
1170+
if (fscanf(fp, "0x%lx 0x%lx 0x%lx", &start, &end, &flags) == 3 && end > start) {
1171+
g_bar4_size = end - start + 1;
1172+
}
1173+
fclose(fp);
1174+
if (g_bar4_size == 0) continue;
1175+
1176+
/* Check vendor/device match */
1177+
snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/vendor", ent->d_name);
1178+
int fd = open(path, O_RDONLY);
1179+
if (fd < 0) continue;
1180+
char buf[32]; int n = read(fd, buf, sizeof(buf)-1); close(fd);
1181+
if (n <= 0) continue; buf[n] = '\0';
1182+
if ((uint16_t)strtol(buf, NULL, 16) != CXL_TYPE2_VENDOR_ID) { g_bar4_size = 0; continue; }
1183+
1184+
snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/device", ent->d_name);
1185+
fd = open(path, O_RDONLY);
1186+
if (fd < 0) continue;
1187+
n = read(fd, buf, sizeof(buf)-1); close(fd);
1188+
if (n <= 0) continue; buf[n] = '\0';
1189+
if ((uint16_t)strtol(buf, NULL, 16) != CXL_TYPE2_DEVICE_ID) { g_bar4_size = 0; continue; }
1190+
1191+
/* Check this is the same device we're using (status must be READY) */
1192+
snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/resource2", ent->d_name);
1193+
int bar2_fd = open(path, O_RDWR | O_SYNC);
1194+
if (bar2_fd < 0) { g_bar4_size = 0; continue; }
1195+
void *bar2_map = mmap(NULL, 4096, PROT_READ, MAP_SHARED, bar2_fd, 0);
1196+
if (bar2_map == MAP_FAILED) { close(bar2_fd); g_bar4_size = 0; continue; }
1197+
uint32_t magic = *(volatile uint32_t *)bar2_map;
1198+
uint32_t status = *(volatile uint32_t *)((uint8_t *)bar2_map + 8);
1199+
munmap(bar2_map, 4096);
1200+
close(bar2_fd);
1201+
if (magic != CXL_GPU_MAGIC || !(status & CXL_GPU_STATUS_READY)) { g_bar4_size = 0; continue; }
1202+
1203+
/* Map BAR4 */
1204+
snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/resource4", ent->d_name);
1205+
g_bar4_fd = open(path, O_RDWR);
1206+
if (g_bar4_fd < 0) { g_bar4_size = 0; continue; }
1207+
void *b4 = mmap(NULL, g_bar4_size, PROT_READ | PROT_WRITE, MAP_SHARED, g_bar4_fd, 0);
1208+
if (b4 == MAP_FAILED) { close(g_bar4_fd); g_bar4_fd = -1; g_bar4_size = 0; continue; }
1209+
g_bar4_ptr = (volatile uint8_t *)b4;
1210+
DLOG("Mapped BAR4 for %s (%zu MB)\n", ent->d_name, g_bar4_size >> 20);
1211+
closedir(dir);
1212+
return g_bar4_ptr;
1213+
}
1214+
closedir(dir);
1215+
return NULL;
1216+
}
1217+
1218+
int cxlCoherentAlloc(uint64_t size, void **host_ptr)
1219+
{
1220+
DLOG("cxlCoherentAlloc(size=%lu)\n", (unsigned long)size);
1221+
if (!host_ptr || size == 0) return 1;
1222+
volatile uint8_t *bar4 = ensure_bar4();
1223+
if (!bar4) return 3;
1224+
1225+
size = (size + 4095) & ~4095UL;
1226+
if (g_coh_offset + size > g_bar4_size) return 2;
1227+
1228+
*host_ptr = (void *)(bar4 + g_coh_offset);
1229+
g_coh_offset += size;
1230+
return 0;
1231+
}
1232+
1233+
/*
 * Release a region obtained from cxlCoherentAlloc().
 *
 * Currently a no-op: the argument is ignored and nothing is reclaimed.
 * Always returns 0.
 */
int cxlCoherentFree(void *host_ptr)
{
    const int rc = 0;

    (void)host_ptr;
    return rc;
}
1238+
1239+
void *cxlDeviceToHost(uint64_t dev_offset)
1240+
{
1241+
volatile uint8_t *bar4 = ensure_bar4();
1242+
if (!bar4) return NULL;
1243+
return (void *)(bar4 + dev_offset);
1244+
}
1245+
1246+
/*
 * Issue a full memory barrier via the compiler builtin __sync_synchronize().
 * Always returns 0.
 */
int cxlCoherentFence(void)
{
    const int rc = 0;

    __sync_synchronize();
    return rc;
}
1251+
1252+
/*
 * Set the bias mode for a coherent region.
 *
 * Currently a stub: all three arguments are ignored and 0 is returned.
 */
int cxlSetBias(void *host_ptr, uint64_t size, int bias_mode)
{
    const int rc = 0;

    (void)host_ptr;
    (void)size;
    (void)bias_mode;
    return rc;
}
1257+
1258+
/*
 * Query the bias mode of a coherent region.
 *
 * Currently a stub: host_ptr is ignored and, when bias_mode is non-NULL,
 * *bias_mode is set to 0. Always returns 0.
 */
int cxlGetBias(void *host_ptr, int *bias_mode)
{
    (void)host_ptr;

    if (bias_mode != 0) {
        *bias_mode = 0;
    }
    return 0;
}
1264+
1265+
/*
 * Flip the bias of a coherent region.
 *
 * Currently a stub: all three arguments are ignored and 0 is returned.
 */
int cxlBiasFlip(void *host_ptr, uint64_t size, int new_bias)
{
    const int rc = 0;

    (void)host_ptr;
    (void)size;
    (void)new_bias;
    return rc;
}
1270+
1271+
/*
 * Block of twelve 64-bit coherency counters.
 * NOTE(review): the meaning of each counter is inferred from its name only;
 * confirm against the device model before relying on it.
 */
typedef struct {
    uint64_t snoop_hits;
    uint64_t snoop_misses;
    uint64_t coherency_requests;
    uint64_t back_invalidations;
    uint64_t writebacks;
    uint64_t evictions;
    uint64_t bias_flips;
    uint64_t device_bias_hits;
    uint64_t host_bias_hits;
    uint64_t upgrades;
    uint64_t downgrades;
    uint64_t directory_entries;
} CXLCoherencyStats;
1278+
1279+
/*
 * Fill *stats with the current coherency counters.
 *
 * Currently a stub: every counter is reported as zero.
 * Returns 1 when stats is NULL, 0 otherwise.
 */
int cxlGetCoherencyStats(CXLCoherencyStats *stats)
{
    if (stats == 0) {
        return 1;
    }

    /* All members are uint64_t, so this clears the whole object. */
    *stats = (CXLCoherencyStats){0};
    return 0;
}
1285+
1286+
/*
 * Reset the coherency counters.
 *
 * Currently a stub with nothing to reset. Always returns 0.
 */
int cxlResetCoherencyStats(void)
{
    const int rc = 0;

    return rc;
}
1290+
1291+
/*
 * Report the base address and size of the coherent (BAR4) mapping.
 *
 * base  Optional; receives the mapping address, or 0 when BAR4 is unmapped.
 * size  Optional; receives the mapping length in bytes, or 0 when BAR4 is
 *       unmapped, so a zero base is never paired with a non-zero size.
 * dev   Ignored.
 *
 * Always returns 0, including when BAR4 could not be mapped; callers must
 * check *base to detect that case.
 */
CUresult cuCxlGetCoherentBase(CUdeviceptr *base, size_t *size, CUdevice dev)
{
    (void)dev;

    volatile uint8_t *bar4 = ensure_bar4();
    if (base) {
        *base = bar4 ? (CUdeviceptr)(uintptr_t)bar4 : 0;
    }
    if (size) {
        *size = bar4 ? g_bar4_size : 0;
    }
    return 0;
}
1299+
11291300
/* Library initialization/cleanup */
11301301
__attribute__((constructor))
11311302
static void libcuda_init(void)
@@ -1137,6 +1308,14 @@ __attribute__((destructor))
11371308
static void libcuda_cleanup(void)
11381309
{
11391310
DLOG("libcuda.so unloading\n");
1311+
if (g_bar4_ptr) {
1312+
munmap((void *)g_bar4_ptr, g_bar4_size);
1313+
g_bar4_ptr = NULL;
1314+
}
1315+
if (g_bar4_fd >= 0) {
1316+
close(g_bar4_fd);
1317+
g_bar4_fd = -1;
1318+
}
11401319
if (g_regs) {
11411320
munmap((void *)g_regs, g_bar_size);
11421321
g_regs = NULL;

0 commit comments

Comments
 (0)