-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_gpu.py
More file actions
60 lines (49 loc) · 1.75 KB
/
test_gpu.py
File metadata and controls
60 lines (49 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import torch
import torchvision
from torch import nn
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
# Define the MNIST classification network
class MNISTNet(nn.Module):
    """Two-layer fully connected classifier for 28x28 inputs.

    The forward pass flattens its input to rows of 28 * 28 values and
    applies ``fc1`` (784 -> 128) followed directly by ``fc2`` (128 -> 10).
    No activation is applied between the two layers, and the raw ``fc2``
    output is returned (10 scores per flattened row).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return a ``(rows, 10)`` tensor of scores for the images in ``x``.

        ``x`` may have any shape whose element count is a multiple of
        28 * 28; it is flattened to ``(-1, 784)`` before the linear layers.
        """
        # reshape() instead of view(): view() raises a RuntimeError when the
        # tensor's strides cannot express the new shape (e.g. a transposed
        # batch), whereas reshape() returns the same view when possible and
        # falls back to a copy otherwise.
        x = x.reshape(-1, 28 * 28)
        x = self.fc1(x)
        x = self.fc2(x)
        return x
# Get the number of GPUs in the system
num_gpus = torch.cuda.device_count()
if num_gpus == 0:
    # Without this the script would finish silently, which is
    # indistinguishable from "every GPU passed".
    print("No CUDA GPUs detected; nothing to test")

# Load the MNIST training split (downloaded into the current directory
# when it is not already present)
mnist = MNIST(
    '.', train=True, download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ])
)

# Create a single data loader shared by every GPU test. It does not depend
# on the device, so there is no need to rebuild it per GPU. pin_memory=True
# makes the loader return batches in page-locked host memory, which is what
# allows the non_blocking host-to-device copies below.
data_loader = DataLoader(mnist, batch_size=128, shuffle=True, pin_memory=True)

# Iterate over the GPUs and test each one
for i in range(num_gpus):
    try:
        # Test GPU i by training an MNIST classification network on it
        device = torch.device(f"cuda:{i}")

        # Only the model lives on the GPU permanently; the dataset stays on
        # the host and is copied over one batch at a time.
        model = MNISTNet().to(device)

        # Train the MNIST classification network
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
        for epoch in range(10):
            for data, target in data_loader:
                data = data.to(device, non_blocking=True)
                target = target.to(device, non_blocking=True)
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

        # CUDA kernels run asynchronously, so a device fault may only be
        # reported at a later synchronization point. Synchronize inside the
        # try so such errors are attributed to this GPU.
        torch.cuda.synchronize(device)
    except Exception as exc:
        # GPU i is not working. `except Exception` (not a bare `except`)
        # lets Ctrl-C / SystemExit abort the run instead of being reported
        # as a GPU failure.
        print(f"GPU {i} is not working")
        print(f"  reason: {exc!r}")
    else:
        print(f"GPU {i} is working")