Skip to content

Commit aba40cd

Browse files
committed
add a test for a NULL local work size vs. a suggested local work size
1 parent bca2272 commit aba40cd

File tree

1 file changed

+56
-0
lines changed
  • samples/benchmarks/00_apibenchmark

1 file changed

+56
-0
lines changed

samples/benchmarks/00_apibenchmark/main.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ struct OpenCLBenchmarkEnvironment
5555
ooq = cl::CommandQueue{context, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE};
5656
}
5757

58+
void TearDown(void)
59+
{
60+
ioq = NULL;
61+
ooq = NULL;
62+
}
63+
5864
cl::Platform platform;
5965
cl::Device device;
6066
cl::Context context;
@@ -208,6 +214,8 @@ struct Kernel : public benchmark::Fixture
208214
cl::Program program;
209215
cl::Kernel kernel;
210216

217+
clGetKernelSuggestedLocalWorkSizeKHR_fn clGetKernelSuggestedLocalWorkSizeKHR = nullptr;
218+
211219
virtual void SetUp(benchmark::State& state) override {
212220
queue = env.ioq;
213221

@@ -225,6 +233,9 @@ struct Kernel : public benchmark::Fixture
225233
kernel = cl::Kernel{program, "Silly"};
226234

227235
kernel.setArg(0, nullptr);
236+
237+
clGetKernelSuggestedLocalWorkSizeKHR = (clGetKernelSuggestedLocalWorkSizeKHR_fn)
238+
clGetExtensionFunctionAddressForPlatform(env.platform(), "clGetKernelSuggestedLocalWorkSizeKHR");
228239
}
229240
virtual void TearDown(benchmark::State& state) override {
230241
program = NULL;
@@ -385,6 +396,48 @@ BENCHMARK_DEFINE_F(Kernel, clEnqueueNDRangeKernel_overhead)(benchmark::State& st
385396
}
386397
BENCHMARK_REGISTER_F(Kernel, clEnqueueNDRangeKernel_overhead)->ArgsProduct({{0, 1}, {1, 32*1024*1024}});
387398

399+
// Times clEnqueueNDRangeKernel + clFinish for a 1-D launch of 256 work-items,
// comparing two ways of supplying the local work size:
//   state.range(0) == 0 : local_work_size is a null pointer.
//   state.range(0) == 1 : local_work_size is an explicit value.
// The explicit value comes from clGetKernelSuggestedLocalWorkSizeKHR when the
// fixture resolved that extension function and the query succeeds; in every
// other case a fixed size of 64 is used.
BENCHMARK_DEFINE_F(Kernel, clEnqueueNDRangeKernel_LocalWorkSize)(benchmark::State& state)
{
    const bool useLocalWorkSize = state.range(0) == 1;

    const size_t work_dim = 1;
    const size_t global_work_size[work_dim] = { 256 };

    // Start from the fallback so the array never holds an indeterminate value,
    // even if the extension query is unavailable or fails.
    const size_t fallback_local_work_size = 64;
    size_t suggested_local_work_size[work_dim] = { fallback_local_work_size };

    if (clGetKernelSuggestedLocalWorkSizeKHR) {
        const cl_int errorCode = clGetKernelSuggestedLocalWorkSizeKHR(
            queue(),
            kernel(),
            work_dim,
            nullptr,
            global_work_size,
            suggested_local_work_size);
        if (errorCode != CL_SUCCESS) {
            // The output array is not trusted after a failed query, so restore
            // the fallback rather than use whatever was written.
            suggested_local_work_size[0] = fallback_local_work_size;
        }
    }

    const size_t* local_work_size = useLocalWorkSize ?
        suggested_local_work_size :
        nullptr;

    for (auto _ : state) {
        clEnqueueNDRangeKernel(
            queue(),
            kernel(),
            work_dim,
            nullptr,
            global_work_size,
            local_work_size,
            0,
            nullptr,
            nullptr);
        // Wait inside the timed loop so each iteration covers one enqueue and
        // its completion.
        clFinish(queue());
    }

    // Make sure nothing is still in flight when the benchmark body returns.
    clFinish(queue());
}
438+
// Register the variant run with argument 0 (named "NULL"): the benchmark body
// then passes a NULL local work size to clEnqueueNDRangeKernel.
BENCHMARK_REGISTER_F(Kernel, clEnqueueNDRangeKernel_LocalWorkSize)->Arg(0)->ArgName("NULL");
439+
// Register the variant run with argument 1 (named "Suggested"): the benchmark
// body then passes an explicit local work size to clEnqueueNDRangeKernel.
BENCHMARK_REGISTER_F(Kernel, clEnqueueNDRangeKernel_LocalWorkSize)->Arg(1)->ArgName("Suggested");
440+
388441
BENCHMARK_DEFINE_F(Kernel, clSetKernelArgSVMPointer_null)(benchmark::State& state)
389442
{
390443
for(auto _ : state) {
@@ -589,10 +642,13 @@ int main(int argc, char** argv)
589642
env.ParseArgs(argc, argv);
590643

591644
::benchmark::Initialize(&argc, argv);
645+
::benchmark::ReportUnrecognizedArguments(argc, argv);
592646
//if (::benchmark::ReportUnrecognizedArguments(argc, argv)) {
593647
// return 1;
594648
//}
595649
::benchmark::RunSpecifiedBenchmarks();
596650
::benchmark::Shutdown();
651+
652+
env.TearDown();
597653
return 0;
598654
}

0 commit comments

Comments
 (0)