Skip to content

Commit b6d572d

Browse files
fix: store image implicit args in bindless slot 1
Bindless slot layout: slot 0 holds the image, slot 1 holds the image implicit args, and slot 2 holds the redescribed image. The redescribed image is moved one slot higher, and a separate allocation is created for the new bindless slot. Related-To: NEO-9740 Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
1 parent 0b85a9f commit b6d572d

File tree

15 files changed

+417
-28
lines changed

15 files changed

+417
-28
lines changed

level_zero/core/source/context/context_imp.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,18 +513,26 @@ ze_result_t ContextImp::evictMemory(ze_device_handle_t hDevice, void *ptr, size_
513513

514514
// Makes the image's graphics allocation resident on the given device, followed by
// the image's implicit-args allocation when the image has one.
//
// hDevice - device on which residency is requested.
// hImage  - image whose allocation(s) should become resident.
//
// Returns the L0 result of the first residency request that did not succeed, or
// ZE_RESULT_SUCCESS when every request succeeded. The implicit-args allocation is
// only requested after the image allocation itself was made resident successfully.
ze_result_t ContextImp::makeImageResident(ze_device_handle_t hDevice, ze_image_handle_t hImage) {
    auto image = Image::fromHandle(hImage);
    auto alloc = image->getAllocation();
    auto implicitArgsAlloc = image->getImplicitArgsAllocation();

    NEO::Device *neoDevice = L0::Device::fromHandle(hDevice)->getNEODevice();
    NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();

    const auto imageResult = changeMemoryOperationStatusToL0ResultType(
        memoryOperationsIface->makeResident(neoDevice, ArrayRef<NEO::GraphicsAllocation *>(&alloc, 1)));
    if (imageResult != ZE_RESULT_SUCCESS || implicitArgsAlloc == nullptr) {
        return imageResult;
    }

    // Do not drop this status: a failure here must be reported to the caller
    // instead of being masked by the image allocation's success.
    return changeMemoryOperationStatusToL0ResultType(
        memoryOperationsIface->makeResident(neoDevice, ArrayRef<NEO::GraphicsAllocation *>(&implicitArgsAlloc, 1)));
}
522526
// Evicts the image's graphics allocation from the given device, and also the
// image's implicit-args allocation when the image has one.
//
// hDevice - device from which the allocation(s) are evicted.
// hImage  - image whose allocation(s) should be evicted.
//
// Both evictions are always attempted. Returns the L0 result of the image
// allocation's eviction if it did not succeed; otherwise the result of the
// implicit-args eviction (or ZE_RESULT_SUCCESS when there is no such allocation).
ze_result_t ContextImp::evictImage(ze_device_handle_t hDevice, ze_image_handle_t hImage) {
    auto image = Image::fromHandle(hImage);
    auto alloc = image->getAllocation();
    auto implicitArgsAlloc = image->getImplicitArgsAllocation();

    NEO::Device *neoDevice = L0::Device::fromHandle(hDevice)->getNEODevice();
    NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();

    const auto imageResult = changeMemoryOperationStatusToL0ResultType(memoryOperationsIface->evict(neoDevice, *alloc));
    if (implicitArgsAlloc == nullptr) {
        return imageResult;
    }

    // Keep the second status instead of discarding it, so a failed eviction of the
    // implicit-args allocation is not silently reported as success.
    const auto implicitArgsResult = changeMemoryOperationStatusToL0ResultType(memoryOperationsIface->evict(neoDevice, *implicitArgsAlloc));
    return (imageResult != ZE_RESULT_SUCCESS) ? imageResult : implicitArgsResult;
}
530538

level_zero/core/source/image/image.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,12 @@ struct Image : _ze_image_handle_t {
3636
virtual ze_result_t createView(Device *device, const ze_image_desc_t *desc, ze_image_handle_t *pImage) = 0;
3737

3838
virtual NEO::GraphicsAllocation *getAllocation() = 0;
39+
virtual NEO::GraphicsAllocation *getImplicitArgsAllocation() = 0;
3940
virtual void copySurfaceStateToSSH(void *surfaceStateHeap,
4041
const uint32_t surfaceStateOffset,
4142
bool isMediaBlockArg) = 0;
4243
virtual void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0;
44+
virtual void copyImplicitArgsSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0;
4345
virtual NEO::ImageInfo getImageInfo() = 0;
4446
virtual ze_image_desc_t getImageDesc() = 0;
4547
virtual ze_result_t getMemoryProperties(ze_image_memory_properties_exp_t *pMemoryProperties) = 0;

level_zero/core/source/image/image_hw.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2022 Intel Corporation
2+
* Copyright (C) 2020-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -22,6 +22,7 @@ struct ImageCoreFamily : public ImageImp {
2222
ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override;
2323
void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) override;
2424
void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override;
25+
void copyImplicitArgsSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override;
2526
bool isMediaFormat(const ze_image_format_layout_t layout) {
2627
if (layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_NV12 ||
2728
layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_P010 ||
@@ -49,6 +50,7 @@ struct ImageCoreFamily : public ImageImp {
4950
bool isSuitableForCompression(const StructuresLookupTable &structuresLookupTable, const NEO::ImageInfo &imgInfo);
5051

5152
RENDER_SURFACE_STATE surfaceState;
53+
RENDER_SURFACE_STATE implicitArgsSurfaceState;
5254
RENDER_SURFACE_STATE redescribedSurfaceState;
5355
};
5456

level_zero/core/source/image/image_hw.inl

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,33 @@
1111
#include "shared/source/device/device.h"
1212
#include "shared/source/execution_environment/root_device_environment.h"
1313
#include "shared/source/gmm_helper/gmm.h"
14+
#include "shared/source/helpers/api_specific_config.h"
1415
#include "shared/source/helpers/basic_math.h"
16+
#include "shared/source/helpers/gfx_core_helper.h"
1517
#include "shared/source/helpers/surface_format_info.h"
1618
#include "shared/source/image/image_surface_state.h"
1719
#include "shared/source/memory_manager/allocation_properties.h"
1820
#include "shared/source/memory_manager/memory_manager.h"
21+
#include "shared/source/release_helper/release_helper.h"
1922

2023
#include "level_zero/core/source/device/device.h"
2124
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
2225
#include "level_zero/core/source/helpers/properties_parser.h"
26+
#include "level_zero/core/source/image/image_format_desc_helper.h"
2327
#include "level_zero/core/source/image/image_formats.h"
2428
#include "level_zero/core/source/image/image_hw.h"
2529

30+
#include "encode_surface_state_args.h"
31+
2632
namespace L0 {
2733

2834
template <GFXCORE_FAMILY gfxCoreFamily>
2935
ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_image_desc_t *desc) {
3036
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
3137

38+
const auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
39+
const bool isBindlessMode = rootDeviceEnvironment.getReleaseHelper() ? NEO::ApiSpecificConfig::getBindlessMode(rootDeviceEnvironment.getReleaseHelper()) : false;
40+
3241
StructuresLookupTable lookupTable = {};
3342

3443
lookupTable.areImageProperties = true;
@@ -104,8 +113,13 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
104113
}
105114
}
106115

116+
if (isBindlessMode) {
117+
NEO::AllocationProperties imgImplicitArgsAllocProperties(device->getRootDeviceIndex(), NEO::ImageImplicitArgs::getSize(), NEO::AllocationType::buffer, device->getNEODevice()->getDeviceBitfield());
118+
implicitArgsAllocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(imgImplicitArgsAllocProperties);
119+
}
120+
107121
auto gmm = this->allocation->getDefaultGmm();
108-
auto gmmHelper = static_cast<const NEO::RootDeviceEnvironment &>(device->getNEODevice()->getRootDeviceEnvironment()).getGmmHelper();
122+
auto gmmHelper = static_cast<const NEO::RootDeviceEnvironment &>(rootDeviceEnvironment).getGmmHelper();
109123

110124
if (gmm != nullptr) {
111125
NEO::ImagePlane yuvPlaneType = NEO::ImagePlane::noPlane;
@@ -157,6 +171,53 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
157171
NEO::EncodeSurfaceState<GfxFamily>::setImageAuxParamsForCCS(&surfaceState, gmm);
158172
}
159173
}
174+
175+
if (isBindlessMode && implicitArgsAllocation) {
176+
implicitArgsSurfaceState = GfxFamily::cmdInitRenderSurfaceState;
177+
178+
auto clChannelType = getClChannelDataType(imageFormatDesc.format);
179+
auto clChannelOrder = getClChannelOrder(imageFormatDesc.format);
180+
181+
NEO::ImageImplicitArgs imageImplicitArgs{};
182+
imageImplicitArgs.structVersion = 0;
183+
184+
imageImplicitArgs.imageWidth = imgInfo.imgDesc.imageWidth;
185+
imageImplicitArgs.imageHeight = imgInfo.imgDesc.imageHeight;
186+
imageImplicitArgs.imageDepth = imgInfo.imgDesc.imageDepth;
187+
imageImplicitArgs.imageArraySize = imgInfo.imgDesc.imageArraySize;
188+
imageImplicitArgs.numSamples = imgInfo.imgDesc.numSamples;
189+
imageImplicitArgs.channelType = clChannelType;
190+
imageImplicitArgs.channelOrder = clChannelOrder;
191+
imageImplicitArgs.numMipLevels = imgInfo.imgDesc.numMipLevels;
192+
193+
auto pixelSize = imgInfo.surfaceFormat->imageElementSizeInBytes;
194+
imageImplicitArgs.flatBaseOffset = implicitArgsAllocation->getGpuAddress();
195+
imageImplicitArgs.flatWidth = (imgInfo.imgDesc.imageWidth * pixelSize) - 1u;
196+
imageImplicitArgs.flagHeight = (imgInfo.imgDesc.imageHeight * pixelSize) - 1u;
197+
imageImplicitArgs.flatPitch = imgInfo.imgDesc.imageRowPitch - 1u;
198+
199+
const auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
200+
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *implicitArgsAllocation), *this->device->getNEODevice(), implicitArgsAllocation, 0u, &imageImplicitArgs, NEO::ImageImplicitArgs::getSize());
201+
202+
{
203+
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
204+
205+
NEO::EncodeSurfaceStateArgs args;
206+
args.outMemory = &implicitArgsSurfaceState;
207+
args.size = NEO::ImageImplicitArgs::getSize();
208+
args.graphicsAddress = implicitArgsAllocation->getGpuAddress();
209+
args.gmmHelper = gmmHelper;
210+
args.allocation = implicitArgsAllocation;
211+
args.numAvailableDevices = this->device->getNEODevice()->getNumGenericSubDevices();
212+
args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
213+
args.mocs = gfxCoreHelper.getMocsIndex(*args.gmmHelper, true, false) << 1;
214+
args.implicitScaling = this->device->isImplicitScalingCapable();
215+
args.isDebuggerActive = this->device->getNEODevice()->getDebugger() != nullptr;
216+
217+
gfxCoreHelper.encodeBufferSurfaceState(args);
218+
}
219+
}
220+
160221
{
161222
const uint32_t exponent = Math::log2(imgInfo.surfaceFormat->imageElementSizeInBytes);
162223
DEBUG_BREAK_IF(exponent >= 5u);
@@ -232,6 +293,18 @@ void ImageCoreFamily<gfxCoreFamily>::copyRedescribedSurfaceStateToSSH(void *surf
232293
&redescribedSurfaceState, sizeof(RENDER_SURFACE_STATE));
233294
}
234295

296+
template <GFXCORE_FAMILY gfxCoreFamily>
297+
void ImageCoreFamily<gfxCoreFamily>::copyImplicitArgsSurfaceStateToSSH(void *surfaceStateHeap,
298+
const uint32_t surfaceStateOffset) {
299+
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
300+
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
301+
302+
// Copy the image's surface state into position in the provided surface state heap
303+
auto destSurfaceState = ptrOffset(surfaceStateHeap, surfaceStateOffset);
304+
memcpy_s(destSurfaceState, sizeof(RENDER_SURFACE_STATE),
305+
&implicitArgsSurfaceState, sizeof(RENDER_SURFACE_STATE));
306+
}
307+
235308
template <GFXCORE_FAMILY gfxCoreFamily>
236309
bool ImageCoreFamily<gfxCoreFamily>::isSuitableForCompression(const StructuresLookupTable &structuresLookupTable, const NEO::ImageInfo &imgInfo) {
237310
auto &hwInfo = device->getHwInfo();

level_zero/core/source/image/image_imp.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,13 @@ ImageImp::~ImageImp() {
3131
this->device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[this->allocation->getRootDeviceIndex()]->getBindlessHeapsHelper()->releaseSSToReusePool(*bindlessInfo);
3232
}
3333
}
34-
if (!isImageView() && this->device != nullptr) {
35-
this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->allocation);
34+
if (this->device != nullptr) {
35+
if (!isImageView()) {
36+
this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->allocation);
37+
}
38+
if (implicitArgsAllocation) {
39+
this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->implicitArgsAllocation);
40+
}
3641
}
3742
}
3843

@@ -100,7 +105,7 @@ ze_result_t ImageImp::allocateBindlessSlot() {
100105

101106
if (bindlessHelper && !bindlessInfo) {
102107
auto &gfxCoreHelper = this->device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[allocation->getRootDeviceIndex()]->getHelper<NEO::GfxCoreHelper>();
103-
const auto surfStateCount = 2;
108+
const auto surfStateCount = 3;
104109
auto surfaceStateSize = surfStateCount * gfxCoreHelper.getRenderSurfaceStateSize();
105110

106111
auto surfaceStateInfo = bindlessHelper->allocateSSInHeap(surfaceStateSize, allocation, NEO::BindlessHeapsHelper::globalSsh);

level_zero/core/source/image/image_imp.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include "shared/source/helpers/non_copyable_or_moveable.h"
1111
#include "shared/source/helpers/surface_format_info.h"
12+
#include "shared/source/memory_manager/graphics_allocation.h"
1213

1314
#include "level_zero/core/source/image/image.h"
1415

@@ -26,6 +27,7 @@ struct ImageImp : public Image, NEO::NonCopyableOrMovableClass {
2627
~ImageImp() override;
2728

2829
NEO::GraphicsAllocation *getAllocation() override { return allocation; }
30+
NEO::GraphicsAllocation *getImplicitArgsAllocation() override { return implicitArgsAllocation; }
2931
NEO::ImageInfo getImageInfo() override { return imgInfo; }
3032
ze_image_desc_t getImageDesc() override {
3133
return imageFormatDesc;
@@ -52,6 +54,7 @@ struct ImageImp : public Image, NEO::NonCopyableOrMovableClass {
5254
Device *device = nullptr;
5355
NEO::ImageInfo imgInfo = {};
5456
NEO::GraphicsAllocation *allocation = nullptr;
57+
NEO::GraphicsAllocation *implicitArgsAllocation = nullptr;
5558
ze_image_desc_t imageFormatDesc = {};
5659
std::optional<ze_image_desc_t> sourceImageFormatDesc = {};
5760
std::unique_ptr<NEO::SurfaceStateInHeapInfo> bindlessInfo;

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -571,12 +571,12 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
571571

572572
auto ssInHeap = image->getBindlessSlot();
573573
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
574-
// redescribed image's surface state is after image's state
575-
auto bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize;
574+
// redescribed image's surface state is after image's implicit args
575+
auto bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * 2;
576576
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
577577
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
578578

579-
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize), 0u);
579+
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize * 2), 0u);
580580
isBindlessOffsetSet[argIndex] = true;
581581
this->residencyContainer.push_back(ssInHeap->heapAllocation);
582582
} else {
@@ -589,6 +589,10 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
589589
}
590590
residencyContainer[argIndex] = image->getAllocation();
591591

592+
if (image->getImplicitArgsAllocation()) {
593+
this->residencyContainer.push_back(image->getImplicitArgsAllocation());
594+
}
595+
592596
return ZE_RESULT_SUCCESS;
593597
}
594598

@@ -778,6 +782,8 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
778782
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
779783

780784
image->copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, isMediaBlockImage);
785+
image->copyImplicitArgsSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize), 0u);
786+
781787
isBindlessOffsetSet[argIndex] = true;
782788
this->residencyContainer.push_back(ssInHeap->heapAllocation);
783789
} else {
@@ -791,6 +797,10 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
791797

792798
residencyContainer[argIndex] = image->getAllocation();
793799

800+
if (image->getImplicitArgsAllocation()) {
801+
this->residencyContainer.push_back(image->getImplicitArgsAllocation());
802+
}
803+
794804
auto imageInfo = image->getImageInfo();
795805
auto clChannelType = getClChannelDataType(image->getImageDesc().format);
796806
auto clChannelOrder = getClChannelOrder(image->getImageDesc().format);

level_zero/core/test/unit_tests/mocks/mock_image.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2022 Intel Corporation
2+
* Copyright (C) 2022-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -25,6 +25,7 @@ struct WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>
2525

2626
~WhiteBox() override {}
2727

28+
using BaseClass::implicitArgsSurfaceState;
2829
using BaseClass::redescribedSurfaceState;
2930
using BaseClass::surfaceState;
3031
};

0 commit comments

Comments
 (0)