From c1d841fab4c84a952c3367dff00887529180af81 Mon Sep 17 00:00:00 2001
From: Corey <corey.w108@gmail.com>
Date: Tue, 30 Sep 2025 16:50:10 -0500
Subject: [PATCH 1/8] init - im using example 61 as my hello world

---
 CMakeLists.txt                   |   2 +
 MeshShader/CMakeLists.txt        |  20 +
 MeshShader/README.md             |  22 +
 MeshShader/include/common.hpp    |  19 +
 MeshShader/include/transform.hpp | 162 ++++++
 MeshShader/main.cpp              | 886 +++++++++++++++++++++++++++++++
 MeshShader/src/transform.cpp     |   0
 7 files changed, 1111 insertions(+)
 create mode 100644 MeshShader/CMakeLists.txt
 create mode 100644 MeshShader/README.md
 create mode 100644 MeshShader/include/common.hpp
 create mode 100644 MeshShader/include/transform.hpp
 create mode 100644 MeshShader/main.cpp
 create mode 100644 MeshShader/src/transform.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f8ce94f93..0d873b063 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,6 +88,8 @@ if(NBL_BUILD_EXAMPLES)
   	add_subdirectory(70_FLIPFluids)
 	add_subdirectory(71_RayTracingPipeline)
 
+	add_subdirectory(MeshShader)
+
 	# add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory
 	NBL_GET_ALL_TARGETS(TARGETS)
 
diff --git a/MeshShader/CMakeLists.txt b/MeshShader/CMakeLists.txt
new file mode 100644
index 000000000..8f80cc0ea
--- /dev/null
+++ b/MeshShader/CMakeLists.txt
@@ -0,0 +1,20 @@
+if(NBL_BUILD_IMGUI)
+	set(NBL_EXTRA_SOURCES
+		#"${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" # intentionally commented out (the file is still empty); kept as a reference for adding sources later
+	)
+
+	set(NBL_INCLUDE_SEARCH_DIRECTORIES
+		"${CMAKE_CURRENT_SOURCE_DIR}/include"
+	)
+
+	list(APPEND NBL_LIBRARIES 
+		imtestengine
+		imguizmo
+		"${NBL_EXT_IMGUI_UI_LIB}"
+	)
+	
+	# TODO: Arek, I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here; doesn't this macro take 4 arguments anyway?
+	nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SEARCH_DIRECTORIES}" "${NBL_LIBRARIES}")
+	# TODO: Arek, temporarily disabled because I haven't figured out how to make this target yet
+	# LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD)
+endif()
\ No newline at end of file
diff --git a/MeshShader/README.md b/MeshShader/README.md
new file mode 100644
index 000000000..314b77a59
--- /dev/null
+++ b/MeshShader/README.md
@@ -0,0 +1,22 @@
+9/30/2025 - GDBobby
+Here's the current plan, front to back
+
+1. Remove all unnecessary parts from my copy of example 61.
+    1.1 figure out what IS necessary.
+    1.2 trace the graphics pipeline used, so I can figure out how the mesh pipeline should look
+
+2. I don't have much experience with viewports and scissors yet, so I'd like to change
+    how the ImGui viewport is handled, just for the fun of it. Example 61 mentions that the scene is rendered to a
+    temporary color attachment, which is then sourced as a texture in ImGui. I'd like to change it so
+    that ImGui literally just puts a box around a viewport that's rendered to directly.
+
+3. Create the Mesh Pipeline.
+    3.1. I want to support generative (procedural) mesh shaders, which take 0 input vertices
+    3.2. I want to support meshlets - small meshes that are defined by pre-existing vertices
+    3.3. I want to re-compile the mesh shader into a compute and vertex shader combo, 
+        which can be used on machines that don't support the mesh shader extension 
+        (mostly GPUs older than 2016)
+
+
+I think, to avoid having to manage two different branches in two different repos, I'll stuff everything into this example in the beginning.
+Once everything starts to come together, I'll start moving things, like the Mesh Pipeline class, into more appropriate places, like Nabla itself.
\ No newline at end of file
diff --git a/MeshShader/include/common.hpp b/MeshShader/include/common.hpp
new file mode 100644
index 000000000..fe7d086dd
--- /dev/null
+++ b/MeshShader/include/common.hpp
@@ -0,0 +1,19 @@
+#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
+#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
+
+
+#include "nbl/examples/examples.hpp"
+
+// the example's headers
+#include "transform.hpp"
+
+using namespace nbl;
+using namespace nbl::core;
+using namespace nbl::hlsl;
+using namespace nbl::system;
+using namespace nbl::asset;
+using namespace nbl::ui;
+using namespace nbl::video;
+using namespace nbl::examples;
+
+#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
\ No newline at end of file
diff --git a/MeshShader/include/transform.hpp b/MeshShader/include/transform.hpp
new file mode 100644
index 000000000..fb1672c2f
--- /dev/null
+++ b/MeshShader/include/transform.hpp
@@ -0,0 +1,162 @@
+#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
+#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
+
+
+#include "nbl/ui/ICursorControl.h"
+
+#include "nbl/ext/ImGui/ImGui.h"
+
+#include "imgui/imgui_internal.h"
+#include "imguizmo/ImGuizmo.h"
+
+
+struct TransformRequestParams
+{
+	float camDistance{8.f}; // distance forwarded to ImGuizmo::ViewManipulate
+	uint8_t sceneTexDescIx{static_cast<uint8_t>(~0)}; // defaults to all bits set (255); copied into SImResourceInfo::textureID by EditTransform
+	bool useWindow{true}, editTransformDecomposition{false}, enableViewManipulate{false}; // UI toggles read by EditTransform
+};
+
+// Draws the gizmo UI (optional decomposition editor plus the ImGuizmo manipulator) on top of the scene texture and edits `matrix` in place.
+// `cameraView` is handed as a mutable pointer to ImGuizmo (ViewManipulate is only called when `params.enableViewManipulate` is set); `cameraProjection` is const.
+// Returns the size (width, height) of the ImGui content region the scene image was drawn into.
+// Declared `inline` because the definition lives in a header; without it, including this file from two translation units breaks the ODR.
+inline nbl::hlsl::uint16_t2 EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params)
+{
+	static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE);
+	static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL);
+	static bool useSnap = false;
+	static float snap[3] = { 1.f, 1.f, 1.f };
+	static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f };
+	static float boundsSnap[] = { 0.1f, 0.1f, 0.1f };
+	static bool boundSizing = false;
+	static bool boundSizingSnap = false;
+
+	if (params.editTransformDecomposition)
+	{
+		if (ImGui::IsKeyPressed(ImGuiKey_T))
+			mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
+		if (ImGui::IsKeyPressed(ImGuiKey_R))
+			mCurrentGizmoOperation = ImGuizmo::ROTATE;
+		if (ImGui::IsKeyPressed(ImGuiKey_S))
+			mCurrentGizmoOperation = ImGuizmo::SCALE;
+		if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE))
+			mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
+		ImGui::SameLine();
+		if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE))
+			mCurrentGizmoOperation = ImGuizmo::ROTATE;
+		ImGui::SameLine();
+		if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE))
+			mCurrentGizmoOperation = ImGuizmo::SCALE;
+		if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL))
+			mCurrentGizmoOperation = ImGuizmo::UNIVERSAL;
+		float matrixTranslation[3], matrixRotation[3], matrixScale[3];
+		ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale);
+		ImGui::InputFloat3("Tr", matrixTranslation);
+		ImGui::InputFloat3("Rt", matrixRotation);
+		ImGui::InputFloat3("Sc", matrixScale);
+		ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix);
+
+		if (mCurrentGizmoOperation != ImGuizmo::SCALE)
+		{
+			if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL))
+				mCurrentGizmoMode = ImGuizmo::LOCAL;
+			ImGui::SameLine();
+			if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD))
+				mCurrentGizmoMode = ImGuizmo::WORLD;
+		}
+		// Shift+S toggles snapping: S must be newly pressed (no key repeat) while Shift is held, instead of requiring both press edges in the same frame
+		if (ImGui::IsKeyPressed(ImGuiKey_S, false) && ImGui::IsKeyDown(ImGuiKey_LeftShift))
+			useSnap = !useSnap;
+		ImGui::Checkbox("##UseSnap", &useSnap);
+		ImGui::SameLine();
+
+		switch (mCurrentGizmoOperation)
+		{
+		case ImGuizmo::TRANSLATE:
+			ImGui::InputFloat3("Snap", &snap[0]);
+			break;
+		case ImGuizmo::ROTATE:
+			ImGui::InputFloat("Angle Snap", &snap[0]);
+			break;
+		case ImGuizmo::SCALE:
+			ImGui::InputFloat("Scale Snap", &snap[0]);
+			break;
+		}
+		ImGui::Checkbox("Bound Sizing", &boundSizing);
+		if (boundSizing)
+		{
+			ImGui::PushID(3);
+			ImGui::Checkbox("##BoundSizing", &boundSizingSnap);
+			ImGui::SameLine();
+			ImGui::InputFloat3("Snap", boundsSnap);
+			ImGui::PopID();
+		}
+	}
+
+	ImGuiIO& io = ImGui::GetIO();
+	float viewManipulateRight = io.DisplaySize.x;
+	float viewManipulateTop = 0;
+	static ImGuiWindowFlags gizmoWindowFlags = 0;
+
+	/*
+		for the "useWindow" case we just render to a gui area, 
+		otherwise to fake full screen transparent window
+
+		note that for both cases we make sure gizmo being 
+		rendered is aligned to our texture scene using 
+        imgui  "cursor" screen positions
+	*/
+// TODO: this shouldn't be handled here I think
+	SImResourceInfo info;
+	info.textureID = params.sceneTexDescIx;
+	info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER;
+
+	nbl::hlsl::uint16_t2 retval;
+	if (params.useWindow)
+	{
+		ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing);
+		ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing);
+		ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f));
+		ImGui::Begin("Gizmo", nullptr, gizmoWindowFlags);
+		ImGuizmo::SetDrawlist();
+
+		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
+		ImVec2 cursorPos = ImGui::GetCursorScreenPos();
+		ImGui::Image(info, contentRegionSize);
+		ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
+		retval = {contentRegionSize.x,contentRegionSize.y};
+		viewManipulateRight = cursorPos.x + contentRegionSize.x;
+		viewManipulateTop = cursorPos.y;
+
+		ImGuiWindow* window = ImGui::GetCurrentWindow();
+		gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0);
+	}
+	else
+	{
+		ImGui::SetNextWindowPos(ImVec2(0, 0));
+		ImGui::SetNextWindowSize(io.DisplaySize);
+		ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window
+		ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs);
+
+		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
+		ImVec2 cursorPos = ImGui::GetCursorScreenPos();
+		ImGui::Image(info, contentRegionSize);
+		ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
+		retval = {contentRegionSize.x,contentRegionSize.y};
+		viewManipulateRight = cursorPos.x + contentRegionSize.x;
+		viewManipulateTop = cursorPos.y;
+	}
+
+	ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, nullptr, useSnap ? &snap[0] : nullptr, boundSizing ? bounds : nullptr, boundSizingSnap ? boundsSnap : nullptr);
+
+	if(params.enableViewManipulate)
+		ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
+
+	ImGui::End();
+	ImGui::PopStyleColor();
+
+	return retval;
+}
+
+#endif // _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
\ No newline at end of file
diff --git a/MeshShader/main.cpp b/MeshShader/main.cpp
new file mode 100644
index 000000000..643cab079
--- /dev/null
+++ b/MeshShader/main.cpp
@@ -0,0 +1,886 @@
+// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#include "common.hpp"
+
+/*
+Renders the scene to an offscreen framebuffer whose color attachment is then sampled into an ImGui window.
+
+Written with Nabla's UI extension and integrated with ImGuizmo to handle the scene object's translations.
+*/
+class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesApplication
+{
+		using device_base_t = MonoWindowApplication;
+		using asset_base_t = BuiltinResourcesApplication;
+
+	public:
+		inline UISampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) // forwards the four working-directory paths to both initialised bases
+			: IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD),
+			device_base_t({1280,720}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} // NOTE(review): {1280,720} / EF_UNKNOWN presumably are the initial window size and a depth format — confirm against MonoWindowApplication
+
+		// One-time setup: per-frame command buffers, the geometry scene, the offscreen scene renderpass, the debug renderer and the ImGUI UI.
+		// Bails out early (through logFail, or when a base-class initialisation returns false) as soon as one of the checked steps fails.
+		inline bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
+		{
+			if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system)))
+				return false;
+			if (!device_base_t::onAppInitialized(smart_refctd_ptr(system)))
+				return false;
+
+			m_semaphore = m_device->createSemaphore(m_realFrameIx);
+			if (!m_semaphore)
+				return logFail("Failed to Create a Semaphore!");
+
+			auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT);
+			for (auto i = 0u; i<MaxFramesInFlight; i++)
+			{
+				if (!pool)
+					return logFail("Couldn't create Command Pool!");
+				if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_cmdBufs.data()+i,1}))
+					return logFail("Couldn't create Command Buffer!");
+			}
+			const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()};
+			m_scene = CGeometryCreatorScene::create(
+				{
+					.transferQueue = getTransferUpQueue(),
+					.utilities = m_utils.get(),
+					.logger = m_logger.get(),
+					.addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies
+				},
+				CSimpleDebugRenderer::DefaultPolygonGeometryPatch
+			);
+			if (!m_scene) return logFail("Failed to create the Geometry Creator Scene!"); // it is dereferenced unconditionally below
+			// for the scene drawing pass
+			{
+				IGPURenderpass::SCreationParams params = {};
+				const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = {
+					{{
+						{
+							.format = sceneRenderDepthFormat,
+							.samples = IGPUImage::ESCF_1_BIT,
+							.mayAlias = false
+						},
+						/*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR},
+						/*.storeOp = */{IGPURenderpass::STORE_OP::STORE},
+						/*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED},
+						/*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}
+					}},
+					IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd
+				};
+				params.depthStencilAttachments = depthAttachments;
+				const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = {
+					{{
+						{
+							.format = finalSceneRenderFormat,
+							.samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT,
+							.mayAlias = false
+						},
+						/*.loadOp = */IGPURenderpass::LOAD_OP::CLEAR,
+						/*.storeOp = */IGPURenderpass::STORE_OP::STORE,
+						/*.initialLayout = */IGPUImage::LAYOUT::UNDEFINED,
+						/*.finalLayout = */ IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read
+					}},
+					IGPURenderpass::SCreationParams::ColorAttachmentsEnd
+				};
+				params.colorAttachments = colorAttachments;
+				IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = {
+					{},
+					IGPURenderpass::SCreationParams::SubpassesEnd
+				};
+				subpasses[0].depthStencilAttachment = {{.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}};
+				subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}};
+				params.subpasses = subpasses;
+				const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
+					// wipe-transition of Color to ATTACHMENT_OPTIMAL and depth
+					{
+						.srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+						.dstSubpass = 0,
+						.memoryBarrier = {
+							// last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later
+							// while color is sampled by ImGUI
+							.srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,
+							// don't want any writes to be available, as we are clearing both attachments
+							.srcAccessMask = ACCESS_FLAGS::NONE,
+							// destination needs to wait as early as possible
+							// TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h`
+							.dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+							// because depth and color get cleared first no read mask
+							.dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+						}
+						// leave view offsets and flags default
+					},
+					{
+						.srcSubpass = 0,
+						.dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+						.memoryBarrier = {
+							// last place where the color can get modified, depth is implicitly earlier
+							.srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+							// only write ops, reads can't be made available, also won't be using depth so don't care about it being visible to anyone else
+							.srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,
+							// the ImGUI will sample the color, then next frame we overwrite both attachments
+							.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,
+							// but we only care about the availability-visibility chain between renderpass and imgui 
+							.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT
+						}
+						// leave view offsets and flags default
+					},
+					IGPURenderpass::SCreationParams::DependenciesEnd
+				};
+				params.dependencies = dependencies; // was `{}`, which left the table above unused and the color write -> ImGUI sample hand-over without an explicit dependency
+				m_renderpass = m_device->createRenderpass(std::move(params));
+				if (!m_renderpass)
+					return logFail("Failed to create Scene Renderpass!");
+			}
+			const auto& geometries = m_scene->getInitParams().geometries;
+			m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(),m_renderpass.get(),0,{&geometries.front().get(),geometries.size()});
+			if (!m_renderer) return logFail("Failed to create the Debug Renderer!"); // it is dereferenced unconditionally below
+			{ // special case: the geometry named "Cone" is drawn with the dedicated Cone pipeline
+				const auto& pipelines = m_renderer->getInitParams().pipelines;
+				auto ix = 0u;
+				for (const auto& name : m_scene->getInitParams().geometryNames)
+				{
+					if (name=="Cone")
+						m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone];
+					ix++;
+				}
+			}
+			// we'll only display one thing at a time
+			m_renderer->m_instances.resize(1);
+
+			// Create ImGUI
+			{
+				auto scRes = static_cast<CDefaultSwapchainFramebuffers*>(m_surface->getSwapchainResources());
+				ext::imgui::UI::SCreationParameters params = {};
+				params.resources.texturesInfo = {.setIx=0u,.bindingIx=TexturesImGUIBindingIndex};
+				params.resources.samplersInfo = {.setIx=0u,.bindingIx=1u};
+				params.utilities = m_utils;
+				params.transfer = getTransferUpQueue();
+				params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(),params.resources.texturesInfo,params.resources.samplersInfo,MaxImGUITextures);
+				params.assetManager = make_smart_refctd_ptr<IAssetManager>(smart_refctd_ptr(m_system));
+				params.renderpass = smart_refctd_ptr<IGPURenderpass>(scRes->getRenderpass());
+				params.subpassIx = 0u;
+				params.pipelineCache = nullptr;
+				interface.imGUI = ext::imgui::UI::create(std::move(params));
+				if (!interface.imGUI)
+					return logFail("Failed to create `nbl::ext::imgui::UI` class");
+			}
+
+			// create rest of User Interface
+			{
+				auto* imgui = interface.imGUI.get();
+				// create the suballocated descriptor set
+				{
+					// note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources
+					const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u);
+					auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,{&layout,1});
+					auto ds = pool->createDescriptorSet(smart_refctd_ptr<const IGPUDescriptorSetLayout>(layout));
+					interface.subAllocDS = make_smart_refctd_ptr<SubAllocatedDescriptorSet>(std::move(ds));
+					if (!interface.subAllocDS)
+						return logFail("Failed to create the descriptor set");
+					// make sure Texture Atlas slot is taken for eternity
+					{
+						auto dummy = SubAllocatedDescriptorSet::invalid_value;
+						interface.subAllocDS->multi_allocate(0,1,&dummy);
+						assert(dummy==ext::imgui::UI::FontAtlasTexId);
+					}
+					// write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout
+					IGPUDescriptorSet::SDescriptorInfo info = {};
+					info.desc = smart_refctd_ptr<nbl::video::IGPUImageView>(interface.imGUI->getFontAtlasView());
+					info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL;
+					const IGPUDescriptorSet::SWriteDescriptorSet write = {
+						.dstSet = interface.subAllocDS->getDescriptorSet(),
+						.binding = TexturesImGUIBindingIndex,
+						.arrayElement = ext::imgui::UI::FontAtlasTexId,
+						.count = 1,
+						.info = &info
+					};
+					if (!m_device->updateDescriptorSets({&write,1},{}))
+						return logFail("Failed to write the descriptor set");
+				}
+				imgui->registerListener([this](){interface();});
+			}
+
+			interface.camera.mapKeysToArrows();
+
+			onAppInitializedFinish();
+			return true;
+		}
+
+		// Gives the font-atlas texture slot back to the sub-allocated descriptor set, then defers to the base-class termination.
+		virtual inline bool onAppTerminated()
+		{
+			IGPUDescriptorSet::SDropDescriptorSet droppedSets[1]; // scratch output for this `multi_deallocate` overload, not inspected afterwards
+			SubAllocatedDescriptorSet::value_type atlasSlot = ext::imgui::UI::FontAtlasTexId;
+			interface.subAllocDS->multi_deallocate(droppedSets,TexturesImGUIBindingIndex,1,&atlasSlot);
+			return device_base_t::onAppTerminated();
+		}
+
+		// Records and submits one frame: the scene pass into the offscreen framebuffer (when one exists), then the ImGUI pass into the acquired swapchain image; returns the semaphore info signalled by the submit.
+		inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override
+		{
+			// CPU events
+			update(nextPresentationTimestamp);
+
+			const auto& virtualWindowRes = interface.sceneResolution;
+			if (!m_framebuffer || m_framebuffer->getCreationParameters().width!=virtualWindowRes[0] || m_framebuffer->getCreationParameters().height!=virtualWindowRes[1])
+				recreateFramebuffer(virtualWindowRes);
+
+			// pick this frame's command buffer, cycling through the MaxFramesInFlight slots
+			const auto resourceIx = m_realFrameIx % MaxFramesInFlight;
+
+			auto* const cb = m_cmdBufs.data()[resourceIx].get();
+			cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT);
+			cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
+			// clear to black for both things
+			const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} };
+			if (m_framebuffer)
+			{
+				cb->beginDebugMarker("UISampleApp Scene Frame");
+				{
+					const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth=0.f }; // NOTE(review): "far" cleared to 0 presumably means reversed-Z — confirm
+					const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo =
+					{
+						.framebuffer = m_framebuffer.get(),
+						.colorClearValues = &clearValue,
+						.depthStencilClearValues = &farValue,
+						.renderArea = {
+							.offset = {0,0},
+							.extent = {virtualWindowRes[0],virtualWindowRes[1]}
+						}
+					};
+					beginRenderpass(cb,renderpassInfo);
+				}
+				// draw scene
+				{
+					float32_t3x4 viewMatrix;
+					float32_t4x4 viewProjMatrix;
+					// TODO: get rid of legacy matrices
+					{
+						const auto& camera = interface.camera;
+						memcpy(&viewMatrix,camera.getViewMatrix().pointer(),sizeof(viewMatrix));
+						memcpy(&viewProjMatrix,camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix));
+					}
+					const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix);
+
+					// tear down scene every frame
+					auto& instance = m_renderer->m_instances[0];
+					memcpy(&instance.world,&interface.model,sizeof(instance.world));
+					instance.packedGeo = m_renderer->getGeometries().data() + interface.gcIndex;
+ 					m_renderer->render(cb,viewParams);
+				}
+				cb->endRenderPass();
+				cb->endDebugMarker();
+			}
+			{
+				cb->beginDebugMarker("UISampleApp IMGUI Frame");
+				{
+					auto scRes = static_cast<CDefaultSwapchainFramebuffers*>(m_surface->getSwapchainResources());
+					const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo =
+					{
+						.framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex),
+						.colorClearValues = &clearValue,
+						.depthStencilClearValues = nullptr,
+						.renderArea = {
+							.offset = {0,0},
+							.extent = {m_window->getWidth(),m_window->getHeight()}
+						}
+					};
+					beginRenderpass(cb,renderpassInfo);
+				}
+				// draw ImGUI
+				{
+					auto* imgui = interface.imGUI.get();
+					auto* pipeline = imgui->getPipeline();
+					cb->bindGraphicsPipeline(pipeline);
+					// note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx
+					const auto* ds = interface.subAllocDS->getDescriptorSet();
+					cb->bindDescriptorSets(EPBP_GRAPHICS,pipeline->getLayout(),imgui->getCreationParameters().resources.texturesInfo.setIx,1u,&ds);
+					// a timepoint in the future to release streaming resources for geometry
+					const ISemaphore::SWaitInfo drawFinished = {.semaphore=m_semaphore.get(),.value=m_realFrameIx+1u};
+					if (!imgui->render(cb,drawFinished))
+					{
+						m_logger->log("TODO: need to present acquired image before bailing because its already acquired.",ILogger::ELL_ERROR);
+						return {}; // NOTE(review): returns while `cb` is still recording inside the renderpass
+					}
+				}
+				cb->endRenderPass();
+				cb->endDebugMarker();
+			}
+			cb->end();
+
+			//updateGUIDescriptorSet();
+
+			IQueue::SSubmitInfo::SSemaphoreInfo retval =
+			{
+				.semaphore = m_semaphore.get(),
+				.value = ++m_realFrameIx, // the frame counter doubles as the value this submit signals
+				.stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS
+			};
+			const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] =
+			{
+				{.cmdbuf = cb }
+			};
+			const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = {
+				{
+					.semaphore = device_base_t::getCurrentAcquire().semaphore,
+					.value = device_base_t::getCurrentAcquire().acquireCount,
+					.stageMask = PIPELINE_STAGE_FLAGS::NONE
+				}
+			};
+			const IQueue::SSubmitInfo infos[] =
+			{
+				{
+					.waitSemaphores = acquired,
+					.commandBuffers = commandBuffers,
+					.signalSemaphores = {&retval,1}
+				}
+			};
+			
+			if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS)
+			{
+				retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal
+				m_realFrameIx--; // undo the increment made above, since nothing was submitted
+			}
+
+			m_window->setCaption("[Nabla Engine] UI App Test Demo");
+			return retval;
+		}
+
+	protected:
+		const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override
+		{
+			// Returns a function-local static table terminated by `DependenciesEnd`. Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present
+			const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
+				// don't want any writes to be available, we'll clear, only thing to worry about is the layout transition
+				{
+					.srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+					.dstSubpass = 0,
+					.memoryBarrier = {
+						.srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway 
+						.srcAccessMask = ACCESS_FLAGS::NONE,
+						// layout transition needs to finish before the color write
+						.dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+						.dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+					}
+					// leave view offsets and flags default
+				},
+				// want layout transition to begin after all color output is done
+				{
+					.srcSubpass = 0,
+					.dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+					.memoryBarrier = {
+						// last place where the color can get modified, depth is implicitly earlier
+						.srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+						// only write ops, reads can't be made available
+						.srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+						// spec says nothing is needed when presentation is the destination (dst masks are left at their defaults)
+					}
+					// leave view offsets and flags default
+				},
+				IGPURenderpass::SCreationParams::DependenciesEnd
+			};
+			return dependencies;
+		}
+
+	private:
+		inline void update(const std::chrono::microseconds nextPresentationTimestamp)
+		{
+			// Per-frame CPU work: applies the camera speeds, drains mouse/keyboard events into the camera and ImGUI, then updates the UI.
+			auto& cam = interface.camera;
+			cam.setMoveSpeed(interface.moveSpeed);
+			cam.setRotateSpeed(interface.rotateSpeed);
+
+			m_inputSystem->getDefaultMouse(&mouse);
+			m_inputSystem->getDefaultKeyboard(&keyboard);
+
+			struct
+			{
+				std::vector<SMouseEvent> mouse{};
+				std::vector<SKeyboardEvent> keyboard{};
+			} captured;
+
+			// TODO: should be a member really
+			static std::chrono::microseconds lastSeenTimestamp{};
+
+			// The camera's begin/end pair brackets every frame, even when no events are fed to it.
+			// If begin/end were skipped, whatever keys were up/down would get their up/down values frozen,
+			// and `perActionDt` would become obnoxiously large the first time event processing resumes,
+			// due to `timeDiff` being computed since `lastVirtualUpTimeStamp`
+			cam.beginInputProcessing(nextPresentationTimestamp);
+			{
+				mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void
+					{
+						if (interface.move)
+							cam.mouseProcess(events); // the camera handles the events with its own impl, without capturing them
+
+						for (const auto& ev : events) // capture for the UI
+						{
+							if (ev.timeStamp < lastSeenTimestamp)
+								continue;
+
+							lastSeenTimestamp = ev.timeStamp;
+							captured.mouse.emplace_back(ev);
+
+							if (ev.type==nbl::ui::SMouseEvent::EET_SCROLL && m_renderer)
+							{
+								interface.gcIndex += int16_t(core::sign(ev.scrollEvent.verticalScroll));
+								interface.gcIndex = core::clamp(interface.gcIndex,0ull,m_renderer->getGeometries().size()-1);
+							}
+						}
+					},
+					m_logger.get()
+				);
+				keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void
+					{
+						if (interface.move)
+							cam.keyboardProcess(events); // the camera handles the events with its own impl, without capturing them
+
+						for (const auto& ev : events) // capture for the UI
+						{
+							if (ev.timeStamp < lastSeenTimestamp)
+								continue;
+
+							lastSeenTimestamp = ev.timeStamp;
+							captured.keyboard.emplace_back(ev);
+						}
+					},
+					m_logger.get()
+				);
+			}
+			cam.endInputProcessing(nextPresentationTimestamp);
+
+			const auto cursor = m_window->getCursorControl()->getPosition();
+
+			ext::imgui::UI::SUpdateParameters uiParams = 
+			{
+				.mousePosition = float32_t2(cursor.x,cursor.y) - float32_t2(m_window->getX(),m_window->getY()),
+				.displaySize = {m_window->getWidth(),m_window->getHeight()},
+				.mouseEvents = captured.mouse,
+				.keyboardEvents = captured.keyboard
+			};
+
+			interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex];
+			interface.imGUI->update(uiParams);
+		}
+
+		// (Re)creates the offscreen color and depth images plus the framebuffer at `resolution`, then re-points the ImGUI texture slot at the new color view.
+		void recreateFramebuffer(const uint16_t2 resolution)
+		{
+			auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr<IGPUImageView>
+			{
+				auto image = m_device->createImage({{
+					.type = IGPUImage::ET_2D,
+					.samples = IGPUImage::ESCF_1_BIT,
+					.format = format,
+					.extent = {resolution.x,resolution.y,1},
+					.mipLevels = 1,
+					.arrayLayers = 1,
+					.usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT|IGPUImage::EUF_SAMPLED_BIT
+				}});
+				if (!image || !m_device->allocate(image->getMemoryReqs(),image.get()).isValid()) // image creation may fail and return null, so check before dereferencing
+					return nullptr;
+				IGPUImageView::SCreationParams params = {
+					.image = std::move(image),
+					.viewType = IGPUImageView::ET_2D,
+					.format = format
+				};
+				params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT:IGPUImage::EAF_COLOR_BIT;
+				return m_device->createImageView(std::move(params));
+			};
+			smart_refctd_ptr<IGPUImageView> colorView;
+			// detect window minimization
+			if (resolution.x<0x4000 && resolution.y<0x4000)
+			{
+				colorView = createImageAndView(finalSceneRenderFormat);
+				auto depthView = createImageAndView(sceneRenderDepthFormat);
+				m_framebuffer = m_device->createFramebuffer({ {
+					.renderpass = m_renderpass,
+					.depthStencilAttachments = &depthView.get(),
+					.colorAttachments = &colorView.get(),
+					.width = resolution.x,
+					.height = resolution.y
+				}});
+			}
+			else
+				m_framebuffer = nullptr;
+
+			// release previous slot and its image
+			interface.subAllocDS->multi_deallocate(0,1,&interface.renderColorViewDescIndex,{.semaphore=m_semaphore.get(),.value=m_realFrameIx});
+			// take a fresh slot only when there is a color view to show
+			if (colorView)
+			{
+				interface.subAllocDS->multi_allocate(0,1,&interface.renderColorViewDescIndex);
+				// update descriptor set
+				IGPUDescriptorSet::SDescriptorInfo info = {};
+				info.desc = colorView;
+				info.info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL;
+				const IGPUDescriptorSet::SWriteDescriptorSet write = {
+					.dstSet = interface.subAllocDS->getDescriptorSet(),
+					.binding = TexturesImGUIBindingIndex,
+					.arrayElement = interface.renderColorViewDescIndex,
+					.count = 1,
+					.info = &info
+				};
+				m_device->updateDescriptorSets({&write,1},{});
+			}
+			interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndex;
+		}
+
+		inline void beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info)
+		{
+			// begin with inline subpass contents; scissor and viewport both cover the full render area
+			const auto& area = info.renderArea;
+			cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
+			cb->setScissor(0,1,&area);
+			SViewport fullArea = {}; // members not assigned below keep their defaults
+			fullArea.x = fullArea.y = 0;
+			fullArea.width = static_cast<float>(area.extent.width);
+			fullArea.height = static_cast<float>(area.extent.height);
+			cb->setViewport(0u,1u,&fullArea);
+		}
+
+		// Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers
+		constexpr static inline uint32_t MaxFramesInFlight = 3u;
+		constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT; // format of the depth attachment made in `recreateFramebuffer`
+		constexpr static inline auto finalSceneRenderFormat = EF_R8G8B8A8_SRGB; // format of the color attachment made in `recreateFramebuffer`
+		constexpr static inline auto TexturesImGUIBindingIndex = 0u; // descriptor binding `recreateFramebuffer` writes the scene color view into
+		// we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes
+		constexpr static inline auto MaxImGUITextures = 2u+MaxFramesInFlight;
+
+		// scene rendering objects; `m_framebuffer` is rebuilt by `recreateFramebuffer` and reset to null when the window is detected as minimized
+		smart_refctd_ptr<CGeometryCreatorScene> m_scene;
+		smart_refctd_ptr<IGPURenderpass> m_renderpass;
+		smart_refctd_ptr<CSimpleDebugRenderer> m_renderer;
+		smart_refctd_ptr<IGPUFramebuffer> m_framebuffer;
+		// `m_semaphore` + `m_realFrameIx` are the semaphore/value pair `recreateFramebuffer` hands to `multi_deallocate`
+		smart_refctd_ptr<ISemaphore> m_semaphore;
+		uint64_t m_realFrameIx = 0;
+		std::array<smart_refctd_ptr<IGPUCommandBuffer>,MaxFramesInFlight> m_cmdBufs;
+		// input event channel readers
+		InputSystem::ChannelReader<IMouseEventChannel> mouse;
+		InputSystem::ChannelReader<IKeyboardEventChannel> keyboard;
+		// all UI state plus the per-frame ImGUI draw routine, kept together in one object
+		struct CInterface
+		{
+			// Builds the whole UI for one frame: refreshes the camera projection from the current settings, draws the
+			// "Editor" inspector, runs the ImGuizmo transform edit and finally the Object, Matrices and MDI buffer windows.
+			void operator()()
+			{
+				ImGuiIO& io = ImGui::GetIO();
+
+				// TODO: why is this a lambda and not just an assignment in a scope ?
+				camera.setProjectionMatrix([&]()
+				{
+					matrix4SIMD projection;
+
+					if (isPerspective)
+						if(isLH)
+							projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
+						else
+							projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
+					else
+					{
+						float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x;
+
+						if(isLH)
+							projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar);
+						else
+							projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar);
+					}
+
+					return projection;
+				}());
+
+				ImGuizmo::SetOrthographic(!isPerspective); // the gizmo has to know which kind of projection it is handed
+				ImGuizmo::BeginFrame();
+
+				// create the "Editor" window and insert the inspector into it,
+				// position and size are only applied when the window appears (ImGuiCond_Appearing)
+				// NOTE: only one SetNextWindow* request can be pending per Begin, a later call replaces an earlier one,
+				// so exactly one position/size pair is issued here
+				ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing);
+				ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing);
+				ImGui::Begin("Editor");
+
+				if (ImGui::RadioButton("Full view", !transformParams.useWindow))
+					transformParams.useWindow = false;
+
+				ImGui::SameLine();
+
+				if (ImGui::RadioButton("Window", transformParams.useWindow))
+					transformParams.useWindow = true;
+
+				ImGui::Text("Camera");
+				bool viewDirty = false;
+
+				if (ImGui::RadioButton("LH", isLH))
+					isLH = true;
+
+				ImGui::SameLine();
+
+				if (ImGui::RadioButton("RH", !isLH))
+					isLH = false;
+
+				if (ImGui::RadioButton("Perspective", isPerspective))
+					isPerspective = true;
+
+				ImGui::SameLine();
+
+				if (ImGui::RadioButton("Orthographic", !isPerspective))
+					isPerspective = false;
+
+				ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate);
+				ImGui::Checkbox("Enable camera movement", &move);
+				ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f);
+				ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f);
+
+				// ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case
+
+				if (isPerspective)
+					ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
+				else
+					ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20);
+
+				ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f);
+				ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f);
+
+				viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f);
+
+				if (viewDirty || firstFrame)
+				{
+					core::vectorSIMDf cameraPosition(cosf(camYAngle)* cosf(camXAngle)* transformParams.camDistance, sinf(camXAngle)* transformParams.camDistance, sinf(camYAngle)* cosf(camXAngle)* transformParams.camDistance);
+					core::vectorSIMDf cameraTarget(0.f, 0.f, 0.f);
+					const static core::vectorSIMDf up(0.f, 1.f, 0.f);
+
+					camera.setPosition(cameraPosition);
+					camera.setTarget(cameraTarget);
+					camera.setBackupUpVector(up);
+
+					camera.recomputeViewMatrix();
+				}
+				firstFrame = false;
+
+				ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y);
+				if (ImGuizmo::IsUsing())
+					ImGui::TextUnformatted("Using gizmo");
+				else
+				{
+					ImGui::TextUnformatted(ImGuizmo::IsOver() ? "Over gizmo" : "");
+					ImGui::SameLine();
+					ImGui::TextUnformatted(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : "");
+					ImGui::SameLine();
+					ImGui::TextUnformatted(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : "");
+					ImGui::SameLine();
+					ImGui::TextUnformatted(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : "");
+				}
+				ImGui::Separator();
+
+				/*
+				* ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout
+				* and Nabla uses row major matrices - 3x4 matrix for view & 4x4 for projection
+
+				- VIEW:
+
+					ImGuizmo
+
+					|     X[0]          Y[0]          Z[0]         0.0f |
+					|     X[1]          Y[1]          Z[1]         0.0f |
+					|     X[2]          Y[2]          Z[2]         0.0f |
+					| -Dot(X, eye)  -Dot(Y, eye)  -Dot(Z, eye)     1.0f |
+
+					Nabla
+
+					|     X[0]         X[1]           X[2]     -Dot(X, eye)  |
+					|     Y[0]         Y[1]           Y[2]     -Dot(Y, eye)  |
+					|     Z[0]         Z[1]           Z[2]     -Dot(Z, eye)  |
+
+					<ImGuizmo View Matrix> = transpose(nbl::core::matrix4SIMD(<Nabla View Matrix>))
+
+				- PERSPECTIVE [PROJECTION CASE]:
+
+					ImGuizmo
+
+					|      (temp / temp2)                 (0.0)                       (0.0)                   (0.0)  |
+					|          (0.0)                  (temp / temp3)                  (0.0)                   (0.0)  |
+					| ((right + left) / temp2)   ((top + bottom) / temp3)    ((-zfar - znear) / temp4)       (-1.0f) |
+					|          (0.0)                      (0.0)               ((-temp * zfar) / temp4)        (0.0)  |
+
+					Nabla
+
+					|            w                        (0.0)                       (0.0)                   (0.0)               |
+					|          (0.0)                       -h                         (0.0)                   (0.0)               |
+					|          (0.0)                      (0.0)               (-zFar/(zFar-zNear))     (-zNear*zFar/(zFar-zNear)) |
+					|          (0.0)                      (0.0)                      (-1.0)                   (0.0)               |
+
+					<ImGuizmo Projection Matrix> = transpose(<Nabla Projection Matrix>)
+
+				*
+				* the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object,
+				* note it also modifies input view matrix but projection matrix is immutable
+				*/
+
+// TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases
+				static struct
+				{
+					core::matrix4SIMD view, projection, model;
+				} imguizmoM16InOut;
+
+				ImGuizmo::SetID(0u);
+
+				imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
+				imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix());
+				imguizmoM16InOut.model = core::transpose(matrix4SIMD(model));
+				{
+					if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates
+						imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/
+
+					transformParams.editTransformDecomposition = true;
+					sceneResolution = EditTransform(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
+				}
+
+				model = core::transpose(imguizmoM16InOut.model).extractSub3x4();
+				// to Nabla + update camera & model matrices
+// TODO: make it more nicely, extract:
+// - Position by computing inverse of the view matrix and grabbing its translation
+// - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position
+// But then set the view matrix this way anyway, because up-vector may not be compatible
+				const auto& view = camera.getViewMatrix();
+				const_cast<core::matrix3x4SIMD&>(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
+				// update concatenated matrix
+				const auto& projection = camera.getProjectionMatrix();
+				camera.setProjectionMatrix(projection);
+
+				// object meta display
+				{
+					ImGui::Begin("Object");
+					ImGui::Text("type: \"%.*s\"", static_cast<int>(objectName.size()), objectName.data()); // a string_view need not be NUL terminated
+					ImGui::End();
+				}
+
+				// view matrices editor
+				{
+					ImGui::Begin("Matrices");
+
+					auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true)
+					{
+						ImGui::TextUnformatted(topText); // caller text is shown verbatim, never used as a format string
+						if (ImGui::BeginTable(tableName, columns))
+						{
+							for (int y = 0; y < rows; ++y)
+							{
+								ImGui::TableNextRow();
+								for (int x = 0; x < columns; ++x)
+								{
+									ImGui::TableSetColumnIndex(x);
+									ImGui::Text("%.3f", *(pointer + (y * columns) + x));
+								}
+							}
+							ImGui::EndTable();
+						}
+
+						if (withSeparator)
+							ImGui::Separator();
+					};
+
+					addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, model.pointer());
+					addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer());
+					addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false);
+
+					ImGui::End();
+				}
+
+				// Nabla Imgui backend MDI buffer info
+				// To be 100% accurate and not overly conservative we'd have to explicitly `cull_frees` and defragment each time,
+				// so unless you do that, don't use this basic info to optimize the size of your IMGUI buffer.
+				{
+					auto* streamingBuffer = imGUI->getStreamingBuffer();
+
+					const size_t total = streamingBuffer->get_total_size();			// total memory range size for which allocation can be requested
+					const size_t freeSize = streamingBuffer->getAddressAllocator().get_free_size();		// max total free block memory size we can still allocate from total memory available
+					const size_t consumedMemory = total - freeSize;			// memory currently consumed by streaming buffer
+
+					float freePercentage = 100.0f * (float)(freeSize) / (float)total;
+					float allocatedFraction = (float)(consumedMemory) / (float)total; // 0..1, the unit the progress bar takes
+
+					ImVec2 barSize = ImVec2(400, 30);
+					float windowPadding = 10.0f;
+					float verticalPadding = ImGui::GetStyle().FramePadding.y;
+
+					ImGui::SetNextWindowSize(ImVec2(barSize.x + 2 * windowPadding, 110 + verticalPadding), ImGuiCond_Always);
+					ImGui::Begin("Nabla Imgui MDI Buffer Info", nullptr, ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoScrollbar);
+
+					ImGui::Text("Total Allocated Size: %zu bytes", total);
+					ImGui::Text("In use: %zu bytes", consumedMemory);
+					ImGui::Text("Buffer Usage:");
+
+					ImGui::SetCursorPosX(windowPadding);
+
+					if (freePercentage > 70.0f)
+						ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f));  // Green
+					else if (freePercentage > 30.0f)
+						ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f));  // Yellow
+					else
+						ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f));  // Red
+
+					ImGui::ProgressBar(allocatedFraction, barSize, "");
+
+					ImGui::PopStyleColor();
+
+					ImDrawList* drawList = ImGui::GetWindowDrawList();
+
+					ImVec2 progressBarPos = ImGui::GetItemRectMin();
+					ImVec2 progressBarSize = ImGui::GetItemRectSize();
+
+					const char* text = "%.2f%% free";
+					char textBuffer[64];
+					snprintf(textBuffer, sizeof(textBuffer), text, freePercentage);
+
+					ImVec2 textSize = ImGui::CalcTextSize(textBuffer);
+					ImVec2 textPos = ImVec2
+					(
+						progressBarPos.x + (progressBarSize.x - textSize.x) * 0.5f,
+						progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f
+					);
+
+					ImVec4 bgColor = ImGui::GetStyleColorVec4(ImGuiCol_WindowBg);
+					drawList->AddRectFilled
+					(
+						ImVec2(textPos.x - 5, textPos.y - 2),
+						ImVec2(textPos.x + textSize.x + 5, textPos.y + textSize.y + 2),
+						ImGui::GetColorU32(bgColor)
+					);
+
+					ImGui::SetCursorScreenPos(textPos);
+					ImGui::Text("%s", textBuffer);
+
+					ImGui::Dummy(ImVec2(0.0f, verticalPadding));
+
+					ImGui::End();
+				}
+
+				ImGui::End(); // closes the "Editor" window opened at the top
+			}
+
+			smart_refctd_ptr<ext::imgui::UI> imGUI;
+			// descriptor set
+			smart_refctd_ptr<SubAllocatedDescriptorSet> subAllocDS;
+			SubAllocatedDescriptorSet::value_type renderColorViewDescIndex = SubAllocatedDescriptorSet::invalid_value;
+			// camera whose projection is rebuilt from the settings below on every call of operator()
+			Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD());
+			// mutables
+			core::matrix3x4SIMD model;
+			std::string_view objectName;
+			TransformRequestParams transformParams;
+			uint16_t2 sceneResolution = {1280,720};
+			float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f;
+			float viewWidth = 10.f;
+			float camYAngle = 165.f / 180.f * 3.14159f;
+			float camXAngle = 32.f / 180.f * 3.14159f;
+			uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed
+			bool isPerspective = true, isLH = true, flipGizmoY = true, move = false;
+			bool firstFrame = true;
+		} interface;
+};
+
+NBL_MAIN_FUNC(UISampleApp)
\ No newline at end of file
diff --git a/MeshShader/src/transform.cpp b/MeshShader/src/transform.cpp
new file mode 100644
index 000000000..e69de29bb

From e40ca207ed079abb15bb9ec9e1cd4d6c7e65fc23 Mon Sep 17 00:00:00 2001
From: Corey <corey.w108@gmail.com>
Date: Wed, 1 Oct 2025 22:42:20 -0500
Subject: [PATCH 2/8] minor tweaks to 61, beginning of mesh pipeline

---
 MeshShader/README.md                  |  17 +-
 MeshShader/include/IGPUMeshPipeline.h | 212 ++++++++++++++++++++
 MeshShader/include/transform.hpp      | 270 +++++++++++++-------------
 MeshShader/main.cpp                   |  69 +++----
 4 files changed, 401 insertions(+), 167 deletions(-)
 create mode 100644 MeshShader/include/IGPUMeshPipeline.h

diff --git a/MeshShader/README.md b/MeshShader/README.md
index 314b77a59..271129235 100644
--- a/MeshShader/README.md
+++ b/MeshShader/README.md
@@ -2,7 +2,9 @@
 Here's the current plan, front to back
 
 1. Remove all unnecessary parts from my copy of example 61.
+
     1.1 figure out what IS necessary.
+    
     1.2 trace the graphics pipeline used, so I can figure out how the mesh pipeline should look
 
 2. i dont have much experience with viewports and scissors yet, so I'd like to change
@@ -11,12 +13,25 @@ Here's the current plan, front to back
     that imgui literally just puts a box around a viewport thats rendered to directly
 
 3. Create the Mesh Pipeline.
+
     3.1. I want to support generative (procedural) mesh shaders, which take 0 input vertices
+    
     3.2. I want to support meshlets - small meshes that are defined by pre-existing vertices
+    
     3.3. I want to re-compile the mesh shader into a compute and vertex shader combo, 
         which can be used on machines that don't support the mesh shader extension 
         (mostly GPUs older than 2016)
 
 
 I think, to prevent controlling two different branches in two different repos, I'll stuff everything into this example in the beginning. 
-Once everything start to come together, I'll start moving things, like the Mesh Pipeline class, into more appropriate places, like Nabla itself.
\ No newline at end of file
+Once everything starts to come together, I'll start moving things, like the Mesh Pipeline class, into more appropriate places, like Nabla itself.
+
+
+9/31
+I'll create a mesh shader tomorrow. I don't really know what to do yet but I'll start with procedural gen.
+
+I think I'll also make a different pipeline object that supports the geometry from example 61?
+
+I had my fun with viewports. idk what i expected tbh
+
+I need to search a little deeper in the spec for other mesh pipeline related rules. I need to research subpasses as well.
\ No newline at end of file
diff --git a/MeshShader/include/IGPUMeshPipeline.h b/MeshShader/include/IGPUMeshPipeline.h
new file mode 100644
index 000000000..94a83c79a
--- /dev/null
+++ b/MeshShader/include/IGPUMeshPipeline.h
@@ -0,0 +1,212 @@
+#pragma once
+#ifndef _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_
+#define _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_
+
+#include "nbl/video/IGPUPipelineLayout.h"
+#include "nbl/video/IGPURenderpass.h"
+#include "nbl/video/IGPUPipeline.h"
+
+//related spec
+
+//i feel like this MIGHT get stuffed into graphicspipeline but idk
+
+/*
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-PrimitiveId-06264
+** If the pipeline requires pre-rasterization shader state, it includes a mesh shader and the fragment shader code reads from an input variable that is decorated with PrimitiveId, then the mesh shader code must write to a matching output variable, decorated with PrimitiveId, in all execution paths
+
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07064
+* If renderPass is not VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, subpass viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader
+
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-None-02322
+* If the pipeline requires pre-rasterization shader state, and there are any mesh shader stages in the pipeline there must not be any shader stage in the pipeline with a Xfb execution mode
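+*** Xfb is the SPIR-V execution mode for transform feedback, so mesh pipelines cannot be combined with transform feedback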
+
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-shaderMeshEnqueue-10187
+* If the shaderMeshEnqueue feature is not enabled, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-flags-10188
+* If flags does not include VK_PIPELINE_CREATE_LIBRARY_BIT_KHR, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability
+*** my understanding is nabla strictly controls it's extensions, so this shouldnt be an issue
+
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07065
+* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the 
+*  pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY, or VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE
+*** this one seems the most relevant
+
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07066
+* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the 
+*  pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE, or VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT
+
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07067
+* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_VERTEX_INPUT_EXT
+
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07720
+* If renderPass is VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, and 
+*  VkPipelineRenderingCreateInfo::viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader
+
+
+* theres 1 or 2 more about pipeline libraries, but im not going to worry about that
+*/
+
+namespace nbl::asset {
+    class IMeshPipelineBase : public virtual core::IReferenceCounted {
+    public:
+        constexpr static inline uint8_t MESH_SHADER_STAGE_COUNT = 3u; // presumably task + mesh + fragment; not referenced anywhere in this header yet, may turn out redundant
+        struct SCachedCreationParams final {
+            SRasterizationParams rasterization = {};
+            SBlendParams blend = {};
+            uint32_t subpassIx = 0u; // subpass index; IGPUMeshPipeline::SCreationParams::valid() rejects values not below the renderpass' subpass count
+
+        };
+
+    };
+
+    template<typename PipelineLayoutType, typename RenderpassType>
+    class IMeshPipeline : public IPipeline<PipelineLayoutType>, public IMeshPipelineBase {
+    protected:
+        using renderpass_t = RenderpassType;
+    public:
+
+        // Tells whether `stagePresence` contains every stage this pipeline type insists on,
+        // which is both the mesh and the fragment stage.
+        static inline bool hasRequiredStages(const core::bitflag<hlsl::ShaderStage>& stagePresence)
+        {
+            // the mesh stage is the one this pipeline type cannot do without
+            // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-stage-02096
+            const bool meshPresent = stagePresence.hasFlags(hlsl::ShaderStage::ESS_MESH);
+
+            // NOTE(review): Vulkan itself does not appear to mandate a fragment stage (e.g. depth-only passes),
+            // demanding it is this class' own stricter rule - confirm whether it should stay
+            const bool fragmentPresent = stagePresence.hasFlags(hlsl::ShaderStage::ESS_FRAGMENT);
+
+            return meshPresent && fragmentPresent;
+        }
+
+    protected:
+        explicit IMeshPipeline(PipelineLayoutType* layout, const SCachedCreationParams& cachedParams, renderpass_t* renderpass) :
+            IPipeline<PipelineLayoutType>(core::smart_refctd_ptr<PipelineLayoutType>(layout)), // takes a reference on the layout
+            m_params(cachedParams), m_renderpass(core::smart_refctd_ptr<renderpass_t>(renderpass)) // and one on the renderpass
+        {}
+
+        SCachedCreationParams m_params = {};
+        core::smart_refctd_ptr<renderpass_t> m_renderpass = nullptr;
+    };
+
+}
+
+namespace nbl::video
+{
+
+    class IGPUMeshPipeline : public IGPUPipeline<asset::IMeshPipeline<const IGPUPipelineLayout, const IGPURenderpass>>
+    {
+        using pipeline_t = asset::IMeshPipeline<const IGPUPipelineLayout, const IGPURenderpass>;
+
+    public:
+        struct SCreationParams final : public SPipelineCreationParams<const IGPUMeshPipeline>
+        {
+            // layout, shaders, cached state, renderpass and flags of the pipeline to create; checked by `valid()`
+#define base_flag(F) static_cast<uint64_t>(pipeline_t::FLAGS::F)
+            enum class FLAGS : uint64_t
+            {
+                NONE = base_flag(NONE),
+                DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS),
+                ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES),
+                VIEW_INDEX_FROM_DEVICE_INDEX = 1 << 3,
+                FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED),
+                EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE),
+            };
+#undef base_flag
+
+            // Validates the parameters and accumulates the specialization validation result (`count`, `dataSize`)
+            // over all present shaders; a default constructed result is returned as soon as any check fails.
+            inline SSpecializationValidationResult valid() const
+            {
+                if (!layout)
+                    return {};
+                SSpecializationValidationResult retval = { .count = 0,.dataSize = 0 };
+
+                // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576
+                if (!renderpass || cached.subpassIx >= renderpass->getSubpassCount())
+                    return {};
+
+                // TODO: check rasterization samples, etc.
+                //rp->getCreationParameters().subpasses[i]
+
+                core::bitflag<hlsl::ShaderStage> stagePresence = {};
+
+                auto processSpecInfo = [&](const SShaderSpecInfo& specInfo, hlsl::ShaderStage stage)
+                    {
+                        if (!specInfo.shader) return true; // an absent stage is fine at this point, presence is judged afterwards
+                        if (!specInfo.accumulateSpecializationValidationResult(&retval)) return false;
+                        stagePresence |= stage;
+                        return true;
+                    };
+                if (!processSpecInfo(taskShader, hlsl::ShaderStage::ESS_TASK)) return {};
+                if (!processSpecInfo(meshShader, hlsl::ShaderStage::ESS_MESH)) return {};
+                if (!processSpecInfo(fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT)) return {};
+
+                if (!hasRequiredStages(stagePresence))
+                    return {};
+
+                //if (!vertexShader.shader) return {}; //i dont quite understand why this line was here. checking if the shader itself was made correctly?
+
+                return retval;
+            }
+
+            // Collects the stages whose shader is present and asks for a subgroup size of at least REQUIRE_4.
+            inline core::bitflag<hlsl::ShaderStage> getRequiredSubgroupStages() const
+            {
+                core::bitflag<hlsl::ShaderStage> stages = {};
+                auto processSpecInfo = [&](const SShaderSpecInfo& spec, hlsl::ShaderStage stage)
+                    {
+                        if (spec.shader && spec.requiredSubgroupSize >= SUBGROUP_SIZE::REQUIRE_4)
+                            stages |= stage;
+                    };
+                processSpecInfo(taskShader, hlsl::ShaderStage::ESS_TASK);
+                processSpecInfo(meshShader, hlsl::ShaderStage::ESS_MESH);
+                processSpecInfo(fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT);
+                return stages;
+            }
+
+            inline core::bitflag<FLAGS>& getFlags() { return flags; }
+
+            inline core::bitflag<FLAGS> getFlags() const { return flags; }
+
+            const IGPUPipelineLayout* layout = nullptr;
+            SShaderSpecInfo taskShader;
+            SShaderSpecInfo meshShader;
+            SShaderSpecInfo fragmentShader;
+            SCachedCreationParams cached = {};
+            renderpass_t* renderpass = nullptr;
+
+            // TODO: Could guess the required flags from SPIR-V introspection of declared caps
+            core::bitflag<FLAGS> flags = FLAGS::NONE;
+
+            inline uint32_t getShaderCount() const
+            {
+                uint32_t count = 0; // counts every stage that has a shader set (open question: start at 2 and only check the task shader?)
+                count += (taskShader.shader != nullptr);
+                count += (meshShader.shader != nullptr);
+                count += (fragmentShader.shader != nullptr);
+                return count;
+            }
+        };
+
+        inline core::bitflag<SCreationParams::FLAGS> getCreationFlags() const { return m_flags; }
+
+        // Vulkan: const VkPipeline*
+        virtual const void* getNativeHandle() const = 0;
+
+    protected:
+        // not explicit? NOTE: `params.layout` is dereferenced here, so it must not be null
+        IGPUMeshPipeline(const SCreationParams& params) :
+            IGPUPipeline(core::smart_refctd_ptr<const ILogicalDevice>(params.layout->getOriginDevice()), params.layout, params.cached, params.renderpass), m_flags(params.flags)
+        {
+        }
+        virtual ~IGPUMeshPipeline() override = default;
+
+        const core::bitflag<SCreationParams::FLAGS> m_flags;
+    };
+
+}
+
+#endif
\ No newline at end of file
diff --git a/MeshShader/include/transform.hpp b/MeshShader/include/transform.hpp
index fb1672c2f..747d06423 100644
--- a/MeshShader/include/transform.hpp
+++ b/MeshShader/include/transform.hpp
@@ -1,9 +1,6 @@
 #ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
 #define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
 
-
-#include "nbl/ui/ICursorControl.h"
-
 #include "nbl/ext/ImGui/ImGui.h"
 
 #include "imgui/imgui_internal.h"
@@ -14,149 +11,156 @@ struct TransformRequestParams
 {
 	float camDistance = 8.f;
 	uint8_t sceneTexDescIx = ~0;
-	bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false;
+	bool useWindow = true;
+	bool editTransformDecomposition = false;
+	bool enableViewManipulate = false;
 };
 
-nbl::hlsl::uint16_t2 EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params)
-{
-	static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE);
-	static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL);
-	static bool useSnap = false;
-	static float snap[3] = { 1.f, 1.f, 1.f };
-	static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f };
-	static float boundsSnap[] = { 0.1f, 0.1f, 0.1f };
-	static bool boundSizing = false;
-	static bool boundSizingSnap = false;
-
-	if (params.editTransformDecomposition)
-	{
-		if (ImGui::IsKeyPressed(ImGuiKey_T))
-			mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
-		if (ImGui::IsKeyPressed(ImGuiKey_R))
-			mCurrentGizmoOperation = ImGuizmo::ROTATE;
-		if (ImGui::IsKeyPressed(ImGuiKey_S))
-			mCurrentGizmoOperation = ImGuizmo::SCALE;
-		if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE))
-			mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
-		ImGui::SameLine();
-		if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE))
-			mCurrentGizmoOperation = ImGuizmo::ROTATE;
-		ImGui::SameLine();
-		if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE))
-			mCurrentGizmoOperation = ImGuizmo::SCALE;
-		if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL))
-			mCurrentGizmoOperation = ImGuizmo::UNIVERSAL;
-		float matrixTranslation[3], matrixRotation[3], matrixScale[3];
-		ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale);
-		ImGui::InputFloat3("Tr", matrixTranslation);
-		ImGui::InputFloat3("Rt", matrixRotation);
-		ImGui::InputFloat3("Sc", matrixScale);
-		ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix);
-
-		if (mCurrentGizmoOperation != ImGuizmo::SCALE)
+struct TransformWidget {
+	ImGuizmo::OPERATION mCurrentGizmoOperation{ ImGuizmo::TRANSLATE };
+	ImGuizmo::MODE mCurrentGizmoMode{ImGuizmo::LOCAL};
+	bool useSnap = false;
+	float snap[3] = { 1.f, 1.f, 1.f };
+	float bounds[6] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f };
+	float boundsSnap[3] = { 0.1f, 0.1f, 0.1f };
+	bool boundSizing = false;
+	bool boundSizingSnap = false;
+
+
+	void EditTransform(float* matrix, const TransformRequestParams& params) {
+
+
+		if (params.editTransformDecomposition)
 		{
-			if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL))
-				mCurrentGizmoMode = ImGuizmo::LOCAL;
+			if (ImGui::IsKeyPressed(ImGuiKey_T))
+				mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
+			if (ImGui::IsKeyPressed(ImGuiKey_R))
+				mCurrentGizmoOperation = ImGuizmo::ROTATE;
+			if (ImGui::IsKeyPressed(ImGuiKey_S))
+				mCurrentGizmoOperation = ImGuizmo::SCALE;
+			if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE))
+				mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
 			ImGui::SameLine();
-			if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD))
-				mCurrentGizmoMode = ImGuizmo::WORLD;
+			if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE))
+				mCurrentGizmoOperation = ImGuizmo::ROTATE;
+			ImGui::SameLine();
+			if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE))
+				mCurrentGizmoOperation = ImGuizmo::SCALE;
+			if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL))
+				mCurrentGizmoOperation = ImGuizmo::UNIVERSAL;
+			float matrixTranslation[3], matrixRotation[3], matrixScale[3];
+			ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale);
+			ImGui::InputFloat3("Tr", matrixTranslation);
+			ImGui::InputFloat3("Rt", matrixRotation);
+			ImGui::InputFloat3("Sc", matrixScale);
+			ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix);
+
+			if (mCurrentGizmoOperation != ImGuizmo::SCALE)
+			{
+				if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL))
+					mCurrentGizmoMode = ImGuizmo::LOCAL;
+				ImGui::SameLine();
+				if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD))
+					mCurrentGizmoMode = ImGuizmo::WORLD;
+			}
+			if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift))
+				useSnap = !useSnap;
+			ImGui::Checkbox("##UseSnap", &useSnap);
+			ImGui::SameLine();
+
+			switch (mCurrentGizmoOperation)
+			{
+			case ImGuizmo::TRANSLATE:
+				ImGui::InputFloat3("Snap", &snap[0]);
+				break;
+			case ImGuizmo::ROTATE:
+				ImGui::InputFloat("Angle Snap", &snap[0]);
+				break;
+			case ImGuizmo::SCALE:
+				ImGui::InputFloat("Scale Snap", &snap[0]);
+				break;
+			}
+			ImGui::Checkbox("Bound Sizing", &boundSizing);
+			if (boundSizing)
+			{
+				ImGui::PushID(3);
+				ImGui::Checkbox("##BoundSizing", &boundSizingSnap);
+				ImGui::SameLine();
+				ImGui::InputFloat3("Snap", boundsSnap);
+				ImGui::PopID();
+			}
 		}
-		if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift))
-			useSnap = !useSnap;
-		ImGui::Checkbox("##UseSnap", &useSnap);
-		ImGui::SameLine();
+	
+	}
 
-		switch (mCurrentGizmoOperation)
-		{
-		case ImGuizmo::TRANSLATE:
-			ImGui::InputFloat3("Snap", &snap[0]);
-			break;
-		case ImGuizmo::ROTATE:
-			ImGui::InputFloat("Angle Snap", &snap[0]);
-			break;
-		case ImGuizmo::SCALE:
-			ImGui::InputFloat("Scale Snap", &snap[0]);
-			break;
+
+	nbl::hlsl::uint16_t4 ViewingGizmo(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
+		ImGuiIO& io = ImGui::GetIO();
+		float viewManipulateRight = io.DisplaySize.x;
+		float viewManipulateTop = 0;
+		static ImGuiWindowFlags gizmoWindowFlags = 0;
+		SImResourceInfo info;
+		info.textureID = params.sceneTexDescIx;
+		info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER;
+
+		nbl::hlsl::uint16_t4 retval;
+		if (params.useWindow) {
+			ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing);
+			ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing);
+			ImGui::PushStyleColor(ImGuiCol_Border, (ImVec4)ImColor(0.35f, 0.3f, 0.3f));
+			
+			ImGui::Begin("Gizmo", 0, gizmoWindowFlags);
+			ImGuizmo::SetDrawlist();
+
+			ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
+			ImVec2 windowPos = ImGui::GetWindowPos();
+			ImVec2 cursorPos = ImGui::GetCursorScreenPos();
+
+			ImGui::Image(info, contentRegionSize);
+			ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
+			retval = { cursorPos.x, cursorPos.y, contentRegionSize.x,contentRegionSize.y };
+
+			viewManipulateRight = cursorPos.x + contentRegionSize.x;
+			viewManipulateTop = cursorPos.y;
+
+			ImGuiWindow* window = ImGui::GetCurrentWindow();
+			gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0);
 		}
-		ImGui::Checkbox("Bound Sizing", &boundSizing);
-		if (boundSizing)
+		else
 		{
-			ImGui::PushID(3);
-			ImGui::Checkbox("##BoundSizing", &boundSizingSnap);
-			ImGui::SameLine();
-			ImGui::InputFloat3("Snap", boundsSnap);
-			ImGui::PopID();
+			ImGui::SetNextWindowPos(ImVec2(0, 0));
+			ImGui::SetNextWindowSize(io.DisplaySize);
+			ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window
+			ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs);
+
+			ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
+			ImVec2 cursorPos = ImGui::GetCursorScreenPos();
+
+			ImGui::Image(info, contentRegionSize);
+			ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
+			retval = { cursorPos.x, cursorPos.y,  contentRegionSize.x,contentRegionSize.y };
+
+			viewManipulateRight = cursorPos.x + contentRegionSize.x;
+			viewManipulateTop = cursorPos.y;
 		}
-	}
 
-	ImGuiIO& io = ImGui::GetIO();
-	float viewManipulateRight = io.DisplaySize.x;
-	float viewManipulateTop = 0;
-	static ImGuiWindowFlags gizmoWindowFlags = 0;
-
-	/*
-		for the "useWindow" case we just render to a gui area, 
-		otherwise to fake full screen transparent window
-
-		note that for both cases we make sure gizmo being 
-		rendered is aligned to our texture scene using 
-        imgui  "cursor" screen positions
-	*/
-// TODO: this shouldn't be handled here I think
-	SImResourceInfo info;
-	info.textureID = params.sceneTexDescIx;
-	info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER;
-
-	nbl::hlsl::uint16_t2 retval;
-	if (params.useWindow)
-	{
-		ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing);
-		ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing);
-		ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f));
-		ImGui::Begin("Gizmo", 0, gizmoWindowFlags);
-		ImGuizmo::SetDrawlist();
-
-		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
-		ImVec2 windowPos = ImGui::GetWindowPos();
-		ImVec2 cursorPos = ImGui::GetCursorScreenPos();
-
-		ImGui::Image(info, contentRegionSize);
-		ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
-		retval = {contentRegionSize.x,contentRegionSize.y};
-
-		viewManipulateRight = cursorPos.x + contentRegionSize.x;
-		viewManipulateTop = cursorPos.y;
-
-		ImGuiWindow* window = ImGui::GetCurrentWindow();
-		gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0);
-	}
-	else
-	{
-		ImGui::SetNextWindowPos(ImVec2(0, 0));
-		ImGui::SetNextWindowSize(io.DisplaySize);
-		ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window
-		ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs);
-
-		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
-		ImVec2 cursorPos = ImGui::GetCursorScreenPos();
-
-		ImGui::Image(info, contentRegionSize);
-		ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
-		retval = {contentRegionSize.x,contentRegionSize.y};
-
-		viewManipulateRight = cursorPos.x + contentRegionSize.x;
-		viewManipulateTop = cursorPos.y;
-	}
+		ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL);
+
+		if (params.enableViewManipulate)
+			ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
+
+		ImGui::End();
+		ImGui::PopStyleColor();
 
-	ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL);
+		return retval;
+	}
 
-	if(params.enableViewManipulate)
-		ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
+	nbl::hlsl::uint16_t4 Update(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
+		EditTransform(matrix, params);
+		return ViewingGizmo(cameraView, cameraProjection, matrix, params);
+	}
 
-	ImGui::End();
-	ImGui::PopStyleColor();
+};
 
-	return retval;
-}
 
 #endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__
\ No newline at end of file
diff --git a/MeshShader/main.cpp b/MeshShader/main.cpp
index 643cab079..b50db465b 100644
--- a/MeshShader/main.cpp
+++ b/MeshShader/main.cpp
@@ -3,6 +3,9 @@
 // For conditions of distribution and use, see copyright notice in nabla.h
 
 #include "common.hpp"
+#include "nbl/ui/ICursorControl.h"
+
+#include "IGPUMeshPipeline.h"
 
 /*
 Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window.
@@ -225,7 +228,6 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			if (!m_framebuffer || m_framebuffer->getCreationParameters().width!=virtualWindowRes[0] || m_framebuffer->getCreationParameters().height!=virtualWindowRes[1])
 				recreateFramebuffer(virtualWindowRes);
 
-			//
 			const auto resourceIx = m_realFrameIx % MaxFramesInFlight;
 
 			auto* const cb = m_cmdBufs.data()[resourceIx].get();
@@ -237,7 +239,7 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			{
 				cb->beginDebugMarker("UISampleApp Scene Frame");
 				{
-					const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth=0.f };
+					const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f };
 					const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo =
 					{
 						.framebuffer = m_framebuffer.get(),
@@ -248,35 +250,18 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 							.extent = {virtualWindowRes[0],virtualWindowRes[1]}
 						}
 					};
-					beginRenderpass(cb,renderpassInfo);
+					beginRenderpass(cb, renderpassInfo);
 				}
 				// draw scene
-				{
-					float32_t3x4 viewMatrix;
-					float32_t4x4 viewProjMatrix;
-					// TODO: get rid of legacy matrices
-					{
-						const auto& camera = interface.camera;
-						memcpy(&viewMatrix,camera.getViewMatrix().pointer(),sizeof(viewMatrix));
-						memcpy(&viewProjMatrix,camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix));
-					}
-					const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix);
-
-					// tear down scene every frame
-					auto& instance = m_renderer->m_instances[0];
-					memcpy(&instance.world,&interface.model,sizeof(instance.world));
-					instance.packedGeo = m_renderer->getGeometries().data() + interface.gcIndex;
- 					m_renderer->render(cb,viewParams);
-				}
+				UpdateScene(cb);
 				cb->endRenderPass();
 				cb->endDebugMarker();
 			}
 			{
 				cb->beginDebugMarker("UISampleApp IMGUI Frame");
-				{
+				{ //begin imgui subpass
 					auto scRes = static_cast<CDefaultSwapchainFramebuffers*>(m_surface->getSwapchainResources());
-					const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo =
-					{
+					const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = {
 						.framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex),
 						.colorClearValues = &clearValue,
 						.depthStencilClearValues = nullptr,
@@ -285,7 +270,7 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 							.extent = {m_window->getWidth(),m_window->getHeight()}
 						}
 					};
-					beginRenderpass(cb,renderpassInfo);
+					beginRenderpass(cb, renderpassInfo);
 				}
 				// draw ImGUI
 				{
@@ -308,8 +293,6 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			}
 			cb->end();
 
-			//updateGUIDescriptorSet();
-
 			IQueue::SSubmitInfo::SSemaphoreInfo retval =
 			{
 				.semaphore = m_semaphore.get(),
@@ -343,7 +326,7 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			}
 
 
-			m_window->setCaption("[Nabla Engine] UI App Test Demo");
+			m_window->setCaption("[Nabla Engine] Mesh Shader Demo");
 			return retval;
 		}
 
@@ -384,6 +367,26 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 		}
 
 	private:
+
+		void UpdateScene(nbl::video::IGPUCommandBuffer* cb) {
+			float32_t3x4 viewMatrix;
+			float32_t4x4 viewProjMatrix;
+			// TODO: get rid of legacy matrices //<-- camera.getViewMatrix returns matrix3x4SIMD
+			{
+				const auto& camera = interface.camera;
+				memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix));
+				memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix));
+			}
+			const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix);
+
+			// tear down scene every frame
+			auto& instance = m_renderer->m_instances[0];
+			memcpy(&instance.world, &interface.model, sizeof(instance.world));
+			instance.packedGeo = m_renderer->getGeometries().data() + interface.gcIndex;
+			m_renderer->render(cb, viewParams);
+		}
+
+
 		inline void update(const std::chrono::microseconds nextPresentationTimestamp)
 		{
 			auto& camera = interface.camera;
@@ -567,8 +570,6 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			{
 				ImGuiIO& io = ImGui::GetIO();
 
-				// TODO: why is this a lambda and not just an assignment in a scope ?
-				camera.setProjectionMatrix([&]() 
 				{
 					matrix4SIMD projection;
 
@@ -586,9 +587,8 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 						else
 							projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar);
 					}
-
-					return projection;
-				}());
+					camera.setProjectionMatrix(projection);
+				}
 
 				ImGuizmo::SetOrthographic(false);
 				ImGuizmo::BeginFrame();
@@ -736,7 +736,9 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 						imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/	
 
 					transformParams.editTransformDecomposition = true;
-					sceneResolution = EditTransform(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
+					static TransformWidget transformWidget{};
+					widgetBox = transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
+					sceneResolution = widgetBox.zw;
 				}
 
 				model = core::transpose(imguizmoM16InOut.model).extractSub3x4();
@@ -873,6 +875,7 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			std::string_view objectName;
 			TransformRequestParams transformParams;
 			uint16_t2 sceneResolution = {1280,720};
+			uint16_t4 widgetBox;
 			float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f;
 			float viewWidth = 10.f;
 			float camYAngle = 165.f / 180.f * 3.14159f;

From 8d932f37b407bf8c3f7c2ff8ea71c441eeb72181 Mon Sep 17 00:00:00 2001
From: Corey <corey.w108@gmail.com>
Date: Thu, 9 Oct 2025 04:47:46 -0500
Subject: [PATCH 3/8] ALMOST stable

---
 MeshShader/CMakeLists.txt                     |  23 ++
 MeshShader/README.md                          |  15 +-
 MeshShader/app_resources/geom.frag.hlsl       |  13 +
 MeshShader/app_resources/geom.mesh.hlsl       |  72 +++++
 MeshShader/app_resources/geom.task.hlsl       |  23 ++
 .../app_resources/task_mesh_common.hlsl       |  34 ++
 MeshShader/include/IGPUMeshPipeline.h         | 212 ------------
 MeshShader/include/transform.hpp              |  50 +--
 MeshShader/main.cpp                           | 303 ++++++++++++------
 9 files changed, 386 insertions(+), 359 deletions(-)
 create mode 100644 MeshShader/app_resources/geom.frag.hlsl
 create mode 100644 MeshShader/app_resources/geom.mesh.hlsl
 create mode 100644 MeshShader/app_resources/geom.task.hlsl
 create mode 100644 MeshShader/app_resources/task_mesh_common.hlsl
 delete mode 100644 MeshShader/include/IGPUMeshPipeline.h

diff --git a/MeshShader/CMakeLists.txt b/MeshShader/CMakeLists.txt
index 8f80cc0ea..330ee55d4 100644
--- a/MeshShader/CMakeLists.txt
+++ b/MeshShader/CMakeLists.txt
@@ -1,3 +1,8 @@
+include(common RESULT_VARIABLE RES)
+if(NOT RES)
+        message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
+endif()
+
 if(NBL_BUILD_IMGUI)
 	set(NBL_EXTRA_SOURCES
 		#"${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" #just leaving this so i can easily reference it later
@@ -12,6 +17,24 @@ if(NBL_BUILD_IMGUI)
 		imguizmo
 		"${NBL_EXT_IMGUI_UI_LIB}"
 	)
+	if(NBL_EMBED_BUILTIN_RESOURCES)
+		set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
+		set(RESOURCE_DIR "app_resources")
+
+		get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
+		get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
+		get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)
+
+		file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
+		foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
+			LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
+		endforeach()
+
+		ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")
+
+		LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
+	endif()
+	# NOTE(review): the block above reads EXECUTABLE_NAME, which is presumably only defined by nbl_create_executable_project() below - confirm it should not run after that call.
 	
 	# TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !?
 	nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SEARCH_DIRECTORIES}" "${NBL_LIBRARIES}")
diff --git a/MeshShader/README.md b/MeshShader/README.md
index 271129235..c52cfcd7a 100644
--- a/MeshShader/README.md
+++ b/MeshShader/README.md
@@ -1,4 +1,4 @@
-9/30/2025 - GDBobby
+## 9/30/2025 - GDBobby
 Here's the current plan, front to back
 
 1. Remove all unnecessary parts from my copy of example 61.
@@ -27,11 +27,20 @@ I think, to prevent controlling two different branches in two different repos, I
 Once everything start to come together, I'll start moving things, like the Mesh Pipeline class, into more appropriate places, like Nabla itself.
 
 
-9/31
+## 9/31
 I'll create a mesh shader tomorrow. I don't really know what to do yet but I'll start with procedural gen.
 
 I think I'll also make a different pipeline object that supports the geometry from example 61?
 
 I had my fun with viewports. idk what i expected tbh
 
-I need to search a little deeper in the spec for other mesh pipeline related rules. I need to research subpasses as well.
\ No newline at end of file
+I need to search a little deeper in the spec for other mesh pipeline related rules. I need to research subpasses as well.
+
+
+## 10/3
+Beginning shader experimentation. Setting up easy reload of shaders so I don't have to relaunch every test iteration.
+
+## 10/6
+I need to add CPU-side verification that the mesh shader's vertex and primitive counts are below the Vulkan limits, the same way the workgroup size is verified.
+
+Mesh and Task shaders having branches where the output is not defined is incorrect. The glslc compiler won't warn the user, I'll have to check for DXC. Nvidia will assume it's a 0 group output, but AMD will get DEVICE_LOST. If it's possible, having a warning or compile check for that would be nice. Most likely outside the scope of Nabla, but possibly not. I'll have to ask.
\ No newline at end of file
diff --git a/MeshShader/app_resources/geom.frag.hlsl b/MeshShader/app_resources/geom.frag.hlsl
new file mode 100644
index 000000000..0c4e051e0
--- /dev/null
+++ b/MeshShader/app_resources/geom.frag.hlsl
@@ -0,0 +1,13 @@
+
+struct VertexOut {
+    float32_t4 ndc : SV_Position;
+    float32_t3 meta : COLOR1;
+};
+
+// Pixel stage: remaps the interpolated normal from [-1,1] to [0,1] RGB; a zero `meta` (no normal available) is passed through because normalize(0) is NaN.
+[shader("pixel")]
+float32_t4 main(VertexOut input) : SV_Target0
+{
+    const float32_t3 normal = any(input.meta) ? normalize(input.meta) : input.meta;
+    return float32_t4(normal * 0.5f + float32_t3(0.5f, 0.5f, 0.5f), 1.f);
+}
\ No newline at end of file
diff --git a/MeshShader/app_resources/geom.mesh.hlsl b/MeshShader/app_resources/geom.mesh.hlsl
new file mode 100644
index 000000000..0fdee3c55
--- /dev/null
+++ b/MeshShader/app_resources/geom.mesh.hlsl
@@ -0,0 +1,72 @@
+//https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html#primitive-attributes
+
+#include "task_mesh_common.hlsl"
+
+
+//utb is short for "uniform texel buffer", or its a storage buffer with vec4s
+[[vk::binding(0)]] StructuredBuffer<float32_t4> utbs[PushDescCount];
+//none of the objects use the index buffer
+
+struct VertexOut {
+    float32_t4 ndc : SV_Position;
+    float32_t3 meta : COLOR1;
+};
+
+[numthreads(WORKGROUP_SIZE,1,1)]
+
+[outputtopology("triangle")]
+[shader("mesh")]
+void main(
+    in uint3 id : SV_DispatchThreadID,
+    in uint3 groupThreadID : SV_GroupThreadID,
+    in payload TaskToMeshPayload taskToMeshPayload,
+    out vertices VertexOut verts[WORKGROUP_SIZE],
+    out indices uint3 prims[WORKGROUP_SIZE]
+)
+{
+    MeshData meshDataCopy = meshData[taskToMeshPayload.objectType[groupThreadID.x]];
+    // Each thread emits at most one vertex and one triangle of the MeshData selected through the task payload.
+    //if the ratio isnt 1 object to 1 transform, the payload can be used to pass in a transform index
+    //or if it isnt 1 task shader launching every mesh shader, the payload will need to handle
+    const float32_t4x4 worldViewProj = pc.viewProj * transform[groupThreadID.x];
+    // The output counts must be declared before the first write to verts/prims; writing first is undefined behavior.
+    SetMeshOutputCounts(meshDataCopy.vertCount, meshDataCopy.primCount);
+    if(id.x < meshDataCopy.vertCount){
+        const float32_t3 position = utbs[meshDataCopy.positionView][id.x].xyz;
+
+        verts[id.x].ndc = mul(float32_t4(position, 1.0), worldViewProj);
+
+
+        if (meshDataCopy.normalView < PushDescCount) { // && meshDataCopy.objType != CONE_OBJECT - just going to set cone_object normalView to pushdesccount
+            verts[id.x].meta = utbs[meshDataCopy.normalView][id.x].xyz;
+        }
+        else {
+        //i could reconstruct the normal right here in the mesh shader for the cone
+            //verts[id.x].meta = mul(inverse(transpose(pc.matrices.normal)),position);
+            verts[id.x].meta = float32_t3(0.0, 0.0, 0.0); //id like to check if cones even have a normal first
+        }
+    }
+
+    if(id.x < meshDataCopy.primCount){
+        if(meshDataCopy.objType == T_FAN_OBJECT_TYPE){
+            uint3 prim = uint3(0, id.x + 1, id.x + 2);
+            if(prim.y >= meshDataCopy.vertCount){
+                //not adding
+            }
+            if(prim.z >= meshDataCopy.vertCount){
+                prim.z = 1;
+            }
+            prims[id.x] = prim;
+        }
+        else{
+            uint3 prim = uint3(id.x, id.x + 1, id.x + 2);
+            bool lessThan = (prim.x < meshDataCopy.vertCount) && (prim.y < meshDataCopy.vertCount) && (prim.z < meshDataCopy.vertCount);
+            if(lessThan){
+                prims[id.x] = prim;
+            }
+        }
+    }
+    // NOTE(review): meshData/transform are indexed by groupThreadID.x while verts/prims use SV_DispatchThreadID, which exceeds WORKGROUP_SIZE past the first workgroup - presumably these should be the workgroup index and groupThreadID.x; confirm against the dispatch.
+    // NOTE(review): `pc.viewProj * transform[...]` is a component-wise product in HLSL; a matrix product needs mul() - confirm intent.
+
+}
\ No newline at end of file
diff --git a/MeshShader/app_resources/geom.task.hlsl b/MeshShader/app_resources/geom.task.hlsl
new file mode 100644
index 000000000..3c228a60f
--- /dev/null
+++ b/MeshShader/app_resources/geom.task.hlsl
@@ -0,0 +1,23 @@
+
+#include "task_mesh_common.hlsl"
+
+groupshared TaskToMeshPayload taskToMeshPayload;
+
+[numthreads(WORKGROUP_SIZE,1,1)]
+void main(
+	in uint3 id : SV_DispatchThreadID,
+	in uint3 groupThreadId : SV_GroupThreadID
+	//out payload TaskToMeshPayload taskToMeshPayload, interestingly, thats not how it's done here
+){
+	// Writes one payload entry (the object type index) per instance, then launches one mesh workgroup per entry.
+	uint objectCount = 0;
+	for(uint i = 0; i < OBJECT_COUNT; i++){
+		for(uint j = 0; j < pc.objectCount[i]; j++){
+			taskToMeshPayload.objectType[objectCount] = i;
+			objectCount++;
+		}
+		// objectCount was already advanced once per instance above; adding pc.objectCount[i] again double-counted and left gaps in the payload
+	}
+
+	DispatchMesh(objectCount, 1, 1, taskToMeshPayload);
+}
\ No newline at end of file
diff --git a/MeshShader/app_resources/task_mesh_common.hlsl b/MeshShader/app_resources/task_mesh_common.hlsl
new file mode 100644
index 000000000..125a411a5
--- /dev/null
+++ b/MeshShader/app_resources/task_mesh_common.hlsl
@@ -0,0 +1,34 @@
+
+//this is user defined data sent from the task shader to the mesh shader
+//1 packet is sent, but it can use arrays so that each workgroup can receive customized data
+struct TaskToMeshPayload {
+    uint objectType[INSTANCE_COUNT];
+};
+
+//1 is cone, 2 is for fan, anything else for triangle list without the special normal calc.
+//cone can be handled in the task shader or the mesh shader, I'm going to handle it in the task shader
+//#define OTHER_OBJECTS 0
+#define CONE_OBJECT_TYPE 1
+#define T_FAN_OBJECT_TYPE 2
+struct MeshData{
+    uint vertCount;
+    uint primCount;
+    uint objType; 
+	uint positionView;
+	uint normalView;
+};
+
+[[vk::binding(1)]] cbuffer MeshDataBuffer {
+    // One MeshData record per object type, followed by one transform per instance slot.
+    MeshData meshData[OBJECT_COUNT];
+    float4x4 transform[INSTANCE_COUNT]; //this is going to be based on device limits
+};
+
+#define PushDescCount ((0x1<<16)-1) // fully parenthesized so the macro expands safely inside larger expressions
+struct SPushConstants {
+	float4x4 viewProj;
+    uint objectCount[OBJECT_COUNT];
+};
+
+//im not keen on trying to figure out how the push constant abstraction worked before without documentation
+[[vk::push_constant]] SPushConstants pc;
\ No newline at end of file
diff --git a/MeshShader/include/IGPUMeshPipeline.h b/MeshShader/include/IGPUMeshPipeline.h
deleted file mode 100644
index 94a83c79a..000000000
--- a/MeshShader/include/IGPUMeshPipeline.h
+++ /dev/null
@@ -1,212 +0,0 @@
-#pragma once
-#ifndef _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_
-#define _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_
-
-#include "nbl/video/IGPUPipelineLayout.h"
-#include "nbl/video/IGPURenderpass.h"
-#include "nbl/video/IGPUPipeline.h"
-
-//related spec
-
-//i feel like this MIGHT get stuffed into graphicspipeline but idk
-
-/*
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-PrimitiveId-06264
-** If the pipeline requires pre-rasterization shader state, it includes a mesh shader and the fragment shader code reads from an input variable that is decorated with PrimitiveId, then the mesh shader code must write to a matching output variable, decorated with PrimitiveId, in all execution paths
-
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07064
-* If renderPass is not VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, subpass viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader
-
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-None-02322
-* If the pipeline requires pre-rasterization shader state, and there are any mesh shader stages in the pipeline there must not be any shader stage in the pipeline with a Xfb execution mode
-*** whats a xfb
-
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-shaderMeshEnqueue-10187
-* If the shaderMeshEnqueue feature is not enabled, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-flags-10188
-* If flags does not include VK_PIPELINE_CREATE_LIBRARY_BIT_KHR, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability
-*** my understanding is nabla strictly controls it's extensions, so this shouldnt be an issue
-
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07065
-* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the 
-*  pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY, or VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE
-*** this one seems the most relevant
-
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07066
-* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the 
-*  pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE, or VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT
-
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07067
-* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_VERTEX_INPUT_EXT
-
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07720
-* If renderPass is VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, and 
-*  VkPipelineRenderingCreateInfo::viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader
-
-
-* theres 1 or 2 more about pipeline libraries, but im not going to worry about that
-*/
-
-namespace nbl::asset {
-    class IMeshPipelineBase : public virtual core::IReferenceCounted {
-    public:
-        constexpr static inline uint8_t MESH_SHADER_STAGE_COUNT = 3u; //i dont know what this is going to be used for yet, might be redundant
-        struct SCachedCreationParams final {
-            SRasterizationParams rasterization = {};
-            SBlendParams blend = {};
-            uint32_t subpassIx = 0u; //this subpass stuff is eluding me rn. i might just need to crack open the vulkan documentation
-
-        };
-
-    };
-
-    template<typename PipelineLayoutType, typename RenderpassType>
-    class IMeshPipeline : public IPipeline<PipelineLayoutType>, public IMeshPipelineBase {
-    protected:
-        using renderpass_t = RenderpassType;
-    public:
-
-        static inline bool hasRequiredStages(const core::bitflag<hlsl::ShaderStage>& stagePresence)
-        {
-
-            // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-stage-02096
-            if (!stagePresence.hasFlags(hlsl::ShaderStage::ESS_MESH)) {
-                return false;
-            }
-            //i dont quite understand why igraphicspipeline doesnt require a fragment shader. is it not required by vulkan?
-            if (!stagePresence.hasFlags(hlsl::ShaderStage::ESS_FRAGMENT)) {
-                return false;
-            }
-
-            return true;
-        }
-
-    protected:
-        explicit IMeshPipeline(PipelineLayoutType* layout, const SCachedCreationParams& cachedParams, renderpass_t* renderpass) :
-            IPipeline<PipelineLayoutType>(core::smart_refctd_ptr<PipelineLayoutType>(layout)),
-            m_params(cachedParams), m_renderpass(core::smart_refctd_ptr<renderpass_t>(renderpass))
-        {}
-
-        SCachedCreationParams m_params = {};
-        core::smart_refctd_ptr<renderpass_t> m_renderpass = nullptr;
-    };
-
-}
-
-namespace nbl::video
-{
-
-    class IGPUMeshPipeline : public IGPUPipeline<asset::IMeshPipeline<const IGPUPipelineLayout, const IGPURenderpass>>
-    {
-        using pipeline_t = asset::IMeshPipeline<const IGPUPipelineLayout, const IGPURenderpass>;
-
-    public:
-        struct SCreationParams final : public SPipelineCreationParams<const IGPUMeshPipeline>
-        {
-        public:
-#define base_flag(F) static_cast<uint64_t>(pipeline_t::FLAGS::F)
-            enum class FLAGS : uint64_t
-            {
-                NONE = base_flag(NONE),
-                DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS),
-                ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES),
-                VIEW_INDEX_FROM_DEVICE_INDEX = 1 << 3,
-                FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED),
-                EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE),
-            };
-#undef base_flag
-
-            inline SSpecializationValidationResult valid() const
-            {
-                if (!layout)
-                    return {};
-                SSpecializationValidationResult retval = { .count = 0,.dataSize = 0 };
-                if (!layout)
-                    return {};
-
-                // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576
-                if (!renderpass || cached.subpassIx >= renderpass->getSubpassCount())
-                    return {};
-
-                // TODO: check rasterization samples, etc.
-                //rp->getCreationParameters().subpasses[i]
-
-                core::bitflag<hlsl::ShaderStage> stagePresence = {};
-
-                auto processSpecInfo = [&](const SShaderSpecInfo& specInfo, hlsl::ShaderStage stage)
-                    {
-                        if (!specInfo.shader) return true;
-                        if (!specInfo.accumulateSpecializationValidationResult(&retval)) return false;
-                        stagePresence |= stage;
-                        return true;
-                    };
-                if (!processSpecInfo(taskShader, hlsl::ShaderStage::ESS_TASK)) return {};
-                if (!processSpecInfo(meshShader, hlsl::ShaderStage::ESS_MESH)) return {};
-                if (!processSpecInfo(fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT)) return {};
-
-                if (!hasRequiredStages(stagePresence))
-                    return {};
-
-                //if (!vertexShader.shader) return {}; //i dont quite understand why this line was here. checking if the shader itself was made correctly?
-
-                return retval;
-            }
-
-            inline core::bitflag<hlsl::ShaderStage> getRequiredSubgroupStages() const
-            {
-                core::bitflag<hlsl::ShaderStage> stages = {};
-                auto processSpecInfo = [&](const SShaderSpecInfo& spec, hlsl::ShaderStage stage)
-                    {
-                        if (spec.shader && spec.requiredSubgroupSize >= SUBGROUP_SIZE::REQUIRE_4) {
-                            stages |= stage;
-                        }
-                    };
-                processSpecInfo(taskShader, hlsl::ESS_TASK);
-                processSpecInfo(meshShader, hlsl::ESS_MESH);
-                processSpecInfo(fragmentShader, hlsl::ESS_FRAGMENT);
-                return stages;
-            }
-
-            inline core::bitflag<FLAGS>& getFlags() { return flags; }
-
-            inline core::bitflag<FLAGS> getFlags() const { return flags; }
-
-            const IGPUPipelineLayout* layout = nullptr;
-            SShaderSpecInfo taskShader;
-            SShaderSpecInfo meshShader;
-            SShaderSpecInfo fragmentShader;
-            SCachedCreationParams cached = {};
-            renderpass_t* renderpass = nullptr;
-
-            // TODO: Could guess the required flags from SPIR-V introspection of declared caps
-            core::bitflag<FLAGS> flags = FLAGS::NONE;
-
-            inline uint32_t getShaderCount() const
-            {
-                uint32_t count = 0; //count = 2 and only check task shader??
-                count += (taskShader.shader != nullptr);
-                count += (meshShader.shader != nullptr);
-                count += (fragmentShader.shader != nullptr);
-                return count;
-            }
-        };
-
-        inline core::bitflag<SCreationParams::FLAGS> getCreationFlags() const { return m_flags; }
-
-        // Vulkan: const VkPipeline*
-        virtual const void* getNativeHandle() const = 0;
-
-    protected:
-        // not explicit?
-        IGPUMeshPipeline(const SCreationParams& params) :
-            IGPUPipeline(core::smart_refctd_ptr<const ILogicalDevice>(params.layout->getOriginDevice()), params.layout, params.cached, params.renderpass), m_flags(params.flags)
-        {
-        }
-        virtual ~IGPUMeshPipeline() override = default;
-
-        const core::bitflag<SCreationParams::FLAGS> m_flags;
-    };
-
-}
-
-#endif
\ No newline at end of file
diff --git a/MeshShader/include/transform.hpp b/MeshShader/include/transform.hpp
index 747d06423..e1aed2224 100644
--- a/MeshShader/include/transform.hpp
+++ b/MeshShader/include/transform.hpp
@@ -94,7 +94,7 @@ struct TransformWidget {
 	}
 
 
-	nbl::hlsl::uint16_t4 ViewingGizmo(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
+	void ViewingGizmo(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
 		ImGuiIO& io = ImGui::GetIO();
 		float viewManipulateRight = io.DisplaySize.x;
 		float viewManipulateTop = 0;
@@ -103,61 +103,17 @@ struct TransformWidget {
 		info.textureID = params.sceneTexDescIx;
 		info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER;
 
-		nbl::hlsl::uint16_t4 retval;
-		if (params.useWindow) {
-			ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing);
-			ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing);
-			ImGui::PushStyleColor(ImGuiCol_Border, (ImVec4)ImColor(0.35f, 0.3f, 0.3f));
-			
-			ImGui::Begin("Gizmo", 0, gizmoWindowFlags);
-			ImGuizmo::SetDrawlist();
-
-			ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
-			ImVec2 windowPos = ImGui::GetWindowPos();
-			ImVec2 cursorPos = ImGui::GetCursorScreenPos();
-
-			ImGui::Image(info, contentRegionSize);
-			ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
-			retval = { cursorPos.x, cursorPos.y, contentRegionSize.x,contentRegionSize.y };
-
-			viewManipulateRight = cursorPos.x + contentRegionSize.x;
-			viewManipulateTop = cursorPos.y;
-
-			ImGuiWindow* window = ImGui::GetCurrentWindow();
-			gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0);
-		}
-		else
-		{
-			ImGui::SetNextWindowPos(ImVec2(0, 0));
-			ImGui::SetNextWindowSize(io.DisplaySize);
-			ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window
-			ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs);
-
-			ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
-			ImVec2 cursorPos = ImGui::GetCursorScreenPos();
-
-			ImGui::Image(info, contentRegionSize);
-			ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
-			retval = { cursorPos.x, cursorPos.y,  contentRegionSize.x,contentRegionSize.y };
-
-			viewManipulateRight = cursorPos.x + contentRegionSize.x;
-			viewManipulateTop = cursorPos.y;
-		}
-
 		ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL);
 
 		if (params.enableViewManipulate)
 			ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
 
 		ImGui::End();
-		ImGui::PopStyleColor();
-
-		return retval;
 	}
 
-	nbl::hlsl::uint16_t4 Update(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
+	void Update(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
 		EditTransform(matrix, params);
-		return ViewingGizmo(cameraView, cameraProjection, matrix, params);
+		ViewingGizmo(cameraView, cameraProjection, matrix, params);
 	}
 
 };
diff --git a/MeshShader/main.cpp b/MeshShader/main.cpp
index b50db465b..4baff6159 100644
--- a/MeshShader/main.cpp
+++ b/MeshShader/main.cpp
@@ -5,9 +5,13 @@
 #include "common.hpp"
 #include "nbl/ui/ICursorControl.h"
 
-#include "IGPUMeshPipeline.h"
+struct MeshletPush { //host-side push constant block; sizeof(MeshletPush) is the push constant range size of the meshlet pipeline layout
+	float32_t4x4 viewProj; //nbl::core::matrix4SIMD is 128bit??
+	constexpr static uint8_t object_type_count_max = 16;//it can go up til this struct hits the limit for push size
+	uint32_t objectCount[object_type_count_max]; //NOTE(review): presumably mirrors SPushConstants::objectCount[OBJECT_COUNT] in task_mesh_common.hlsl - confirm the lengths agree
+};
 
-/*
+/* 
 Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window.
 
 Written with Nabla's UI extension and got integrated with ImGuizmo to handle scene's object translations.
@@ -43,6 +47,12 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			}
 			
 			const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()};
+			//auto creator = core::make_smart_refctd_ptr<CGeometryCreator>();
+			//auto cube = creator->createCube({ 1.f,1.f,1.f });
+			//id like to combine all the vertices into 1 buffer but given how it's set up, thats out of scope
+			//cube->getPositionView();
+
+
 			m_scene = CGeometryCreatorScene::create(
 				{
 					.transferQueue = getTransferUpQueue(),
@@ -52,6 +62,10 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 				},
 				CSimpleDebugRenderer::DefaultPolygonGeometryPatch
 			);
+			for(uint8_t i = 0; i < m_scene->getInitParams().geometries.size(); i++){
+				auto const& geom = m_scene->getInitParams().geometries[i];
+				printf("%s - %zu - %zu\n", m_scene->getInitParams().geometryNames[i].c_str(), geom->getVertexReferenceCount(), geom->getIndexCount());				
+			}
 			
 			// for the scene drawing pass
 			{
@@ -139,17 +153,17 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(),m_renderpass.get(),0,{&geometries.front().get(),geometries.size()});
 			// special case
 			{
-				const auto& pipelines = m_renderer->getInitParams().pipelines;
+				//const auto& pipelines = m_renderer->getInitParams().pipelines;
 				auto ix = 0u;
 				for (const auto& name : m_scene->getInitParams().geometryNames)
 				{
 					if (name=="Cone")
-						m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone];
+						//m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone];
 					ix++;
 				}
 			}
 			// we'll only display one thing at a time
-			m_renderer->m_instances.resize(1);
+			//m_renderer->m_instances.resize(1);
 
 			// Create ImGUI
 			{
@@ -204,13 +218,159 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 				imgui->registerListener([this](){interface();});
 			}
 
+			//create meshlet pipeline
+			CreateMeshPipelines();
+
 			interface.camera.mapKeysToArrows();
 
 			onAppInitializedFinish();
 			return true;
 		}
 
-		//
+		smart_refctd_ptr<IGPUDescriptorSetLayout> BuildMeshletDSLayout() const {
+			// Layout of the set read by the task/mesh stages; yields nullptr (after logging) if the device cannot create it.
+			using create_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
+			const IGPUDescriptorSetLayout::SBinding layoutBindings[] =
+			{
+				{ // binding 0: array of uniform texel buffers, visible to the mesh stage only
+					.binding = 0,
+					.type = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER,
+					// `SubAllocatedDescriptorSet` only accepts a binding as suballocatable when all three flags are present
+					.createFlags = create_flags_t::ECF_UPDATE_AFTER_BIND_BIT | create_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | create_flags_t::ECF_PARTIALLY_BOUND_BIT,
+					.stageFlags = IShader::E_SHADER_STAGE::ESS_MESH,
+					.count = UINT16_MAX
+				},
+				{ // binding 1: a single uniform buffer shared by the task and mesh stages
+					.binding = 1,
+					.type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER,
+					.createFlags = create_flags_t::ECF_UPDATE_AFTER_BIND_BIT | create_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT,
+					.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH,
+					.count = 1
+				}
+			};
+			smart_refctd_ptr<IGPUDescriptorSetLayout> layout = m_device->createDescriptorSetLayout(layoutBindings);
+			if (layout)
+				return layout;
+
+			m_logger->log("Could not create descriptor set layout!", ILogger::ELL_ERROR);
+			return nullptr;
+		}
+
+		std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 3> CreateTestShader() const {
+			// Loads and compiles the task, mesh and fragment shaders (in that order) from app_resources.
+			// An entry is nullptr when its shader could not be loaded or compiled; every failure path prints which file failed.
+			auto loadCompileAndCreateShader = [&](const std::string& relPath, hlsl::ShaderStage stage, std::span<const asset::IShaderCompiler::SMacroDefinition> extraDefines) -> smart_refctd_ptr<IShader>
+				{
+					IAssetLoader::SAssetLoadParams lp = {};
+					lp.logger = m_logger.get();
+					lp.workingDirectory = ""; // virtual root
+					auto assetBundle = m_assetMgr->getAsset(relPath, lp);
+					const auto assets = assetBundle.getContents();
+					if (assets.empty()){
+						printf("asset was empty - %s\n", relPath.c_str());
+						return nullptr;
+					}
+
+					// the first asset of the bundle is taken as the shader source
+					auto sourceRaw = IAsset::castDown<IShader>(assets[0]);
+					if (!sourceRaw){
+						printf("source raw was nullptr - %s\n", relPath.c_str());
+						return nullptr;
+					}
+
+					nbl::video::ILogicalDevice::SShaderCreationParameters creationParams{
+						.source = sourceRaw.get(),
+						.optimizer = nullptr,
+						.readCache = nullptr,
+						.writeCache = nullptr,
+						.extraDefines = extraDefines,
+						.stage = stage
+					};
+
+					auto ret = m_device->compileShader(creationParams);
+					if (ret.get() == nullptr) {
+						printf("failed to compile shader - %s\n", relPath.c_str());
+					}
+					//m_assetMgr->removeAssetFromCache(assetBundle);
+					//return nullptr;
+					//i dont think that ^ was working
+					return ret;
+			};
+			constexpr uint32_t WorkgroupSize = 64;
+			constexpr uint32_t ObjectCount = MeshletPush::object_type_count_max; // OBJECT_COUNT sizes the shader's push constant array, so it must match MeshletPush::objectCount (the layout range is sizeof(MeshletPush))
+			constexpr uint32_t InstanceCount = WorkgroupSize;
+			const string WorkgroupSizeAsStr = std::to_string(WorkgroupSize);
+			const string ObjectCountAsStr = std::to_string(ObjectCount);
+			const string InstanceCountAsStr = std::to_string(InstanceCount);
+
+			const IShaderCompiler::SMacroDefinition WorkgroupSizeDefine = { "WORKGROUP_SIZE",WorkgroupSizeAsStr };
+			const IShaderCompiler::SMacroDefinition ObjectCountDefine = { "OBJECT_COUNT", ObjectCountAsStr };
+			const IShaderCompiler::SMacroDefinition InstanceCountDefine = { "INSTANCE_COUNT", InstanceCountAsStr };
+
+			const IShaderCompiler::SMacroDefinition meshArray[] = {WorkgroupSizeDefine, ObjectCountDefine, InstanceCountDefine};
+			return {
+				loadCompileAndCreateShader("app_resources/geom.task.hlsl", IShader::E_SHADER_STAGE::ESS_TASK, { meshArray }),
+				loadCompileAndCreateShader("app_resources/geom.mesh.hlsl", IShader::E_SHADER_STAGE::ESS_MESH, { meshArray }),
+				loadCompileAndCreateShader("app_resources/geom.frag.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT, {})
+			};
+		}
+		
+		bool CreateMeshPipelines() {
+			// Creates the meshlet descriptor set, pipeline layout and task/mesh/fragment pipeline; returns true on success, false on the first failure.
+			// (referencing example 10 and CSimpleDebugRenderer for this)
+			const auto fail = [this](const char* msg) { m_logger->log(msg, ILogger::ELL_ERROR); return false; };
+			auto shaders = CreateTestShader();
+			if (!shaders[0] || !shaders[1] || !shaders[2])
+				return fail("Could not load or compile the mesh pipeline shaders!");
+			auto dsLayout = BuildMeshletDSLayout();
+			if (!dsLayout)
+				return false; // BuildMeshletDSLayout has already logged the failure
+			{//descriptorset
+				auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, { &dsLayout.get(),1 });
+				if (!pool)
+					return fail("Could not create descriptor pool!");
+				auto ds = pool->createDescriptorSet(std::move(dsLayout));
+				if (!ds)
+					return fail("Could not descriptor set!");
+				meshlet_subAllocDS = make_smart_refctd_ptr<SubAllocatedDescriptorSet>(std::move(ds));
+			}
+			{
+				const SPushConstantRange ranges[] = { {
+					.stageFlags = hlsl::ShaderStage::ESS_TASK | hlsl::ShaderStage::ESS_MESH,
+					.offset = 0,
+					.size = sizeof(MeshletPush),
+				} };
+				meshletLayout = m_device->createPipelineLayout(ranges, smart_refctd_ptr<const IGPUDescriptorSetLayout>(meshlet_subAllocDS->getDescriptorSet()->getLayout()));
+				if (!meshletLayout)
+					return fail("Could not create mesh pipeline layout!");
+				IGPUMeshPipeline::SCreationParams params = {};
+				params.layout = meshletLayout.get();
+				params.renderpass = m_renderpass.get();
+				params.cached.subpassIx = 0;
+
+				params.taskShader.shader = shaders[0].get();
+				params.taskShader.entryPoint = "main";
+				params.taskShader.entries = nullptr;
+				params.taskShader.requiredSubgroupSize = static_cast<IPipelineBase::SUBGROUP_SIZE>(4); //ill need to adjust this probably
+
+				params.meshShader.shader = shaders[1].get();
+				params.meshShader.entryPoint = "main";
+				params.meshShader.entries = nullptr;
+				params.meshShader.requiredSubgroupSize = static_cast<IPipelineBase::SUBGROUP_SIZE>(5); //ill need to adjust this probably
+
+				params.fragmentShader = { .shader = shaders[2].get(), .entryPoint = "main"};
+				params.cached.requireFullSubgroups = true;
+				params.cached.rasterization.faceCullingMode = EFCM_NONE; //maybe change this? i was a bit limited in example 61
+				if (!m_device->createMeshPipelines(nullptr, { &params, 1 }, &meshletPipeline)) {
+					logFail("Failed to create mesh pipeline!\n");
+					return false; // previously fell through and reported nothing to the caller
+				}
+			}
+			return true; // the function is declared bool; flowing off the end without a value is undefined behaviour
+		}
+
+
+
 		virtual inline bool onAppTerminated()
 		{
 			SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId;
@@ -380,10 +540,12 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix);
 
 			// tear down scene every frame
-			auto& instance = m_renderer->m_instances[0];
-			memcpy(&instance.world, &interface.model, sizeof(instance.world));
-			instance.packedGeo = m_renderer->getGeometries().data() + interface.gcIndex;
-			m_renderer->render(cb, viewParams);
+			//auto& instance = m_renderer->m_instances[0];
+			//memcpy(&instance.world, &interface.model, sizeof(instance.world));
+			//instance.packedGeo = m_renderer->getGeometries().data() + interface.gcIndex;
+			//m_renderer->render(cb, viewParams);
+
+			//MeshPushConstant mPushConstant = { interface.camera.getConcatenatedMatrix(), cubeCount, coneCount, tubeCount };
 		}
 
 
@@ -425,10 +587,10 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 							previousEventTimestamp = e.timeStamp;
 							uiEvents.mouse.emplace_back(e);
 
-							if (e.type==nbl::ui::SMouseEvent::EET_SCROLL && m_renderer)
+							if (e.type==nbl::ui::SMouseEvent::EET_SCROLL)// && m_renderer)
 							{
 								interface.gcIndex += int16_t(core::sign(e.scrollEvent.verticalScroll));
-								interface.gcIndex = core::clamp(interface.gcIndex,0ull,m_renderer->getGeometries().size()-1);
+								//interface.gcIndex = core::clamp(interface.gcIndex,0ull,m_renderer->getGeometries().size()-1);
 							}
 						}
 					},
@@ -463,7 +625,7 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 				.keyboardEvents = uiEvents.keyboard
 			};
 
-			interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex];
+			interface.objectName = m_scene->getInitParams().geometryNames[0];
 			interface.imGUI->update(params);
 		}
 
@@ -554,8 +716,8 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 		//
 		smart_refctd_ptr<CGeometryCreatorScene> m_scene;
 		smart_refctd_ptr<IGPURenderpass> m_renderpass;
-		smart_refctd_ptr<CSimpleDebugRenderer> m_renderer;
 		smart_refctd_ptr<IGPUFramebuffer> m_framebuffer;
+		smart_refctd_ptr<CSimpleDebugRenderer> m_renderer;
 		//
 		smart_refctd_ptr<ISemaphore> m_semaphore;
 		uint64_t m_realFrameIx = 0;
@@ -563,6 +725,10 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 		//
 		InputSystem::ChannelReader<IMouseEventChannel> mouse;
 		InputSystem::ChannelReader<IKeyboardEventChannel> keyboard;
+
+		core::smart_refctd_ptr<video::SubAllocatedDescriptorSet> meshlet_subAllocDS;
+		smart_refctd_ptr<IGPUPipelineLayout> meshletLayout;
+		smart_refctd_ptr<IGPUMeshPipeline> meshletPipeline;
 		// UI stuff
 		struct CInterface
 		{
@@ -601,6 +767,10 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 				ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing);
 				ImGui::Begin("Editor");
 
+				if (ImGui::Button("reload mesh shader")) {
+					//printf("test shader result - %d\n", CreateTestShaderFuncPtr());
+				}
+
 				if (ImGui::RadioButton("Full view", !transformParams.useWindow))
 					transformParams.useWindow = false;
 
@@ -664,8 +834,7 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 				{
 					ImGui::Text("Using gizmo");
 				}
-				else
-				{
+				else {
 					ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : "");
 					ImGui::SameLine();
 					ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : "");
@@ -730,18 +899,19 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 
 				imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
 				imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix());
-				imguizmoM16InOut.model = core::transpose(matrix4SIMD(model));
+				if (currentTransform >= 0 && currentTransform < transforms.size()) {
+					imguizmoM16InOut.model = core::transpose(matrix4SIMD(transforms[currentTransform]));
+				}
 				{
-					if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates
-						imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/	
-
 					transformParams.editTransformDecomposition = true;
 					static TransformWidget transformWidget{};
-					widgetBox = transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
+					transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
 					sceneResolution = widgetBox.zw;
 				}
 
-				model = core::transpose(imguizmoM16InOut.model).extractSub3x4();
+				if (currentTransform >= 0 && currentTransform < transforms.size()) {
+					transforms[currentTransform] = core::transpose(imguizmoM16InOut.model).extractSub3x4();
+				}
 				// to Nabla + update camera & model matrices
 // TODO: make it more nicely, extract:
 // - Position by computing inverse of the view matrix and grabbing its translation
@@ -755,8 +925,10 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 
 				// object meta display
 				{
-					ImGui::Begin("Object");
-					ImGui::Text("type: \"%s\"", objectName.data());
+					ImGui::Begin("Object Counts");
+					ImGui::Text("object count - cube[%d] - cone[%d] - tube[%d]", cubeCount, coneCount, tubeCount);
+
+
 					ImGui::End();
 				}
 					
@@ -785,83 +957,14 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 							ImGui::Separator();
 					};
 
-					addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, model.pointer());
+					if (currentTransform >= 0 && currentTransform < transforms.size()) {
+						addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, transforms[currentTransform].pointer());
+					}
 					addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer());
 					addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false);
 
 					ImGui::End();
 				}
-
-				// Nabla Imgui backend MDI buffer info
-				// To be 100% accurate and not overly conservative we'd have to explicitly `cull_frees` and defragment each time,
-				// so unless you do that, don't use this basic info to optimize the size of your IMGUI buffer.
-				{
-					auto* streaminingBuffer = imGUI->getStreamingBuffer();
-
-					const size_t total = streaminingBuffer->get_total_size();			// total memory range size for which allocation can be requested
-					const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size();		// max total free bloock memory size we can still allocate from total memory available
-					const size_t consumedMemory = total - freeSize;			// memory currently consumed by streaming buffer
-
-					float freePercentage = 100.0f * (float)(freeSize) / (float)total;
-					float allocatedPercentage = (float)(consumedMemory) / (float)total;
-
-					ImVec2 barSize = ImVec2(400, 30);
-					float windowPadding = 10.0f;
-					float verticalPadding = ImGui::GetStyle().FramePadding.y;
-
-					ImGui::SetNextWindowSize(ImVec2(barSize.x + 2 * windowPadding, 110 + verticalPadding), ImGuiCond_Always);
-					ImGui::Begin("Nabla Imgui MDI Buffer Info", nullptr, ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoScrollbar);
-
-					ImGui::Text("Total Allocated Size: %zu bytes", total);
-					ImGui::Text("In use: %zu bytes", consumedMemory);
-					ImGui::Text("Buffer Usage:");
-
-					ImGui::SetCursorPosX(windowPadding);
-
-					if (freePercentage > 70.0f)
-						ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f));  // Green
-					else if (freePercentage > 30.0f)
-						ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f));  // Yellow
-					else
-						ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f));  // Red
-
-					ImGui::ProgressBar(allocatedPercentage, barSize, "");
-
-					ImGui::PopStyleColor();
-
-					ImDrawList* drawList = ImGui::GetWindowDrawList();
-
-					ImVec2 progressBarPos = ImGui::GetItemRectMin();
-					ImVec2 progressBarSize = ImGui::GetItemRectSize();
-
-					const char* text = "%.2f%% free";
-					char textBuffer[64];
-					snprintf(textBuffer, sizeof(textBuffer), text, freePercentage);
-
-					ImVec2 textSize = ImGui::CalcTextSize(textBuffer);
-					ImVec2 textPos = ImVec2
-					(
-						progressBarPos.x + (progressBarSize.x - textSize.x) * 0.5f,
-						progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f
-					);
-
-					ImVec4 bgColor = ImGui::GetStyleColorVec4(ImGuiCol_WindowBg);
-					drawList->AddRectFilled
-					(
-						ImVec2(textPos.x - 5, textPos.y - 2),
-						ImVec2(textPos.x + textSize.x + 5, textPos.y + textSize.y + 2),
-						ImGui::GetColorU32(bgColor)
-					);
-
-					ImGui::SetCursorScreenPos(textPos);
-					ImGui::Text("%s", textBuffer);
-
-					ImGui::Dummy(ImVec2(0.0f, verticalPadding));
-
-					ImGui::End();
-				}
-
-				ImGui::End();
 			}
 
 			smart_refctd_ptr<ext::imgui::UI> imGUI;
@@ -871,7 +974,9 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			//
 			Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD());
 			// mutables
-			core::matrix3x4SIMD model;
+			int32_t currentTransform = -1;
+			std::vector<core::matrix3x4SIMD> transforms;
+
 			std::string_view objectName;
 			TransformRequestParams transformParams;
 			uint16_t2 sceneResolution = {1280,720};
@@ -883,6 +988,10 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed
 			bool isPerspective = true, isLH = true, flipGizmoY = true, move = false;
 			bool firstFrame = true;
+
+			uint32_t cubeCount = 0;
+			uint32_t coneCount = 0;
+			uint32_t tubeCount = 0;
 		} interface;
 };
 

From 08fc32b5c10c63ec82d9251251d242c6bf0aa3de Mon Sep 17 00:00:00 2001
From: Corey <corey.w108@gmail.com>
Date: Thu, 9 Oct 2025 04:52:19 -0500
Subject: [PATCH 4/8] Update README.md

---
 MeshShader/README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/MeshShader/README.md b/MeshShader/README.md
index c52cfcd7a..01640d5a1 100644
--- a/MeshShader/README.md
+++ b/MeshShader/README.md
@@ -43,4 +43,7 @@ Beginning shader experimentation. Setting up easy reload of shaders so I don't h
 ## 10/6
 I need to add CPU side verification that mesh shader vert and prim count are below vulkan limits. The same as how work group size is verified.
 
-Mesh and Task shaders having branches where the output is not defined is incorrect. The glslc compiler won't warn the user, I'll have to check for DXC. Nvidia will assume it's a 0 group output, but AMD will get DEVICE_LOST. If it's possible, having a warning or compile check for that would be nice. Most likely outside the scope of Nabla, but possibly not. I'll have to ask.
\ No newline at end of file
+Mesh and Task shaders having branches where the output is not defined is incorrect. The glslc compiler won't warn the user, I'll have to check for DXC. Nvidia will assume it's a 0 group output, but AMD will get DEVICE_LOST. If it's possible, having a warning or compile check for that would be nice. Most likely outside the scope of Nabla, but possibly not. I'll have to ask.
+
+## 10/9
+On the bug hunting phase. Should be finished shortly, then I'll hit the cleanup phase.
\ No newline at end of file

From dc0d0cf16dfcf87d2a593462d7533fc41f5338e0 Mon Sep 17 00:00:00 2001
From: Corey <corey.w108@gmail.com>
Date: Fri, 19 Dec 2025 18:28:43 -0600
Subject: [PATCH 5/8] full rebuild of simple debug renderer

---
 .gitignore                                    |   1 +
 MeshShader/CMakeLists.txt                     |   2 +
 MeshShader/app_resources/FirstBuild.mesh.hlsl |  23 +
 MeshShader/app_resources/geom.mesh.hlsl       |   2 +-
 MeshShader/include/MeshRenderer.hpp           | 141 +++
 MeshShader/include/SampleApp.h                |  96 ++
 MeshShader/include/transform.hpp              |  28 +-
 MeshShader/main.cpp                           | 992 +-----------------
 MeshShader/src/MeshRenderer.cpp               | 330 ++++++
 MeshShader/src/SampleApp.cpp                  | 769 ++++++++++++++
 .../geometry/CSimpleDebugRenderer.hpp         |  10 +-
 11 files changed, 1398 insertions(+), 996 deletions(-)
 create mode 100644 MeshShader/app_resources/FirstBuild.mesh.hlsl
 create mode 100644 MeshShader/include/MeshRenderer.hpp
 create mode 100644 MeshShader/include/SampleApp.h
 create mode 100644 MeshShader/src/MeshRenderer.cpp
 create mode 100644 MeshShader/src/SampleApp.cpp

diff --git a/.gitignore b/.gitignore
index f119890ee..e95b918cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ compiled.spv
 */.vscode/*
 */__main__.py
 /tmp/rtSamples.bin
+imgui.ini
diff --git a/MeshShader/CMakeLists.txt b/MeshShader/CMakeLists.txt
index 330ee55d4..315040e99 100644
--- a/MeshShader/CMakeLists.txt
+++ b/MeshShader/CMakeLists.txt
@@ -6,6 +6,8 @@ endif()
 if(NBL_BUILD_IMGUI)
 	set(NBL_EXTRA_SOURCES
 		#"${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" #just leaving this so i can easily reference it later
+		"${CMAKE_CURRENT_SOURCE_DIR}/src/SampleApp.cpp"
+		"${CMAKE_CURRENT_SOURCE_DIR}/src/MeshRenderer.cpp"
 	)
 
 	set(NBL_INCLUDE_SEARCH_DIRECTORIES
diff --git a/MeshShader/app_resources/FirstBuild.mesh.hlsl b/MeshShader/app_resources/FirstBuild.mesh.hlsl
new file mode 100644
index 000000000..db0ed585d
--- /dev/null
+++ b/MeshShader/app_resources/FirstBuild.mesh.hlsl
@@ -0,0 +1,23 @@
+//https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html#primitive-attributes
+
+struct SInterpolants{
+    float4 ndc : SV_Position;
+};
+struct Primo {
+    uint vertexID : SV_PrimitiveID;
+};
+
+[numthreads(WORKGROUP_SIZE,1,1)]
+[outputtopology("point")]
+// Mesh entry point: every thread emits one point vertex plus the primitive index that refers to it.
+[shader("mesh")]
+void main(
+    in uint3 ID : SV_DispatchThreadID, // NOTE(review): dispatch-wide ID indexes arrays sized per workgroup; only in range while a single workgroup is dispatched - confirm
+    out vertices SInterpolants verts[WORKGROUP_SIZE],
+    out indices uint prims[WORKGROUP_SIZE]
+)
+{
+    SetMeshOutputCounts(WORKGROUP_SIZE, WORKGROUP_SIZE); // the counts must be declared before the first write to any output array
+    verts[ID.x].ndc = float32_t4(ID.x, 0.0, 0.0, 1.0);
+    prims[ID.x] = ID.x;
+}
\ No newline at end of file
diff --git a/MeshShader/app_resources/geom.mesh.hlsl b/MeshShader/app_resources/geom.mesh.hlsl
index 0fdee3c55..d8778c4a5 100644
--- a/MeshShader/app_resources/geom.mesh.hlsl
+++ b/MeshShader/app_resources/geom.mesh.hlsl
@@ -4,7 +4,7 @@
 
 
 //utb is short for "uniform texel buffer", or its a storage buffer with vec4s
-[[vk::binding(0)]] StructuredBuffer<float32_t4> utbs[PushDescCount];
+[[vk::binding(0)]] Buffer<float32_t4> utbs[PushDescCount];
 //none of the objects use the index buffer
 
 struct VertexOut {
diff --git a/MeshShader/include/MeshRenderer.hpp b/MeshShader/include/MeshRenderer.hpp
new file mode 100644
index 000000000..c05542bf2
--- /dev/null
+++ b/MeshShader/include/MeshRenderer.hpp
@@ -0,0 +1,141 @@
+#pragma once
+
+#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl"
+#include "nbl/examples/geometry/SPushConstants.hlsl"
+
+namespace nbl::examples
+{
+
+
+	enum class MeshletObjectTypes { // identifiers for the object types handled by MeshDebugRenderer
+		Cube,
+		Rectangle,
+		Disk,
+		Sphere,
+		Cylinder,
+		Cone,
+		Icosphere,
+
+		COUNT // not an object type: its numeric value is the number of enumerators above and sizes MeshDataBuffer::MaxObjectCount
+	};
+
+class MeshDebugRenderer final : public core::IReferenceCounted {
+#define EXPOSE_NABLA_NAMESPACES \
+		using namespace nbl::core; \
+		using namespace nbl::system; \
+		using namespace nbl::asset; \
+		using namespace nbl::video
+// NOTE(review): EXPOSE_NABLA_NAMESPACES is never expanded between this definition and its #undef at the end of the class
+public:
+	// Binding index handed to SubAllocatedDescriptorSet::multi_deallocate by immediateDealloc()
+	constexpr static inline uint16_t VertexAttrubUTBDescBinding = 0;
+
+	// View-dependent matrices; consumed by SInstance::computePushConstants(). Member functions are only declared here.
+	struct SViewParams
+	{
+		SViewParams(const hlsl::float32_t3x4& _view, const hlsl::float32_t4x4& _viewProj);
+		hlsl::float32_t4x4 computeForInstance(hlsl::float32_t3x4 world) const;
+
+		hlsl::float32_t3x4 view;
+		hlsl::float32_t4x4 viewProj;
+		hlsl::float32_t3x3 normal;
+	};
+	constexpr static inline auto MissingView = hlsl::examples::geometry_creator_scene::SPushConstants::DescriptorCount;
+
+	// Per-object-type record stored in MeshDataBuffer::meshData (the author describes it as buffer data)
+	struct MeshletObjectData {
+		uint32_t vertCount;
+		uint32_t primCount;
+		uint32_t objectType;
+		uint32_t positionView;
+		uint32_t normalView;
+	};
+	struct MeshDataBuffer {
+		// A null gpuGeometry means the mesh object data is invalid, e.g. the CPU-side memory failed to transfer to the GPU
+		core::smart_refctd_ptr<const video::IGPUPolygonGeometry> gpuGeometry{};
+
+		static constexpr std::size_t MaxObjectCount = static_cast<std::size_t>(MeshletObjectTypes::COUNT);
+		static constexpr std::size_t MaxInstanceCount = 64;
+
+		MeshletObjectData meshData[MaxObjectCount];
+		hlsl::float32_t4x4 transforms[MaxInstanceCount];
+
+		//remove index type to avoid branch in shader
+		//asset::E_INDEX_TYPE indexType = asset::EIT_UNKNOWN;
+	};
+	// One instance: a world matrix plus a helper that derives its push constants from the view parameters
+	struct SInstance
+	{
+		struct SPushConstants
+		{
+			NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1 << 16) - 1; // 65535
+
+			hlsl::float32_t4x4 matrices;
+			uint32_t objectCount[MeshDataBuffer::MaxObjectCount];
+		};
+		inline SPushConstants computePushConstants(const SViewParams& viewParams) const	{
+			return SPushConstants{
+				.matrices = viewParams.computeForInstance(world),
+				.objectCount{0} // every per-type count starts at zero
+			};
+		}
+
+		hlsl::float32_t3x4 world;
+	};
+
+	static std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 3> CreateTestShader(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX);
+
+	// Factory for a renderer bound to the given renderpass and subpass; defined outside this header
+	static core::smart_refctd_ptr<MeshDebugRenderer> create(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX);
+
+	// Convenience overload: creates the renderer and, when creation succeeded, registers `geometries` with it
+	static inline core::smart_refctd_ptr<MeshDebugRenderer> create(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX, const std::span<const video::IGPUPolygonGeometry* const> geometries)
+	{
+		auto retval = create(assMan,renderpass,subpassIX);
+		if (retval)
+			retval->addGeometries(geometries); // NOTE(review): the bool result of addGeometries is ignored here
+		return retval;
+	}
+
+	// Objects the renderer is built from; the constructor moves them into m_params
+	struct SInitParams {
+		core::smart_refctd_ptr<video::SubAllocatedDescriptorSet> subAllocDS;
+		core::smart_refctd_ptr<video::IGPUPipelineLayout> layout;
+		core::smart_refctd_ptr<video::IGPUMeshPipeline> pipeline;
+	};
+	inline const SInitParams& getInitParams() const {return m_params;}
+
+	//im not going to go thru every example to fix them up to use this static function instead, so im leaving the old one
+	//device should be const* but im not going to fix it right now 
+	//(scope creep)
+		
+	bool addGeometries(const std::span<const video::IGPUPolygonGeometry* const> geometries);
+
+	void removeGeometry(const uint32_t ix, const video::ISemaphore::SWaitInfo& info);
+
+	inline const auto& getGeometries() const {return m_geoms;}
+
+	void render(video::IGPUCommandBuffer* cmdbuf, const SViewParams& viewParams) const;
+
+	core::vector<SInstance> m_instances;
+
+protected:
+	inline MeshDebugRenderer(SInitParams&& _params) : m_params(std::move(_params)) {}
+	inline ~MeshDebugRenderer()	{
+		// clean shutdown, can also make SubAllocatedDescriptorSet resilient against that, and issue `device->waitIdle` if not everything is freed
+		const_cast<video::ILogicalDevice*>(m_params.layout->getOriginDevice())->waitIdle();
+	}
+	void clearGeometries(const video::ISemaphore::SWaitInfo& info);
+
+	inline void immediateDealloc(video::SubAllocatedDescriptorSet::value_type index)
+	{
+		video::IGPUDescriptorSet::SDropDescriptorSet dummy[1]; // passed to multi_deallocate and not read afterwards
+		m_params.subAllocDS->multi_deallocate(dummy,VertexAttrubUTBDescBinding,1,&index);
+	}
+
+	SInitParams m_params;
+	MeshDataBuffer m_geoms;
+#undef EXPOSE_NABLA_NAMESPACES
+};
+
+}
\ No newline at end of file
diff --git a/MeshShader/include/SampleApp.h b/MeshShader/include/SampleApp.h
new file mode 100644
index 000000000..e0045510e
--- /dev/null
+++ b/MeshShader/include/SampleApp.h
@@ -0,0 +1,96 @@
+#pragma once
+
+#include "common.hpp"
+#include "nbl/ui/ICursorControl.h"
+#include "MeshRenderer.hpp"
+
+
+
+
+struct MeshletPush { // presumably the push-constant payload: a view-projection matrix plus one instance count per object type
+	float32_t4x4 viewProj; //author's open question: "nbl::core::matrix4SIMD is 128bit??"
+	constexpr static uint8_t object_type_count_max = 16;//it can grow until this struct hits the limit for push size
+	uint32_t objectInstanceCount[object_type_count_max]; //this data is going to be cropped before pushing, if necessary
+};
+
+class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesApplication // sample application; member functions are defined outside this header
+{
+		using device_base_t = MonoWindowApplication;
+		using asset_base_t = BuiltinResourcesApplication;
+
+	public:
+		UISampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) 
+			: IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD),
+			device_base_t({1280,720}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) 
+        {}
+
+		bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override;
+		//smart_refctd_ptr<IGPUDescriptorSetLayout> BuildMeshletDSLayout() const;
+		std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 3> CreateTestShader() const;
+		bool onAppTerminated() override; // marked `override` like the sibling overrides so a signature mismatch fails to compile
+		IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override;
+
+	protected:
+		const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override;
+	private:
+		void UpdateScene(nbl::video::IGPUCommandBuffer* cb);
+		void update(const std::chrono::microseconds nextPresentationTimestamp);
+		void recreateFramebuffer(const uint16_t2 resolution);
+		void beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info);
+
+		// Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers
+		constexpr static inline uint32_t MaxFramesInFlight = 3u;
+		constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT;
+		constexpr static inline auto finalSceneRenderFormat = EF_R8G8B8A8_SRGB;
+		constexpr static inline auto TexturesImGUIBindingIndex = 0u;
+		// we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes
+		constexpr static inline auto MaxImGUITextures = 2u+MaxFramesInFlight;
+
+		// scene and render-target objects
+		smart_refctd_ptr<CGeometryCreatorScene> m_scene;
+		smart_refctd_ptr<IGPURenderpass> m_renderpass;
+		smart_refctd_ptr<IGPUFramebuffer> m_framebuffer;
+
+		//i PROBABLY need to replace the debug renderer
+		smart_refctd_ptr<MeshDebugRenderer> m_renderer;
+		// semaphore, running frame counter and one command buffer slot per frame in flight
+		smart_refctd_ptr<ISemaphore> m_semaphore;
+		uint64_t m_realFrameIx = 0;
+		std::array<smart_refctd_ptr<IGPUCommandBuffer>,MaxFramesInFlight> m_cmdBufs;
+		// input event readers
+		InputSystem::ChannelReader<IMouseEventChannel> mouse;
+		InputSystem::ChannelReader<IKeyboardEventChannel> keyboard;
+
+		core::smart_refctd_ptr<video::SubAllocatedDescriptorSet> meshlet_subAllocDS;
+		smart_refctd_ptr<IGPUPipelineLayout> meshletLayout;
+		smart_refctd_ptr<IGPUMeshPipeline> meshletPipeline;
+		// UI stuff
+		struct CInterface
+		{
+			void operator()();
+			smart_refctd_ptr<ext::imgui::UI> imGUI;
+			// descriptor set
+			smart_refctd_ptr<SubAllocatedDescriptorSet> subAllocDS;
+			SubAllocatedDescriptorSet::value_type renderColorViewDescIndex = SubAllocatedDescriptorSet::invalid_value;
+			// camera
+			Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD());
+			// mutables
+			int32_t currentTransform = -1;
+			std::vector<core::matrix3x4SIMD> transforms;
+
+			std::array<std::string, MeshDebugRenderer::MeshDataBuffer::MaxObjectCount> objectNames;
+			std::array<uint32_t, MeshDebugRenderer::MeshDataBuffer::MaxObjectCount> objectCount{}; // value-initialised so the counts never start out indeterminate
+
+			TransformRequestParams transformParams;
+			uint16_t2 sceneResolution = {1280,720};
+			uint16_t4 widgetBox;
+			float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f;
+			float viewWidth = 10.f;
+			float camYAngle = 165.f / 180.f * 3.14159f;
+			float camXAngle = 32.f / 180.f * 3.14159f;
+			uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed
+			bool isPerspective = true, isLH = true, flipGizmoY = true, move = false;
+			bool firstFrame = true;
+
+		} interface;
+};
diff --git a/MeshShader/include/transform.hpp b/MeshShader/include/transform.hpp
index e1aed2224..c8b19f466 100644
--- a/MeshShader/include/transform.hpp
+++ b/MeshShader/include/transform.hpp
@@ -94,7 +94,7 @@ struct TransformWidget {
 	}
 
 
-	void ViewingGizmo(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
+	ImVec2 ViewingGizmo(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
 		ImGuiIO& io = ImGui::GetIO();
 		float viewManipulateRight = io.DisplaySize.x;
 		float viewManipulateTop = 0;
@@ -103,17 +103,39 @@ struct TransformWidget {
 		info.textureID = params.sceneTexDescIx;
 		info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER;
 
+
+		ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing);
+		ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing);
+		ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f));
+		ImGui::Begin("Gizmo", 0, gizmoWindowFlags);
+		ImGuizmo::SetDrawlist();
+
+		ImVec2 windowPos = ImGui::GetWindowPos();
+		ImVec2 cursorPos = ImGui::GetCursorScreenPos();
+
+		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
+		ImGui::Image(info, contentRegionSize);
+		ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
+
+		viewManipulateRight = cursorPos.x + contentRegionSize.x;
+		viewManipulateTop = cursorPos.y;
+
+		ImGuiWindow* window = ImGui::GetCurrentWindow();
+		gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0);
+
 		ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL);
 
 		if (params.enableViewManipulate)
 			ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
 
 		ImGui::End();
+		ImGui::PopStyleColor(); // balances the ImGuiCol_WindowBg colour pushed before ImGui::Begin("Gizmo", ...)
+		return contentRegionSize;
 	}
 
-	void Update(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
+	ImVec2 Update(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
 		EditTransform(matrix, params);
-		ViewingGizmo(cameraView, cameraProjection, matrix, params);
+		return ViewingGizmo(cameraView, cameraProjection, matrix, params);
 	}
 
 };
diff --git a/MeshShader/main.cpp b/MeshShader/main.cpp
index 4baff6159..c55c3d48d 100644
--- a/MeshShader/main.cpp
+++ b/MeshShader/main.cpp
@@ -3,996 +3,14 @@
 // For conditions of distribution and use, see copyright notice in nabla.h
 
 #include "common.hpp"
-#include "nbl/ui/ICursorControl.h"
-
-struct MeshletPush {
-	float32_t4x4 viewProj; //nbl::core::matrix4SIMD is 128bit??
-	constexpr static uint8_t object_type_count_max = 16;//it can go up til this struct hits the limit for push size
-	uint32_t objectCount[object_type_count_max]; 
-};
+#include "SampleApp.h"
 
 /* 
 Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window.
 
 Written with Nabla's UI extension and got integrated with ImGuizmo to handle scene's object translations.
 */
-class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesApplication
-{
-		using device_base_t = MonoWindowApplication;
-		using asset_base_t = BuiltinResourcesApplication;
-
-	public:
-		inline UISampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) 
-			: IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD),
-			device_base_t({1280,720}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {}
-
-		inline bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
-		{
-			if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system)))
-				return false;
-			if (!device_base_t::onAppInitialized(smart_refctd_ptr(system)))
-				return false;
-
-			m_semaphore = m_device->createSemaphore(m_realFrameIx);
-			if (!m_semaphore)
-				return logFail("Failed to Create a Semaphore!");
-
-			auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT);
-			for (auto i = 0u; i<MaxFramesInFlight; i++)
-			{
-				if (!pool)
-					return logFail("Couldn't create Command Pool!");
-				if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_cmdBufs.data()+i,1}))
-					return logFail("Couldn't create Command Buffer!");
-			}
-			
-			const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()};
-			//auto creator = core::make_smart_refctd_ptr<CGeometryCreator>();
-			//auto cube = creator->createCube({ 1.f,1.f,1.f });
-			//id like to combine all the vertices into 1 buffer but given how it's set up, thats out of scope
-			//cube->getPositionView();
-
-
-			m_scene = CGeometryCreatorScene::create(
-				{
-					.transferQueue = getTransferUpQueue(),
-					.utilities = m_utils.get(),
-					.logger = m_logger.get(),
-					.addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies
-				},
-				CSimpleDebugRenderer::DefaultPolygonGeometryPatch
-			);
-			for(uint8_t i = 0; i < m_scene->getInitParams().geometries.size(); i++){
-				auto const& geom = m_scene->getInitParams().geometries[i];
-				printf("%s - %zu - %zu\n", m_scene->getInitParams().geometryNames[i].c_str(), geom->getVertexReferenceCount(), geom->getIndexCount());				
-			}
-			
-			// for the scene drawing pass
-			{
-				IGPURenderpass::SCreationParams params = {};
-				const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = {
-					{{
-						{
-							.format = sceneRenderDepthFormat,
-							.samples = IGPUImage::ESCF_1_BIT,
-							.mayAlias = false
-						},
-						/*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR},
-						/*.storeOp = */{IGPURenderpass::STORE_OP::STORE},
-						/*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED},
-						/*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}
-					}},
-					IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd
-				};
-				params.depthStencilAttachments = depthAttachments;
-				const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = {
-					{{
-						{
-							.format = finalSceneRenderFormat,
-							.samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT,
-							.mayAlias = false
-						},
-						/*.loadOp = */IGPURenderpass::LOAD_OP::CLEAR,
-						/*.storeOp = */IGPURenderpass::STORE_OP::STORE,
-						/*.initialLayout = */IGPUImage::LAYOUT::UNDEFINED,
-						/*.finalLayout = */ IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read
-					}},
-					IGPURenderpass::SCreationParams::ColorAttachmentsEnd
-				};
-				params.colorAttachments = colorAttachments;
-				IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = {
-					{},
-					IGPURenderpass::SCreationParams::SubpassesEnd
-				};
-				subpasses[0].depthStencilAttachment = {{.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}};
-				subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}};
-				params.subpasses = subpasses;
-				
-				const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
-					// wipe-transition of Color to ATTACHMENT_OPTIMAL and depth
-					{
-						.srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
-						.dstSubpass = 0,
-						.memoryBarrier = {
-							// last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later
-							// while color is sampled by ImGUI
-							.srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,
-							// don't want any writes to be available, as we are clearing both attachments
-							.srcAccessMask = ACCESS_FLAGS::NONE,
-							// destination needs to wait as early as possible
-							// TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h`
-							.dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
-							// because depth and color get cleared first no read mask
-							.dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
-						}
-						// leave view offsets and flags default
-					},
-					{
-						.srcSubpass = 0,
-						.dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
-						.memoryBarrier = {
-							// last place where the color can get modified, depth is implicitly earlier
-							.srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
-							// only write ops, reads can't be made available, also won't be using depth so don't care about it being visible to anyone else
-							.srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,
-							// the ImGUI will sample the color, then next frame we overwrite both attachments
-							.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,
-							// but we only care about the availability-visibility chain between renderpass and imgui 
-							.dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT
-						}
-						// leave view offsets and flags default
-					},
-					IGPURenderpass::SCreationParams::DependenciesEnd
-				};
-				params.dependencies = {};
-				m_renderpass = m_device->createRenderpass(std::move(params));
-				if (!m_renderpass)
-					return logFail("Failed to create Scene Renderpass!");
-			}
-			const auto& geometries = m_scene->getInitParams().geometries;
-			m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(),m_renderpass.get(),0,{&geometries.front().get(),geometries.size()});
-			// special case
-			{
-				//const auto& pipelines = m_renderer->getInitParams().pipelines;
-				auto ix = 0u;
-				for (const auto& name : m_scene->getInitParams().geometryNames)
-				{
-					if (name=="Cone")
-						//m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone];
-					ix++;
-				}
-			}
-			// we'll only display one thing at a time
-			//m_renderer->m_instances.resize(1);
-
-			// Create ImGUI
-			{
-				auto scRes = static_cast<CDefaultSwapchainFramebuffers*>(m_surface->getSwapchainResources());
-				ext::imgui::UI::SCreationParameters params = {};
-				params.resources.texturesInfo = {.setIx=0u,.bindingIx=TexturesImGUIBindingIndex};
-				params.resources.samplersInfo = {.setIx=0u,.bindingIx=1u};
-				params.utilities = m_utils;
-				params.transfer = getTransferUpQueue();
-				params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(),params.resources.texturesInfo,params.resources.samplersInfo,MaxImGUITextures);
-				params.assetManager = make_smart_refctd_ptr<IAssetManager>(smart_refctd_ptr(m_system));
-				params.renderpass = smart_refctd_ptr<IGPURenderpass>(scRes->getRenderpass());
-				params.subpassIx = 0u;
-				params.pipelineCache = nullptr;
-				interface.imGUI = ext::imgui::UI::create(std::move(params));
-				if (!interface.imGUI)
-					return logFail("Failed to create `nbl::ext::imgui::UI` class");
-			}
-
-			// create rest of User Interface
-			{
-				auto* imgui = interface.imGUI.get();
-				// create the suballocated descriptor set
-				{
-					// note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources
-					const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u);
-					auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,{&layout,1});
-					auto ds = pool->createDescriptorSet(smart_refctd_ptr<const IGPUDescriptorSetLayout>(layout));
-					interface.subAllocDS = make_smart_refctd_ptr<SubAllocatedDescriptorSet>(std::move(ds));
-					if (!interface.subAllocDS)
-						return logFail("Failed to create the descriptor set");
-					// make sure Texture Atlas slot is taken for eternity
-					{
-						auto dummy = SubAllocatedDescriptorSet::invalid_value;
-						interface.subAllocDS->multi_allocate(0,1,&dummy);
-						assert(dummy==ext::imgui::UI::FontAtlasTexId);
-					}
-					// write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout
-					IGPUDescriptorSet::SDescriptorInfo info = {};
-					info.desc = smart_refctd_ptr<nbl::video::IGPUImageView>(interface.imGUI->getFontAtlasView());
-					info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL;
-					const IGPUDescriptorSet::SWriteDescriptorSet write = {
-						.dstSet = interface.subAllocDS->getDescriptorSet(),
-						.binding = TexturesImGUIBindingIndex,
-						.arrayElement = ext::imgui::UI::FontAtlasTexId,
-						.count = 1,
-						.info = &info
-					};
-					if (!m_device->updateDescriptorSets({&write,1},{}))
-						return logFail("Failed to write the descriptor set");
-				}
-				imgui->registerListener([this](){interface();});
-			}
-
-			//create meshlet pipeline
-			CreateMeshPipelines();
-
-			interface.camera.mapKeysToArrows();
-
-			onAppInitializedFinish();
-			return true;
-		}
-
-		smart_refctd_ptr<IGPUDescriptorSetLayout> BuildMeshletDSLayout() const {
-			smart_refctd_ptr<IGPUDescriptorSetLayout> ret;
-			using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
-			const IGPUDescriptorSetLayout::SBinding bindings[] =
-			{
-				{
-					.binding = 0,
-					.type = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER,
-					// need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable
-					.createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | binding_flags_t::ECF_PARTIALLY_BOUND_BIT,
-					.stageFlags = IShader::E_SHADER_STAGE::ESS_MESH,
-					.count = UINT16_MAX
-				},
-				{
-					.binding = 1,
-					.type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER,
-					.createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT,
-					.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH,
-					.count = 1
-				}
-			};
-			ret = m_device->createDescriptorSetLayout(bindings);
-			if (!ret) {
-				m_logger->log("Could not create descriptor set layout!", ILogger::ELL_ERROR);
-				return nullptr;
-			}
-			return ret;
-		}
-
-		std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 3> CreateTestShader() const {
-
-
-			auto loadCompileAndCreateShader = [&](const std::string& relPath, hlsl::ShaderStage stage, std::span<const asset::IShaderCompiler::SMacroDefinition> extraDefines) -> smart_refctd_ptr<IShader>
-				{
-					IAssetLoader::SAssetLoadParams lp = {};
-					lp.logger = m_logger.get();
-					lp.workingDirectory = ""; // virtual root
-					auto assetBundle = m_assetMgr->getAsset(relPath, lp);
-					const auto assets = assetBundle.getContents();
-					if (assets.empty()){
-						printf("asset was empty - %s\n", relPath.c_str());
-						return nullptr;
-					}
-
-					// lets go straight from ICPUSpecializedShader to IGPUSpecializedShader
-					auto sourceRaw = IAsset::castDown<IShader>(assets[0]);
-					if (!sourceRaw){
-						printf("source raw was nullptr - %s\n", relPath.c_str());
-						return nullptr;
-					}
-
-					nbl::video::ILogicalDevice::SShaderCreationParameters creationParams{
-						.source = sourceRaw.get(),
-						.optimizer = nullptr,
-						.readCache = nullptr,
-						.writeCache = nullptr,
-						.extraDefines = extraDefines,
-						.stage = stage
-					};
-
-					auto ret = m_device->compileShader(creationParams);
-					if (ret.get() == nullptr) {
-						printf("failed to compile shader - %s\n", relPath.c_str());
-					}
-					//m_assetMgr->removeAssetFromCache(assetBundle);
-					//return nullptr;
-					//i dont think that ^ was working
-					return ret;
-			};
-			constexpr uint32_t WorkgroupSize = 64;
-			constexpr uint32_t ObjectCount = WorkgroupSize;
-			constexpr uint32_t InstanceCount = WorkgroupSize;
-			const string WorkgroupSizeAsStr = std::to_string(WorkgroupSize);
-			const string ObjectCountAsStr = std::to_string(ObjectCount);
-			const string InstanceCountAsStr = std::to_string(InstanceCount);
-
-			const IShaderCompiler::SMacroDefinition WorkgroupSizeDefine = { "WORKGROUP_SIZE",WorkgroupSizeAsStr };
-			const IShaderCompiler::SMacroDefinition ObjectCountDefine = { "OBJECT_COUNT", ObjectCountAsStr };
-			const IShaderCompiler::SMacroDefinition InstanceCountDefine = { "INSTANCE_COUNT", InstanceCountAsStr };
-
-			const IShaderCompiler::SMacroDefinition meshArray[] = {WorkgroupSizeDefine, ObjectCountDefine, InstanceCountDefine};
-			return {
-				loadCompileAndCreateShader("app_resources/geom.task.hlsl", IShader::E_SHADER_STAGE::ESS_TASK, { meshArray }),
-				loadCompileAndCreateShader("app_resources/geom.mesh.hlsl", IShader::E_SHADER_STAGE::ESS_MESH, { meshArray }),
-				loadCompileAndCreateShader("app_resources/geom.frag.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT, {})
-			};
-		}
-		
-		bool CreateMeshPipelines() {
-			//referencing example 10 for this
-			//and referencing CSimpleDebugRenderer
-
-			auto shaders = CreateTestShader();
-			auto dsLayout = BuildMeshletDSLayout();
-			{//descriptorset
-				auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, { &dsLayout.get(),1 });
-				auto ds = pool->createDescriptorSet(std::move(dsLayout));
-				if (!ds) {
-					m_logger->log("Could not descriptor set!", ILogger::ELL_ERROR);
-					return false;
-				}
-				meshlet_subAllocDS = make_smart_refctd_ptr<SubAllocatedDescriptorSet>(std::move(ds));
-			}
-
-			{
-				const SPushConstantRange ranges[] = { {
-					.stageFlags = hlsl::ShaderStage::ESS_TASK | hlsl::ShaderStage::ESS_MESH,
-					.offset = 0,
-					.size = sizeof(MeshletPush),
-				} }; 
-				
-				meshletLayout = m_device->createPipelineLayout(ranges, smart_refctd_ptr<const IGPUDescriptorSetLayout>(meshlet_subAllocDS->getDescriptorSet()->getLayout()));
-				IGPUMeshPipeline::SCreationParams params = {};
-				params.layout = meshletLayout.get();
-				params.renderpass = m_renderpass.get();
-				params.cached.subpassIx = 0;
-
-				params.taskShader.shader = shaders[0].get();
-				params.taskShader.entryPoint = "main";
-				params.taskShader.entries = nullptr;
-				params.taskShader.requiredSubgroupSize = static_cast<IPipelineBase::SUBGROUP_SIZE>(4); //ill need to adjust this probably
-
-
-				params.meshShader.shader = shaders[1].get();
-				params.meshShader.entryPoint = "main";
-				params.meshShader.entries = nullptr;
-				params.meshShader.requiredSubgroupSize = static_cast<IPipelineBase::SUBGROUP_SIZE>(5); //ill need to adjust this probably
-
-				params.fragmentShader = { .shader = shaders[2].get(), .entryPoint = "main"};
-
-
-				params.cached.requireFullSubgroups = true;
-				params.cached.rasterization.faceCullingMode = EFCM_NONE; //maybe change this? i was a bit limited in example 61
-
-				if (!m_device->createMeshPipelines(nullptr, { &params, 1 }, &meshletPipeline)) {
-					logFail("Failed to create mesh pipeline!\n");
-				}
-			}
-
-
-		}
-
-
-
-		virtual inline bool onAppTerminated()
-		{
-			SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId;
-			IGPUDescriptorSet::SDropDescriptorSet dummy[1];
-			interface.subAllocDS->multi_deallocate(dummy,TexturesImGUIBindingIndex,1,&fontAtlasDescIx);
-			return device_base_t::onAppTerminated();
-		}
-
-		inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override
-		{
-			// CPU events
-			update(nextPresentationTimestamp);
-
-			const auto& virtualWindowRes = interface.sceneResolution;
-			if (!m_framebuffer || m_framebuffer->getCreationParameters().width!=virtualWindowRes[0] || m_framebuffer->getCreationParameters().height!=virtualWindowRes[1])
-				recreateFramebuffer(virtualWindowRes);
-
-			const auto resourceIx = m_realFrameIx % MaxFramesInFlight;
-
-			auto* const cb = m_cmdBufs.data()[resourceIx].get();
-			cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT);
-			cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
-			// clear to black for both things
-			const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} };
-			if (m_framebuffer)
-			{
-				cb->beginDebugMarker("UISampleApp Scene Frame");
-				{
-					const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f };
-					const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo =
-					{
-						.framebuffer = m_framebuffer.get(),
-						.colorClearValues = &clearValue,
-						.depthStencilClearValues = &farValue,
-						.renderArea = {
-							.offset = {0,0},
-							.extent = {virtualWindowRes[0],virtualWindowRes[1]}
-						}
-					};
-					beginRenderpass(cb, renderpassInfo);
-				}
-				// draw scene
-				UpdateScene(cb);
-				cb->endRenderPass();
-				cb->endDebugMarker();
-			}
-			{
-				cb->beginDebugMarker("UISampleApp IMGUI Frame");
-				{ //begin imgui subpass
-					auto scRes = static_cast<CDefaultSwapchainFramebuffers*>(m_surface->getSwapchainResources());
-					const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = {
-						.framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex),
-						.colorClearValues = &clearValue,
-						.depthStencilClearValues = nullptr,
-						.renderArea = {
-							.offset = {0,0},
-							.extent = {m_window->getWidth(),m_window->getHeight()}
-						}
-					};
-					beginRenderpass(cb, renderpassInfo);
-				}
-				// draw ImGUI
-				{
-					auto* imgui = interface.imGUI.get();
-					auto* pipeline = imgui->getPipeline();
-					cb->bindGraphicsPipeline(pipeline);
-					// note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx
-					const auto* ds = interface.subAllocDS->getDescriptorSet();
-					cb->bindDescriptorSets(EPBP_GRAPHICS,pipeline->getLayout(),imgui->getCreationParameters().resources.texturesInfo.setIx,1u,&ds);
-					// a timepoint in the future to release streaming resources for geometry
-					const ISemaphore::SWaitInfo drawFinished = {.semaphore=m_semaphore.get(),.value=m_realFrameIx+1u};
-					if (!imgui->render(cb,drawFinished))
-					{
-						m_logger->log("TODO: need to present acquired image before bailing because its already acquired.",ILogger::ELL_ERROR);
-						return {};
-					}
-				}
-				cb->endRenderPass();
-				cb->endDebugMarker();
-			}
-			cb->end();
-
-			IQueue::SSubmitInfo::SSemaphoreInfo retval =
-			{
-				.semaphore = m_semaphore.get(),
-				.value = ++m_realFrameIx,
-				.stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS
-			};
-			const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] =
-			{
-				{.cmdbuf = cb }
-			};
-			const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = {
-				{
-					.semaphore = device_base_t::getCurrentAcquire().semaphore,
-					.value = device_base_t::getCurrentAcquire().acquireCount,
-					.stageMask = PIPELINE_STAGE_FLAGS::NONE
-				}
-			};
-			const IQueue::SSubmitInfo infos[] =
-			{
-				{
-					.waitSemaphores = acquired,
-					.commandBuffers = commandBuffers,
-					.signalSemaphores = {&retval,1}
-				}
-			};
-			
-			if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS)
-			{
-				retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal
-				m_realFrameIx--;
-			}
-
-
-			m_window->setCaption("[Nabla Engine] Mesh Shader Demo");
-			return retval;
-		}
-
-	protected:
-		const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override
-		{
-			// Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present
-			const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
-				// don't want any writes to be available, we'll clear, only thing to worry about is the layout transition
-				{
-					.srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
-					.dstSubpass = 0,
-					.memoryBarrier = {
-						.srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway 
-						.srcAccessMask = ACCESS_FLAGS::NONE,
-						// layout transition needs to finish before the color write
-						.dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
-						.dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
-					}
-					// leave view offsets and flags default
-				},
-				// want layout transition to begin after all color output is done
-				{
-					.srcSubpass = 0,
-					.dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
-					.memoryBarrier = {
-						// last place where the color can get modified, depth is implicitly earlier
-						.srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
-						// only write ops, reads can't be made available
-						.srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
-						// spec says nothing is needed when presentation is the destination
-					}
-					// leave view offsets and flags default
-				},
-				IGPURenderpass::SCreationParams::DependenciesEnd
-			};
-			return dependencies;
-		}
-
-	private:
-
-		void UpdateScene(nbl::video::IGPUCommandBuffer* cb) {
-			float32_t3x4 viewMatrix;
-			float32_t4x4 viewProjMatrix;
-			// TODO: get rid of legacy matrices //<-- camera.getViewMatrix returns matrix3x4SIMD
-			{
-				const auto& camera = interface.camera;
-				memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix));
-				memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix));
-			}
-			const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix);
-
-			// tear down scene every frame
-			//auto& instance = m_renderer->m_instances[0];
-			//memcpy(&instance.world, &interface.model, sizeof(instance.world));
-			//instance.packedGeo = m_renderer->getGeometries().data() + interface.gcIndex;
-			//m_renderer->render(cb, viewParams);
-
-			//MeshPushConstant mPushConstant = { interface.camera.getConcatenatedMatrix(), cubeCount, coneCount, tubeCount };
-		}
-
-
-		inline void update(const std::chrono::microseconds nextPresentationTimestamp)
-		{
-			auto& camera = interface.camera;
-			camera.setMoveSpeed(interface.moveSpeed);
-			camera.setRotateSpeed(interface.rotateSpeed);
-
-
-			m_inputSystem->getDefaultMouse(&mouse);
-			m_inputSystem->getDefaultKeyboard(&keyboard);
-
-			struct
-			{
-				std::vector<SMouseEvent> mouse{};
-				std::vector<SKeyboardEvent> keyboard{};
-			} uiEvents;
-
-			// TODO: should be a member really
-			static std::chrono::microseconds previousEventTimestamp{};
-
-			// I think begin/end should always be called on camera, just events shouldn't be fed, why?
-			// If you stop begin/end, whatever keys were up/down get their up/down values frozen leading to
-			// `perActionDt` becoming obnoxiously large the first time the even processing resumes due to
-			// `timeDiff` being computed since `lastVirtualUpTimeStamp` 
-			camera.beginInputProcessing(nextPresentationTimestamp);
-			{
-				mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void
-					{
-						if (interface.move)
-							camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl
-
-						for (const auto& e : events) // here capture
-						{
-							if (e.timeStamp < previousEventTimestamp)
-								continue;
-
-							previousEventTimestamp = e.timeStamp;
-							uiEvents.mouse.emplace_back(e);
-
-							if (e.type==nbl::ui::SMouseEvent::EET_SCROLL)// && m_renderer)
-							{
-								interface.gcIndex += int16_t(core::sign(e.scrollEvent.verticalScroll));
-								//interface.gcIndex = core::clamp(interface.gcIndex,0ull,m_renderer->getGeometries().size()-1);
-							}
-						}
-					},
-					m_logger.get()
-				);
-				keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void
-					{
-						if (interface.move)
-							camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl
-
-						for (const auto& e : events) // here capture
-						{
-							if (e.timeStamp < previousEventTimestamp)
-								continue;
-
-							previousEventTimestamp = e.timeStamp;
-							uiEvents.keyboard.emplace_back(e);
-						}
-					},
-					m_logger.get()
-				);
-			}
-			camera.endInputProcessing(nextPresentationTimestamp);
-
-			const auto cursorPosition = m_window->getCursorControl()->getPosition();
-
-			ext::imgui::UI::SUpdateParameters params = 
-			{
-				.mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()),
-				.displaySize = {m_window->getWidth(),m_window->getHeight()},
-				.mouseEvents = uiEvents.mouse,
-				.keyboardEvents = uiEvents.keyboard
-			};
-
-			interface.objectName = m_scene->getInitParams().geometryNames[0];
-			interface.imGUI->update(params);
-		}
-
-		void recreateFramebuffer(const uint16_t2 resolution)
-		{
-			auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr<IGPUImageView>
-			{
-				auto image = m_device->createImage({{
-					.type = IGPUImage::ET_2D,
-					.samples = IGPUImage::ESCF_1_BIT,
-					.format = format,
-					.extent = {resolution.x,resolution.y,1},
-					.mipLevels = 1,
-					.arrayLayers = 1,
-					.usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT|IGPUImage::EUF_SAMPLED_BIT
-				}});
-				if (!m_device->allocate(image->getMemoryReqs(),image.get()).isValid())
-					return nullptr;
-				IGPUImageView::SCreationParams params = {
-					.image = std::move(image),
-					.viewType = IGPUImageView::ET_2D,
-					.format = format
-				};
-				params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT:IGPUImage::EAF_COLOR_BIT;
-				return m_device->createImageView(std::move(params));
-			};
-			
-			smart_refctd_ptr<IGPUImageView> colorView;
-			// detect window minimization
-			if (resolution.x<0x4000 && resolution.y<0x4000)
-			{
-				colorView = createImageAndView(finalSceneRenderFormat);
-				auto depthView = createImageAndView(sceneRenderDepthFormat);
-				m_framebuffer = m_device->createFramebuffer({ {
-					.renderpass = m_renderpass,
-					.depthStencilAttachments = &depthView.get(),
-					.colorAttachments = &colorView.get(),
-					.width = resolution.x,
-					.height = resolution.y
-				}});
-			}
-			else
-				m_framebuffer = nullptr;
-
-			// release previous slot and its image
-			interface.subAllocDS->multi_deallocate(0,1,&interface.renderColorViewDescIndex,{.semaphore=m_semaphore.get(),.value=m_realFrameIx});
-			//
-			if (colorView)
-			{
-				interface.subAllocDS->multi_allocate(0,1,&interface.renderColorViewDescIndex);
-				// update descriptor set
-				IGPUDescriptorSet::SDescriptorInfo info = {};
-				info.desc = colorView;
-				info.info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL;
-				const IGPUDescriptorSet::SWriteDescriptorSet write = {
-					.dstSet = interface.subAllocDS->getDescriptorSet(),
-					.binding = TexturesImGUIBindingIndex,
-					.arrayElement = interface.renderColorViewDescIndex,
-					.count = 1,
-					.info = &info
-				};
-				m_device->updateDescriptorSets({&write,1},{});
-			}
-			interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndex;
-		}
-
-		inline void beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info)
-		{
-			cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
-			cb->setScissor(0,1,&info.renderArea);
-			const SViewport viewport = {
-				.x = 0,
-				.y = 0,
-				.width = static_cast<float>(info.renderArea.extent.width),
-				.height = static_cast<float>(info.renderArea.extent.height)
-			};
-			cb->setViewport(0u,1u,&viewport);
-		}
-
-		// Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers
-		constexpr static inline uint32_t MaxFramesInFlight = 3u;
-		constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT;
-		constexpr static inline auto finalSceneRenderFormat = EF_R8G8B8A8_SRGB;
-		constexpr static inline auto TexturesImGUIBindingIndex = 0u;
-		// we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes
-		constexpr static inline auto MaxImGUITextures = 2u+MaxFramesInFlight;
-
-		//
-		smart_refctd_ptr<CGeometryCreatorScene> m_scene;
-		smart_refctd_ptr<IGPURenderpass> m_renderpass;
-		smart_refctd_ptr<IGPUFramebuffer> m_framebuffer;
-		smart_refctd_ptr<CSimpleDebugRenderer> m_renderer;
-		//
-		smart_refctd_ptr<ISemaphore> m_semaphore;
-		uint64_t m_realFrameIx = 0;
-		std::array<smart_refctd_ptr<IGPUCommandBuffer>,MaxFramesInFlight> m_cmdBufs;
-		//
-		InputSystem::ChannelReader<IMouseEventChannel> mouse;
-		InputSystem::ChannelReader<IKeyboardEventChannel> keyboard;
-
-		core::smart_refctd_ptr<video::SubAllocatedDescriptorSet> meshlet_subAllocDS;
-		smart_refctd_ptr<IGPUPipelineLayout> meshletLayout;
-		smart_refctd_ptr<IGPUMeshPipeline> meshletPipeline;
-		// UI stuff
-		struct CInterface
-		{
-			void operator()()
-			{
-				ImGuiIO& io = ImGui::GetIO();
-
-				{
-					matrix4SIMD projection;
-
-					if (isPerspective)
-						if(isLH)
-							projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
-						else
-							projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
-					else
-					{
-						float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x;
-
-						if(isLH)
-							projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar);
-						else
-							projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar);
-					}
-					camera.setProjectionMatrix(projection);
-				}
-
-				ImGuizmo::SetOrthographic(false);
-				ImGuizmo::BeginFrame();
-
-				ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing);
-				ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing);
-
-				// create a window and insert the inspector
-				ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing);
-				ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing);
-				ImGui::Begin("Editor");
-
-				if (ImGui::Button("reload mesh shader")) {
-					//printf("test shader result - %d\n", CreateTestShaderFuncPtr());
-				}
-
-				if (ImGui::RadioButton("Full view", !transformParams.useWindow))
-					transformParams.useWindow = false;
-
-				ImGui::SameLine();
-
-				if (ImGui::RadioButton("Window", transformParams.useWindow))
-					transformParams.useWindow = true;
-
-				ImGui::Text("Camera");
-				bool viewDirty = false;
-
-				if (ImGui::RadioButton("LH", isLH))
-					isLH = true;
-
-				ImGui::SameLine();
-
-				if (ImGui::RadioButton("RH", !isLH))
-					isLH = false;
-
-				if (ImGui::RadioButton("Perspective", isPerspective))
-					isPerspective = true;
-
-				ImGui::SameLine();
-
-				if (ImGui::RadioButton("Orthographic", !isPerspective))
-					isPerspective = false;
-
-				ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate);
-				ImGui::Checkbox("Enable camera movement", &move);
-				ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f);
-				ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f);
-
-				// ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case
-
-				if (isPerspective)
-					ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
-				else
-					ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20);
-
-				ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f);
-				ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f);
-
-				viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f);
-
-				if (viewDirty || firstFrame)
-				{
-					core::vectorSIMDf cameraPosition(cosf(camYAngle)* cosf(camXAngle)* transformParams.camDistance, sinf(camXAngle)* transformParams.camDistance, sinf(camYAngle)* cosf(camXAngle)* transformParams.camDistance);
-					core::vectorSIMDf cameraTarget(0.f, 0.f, 0.f);
-					const static core::vectorSIMDf up(0.f, 1.f, 0.f);
-
-					camera.setPosition(cameraPosition);
-					camera.setTarget(cameraTarget);
-					camera.setBackupUpVector(up);
-
-					camera.recomputeViewMatrix();
-				}
-				firstFrame = false;
-
-				ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y);
-				if (ImGuizmo::IsUsing())
-				{
-					ImGui::Text("Using gizmo");
-				}
-				else {
-					ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : "");
-					ImGui::SameLine();
-					ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : "");
-					ImGui::SameLine();
-					ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : "");
-					ImGui::SameLine();
-					ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : "");
-				}
-				ImGui::Separator();
-
-				/*
-				* ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout
-				* and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection
-
-				- VIEW:
-
-					ImGuizmo
-
-					|     X[0]          Y[0]          Z[0]         0.0f |
-					|     X[1]          Y[1]          Z[1]         0.0f |
-					|     X[2]          Y[2]          Z[2]         0.0f |
-					| -Dot(X, eye)  -Dot(Y, eye)  -Dot(Z, eye)     1.0f |
-
-					Nabla
-
-					|     X[0]         X[1]           X[2]     -Dot(X, eye)  |
-					|     Y[0]         Y[1]           Y[2]     -Dot(Y, eye)  |
-					|     Z[0]         Z[1]           Z[2]     -Dot(Z, eye)  |
-
-					<ImGuizmo View Matrix> = transpose(nbl::core::matrix4SIMD(<Nabla View Matrix>))
-
-				- PERSPECTIVE [PROJECTION CASE]:
-
-					ImGuizmo
-
-					|      (temp / temp2)                 (0.0)                       (0.0)                   (0.0)  |
-					|          (0.0)                  (temp / temp3)                  (0.0)                   (0.0)  |
-					| ((right + left) / temp2)   ((top + bottom) / temp3)    ((-zfar - znear) / temp4)       (-1.0f) |
-					|          (0.0)                      (0.0)               ((-temp * zfar) / temp4)        (0.0)  |
-
-					Nabla
-
-					|            w                        (0.0)                       (0.0)                   (0.0)               |
-					|          (0.0)                       -h                         (0.0)                   (0.0)               |
-					|          (0.0)                      (0.0)               (-zFar/(zFar-zNear))     (-zNear*zFar/(zFar-zNear)) |
-					|          (0.0)                      (0.0)                      (-1.0)                   (0.0)               |
-
-					<ImGuizmo Projection Matrix> = transpose(<Nabla Projection Matrix>)
-
-				*
-				* the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object,
-				* note it also modifies input view matrix but projection matrix is immutable
-				*/
-
-// TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases
-				static struct
-				{
-					core::matrix4SIMD view, projection, model;
-				} imguizmoM16InOut;
-
-				ImGuizmo::SetID(0u);
-
-				imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
-				imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix());
-				if (currentTransform >= 0 && currentTransform < transforms.size()) {
-					imguizmoM16InOut.model = core::transpose(matrix4SIMD(transforms[currentTransform]));
-				}
-				{
-					transformParams.editTransformDecomposition = true;
-					static TransformWidget transformWidget{};
-					transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
-					sceneResolution = widgetBox.zw;
-				}
-
-				if (currentTransform >= 0 && currentTransform < transforms.size()) {
-					transforms[currentTransform] = core::transpose(imguizmoM16InOut.model).extractSub3x4();
-				}
-				// to Nabla + update camera & model matrices
-// TODO: make it more nicely, extract:
-// - Position by computing inverse of the view matrix and grabbing its translation
-// - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position
-// But then set the view matrix this way anyway, because up-vector may not be compatible
-				const auto& view = camera.getViewMatrix();
-				const_cast<core::matrix3x4SIMD&>(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
-				// update concatanated matrix
-				const auto& projection = camera.getProjectionMatrix();
-				camera.setProjectionMatrix(projection);
-
-				// object meta display
-				{
-					ImGui::Begin("Object Counts");
-					ImGui::Text("object count - cube[%d] - cone[%d] - tube[%d]", cubeCount, coneCount, tubeCount);
-
-
-					ImGui::End();
-				}
-					
-				// view matrices editor
-				{
-					ImGui::Begin("Matrices");
-
-					auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true)
-					{
-						ImGui::Text(topText);
-						if (ImGui::BeginTable(tableName, columns))
-						{
-							for (int y = 0; y < rows; ++y)
-							{
-								ImGui::TableNextRow();
-								for (int x = 0; x < columns; ++x)
-								{
-									ImGui::TableSetColumnIndex(x);
-									ImGui::Text("%.3f", *(pointer + (y * columns) + x));
-								}
-							}
-							ImGui::EndTable();
-						}
-
-						if (withSeparator)
-							ImGui::Separator();
-					};
-
-					if (currentTransform >= 0 && currentTransform < transforms.size()) {
-						addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, transforms[currentTransform].pointer());
-					}
-					addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer());
-					addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false);
-
-					ImGui::End();
-				}
-			}
-
-			smart_refctd_ptr<ext::imgui::UI> imGUI;
-			// descriptor set
-			smart_refctd_ptr<SubAllocatedDescriptorSet> subAllocDS;
-			SubAllocatedDescriptorSet::value_type renderColorViewDescIndex = SubAllocatedDescriptorSet::invalid_value;
-			//
-			Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD());
-			// mutables
-			int32_t currentTransform = -1;
-			std::vector<core::matrix3x4SIMD> transforms;
-
-			std::string_view objectName;
-			TransformRequestParams transformParams;
-			uint16_t2 sceneResolution = {1280,720};
-			uint16_t4 widgetBox;
-			float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f;
-			float viewWidth = 10.f;
-			float camYAngle = 165.f / 180.f * 3.14159f;
-			float camXAngle = 32.f / 180.f * 3.14159f;
-			uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed
-			bool isPerspective = true, isLH = true, flipGizmoY = true, move = false;
-			bool firstFrame = true;
-
-			uint32_t cubeCount = 0;
-			uint32_t coneCount = 0;
-			uint32_t tubeCount = 0;
-		} interface;
-};
-
-NBL_MAIN_FUNC(UISampleApp)
\ No newline at end of file
+int main(int argc, char** argv) { // process entry point; argc/argv are forwarded untouched
+	// Written out by hand in place of the NBL_MAIN_FUNC(UISampleApp) macro invocation so an IDE can peek into the call.
+	return UISampleApp::main<UISampleApp>(argc, argv); // exit code is whatever the application's static main returns
+}
\ No newline at end of file
diff --git a/MeshShader/src/MeshRenderer.cpp b/MeshShader/src/MeshRenderer.cpp
new file mode 100644
index 000000000..2af4b1711
--- /dev/null
+++ b/MeshShader/src/MeshRenderer.cpp
@@ -0,0 +1,330 @@
+#include "MeshRenderer.hpp"
+
+namespace nbl::examples {
+	#define EXPOSE_NABLA_NAMESPACES \
+		using namespace nbl::core; \
+		using namespace nbl::system; \
+		using namespace nbl::asset; \
+		using namespace nbl::video
+
+	EXPOSE_NABLA_NAMESPACES;
+
+	constexpr static inline auto DefaultPolygonGeometryPatch = []()->video::CAssetConverter::patch_t<asset::ICPUPolygonGeometry> {
+		// vertex data is read through Uniform Texel Buffers, hence the UTB usage on the position and "other" buffers
+		using usage_flags_t = video::IGPUBuffer::E_USAGE_FLAGS;
+		video::CAssetConverter::patch_t<asset::ICPUPolygonGeometry> usagePatch = {};
+		usagePatch.indexBufferUsages = usage_flags_t::EUF_INDEX_BUFFER_BIT;
+		usagePatch.positionBufferUsages = usage_flags_t::EUF_UNIFORM_TEXEL_BUFFER_BIT;
+		usagePatch.otherBufferUsages = usage_flags_t::EUF_UNIFORM_TEXEL_BUFFER_BIT;
+		return usagePatch;
+	}();
+
+	MeshDebugRenderer::SViewParams::SViewParams(const hlsl::float32_t3x4& _view, const hlsl::float32_t4x4& _viewProj)
+	{
+		using namespace nbl::hlsl;
+		viewProj = _viewProj;
+		view = _view;
+		// the normal matrix is the inverse-transpose of the 3x3 matrix built from the view matrix
+		normal = transpose(inverse(float32_t3x3(_view)));
+	}
+
+	hlsl::float32_t4x4 MeshDebugRenderer::SViewParams::computeForInstance(hlsl::float32_t3x4 world) const
+	{
+		using namespace nbl::hlsl; // both operands are converted to float64 before the multiply
+		const auto worldViewProj = math::linalg::promoted_mul(float64_t4x4(viewProj), float64_t3x4(world));
+		return float32_t4x4(worldViewProj);
+	}
+
+	std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 3> MeshDebugRenderer::CreateTestShader(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX) { // returns the compiled {task, mesh, fragment} shaders; an entry is null when its load or compile failed
+		auto device = const_cast<ILogicalDevice*>(renderpass->getOriginDevice());
+		auto logger = device->getLogger();
+		auto loadCompileAndCreateShader = [&](const std::string& relPath, hlsl::ShaderStage stage, std::span<const asset::IShaderCompiler::SMacroDefinition> extraDefines) -> smart_refctd_ptr<IShader>
+			{
+				IAssetLoader::SAssetLoadParams lp = {};
+				lp.logger = logger;
+				lp.workingDirectory = ""; // virtual root
+				auto assetBundle = assMan->getAsset(relPath, lp);
+				const auto assets = assetBundle.getContents();
+				if (assets.empty()) {
+					printf("asset was empty - %s\n", relPath.c_str());
+					return nullptr;
+				}
+
+				// the first asset of the bundle must be an IShader; it is the source handed to the device compiler below
+				auto sourceRaw = IAsset::castDown<IShader>(assets[0]);
+				if (!sourceRaw) {
+					printf("source raw was nullptr - %s\n", relPath.c_str());
+					return nullptr;
+				}
+
+				nbl::video::ILogicalDevice::SShaderCreationParameters creationParams{
+					.source = sourceRaw.get(),
+					.optimizer = nullptr,
+					.readCache = nullptr,
+					.writeCache = nullptr,
+					.extraDefines = extraDefines,
+					.stage = stage
+				};
+
+				auto ret = device->compileShader(creationParams);
+				if (ret.get() == nullptr) {
+					printf("failed to compile shader - %s\n", relPath.c_str());
+				}
+				// NOTE: a compile failure is only reported above; the (null) result is still returned to the caller.
+				// Removing the bundle from the asset manager cache was attempted here and is disabled
+				// (`m_assetMgr->removeAssetFromCache(assetBundle);`) because it did not seem to work.
+				return ret;
+			};
+		constexpr uint32_t WorkgroupSize = 64;
+		const uint32_t ObjectCount = 7;
+		const uint32_t InstanceCount = WorkgroupSize; //this is going to be based off limits. 64 is PROBABLY safe on all hardware, but cant guarantee
+		const std::string WorkgroupSizeAsStr = std::to_string(WorkgroupSize);
+		const std::string ObjectCountAsStr = std::to_string(ObjectCount);
+		const std::string InstanceCountAsStr = std::to_string(InstanceCount);
+		// the three macros below are passed as extra defines to the task and mesh shaders; the fragment shader gets none
+		const IShaderCompiler::SMacroDefinition WorkgroupSizeDefine = { "WORKGROUP_SIZE",WorkgroupSizeAsStr };
+		const IShaderCompiler::SMacroDefinition ObjectCountDefine = { "OBJECT_COUNT", ObjectCountAsStr };
+		const IShaderCompiler::SMacroDefinition InstanceCountDefine = { "INSTANCE_COUNT", InstanceCountAsStr };
+
+		const IShaderCompiler::SMacroDefinition meshArray[] = { WorkgroupSizeDefine, ObjectCountDefine, InstanceCountDefine };
+		return {
+			loadCompileAndCreateShader("app_resources/geom.task.hlsl", IShader::E_SHADER_STAGE::ESS_TASK, { meshArray }),
+			loadCompileAndCreateShader("app_resources/geom.mesh.hlsl", IShader::E_SHADER_STAGE::ESS_MESH, { meshArray }),
+			loadCompileAndCreateShader("app_resources/geom.frag.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT, {})
+		};
+	}
+
+	core::smart_refctd_ptr<MeshDebugRenderer> MeshDebugRenderer::create(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX)
+	{
+		EXPOSE_NABLA_NAMESPACES;
+		// Builds the descriptor set, pipeline layout and mesh pipeline in turn; returns nullptr as soon as one step fails.
+		if (!renderpass || !assMan)
+			return nullptr;
+		auto device = const_cast<ILogicalDevice*>(renderpass->getOriginDevice());
+		auto logger = device->getLogger();
+
+		SInitParams init;
+
+		// create descriptor set
+		{
+			// create Descriptor Set Layout
+			smart_refctd_ptr<IGPUDescriptorSetLayout> dsLayout;
+			{
+				using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
+				const IGPUDescriptorSetLayout::SBinding bindings[] =
+				{
+					{ //vertices
+						.binding = VertexAttrubUTBDescBinding,
+						.type = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER,
+						// need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable
+						.createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | binding_flags_t::ECF_PARTIALLY_BOUND_BIT,
+						.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH | IShader::E_SHADER_STAGE::ESS_FRAGMENT,
+						.count = MissingView
+					},
+					// (no binding for indices: none of these objects use indices so it is skipped)
+					{ //meshletdataobject
+						.binding = 1,
+						.type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER,
+						.createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT,
+						.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH | IShader::E_SHADER_STAGE::ESS_FRAGMENT,
+						.count = 1
+					}
+				};
+				dsLayout = device->createDescriptorSetLayout(bindings);
+				if (!dsLayout)
+				{
+					logger->log("Could not create descriptor set layout!", ILogger::ELL_ERROR);
+					return nullptr;
+				}
+			}
+
+			// create Descriptor Set (a pool that failed to be created is reported the same way as a failed set)
+			auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, { &dsLayout.get(),1 });
+			auto ds = pool ? pool->createDescriptorSet(std::move(dsLayout)) : smart_refctd_ptr<IGPUDescriptorSet>();
+			if (!ds)
+			{
+				logger->log("Could not create descriptor set!", ILogger::ELL_ERROR);
+				return nullptr;
+			}
+			init.subAllocDS = make_smart_refctd_ptr<SubAllocatedDescriptorSet>(std::move(ds));
+		}
+
+		// create pipeline layout
+		const SPushConstantRange ranges[] = { {
+			.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH | hlsl::ShaderStage::ESS_FRAGMENT,
+			.offset = 0,
+			.size = sizeof(SInstance::SPushConstants),
+		} };
+		init.layout = device->createPipelineLayout(ranges, smart_refctd_ptr<const IGPUDescriptorSetLayout>(init.subAllocDS->getDescriptorSet()->getLayout()));
+		if (!init.layout)
+		{
+			logger->log("Could not create pipeline layout!", ILogger::ELL_ERROR);
+			return nullptr;
+		}
+		auto shaderRet = CreateTestShader(assMan, renderpass, subpassIX);
+		if (!shaderRet[0] || !shaderRet[1] || !shaderRet[2])
+			return nullptr; // CreateTestShader already printed which shader failed to load or compile
+		// create pipelines
+		{
+			//this needs to be fixed, the mesh and frag use different files
+			IGPUMeshPipeline::SCreationParams params{
+				.layout = init.layout.get(),
+				.taskShader = {.shader = shaderRet[0].get(), .entryPoint = "main"},
+				.meshShader = {.shader = shaderRet[1].get(), .entryPoint = "main" },
+				.fragmentShader = {.shader = shaderRet[2].get(), .entryPoint = "main" }
+			};
+			// no vertex input, or assembly; the blend state is not touched
+			params.cached.rasterization.faceCullingMode = EFCM_NONE;
+			params.cached.subpassIx = subpassIX;
+			params.renderpass = renderpass;
+
+			if (!device->createMeshPipelines(nullptr, { &params, 1 }, &init.pipeline))
+			{
+				logger->log("Could not create Mesh Pipeline!", ILogger::ELL_ERROR);
+				return nullptr;
+			}
+		}
+
+		return smart_refctd_ptr<MeshDebugRenderer>(new MeshDebugRenderer(std::move(init)), dont_grab);
+	}
+
+	bool MeshDebugRenderer::addGeometries(const std::span<const video::IGPUPolygonGeometry* const> geometries)
+	{
+		EXPOSE_NABLA_NAMESPACES;
+		if (geometries.empty())
+			return false;
+		auto device = const_cast<ILogicalDevice*>(m_params.layout->getOriginDevice());
+		// one UTB descriptor is allocated and queued for writing per present position/normal view of every geometry
+		core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes;
+		core::vector<IGPUDescriptorSet::SDescriptorInfo> infos;
+		bool anyFailed = false;
+		auto allocateUTB = [&](const IGeometry<const IGPUBuffer>::SDataView& view)->decltype(SubAllocatedDescriptorSet::invalid_value)
+			{
+				if (!view)
+					return MissingView;
+				auto index = SubAllocatedDescriptorSet::invalid_value;
+				if (m_params.subAllocDS->multi_allocate(VertexAttrubUTBDescBinding, 1, &index) != 0)
+				{
+					anyFailed = true;
+					return MissingView;
+				}
+				const auto infosOffset = infos.size();
+				infos.emplace_back().desc = device->createBufferView(view.src, view.composed.format);
+				writes.emplace_back() = {
+					.dstSet = m_params.subAllocDS->getDescriptorSet(),
+					.binding = VertexAttrubUTBDescBinding,
+					.arrayElement = index,
+					.count = 1,
+					.info = reinterpret_cast<const IGPUDescriptorSet::SDescriptorInfo*>(infosOffset)
+				};
+				return index;
+			};
+
+		auto resetGeoms = core::makeRAIIExiter(
+			[&]()->void {
+				for (auto& write : writes) {
+					immediateDealloc(write.arrayElement);
+				}
+			}
+		);
+
+		// slots of m_geoms.meshData are filled in iteration order; the order itself does not matter as long as the data stays matched up
+		uint8_t meshIndex = 0;
+		for (const auto geom : geometries)
+		{
+			// could also check device origin on all buffers
+			if (!geom->valid())
+				return false;
+
+			// NOTE(review): meshIndex is not checked against the capacity of m_geoms.meshData — confirm callers never pass more geometries than slots
+			auto& out = m_geoms.meshData[meshIndex];
+			meshIndex++;
+			out.vertCount = geom->getVertexReferenceCount();
+			out.positionView = allocateUTB(geom->getPositionView());
+			out.normalView = allocateUTB(geom->getNormalView());
+
+			if(geom->getIndexingCallback()->knownTopology() == E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_FAN){
+				out.objectType &= 2; // NOTE(review): `&=` can only clear bits — presumably `|= 2` (or `= 2`) was meant to tag triangle fans; confirm
+			}
+			const auto& view = geom->getIndexView();
+			if (view) {
+				assert(false && "not currently setup to support index buffer");
+			}
+		}
+
+		if (anyFailed)
+			device->getLogger()->log("Failed to allocate a UTB for some geometries, probably ran out of space in Descriptor Set!", system::ILogger::ELL_ERROR);
+
+		// nothing was queued: no geometry had a view, or every allocation failed
+		if (infos.empty())
+			return false;
+
+		// each `write.info` so far holds an index into `infos` (see allocateUTB); turn it into a real pointer now that `infos` no longer grows
+		for (auto& write : writes)
+			write.info = infos.data() + reinterpret_cast<const size_t&>(write.info);
+		if (!device->updateDescriptorSets(writes, {}))
+			return false;
+
+		// success: emptying `writes` leaves the RAII exiter nothing to deallocate, so the allocated descriptors are retained
+		writes.clear();
+		return true;
+	}
+
+	void MeshDebugRenderer::clearGeometries(const video::ISemaphore::SWaitInfo& info) {
+		for (uint8_t i = 0; i < m_geoms.currentObjectCount; i++) {
+			// NOTE(review): empty body — nothing is released yet; presumably removeGeometry(i, info) was intended here
+		}
+	}
+
+	void MeshDebugRenderer::removeGeometry(const uint32_t ix, const video::ISemaphore::SWaitInfo& info)
+	{
+		EXPOSE_NABLA_NAMESPACES;
+		// Frees the UTB descriptor slots of mesh `ix`: immediately when `info` has no semaphore, otherwise batched and handed over with `info`.
+		core::vector<SubAllocatedDescriptorSet::value_type> pending;
+		pending.reserve(3);
+		const auto release = [&](const SubAllocatedDescriptorSet::value_type slot)->void
+			{
+				if (slot >= MissingView)
+					return;
+				if (!info.semaphore)
+					immediateDealloc(slot);
+				else
+					pending.push_back(slot);
+			};
+		const auto mesh = m_geoms.meshData[ix];
+		release(mesh.positionView);
+		release(mesh.normalView);
+
+		// only call into the sub-allocator when something was actually queued
+		if (!pending.empty())
+			m_params.subAllocDS->multi_deallocate(VertexAttrubUTBDescBinding, pending.size(), pending.data(), info);
+	}
+
+	void MeshDebugRenderer::render(video::IGPUCommandBuffer* cmdbuf, const SViewParams& viewParams) const
+	{
+		EXPOSE_NABLA_NAMESPACES;
+		// Records, for every entry of m_instances, a mesh pipeline bind, a push constant update and one dispatch.
+		cmdbuf->beginDebugMarker("MeshDebugRenderer::render");
+		// the sub-allocated descriptor set is bound once, as set 0, and shared by all instances
+		const auto* layout = m_params.layout.get();
+		const auto ds = m_params.subAllocDS->getDescriptorSet();
+		cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS, layout, 0, 1, &ds);
+		// the push constant update must name every stage of the range declared in `create` (task, mesh and fragment)
+		for (const auto& instance : m_instances) {
+			cmdbuf->bindMeshPipeline(m_params.pipeline.get());
+			const auto pc = instance.computePushConstants(viewParams);
+			cmdbuf->pushConstants(layout, hlsl::ShaderStage::ESS_TASK | hlsl::ShaderStage::ESS_MESH | hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc);
+			//if (m_geoms->indexBuffer)
+			//{
+				//cmdbuf->bindIndexBuffer(geo->indexBuffer,geo->indexType);
+				//cmdbuf->drawIndexed(geo->elementCount,1,0,0,0);
+				//cmdbuf->bindDescriptorSets(geo->indexBuffer);
+			//}
+			//else {
+				//cmdbuf->bindDescriptorSets(geo->indexBuffer);
+				//cmdbuf->draw(geo->elementCount, 1, 0, 0);
+			//}
+			cmdbuf->dispatch(1, 1, 1); // NOTE(review): this is recorded inside a render pass with a mesh pipeline bound — presumably a draw-mesh-tasks command is what is needed; confirm
+		}
+		cmdbuf->endDebugMarker();
+	}
+}//namespace nbl::examples
\ No newline at end of file
diff --git a/MeshShader/src/SampleApp.cpp b/MeshShader/src/SampleApp.cpp
new file mode 100644
index 000000000..4f304af4f
--- /dev/null
+++ b/MeshShader/src/SampleApp.cpp
@@ -0,0 +1,769 @@
+#include "SampleApp.h"
+
+    bool UISampleApp::onAppInitialized(smart_refctd_ptr<ISystem>&& system) {
+        // Creates, in order: frame semaphore, command buffers, geometry scene, scene renderpass, mesh renderer, ImGUI and its descriptor set.
+        if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system)))
+            return false;
+        if (!device_base_t::onAppInitialized(smart_refctd_ptr(system)))
+            return false;
+
+        m_semaphore = m_device->createSemaphore(m_realFrameIx);
+        if (!m_semaphore)
+            return logFail("Failed to Create a Semaphore!");
+
+        auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT);
+        for (auto i = 0u; i<MaxFramesInFlight; i++)
+        {
+            if (!pool)
+                return logFail("Couldn't create Command Pool!");
+            if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_cmdBufs.data()+i,1}))
+                return logFail("Couldn't create Command Buffer!");
+        }
+        
+        const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()};
+        //auto creator = core::make_smart_refctd_ptr<CGeometryCreator>();
+        //auto cube = creator->createCube({ 1.f,1.f,1.f });
+        //id like to combine all the vertices into 1 buffer but given how it's set up, thats out of scope
+        //cube->getPositionView();
+
+        m_scene = CGeometryCreatorScene::create(
+            {
+                .transferQueue = getTransferUpQueue(),
+                .utilities = m_utils.get(),
+                .logger = m_logger.get(),
+                .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies
+            },
+            CSimpleDebugRenderer::DefaultPolygonGeometryPatch
+        );
+        if (!m_scene || m_scene->getInitParams().geometries.empty()) // the renderer below dereferences the scene and its first geometry
+            return logFail("Could not create geometry creator scene!");
+        
+        // for the scene drawing pass
+        {
+            IGPURenderpass::SCreationParams params = {};
+            const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = {
+                {{
+                    {
+                        .format = sceneRenderDepthFormat,
+                        .samples = IGPUImage::ESCF_1_BIT,
+                        .mayAlias = false
+                    },
+                    /*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR},
+                    /*.storeOp = */{IGPURenderpass::STORE_OP::STORE},
+                    /*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED},
+                    /*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}
+                }},
+                IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd
+            };
+            params.depthStencilAttachments = depthAttachments;
+            const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = {
+                {{
+                    {
+                        .format = finalSceneRenderFormat,
+                        .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT,
+                        .mayAlias = false
+                    },
+                    /*.loadOp = */IGPURenderpass::LOAD_OP::CLEAR,
+                    /*.storeOp = */IGPURenderpass::STORE_OP::STORE,
+                    /*.initialLayout = */IGPUImage::LAYOUT::UNDEFINED,
+                    /*.finalLayout = */ IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read
+                }},
+                IGPURenderpass::SCreationParams::ColorAttachmentsEnd
+            };
+            params.colorAttachments = colorAttachments;
+            IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = {
+                {},
+                IGPURenderpass::SCreationParams::SubpassesEnd
+            };
+            subpasses[0].depthStencilAttachment = {{.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}};
+            subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}};
+            params.subpasses = subpasses;
+            params.dependencies = {};
+            m_renderpass = m_device->createRenderpass(std::move(params));
+            if (!m_renderpass)
+                return logFail("Failed to create Scene Renderpass!");
+        }
+
+        const auto& geometries = m_scene->getInitParams().geometries;
+        m_renderer = MeshDebugRenderer::create(m_assetMgr.get(), m_renderpass.get(), 0, { &geometries.front().get(),geometries.size() });
+        if (!m_renderer)
+            return logFail("Failed to create the Mesh Debug Renderer!");
+        m_renderer->m_instances.resize(1); //should probably just get rid of the vector
+
+        // Create ImGUI
+        {
+            auto scRes = static_cast<CDefaultSwapchainFramebuffers*>(m_surface->getSwapchainResources());
+            ext::imgui::UI::SCreationParameters params = {};
+            params.resources.texturesInfo = {.setIx=0u,.bindingIx=TexturesImGUIBindingIndex};
+            params.resources.samplersInfo = {.setIx=0u,.bindingIx=1u};
+
+
+            params.utilities = m_utils;
+            params.transfer = getTransferUpQueue();
+            params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(),params.resources.texturesInfo,params.resources.samplersInfo,MaxImGUITextures);
+            params.assetManager = make_smart_refctd_ptr<IAssetManager>(smart_refctd_ptr(m_system));
+            params.renderpass = smart_refctd_ptr<IGPURenderpass>(scRes->getRenderpass());
+            params.subpassIx = 0u;
+            params.pipelineCache = nullptr;
+            interface.imGUI = ext::imgui::UI::create(std::move(params));
+            if (!interface.imGUI) {
+                return logFail("Failed to create `nbl::ext::imgui::UI` class");
+            }
+        }
+
+        // create rest of User Interface
+        {
+            auto* imgui = interface.imGUI.get();
+            // create the suballocated descriptor set
+            {
+                // note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources
+                const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u);
+                auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,{&layout,1});
+                if (!pool)
+                    return logFail("Failed to create the descriptor pool");
+                auto ds = pool->createDescriptorSet(smart_refctd_ptr<const IGPUDescriptorSetLayout>(layout));
+                if (!ds)
+                    return logFail("Failed to create the descriptor set");
+                interface.subAllocDS = make_smart_refctd_ptr<SubAllocatedDescriptorSet>(std::move(ds));
+                // make sure Texture Atlas slot is taken for eternity
+                {
+                    auto dummy = SubAllocatedDescriptorSet::invalid_value;
+                    interface.subAllocDS->multi_allocate(0,1,&dummy);
+                    assert(dummy==ext::imgui::UI::FontAtlasTexId);
+                }
+                // write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout
+                IGPUDescriptorSet::SDescriptorInfo info = {};
+                info.desc = smart_refctd_ptr<nbl::video::IGPUImageView>(interface.imGUI->getFontAtlasView());
+                info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL;
+                const IGPUDescriptorSet::SWriteDescriptorSet write = {
+                    .dstSet = interface.subAllocDS->getDescriptorSet(),
+                    .binding = TexturesImGUIBindingIndex,
+                    .arrayElement = ext::imgui::UI::FontAtlasTexId,
+                    .count = 1,
+                    .info = &info
+                };
+                if (!m_device->updateDescriptorSets({&write,1},{}))
+                    return logFail("Failed to write the descriptor set");
+            }
+            imgui->registerListener([this](){interface();});
+        }
+        
+        interface.objectNames = {
+            "Cube",
+            "Rectangle",
+            "Disk",
+            "Sphere",
+            "Cylinder",
+            "Cone",
+            "Icosphere"
+            //magicenum reflection?
+        };
+        for (auto& objCount : interface.objectCount) {
+            objCount = 0;
+        }
+        //load up the ICPUGeometry, then convert it to GPU geometry
+
+        interface.camera.mapKeysToArrows();
+
+        onAppInitializedFinish();
+        return true;
+    }
+
+    /*
+    smart_refctd_ptr<IGPUDescriptorSetLayout> UISampleApp::BuildMeshletDSLayout() const {
+        smart_refctd_ptr<IGPUDescriptorSetLayout> ret;
+        using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
+        const IGPUDescriptorSetLayout::SBinding bindings[] =
+        {
+            {
+                .binding = 0,
+                .type = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER,
+                // need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable
+                .createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | binding_flags_t::ECF_PARTIALLY_BOUND_BIT,
+                .stageFlags = IShader::E_SHADER_STAGE::ESS_MESH,
+                .count = UINT16_MAX
+            },
+            {
+                .binding = 1,
+                .type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER,
+                .createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT,
+                .stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH,
+                .count = 1
+            }
+        };
+        ret = m_device->createDescriptorSetLayout(bindings);
+        if (!ret) {
+            m_logger->log("Could not create descriptor set layout!", ILogger::ELL_ERROR);
+            return nullptr;
+        }
+        return ret;
+    }
+    */
+
+   
+
+    bool UISampleApp::onAppTerminated() {
+        SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId; // the font atlas slot reserved in onAppInitialized
+        IGPUDescriptorSet::SDropDescriptorSet dummy[1]; // scratch array handed to multi_deallocate; not read afterwards here
+        interface.subAllocDS->multi_deallocate(dummy,TexturesImGUIBindingIndex,1,&fontAtlasDescIx);
+        return device_base_t::onAppTerminated();
+    }
+
+    IQueue::SSubmitInfo::SSemaphoreInfo UISampleApp::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) {
+        // process CPU-side input events first (see update())
+        update(nextPresentationTimestamp);
+
+        const auto& virtualWindowRes = interface.sceneResolution;
+        if (!m_framebuffer || m_framebuffer->getCreationParameters().width!=virtualWindowRes[0] || m_framebuffer->getCreationParameters().height!=virtualWindowRes[1])
+            recreateFramebuffer(virtualWindowRes);
+
+        const auto resourceIx = m_realFrameIx % MaxFramesInFlight;
+
+        auto* const cb = m_cmdBufs.data()[resourceIx].get();
+        cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT);
+        cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
+        // opaque black clear colour, shared by the scene pass and the ImGUI pass
+        const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} };
+        if (m_framebuffer)
+        {
+            cb->beginDebugMarker("UISampleApp Scene Frame");
+            {
+                const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f };
+                const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo{
+                    .framebuffer = m_framebuffer.get(),
+                    .colorClearValues = &clearValue,
+                    .depthStencilClearValues = &farValue,
+                    .renderArea = {
+                        .offset = {0,0},
+                        .extent = {virtualWindowRes[0],virtualWindowRes[1]}
+                    }
+                };
+                beginRenderpass(cb, renderpassInfo);
+            }
+            // draw scene
+            UpdateScene(cb);
+            cb->endRenderPass();
+            cb->endDebugMarker();
+        }
+        {
+            cb->beginDebugMarker("UISampleApp IMGUI Frame");
+            { //begin imgui subpass
+                auto scRes = static_cast<CDefaultSwapchainFramebuffers*>(m_surface->getSwapchainResources());
+                const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = {
+                    .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex),
+                    .colorClearValues = &clearValue,
+                    .depthStencilClearValues = nullptr,
+                    .renderArea = {
+                        .offset = {0,0},
+                        .extent = {m_window->getWidth(),m_window->getHeight()}
+                    }
+                };
+                beginRenderpass(cb, renderpassInfo);
+            }
+            // draw ImGUI
+            {
+                auto* imgui = interface.imGUI.get();
+                auto* pipeline = imgui->getPipeline();
+                cb->bindGraphicsPipeline(pipeline);
+                // note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx
+                const auto* ds = interface.subAllocDS->getDescriptorSet();
+                cb->bindDescriptorSets(EPBP_GRAPHICS,pipeline->getLayout(),imgui->getCreationParameters().resources.texturesInfo.setIx,1u,&ds);
+                // a timepoint in the future to release streaming resources for geometry
+                const ISemaphore::SWaitInfo drawFinished = {.semaphore=m_semaphore.get(),.value=m_realFrameIx+1u};
+                if (!imgui->render(cb,drawFinished))
+                {
+                    m_logger->log("TODO: need to present acquired image before bailing because its already acquired.",ILogger::ELL_ERROR);
+                    return {};
+                }
+            }
+            cb->endRenderPass();
+            cb->endDebugMarker();
+        }
+        cb->end();
+
+        IQueue::SSubmitInfo::SSemaphoreInfo retval =
+        {
+            .semaphore = m_semaphore.get(),
+            .value = ++m_realFrameIx,
+            .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS
+        };
+        const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] =
+        {
+            {.cmdbuf = cb }
+        };
+        const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = {
+            {
+                .semaphore = device_base_t::getCurrentAcquire().semaphore,
+                .value = device_base_t::getCurrentAcquire().acquireCount,
+                .stageMask = PIPELINE_STAGE_FLAGS::NONE
+            }
+        };
+        const IQueue::SSubmitInfo infos[] =
+        {
+            {
+                .waitSemaphores = acquired,
+                .commandBuffers = commandBuffers,
+                .signalSemaphores = {&retval,1}
+            }
+        };
+        
+        if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS)
+        {
+            retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal
+            m_realFrameIx--; // undo the pre-increment made when `retval` was built, since nothing was submitted
+        }
+
+
+        m_window->setCaption("[Nabla Engine] Mesh Shader Demo");
+        return retval;
+    }
+
+    const video::IGPURenderpass::SCreationParams::SSubpassDependency* UISampleApp::getDefaultSubpassDependencies() const {
+        // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present
+        const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
+            // don't want any writes to be available, we'll clear, only thing to worry about is the layout transition
+            {
+                .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+                .dstSubpass = 0,
+                .memoryBarrier = {
+                    .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway
+                    .srcAccessMask = ACCESS_FLAGS::NONE,
+                    // layout transition needs to finish before the color write
+                    .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+                    .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+                }
+                // leave view offsets and flags default
+            },
+            // want layout transition to begin after all color output is done
+            {
+                .srcSubpass = 0,
+                .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+                .memoryBarrier = {
+                    // last place where the color can get modified, depth is implicitly earlier
+                    .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+                    // only write ops, reads can't be made available
+                    .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+                    // spec says nothing is needed when presentation is the destination
+                }
+                // leave view offsets and flags default
+            },
+            IGPURenderpass::SCreationParams::DependenciesEnd
+        };
+        return dependencies; // the array is function-local static, so the pointer stays valid after this call returns
+    }
+
+
+    void UISampleApp::UpdateScene(nbl::video::IGPUCommandBuffer* cb) {
+        // TODO: get rid of legacy matrices //<-- camera.getViewMatrix returns matrix3x4SIMD
+        // until then the camera's matrices are copied bytewise into their HLSL-typed counterparts
+        const auto& cam = interface.camera;
+        float32_t3x4 view;
+        memcpy(&view, cam.getViewMatrix().pointer(), sizeof(view));
+        float32_t4x4 viewProj;
+        memcpy(&viewProj, cam.getConcatenatedMatrix().pointer(), sizeof(viewProj));
+
+        // the mesh renderer records its commands into `cb` using these view parameters
+        const auto params = MeshDebugRenderer::SViewParams(view, viewProj);
+        m_renderer->render(cb, params);
+
+        //MeshPushConstant mPushConstant = { interface.camera.getConcatenatedMatrix(), cubeCount, coneCount, tubeCount };
+    }
+
+
+    void UISampleApp::update(const std::chrono::microseconds nextPresentationTimestamp)
+    {
+        // Per-frame input pump: configures and feeds the camera, gathers the events the UI
+        // should see, then runs the ImGUI update with the cursor position and window size.
+        auto& cam = interface.camera;
+        cam.setMoveSpeed(interface.moveSpeed);
+        cam.setRotateSpeed(interface.rotateSpeed);
+
+        m_inputSystem->getDefaultMouse(&mouse);
+        m_inputSystem->getDefaultKeyboard(&keyboard);
+
+        // events collected this frame for the UI
+        std::vector<SMouseEvent> capturedMouse{};
+        std::vector<SKeyboardEvent> capturedKeyboard{};
+
+        // TODO: should be a member really
+        static std::chrono::microseconds previousEventTimestamp{};
+
+        // Keeps an event unless it is older than the newest one kept so far (mouse and
+        // keyboard share that timestamp); the return value tells whether it was kept.
+        const auto capture = [&](const auto& event, auto& sink) -> bool
+        {
+            if (event.timeStamp < previousEventTimestamp)
+                return false;
+            previousEventTimestamp = event.timeStamp;
+            sink.emplace_back(event);
+            return true;
+        };
+
+        // I think begin/end should always be called on the camera, only the events shouldn't be fed. Why?
+        // If begin/end stop being called, whatever keys were up/down get their up/down values frozen,
+        // so `perActionDt` becomes obnoxiously large the first time event processing resumes,
+        // due to `timeDiff` being computed since `lastVirtualUpTimeStamp`
+        cam.beginInputProcessing(nextPresentationTimestamp);
+        {
+            mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void
+                {
+                    if (interface.move)
+                        cam.mouseProcess(events); // not captured here, the camera handles them with its own impl
+
+                    for (const auto& event : events) // capture for the UI
+                    {
+                        if (!capture(event, capturedMouse))
+                            continue;
+
+                        if (event.type==nbl::ui::SMouseEvent::EET_SCROLL)
+                            interface.gcIndex += int16_t(core::sign(event.scrollEvent.verticalScroll));
+                    }
+                },
+                m_logger.get()
+            );
+            keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void
+                {
+                    if (interface.move)
+                        cam.keyboardProcess(events); // not captured here, the camera handles them with its own impl
+
+                    for (const auto& event : events) // capture for the UI
+                        capture(event, capturedKeyboard);
+                },
+                m_logger.get()
+            );
+        }
+        cam.endInputProcessing(nextPresentationTimestamp);
+
+        const auto cursor = m_window->getCursorControl()->getPosition();
+
+        // the mouse position is handed over relative to the window's X/Y
+        ext::imgui::UI::SUpdateParameters uiParams =
+        {
+            .mousePosition = float32_t2(cursor.x,cursor.y) - float32_t2(m_window->getX(),m_window->getY()),
+            .displaySize = {m_window->getWidth(),m_window->getHeight()},
+            .mouseEvents = capturedMouse,
+            .keyboardEvents = capturedKeyboard
+        };
+
+        interface.objectCount = {0, 0, 0, 0, 0};
+        interface.imGUI->update(uiParams);
+    }
+
+    void UISampleApp::recreateFramebuffer(const uint16_t2 resolution)
+    {
+        // Rebuilds the scene color/depth views and the framebuffer for `resolution`, then
+        // moves the color view's slot in the ImGUI texture binding over to the new view.
+        auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr<IGPUImageView>
+        {
+            auto image = m_device->createImage({{
+                .type = IGPUImage::ET_2D,
+                .samples = IGPUImage::ESCF_1_BIT,
+                .format = format,
+                .extent = {resolution.x,resolution.y,1},
+                .mipLevels = 1, .arrayLayers = 1,
+                .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT|IGPUImage::EUF_SAMPLED_BIT
+            }});
+            // creation itself may fail, so never ask a null image for its memory requirements
+            if (!image || !m_device->allocate(image->getMemoryReqs(),image.get()).isValid())
+                return nullptr;
+            IGPUImageView::SCreationParams params = {
+                .image = std::move(image),
+                .viewType = IGPUImageView::ET_2D,
+                .format = format
+            };
+            params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT:IGPUImage::EAF_COLOR_BIT;
+            return m_device->createImageView(std::move(params));
+        };
+        smart_refctd_ptr<IGPUImageView> colorView;
+        // detect window minimization
+        if (resolution.x<0x4000 && resolution.y<0x4000)
+        {
+            colorView = createImageAndView(finalSceneRenderFormat);
+            auto depthView = createImageAndView(sceneRenderDepthFormat);
+            m_framebuffer = m_device->createFramebuffer({ {
+                .renderpass = m_renderpass,
+                .depthStencilAttachments = &depthView.get(),
+                .colorAttachments = &colorView.get(),
+                .width = resolution.x, .height = resolution.y
+            }});
+        }
+        else
+            m_framebuffer = nullptr;
+
+        // release previous slot and its image
+        interface.subAllocDS->multi_deallocate(0,1,&interface.renderColorViewDescIndex,{.semaphore=m_semaphore.get(),.value=m_realFrameIx});
+        // a new slot is only claimed when a color view was actually created above
+        if (colorView)
+        {
+            interface.subAllocDS->multi_allocate(0,1,&interface.renderColorViewDescIndex);
+            // update descriptor set
+            IGPUDescriptorSet::SDescriptorInfo info = {};
+            info.desc = colorView;
+            info.info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL;
+            const IGPUDescriptorSet::SWriteDescriptorSet write = {
+                .dstSet = interface.subAllocDS->getDescriptorSet(),
+                .binding = TexturesImGUIBindingIndex,
+                .arrayElement = interface.renderColorViewDescIndex,
+                .count = 1,
+                .info = &info
+            };
+            m_device->updateDescriptorSets({&write,1},{});
+        }
+        interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndex;
+    }
+
+    void UISampleApp::beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info)
+    {
+        const auto& area = info.renderArea; // scissor and viewport are both sized from the render area
+        cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
+        cb->setScissor(0,1,&area);
+        const SViewport fullArea = {
+            .x = 0, .y = 0,
+            .width = static_cast<float>(area.extent.width),
+            .height = static_cast<float>(area.extent.height)
+        };
+        cb->setViewport(0u,1u,&fullArea);
+    }
+
+
+
+    void UISampleApp::CInterface::operator()() {
+        ImGuiIO& io = ImGui::GetIO();
+
+        {
+            matrix4SIMD projection;
+
+            if (isPerspective)
+                if(isLH)
+                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
+                else
+                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
+            else
+            {
+                float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x;
+
+                if(isLH)
+                    projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar);
+                else
+                    projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar);
+            }
+            camera.setProjectionMatrix(projection);
+        }
+
+        ImGuizmo::SetOrthographic(false);
+        ImGuizmo::BeginFrame();
+
+        ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing);
+        ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing);
+
+        // create a window and insert the inspector
+        ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing);
+        ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing);
+        ImGui::Begin("Editor");
+
+        for(uint8_t i = 0; i < objectNames.size(); i++){
+            if (ImGui::TreeNode(objectNames[i].c_str())) {
+                ImGui::Text("objectCount - %d", objectCount[i]);
+                
+                ImGui::TreePop();
+            }
+        }
+
+        ImGui::Separator();
+
+        if (ImGui::Button("reload mesh shader")) {
+            //printf("test shader result - %d\n", CreateTestShaderFuncPtr());
+        }
+
+        if (ImGui::RadioButton("Full view", !transformParams.useWindow))
+            transformParams.useWindow = false;
+
+        ImGui::SameLine();
+
+        if (ImGui::RadioButton("Window", transformParams.useWindow))
+            transformParams.useWindow = true;
+
+        ImGui::Text("Camera");
+        bool viewDirty = false;
+
+        if (ImGui::RadioButton("LH", isLH))
+            isLH = true;
+
+        ImGui::SameLine();
+
+        if (ImGui::RadioButton("RH", !isLH))
+            isLH = false;
+
+        if (ImGui::RadioButton("Perspective", isPerspective))
+            isPerspective = true;
+
+        ImGui::SameLine();
+
+        if (ImGui::RadioButton("Orthographic", !isPerspective))
+            isPerspective = false;
+
+        ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate);
+        ImGui::Checkbox("Enable camera movement", &move);
+        ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f);
+        ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f);
+
+        // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case
+
+        if (isPerspective)
+            ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
+        else
+            ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20);
+
+        ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f);
+        ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f);
+
+        viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f);
+
+        if (viewDirty || firstFrame)
+        {
+            core::vectorSIMDf cameraPosition(cosf(camYAngle)* cosf(camXAngle)* transformParams.camDistance, sinf(camXAngle)* transformParams.camDistance, sinf(camYAngle)* cosf(camXAngle)* transformParams.camDistance);
+            core::vectorSIMDf cameraTarget(0.f, 0.f, 0.f);
+            const static core::vectorSIMDf up(0.f, 1.f, 0.f);
+
+            camera.setPosition(cameraPosition);
+            camera.setTarget(cameraTarget);
+            camera.setBackupUpVector(up);
+
+            camera.recomputeViewMatrix();
+        }
+        firstFrame = false;
+
+        ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y);
+        if (ImGuizmo::IsUsing())
+        {
+            ImGui::Text("Using gizmo");
+        }
+        else {
+            ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : "");
+            ImGui::SameLine();
+            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : "");
+            ImGui::SameLine();
+            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : "");
+            ImGui::SameLine();
+            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : "");
+        }
+        ImGui::Separator();
+
+        /*
+        * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout
+        * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection
+
+        - VIEW:
+
+            ImGuizmo
+
+            |     X[0]          Y[0]          Z[0]         0.0f |
+            |     X[1]          Y[1]          Z[1]         0.0f |
+            |     X[2]          Y[2]          Z[2]         0.0f |
+            | -Dot(X, eye)  -Dot(Y, eye)  -Dot(Z, eye)     1.0f |
+
+            Nabla
+
+            |     X[0]         X[1]           X[2]     -Dot(X, eye)  |
+            |     Y[0]         Y[1]           Y[2]     -Dot(Y, eye)  |
+            |     Z[0]         Z[1]           Z[2]     -Dot(Z, eye)  |
+
+            <ImGuizmo View Matrix> = transpose(nbl::core::matrix4SIMD(<Nabla View Matrix>))
+
+        - PERSPECTIVE [PROJECTION CASE]:
+
+            ImGuizmo
+
+            |      (temp / temp2)                 (0.0)                       (0.0)                   (0.0)  |
+            |          (0.0)                  (temp / temp3)                  (0.0)                   (0.0)  |
+            | ((right + left) / temp2)   ((top + bottom) / temp3)    ((-zfar - znear) / temp4)       (-1.0f) |
+            |          (0.0)                      (0.0)               ((-temp * zfar) / temp4)        (0.0)  |
+
+            Nabla
+
+            |            w                        (0.0)                       (0.0)                   (0.0)               |
+            |          (0.0)                       -h                         (0.0)                   (0.0)               |
+            |          (0.0)                      (0.0)               (-zFar/(zFar-zNear))     (-zNear*zFar/(zFar-zNear)) |
+            |          (0.0)                      (0.0)                      (-1.0)                   (0.0)               |
+
+            <ImGuizmo Projection Matrix> = transpose(<Nabla Projection Matrix>)
+
+        *
+        * the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object,
+        * note it also modifies input view matrix but projection matrix is immutable
+        */
+
+// TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases
+        static struct
+        {
+            core::matrix4SIMD view, projection, model;
+        } imguizmoM16InOut;
+
+        ImGuizmo::SetID(0u);
+
+        imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
+        imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix());
+
+        if (currentTransform < 0) {
+            currentTransform = 0;
+        }
+
+        if (currentTransform >= 0 && currentTransform < transforms.size()) {
+            imguizmoM16InOut.model = core::transpose(matrix4SIMD(transforms[currentTransform]));
+        }
+        {
+            transformParams.editTransformDecomposition = true;
+            static TransformWidget transformWidget{};
+            const auto tempForConversion = transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
+            sceneResolution = { tempForConversion.x, tempForConversion.y };
+            
+        }
+
+        if (currentTransform >= 0 && currentTransform < transforms.size()) {
+            transforms[currentTransform] = core::transpose(imguizmoM16InOut.model).extractSub3x4();
+        }
+        // to Nabla + update camera & model matrices
+// TODO: make it more nicely, extract:
+// - Position by computing inverse of the view matrix and grabbing its translation
+// - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position
+// But then set the view matrix this way anyway, because up-vector may not be compatible
+        const auto& view = camera.getViewMatrix();
+        const_cast<core::matrix3x4SIMD&>(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
+        // update concatanated matrix
+        const auto& projection = camera.getProjectionMatrix();
+        camera.setProjectionMatrix(projection);
+            
+        // view matrices editor
+        {
+            ImGui::Begin("Matrices");
+
+            auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true)
+            {
+                ImGui::Text(topText);
+                if (ImGui::BeginTable(tableName, columns))
+                {
+                    for (int y = 0; y < rows; ++y)
+                    {
+                        ImGui::TableNextRow();
+                        for (int x = 0; x < columns; ++x)
+                        {
+                            ImGui::TableSetColumnIndex(x);
+                            ImGui::Text("%.3f", *(pointer + (y * columns) + x));
+                        }
+                    }
+                    ImGui::EndTable();
+                }
+
+                if (withSeparator)
+                    ImGui::Separator();
+            };
+
+            if (currentTransform >= 0 && currentTransform < transforms.size()) {
+                addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, transforms[currentTransform].pointer());
+            }
+            addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer());
+            addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false);
+
+            ImGui::End();
+        }
+    }
diff --git a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp
index 9a9e5c966..d56cc953d 100644
--- a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp
+++ b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp
@@ -228,8 +228,7 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted
 			core::smart_refctd_ptr<video::IGPUGraphicsPipeline> pipelines[PipelineType::Count];
 		};
 		inline const SInitParams& getInitParams() const {return m_params;}
-
-		//
+		
 		inline bool addGeometries(const std::span<const video::IGPUPolygonGeometry* const> geometries)
 		{
 			EXPOSE_NABLA_NAMESPACES;
@@ -261,8 +260,6 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted
 				};
 				return index;
 			};
-			if (anyFailed)
-				device->getLogger()->log("Failed to allocate a UTB for some geometries, probably ran out of space in Descriptor Set!",system::ILogger::ELL_ERROR);
 
 			auto sizeToSet = m_geoms.size();
 			auto resetGeoms = core::makeRAIIExiter([&]()->void
@@ -309,6 +306,9 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted
 				out.normalView = allocateUTB(geom->getNormalView());
 			}
 
+			if (anyFailed)
+				device->getLogger()->log("Failed to allocate a UTB for some geometries, probably ran out of space in Descriptor Set!", system::ILogger::ELL_ERROR);
+
 			// no geometry
 			if (infos.empty())
 				return false;
@@ -356,7 +356,7 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted
 		//
 		inline void clearGeometries(const video::ISemaphore::SWaitInfo& info)
 		{
-			// back to front to avoid O(n^2) resize
+			// removes back to front, one element at a time; TODO(review): why not deallocate everything and then clear once?
 			while (!m_geoms.empty())
 				removeGeometry(m_geoms.size()-1,info);
 		}

From 2be7b1a346c160eb5b4409fd0b1a913a969cf0be Mon Sep 17 00:00:00 2001
From: Corey <corey.w108@gmail.com>
Date: Sat, 20 Dec 2025 14:28:39 -0600
Subject: [PATCH 6/8] only shaders remaining?

I think I just need to fix the shaders and that's it.
---
 MeshShader/include/MeshRenderer.hpp |   1 +
 MeshShader/include/transform.hpp    |   1 +
 MeshShader/src/MeshRenderer.cpp     |  12 +-
 MeshShader/src/SampleApp.cpp        | 332 +++++++++++++---------------
 4 files changed, 164 insertions(+), 182 deletions(-)

diff --git a/MeshShader/include/MeshRenderer.hpp b/MeshShader/include/MeshRenderer.hpp
index c05542bf2..51830709a 100644
--- a/MeshShader/include/MeshRenderer.hpp
+++ b/MeshShader/include/MeshRenderer.hpp
@@ -124,6 +124,7 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 	inline ~MeshDebugRenderer()	{
 		// clean shutdown, can also make SubAllocatedDescriptorSet resillient against that, and issue `device->waitIdle` if not everything is freed
 		const_cast<video::ILogicalDevice*>(m_params.layout->getOriginDevice())->waitIdle();
+		clearGeometries({}); // runs after the waitIdle above, hence the default-constructed wait info
 	}
 	void clearGeometries(const video::ISemaphore::SWaitInfo& info);
 
diff --git a/MeshShader/include/transform.hpp b/MeshShader/include/transform.hpp
index c8b19f466..f02ab0ee3 100644
--- a/MeshShader/include/transform.hpp
+++ b/MeshShader/include/transform.hpp
@@ -129,6 +129,7 @@ struct TransformWidget {
 			ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
 
 		ImGui::End();
+		ImGui::PopStyleColor();
 
 		return contentRegionSize;
 	}
diff --git a/MeshShader/src/MeshRenderer.cpp b/MeshShader/src/MeshRenderer.cpp
index 2af4b1711..c0748d0e9 100644
--- a/MeshShader/src/MeshRenderer.cpp
+++ b/MeshShader/src/MeshRenderer.cpp
@@ -247,7 +247,8 @@ namespace nbl::examples {
 			}
 			const auto& view = geom->getIndexView();
 			if (view) {
-				assert(false && "not currently setup to support index buffer");
+				view.getElementCount();
+				assert(out.vertCount == view.getElementCount() && "not currently setup to support index buffer");
 			}
 		}
 
@@ -270,8 +271,9 @@ namespace nbl::examples {
 	}
 
 	void MeshDebugRenderer::clearGeometries(const video::ISemaphore::SWaitInfo& info) {
-		for (uint8_t i = 0; i < m_geoms.currentObjectCount; i++) {
-
+		// NOTE: walks every slot up to MaxObjectCount, i.e. assumes each object was loaded correctly - that assumption is definitely wrong
+		for (uint8_t i = 0; i < m_geoms.MaxObjectCount; i++) {
+			removeGeometry(i, info);
 		}
 	}
 
@@ -312,7 +314,7 @@ namespace nbl::examples {
 		for (const auto& instance : m_instances) {
 			cmdbuf->bindMeshPipeline(m_params.pipeline.get());
 			const auto pc = instance.computePushConstants(viewParams);
-			cmdbuf->pushConstants(layout, hlsl::ShaderStage::ESS_MESH | hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc);
+			cmdbuf->pushConstants(layout, hlsl::ShaderStage::ESS_TASK | hlsl::ShaderStage::ESS_MESH | hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc);
 			//if (m_geoms->indexBuffer)
 			//{
 				//cmdbuf->bindIndexBuffer(geo->indexBuffer,geo->indexType);
@@ -323,7 +325,7 @@ namespace nbl::examples {
 				//cmdbuf->bindDescriptorSets(geo->indexBuffer);
 				//cmdbuf->draw(geo->elementCount, 1, 0, 0);
 			//}
-			cmdbuf->dispatch(1, 1, 1);
+			cmdbuf->drawMeshTasks(1, 1, 1);
 		}
 		cmdbuf->endDebugMarker();
 	}
diff --git a/MeshShader/src/SampleApp.cpp b/MeshShader/src/SampleApp.cpp
index 4f304af4f..db3aa56a0 100644
--- a/MeshShader/src/SampleApp.cpp
+++ b/MeshShader/src/SampleApp.cpp
@@ -529,26 +529,33 @@
 
     void UISampleApp::CInterface::operator()() {
         ImGuiIO& io = ImGui::GetIO();
+        io.ConfigDebugIsDebuggerPresent = true;
 
+        // camera: rebuild the projection from the UI-controlled parameters and hand it to the camera
         {
             matrix4SIMD projection;
+            const float aspectRatio = io.DisplaySize.x / io.DisplaySize.y; // width over height; must not be scaled by viewWidth
 
-            if (isPerspective)
-                if(isLH)
-                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
-                else
-                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
+            if (isPerspective) {
+                if (isLH) {
+                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), aspectRatio, zNear, zFar);
+                }
+                else {
+                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), aspectRatio, zNear, zFar);
+                }
+            }
             else
             {
-                float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x;
-
-                if(isLH)
-                    projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar);
-                else
-                    projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar);
+                if (isLH) { // ortho height is viewWidth * displayHeight / displayWidth
+                    projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewWidth / aspectRatio, zNear, zFar);
+                }
+                else {
+                    projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewWidth / aspectRatio, zNear, zFar);
+                }
             }
             camera.setProjectionMatrix(projection);
-        }
+        } //end camera
+        
 
         ImGuizmo::SetOrthographic(false);
         ImGuizmo::BeginFrame();
@@ -559,211 +566,182 @@
         // create a window and insert the inspector
         ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing);
         ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing);
-        ImGui::Begin("Editor");
+        if(ImGui::Begin("Editor")) {
 
-        for(uint8_t i = 0; i < objectNames.size(); i++){
-            if (ImGui::TreeNode(objectNames[i].c_str())) {
-                ImGui::Text("objectCount - %d", objectCount[i]);
+            //object data
+            for(uint8_t i = 0; i < objectNames.size(); i++){
+                if (ImGui::TreeNode(objectNames[i].c_str())) {
+                    ImGui::Text("objectCount - %d", objectCount[i]);
                 
-                ImGui::TreePop();
+                    ImGui::TreePop();
+                }
             }
-        }
-
-        ImGui::Separator();
-
-        if (ImGui::Button("reload mesh shader")) {
-            //printf("test shader result - %d\n", CreateTestShaderFuncPtr());
-        }
 
-        if (ImGui::RadioButton("Full view", !transformParams.useWindow))
-            transformParams.useWindow = false;
+            ImGui::Separator();
 
-        ImGui::SameLine();
-
-        if (ImGui::RadioButton("Window", transformParams.useWindow))
-            transformParams.useWindow = true;
-
-        ImGui::Text("Camera");
-        bool viewDirty = false;
-
-        if (ImGui::RadioButton("LH", isLH))
-            isLH = true;
-
-        ImGui::SameLine();
-
-        if (ImGui::RadioButton("RH", !isLH))
-            isLH = false;
-
-        if (ImGui::RadioButton("Perspective", isPerspective))
-            isPerspective = true;
-
-        ImGui::SameLine();
-
-        if (ImGui::RadioButton("Orthographic", !isPerspective))
-            isPerspective = false;
-
-        ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate);
-        ImGui::Checkbox("Enable camera movement", &move);
-        ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f);
-        ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f);
-
-        // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case
+            //controls
+            {
+                //if (ImGui::Button("reload mesh shader")) {
+                    //printf("test shader result - %d\n", CreateTestShaderFuncPtr());
+                //}
 
-        if (isPerspective)
-            ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
-        else
-            ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20);
+                if (ImGui::RadioButton("Full view", !transformParams.useWindow))
+                    transformParams.useWindow = false;
 
-        ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f);
-        ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f);
+                ImGui::SameLine();
 
-        viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f);
+                if (ImGui::RadioButton("Window", transformParams.useWindow))
+                    transformParams.useWindow = true;
 
-        if (viewDirty || firstFrame)
-        {
-            core::vectorSIMDf cameraPosition(cosf(camYAngle)* cosf(camXAngle)* transformParams.camDistance, sinf(camXAngle)* transformParams.camDistance, sinf(camYAngle)* cosf(camXAngle)* transformParams.camDistance);
-            core::vectorSIMDf cameraTarget(0.f, 0.f, 0.f);
-            const static core::vectorSIMDf up(0.f, 1.f, 0.f);
+                ImGui::Text("Camera");
+                bool viewDirty = false;
 
-            camera.setPosition(cameraPosition);
-            camera.setTarget(cameraTarget);
-            camera.setBackupUpVector(up);
+                if (ImGui::RadioButton("LH", isLH))
+                    isLH = true;
 
-            camera.recomputeViewMatrix();
-        }
-        firstFrame = false;
+                ImGui::SameLine();
 
-        ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y);
-        if (ImGuizmo::IsUsing())
-        {
-            ImGui::Text("Using gizmo");
-        }
-        else {
-            ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : "");
-            ImGui::SameLine();
-            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : "");
-            ImGui::SameLine();
-            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : "");
-            ImGui::SameLine();
-            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : "");
-        }
-        ImGui::Separator();
+                if (ImGui::RadioButton("RH", !isLH))
+                    isLH = false;
 
-        /*
-        * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout
-        * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection
+                if (ImGui::RadioButton("Perspective", isPerspective))
+                    isPerspective = true;
 
-        - VIEW:
+                ImGui::SameLine();
 
-            ImGuizmo
+                if (ImGui::RadioButton("Orthographic", !isPerspective))
+                    isPerspective = false;
 
-            |     X[0]          Y[0]          Z[0]         0.0f |
-            |     X[1]          Y[1]          Z[1]         0.0f |
-            |     X[2]          Y[2]          Z[2]         0.0f |
-            | -Dot(X, eye)  -Dot(Y, eye)  -Dot(Z, eye)     1.0f |
+                ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate);
+                ImGui::Checkbox("Enable camera movement", &move);
+                ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f);
+                ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f);
 
-            Nabla
+                // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case
 
-            |     X[0]         X[1]           X[2]     -Dot(X, eye)  |
-            |     Y[0]         Y[1]           Y[2]     -Dot(Y, eye)  |
-            |     Z[0]         Z[1]           Z[2]     -Dot(Z, eye)  |
+                if (isPerspective)
+                    ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
+                else
+                    ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20);
 
-            <ImGuizmo View Matrix> = transpose(nbl::core::matrix4SIMD(<Nabla View Matrix>))
+                ImGui::SliderFloat("zNear", &zNear, 0.1f, zFar);
+                ImGui::SliderFloat("zFar", &zFar, zNear, 10000.f);
 
-        - PERSPECTIVE [PROJECTION CASE]:
+                viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f);
 
-            ImGuizmo
+                if (viewDirty || firstFrame)
+                {
+                    core::vectorSIMDf cameraPosition(cosf(camYAngle) * cosf(camXAngle) * transformParams.camDistance, sinf(camXAngle) * transformParams.camDistance, sinf(camYAngle) * cosf(camXAngle) * transformParams.camDistance);
+                    core::vectorSIMDf cameraTarget(0.f, 0.f, 0.f);
+                    const static core::vectorSIMDf up(0.f, 1.f, 0.f);
 
-            |      (temp / temp2)                 (0.0)                       (0.0)                   (0.0)  |
-            |          (0.0)                  (temp / temp3)                  (0.0)                   (0.0)  |
-            | ((right + left) / temp2)   ((top + bottom) / temp3)    ((-zfar - znear) / temp4)       (-1.0f) |
-            |          (0.0)                      (0.0)               ((-temp * zfar) / temp4)        (0.0)  |
+                    camera.setPosition(cameraPosition);
+                    camera.setTarget(cameraTarget);
+                    camera.setBackupUpVector(up);
 
-            Nabla
+                    camera.recomputeViewMatrix();
+                }
+                firstFrame = false;
 
-            |            w                        (0.0)                       (0.0)                   (0.0)               |
-            |          (0.0)                       -h                         (0.0)                   (0.0)               |
-            |          (0.0)                      (0.0)               (-zFar/(zFar-zNear))     (-zNear*zFar/(zFar-zNear)) |
-            |          (0.0)                      (0.0)                      (-1.0)                   (0.0)               |
+                ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y);
+                if (ImGuizmo::IsUsing())
+                {
+                    ImGui::Text("Using gizmo");
+                }
+                else {
+                    ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : "");
+                    ImGui::SameLine();
+                    ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : "");
+                    ImGui::SameLine();
+                    ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : "");
+                    ImGui::SameLine();
+                    ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : "");
+                }
+            }//end controls
+            
+            ImGui::Separator();
 
-            <ImGuizmo Projection Matrix> = transpose(<Nabla Projection Matrix>)
+            /*
+            * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout
+            * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection
 
-        *
-        * the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object,
-        * note it also modifies input view matrix but projection matrix is immutable
-        */
+            *
+            * the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object,
+            * note it also modifies input view matrix but projection matrix is immutable
+            */
 
-// TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases
-        static struct
-        {
-            core::matrix4SIMD view, projection, model;
-        } imguizmoM16InOut;
+    // TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases
+            static struct
+            {
+                core::matrix4SIMD view, projection, model;
+            } imguizmoM16InOut;
 
-        ImGuizmo::SetID(0u);
+            ImGuizmo::SetID(0u);
 
-        imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
-        imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix());
+            imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
+            imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix());
 
-        if (currentTransform < 0) {
-            currentTransform = 0;
-        }
+            if (currentTransform < 0) {
+                currentTransform = 0;
+            }
 
-        if (currentTransform >= 0 && currentTransform < transforms.size()) {
-            imguizmoM16InOut.model = core::transpose(matrix4SIMD(transforms[currentTransform]));
-        }
-        {
-            transformParams.editTransformDecomposition = true;
-            static TransformWidget transformWidget{};
-            const auto tempForConversion = transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
-            sceneResolution = { tempForConversion.x, tempForConversion.y };
+            if (currentTransform >= 0 && currentTransform < transforms.size()) {
+                imguizmoM16InOut.model = core::transpose(matrix4SIMD(transforms[currentTransform]));
+            }
+            {
+                transformParams.editTransformDecomposition = true;
+                static TransformWidget transformWidget{};
+                const auto tempForConversion = transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
+                sceneResolution = { tempForConversion.x, tempForConversion.y };
             
-        }
+            }
 
-        if (currentTransform >= 0 && currentTransform < transforms.size()) {
-            transforms[currentTransform] = core::transpose(imguizmoM16InOut.model).extractSub3x4();
-        }
-        // to Nabla + update camera & model matrices
-// TODO: make it more nicely, extract:
-// - Position by computing inverse of the view matrix and grabbing its translation
-// - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position
-// But then set the view matrix this way anyway, because up-vector may not be compatible
-        const auto& view = camera.getViewMatrix();
-        const_cast<core::matrix3x4SIMD&>(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
-        // update concatanated matrix
-        const auto& projection = camera.getProjectionMatrix();
-        camera.setProjectionMatrix(projection);
+            if (currentTransform >= 0 && currentTransform < transforms.size()) {
+                transforms[currentTransform] = core::transpose(imguizmoM16InOut.model).extractSub3x4();
+            }
+            // to Nabla + update camera & model matrices
+    // TODO: make it more nicely, extract:
+    // - Position by computing inverse of the view matrix and grabbing its translation
+    // - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position
+    // But then set the view matrix this way anyway, because up-vector may not be compatible
+            const auto& view = camera.getViewMatrix();
+            const_cast<core::matrix3x4SIMD&>(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
+            // update concatanated matrix
+            const auto& projection = camera.getProjectionMatrix();
+            camera.setProjectionMatrix(projection);
             
-        // view matrices editor
-        {
-            ImGui::Begin("Matrices");
+            // view matrices editor
+            if(ImGui::Begin("Matrices")){
 
-            auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true)
-            {
-                ImGui::Text(topText);
-                if (ImGui::BeginTable(tableName, columns))
-                {
-                    for (int y = 0; y < rows; ++y)
+                auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true)
                     {
-                        ImGui::TableNextRow();
-                        for (int x = 0; x < columns; ++x)
+                        ImGui::Text(topText);
+                        if (ImGui::BeginTable(tableName, columns))
                         {
-                            ImGui::TableSetColumnIndex(x);
-                            ImGui::Text("%.3f", *(pointer + (y * columns) + x));
+                            for (int y = 0; y < rows; ++y)
+                            {
+                                ImGui::TableNextRow();
+                                for (int x = 0; x < columns; ++x)
+                                {
+                                    ImGui::TableSetColumnIndex(x);
+                                    ImGui::Text("%.3f", *(pointer + (y * columns) + x));
+                                }
+                            }
+                            ImGui::EndTable();
                         }
-                    }
-                    ImGui::EndTable();
-                }
 
-                if (withSeparator)
-                    ImGui::Separator();
-            };
+                        if (withSeparator)
+                            ImGui::Separator();
+                    };
 
-            if (currentTransform >= 0 && currentTransform < transforms.size()) {
-                addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, transforms[currentTransform].pointer());
-            }
-            addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer());
-            addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false);
+                if (currentTransform >= 0 && currentTransform < transforms.size()) {
+                    addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, transforms[currentTransform].pointer());
+                }
+                addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer());
+                addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false);
 
+            } //end view matrix editor
             ImGui::End();
-        }
+        } //end editor window
+        ImGui::End();
     }

From d615ea4394e49d00c933cb2789e2bbc0835d1af7 Mon Sep 17 00:00:00 2001
From: Corey <corey.w108@gmail.com>
Date: Mon, 22 Dec 2025 19:57:21 -0600
Subject: [PATCH 7/8] ive been backed into a corner

[22.12.2025 19:53:22:605382][ERROR]: Descriptor set (000002B092D5C450) doesn't allow descriptor of such type category at binding 1.
[22.12.2025 19:53:22:605551][ERROR]: Invalid descriptor type (descriptorWrites[2]) [4c26069f9][nbl::video::ILogicalDevice::updateDescriptorSets - C:\Projects\Nabla\src\nbl\video\ILogicalDevice.cpp:529]

This is the error I get at MeshRenderer.cpp, line 322.

It could potentially be that the buffer just isn't flagged for storage-buffer usage (it probably isn't), or it could be a deeper issue. My concern is that if I start tugging on this thread, the next thing I know I'll be rewriting all of Nabla.
I'm just going to scrap the current geometry, write a simple cube, and call it a day.

I verified that the task shaders worked correctly at some point during this process; whoever's reading this is just going to have to take my word for it.
---
 MeshShader/app_resources/geom.mesh.hlsl       |  48 +-
 MeshShader/app_resources/geom.task.hlsl       |   5 +-
 .../app_resources/task_mesh_common.hlsl       |   9 +-
 MeshShader/include/MeshRenderer.hpp           |  77 +--
 MeshShader/include/SampleApp.h                |  35 +-
 MeshShader/include/transform.hpp              |  11 +-
 MeshShader/main.cpp                           |   2 +-
 MeshShader/src/MeshRenderer.cpp               | 146 +++--
 MeshShader/src/SampleApp.cpp                  | 560 ++++++++++--------
 9 files changed, 550 insertions(+), 343 deletions(-)

diff --git a/MeshShader/app_resources/geom.mesh.hlsl b/MeshShader/app_resources/geom.mesh.hlsl
index d8778c4a5..7b2515c06 100644
--- a/MeshShader/app_resources/geom.mesh.hlsl
+++ b/MeshShader/app_resources/geom.mesh.hlsl
@@ -3,9 +3,12 @@
 #include "task_mesh_common.hlsl"
 
 
-//utb is short for "uniform texel buffer", or its a storage buffer with vec4s
+//utb is short for "uniform texel buffer", could also be considered a storage buffer with vec4s
+//the gpu probably does something different with the data between a utb and a storage buffer but idk
 [[vk::binding(0)]] Buffer<float32_t4> utbs[PushDescCount];
-//none of the objects use the index buffer
+
+//binding 1, set 0, the mesh data is in binding 0
+[[vk::binding(1, 0)]] Buffer<uint> indices[];
 
 struct VertexOut {
     float32_t4 ndc : SV_Position;
@@ -30,11 +33,11 @@ void main(
     //or if it isnt 1 task shader launching every mesh shader, the payload will need to handle
     const float32_t4x4 worldViewProj = pc.viewProj * transform[groupThreadID.x];
 
-
     if(id.x < meshDataCopy.vertCount){
         const float32_t3 position = utbs[meshDataCopy.positionView][id.x].xyz;
 
-        verts[id.x].ndc = mul(float32_t4(position, 1.0), worldViewProj);
+        // verts[id.x].ndc = mul(float32_t4(position, 1.0), worldViewProj);
+        verts[id.x].ndc = mul(worldViewProj, float32_t4(position, 1.0));
 
 
         if (meshDataCopy.normalView < PushDescCount) { // && meshDataCopy.objType != CONE_OBJECT - just going to set cone_object normalView to pushdesccount
@@ -47,26 +50,49 @@ void main(
         }
     }
 
-    if(id.x < meshDataCopy.primCount){
+    //uint outputVertexCount = meshDataCopy.vertCount; //not necessary right now
+    uint outputPrimitiveCount = meshDataCopy.primCount;
+    //we're assuming primCount == vertCount, but most of the time a 
+    //index buffer will exist, so i'll leave the branch in the EXAMPLE
+    if(id.x < meshDataCopy.primCount){ 
+        //a fan has 0 at the center, then around a circle it'll have 1 at 12o'clock, (relatively speaking)
+        //numbers increment as you go clockwise (again, relatively speaking)
+        //so if id.x + 2 is greater than prim count, it wraps back around to 1
         if(meshDataCopy.objType == T_FAN_OBJECT_TYPE){
             uint3 prim = uint3(0, id.x + 1, id.x + 2);
             if(prim.y >= meshDataCopy.vertCount){
                 //not adding
             }
-            if(prim.z >= meshDataCopy.vertCount){
+            else if (prim.z >= meshDataCopy.vertCount) {
                 prim.z = 1;
+                prims[id.x] = prim;
+                printf("adding prim - {%u:%u:%u}", prims[id.x].x, prims[id.x].y, prims[id.x].z);
+            }
+            else {
+                prims[id.x] = prim;
+                printf("adding prim - {%u:%u:%u}", prims[id.x].x, prims[id.x].y, prims[id.x].z);
             }
-            prims[id.x] = prim;
         }
-        else{
+        /* probably incorrect
+        else if(triangle strip){
             uint3 prim = uint3(id.x, id.x + 1, id.x + 2);
             bool lessThan = (prim.x < meshDataCopy.vertCount) && (prim.y < meshDataCopy.vertCount) && (prim.z < meshDataCopy.vertCount);
-            if(lessThan){
+            if (lessThan) {
                 prims[id.x] = prim;
+                printf("adding prim [triangle strip type]- {%u:%u:%u} : {%u:%u:%u}", prims[id.x].x, prims[id.x].y, prims[id.x].z, prim.x, prim.y, prim.z);
+            }
+        }
+        */
+        else { // triangle list.
+            outputPrimitiveCount = meshDataCopy.vertCount / 3;
+            if (id.x < (meshDataCopy.primCount / 3)) { //probably incorrect for a indexed triangle list idk
+                prims[id.x].x = id.x * 3;
+                prims[id.x].y = id.x * 3 + 1;
+                prims[id.x].z = id.x * 3 + 2;
+                printf("adding prim [triangle strip type]- {%u:%u:%u}", prims[id.x].x, prims[id.x].y, prims[id.x].z);
             }
         }
     }
 
-
-    SetMeshOutputCounts(meshDataCopy.vertCount, meshDataCopy.primCount);
+    SetMeshOutputCounts(meshDataCopy.vertCount, outputPrimitiveCount);
 }
\ No newline at end of file
diff --git a/MeshShader/app_resources/geom.task.hlsl b/MeshShader/app_resources/geom.task.hlsl
index 3c228a60f..330bf57fc 100644
--- a/MeshShader/app_resources/geom.task.hlsl
+++ b/MeshShader/app_resources/geom.task.hlsl
@@ -3,21 +3,20 @@
 
 groupshared TaskToMeshPayload taskToMeshPayload;
 
-[numthreads(WORKGROUP_SIZE,1,1)]
+[numthreads(1,1,1)]
 void main(
 	in uint3 id : SV_DispatchThreadID,
 	in uint3 groupThreadId : SV_GroupThreadID
 	//out payload TaskToMeshPayload taskToMeshPayload, interestingly, thats not how it's done here
 ){
-
 	uint objectCount = 0;
 	for(uint i = 0; i < OBJECT_COUNT; i++){
 		for(uint j = 0; j < pc.objectCount[i]; j++){
 			taskToMeshPayload.objectType[objectCount] = i;
 			objectCount++;
 		}
-		objectCount += pc.objectCount[i];
 	}
 
+    printf("dispatching meshes - %u", objectCount);
 	DispatchMesh(objectCount, 1, 1, taskToMeshPayload);
 }
\ No newline at end of file
diff --git a/MeshShader/app_resources/task_mesh_common.hlsl b/MeshShader/app_resources/task_mesh_common.hlsl
index 125a411a5..476792ab6 100644
--- a/MeshShader/app_resources/task_mesh_common.hlsl
+++ b/MeshShader/app_resources/task_mesh_common.hlsl
@@ -2,7 +2,7 @@
 //this is user defined data sent from the task shader to the mesh shader
 //1 packet is sent, but it can use arrays so that each workgroup can receive customized data
 struct TaskToMeshPayload {
-    uint objectType[INSTANCE_COUNT];
+    uint objectType[INSTANCE_COUNT * OBJECT_COUNT];
 };
 
 //1 is cone, 2 is for fan, anything else for trangle list without the special normal calc.
@@ -12,13 +12,14 @@ struct TaskToMeshPayload {
 #define T_FAN_OBJECT_TYPE 2
 struct MeshData{
     uint vertCount;
-    uint primCount;
+    uint primCount; //were assuming vertCount is always equal to primCount (no index buffer)
     uint objType; 
 	uint positionView;
-	uint normalView;
+    uint normalView;
+    uint indexView;
 };
 
-[[vk::binding(1)]] cbuffer MeshDataBuffer {
+[[vk::binding(0, 1)]] cbuffer MeshDataBuffer {
     
     MeshData meshData[OBJECT_COUNT];
     float4x4 transform[INSTANCE_COUNT]; //this is goign to be based on device limits
diff --git a/MeshShader/include/MeshRenderer.hpp b/MeshShader/include/MeshRenderer.hpp
index 51830709a..5cbcb3863 100644
--- a/MeshShader/include/MeshRenderer.hpp
+++ b/MeshShader/include/MeshRenderer.hpp
@@ -6,7 +6,6 @@
 namespace nbl::examples
 {
 
-
 	enum class MeshletObjectTypes {
 		Cube,
 		Rectangle,
@@ -18,6 +17,29 @@ namespace nbl::examples
 
 		COUNT
 	};
+		//this is buffer data
+	struct MeshletObjectData {
+		uint32_t vertCount;
+		uint32_t primCount;
+		uint32_t objectType;
+		uint32_t positionView;
+		uint32_t normalView;
+		uint32_t indexView;
+	};
+	struct MeshDataBuffer {
+		//if gpuGeometry is nullptr or std::nullopt or whatever, then mesh object type is invalid, the CPU memory failed to transfer to GPU for whatever reason
+		core::smart_refctd_ptr<const video::IGPUPolygonGeometry> gpuGeometry{};
+
+		static constexpr std::size_t MaxObjectCount = static_cast<std::size_t>(MeshletObjectTypes::COUNT);
+		static constexpr std::size_t MaxInstanceCount = 8; //for each object
+
+		MeshletObjectData meshData[MaxObjectCount];
+		hlsl::float32_t4x4 transforms[MaxInstanceCount];
+
+		//remove index type to avoid branch in shader
+		//asset::E_INDEX_TYPE indexType = asset::EIT_UNKNOWN;
+	};
+
 
 class MeshDebugRenderer final : public core::IReferenceCounted {
 #define EXPOSE_NABLA_NAMESPACES \
@@ -33,36 +55,14 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 	//
 	struct SViewParams
 	{
-		SViewParams(const hlsl::float32_t3x4& _view, const hlsl::float32_t4x4& _viewProj);
-		hlsl::float32_t4x4 computeForInstance(hlsl::float32_t3x4 world) const;
+		SViewParams(const hlsl::float32_t4x4& _viewProj, std::array<uint32_t, MeshDataBuffer::MaxObjectCount> const& objectCounts);
 
-		hlsl::float32_t3x4 view;
 		hlsl::float32_t4x4 viewProj;
-		hlsl::float32_t3x3 normal;
+		std::array<uint32_t, MeshDataBuffer::MaxObjectCount> objectCounts;
+		//hlsl::float32_t3x3 normal;
 	};
 	constexpr static inline auto MissingView = hlsl::examples::geometry_creator_scene::SPushConstants::DescriptorCount;
 
-	//this is buffer data
-	struct MeshletObjectData {
-		uint32_t vertCount;
-		uint32_t primCount;
-		uint32_t objectType;
-		uint32_t positionView;
-		uint32_t normalView;
-	};
-	struct MeshDataBuffer {
-		//if gpuGeometry is nullptr or std::nullopt or whatever, then mesh object type is invalid, the CPU memory failed to transfer to GPU for whatever reason
-		core::smart_refctd_ptr<const video::IGPUPolygonGeometry> gpuGeometry{};
-
-		static constexpr std::size_t MaxObjectCount = static_cast<std::size_t>(MeshletObjectTypes::COUNT);
-		static constexpr std::size_t MaxInstanceCount = 64;
-
-		MeshletObjectData meshData[MaxObjectCount];
-		hlsl::float32_t4x4 transforms[MaxInstanceCount];
-
-		//remove index type to avoid branch in shader
-		//asset::E_INDEX_TYPE indexType = asset::EIT_UNKNOWN;
-	};
 	//
 	struct SInstance
 	{
@@ -70,14 +70,15 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 		{
 			NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1 << 16) - 1;
 
-			hlsl::float32_t4x4 matrices;
+			hlsl::float32_t4x4 viewProj;
 			uint32_t objectCount[MeshDataBuffer::MaxObjectCount];
 		};
 		inline SPushConstants computePushConstants(const SViewParams& viewParams) const	{
-			return SPushConstants{
-				.matrices = viewParams.computeForInstance(world),
-				.objectCount{0}
+			SPushConstants ret{
+				.viewProj = viewParams.viewProj
 			};
+			memcpy(ret.objectCount, viewParams.objectCounts.data(), viewParams.objectCounts.size() * sizeof(uint32_t));
+			return ret;
 		}
 
 		hlsl::float32_t3x4 world;
@@ -99,11 +100,13 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 
 	//
 	struct SInitParams {
-		core::smart_refctd_ptr<video::SubAllocatedDescriptorSet> subAllocDS;
+
+		core::smart_refctd_ptr<video::IGPUDescriptorSet> meshDescriptor;
+		core::smart_refctd_ptr<video::SubAllocatedDescriptorSet> subAllocDS;//vertex and normal views
 		core::smart_refctd_ptr<video::IGPUPipelineLayout> layout;
 		core::smart_refctd_ptr<video::IGPUMeshPipeline> pipeline;
 	};
-	inline const SInitParams& getInitParams() const {return m_params;}
+	inline SInitParams& getInitParams() {return m_params;}
 
 	//im not going to go thru every example to fix them up to use this static function instead, so im leaving the old one
 	//device should be const* but im not going to fix it right now 
@@ -117,8 +120,15 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 
 	void render(video::IGPUCommandBuffer* cmdbuf, const SViewParams& viewParams) const;
 
-	core::vector<SInstance> m_instances;
+	SInstance m_instance;
+
+	//mesh layout
+	//PVP vertices at set 0 binding 0
+	//mesh data at set 1 binding 0
+	//they should be in the same set but tiny bit slower (1 additional API call) for a tiny bit easier programming
+	nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSetLayout> mesh_layout{};
 
+	MeshDataBuffer m_geoms;
 protected:
 	inline MeshDebugRenderer(SInitParams&& _params) : m_params(std::move(_params)) {}
 	inline ~MeshDebugRenderer()	{
@@ -135,7 +145,6 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 	}
 
 	SInitParams m_params;
-	MeshDataBuffer m_geoms;
 #undef EXPOSE_NABLA_NAMESPACES
 };
 
diff --git a/MeshShader/include/SampleApp.h b/MeshShader/include/SampleApp.h
index e0045510e..b56932f1c 100644
--- a/MeshShader/include/SampleApp.h
+++ b/MeshShader/include/SampleApp.h
@@ -13,26 +13,27 @@ struct MeshletPush {
 	uint32_t objectInstanceCount[object_type_count_max]; //this data is going to cropped before pushing, if necessary
 };
 
-class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesApplication
+class MeshSampleApp final : public MonoWindowApplication, public BuiltinResourcesApplication
 {
 		using device_base_t = MonoWindowApplication;
 		using asset_base_t = BuiltinResourcesApplication;
 
 	public:
-		UISampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) 
+		MeshSampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) 
 			: IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD),
 			device_base_t({1280,720}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) 
         {}
 
 		bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override;
-		//smart_refctd_ptr<IGPUDescriptorSetLayout> BuildMeshletDSLayout() const;
-		std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 3> CreateTestShader() const;
 		virtual bool onAppTerminated();
 		IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override;
 
 	protected:
 		const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override;
 	private:
+
+		void UpdateDescriptor();
+		
 		void UpdateScene(nbl::video::IGPUCommandBuffer* cb);
 		void update(const std::chrono::microseconds nextPresentationTimestamp);
 		void recreateFramebuffer(const uint16_t2 resolution);
@@ -64,10 +65,25 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 		core::smart_refctd_ptr<video::SubAllocatedDescriptorSet> meshlet_subAllocDS;
 		smart_refctd_ptr<IGPUPipelineLayout> meshletLayout;
 		smart_refctd_ptr<IGPUMeshPipeline> meshletPipeline;
+
+
+		smart_refctd_ptr<IGPUBuffer> meshGPUBuffer;
+		nbl::video::IDeviceMemoryAllocator::SAllocation mesh_allocation;
 		// UI stuff
+		//i really hate interface beign it's own object
 		struct CInterface
 		{
+			bool meshControlSeparated = false;
+			void DrawMeshControls();
+			bool cameraControlSeparated = false;
+			void DrawCameraControls();
+
+			bool guizmoEnabled = true;
+			void UpdateImguizmo();
+
 			void operator()();
+			
+			bool transposeCameraViewProj = false;
 			smart_refctd_ptr<ext::imgui::UI> imGUI;
 			// descriptor set
 			smart_refctd_ptr<SubAllocatedDescriptorSet> subAllocDS;
@@ -76,21 +92,24 @@ class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesA
 			Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD());
 			// mutables
 			int32_t currentTransform = -1;
-			std::vector<core::matrix3x4SIMD> transforms;
+			std::array<hlsl::matrix<float, 4, 4>, MeshDataBuffer::MaxInstanceCount * MeshDataBuffer::MaxObjectCount> transforms;
 
-			std::array<std::string, MeshDebugRenderer::MeshDataBuffer::MaxObjectCount> objectNames;
-			std::array<uint32_t, MeshDebugRenderer::MeshDataBuffer::MaxObjectCount> objectCount;
+			std::array<std::string, MeshDataBuffer::MaxObjectCount> objectNames;
+			std::array<uint32_t, MeshDataBuffer::MaxObjectCount> objectCount = { 1, 0, 0, 0,  0, 0, 0 };
 
 			TransformRequestParams transformParams;
 			uint16_t2 sceneResolution = {1280,720};
 			uint16_t4 widgetBox;
 			float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f;
 			float viewWidth = 10.f;
-			float camYAngle = 165.f / 180.f * 3.14159f;
+			float camYAngle = 165.f / 180.f * 3.14159f; //wheres my pi constant
 			float camXAngle = 32.f / 180.f * 3.14159f;
 			uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed
 			bool isPerspective = true, isLH = true, flipGizmoY = true, move = false;
 			bool firstFrame = true;
 
+			ILogicalDevice::MappedMemoryRange meshMemoryRange;
+			void* mesh_mapped_memory = nullptr;
+
 		} interface;
 };
diff --git a/MeshShader/include/transform.hpp b/MeshShader/include/transform.hpp
index f02ab0ee3..dd3b37bd9 100644
--- a/MeshShader/include/transform.hpp
+++ b/MeshShader/include/transform.hpp
@@ -123,10 +123,15 @@ struct TransformWidget {
 		ImGuiWindow* window = ImGui::GetCurrentWindow();
 		gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0);
 
-		ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL);
+		//static bool tempEnable = true;
+		//ImGui::Checkbox("temp enable", &tempEnable);
+		//if (tempEnable) { //debug branch
+			ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL);
 
-		if (params.enableViewManipulate)
-			ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
+			if (params.enableViewManipulate) {
+				ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
+			}
+		//}
 
 		ImGui::End();
 		ImGui::PopStyleColor();
diff --git a/MeshShader/main.cpp b/MeshShader/main.cpp
index c55c3d48d..206848f49 100644
--- a/MeshShader/main.cpp
+++ b/MeshShader/main.cpp
@@ -12,5 +12,5 @@ Written with Nabla's UI extension and got integrated with ImGuizmo to handle sce
 */
 int main(int argc, char** argv) {
 	//expanded macro for easier IDE peeking
-	return UISampleApp::main<UISampleApp>(argc, argv);
+	return MeshSampleApp::main<MeshSampleApp>(argc, argv);
 }
\ No newline at end of file
diff --git a/MeshShader/src/MeshRenderer.cpp b/MeshShader/src/MeshRenderer.cpp
index c0748d0e9..d28444a47 100644
--- a/MeshShader/src/MeshRenderer.cpp
+++ b/MeshShader/src/MeshRenderer.cpp
@@ -19,18 +19,10 @@ namespace nbl::examples {
 		return patch;
 	}();
 
-	MeshDebugRenderer::SViewParams::SViewParams(const hlsl::float32_t3x4& _view, const hlsl::float32_t4x4& _viewProj)
+	MeshDebugRenderer::SViewParams::SViewParams(const hlsl::float32_t4x4& _viewProj, std::array<uint32_t, MeshDataBuffer::MaxObjectCount> const& objectCounts)
+		: viewProj{_viewProj},
+		objectCounts{objectCounts}
 	{
-		view = _view;
-		viewProj = _viewProj;
-		using namespace nbl::hlsl;
-		normal = transpose(inverse(float32_t3x3(view)));
-	}
-
-	hlsl::float32_t4x4 MeshDebugRenderer::SViewParams::computeForInstance(hlsl::float32_t3x4 world) const
-	{
-		using namespace nbl::hlsl;
-		return float32_t4x4(math::linalg::promoted_mul(float64_t4x4(viewProj), float64_t3x4(world)));
 	}
 
 
@@ -76,7 +68,7 @@ namespace nbl::examples {
 			};
 		constexpr uint32_t WorkgroupSize = 64;
 		const uint32_t ObjectCount = 7;
-		const uint32_t InstanceCount = WorkgroupSize; //this is going to be based off limits. 64 is PROBABLY safe on all hardware, but cant guarantee
+		const uint32_t InstanceCount = 8; //this is going to be based off limits. 64 is PROBABLY safe on all hardware, but cant guarantee
 		const std::string WorkgroupSizeAsStr = std::to_string(WorkgroupSize);
 		const std::string ObjectCountAsStr = std::to_string(ObjectCount);
 		const std::string InstanceCountAsStr = std::to_string(InstanceCount);
@@ -107,10 +99,13 @@ namespace nbl::examples {
 
 		SInitParams init;
 
+		//
+		smart_refctd_ptr<IGPUDescriptorSetLayout> dsLayout;
+		smart_refctd_ptr<IGPUDescriptorSetLayout> meshLayout;
+
 		// create descriptor set
 		{
 			// create Descriptor Set Layout
-			smart_refctd_ptr<IGPUDescriptorSetLayout> dsLayout;
 			{
 				using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
 				const IGPUDescriptorSetLayout::SBinding bindings[] =
@@ -123,27 +118,47 @@ namespace nbl::examples {
 						.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH | IShader::E_SHADER_STAGE::ESS_FRAGMENT,
 						.count = MissingView
 					},
-					//{//indices, none of these objects use indices so I'll skip over this
-
-					//},
-					{ //meshletdataobject
+					{ //indices
 						.binding = 1,
+						.type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
+						// need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable
+						.createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | binding_flags_t::ECF_PARTIALLY_BOUND_BIT,
+						.stageFlags = IShader::E_SHADER_STAGE::ESS_MESH,
+						.count = MissingView
+					},
+				};
+				dsLayout = device->createDescriptorSetLayout(bindings);
+				if (!dsLayout)
+				{
+					logger->log("Could not create descriptor set layout!", ILogger::ELL_ERROR);
+					return nullptr;
+				}
+			}
+			//creating meshdatabuffer descriptor set
+			{
+				using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
+				const IGPUDescriptorSetLayout::SBinding bindings[] =
+				{ //meshletdataobject
+					{
+						.binding = 0,
 						.type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER,
-						.createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT,
+						.createFlags = binding_flags_t::ECF_NONE,
 						.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH | IShader::E_SHADER_STAGE::ESS_FRAGMENT,
 						.count = 1
 					}
 				};
-				dsLayout = device->createDescriptorSetLayout(bindings);
-				if (!dsLayout)
+				meshLayout = device->createDescriptorSetLayout(bindings);
+				if (!meshLayout)
 				{
-					logger->log("Could not create descriptor set layout!", ILogger::ELL_ERROR);
+					logger->log("Could not create mesh descriptor set layout!", ILogger::ELL_ERROR);
 					return nullptr;
 				}
 			}
 
 			// create Descriptor Set
-			auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, { &dsLayout.get(),1 });
+			std::vector< IGPUDescriptorSetLayout const*> dsls{ dsLayout.get(), meshLayout.get() };
+
+			auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dsls);
 			auto ds = pool->createDescriptorSet(std::move(dsLayout));
 			if (!ds)
 			{
@@ -159,7 +174,9 @@ namespace nbl::examples {
 			.offset = 0,
 			.size = sizeof(SInstance::SPushConstants),
 		} };
-		init.layout = device->createPipelineLayout(ranges, smart_refctd_ptr<const IGPUDescriptorSetLayout>(init.subAllocDS->getDescriptorSet()->getLayout()));
+
+		// `dsLayout` was moved into createDescriptorSet() above and must not be used again, so rebuild a smart pointer to the layout from the descriptor set itself.
+		init.layout = device->createPipelineLayout(ranges, smart_refctd_ptr<const IGPUDescriptorSetLayout>(init.subAllocDS->getDescriptorSet()->getLayout()), meshLayout);
 		auto shaderRet = CreateTestShader(assMan, renderpass, subpassIX);
 		// create pipelines
 		{
@@ -184,7 +201,10 @@ namespace nbl::examples {
 			}
 		}
 
-		return smart_refctd_ptr<MeshDebugRenderer>(new MeshDebugRenderer(std::move(init)), dont_grab);
+		auto ret = smart_refctd_ptr<MeshDebugRenderer>(new MeshDebugRenderer(std::move(init)), dont_grab);
+		ret->mesh_layout = meshLayout;
+
+		return ret;
 	}
 
 	bool MeshDebugRenderer::addGeometries(const std::span<const video::IGPUPolygonGeometry* const> geometries)
@@ -196,6 +216,7 @@ namespace nbl::examples {
 
 		core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes;
 		core::vector<IGPUDescriptorSet::SDescriptorInfo> infos;
+		core::vector<IGPUDescriptorSet::SDescriptorInfo> infos_index;
 		bool anyFailed = false;
 		auto allocateUTB = [&](const IGeometry<const IGPUBuffer>::SDataView& view)->decltype(SubAllocatedDescriptorSet::invalid_value)
 			{
@@ -218,6 +239,29 @@ namespace nbl::examples {
 				};
 				return index;
 			};
+		auto allocateIndexBuffer = [&](const IGeometry<const IGPUBuffer>::SDataView& view)->decltype(SubAllocatedDescriptorSet::invalid_value) { // reserves a slot in binding 1 and queues a storage-buffer write for the index data
+				if (!view) {
+					return MissingView;
+				}
+				auto index = SubAllocatedDescriptorSet::invalid_value;
+				if (m_params.subAllocDS->multi_allocate(1, 1, &index) != 0)
+				{
+					anyFailed = true;
+					return MissingView;
+				}
+				const auto infosOffset = infos_index.size(); // position of the info appended below; stored in the write and rebased to a real pointer later
+				// Storage buffers are described by a buffer plus a byte range (buffer views only exist for texel buffers); without this info the write below would point at nothing.
+				auto& indexInfo = infos_index.emplace_back();
+				indexInfo.desc = smart_refctd_ptr<IGPUBuffer>(const_cast<IGPUBuffer*>(view.src.buffer.get())); indexInfo.info.buffer = { .offset = view.src.offset, .size = view.src.size };
+				writes.emplace_back() = {
+					.dstSet = m_params.subAllocDS->getDescriptorSet(),
+					.binding = 1,
+					.arrayElement = index,
+					.count = 1,
+					.info = reinterpret_cast<const IGPUDescriptorSet::SDescriptorInfo*>(infosOffset)
+				};
+				return index;
+			};
 
 		auto resetGeoms = core::makeRAIIExiter(
 			[&]()->void {
@@ -238,17 +282,23 @@ namespace nbl::examples {
 			
 			auto& out = m_geoms.meshData[meshIndex];
 			meshIndex++;
-			out.vertCount = geom->getVertexReferenceCount();
+			out.primCount = geom->getPrimitiveCount();
+
 			out.positionView = allocateUTB(geom->getPositionView());
 			out.normalView = allocateUTB(geom->getNormalView());
 
+			out.objectType = 0;
 			if(geom->getIndexingCallback()->knownTopology() == E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_FAN){
-				out.objectType &= 2;
+				out.objectType |= 2;
 			}
 			const auto& view = geom->getIndexView();
 			if (view) {
-				view.getElementCount();
-				assert(out.vertCount == view.getElementCount() && "not currently setup to support index buffer");
+				out.indexView = allocateIndexBuffer(geom->getIndexView());
+				out.vertCount = view.getElementCount();
+			}
+			else {
+				out.indexView = MissingView;
+				out.vertCount = geom->getVertexReferenceCount();
 			}
 		}
 
@@ -260,8 +310,14 @@ namespace nbl::examples {
 			return false;
 
 		// unbase our pointers
-		for (auto& write : writes)
-			write.info = infos.data() + reinterpret_cast<const size_t&>(write.info);
+		// Writes for both bindings are interleaved per geometry (position, normal, index), so choose the backing
+		// array from each write's own binding instead of assuming all binding-0 writes precede all binding-1 writes.
+		for (auto& write : writes) {
+			auto& backing = (write.binding == 1) ? infos_index : infos;
+			const auto storedOffset = reinterpret_cast<const size_t&>(write.info);
+			write.info = backing.data() + storedOffset;
+		}
+
 		if (!device->updateDescriptorSets(writes, {}))
 			return false;
 
@@ -309,24 +365,18 @@ namespace nbl::examples {
 
 		const auto* layout = m_params.layout.get();
 		const auto ds = m_params.subAllocDS->getDescriptorSet();
-		cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS, layout, 0, 1, &ds);
-
-		for (const auto& instance : m_instances) {
-			cmdbuf->bindMeshPipeline(m_params.pipeline.get());
-			const auto pc = instance.computePushConstants(viewParams);
-			cmdbuf->pushConstants(layout, hlsl::ShaderStage::ESS_TASK | hlsl::ShaderStage::ESS_MESH | hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc);
-			//if (m_geoms->indexBuffer)
-			//{
-				//cmdbuf->bindIndexBuffer(geo->indexBuffer,geo->indexType);
-				//cmdbuf->drawIndexed(geo->elementCount,1,0,0,0);
-				//cmdbuf->bindDescriptorSets(geo->indexBuffer);
-			//}
-			//else {
-				//cmdbuf->bindDescriptorSets(geo->indexBuffer);
-				//cmdbuf->draw(geo->elementCount, 1, 0, 0);
-			//}
-			cmdbuf->drawMeshTasks(1, 1, 1);
-		}
+		std::array descriptors = { m_params.subAllocDS->getDescriptorSet(), m_params.meshDescriptor.get()};
+		cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS, layout, 0, descriptors.size(), descriptors.data());
+
+		cmdbuf->bindMeshPipeline(m_params.pipeline.get());
+		SInstance::SPushConstants pc{
+			.viewProj = viewParams.viewProj,
+		};
+		memcpy(pc.objectCount, viewParams.objectCounts.data(), viewParams.objectCounts.size() * sizeof(uint32_t));
+		cmdbuf->pushConstants(layout, hlsl::ShaderStage::ESS_TASK | hlsl::ShaderStage::ESS_MESH | hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc);
+
+		cmdbuf->drawMeshTasks(1, 1, 1);
+		
 		cmdbuf->endDebugMarker();
 	}
 }//namespace nbl::examples
\ No newline at end of file
diff --git a/MeshShader/src/SampleApp.cpp b/MeshShader/src/SampleApp.cpp
index db3aa56a0..f16285380 100644
--- a/MeshShader/src/SampleApp.cpp
+++ b/MeshShader/src/SampleApp.cpp
@@ -1,6 +1,6 @@
 #include "SampleApp.h"
 
-    bool UISampleApp::onAppInitialized(smart_refctd_ptr<ISystem>&& system) {
+    bool MeshSampleApp::onAppInitialized(smart_refctd_ptr<ISystem>&& system) {
         if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system)))
             return false;
         if (!device_base_t::onAppInitialized(smart_refctd_ptr(system)))
@@ -20,10 +20,6 @@
         }
         
         const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()};
-        //auto creator = core::make_smart_refctd_ptr<CGeometryCreator>();
-        //auto cube = creator->createCube({ 1.f,1.f,1.f });
-        //id like to combine all the vertices into 1 buffer but given how it's set up, thats out of scope
-        //cube->getPositionView();
 
 
         m_scene = CGeometryCreatorScene::create(
@@ -85,7 +81,6 @@
 
         const auto& geometries = m_scene->getInitParams().geometries;
         m_renderer = MeshDebugRenderer::create(m_assetMgr.get(), m_renderpass.get(), 0, { &geometries.front().get(),geometries.size() });
-        m_renderer->m_instances.resize(1); //should probably just get rid of the vector
 
         // Create ImGUI
         {
@@ -94,7 +89,6 @@
             params.resources.texturesInfo = {.setIx=0u,.bindingIx=TexturesImGUIBindingIndex};
             params.resources.samplersInfo = {.setIx=0u,.bindingIx=1u};
 
-
             params.utilities = m_utils;
             params.transfer = getTransferUpQueue();
             params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(),params.resources.texturesInfo,params.resources.samplersInfo,MaxImGUITextures);
@@ -123,13 +117,14 @@
                 else {
                     interface.subAllocDS = nullptr;
                 }
-                if (!interface.subAllocDS)
+                if (!interface.subAllocDS) {
                     return logFail("Failed to create the descriptor set");
+                }
                 // make sure Texture Atlas slot is taken for eternity
                 {
                     auto dummy = SubAllocatedDescriptorSet::invalid_value;
                     interface.subAllocDS->multi_allocate(0,1,&dummy);
-                    assert(dummy==ext::imgui::UI::FontAtlasTexId);
+                    assert(dummy==ext::imgui::UI::FontAtlasTexId); //?
                 }
                 // write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout
                 IGPUDescriptorSet::SDescriptorInfo info = {};
@@ -145,7 +140,67 @@
                 if (!m_device->updateDescriptorSets({&write,1},{}))
                     return logFail("Failed to write the descriptor set");
             }
+
+
+
+            // Uniform buffer layout: MaxObjectCount MeshletObjectData records, then every matrix of interface.transforms (update() copies all of them each frame).
+            nbl::video::IGPUBuffer::SCreationParams gpubuff_params = {};
+            gpubuff_params.size = sizeof(MeshletObjectData) * MeshDataBuffer::MaxObjectCount + interface.transforms.size() * sizeof(hlsl::matrix<float, 4, 4>);
+            gpubuff_params.usage = IGPUBuffer::EUF_UNIFORM_BUFFER_BIT; // the usages on an `IGPUBuffer` are crucial to specify correctly
+            meshGPUBuffer = m_device->createBuffer(std::move(gpubuff_params));
+            if (!meshGPUBuffer) return logFail("failed to create the mesh data buffer");
+            meshGPUBuffer->setObjectDebugName("mesh data buffer");
+
+            nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = meshGPUBuffer->getMemoryReqs();
+            // you can simply constrain the memory requirements by AND-ing the type bits of the host visible memory types
+            reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits();
+            mesh_allocation = m_device->allocate(reqs, meshGPUBuffer.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE);
+            if (!mesh_allocation.isValid()) {
+                return logFail("failed to allocate device memory");
+            }
+            assert(meshGPUBuffer->getBoundMemory().memory == mesh_allocation.memory.get());
+
+
+
+            // createDescriptorPoolForDSLayouts builds the pool from the given layouts, so descriptor counts per type need not be tallied by hand.
+            smart_refctd_ptr<nbl::video::IDescriptorPool> pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &m_renderer->mesh_layout.get(),1 });
+            if (!pool) return logFail("Failed to create the mesh descriptor pool");
+            auto layout_smart_ptr_copy = m_renderer->mesh_layout; // hand over a copy so the renderer keeps its own reference to the layout
+            m_renderer->getInitParams().meshDescriptor = pool->createDescriptorSet(layout_smart_ptr_copy);
+            if (!m_renderer->getInitParams().meshDescriptor) return logFail("Failed to create the mesh descriptor set");
+            m_renderer->getInitParams().meshDescriptor->setObjectDebugName("mesh descriptor");
+            {
+                IGPUDescriptorSet::SDescriptorInfo info[1];
+                info[0].desc = smart_refctd_ptr(meshGPUBuffer); // bad API, too late to change, should just take raw-pointers since not consumed
+                info[0].info.buffer = { .offset = 0,.size = meshGPUBuffer->getSize() }; // read the size from the buffer: the creation params were moved-from by createBuffer
+                IGPUDescriptorSet::SWriteDescriptorSet writes[1] = {
+                    {
+                        .dstSet = m_renderer->getInitParams().meshDescriptor.get(),
+                        .binding = 0,
+                        .arrayElement = 0,
+                        .count = 1,
+                        .info = info
+                    }
+                };
+                if (!m_device->updateDescriptorSets(writes, {})) return logFail("Failed to write the mesh descriptor set");
+            }
+
+            interface.meshMemoryRange.memory = mesh_allocation.memory.get();
+            interface.meshMemoryRange.offset = 0;
+            interface.meshMemoryRange.length = mesh_allocation.memory->getAllocationSize();
+            interface.meshMemoryRange.range = { interface.meshMemoryRange.offset, interface.meshMemoryRange.length };
+            if (!mesh_allocation.memory->map(interface.meshMemoryRange.range, IDeviceMemoryAllocation::EMCAF_WRITE)) {
+                return logFail("failed to map device memory");
+            }
+            interface.mesh_mapped_memory = mesh_allocation.memory->getMappedPointer();
+            if (!interface.mesh_mapped_memory) {
+                return logFail("failed to map device memory");
+            }
+
+            memcpy(interface.mesh_mapped_memory, m_renderer->m_geoms.meshData, sizeof(MeshletObjectData) * MeshDataBuffer::MaxObjectCount);
+
             imgui->registerListener([this](){interface();});
+
         }
         
         interface.objectNames = {
@@ -158,9 +213,15 @@
             "Icosphere"
             //magicenum reflection?
         };
-        for (auto& objCount : interface.objectCount) {
-            objCount = 0;
-        }
+
+        const hlsl::matrix<float, 4, 4> fillVal{
+            1.f, 0.f, 0.f, 0.f,
+            0.f, 1.f, 0.f, 0.f,
+            0.f, 0.f, 1.f, 0.f,
+            0.f, 0.f, 0.f, 1.f
+        };
+        interface.transforms.fill(fillVal);
+
         //load up the ICPUGeometry, then convert it to GPU geometry
 
         interface.camera.mapKeysToArrows();
@@ -169,47 +230,16 @@
         return true;
     }
 
-    /*
-    smart_refctd_ptr<IGPUDescriptorSetLayout> UISampleApp::BuildMeshletDSLayout() const {
-        smart_refctd_ptr<IGPUDescriptorSetLayout> ret;
-        using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
-        const IGPUDescriptorSetLayout::SBinding bindings[] =
-        {
-            {
-                .binding = 0,
-                .type = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER,
-                // need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable
-                .createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | binding_flags_t::ECF_PARTIALLY_BOUND_BIT,
-                .stageFlags = IShader::E_SHADER_STAGE::ESS_MESH,
-                .count = UINT16_MAX
-            },
-            {
-                .binding = 1,
-                .type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER,
-                .createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT,
-                .stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH,
-                .count = 1
-            }
-        };
-        ret = m_device->createDescriptorSetLayout(bindings);
-        if (!ret) {
-            m_logger->log("Could not create descriptor set layout!", ILogger::ELL_ERROR);
-            return nullptr;
-        }
-        return ret;
-    }
-    */
-
    
 
-    bool UISampleApp::onAppTerminated() {
+    bool MeshSampleApp::onAppTerminated() {
         SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId;
         IGPUDescriptorSet::SDropDescriptorSet dummy[1];
         interface.subAllocDS->multi_deallocate(dummy,TexturesImGUIBindingIndex,1,&fontAtlasDescIx);
         return device_base_t::onAppTerminated();
     }
 
-    IQueue::SSubmitInfo::SSemaphoreInfo UISampleApp::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) {
+    IQueue::SSubmitInfo::SSemaphoreInfo MeshSampleApp::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) {
         // CPU events
         update(nextPresentationTimestamp);
 
@@ -318,7 +348,11 @@
         return retval;
     }
 
-    const video::IGPURenderpass::SCreationParams::SSubpassDependency* UISampleApp::getDefaultSubpassDependencies() const {
+    void MeshSampleApp::UpdateDescriptor() {
+        m_renderer.get()->getInitParams().subAllocDS; // NOTE(review): expression statement with no effect, so this function is currently a no-op placeholder
+    }
+
+    const video::IGPURenderpass::SCreationParams::SSubpassDependency* MeshSampleApp::getDefaultSubpassDependencies() const {
         // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present
         const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
             // don't want any writes to be available, we'll clear, only thing to worry about is the layout transition
@@ -353,24 +387,23 @@
     }
 
 
-    void UISampleApp::UpdateScene(nbl::video::IGPUCommandBuffer* cb) {
-        float32_t3x4 viewMatrix;
+    void MeshSampleApp::UpdateScene(nbl::video::IGPUCommandBuffer* cb) { // builds the view parameters from the UI camera and object counts, then asks the renderer to record into cb
         float32_t4x4 viewProjMatrix;
         // TODO: get rid of legacy matrices //<-- camera.getViewMatrix returns matrix3x4SIMD
         {
             const auto& camera = interface.camera;
-            memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix));
             memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix));
         }
-        const auto viewParams = MeshDebugRenderer::SViewParams(viewMatrix, viewProjMatrix);
+        if (interface.transposeCameraViewProj) { // optional transpose, toggled by the "transpose view proj" checkbox in the camera controls
+            viewProjMatrix = hlsl::transpose(viewProjMatrix);
+        }
+        const auto viewParams = MeshDebugRenderer::SViewParams(viewProjMatrix, interface.objectCount);
 
         m_renderer->render(cb, viewParams);
-
-        //MeshPushConstant mPushConstant = { interface.camera.getConcatenatedMatrix(), cubeCount, coneCount, tubeCount };
     }
 
 
-    void UISampleApp::update(const std::chrono::microseconds nextPresentationTimestamp)
+    void MeshSampleApp::update(const std::chrono::microseconds nextPresentationTimestamp)
     {
         auto& camera = interface.camera;
         camera.setMoveSpeed(interface.moveSpeed);
@@ -445,11 +478,20 @@
             .keyboardEvents = uiEvents.keyboard
         };
 
-        interface.objectCount = {0, 0, 0, 0, 0};
         interface.imGUI->update(params);
+
+
+
+        auto* countMem = reinterpret_cast<MeshletObjectData*>(interface.mesh_mapped_memory);
+        // the meshlet object data only needs to be written once, during initialization
+        //memcpy(countMem, interface.objectCount.data(), sizeof(MeshletObjectData) * MeshDataBuffer::MaxObjectCount);
+        countMem += MeshDataBuffer::MaxObjectCount;
+        auto* matrixMem = reinterpret_cast<hlsl::matrix<float, 4, 4>*>(countMem);
+        memcpy(matrixMem, interface.transforms.data(), interface.transforms.size() * sizeof(hlsl::matrix<float, 4, 4>));
+        m_device->flushMappedMemoryRanges(1, &interface.meshMemoryRange);
     }
 
-    void UISampleApp::recreateFramebuffer(const uint16_t2 resolution)
+    void MeshSampleApp::recreateFramebuffer(const uint16_t2 resolution)
     {
         auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr<IGPUImageView>
         {
@@ -479,17 +521,21 @@
         {
             colorView = createImageAndView(finalSceneRenderFormat);
             auto depthView = createImageAndView(sceneRenderDepthFormat);
-            m_framebuffer = m_device->createFramebuffer({ {
-                .renderpass = m_renderpass,
-                .depthStencilAttachments = &depthView.get(),
-                .colorAttachments = &colorView.get(),
-                .width = resolution.x,
-                .height = resolution.y
-            }});
+            m_framebuffer = m_device->createFramebuffer(
+                { 
+                    {
+                        .renderpass = m_renderpass,
+                        .depthStencilAttachments = &depthView.get(),
+                        .colorAttachments = &colorView.get(),
+                        .width = resolution.x,
+                        .height = resolution.y
+                    }   
+                }
+            );
         }
-        else
+        else {
             m_framebuffer = nullptr;
-
+        }
         // release previous slot and its image
         interface.subAllocDS->multi_deallocate(0,1,&interface.renderColorViewDescIndex,{.semaphore=m_semaphore.get(),.value=m_realFrameIx});
         //
@@ -512,7 +558,7 @@
         interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndex;
     }
 
-    void UISampleApp::beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info)
+    void MeshSampleApp::beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info)
     {
         cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
         cb->setScissor(0,1,&info.renderArea);
@@ -525,143 +571,148 @@
         cb->setViewport(0u,1u,&viewport);
     }
 
-
-
-    void UISampleApp::CInterface::operator()() {
-        ImGuiIO& io = ImGui::GetIO();
-        io.ConfigDebugIsDebuggerPresent = true;
-
-        //camera
+    auto addMatrixTable = [](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true) // capture-less: a lambda outside block scope may not have a capture-default; prints a heading, then rows x columns floats read row-major from `pointer`
         {
-            matrix4SIMD projection;
-            const float viewHeight = viewWidth * io.DisplaySize.x / io.DisplaySize.y;
-
-            if (isPerspective) {
-                if (isLH) {
-                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), viewHeight, zNear, zFar);
-                }
-                else {
-                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), viewHeight, zNear, zFar);
-                }
-            }
-            else
+            ImGui::TextUnformatted(topText); // the heading is plain text, not a format string, so print it verbatim
+            if (ImGui::BeginTable(tableName, columns))
             {
-                if (isLH) {
-                    projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, 1.f / viewHeight, zNear, zFar);
-                }
-                else {
-                    projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, 1.f / viewHeight, zNear, zFar);
+                for (int y = 0; y < rows; ++y)
+                {
+                    ImGui::TableNextRow();
+                    for (int x = 0; x < columns; ++x)
+                    {
+                        ImGui::TableSetColumnIndex(x);
+                        ImGui::Text("%.3f", *(pointer + (y * columns) + x));
+                    }
                 }
+                ImGui::EndTable();
             }
-            camera.setProjectionMatrix(projection);
-        } //end camera
-        
 
-        ImGuizmo::SetOrthographic(false);
-        ImGuizmo::BeginFrame();
-
-        ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing);
-        ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing);
-
-        // create a window and insert the inspector
-        ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing);
-        ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing);
-        if(ImGui::Begin("Editor")) {
+            if (withSeparator)
+                ImGui::Separator();
+        };
 
-            //object data
-            for(uint8_t i = 0; i < objectNames.size(); i++){
-                if (ImGui::TreeNode(objectNames[i].c_str())) {
-                    ImGui::Text("objectCount - %d", objectCount[i]);
-                
-                    ImGui::TreePop();
+    void MeshSampleApp::CInterface::DrawMeshControls() { // draws the transform selector plus, per object type, an instance-count slider and read-only tables of its transforms
+        //this was for learning hlsl, given the shader takes like 2 minutes to compile, might as well just relaunch
+        //if (ImGui::Button("reload mesh shader")) {
+            //printf("test shader result - %d\n", CreateTestShaderFuncPtr());
+        //}
+        // NOTE(review): assumes currentTransform indexes `transforms`, so the last valid value is count - 1; AlwaysClamp also bounds Ctrl+Click typed input
+        ImGui::DragInt("current transform editting", &currentTransform, 1, 0, MeshDataBuffer::MaxObjectCount * MeshDataBuffer::MaxInstanceCount - 1, "%d", ImGuiSliderFlags_AlwaysClamp);
+
+        for (uint8_t i = 0; i < objectNames.size(); i++) {
+            const std::string objNameWithCount = objectNames[i] + " {" + std::to_string(objectCount[i]) + '}' + "###" + objectNames[i]; // "###" pins the widget ID to the name so the node stays open while the displayed count changes
+            if (ImGui::TreeNode(objNameWithCount.c_str())) {
+                const std::string objCountDraggerName = ("object count ##") + objectNames[i];
+
+                int imguiCopy = objectCount[i];
+                //ImGui::DragInt(objCountDraggerName.c_str(), &imguiCopy, 1, 0, localMax);
+                ImGui::SliderInt(objCountDraggerName.c_str(), &imguiCopy, 0, MeshDataBuffer::MaxInstanceCount, "%d", ImGuiSliderFlags_AlwaysClamp); // clamp typed input too: the count bounds the transform indexing below
+                objectCount[i] = imguiCopy;
+
+                for (uint64_t j = 0; j < objectCount[i]; j++) {
+                    const std::string treeName = std::string("transform[") + std::to_string(j) + "]##" + objectNames[i];
+                    if (ImGui::TreeNode(treeName.c_str())) {
+                        const std::size_t transformIndex = i * MeshDataBuffer::MaxInstanceCount + j;
+                        addMatrixTable("model", "", 4, 4, &transforms[transformIndex][0][0]);
+
+                        //imguizmo overwrites these changes
+                        //for (uint8_t x = 0; x < 4; x++) {
+                        //    const std::size_t rowIndex = transformIndex * 4 + x;
+                        //    const std::string rowName = std::string("##") + std::to_string(rowIndex);
+                        //    ImGui::DragFloat4(rowName.c_str(), &transforms[transformIndex][x][0], 0.1f, -100.f, 100.f);
+                        //}
+                        ImGui::TreePop();
+                    }
                 }
-            }
 
-            ImGui::Separator();
+                ImGui::TreePop();
+            }
+        }
+    }
 
-            //controls
-            {
-                //if (ImGui::Button("reload mesh shader")) {
-                    //printf("test shader result - %d\n", CreateTestShaderFuncPtr());
-                //}
+    void MeshSampleApp::CInterface::DrawCameraControls() {
+        ImGuiIO& io = ImGui::GetIO();
 
-                if (ImGui::RadioButton("Full view", !transformParams.useWindow))
-                    transformParams.useWindow = false;
+        ImGui::Text("Camera");
+        bool viewDirty = false;
 
-                ImGui::SameLine();
+        ImGui::Checkbox("transpose view proj (holy space intermixing)", &transposeCameraViewProj);
 
-                if (ImGui::RadioButton("Window", transformParams.useWindow))
-                    transformParams.useWindow = true;
+        if (ImGui::RadioButton("LH", isLH))
+            isLH = true;
 
-                ImGui::Text("Camera");
-                bool viewDirty = false;
+        ImGui::SameLine();
 
-                if (ImGui::RadioButton("LH", isLH))
-                    isLH = true;
+        if (ImGui::RadioButton("RH", !isLH))
+            isLH = false;
 
-                ImGui::SameLine();
+        if (ImGui::RadioButton("Perspective", isPerspective))
+            isPerspective = true;
 
-                if (ImGui::RadioButton("RH", !isLH))
-                    isLH = false;
+        ImGui::SameLine();
 
-                if (ImGui::RadioButton("Perspective", isPerspective))
-                    isPerspective = true;
+        if (ImGui::RadioButton("Orthographic", !isPerspective))
+            isPerspective = false;
 
-                ImGui::SameLine();
+        ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate);
+        ImGui::Checkbox("Enable camera movement", &move);
+        ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f);
+        ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f);
 
-                if (ImGui::RadioButton("Orthographic", !isPerspective))
-                    isPerspective = false;
+        // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case
 
-                ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate);
-                ImGui::Checkbox("Enable camera movement", &move);
-                ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f);
-                ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f);
+        if (isPerspective)
+            ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
+        else
+            ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20);
 
-                // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case
+        ImGui::SliderFloat("zNear", &zNear, 0.1f, zFar);
+        ImGui::SliderFloat("zFar", &zFar, zNear, 10000.f);
 
-                if (isPerspective)
-                    ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
-                else
-                    ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20);
+        viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f);
 
-                ImGui::SliderFloat("zNear", &zNear, 0.1f, zFar);
-                ImGui::SliderFloat("zFar", &zFar, zNear, 10000.f);
+        if (viewDirty || firstFrame)
+        {
+            core::vectorSIMDf cameraPosition(cosf(camYAngle) * cosf(camXAngle) * transformParams.camDistance, sinf(camXAngle) * transformParams.camDistance, sinf(camYAngle) * cosf(camXAngle) * transformParams.camDistance);
+            core::vectorSIMDf cameraTarget(0.f, 0.f, 0.f);
+            const static core::vectorSIMDf up(0.f, 1.f, 0.f);
 
-                viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f);
+            camera.setPosition(cameraPosition);
+            camera.setTarget(cameraTarget);
+            camera.setBackupUpVector(up);
 
-                if (viewDirty || firstFrame)
-                {
-                    core::vectorSIMDf cameraPosition(cosf(camYAngle) * cosf(camXAngle) * transformParams.camDistance, sinf(camXAngle) * transformParams.camDistance, sinf(camYAngle) * cosf(camXAngle) * transformParams.camDistance);
-                    core::vectorSIMDf cameraTarget(0.f, 0.f, 0.f);
-                    const static core::vectorSIMDf up(0.f, 1.f, 0.f);
+            camera.recomputeViewMatrix();
+        }
+        firstFrame = false;
 
-                    camera.setPosition(cameraPosition);
-                    camera.setTarget(cameraTarget);
-                    camera.setBackupUpVector(up);
+        ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y);
+        if (ImGuizmo::IsUsing())
+        {
+            ImGui::Text("Using gizmo");
+        }
+        else {
+            ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : "");
+            ImGui::SameLine();
+            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : "");
+            ImGui::SameLine();
+            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : "");
+            ImGui::SameLine();
+            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : "");
+        }
 
-                    camera.recomputeViewMatrix();
-                }
-                firstFrame = false;
+        const auto& view = camera.getViewMatrix(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
 
-                ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y);
-                if (ImGuizmo::IsUsing())
-                {
-                    ImGui::Text("Using gizmo");
-                }
-                else {
-                    ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : "");
-                    ImGui::SameLine();
-                    ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : "");
-                    ImGui::SameLine();
-                    ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : "");
-                    ImGui::SameLine();
-                    ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : "");
-                }
-            }//end controls
-            
-            ImGui::Separator();
+        auto const& projection = camera.getProjectionMatrix();
+        if (ImGui::TreeNode("camera matrices")) {
+            addMatrixTable("View", "ViewMatrixTable", 3, 4, view.pointer());
+            addMatrixTable("Projection", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false);
+            ImGui::TreePop();
+        }
+    }
 
-            /*
+    void MeshSampleApp::CInterface::UpdateImguizmo() {
+        /*
             * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout
             * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection
 
@@ -671,77 +722,124 @@
             */
 
     // TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases
-            static struct
-            {
-                core::matrix4SIMD view, projection, model;
-            } imguizmoM16InOut;
+        static struct
+        {
+            core::matrix4SIMD view, projection, model;
+        } imguizmoM16InOut;
 
-            ImGuizmo::SetID(0u);
+        ImGuizmo::SetID(0u);
 
-            imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
-            imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix());
+        imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
+        imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix());
 
-            if (currentTransform < 0) {
-                currentTransform = 0;
-            }
+        if (currentTransform < 0) {
+            currentTransform = 0;
+        }
 
-            if (currentTransform >= 0 && currentTransform < transforms.size()) {
-                imguizmoM16InOut.model = core::transpose(matrix4SIMD(transforms[currentTransform]));
-            }
-            {
-                transformParams.editTransformDecomposition = true;
-                static TransformWidget transformWidget{};
-                const auto tempForConversion = transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
-                sceneResolution = { tempForConversion.x, tempForConversion.y };
-            
+        if (currentTransform >= 0 && currentTransform < transforms.size()) {
+            //auto transposedTemp = core::transpose(transforms[currentTransform]);
+            //the model is a double matrix, so a memcpy or reinterpret won't work
+            //I might have x and y backwards; it doesn't matter as long as the copy is x:x and y:y
+            //skipping the transform from example 61
+            for (uint8_t x = 0; x < 4; x++) {
+                for (uint8_t y = 0; y < 4; y++) {
+                    imguizmoM16InOut.model[x][y] = transforms[currentTransform][x][y];
+                }
             }
+        }
+        {
+            transformParams.editTransformDecomposition = true;
+            static TransformWidget transformWidget{};
+            const auto tempForConversion = transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
+            sceneResolution = { tempForConversion.x, tempForConversion.y };
+
+        }
 
-            if (currentTransform >= 0 && currentTransform < transforms.size()) {
-                transforms[currentTransform] = core::transpose(imguizmoM16InOut.model).extractSub3x4();
+        if (currentTransform >= 0 && currentTransform < transforms.size()) {
+            for (uint8_t x = 0; x < 4; x++) {
+                for (uint8_t y = 0; y < 4; y++) {
+                    //transposed? NOTE(review): this is a straight element copy - confirm the layout
+                    transforms[currentTransform][x][y] = imguizmoM16InOut.model[x][y];
+                }
             }
-            // to Nabla + update camera & model matrices
-    // TODO: make it more nicely, extract:
-    // - Position by computing inverse of the view matrix and grabbing its translation
-    // - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position
-    // But then set the view matrix this way anyway, because up-vector may not be compatible
-            const auto& view = camera.getViewMatrix();
-            const_cast<core::matrix3x4SIMD&>(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
-            // update concatanated matrix
-            const auto& projection = camera.getProjectionMatrix();
-            camera.setProjectionMatrix(projection);
-            
-            // view matrices editor
-            if(ImGui::Begin("Matrices")){
+        }
+        const auto& view = camera.getViewMatrix();
+        const_cast<core::matrix3x4SIMD&>(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
+        const auto& projection = camera.getProjectionMatrix();
+        camera.setProjectionMatrix(projection); //this recalcs viewproj
 
-                auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true)
-                    {
-                        ImGui::Text(topText);
-                        if (ImGui::BeginTable(tableName, columns))
-                        {
-                            for (int y = 0; y < rows; ++y)
-                            {
-                                ImGui::TableNextRow();
-                                for (int x = 0; x < columns; ++x)
-                                {
-                                    ImGui::TableSetColumnIndex(x);
-                                    ImGui::Text("%.3f", *(pointer + (y * columns) + x));
-                                }
-                            }
-                            ImGui::EndTable();
-                        }
+    }
 
-                        if (withSeparator)
-                            ImGui::Separator();
-                    };
+    void MeshSampleApp::CInterface::operator()() {
+        ImGuiIO& io = ImGui::GetIO();
+        //io.ConfigDebugIsDebuggerPresent = true;
 
-                if (currentTransform >= 0 && currentTransform < transforms.size()) {
-                    addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, transforms[currentTransform].pointer());
+        //camera
+        matrix4SIMD projection;
+        {
+            const float viewHeight = viewWidth * io.DisplaySize.x / io.DisplaySize.y;
+
+            if (isPerspective) {
+                if (isLH) {
+                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), viewHeight, zNear, zFar);
                 }
-                addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer());
-                addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false);
+                else {
+                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), viewHeight, zNear, zFar);
+                }
+            }
+            else
+            {
+                if (isLH) {
+                    projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, 1.f / viewHeight, zNear, zFar);
+                }
+                else {
+                    projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, 1.f / viewHeight, zNear, zFar);
+                }
+            }
+            camera.setProjectionMatrix(projection);
+        } //end camera
+        
+        ImGuizmo::SetOrthographic(false);
+        ImGuizmo::BeginFrame();
+        
+
+        ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing);
+        ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing);
 
-            } //end view matrix editor
+        // create a window and insert the inspector
+        ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing);
+        ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing);
+
+        if (meshControlSeparated) {
+            if (ImGui::Begin("mesh controls", &meshControlSeparated)) {
+                meshControlSeparated = !ImGui::Button("Rejoin mesh control");
+                DrawMeshControls();
+            }
             ImGui::End();
+        }
+        if (cameraControlSeparated) {
+            if (ImGui::Begin("camera controls", &cameraControlSeparated)) {
+                cameraControlSeparated = !ImGui::Button("Rejoin camera control");
+                DrawCameraControls();
+            }
+            ImGui::End();
+        }
+        if(ImGui::Begin("Editor")) {
+
+            if (!meshControlSeparated) {
+                meshControlSeparated = ImGui::Button("Separate mesh control");
+                DrawMeshControls();
+                ImGui::Separator();
+            }
+
+            if (!cameraControlSeparated) {
+                cameraControlSeparated = ImGui::Button("Separate camera controls");
+                DrawCameraControls();
+                ImGui::Separator();
+            }
+
+            ImGui::Checkbox("update guizmo", &guizmoEnabled);
+            UpdateImguizmo();
         } //end editor window
         ImGui::End();
     }

From eca5ff5f739053b40f0aa5713f6c8813df55b2e0 Mon Sep 17 00:00:00 2001
From: Corey <corey.w108@gmail.com>
Date: Wed, 24 Dec 2025 07:19:01 -0600
Subject: [PATCH 8/8] stable

---
 MeshShader/app_resources/geom.mesh.hlsl       |  91 ++---
 .../app_resources/task_mesh_common.hlsl       |  15 +-
 MeshShader/include/MeshRenderer.hpp           |  48 +--
 MeshShader/include/SampleApp.h                |  18 +-
 MeshShader/include/transform.hpp              | 202 ++++++-----
 MeshShader/src/MeshRenderer.cpp               | 243 ++-----------
 MeshShader/src/SampleApp.cpp                  | 322 ++++++++----------
 7 files changed, 312 insertions(+), 627 deletions(-)

diff --git a/MeshShader/app_resources/geom.mesh.hlsl b/MeshShader/app_resources/geom.mesh.hlsl
index 7b2515c06..ac75af0d3 100644
--- a/MeshShader/app_resources/geom.mesh.hlsl
+++ b/MeshShader/app_resources/geom.mesh.hlsl
@@ -2,13 +2,8 @@
 
 #include "task_mesh_common.hlsl"
 
-
-//utb is short for "uniform texel buffer", could also be considered a storage buffer with vec4s
-//the gpu probably does something different with the data between a utb and a storage buffer but idk
-[[vk::binding(0)]] Buffer<float32_t4> utbs[PushDescCount];
-
-//binding 1, set 0, the mesh data is in binding 0
-[[vk::binding(1, 0)]] Buffer<uint> indices[];
+//            (binding, set)
+[[vk::binding(0, 0)]] StructuredBuffer<float3> MeshVertexBuffer;
 
 struct VertexOut {
     float32_t4 ndc : SV_Position;
@@ -22,77 +17,35 @@ struct VertexOut {
 void main(
     in uint3 id : SV_DispatchThreadID,
     in uint3 groupThreadID : SV_GroupThreadID,
-    in payload TaskToMeshPayload taskToMeshPayload,
     out vertices VertexOut verts[WORKGROUP_SIZE],
     out indices uint3 prims[WORKGROUP_SIZE]
 )
-{   
-    MeshData meshDataCopy = meshData[taskToMeshPayload.objectType[groupThreadID.x]];
-
-    //if the ratio isnt 1 object to 1 transform, the payload can be used to pass in a transform index
-    //or if it isnt 1 task shader launching every mesh shader, the payload will need to handle
-    const float32_t4x4 worldViewProj = pc.viewProj * transform[groupThreadID.x];
+{
 
-    if(id.x < meshDataCopy.vertCount){
-        const float32_t3 position = utbs[meshDataCopy.positionView][id.x].xyz;
+    // I haven't benchmarked this personally, but my understanding is that AMD devices prefer mesh shaders to be "by primitive"
+    // and that NVIDIA devices prefer mesh shaders to be "by vertex".
+    // Ideally, I'd benchmark both and set up branches so that each device can specialize the shader based on what it likes
+    // (VkPhysicalDeviceMeshShaderPropertiesEXT has prefersLocalInvocationVertexOutput / prefersLocalInvocationPrimitiveOutput flags that indicate this)
+    if (id.x < pc.vertCount) {
+        const float32_t3 position = MeshVertexBuffer[id.x];
 
         // verts[id.x].ndc = mul(float32_t4(position, 1.0), worldViewProj);
-        verts[id.x].ndc = mul(worldViewProj, float32_t4(position, 1.0));
+        verts[id.x].ndc = mul(pc.mvp, float32_t4(position, 1.0));
 
-
-        if (meshDataCopy.normalView < PushDescCount) { // && meshDataCopy.objType != CONE_OBJECT - just going to set cone_object normalView to pushdesccount
-            verts[id.x].meta = utbs[meshDataCopy.normalView][id.x].xyz;
-        }
-        else {
-        //i could reconstruct the normal right here in the mesh shader for the cone
-            //verts[id.x].meta = mul(inverse(transpose(pc.matrices.normal)),position);
-            verts[id.x].meta = float32_t3(0.0, 0.0, 0.0); //id like to check if cones even have a normal first
-        }
+        verts[id.x].meta = position;
     }
 
-    //uint outputVertexCount = meshDataCopy.vertCount; //not necessary right now
-    uint outputPrimitiveCount = meshDataCopy.primCount;
-    //we're assuming primCount == vertCount, but most of the time a 
-    //index buffer will exist, so i'll leave the branch in the EXAMPLE
-    if(id.x < meshDataCopy.primCount){ 
-        //a fan has 0 at the center, then around a circle it'll have 1 at 12o'clock, (relatively speaking)
-        //numbers increment as you go clockwise (again, relatively speaking)
-        //so if id.x + 2 is greater than prim count, it wraps back around to 1
-        if(meshDataCopy.objType == T_FAN_OBJECT_TYPE){
-            uint3 prim = uint3(0, id.x + 1, id.x + 2);
-            if(prim.y >= meshDataCopy.vertCount){
-                //not adding
-            }
-            else if (prim.z >= meshDataCopy.vertCount) {
-                prim.z = 1;
-                prims[id.x] = prim;
-                printf("adding prim - {%u:%u:%u}", prims[id.x].x, prims[id.x].y, prims[id.x].z);
-            }
-            else {
-                prims[id.x] = prim;
-                printf("adding prim - {%u:%u:%u}", prims[id.x].x, prims[id.x].y, prims[id.x].z);
-            }
-        }
-        /* probably incorrect
-        else if(triangle strip){
-            uint3 prim = uint3(id.x, id.x + 1, id.x + 2);
-            bool lessThan = (prim.x < meshDataCopy.vertCount) && (prim.y < meshDataCopy.vertCount) && (prim.z < meshDataCopy.vertCount);
-            if (lessThan) {
-                prims[id.x] = prim;
-                printf("adding prim [triangle strip type]- {%u:%u:%u} : {%u:%u:%u}", prims[id.x].x, prims[id.x].y, prims[id.x].z, prim.x, prim.y, prim.z);
-            }
-        }
-        */
-        else { // triangle list.
-            outputPrimitiveCount = meshDataCopy.vertCount / 3;
-            if (id.x < (meshDataCopy.primCount / 3)) { //probably incorrect for a indexed triangle list idk
-                prims[id.x].x = id.x * 3;
-                prims[id.x].y = id.x * 3 + 1;
-                prims[id.x].z = id.x * 3 + 2;
-                printf("adding prim [triangle strip type]- {%u:%u:%u}", prims[id.x].x, prims[id.x].y, prims[id.x].z);
-            }
-        }
+    // I'm just assuming it's a triangle list right now; this won't work if it's not
+    if (id.x < pc.vertCount / 3) {
+
+        prims[id.x] = uint3(
+                        id.x * 3, 
+                        id.x * 3 + 1, 
+                        id.x * 3 + 2
+                    );
     }
 
-    SetMeshOutputCounts(meshDataCopy.vertCount, outputPrimitiveCount);
+    
+
+    SetMeshOutputCounts(pc.vertCount, pc.vertCount / 3);
 }
\ No newline at end of file
diff --git a/MeshShader/app_resources/task_mesh_common.hlsl b/MeshShader/app_resources/task_mesh_common.hlsl
index 476792ab6..8ca1cfe23 100644
--- a/MeshShader/app_resources/task_mesh_common.hlsl
+++ b/MeshShader/app_resources/task_mesh_common.hlsl
@@ -1,9 +1,9 @@
 
 //this is user defined data sent from the task shader to the mesh shader
 //1 packet is sent, but it can use arrays so that each workgroup can receive customized data
-struct TaskToMeshPayload {
-    uint objectType[INSTANCE_COUNT * OBJECT_COUNT];
-};
+//struct TaskToMeshPayload {
+//    uint objectType[INSTANCE_COUNT * OBJECT_COUNT];
+//};
 
 //1 is cone, 2 is for fan, anything else for trangle list without the special normal calc.
 //cone can be handled in the task shader or the mesh shader, I'm going to handle it in the task shader
@@ -19,16 +19,11 @@ struct MeshData{
     uint indexView;
 };
 
-[[vk::binding(0, 1)]] cbuffer MeshDataBuffer {
-    
-    MeshData meshData[OBJECT_COUNT];
-    float4x4 transform[INSTANCE_COUNT]; //this is goign to be based on device limits
-};
 
 #define PushDescCount (0x1<<16)-1
 struct SPushConstants {
-	float4x4 viewProj;
-    uint objectCount[OBJECT_COUNT];
+	float4x4 mvp;
+    uint vertCount;
 };
 
 //im not keen on trying to figure out how the push constant abstraction worked before without documentation
diff --git a/MeshShader/include/MeshRenderer.hpp b/MeshShader/include/MeshRenderer.hpp
index 5cbcb3863..2bb559c5f 100644
--- a/MeshShader/include/MeshRenderer.hpp
+++ b/MeshShader/include/MeshRenderer.hpp
@@ -20,7 +20,6 @@ namespace nbl::examples
 		//this is buffer data
 	struct MeshletObjectData {
 		uint32_t vertCount;
-		uint32_t primCount;
 		uint32_t objectType;
 		uint32_t positionView;
 		uint32_t normalView;
@@ -52,15 +51,6 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 	//
 	constexpr static inline uint16_t VertexAttrubUTBDescBinding = 0;
 
-	//
-	struct SViewParams
-	{
-		SViewParams(const hlsl::float32_t4x4& _viewProj, std::array<uint32_t, MeshDataBuffer::MaxObjectCount> const& objectCounts);
-
-		hlsl::float32_t4x4 viewProj;
-		std::array<uint32_t, MeshDataBuffer::MaxObjectCount> objectCounts;
-		//hlsl::float32_t3x3 normal;
-	};
 	constexpr static inline auto MissingView = hlsl::examples::geometry_creator_scene::SPushConstants::DescriptorCount;
 
 	//
@@ -70,40 +60,22 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 		{
 			NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1 << 16) - 1;
 
-			hlsl::float32_t4x4 viewProj;
-			uint32_t objectCount[MeshDataBuffer::MaxObjectCount];
+			nbl::hlsl::float32_t4x4 viewProj;
+			uint32_t vertCount;
 		};
-		inline SPushConstants computePushConstants(const SViewParams& viewParams) const	{
-			SPushConstants ret{
-				.viewProj = viewParams.viewProj
-			};
-			memcpy(ret.objectCount, viewParams.objectCounts.data(), viewParams.objectCounts.size() * sizeof(uint32_t));
-			return ret;
-		}
 
 		hlsl::float32_t3x4 world;
 	};
 
-	static std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 3> CreateTestShader(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX);
+	static std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 2> CreateTestShader(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX);
 
 	//
 	static core::smart_refctd_ptr<MeshDebugRenderer> create(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX);
-
-	//
-	static inline core::smart_refctd_ptr<MeshDebugRenderer> create(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX, const std::span<const video::IGPUPolygonGeometry* const> geometries)
-	{
-		auto retval = create(assMan,renderpass,subpassIX);
-		if (retval)
-			retval->addGeometries(geometries);
-		return retval;
-	}
-
 	//
 	struct SInitParams {
 
 		core::smart_refctd_ptr<video::IGPUDescriptorSet> meshDescriptor;
-		core::smart_refctd_ptr<video::SubAllocatedDescriptorSet> subAllocDS;//vertex and normal views
-		core::smart_refctd_ptr<video::IGPUPipelineLayout> layout;
+		core::smart_refctd_ptr<video::IGPUPipelineLayout> pipe_layout; //when im looking at it from outside the class i need to know what kind of layout this is
 		core::smart_refctd_ptr<video::IGPUMeshPipeline> pipeline;
 	};
 	inline SInitParams& getInitParams() {return m_params;}
@@ -112,13 +84,13 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 	//device should be const* but im not going to fix it right now 
 	//(scope creep)
 		
-	bool addGeometries(const std::span<const video::IGPUPolygonGeometry* const> geometries);
+	bool addGeometries();
 
 	void removeGeometry(const uint32_t ix, const video::ISemaphore::SWaitInfo& info);
 
 	inline const auto& getGeometries() const {return m_geoms;}
 
-	void render(video::IGPUCommandBuffer* cmdbuf, const SViewParams& viewParams) const;
+	void render(video::IGPUCommandBuffer* cmdbuf, nbl::hlsl::float32_t4x4 const& mvp) const;
 
 	SInstance m_instance;
 
@@ -133,17 +105,11 @@ class MeshDebugRenderer final : public core::IReferenceCounted {
 	inline MeshDebugRenderer(SInitParams&& _params) : m_params(std::move(_params)) {}
 	inline ~MeshDebugRenderer()	{
 		// clean shutdown, can also make SubAllocatedDescriptorSet resillient against that, and issue `device->waitIdle` if not everything is freed
-		const_cast<video::ILogicalDevice*>(m_params.layout->getOriginDevice())->waitIdle();
+		const_cast<video::ILogicalDevice*>(m_params.pipe_layout->getOriginDevice())->waitIdle();
 		clearGeometries({});
 	}
 	void clearGeometries(const video::ISemaphore::SWaitInfo& info);
 
-	inline void immediateDealloc(video::SubAllocatedDescriptorSet::value_type index)
-	{
-		video::IGPUDescriptorSet::SDropDescriptorSet dummy[1];
-		m_params.subAllocDS->multi_deallocate(dummy,VertexAttrubUTBDescBinding,1,&index);
-	}
-
 	SInitParams m_params;
 #undef EXPOSE_NABLA_NAMESPACES
 };
diff --git a/MeshShader/include/SampleApp.h b/MeshShader/include/SampleApp.h
index b56932f1c..3821528d4 100644
--- a/MeshShader/include/SampleApp.h
+++ b/MeshShader/include/SampleApp.h
@@ -31,9 +31,6 @@ class MeshSampleApp final : public MonoWindowApplication, public BuiltinResource
 	protected:
 		const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override;
 	private:
-
-		void UpdateDescriptor();
-		
 		void UpdateScene(nbl::video::IGPUCommandBuffer* cb);
 		void update(const std::chrono::microseconds nextPresentationTimestamp);
 		void recreateFramebuffer(const uint16_t2 resolution);
@@ -47,8 +44,6 @@ class MeshSampleApp final : public MonoWindowApplication, public BuiltinResource
 		// we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes
 		constexpr static inline auto MaxImGUITextures = 2u+MaxFramesInFlight;
 
-		//
-		smart_refctd_ptr<CGeometryCreatorScene> m_scene;
 		smart_refctd_ptr<IGPURenderpass> m_renderpass;
 		smart_refctd_ptr<IGPUFramebuffer> m_framebuffer;
 
@@ -73,8 +68,6 @@ class MeshSampleApp final : public MonoWindowApplication, public BuiltinResource
 		//i really hate interface beign it's own object
 		struct CInterface
 		{
-			bool meshControlSeparated = false;
-			void DrawMeshControls();
 			bool cameraControlSeparated = false;
 			void DrawCameraControls();
 
@@ -83,19 +76,14 @@ class MeshSampleApp final : public MonoWindowApplication, public BuiltinResource
 
 			void operator()();
 			
-			bool transposeCameraViewProj = false;
 			smart_refctd_ptr<ext::imgui::UI> imGUI;
 			// descriptor set
 			smart_refctd_ptr<SubAllocatedDescriptorSet> subAllocDS;
 			SubAllocatedDescriptorSet::value_type renderColorViewDescIndex = SubAllocatedDescriptorSet::invalid_value;
-			//
-			Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD());
-			// mutables
-			int32_t currentTransform = -1;
-			std::array<hlsl::matrix<float, 4, 4>, MeshDataBuffer::MaxInstanceCount * MeshDataBuffer::MaxObjectCount> transforms;
 
-			std::array<std::string, MeshDataBuffer::MaxObjectCount> objectNames;
-			std::array<uint32_t, MeshDataBuffer::MaxObjectCount> objectCount = { 1, 0, 0, 0,  0, 0, 0 };
+			core::matrix3x4SIMD model;
+
+			Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD());
 
 			TransformRequestParams transformParams;
 			uint16_t2 sceneResolution = {1280,720};
diff --git a/MeshShader/include/transform.hpp b/MeshShader/include/transform.hpp
index dd3b37bd9..201966e3b 100644
--- a/MeshShader/include/transform.hpp
+++ b/MeshShader/include/transform.hpp
@@ -1,6 +1,11 @@
+#pragma once
+
 #ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
 #define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
 
+
+#include "nbl/ui/ICursorControl.h"
+
 #include "nbl/ext/ImGui/ImGui.h"
 
 #include "imgui/imgui_internal.h"
@@ -11,65 +16,60 @@ struct TransformRequestParams
 {
 	float camDistance = 8.f;
 	uint8_t sceneTexDescIx = ~0;
-	bool useWindow = true;
-	bool editTransformDecomposition = false;
-	bool enableViewManipulate = false;
+	bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false;
 };
 
-struct TransformWidget {
-	ImGuizmo::OPERATION mCurrentGizmoOperation{ ImGuizmo::TRANSLATE };
-	ImGuizmo::MODE mCurrentGizmoMode{ImGuizmo::LOCAL};
-	bool useSnap = false;
-	float snap[3] = { 1.f, 1.f, 1.f };
-	float bounds[6] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f };
-	float boundsSnap[3] = { 0.1f, 0.1f, 0.1f };
-	bool boundSizing = false;
-	bool boundSizingSnap = false;
-
-
-	void EditTransform(float* matrix, const TransformRequestParams& params) {
-
-
-		if (params.editTransformDecomposition)
+inline nbl::hlsl::uint16_t2 EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params)
+{
+	static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE);
+	static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL);
+	static bool useSnap = false;
+	static float snap[3] = { 1.f, 1.f, 1.f };
+	static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f };
+	static float boundsSnap[] = { 0.1f, 0.1f, 0.1f };
+	static bool boundSizing = false;
+	static bool boundSizingSnap = false;
+
+	if (params.editTransformDecomposition)
+	{
+		if (ImGui::IsKeyPressed(ImGuiKey_T))
+			mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
+		if (ImGui::IsKeyPressed(ImGuiKey_R))
+			mCurrentGizmoOperation = ImGuizmo::ROTATE;
+		if (ImGui::IsKeyPressed(ImGuiKey_S))
+			mCurrentGizmoOperation = ImGuizmo::SCALE;
+		if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE))
+			mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
+		ImGui::SameLine();
+		if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE))
+			mCurrentGizmoOperation = ImGuizmo::ROTATE;
+		ImGui::SameLine();
+		if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE))
+			mCurrentGizmoOperation = ImGuizmo::SCALE;
+		if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL))
+			mCurrentGizmoOperation = ImGuizmo::UNIVERSAL;
+		float matrixTranslation[3], matrixRotation[3], matrixScale[3];
+		ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale);
+		ImGui::InputFloat3("Tr", matrixTranslation);
+		ImGui::InputFloat3("Rt", matrixRotation);
+		ImGui::InputFloat3("Sc", matrixScale);
+		ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix);
+
+		if (mCurrentGizmoOperation != ImGuizmo::SCALE)
 		{
-			if (ImGui::IsKeyPressed(ImGuiKey_T))
-				mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
-			if (ImGui::IsKeyPressed(ImGuiKey_R))
-				mCurrentGizmoOperation = ImGuizmo::ROTATE;
-			if (ImGui::IsKeyPressed(ImGuiKey_S))
-				mCurrentGizmoOperation = ImGuizmo::SCALE;
-			if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE))
-				mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
-			ImGui::SameLine();
-			if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE))
-				mCurrentGizmoOperation = ImGuizmo::ROTATE;
-			ImGui::SameLine();
-			if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE))
-				mCurrentGizmoOperation = ImGuizmo::SCALE;
-			if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL))
-				mCurrentGizmoOperation = ImGuizmo::UNIVERSAL;
-			float matrixTranslation[3], matrixRotation[3], matrixScale[3];
-			ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale);
-			ImGui::InputFloat3("Tr", matrixTranslation);
-			ImGui::InputFloat3("Rt", matrixRotation);
-			ImGui::InputFloat3("Sc", matrixScale);
-			ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix);
-
-			if (mCurrentGizmoOperation != ImGuizmo::SCALE)
-			{
-				if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL))
-					mCurrentGizmoMode = ImGuizmo::LOCAL;
-				ImGui::SameLine();
-				if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD))
-					mCurrentGizmoMode = ImGuizmo::WORLD;
-			}
-			if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift))
-				useSnap = !useSnap;
-			ImGui::Checkbox("##UseSnap", &useSnap);
+			if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL))
+				mCurrentGizmoMode = ImGuizmo::LOCAL;
 			ImGui::SameLine();
+			if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD))
+				mCurrentGizmoMode = ImGuizmo::WORLD;
+		}
+		if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift))
+			useSnap = !useSnap;
+		ImGui::Checkbox("##UseSnap", &useSnap);
+		ImGui::SameLine();
 
-			switch (mCurrentGizmoOperation)
-			{
+		switch (mCurrentGizmoOperation)
+		{
 			case ImGuizmo::TRANSLATE:
 				ImGui::InputFloat3("Snap", &snap[0]);
 				break;
@@ -79,72 +79,86 @@ struct TransformWidget {
 			case ImGuizmo::SCALE:
 				ImGui::InputFloat("Scale Snap", &snap[0]);
 				break;
-			}
-			ImGui::Checkbox("Bound Sizing", &boundSizing);
-			if (boundSizing)
-			{
-				ImGui::PushID(3);
-				ImGui::Checkbox("##BoundSizing", &boundSizingSnap);
-				ImGui::SameLine();
-				ImGui::InputFloat3("Snap", boundsSnap);
-				ImGui::PopID();
-			}
 		}
-	
+		ImGui::Checkbox("Bound Sizing", &boundSizing);
+		if (boundSizing)
+		{
+			ImGui::PushID(3);
+			ImGui::Checkbox("##BoundSizing", &boundSizingSnap);
+			ImGui::SameLine();
+			ImGui::InputFloat3("Snap", boundsSnap);
+			ImGui::PopID();
+		}
 	}
 
-
-	ImVec2 ViewingGizmo(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
-		ImGuiIO& io = ImGui::GetIO();
-		float viewManipulateRight = io.DisplaySize.x;
-		float viewManipulateTop = 0;
-		static ImGuiWindowFlags gizmoWindowFlags = 0;
-		SImResourceInfo info;
-		info.textureID = params.sceneTexDescIx;
-		info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER;
-
-
+	ImGuiIO& io = ImGui::GetIO();
+	float viewManipulateRight = io.DisplaySize.x;
+	float viewManipulateTop = 0;
+	static ImGuiWindowFlags gizmoWindowFlags = 0;
+
+	/*
+		for the "useWindow" case we just render to a gui area,
+		otherwise to fake full screen transparent window
+
+		note that for both cases we make sure gizmo being
+		rendered is aligned to our texture scene using
+		imgui  "cursor" screen positions
+	*/
+// TODO: this shouldn't be handled here I think
+	SImResourceInfo info;
+	info.textureID = params.sceneTexDescIx;
+	info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER;
+
+	nbl::hlsl::uint16_t2 retval;
+	if (params.useWindow)
+	{
 		ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing);
 		ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing);
 		ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f));
 		ImGui::Begin("Gizmo", 0, gizmoWindowFlags);
 		ImGuizmo::SetDrawlist();
 
+		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
 		ImVec2 windowPos = ImGui::GetWindowPos();
 		ImVec2 cursorPos = ImGui::GetCursorScreenPos();
 
-		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
 		ImGui::Image(info, contentRegionSize);
 		ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
+		retval = { contentRegionSize.x,contentRegionSize.y };
 
 		viewManipulateRight = cursorPos.x + contentRegionSize.x;
 		viewManipulateTop = cursorPos.y;
 
 		ImGuiWindow* window = ImGui::GetCurrentWindow();
 		gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0);
+	}
+	else
+	{
+		ImGui::SetNextWindowPos(ImVec2(0, 0));
+		ImGui::SetNextWindowSize(io.DisplaySize);
+		ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window
+		ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs);
 
-		//static bool tempEnable = true;
-		//ImGui::Checkbox("temp enable", &tempEnable);
-		//if (tempEnable) { //debug branch
-			ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL);
-
-			if (params.enableViewManipulate) {
-				ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
-			}
-		//}
+		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
+		ImVec2 cursorPos = ImGui::GetCursorScreenPos();
 
-		ImGui::End();
-		ImGui::PopStyleColor();
+		ImGui::Image(info, contentRegionSize);
+		ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
+		retval = { contentRegionSize.x,contentRegionSize.y };
 
-		return contentRegionSize;
+		viewManipulateRight = cursorPos.x + contentRegionSize.x;
+		viewManipulateTop = cursorPos.y;
 	}
 
-	ImVec2 Update(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) {
-		EditTransform(matrix, params);
-		return ViewingGizmo(cameraView, cameraProjection, matrix, params);
-	}
+	ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL);
 
-};
+	if (params.enableViewManipulate)
+		ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
+
+	ImGui::End();
+	ImGui::PopStyleColor();
 
+	return retval;
+}
 
 #endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__
\ No newline at end of file
diff --git a/MeshShader/src/MeshRenderer.cpp b/MeshShader/src/MeshRenderer.cpp
index d28444a47..37ea7f631 100644
--- a/MeshShader/src/MeshRenderer.cpp
+++ b/MeshShader/src/MeshRenderer.cpp
@@ -1,6 +1,11 @@
 #include "MeshRenderer.hpp"
 
+
+
 namespace nbl::examples {
+
+
+
 	#define EXPOSE_NABLA_NAMESPACES \
 		using namespace nbl::core; \
 		using namespace nbl::system; \
@@ -9,24 +14,7 @@ namespace nbl::examples {
 
 	EXPOSE_NABLA_NAMESPACES;
 
-	constexpr static inline auto DefaultPolygonGeometryPatch = []()->video::CAssetConverter::patch_t<asset::ICPUPolygonGeometry> {
-		// we want to use the vertex data through UTBs
-		using usage_f = video::IGPUBuffer::E_USAGE_FLAGS;
-		video::CAssetConverter::patch_t<asset::ICPUPolygonGeometry> patch = {};
-		patch.positionBufferUsages = usage_f::EUF_UNIFORM_TEXEL_BUFFER_BIT;
-		patch.indexBufferUsages = usage_f::EUF_INDEX_BUFFER_BIT;
-		patch.otherBufferUsages = usage_f::EUF_UNIFORM_TEXEL_BUFFER_BIT;
-		return patch;
-	}();
-
-	MeshDebugRenderer::SViewParams::SViewParams(const hlsl::float32_t4x4& _viewProj, std::array<uint32_t, MeshDataBuffer::MaxObjectCount> const& objectCounts)
-		: viewProj{_viewProj},
-		objectCounts{objectCounts}
-	{
-	}
-
-
-	std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 3> MeshDebugRenderer::CreateTestShader(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX) {
+	std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 2> MeshDebugRenderer::CreateTestShader(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX) {
 		auto device = const_cast<ILogicalDevice*>(renderpass->getOriginDevice());
 		auto logger = device->getLogger();
 		auto loadCompileAndCreateShader = [&](const std::string& relPath, hlsl::ShaderStage stage, std::span<const asset::IShaderCompiler::SMacroDefinition> extraDefines) -> smart_refctd_ptr<IShader>
@@ -67,19 +55,19 @@ namespace nbl::examples {
 				return ret;
 			};
 		constexpr uint32_t WorkgroupSize = 64;
-		const uint32_t ObjectCount = 7;
-		const uint32_t InstanceCount = 8; //this is going to be based off limits. 64 is PROBABLY safe on all hardware, but cant guarantee
+		//const uint32_t ObjectCount = 7;
+		//const uint32_t InstanceCount = 8; //this is going to be based off limits. 64 is PROBABLY safe on all hardware, but cant guarantee
 		const std::string WorkgroupSizeAsStr = std::to_string(WorkgroupSize);
-		const std::string ObjectCountAsStr = std::to_string(ObjectCount);
-		const std::string InstanceCountAsStr = std::to_string(InstanceCount);
+		//const std::string ObjectCountAsStr = std::to_string(ObjectCount);
+		//const std::string InstanceCountAsStr = std::to_string(InstanceCount);
 
 		const IShaderCompiler::SMacroDefinition WorkgroupSizeDefine = { "WORKGROUP_SIZE",WorkgroupSizeAsStr };
-		const IShaderCompiler::SMacroDefinition ObjectCountDefine = { "OBJECT_COUNT", ObjectCountAsStr };
-		const IShaderCompiler::SMacroDefinition InstanceCountDefine = { "INSTANCE_COUNT", InstanceCountAsStr };
+		//const IShaderCompiler::SMacroDefinition ObjectCountDefine = { "OBJECT_COUNT", ObjectCountAsStr };
+		//const IShaderCompiler::SMacroDefinition InstanceCountDefine = { "INSTANCE_COUNT", InstanceCountAsStr };
 
-		const IShaderCompiler::SMacroDefinition meshArray[] = { WorkgroupSizeDefine, ObjectCountDefine, InstanceCountDefine };
+		const IShaderCompiler::SMacroDefinition meshArray[] = { WorkgroupSizeDefine };// , ObjectCountDefine, InstanceCountDefine};
 		return {
-			loadCompileAndCreateShader("app_resources/geom.task.hlsl", IShader::E_SHADER_STAGE::ESS_TASK, { meshArray }),
+			//loadCompileAndCreateShader("app_resources/geom.task.hlsl", IShader::E_SHADER_STAGE::ESS_TASK, { meshArray }),
 			loadCompileAndCreateShader("app_resources/geom.mesh.hlsl", IShader::E_SHADER_STAGE::ESS_MESH, { meshArray }),
 			loadCompileAndCreateShader("app_resources/geom.frag.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT, {})
 		};
@@ -99,41 +87,10 @@ namespace nbl::examples {
 
 		SInitParams init;
 
-		//
-		smart_refctd_ptr<IGPUDescriptorSetLayout> dsLayout;
 		smart_refctd_ptr<IGPUDescriptorSetLayout> meshLayout;
 
 		// create descriptor set
 		{
-			// create Descriptor Set Layout
-			{
-				using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
-				const IGPUDescriptorSetLayout::SBinding bindings[] =
-				{
-					{ //vertices
-						.binding = VertexAttrubUTBDescBinding,
-						.type = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER,
-						// need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable
-						.createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | binding_flags_t::ECF_PARTIALLY_BOUND_BIT,
-						.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH | IShader::E_SHADER_STAGE::ESS_FRAGMENT,
-						.count = MissingView
-					},
-					{ //indices
-						.binding = 1,
-						.type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
-						// need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable
-						.createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT | binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | binding_flags_t::ECF_PARTIALLY_BOUND_BIT,
-						.stageFlags = IShader::E_SHADER_STAGE::ESS_MESH,
-						.count = MissingView
-					},
-				};
-				dsLayout = device->createDescriptorSetLayout(bindings);
-				if (!dsLayout)
-				{
-					logger->log("Could not create descriptor set layout!", ILogger::ELL_ERROR);
-					return nullptr;
-				}
-			}
 			//creating meshdatabuffer descriptor set
 			{
 				using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
@@ -141,9 +98,9 @@ namespace nbl::examples {
 				{ //meshletdataobject
 					{
 						.binding = 0,
-						.type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER,
+						.type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
 						.createFlags = binding_flags_t::ECF_NONE,
-						.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH | IShader::E_SHADER_STAGE::ESS_FRAGMENT,
+						.stageFlags = IShader::E_SHADER_STAGE::ESS_MESH,
 						.count = 1
 					}
 				};
@@ -156,17 +113,9 @@ namespace nbl::examples {
 			}
 
 			// create Descriptor Set
-			std::vector< IGPUDescriptorSetLayout const*> dsls{ dsLayout.get(), meshLayout.get() };
+			std::vector< IGPUDescriptorSetLayout const*> dsls{ meshLayout.get() };
 
-			auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dsls);
-			auto ds = pool->createDescriptorSet(std::move(dsLayout));
-			if (!ds)
-			{
-				logger->log("Could not descriptor set!", ILogger::ELL_ERROR);
-				return nullptr;
-			}
-			init.subAllocDS = make_smart_refctd_ptr<SubAllocatedDescriptorSet>(std::move(ds));
-		}
+			auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, dsls);		}
 
 		// create pipeline layout
 		const SPushConstantRange ranges[] = { {
@@ -176,16 +125,16 @@ namespace nbl::examples {
 		} };
 
 		//because of the move semantics, the descriptor set we just created is no longer valid. instead, we need to go and rebuild a smart pointer to that descriptor set.
-		init.layout = device->createPipelineLayout(ranges, smart_refctd_ptr<const IGPUDescriptorSetLayout>(init.subAllocDS->getDescriptorSet()->getLayout()), meshLayout);
+		init.pipe_layout = device->createPipelineLayout(ranges, smart_refctd_ptr<const IGPUDescriptorSetLayout>(meshLayout));
 		auto shaderRet = CreateTestShader(assMan, renderpass, subpassIX);
 		// create pipelines
 		{
 			//this needs to be fixed, the mesh and frag use different files
 			IGPUMeshPipeline::SCreationParams params{
-				.layout = init.layout.get(),
-				.taskShader = {.shader = shaderRet[0].get(), .entryPoint = "main"},
-				.meshShader = {.shader = shaderRet[1].get(), .entryPoint = "main" },
-				.fragmentShader = {.shader = shaderRet[2].get(), .entryPoint = "main" }
+				.layout = init.pipe_layout.get(),
+				//.taskShader = {.shader = shaderRet[0].get(), .entryPoint = "main"},
+				.meshShader = {.shader = shaderRet[0].get(), .entryPoint = "main" },
+				.fragmentShader = {.shader = shaderRet[1].get(), .entryPoint = "main" }
 			};
 			// no vertex input, or assembly
 			auto& rasterization = params.cached.rasterization;
@@ -207,124 +156,6 @@ namespace nbl::examples {
 		return ret;
 	}
 
-	bool MeshDebugRenderer::addGeometries(const std::span<const video::IGPUPolygonGeometry* const> geometries)
-	{
-		EXPOSE_NABLA_NAMESPACES;
-		if (geometries.empty())
-			return false;
-		auto device = const_cast<ILogicalDevice*>(m_params.layout->getOriginDevice());
-
-		core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes;
-		core::vector<IGPUDescriptorSet::SDescriptorInfo> infos;
-		core::vector<IGPUDescriptorSet::SDescriptorInfo> infos_index;
-		bool anyFailed = false;
-		auto allocateUTB = [&](const IGeometry<const IGPUBuffer>::SDataView& view)->decltype(SubAllocatedDescriptorSet::invalid_value)
-			{
-				if (!view)
-					return MissingView;
-				auto index = SubAllocatedDescriptorSet::invalid_value;
-				if (m_params.subAllocDS->multi_allocate(VertexAttrubUTBDescBinding, 1, &index) != 0)
-				{
-					anyFailed = true;
-					return MissingView;
-				}
-				const auto infosOffset = infos.size();
-				infos.emplace_back().desc = device->createBufferView(view.src, view.composed.format);
-				writes.emplace_back() = {
-					.dstSet = m_params.subAllocDS->getDescriptorSet(),
-					.binding = VertexAttrubUTBDescBinding,
-					.arrayElement = index,
-					.count = 1,
-					.info = reinterpret_cast<const IGPUDescriptorSet::SDescriptorInfo*>(infosOffset)
-				};
-				return index;
-			};
-		auto allocateIndexBuffer = [&](const IGeometry<const IGPUBuffer>::SDataView& view)->decltype(SubAllocatedDescriptorSet::invalid_value) {
-				if (!view) {
-					return MissingView;
-				}
-				auto index = SubAllocatedDescriptorSet::invalid_value;
-				if (m_params.subAllocDS->multi_allocate(1, 1, &index) != 0)
-				{
-					anyFailed = true;
-					return MissingView;
-				}
-				const auto infosOffset = infos_index.size();
-				//i dont think the desc was used? but regardless, a storage buffer cant be a view because views are for texel buffers
-				//still going to use bindless, just without views
-				//infos_index.emplace_back().desc = device->createBufferView(view.src, view.composed.format);
-				writes.emplace_back() = {
-					.dstSet = m_params.subAllocDS->getDescriptorSet(),
-					.binding = 1,
-					.arrayElement = index,
-					.count = 1,
-					.info = reinterpret_cast<const IGPUDescriptorSet::SDescriptorInfo*>(infosOffset)
-				};
-				return index;
-			};
-
-		auto resetGeoms = core::makeRAIIExiter(
-			[&]()->void {
-				for (auto& write : writes) {
-					immediateDealloc(write.arrayElement);
-				}
-			}
-		);
-
-		//the order doesnt really matter as long as the data is respective
-		uint8_t meshIndex = 0;
-		for (const auto geom : geometries)
-		{
-			// could also check device origin on all buffers
-			if (!geom->valid())
-				return false;
-
-			
-			auto& out = m_geoms.meshData[meshIndex];
-			meshIndex++;
-			out.primCount = geom->getPrimitiveCount();
-
-			out.positionView = allocateUTB(geom->getPositionView());
-			out.normalView = allocateUTB(geom->getNormalView());
-
-			out.objectType = 0;
-			if(geom->getIndexingCallback()->knownTopology() == E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_FAN){
-				out.objectType |= 2;
-			}
-			const auto& view = geom->getIndexView();
-			if (view) {
-				out.indexView = allocateIndexBuffer(geom->getIndexView());
-				out.vertCount = view.getElementCount();
-			}
-			else {
-				out.indexView = MissingView;
-				out.vertCount = geom->getVertexReferenceCount();
-			}
-		}
-
-		if (anyFailed)
-			device->getLogger()->log("Failed to allocate a UTB for some geometries, probably ran out of space in Descriptor Set!", system::ILogger::ELL_ERROR);
-
-		// no geometry
-		if (infos.empty())
-			return false;
-
-		// unbase our pointers
-		std::size_t bindingZeroPoint = 0;
-		for (; bindingZeroPoint < infos.size(); bindingZeroPoint++) {
-			writes[bindingZeroPoint].info = infos.data() + reinterpret_cast<const size_t&>(writes[bindingZeroPoint].info);
-		}
-		for (std::size_t bindingOnePoint = 0; bindingOnePoint < infos_index.size(); bindingOnePoint++) {
-			writes[bindingOnePoint + bindingZeroPoint].info = infos_index.data() + reinterpret_cast<const size_t&>(writes[bindingOnePoint + bindingZeroPoint].info);
-		}
-
-		if (!device->updateDescriptorSets(writes, {}))
-			return false;
-
-		// retain
-		writes.clear();
-		return true;
-	}
 
 	void MeshDebugRenderer::clearGeometries(const video::ISemaphore::SWaitInfo& info) {
 		//im currently assuming every object gets loaded correctly. definitely incorrect
@@ -337,42 +168,24 @@ namespace nbl::examples {
 	{
 		EXPOSE_NABLA_NAMESPACES;
 
-		core::vector<SubAllocatedDescriptorSet::value_type> deferredFree;
-		deferredFree.reserve(3);
-		auto deallocate = [&](SubAllocatedDescriptorSet::value_type index)->void
-			{
-				if (index >= MissingView)
-					return;
-				if (info.semaphore)
-					deferredFree.push_back(index);
-				else
-					immediateDealloc(index);
-			};
-		auto geo = m_geoms.meshData[ix];
-		deallocate(geo.positionView);
-		deallocate(geo.normalView);
 
-		if (deferredFree.empty())
-			return;
-		m_params.subAllocDS->multi_deallocate(VertexAttrubUTBDescBinding, deferredFree.size(), deferredFree.data(), info);
 	}
 
-	void MeshDebugRenderer::render(video::IGPUCommandBuffer* cmdbuf, const SViewParams& viewParams) const
+	void MeshDebugRenderer::render(video::IGPUCommandBuffer* cmdbuf, nbl::hlsl::float32_t4x4 const& mvp) const
 	{
 		EXPOSE_NABLA_NAMESPACES;
 
 		cmdbuf->beginDebugMarker("MeshDebugRenderer::render");
 
-		const auto* layout = m_params.layout.get();
-		const auto ds = m_params.subAllocDS->getDescriptorSet();
-		std::array descriptors = { m_params.subAllocDS->getDescriptorSet(), m_params.meshDescriptor.get()};
+		const auto* layout = m_params.pipe_layout.get();
+		std::array descriptors = { m_params.meshDescriptor.get()};
 		cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS, layout, 0, descriptors.size(), descriptors.data());
 
 		cmdbuf->bindMeshPipeline(m_params.pipeline.get());
 		SInstance::SPushConstants pc{
-			.viewProj = viewParams.viewProj,
+			.viewProj = mvp,
+			.vertCount = 36
 		};
-		memcpy(pc.objectCount, viewParams.objectCounts.data(), viewParams.objectCounts.size() * sizeof(uint32_t));
 		cmdbuf->pushConstants(layout, hlsl::ShaderStage::ESS_TASK | hlsl::ShaderStage::ESS_MESH | hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pc), &pc);
 
 		cmdbuf->drawMeshTasks(1, 1, 1);
diff --git a/MeshShader/src/SampleApp.cpp b/MeshShader/src/SampleApp.cpp
index f16285380..969b07fcd 100644
--- a/MeshShader/src/SampleApp.cpp
+++ b/MeshShader/src/SampleApp.cpp
@@ -1,5 +1,86 @@
 #include "SampleApp.h"
 
+#include "transform.hpp"
+
+#include <glm/gtc/matrix_transform.hpp>
+
+// Builds a non-indexed triangle list for a unit cube centred on the origin:
+// 6 faces x 2 triangles x 3 corners = 36 positions, returned by value.
+std::vector<hlsl::vector<float, 3>> GetCubeData()
+{
+    // The eight cube corners: indices 0-3 have z = -0.5, indices 4-7 have z = +0.5.
+    std::array<hlsl::vector<float, 3>, 8> cube_vertices{
+        hlsl::vector<float, 3>{-0.5f, -0.5f, -0.5f},
+        hlsl::vector<float, 3>{ 0.5f, -0.5f, -0.5f},
+        hlsl::vector<float, 3>{ 0.5f,  0.5f, -0.5f},
+        hlsl::vector<float, 3>{-0.5f,  0.5f, -0.5f},
+        hlsl::vector<float, 3>{-0.5f, -0.5f,  0.5f},
+        hlsl::vector<float, 3>{ 0.5f, -0.5f,  0.5f},
+        hlsl::vector<float, 3>{ 0.5f,  0.5f,  0.5f},
+        hlsl::vector<float, 3>{-0.5f,  0.5f,  0.5f}
+    };
+
+    std::vector<hlsl::vector<float, 3>> triangleList;
+    // Corners are duplicated per triangle (no index buffer); each face below pushes two triangles.
+    // -z face (corners 0, 1, 2, 3)
+    triangleList.push_back(cube_vertices[0]);
+    triangleList.push_back(cube_vertices[2]);
+    triangleList.push_back(cube_vertices[1]);
+
+    triangleList.push_back(cube_vertices[0]);
+    triangleList.push_back(cube_vertices[3]);
+    triangleList.push_back(cube_vertices[2]);
+
+    // +z face (corners 4, 5, 6, 7)
+    triangleList.push_back(cube_vertices[4]);
+    triangleList.push_back(cube_vertices[5]);
+    triangleList.push_back(cube_vertices[6]);
+
+    triangleList.push_back(cube_vertices[4]);
+    triangleList.push_back(cube_vertices[6]);
+    triangleList.push_back(cube_vertices[7]);
+
+    // -x face (corners 0, 3, 4, 7)
+    triangleList.push_back(cube_vertices[4]);
+    triangleList.push_back(cube_vertices[7]);
+    triangleList.push_back(cube_vertices[3]);
+
+    triangleList.push_back(cube_vertices[4]);
+    triangleList.push_back(cube_vertices[3]);
+    triangleList.push_back(cube_vertices[0]);
+
+    // +x face (corners 1, 2, 5, 6)
+    triangleList.push_back(cube_vertices[1]);
+    triangleList.push_back(cube_vertices[2]);
+    triangleList.push_back(cube_vertices[6]);
+
+    triangleList.push_back(cube_vertices[1]);
+    triangleList.push_back(cube_vertices[6]);
+    triangleList.push_back(cube_vertices[5]);
+
+    // -y face (corners 0, 1, 4, 5)
+    triangleList.push_back(cube_vertices[4]);
+    triangleList.push_back(cube_vertices[0]);
+    triangleList.push_back(cube_vertices[1]);
+
+    triangleList.push_back(cube_vertices[4]);
+    triangleList.push_back(cube_vertices[1]);
+    triangleList.push_back(cube_vertices[5]);
+
+    // +y face (corners 2, 3, 6, 7)
+    triangleList.push_back(cube_vertices[3]);
+    triangleList.push_back(cube_vertices[7]);
+    triangleList.push_back(cube_vertices[6]);
+
+    triangleList.push_back(cube_vertices[3]);
+    triangleList.push_back(cube_vertices[6]);
+    triangleList.push_back(cube_vertices[2]);
+    // 36 positions in total; MeshDebugRenderer::render pushes a hard-coded vertCount of 36.
+    return triangleList;
+}
+
+
+
     bool MeshSampleApp::onAppInitialized(smart_refctd_ptr<ISystem>&& system) {
         if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system)))
             return false;
@@ -21,17 +102,6 @@
         
         const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()};
 
-
-        m_scene = CGeometryCreatorScene::create(
-            {
-                .transferQueue = getTransferUpQueue(),
-                .utilities = m_utils.get(),
-                .logger = m_logger.get(),
-                .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies
-            },
-            CSimpleDebugRenderer::DefaultPolygonGeometryPatch
-        );
-
         
         // for the scene drawing pass
         {
@@ -79,8 +149,7 @@
                 return logFail("Failed to create Scene Renderpass!");
         }
 
-        const auto& geometries = m_scene->getInitParams().geometries;
-        m_renderer = MeshDebugRenderer::create(m_assetMgr.get(), m_renderpass.get(), 0, { &geometries.front().get(),geometries.size() });
+        m_renderer = MeshDebugRenderer::create(m_assetMgr.get(), m_renderpass.get(), 0);
 
         // Create ImGUI
         {
@@ -197,34 +266,24 @@
                 return logFail("failed to map device memory");
             }
 
-            memcpy(interface.mesh_mapped_memory, m_renderer->m_geoms.meshData, sizeof(MeshletObjectData) * MeshDataBuffer::MaxObjectCount);
+            auto cubeData = GetCubeData();
+
+            memcpy(interface.mesh_mapped_memory, cubeData.data(), sizeof(hlsl::vector<float, 3>) * cubeData.size());
+            m_device->flushMappedMemoryRanges(1, &interface.meshMemoryRange);
+            // NOTE: interface.meshMemoryRange is already flushed by the call above; nothing further to flush here
 
             imgui->registerListener([this](){interface();});
 
         }
-        
-        interface.objectNames = {
-            "Cube",
-            "Rectangle",
-            "Disk",
-            "Sphere",
-            "Cylinder",
-            "Cone",
-            "Icosphere"
-            //magicenum reflection?
-        };
 
-        const hlsl::matrix<float, 4, 4> fillVal{
-            1.f, 0.f, 0.f, 0.f,
-            0.f, 1.f, 0.f, 0.f,
-            0.f, 0.f, 1.f, 0.f,
-            0.f, 0.f, 0.f, 1.f
-        };
-        interface.transforms.fill(fillVal);
+        //interface.transform = {
+        //    1.f, 0.f, 0.f, 0.f,
+        //    0.f, 1.f, 0.f, 0.f,
+        //    0.f, 0.f, 1.f, 0.f,
+        //    0.f, 0.f, 0.f, 1.f
+        //};
+        //interface.transforms.fill(fillVal);
 
-        //load up the ICPUGeometry, then convert it to GPU geometry
-
-        interface.camera.mapKeysToArrows();
 
         onAppInitializedFinish();
         return true;
@@ -348,9 +407,6 @@
         return retval;
     }
 
-    void MeshSampleApp::UpdateDescriptor() {
-        m_renderer.get()->getInitParams().subAllocDS;
-    }
 
     const video::IGPURenderpass::SCreationParams::SSubpassDependency* MeshSampleApp::getDefaultSubpassDependencies() const {
         // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present
@@ -388,26 +444,25 @@
 
 
     void MeshSampleApp::UpdateScene(nbl::video::IGPUCommandBuffer* cb) {
-        float32_t4x4 viewProjMatrix;
-        // TODO: get rid of legacy matrices //<-- camera.getViewMatrix returns matrix3x4SIMD
-        {
-            const auto& camera = interface.camera;
-            memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix));
-        }
-        if (interface.transposeCameraViewProj) {
-            viewProjMatrix = hlsl::transpose(viewProjMatrix);
-        }
-        const auto viewParams = MeshDebugRenderer::SViewParams(viewProjMatrix, interface.objectCount);
+        //viewProjMatrix = hlsl::transpose(viewProjMatrix);
 
-        m_renderer->render(cb, viewParams);
+        float32_t3x4 viewMatrix;
+        float32_t4x4 viewProjMatrix;
+        const auto& camera = interface.camera;
+        memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix));
+        memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix));
+        hlsl::float32_t3x4 world;
+        memcpy(&world, &interface.model, sizeof(world));
+        float32_t4x4 worldViewProj = float32_t4x4(math::linalg::promoted_mul(float64_t4x4(viewProjMatrix), float64_t3x4(world)));
+
+        m_renderer->render(cb, worldViewProj);
     }
 
 
     void MeshSampleApp::update(const std::chrono::microseconds nextPresentationTimestamp)
     {
-        auto& camera = interface.camera;
-        camera.setMoveSpeed(interface.moveSpeed);
-        camera.setRotateSpeed(interface.rotateSpeed);
+        interface.camera.setMoveSpeed(interface.moveSpeed);
+        interface.camera.setRotateSpeed(interface.rotateSpeed);
 
 
         m_inputSystem->getDefaultMouse(&mouse);
@@ -426,12 +481,12 @@
         // If you stop begin/end, whatever keys were up/down get their up/down values frozen leading to
         // `perActionDt` becoming obnoxiously large the first time the even processing resumes due to
         // `timeDiff` being computed since `lastVirtualUpTimeStamp` 
-        camera.beginInputProcessing(nextPresentationTimestamp);
+        interface.camera.beginInputProcessing(nextPresentationTimestamp);
         {
             mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void
                 {
                     if (interface.move)
-                        camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl
+                        interface.camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl
 
                     for (const auto& e : events) // here capture
                     {
@@ -452,7 +507,7 @@
             keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void
                 {
                     if (interface.move)
-                        camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl
+                        interface.camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl
 
                     for (const auto& e : events) // here capture
                     {
@@ -466,7 +521,7 @@
                 m_logger.get()
             );
         }
-        camera.endInputProcessing(nextPresentationTimestamp);
+        interface.camera.endInputProcessing(nextPresentationTimestamp);
 
         const auto cursorPosition = m_window->getCursorControl()->getPosition();
 
@@ -479,16 +534,6 @@
         };
 
         interface.imGUI->update(params);
-
-
-
-        auto* countMem = reinterpret_cast<MeshletObjectData*>(interface.mesh_mapped_memory);
-        //i only need to set the meslet object data once on initialization
-        //memcpy(countMem, interface.objectCount.data(), sizeof(MeshletObjectData) * MeshDataBuffer::MaxObjectCount);
-        countMem += MeshDataBuffer::MaxObjectCount;
-        auto* matrixMem = reinterpret_cast<hlsl::matrix<float, 4, 4>*>(countMem);
-        memcpy(matrixMem, interface.transforms.data(), interface.transforms.size() * sizeof(hlsl::matrix<float, 4, 4>));
-        m_device->flushMappedMemoryRanges(1, &interface.meshMemoryRange);
     }
 
     void MeshSampleApp::recreateFramebuffer(const uint16_t2 resolution)
@@ -592,53 +637,11 @@
                 ImGui::Separator();
         };
 
-    void MeshSampleApp::CInterface::DrawMeshControls() {
-        //this was for learning hlsl, given the shader takes like 2 minutes to compile, might as well just relaunch
-        //if (ImGui::Button("reload mesh shader")) {
-            //printf("test shader result - %d\n", CreateTestShaderFuncPtr());
-        //}
-
-        ImGui::DragInt("current transform editting", &currentTransform, 1, 0, MeshDataBuffer::MaxObjectCount * MeshDataBuffer::MaxInstanceCount);
-
-        for (uint8_t i = 0; i < objectNames.size(); i++) {
-            const std::string objNameWithCount = objectNames[i] + " {" + std::to_string(objectCount[i]) + '}';
-            if (ImGui::TreeNode(objNameWithCount.c_str())) {
-                const std::string objCountDraggerName = ("object count ##") + objectNames[i];
-
-                int imguiCopy = objectCount[i];
-                //ImGui::DragInt(objCountDraggerName.c_str(), &imguiCopy, 1, 0, localMax);
-                ImGui::SliderInt(objCountDraggerName.c_str(), &imguiCopy, 0, MeshDataBuffer::MaxInstanceCount);
-                objectCount[i] = imguiCopy;
-
-                for (uint64_t j = 0; j < objectCount[i]; j++) {
-                    const std::string treeName = std::string("transform[") + std::to_string(j) + "]##" + objectNames[i];
-                    if (ImGui::TreeNode(treeName.c_str())) {
-                        const std::size_t transformIndex = i * MeshDataBuffer::MaxInstanceCount + j;
-                        addMatrixTable("model", "", 4, 4, &transforms[transformIndex][0][0]);
-
-                        //imguizmo overwrites these changes
-                        //for (uint8_t x = 0; x < 4; x++) {
-                        //    const std::size_t rowIndex = transformIndex * 4 + x;
-                        //    const std::string rowName = std::string("##") + std::to_string(rowIndex);
-                        //    ImGui::DragFloat4(rowName.c_str(), &transforms[transformIndex][x][0], 0.1f, -100.f, 100.f);
-                        //}
-                        ImGui::TreePop();
-                    }
-                }
-
-                ImGui::TreePop();
-            }
-        }
-    }
-
     void MeshSampleApp::CInterface::DrawCameraControls() {
         ImGuiIO& io = ImGui::GetIO();
 
         ImGui::Text("Camera");
         bool viewDirty = false;
-
-        ImGui::Checkbox("transpose view proj (holy space intermixing)", &transposeCameraViewProj);
-
         if (ImGui::RadioButton("LH", isLH))
             isLH = true;
 
@@ -662,10 +665,7 @@
 
         // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case
 
-        if (isPerspective)
-            ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
-        else
-            ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20);
+        ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
 
         ImGui::SliderFloat("zNear", &zNear, 0.1f, zFar);
         ImGui::SliderFloat("zFar", &zFar, zNear, 10000.f);
@@ -701,12 +701,10 @@
             ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : "");
         }
 
-        const auto& view = camera.getViewMatrix(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
-
-        auto const& projection = camera.getProjectionMatrix();
-        if (ImGui::TreeNode("camera matrices")) {
-            addMatrixTable("View", "ViewMatrixTable", 3, 4, view.pointer());
-            addMatrixTable("Projection", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false);
+        if (ImGui::TreeNode("matrices")) {
+            addMatrixTable("View", "ViewMatrixTable", 3, 4, camera.getViewMatrix().pointer());
+            addMatrixTable("Projection", "ViewProjectionMatrixTable", 4, 4, camera.getProjectionMatrix().pointer(), false);
+            addMatrixTable("model", "transform", 3, 4, model.pointer(), false);
             ImGui::TreePop();
         }
     }
@@ -729,75 +727,46 @@
 
         ImGuizmo::SetID(0u);
 
+
         imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
         imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix());
-
-        if (currentTransform < 0) {
-            currentTransform = 0;
-        }
-
-        if (currentTransform >= 0 && currentTransform < transforms.size()) {
-            //auto transposedTemp = core::transpose(transforms[currentTransform]);
-            //the model is a double matrix, so a memcpy or reinterpret wont work
-            //i might have the x and y backwards it doesnt matter as long as its x:x and y:y
-            //skipping the transform from example 61
-            for (uint8_t x = 0; x < 4; x++) {
-                for (uint8_t y = 0; y < 4; y++) {
-                    imguizmoM16InOut.model[x][y] = transforms[currentTransform][x][y];
-                }
-            }
-        }
+        imguizmoM16InOut.model = core::transpose(matrix4SIMD(model));
         {
-            transformParams.editTransformDecomposition = true;
-            static TransformWidget transformWidget{};
-            const auto tempForConversion = transformWidget.Update(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
-            sceneResolution = { tempForConversion.x, tempForConversion.y };
+            if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates
+                imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/	
 
+            transformParams.editTransformDecomposition = true;
+            sceneResolution = EditTransform(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams);
         }
 
-        if (currentTransform >= 0 && currentTransform < transforms.size()) {
-            for (uint8_t x = 0; x < 4; x++) {
-                for (uint8_t y = 0; y < 4; y++) {
-                    //tranposed
-                    transforms[currentTransform][x][y] = imguizmoM16InOut.model[x][y];
-                }
-            }
-        }
-        const auto& view = camera.getViewMatrix();
-        const_cast<core::matrix3x4SIMD&>(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok)
-        const auto& projection = camera.getProjectionMatrix();
-        camera.setProjectionMatrix(projection); //this recalcs viewproj
-
+        model = core::transpose(imguizmoM16InOut.model).extractSub3x4();
     }
 
     void MeshSampleApp::CInterface::operator()() {
         ImGuiIO& io = ImGui::GetIO();
         //io.ConfigDebugIsDebuggerPresent = true;
 
-        //camera
-        matrix4SIMD projection;
+        camera.setProjectionMatrix([&]()
         {
-            const float viewHeight = viewWidth * io.DisplaySize.x / io.DisplaySize.y;
+            matrix4SIMD projection;
 
-            if (isPerspective) {
-                if (isLH) {
-                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), viewHeight, zNear, zFar);
-                }
-                else {
-                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), viewHeight, zNear, zFar);
-                }
-            }
+            if (isPerspective)
+                if (isLH)
+                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
+                else
+                    projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar);
             else
             {
-                if (isLH) {
-                    projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, 1.f / viewHeight, zNear, zFar);
-                }
-                else {
-                    projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, 1.f / viewHeight, zNear, zFar);
-                }
+                float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x;
+
+                if (isLH)
+                    projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar);
+                else
+                    projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar);
             }
-            camera.setProjectionMatrix(projection);
-        } //end camera
+
+            return projection;
+        }());
         
         ImGuizmo::SetOrthographic(false);
         ImGuizmo::BeginFrame();
@@ -810,13 +779,6 @@
         ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing);
         ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing);
 
-        if (meshControlSeparated) {
-            if (ImGui::Begin("mesh controls", &meshControlSeparated)) {
-                meshControlSeparated = !ImGui::Button("Rejoin mesh control");
-                DrawMeshControls();
-            }
-            ImGui::End();
-        }
         if (cameraControlSeparated) {
             if (ImGui::Begin("camera controls", &cameraControlSeparated)) {
                 cameraControlSeparated = !ImGui::Button("Rejoin camera control");
@@ -826,12 +788,6 @@
         }
         if(ImGui::Begin("Editor")) {
 
-            if (!meshControlSeparated) {
-                meshControlSeparated = ImGui::Button("Separate mesh control");
-                DrawMeshControls();
-                ImGui::Separator();
-            }
-
             if (!cameraControlSeparated) {
                 cameraControlSeparated = ImGui::Button("Separate camera controls");
                 DrawCameraControls();