diff --git a/.gitignore b/.gitignore
index f119890ee..e95b918cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ compiled.spv
 */.vscode/*
 */__main__.py
 /tmp/rtSamples.bin
+imgui.ini
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 66b82f37f..3ece09d5c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -98,6 +98,8 @@ if(NBL_BUILD_EXAMPLES)
 	add_subdirectory(71_RayTracingPipeline)
 	add_subdirectory(72_CooperativeBinarySearch)
 
+	add_subdirectory(MeshShader)
+
 	# add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory
 	NBL_GET_ALL_TARGETS(TARGETS)
 
diff --git a/MeshShader/CMakeLists.txt b/MeshShader/CMakeLists.txt
new file mode 100644
index 000000000..315040e99
--- /dev/null
+++ b/MeshShader/CMakeLists.txt
@@ -0,0 +1,45 @@
+include(common RESULT_VARIABLE RES)
+if(NOT RES)
+        message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
+endif()
+
+if(NBL_BUILD_IMGUI)
+	set(NBL_EXTRA_SOURCES
+		#"${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" #just leaving this so i can easily reference it later
+		"${CMAKE_CURRENT_SOURCE_DIR}/src/SampleApp.cpp"
+		"${CMAKE_CURRENT_SOURCE_DIR}/src/MeshRenderer.cpp"
+	)
+
+	set(NBL_INCLUDE_SEARCH_DIRECTORIES
+		"${CMAKE_CURRENT_SOURCE_DIR}/include"
+	)
+
+	list(APPEND NBL_LIBRARIES 
+		imtestengine
+		imguizmo
+		"${NBL_EXT_IMGUI_UI_LIB}"
+	)
+
+	# TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !?
+	nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SEARCH_DIRECTORIES}" "${NBL_LIBRARIES}")
+	# The embed step below reads ${EXECUTABLE_NAME} and links to that target, so it must run after the executable has been created (NOTE(review): assumes the macro above sets EXECUTABLE_NAME - confirm in cmake/common.cmake)
+	if(NBL_EMBED_BUILTIN_RESOURCES)
+		set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
+		set(RESOURCE_DIR "app_resources")
+
+		get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
+		get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
+		get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)
+
+		file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
+		foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
+			LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
+		endforeach()
+
+		ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")
+
+		LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
+	endif()
+	# TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet
+	# LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD)
+endif()
\ No newline at end of file
diff --git a/MeshShader/README.md b/MeshShader/README.md
new file mode 100644
index 000000000..01640d5a1
--- /dev/null
+++ b/MeshShader/README.md
@@ -0,0 +1,49 @@
+## 9/30/2025 - GDBobby
+Here's the current plan, front to back
+
+1. Remove all unnecessary parts from my copy of example 61.
+
+    1.1 figure out what IS necessary.
+    
+    1.2 trace the graphics pipeline used, so I can figure out how the mesh pipeline should look
+
+2. i dont have much experience with viewports and scissors yet, so I'd like to change
+    how the imgui viewport is handled just for the fun of it. 61 mentions it's rendered to a
+    temporary color attachment which is then sourced as a texture in imgui. id like to change it so
+    that imgui literally just puts a box around a viewport thats rendered to directly
+
+3. Create the Mesh Pipeline.
+
+    3.1. I want to support generative (procedural) mesh shaders, which take 0 input vertices
+    
+    3.2. I want to support meshlets - small meshes that are defined by pre-existing vertices
+    
+    3.3. I want to re-compile the mesh shader into a compute and vertex shader combo, 
+        which can be used on machines that don't support the mesh shader extension 
+        (mostly GPUs older than 2016)
+
+
+I think, to prevent controlling two different branches in two different repos, I'll stuff everything into this example in the beginning. 
+Once everything starts to come together, I'll start moving things, like the Mesh Pipeline class, into more appropriate places, like Nabla itself.
+
+
+## 9/31
+I'll create a mesh shader tomorrow. I don't really know what to do yet but I'll start with procedural gen.
+
+I think I'll also make a different pipeline object that supports the geometry from example 61?
+
+I had my fun with viewports. idk what i expected tbh
+
+I need to search a little deeper in the spec for other mesh pipeline related rules. I need to research subpasses as well.
+
+
+## 10/3
+Beginning shader experimentation. Setting up easy reload of shaders so I don't have to relaunch every test iteration.
+
+## 10/6
+I need to add CPU-side verification that the mesh shader's vertex and primitive counts are below the Vulkan limits, the same way the workgroup size is verified.
+
+Mesh and Task shaders having branches where the output is not defined is incorrect. The glslc compiler won't warn the user, I'll have to check for DXC. Nvidia will assume it's a 0 group output, but AMD will get DEVICE_LOST. If it's possible, having a warning or compile check for that would be nice. Most likely outside the scope of Nabla, but possibly not. I'll have to ask.
+
+## 10/9
+On the bug hunting phase. Should be finished shortly, then I'll hit the cleanup phase.
\ No newline at end of file
diff --git a/MeshShader/app_resources/FirstBuild.mesh.hlsl b/MeshShader/app_resources/FirstBuild.mesh.hlsl
new file mode 100644
index 000000000..db0ed585d
--- /dev/null
+++ b/MeshShader/app_resources/FirstBuild.mesh.hlsl
@@ -0,0 +1,23 @@
+//https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html#primitive-attributes
+
+struct SInterpolants{
+    float4 ndc : SV_Position;
+};
+struct Primo {
+    uint vertexID : SV_PrimitiveID;
+};
+
+[numthreads(WORKGROUP_SIZE,1,1)]
+[outputtopology("point")]
+[shader("mesh")]
+void main(
+    in uint3 ID : SV_DispatchThreadID,
+    out vertices SInterpolants verts[WORKGROUP_SIZE],
+    out indices uint prims[WORKGROUP_SIZE]
+)
+{
+    SetMeshOutputCounts(WORKGROUP_SIZE, WORKGROUP_SIZE); // the counts must be declared before the first write to an output array
+    const uint localIx = ID.x % WORKGROUP_SIZE; // output arrays are per-workgroup, so index them with the in-group thread index
+    verts[localIx].ndc = float32_t4(ID.x, 0.0, 0.0, 1.0);
+    prims[localIx] = localIx; // each thread emits one point that references its own vertex
+}
\ No newline at end of file
diff --git a/MeshShader/app_resources/geom.frag.hlsl b/MeshShader/app_resources/geom.frag.hlsl
new file mode 100644
index 000000000..0c4e051e0
--- /dev/null
+++ b/MeshShader/app_resources/geom.frag.hlsl
@@ -0,0 +1,13 @@
+
+struct VertexOut {
+    float32_t4 ndc : SV_Position;
+    float32_t3 meta : COLOR1;
+};
+
+
+[shader("pixel")]
+float32_t4 main(VertexOut input) : SV_Target0
+{
+    // normalize the interpolated `meta` vector, remap it from [-1,1] to [0,1] and output it as an opaque colour
+    return float32_t4(normalize(input.meta) * 0.5f + float32_t3(0.5f, 0.5f, 0.5f), 1.f);
+}
\ No newline at end of file
diff --git a/MeshShader/app_resources/geom.mesh.hlsl b/MeshShader/app_resources/geom.mesh.hlsl
new file mode 100644
index 000000000..ac75af0d3
--- /dev/null
+++ b/MeshShader/app_resources/geom.mesh.hlsl
@@ -0,0 +1,51 @@
+//https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html#primitive-attributes
+
+#include "task_mesh_common.hlsl"
+
+//            (binding, set)
+[[vk::binding(0, 0)]] StructuredBuffer<float3> MeshVertexBuffer;
+
+struct VertexOut {
+    float32_t4 ndc : SV_Position;
+    float32_t3 meta : COLOR1;
+};
+
+// Mesh shader entry point: one workgroup turns up to WORKGROUP_SIZE vertices of MeshVertexBuffer into a non-indexed triangle list.
+[numthreads(WORKGROUP_SIZE,1,1)]
+[outputtopology("triangle")]
+[shader("mesh")]
+void main(
+    in uint3 id : SV_DispatchThreadID,
+    in uint3 groupThreadID : SV_GroupThreadID,
+    out vertices VertexOut verts[WORKGROUP_SIZE],
+    out indices uint3 prims[WORKGROUP_SIZE]
+)
+{
+    // clamp to the declared output array size so an oversized push constant cannot index past verts/prims
+    const uint vertCount = min(pc.vertCount, uint(WORKGROUP_SIZE));
+    const uint primCount = vertCount / 3;
+
+    // the output counts have to be declared before the first write to any output array
+    SetMeshOutputCounts(vertCount, primCount);
+
+    // I haven't benchmarked this personally, but my understanding is that AMD devices prefer mesh shaders to be "by primitive"
+    // and that NVIDIA devices prefer mesh shaders to be "by vertex".
+    // Ideally, I'd benchmark both and set up branches so that each device can specialize the shader based on what it likes
+    // (there's a property in VkMeshProperties that would indicate this)
+    // NOTE: indexing with the dispatch-wide id assumes the mesh is drawn with a single workgroup
+    if (id.x < vertCount) {
+        const float32_t3 position = MeshVertexBuffer[id.x];
+        verts[id.x].ndc = mul(pc.mvp, float32_t4(position, 1.0));
+        // the untransformed position is passed along in `meta`
+        verts[id.x].meta = position;
+    }
+
+    // a triangle list is assumed: primitive i consumes vertices 3i, 3i+1, 3i+2. It won't work for anything else
+    if (id.x < primCount) {
+        prims[id.x] = uint3(
+                        id.x * 3, 
+                        id.x * 3 + 1, 
+                        id.x * 3 + 2
+                    );
+    }
+}
\ No newline at end of file
diff --git a/MeshShader/app_resources/geom.task.hlsl b/MeshShader/app_resources/geom.task.hlsl
new file mode 100644
index 000000000..330bf57fc
--- /dev/null
+++ b/MeshShader/app_resources/geom.task.hlsl
@@ -0,0 +1,22 @@
+
+#include "task_mesh_common.hlsl"
+
+groupshared TaskToMeshPayload taskToMeshPayload;
+
+[numthreads(1,1,1)]
+void main(
+	in uint3 id : SV_DispatchThreadID,
+	in uint3 groupThreadId : SV_GroupThreadID
+	// the payload is not declared as an `out payload` parameter here; the groupshared taskToMeshPayload is handed to DispatchMesh below
+){
+	uint objectCount = 0;
+	for(uint i = 0; i < OBJECT_COUNT; i++){
+		for(uint j = 0; j < pc.objectCount[i]; j++){
+			taskToMeshPayload.objectType[objectCount] = i;
+			objectCount++;
+		}
+	}
+	// NOTE(review): TaskToMeshPayload and pc.objectCount are not declared in task_mesh_common.hlsl as it appears in this change (the payload struct is commented out there) - confirm before enabling this shader
+    printf("dispatching meshes - %u", objectCount);
+	DispatchMesh(objectCount, 1, 1, taskToMeshPayload);
+}
\ No newline at end of file
diff --git a/MeshShader/app_resources/task_mesh_common.hlsl b/MeshShader/app_resources/task_mesh_common.hlsl
new file mode 100644
index 000000000..8ca1cfe23
--- /dev/null
+++ b/MeshShader/app_resources/task_mesh_common.hlsl
@@ -0,0 +1,30 @@
+
+//this is user defined data sent from the task shader to the mesh shader
+//1 packet is sent, but it can use arrays so that each workgroup can receive customized data
+//struct TaskToMeshPayload {
+//    uint objectType[INSTANCE_COUNT * OBJECT_COUNT];
+//};
+
+//1 is cone, 2 is for fan, anything else for trangle list without the special normal calc.
+//cone can be handled in the task shader or the mesh shader, I'm going to handle it in the task shader
+//#define OTHER_OBJECTS 0
+#define CONE_OBJECT_TYPE 1
+#define T_FAN_OBJECT_TYPE 2
+struct MeshData{
+    uint vertCount;
+    uint primCount; //were assuming vertCount is always equal to primCount (no index buffer)
+    uint objType; 
+	uint positionView;
+    uint normalView;
+    uint indexView;
+};
+
+
+#define PushDescCount ((0x1<<16)-1) // fully parenthesized so the value survives being used inside a larger expression
+struct SPushConstants {
+	float4x4 mvp;
+    uint vertCount;
+};
+
+//im not keen on trying to figure out how the push constant abstraction worked before without documentation
+[[vk::push_constant]] SPushConstants pc;
\ No newline at end of file
diff --git a/MeshShader/include/MeshRenderer.hpp b/MeshShader/include/MeshRenderer.hpp
new file mode 100644
index 000000000..2bb559c5f
--- /dev/null
+++ b/MeshShader/include/MeshRenderer.hpp
@@ -0,0 +1,117 @@
+#pragma once
+
+#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl"
+#include "nbl/examples/geometry/SPushConstants.hlsl"
+
+namespace nbl::examples
+{
+
+	enum class MeshletObjectTypes {
+		Cube,
+		Rectangle,
+		Disk,
+		Sphere,
+		Cylinder,
+		Cone,
+		Icosphere,
+
+		COUNT
+	};
+		// Buffer data for one mesh object. NOTE(review): the shader-side `MeshData` in task_mesh_common.hlsl also has a `primCount` field - confirm whether the two layouts are meant to match
+	struct MeshletObjectData {
+		uint32_t vertCount;
+		uint32_t objectType;
+		uint32_t positionView;
+		uint32_t normalView;
+		uint32_t indexView;
+	};
+	struct MeshDataBuffer {
+		//if gpuGeometry is nullptr or std::nullopt or whatever, then mesh object type is invalid, the CPU memory failed to transfer to GPU for whatever reason
+		core::smart_refctd_ptr<const video::IGPUPolygonGeometry> gpuGeometry{};
+
+		static constexpr std::size_t MaxObjectCount = static_cast<std::size_t>(MeshletObjectTypes::COUNT);
+		static constexpr std::size_t MaxInstanceCount = 8; //for each object
+
+		MeshletObjectData meshData[MaxObjectCount];
+		hlsl::float32_t4x4 transforms[MaxInstanceCount];
+
+		//remove index type to avoid branch in shader
+		//asset::E_INDEX_TYPE indexType = asset::EIT_UNKNOWN;
+	};
+
+// Reference-counted renderer holding a mesh pipeline, its layout and a descriptor set (SInitParams) together with the geometry data it draws (m_geoms).
+class MeshDebugRenderer final : public core::IReferenceCounted {
+#define EXPOSE_NABLA_NAMESPACES \
+		using namespace nbl::core; \
+		using namespace nbl::system; \
+		using namespace nbl::asset; \
+		using namespace nbl::video
+
+public:
+	// descriptor binding index (the name suggests the vertex attribute UTB binding - TODO confirm against the layout creation code)
+	constexpr static inline uint16_t VertexAttrubUTBDescBinding = 0;
+
+	constexpr static inline auto MissingView = hlsl::examples::geometry_creator_scene::SPushConstants::DescriptorCount;
+
+	// per-instance data: a world matrix plus the definition of the push constant block
+	struct SInstance
+	{
+		struct SPushConstants
+		{
+			NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1 << 16) - 1;
+
+			nbl::hlsl::float32_t4x4 viewProj;
+			uint32_t vertCount;
+		};
+
+		hlsl::float32_t3x4 world;
+	};
+
+	static std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 2> CreateTestShader(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX);
+
+	// factory function; the constructor is protected, so instances are obtained through here
+	static core::smart_refctd_ptr<MeshDebugRenderer> create(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX);
+	// objects handed to the constructor and exposed through getInitParams()
+	struct SInitParams {
+
+		core::smart_refctd_ptr<video::IGPUDescriptorSet> meshDescriptor;
+		core::smart_refctd_ptr<video::IGPUPipelineLayout> pipe_layout; //when I'm looking at it from outside the class I need to know what kind of layout this is
+		core::smart_refctd_ptr<video::IGPUMeshPipeline> pipeline;
+	};
+	inline SInitParams& getInitParams() {return m_params;}
+
+	//I'm not going to go through every example to fix them up to use this static function instead, so I'm leaving the old one
+	//device should be const* but I'm not going to fix it right now 
+	//(scope creep)
+	// NOTE(review): the remarks above do not obviously describe addGeometries() below - possibly stale, confirm
+	bool addGeometries();
+
+	void removeGeometry(const uint32_t ix, const video::ISemaphore::SWaitInfo& info);
+
+	inline const auto& getGeometries() const {return m_geoms;}
+
+	void render(video::IGPUCommandBuffer* cmdbuf, nbl::hlsl::float32_t4x4 const& mvp) const;
+
+	SInstance m_instance;
+
+	//mesh layout
+	//PVP vertices at set 0 binding 0
+	//mesh data at set 1 binding 0
+	//they should be in the same set; using two sets is a tiny bit slower (1 additional API call) in exchange for slightly easier programming
+	nbl::core::smart_refctd_ptr<nbl::video::IGPUDescriptorSetLayout> mesh_layout{};
+
+	MeshDataBuffer m_geoms;
+protected:
+	inline MeshDebugRenderer(SInitParams&& _params) : m_params(std::move(_params)) {}
+	inline ~MeshDebugRenderer()	{
+		// clean shutdown: wait for the device to go idle before clearing the geometries; SubAllocatedDescriptorSet could also be made resilient against that, and issue `device->waitIdle` if not everything is freed
+		const_cast<video::ILogicalDevice*>(m_params.pipe_layout->getOriginDevice())->waitIdle();
+		clearGeometries({});
+	}
+	void clearGeometries(const video::ISemaphore::SWaitInfo& info);
+
+	SInitParams m_params;
+#undef EXPOSE_NABLA_NAMESPACES
+};
+
+}
\ No newline at end of file
diff --git a/MeshShader/include/SampleApp.h b/MeshShader/include/SampleApp.h
new file mode 100644
index 000000000..3821528d4
--- /dev/null
+++ b/MeshShader/include/SampleApp.h
@@ -0,0 +1,103 @@
+#pragma once
+
+#include "common.hpp"
+#include "nbl/ui/ICursorControl.h"
+#include "MeshRenderer.hpp"
+
+
+
+
+struct MeshletPush {
+	float32_t4x4 viewProj; //nbl::core::matrix4SIMD is 128bit??
+	constexpr static uint8_t object_type_count_max = 16;//it can go up til this struct hits the limit for push size
+	uint32_t objectInstanceCount[object_type_count_max]; //this data is going to cropped before pushing, if necessary
+};
+
+class MeshSampleApp final : public MonoWindowApplication, public BuiltinResourcesApplication
+{
+		using device_base_t = MonoWindowApplication;
+		using asset_base_t = BuiltinResourcesApplication;
+
+	public:
+		MeshSampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) 
+			: IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD),
+			device_base_t({1280,720}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) 
+        {}
+
+		bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override;
+		bool onAppTerminated() override; // marked `override` like its siblings so a signature drift in the base class becomes a compile error
+		IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override;
+
+	protected:
+		const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override;
+	private:
+		void UpdateScene(nbl::video::IGPUCommandBuffer* cb);
+		void update(const std::chrono::microseconds nextPresentationTimestamp);
+		void recreateFramebuffer(const uint16_t2 resolution);
+		void beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info);
+
+		// Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers
+		constexpr static inline uint32_t MaxFramesInFlight = 3u;
+		constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT;
+		constexpr static inline auto finalSceneRenderFormat = EF_R8G8B8A8_SRGB;
+		constexpr static inline auto TexturesImGUIBindingIndex = 0u;
+		// we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes
+		constexpr static inline auto MaxImGUITextures = 2u+MaxFramesInFlight;
+
+		smart_refctd_ptr<IGPURenderpass> m_renderpass;
+		smart_refctd_ptr<IGPUFramebuffer> m_framebuffer;
+
+		//i PROBABLY need to replace the debug renderer
+		smart_refctd_ptr<MeshDebugRenderer> m_renderer;
+		// frame submission state: one command buffer per frame in flight
+		smart_refctd_ptr<ISemaphore> m_semaphore;
+		uint64_t m_realFrameIx = 0;
+		std::array<smart_refctd_ptr<IGPUCommandBuffer>,MaxFramesInFlight> m_cmdBufs;
+		// input event readers
+		InputSystem::ChannelReader<IMouseEventChannel> mouse;
+		InputSystem::ChannelReader<IKeyboardEventChannel> keyboard;
+
+		core::smart_refctd_ptr<video::SubAllocatedDescriptorSet> meshlet_subAllocDS;
+		smart_refctd_ptr<IGPUPipelineLayout> meshletLayout;
+		smart_refctd_ptr<IGPUMeshPipeline> meshletPipeline;
+
+
+		smart_refctd_ptr<IGPUBuffer> meshGPUBuffer;
+		nbl::video::IDeviceMemoryAllocator::SAllocation mesh_allocation;
+		// UI stuff
+		//i really hate interface being its own object
+		struct CInterface
+		{
+			bool cameraControlSeparated = false;
+			void DrawCameraControls();
+
+			bool guizmoEnabled = true;
+			void UpdateImguizmo();
+
+			void operator()();
+			
+			smart_refctd_ptr<ext::imgui::UI> imGUI;
+			// descriptor set
+			smart_refctd_ptr<SubAllocatedDescriptorSet> subAllocDS;
+			SubAllocatedDescriptorSet::value_type renderColorViewDescIndex = SubAllocatedDescriptorSet::invalid_value;
+
+			core::matrix3x4SIMD model;
+
+			Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD());
+
+			TransformRequestParams transformParams;
+			uint16_t2 sceneResolution = {1280,720};
+			uint16_t4 widgetBox;
+			float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f;
+			float viewWidth = 10.f;
+			float camYAngle = 165.f / 180.f * 3.14159f; //wheres my pi constant
+			float camXAngle = 32.f / 180.f * 3.14159f;
+			uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed
+			bool isPerspective = true, isLH = true, flipGizmoY = true, move = false;
+			bool firstFrame = true;
+
+			ILogicalDevice::MappedMemoryRange meshMemoryRange;
+			void* mesh_mapped_memory = nullptr;
+
+		} interface;
+};
diff --git a/MeshShader/include/common.hpp b/MeshShader/include/common.hpp
new file mode 100644
index 000000000..fe7d086dd
--- /dev/null
+++ b/MeshShader/include/common.hpp
@@ -0,0 +1,19 @@
+#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
+#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
+
+
+#include "nbl/examples/examples.hpp"
+
+// the example's headers
+#include "transform.hpp"
+
+using namespace nbl;
+using namespace nbl::core;
+using namespace nbl::hlsl;
+using namespace nbl::system;
+using namespace nbl::asset;
+using namespace nbl::ui;
+using namespace nbl::video;
+using namespace nbl::examples;
+
+#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
\ No newline at end of file
diff --git a/MeshShader/include/transform.hpp b/MeshShader/include/transform.hpp
new file mode 100644
index 000000000..201966e3b
--- /dev/null
+++ b/MeshShader/include/transform.hpp
@@ -0,0 +1,164 @@
+#pragma once
+
+#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
+#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
+
+
+#include "nbl/ui/ICursorControl.h"
+
+#include "nbl/ext/ImGui/ImGui.h"
+
+#include "imgui/imgui_internal.h"
+#include "imguizmo/ImGuizmo.h"
+
+
+struct TransformRequestParams
+{
+	float camDistance = 8.f;
+	uint8_t sceneTexDescIx = ~0;
+	bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false;
+};
+// Draws the optional transform-editing widgets and the scene image inside an ImGui window, runs the ImGuizmo manipulator on `matrix` and returns the content-region size of that window.
+inline nbl::hlsl::uint16_t2 EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params)
+{
+	static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE);
+	static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL);
+	static bool useSnap = false;
+	static float snap[3] = { 1.f, 1.f, 1.f };
+	static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f };
+	static float boundsSnap[] = { 0.1f, 0.1f, 0.1f };
+	static bool boundSizing = false;
+	static bool boundSizingSnap = false;
+	// the statics above are UI state that persists between calls
+	if (params.editTransformDecomposition)
+	{
+		if (ImGui::IsKeyPressed(ImGuiKey_T))
+			mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
+		if (ImGui::IsKeyPressed(ImGuiKey_R))
+			mCurrentGizmoOperation = ImGuizmo::ROTATE;
+		if (ImGui::IsKeyPressed(ImGuiKey_S))
+			mCurrentGizmoOperation = ImGuizmo::SCALE;
+		if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE))
+			mCurrentGizmoOperation = ImGuizmo::TRANSLATE;
+		ImGui::SameLine();
+		if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE))
+			mCurrentGizmoOperation = ImGuizmo::ROTATE;
+		ImGui::SameLine();
+		if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE))
+			mCurrentGizmoOperation = ImGuizmo::SCALE;
+		if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL))
+			mCurrentGizmoOperation = ImGuizmo::UNIVERSAL;
+		float matrixTranslation[3], matrixRotation[3], matrixScale[3];
+		ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale);
+		ImGui::InputFloat3("Tr", matrixTranslation);
+		ImGui::InputFloat3("Rt", matrixRotation);
+		ImGui::InputFloat3("Sc", matrixScale);
+		ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix);
+
+		if (mCurrentGizmoOperation != ImGuizmo::SCALE)
+		{
+			if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL))
+				mCurrentGizmoMode = ImGuizmo::LOCAL;
+			ImGui::SameLine();
+			if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD))
+				mCurrentGizmoMode = ImGuizmo::WORLD;
+		}
+		if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyDown(ImGuiKey_LeftShift)) // Shift is a held modifier: IsKeyPressed only fires on the frame a key goes down, so requiring it for both keys almost never matched
+			useSnap = !useSnap;
+		ImGui::Checkbox("##UseSnap", &useSnap);
+		ImGui::SameLine();
+
+		switch (mCurrentGizmoOperation)
+		{
+			case ImGuizmo::TRANSLATE:
+				ImGui::InputFloat3("Snap", &snap[0]);
+				break;
+			case ImGuizmo::ROTATE:
+				ImGui::InputFloat("Angle Snap", &snap[0]);
+				break;
+			case ImGuizmo::SCALE:
+				ImGui::InputFloat("Scale Snap", &snap[0]);
+				break;
+		}
+		ImGui::Checkbox("Bound Sizing", &boundSizing);
+		if (boundSizing)
+		{
+			ImGui::PushID(3);
+			ImGui::Checkbox("##BoundSizing", &boundSizingSnap);
+			ImGui::SameLine();
+			ImGui::InputFloat3("Snap", boundsSnap);
+			ImGui::PopID();
+		}
+	}
+
+	ImGuiIO& io = ImGui::GetIO();
+	float viewManipulateRight = io.DisplaySize.x;
+	float viewManipulateTop = 0;
+	static ImGuiWindowFlags gizmoWindowFlags = 0;
+
+	/*
+		for the "useWindow" case we just render to a gui area,
+		otherwise to fake full screen transparent window
+
+		note that for both cases we make sure gizmo being
+		rendered is aligned to our texture scene using
+		imgui  "cursor" screen positions
+	*/
+// TODO: this shouldn't be handled here I think
+	SImResourceInfo info;
+	info.textureID = params.sceneTexDescIx;
+	info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER;
+
+	nbl::hlsl::uint16_t2 retval;
+	if (params.useWindow)
+	{
+		ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing);
+		ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing);
+		ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f));
+		ImGui::Begin("Gizmo", 0, gizmoWindowFlags);
+		ImGuizmo::SetDrawlist();
+
+		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
+		ImVec2 windowPos = ImGui::GetWindowPos();
+		ImVec2 cursorPos = ImGui::GetCursorScreenPos();
+
+		ImGui::Image(info, contentRegionSize);
+		ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
+		retval = { contentRegionSize.x,contentRegionSize.y };
+
+		viewManipulateRight = cursorPos.x + contentRegionSize.x;
+		viewManipulateTop = cursorPos.y;
+		// while the mouse hovers the inner rect the window is made non-movable for the next frame
+		ImGuiWindow* window = ImGui::GetCurrentWindow();
+		gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0);
+	}
+	else
+	{
+		ImGui::SetNextWindowPos(ImVec2(0, 0));
+		ImGui::SetNextWindowSize(io.DisplaySize);
+		ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window
+		ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs);
+
+		ImVec2 contentRegionSize = ImGui::GetContentRegionAvail();
+		ImVec2 cursorPos = ImGui::GetCursorScreenPos();
+
+		ImGui::Image(info, contentRegionSize);
+		ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y);
+		retval = { contentRegionSize.x,contentRegionSize.y };
+
+		viewManipulateRight = cursorPos.x + contentRegionSize.x;
+		viewManipulateTop = cursorPos.y;
+	}
+
+	ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL);
+
+	if (params.enableViewManipulate)
+		ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010);
+	// closes whichever window was begun above and pops its background colour
+	ImGui::End();
+	ImGui::PopStyleColor();
+
+	return retval;
+}
+
+#endif // _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_
\ No newline at end of file
diff --git a/MeshShader/main.cpp b/MeshShader/main.cpp
new file mode 100644
index 000000000..206848f49
--- /dev/null
+++ b/MeshShader/main.cpp
@@ -0,0 +1,16 @@
+// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#include "common.hpp"
+#include "SampleApp.h"
+
+/* 
+Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window.
+
+Written with Nabla's UI extension and got integrated with ImGuizmo to handle scene's object translations.
+*/
+int main(int argc, char** argv) {
+	//expanded macro for easier IDE peeking
+	return MeshSampleApp::main<MeshSampleApp>(argc, argv);
+}
\ No newline at end of file
diff --git a/MeshShader/src/MeshRenderer.cpp b/MeshShader/src/MeshRenderer.cpp
new file mode 100644
index 000000000..37ea7f631
--- /dev/null
+++ b/MeshShader/src/MeshRenderer.cpp
@@ -0,0 +1,195 @@
+#include "MeshRenderer.hpp"
+
+
+
+namespace nbl::examples {
+
+
+
+	// Convenience macro: brings the nbl::core, nbl::system, nbl::asset and nbl::video
+	// namespaces into whatever scope it is expanded in (the caller supplies the trailing `;`).
+	#define EXPOSE_NABLA_NAMESPACES \
+		using namespace nbl::core; \
+		using namespace nbl::system; \
+		using namespace nbl::asset; \
+		using namespace nbl::video
+
+	// Expanded once here, at namespace scope, for the definitions that follow.
+	EXPOSE_NABLA_NAMESPACES;
+
+	// Loads and compiles the mesh and fragment shaders used by the debug mesh pipeline.
+	//
+	// assMan     - asset manager used to load the HLSL sources (working directory is the virtual root)
+	// renderpass - only used to reach the logical device that compiles the shaders
+	// subpassIX  - currently unused
+	//
+	// Returns { mesh shader, fragment shader }. An entry is nullptr when loading or compiling that
+	// shader failed; both entries are nullptr when `assMan` or `renderpass` is null.
+	std::array<const core::smart_refctd_ptr<nbl::asset::IShader>, 2> MeshDebugRenderer::CreateTestShader(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX) {
+		// both pointers are dereferenced below, so bail out instead of crashing on a null argument
+		if (!assMan || !renderpass) {
+			printf("cannot create shaders without an asset manager and a renderpass\n");
+			return { nullptr, nullptr };
+		}
+
+		auto device = const_cast<ILogicalDevice*>(renderpass->getOriginDevice());
+		auto logger = device->getLogger();
+		// Loads `relPath` through the asset manager and compiles it for `stage` with `extraDefines`;
+		// returns nullptr (after printing the reason) on any failure.
+		auto loadCompileAndCreateShader = [&](const std::string& relPath, hlsl::ShaderStage stage, std::span<const asset::IShaderCompiler::SMacroDefinition> extraDefines) -> smart_refctd_ptr<IShader>
+			{
+				IAssetLoader::SAssetLoadParams lp = {};
+				lp.logger = logger;
+				lp.workingDirectory = ""; // virtual root
+				auto assetBundle = assMan->getAsset(relPath, lp);
+				const auto assets = assetBundle.getContents();
+				if (assets.empty()) {
+					printf("asset was empty - %s\n", relPath.c_str());
+					return nullptr;
+				}
+
+				// the first asset of the bundle is expected to be the shader source
+				auto sourceRaw = IAsset::castDown<IShader>(assets[0]);
+				if (!sourceRaw) {
+					printf("source raw was nullptr - %s\n", relPath.c_str());
+					return nullptr;
+				}
+
+				nbl::video::ILogicalDevice::SShaderCreationParameters creationParams{
+					.source = sourceRaw.get(),
+					.optimizer = nullptr,
+					.readCache = nullptr,
+					.writeCache = nullptr,
+					.extraDefines = extraDefines,
+					.stage = stage
+				};
+
+				auto ret = device->compileShader(creationParams);
+				if (ret.get() == nullptr) {
+					printf("failed to compile shader - %s\n", relPath.c_str());
+				}
+				return ret;
+			};
+
+		// WORKGROUP_SIZE is handed to the mesh shader as a preprocessor define
+		constexpr uint32_t WorkgroupSize = 64;
+		const std::string WorkgroupSizeAsStr = std::to_string(WorkgroupSize);
+		const IShaderCompiler::SMacroDefinition WorkgroupSizeDefine = { "WORKGROUP_SIZE",WorkgroupSizeAsStr };
+
+		// TODO: OBJECT_COUNT / INSTANCE_COUNT defines (to be derived from device limits) would be appended here
+		const IShaderCompiler::SMacroDefinition meshArray[] = { WorkgroupSizeDefine };
+		return {
+			//loadCompileAndCreateShader("app_resources/geom.task.hlsl", IShader::E_SHADER_STAGE::ESS_TASK, { meshArray }),
+			loadCompileAndCreateShader("app_resources/geom.mesh.hlsl", IShader::E_SHADER_STAGE::ESS_MESH, { meshArray }),
+			loadCompileAndCreateShader("app_resources/geom.frag.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT, {})
+		};
+	}
+
+	// Builds a MeshDebugRenderer for `renderpass`/`subpassIX`: creates the mesh-data descriptor set
+	// layout, the pipeline layout, compiles the shaders and creates the mesh pipeline.
+	// Returns nullptr when an argument is null or when any of those steps fails.
+	core::smart_refctd_ptr<MeshDebugRenderer> MeshDebugRenderer::create(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX)
+	{
+		EXPOSE_NABLA_NAMESPACES;
+
+		if (!renderpass || !assMan)
+			return nullptr;
+		auto device = const_cast<ILogicalDevice*>(renderpass->getOriginDevice());
+		auto logger = device->getLogger();
+
+		SInitParams init;
+
+		// descriptor set layout: one storage buffer (the mesh data) visible to the mesh stage
+		smart_refctd_ptr<IGPUDescriptorSetLayout> meshLayout;
+		{
+			using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS;
+			const IGPUDescriptorSetLayout::SBinding bindings[] =
+			{ //meshletdataobject
+				{
+					.binding = 0,
+					.type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
+					.createFlags = binding_flags_t::ECF_NONE,
+					.stageFlags = IShader::E_SHADER_STAGE::ESS_MESH,
+					.count = 1
+				}
+			};
+			meshLayout = device->createDescriptorSetLayout(bindings);
+			if (!meshLayout)
+			{
+				logger->log("Could not create mesh descriptor set layout!", ILogger::ELL_ERROR);
+				return nullptr;
+			}
+		}
+
+		// create pipeline layout
+		const SPushConstantRange ranges[] = { {
+			.stageFlags = IShader::E_SHADER_STAGE::ESS_TASK | IShader::E_SHADER_STAGE::ESS_MESH | hlsl::ShaderStage::ESS_FRAGMENT,
+			.offset = 0,
+			.size = sizeof(SInstance::SPushConstants),
+		} };
+
+		// pass a copy of the smart pointer: `meshLayout` itself is still needed below for `ret->mesh_layout`
+		init.pipe_layout = device->createPipelineLayout(ranges, smart_refctd_ptr<const IGPUDescriptorSetLayout>(meshLayout));
+		if (!init.pipe_layout)
+		{
+			logger->log("Could not create mesh pipeline layout!", ILogger::ELL_ERROR);
+			return nullptr;
+		}
+
+		// [0] = mesh shader, [1] = fragment shader; either is nullptr if it failed to load or compile
+		auto shaderRet = CreateTestShader(assMan, renderpass, subpassIX);
+		if (!shaderRet[0] || !shaderRet[1])
+		{
+			logger->log("Could not create mesh pipeline shaders!", ILogger::ELL_ERROR);
+			return nullptr;
+		}
+
+		// create pipelines
+		{
+			IGPUMeshPipeline::SCreationParams params{
+				.layout = init.pipe_layout.get(),
+				//.taskShader = {.shader = shaderRet[0].get(), .entryPoint = "main"},
+				.meshShader = {.shader = shaderRet[0].get(), .entryPoint = "main" },
+				.fragmentShader = {.shader = shaderRet[1].get(), .entryPoint = "main" }
+			};
+			// no vertex input, or assembly
+			params.cached.rasterization.faceCullingMode = EFCM_NONE;
+			params.cached.subpassIx = subpassIX;
+			params.renderpass = renderpass;
+
+			if (!device->createMeshPipelines(nullptr, { &params, 1 }, &init.pipeline))
+			{
+				logger->log("Could not create Mesh Pipeline!", ILogger::ELL_ERROR);
+				return nullptr;
+			}
+		}
+
+		auto ret = smart_refctd_ptr<MeshDebugRenderer>(new MeshDebugRenderer(std::move(init)), dont_grab);
+		// kept so the application can allocate descriptor sets with this layout
+		ret->mesh_layout = meshLayout;
+
+		return ret;
+	}
+
+
+	// Calls removeGeometry for every slot index in [0, m_geoms.MaxObjectCount), forwarding `info`.
+	void MeshDebugRenderer::clearGeometries(const video::ISemaphore::SWaitInfo& info) {
+		//im currently assuming every object gets loaded correctly. definitely incorrect
+		// 32-bit counter, matching removeGeometry's index type: a uint8_t counter would wrap
+		// and never terminate once MaxObjectCount exceeds 255
+		for (uint32_t i = 0; i < m_geoms.MaxObjectCount; i++) {
+			removeGeometry(i, info);
+		}
+	}
+
+	// Placeholder: the body is currently empty, so `ix` and `info` are ignored and nothing is removed.
+	void MeshDebugRenderer::removeGeometry(const uint32_t ix, const video::ISemaphore::SWaitInfo& info)
+	{
+		EXPOSE_NABLA_NAMESPACES;
+
+
+	}
+
+	// Records the debug mesh draw into `cmdbuf`: binds the mesh descriptor set at set index 0 and the
+	// mesh pipeline, pushes `mvp` plus a vertex count of 36, then issues a single 1x1x1 mesh-task draw.
+	// The recorded commands are wrapped in a "MeshDebugRenderer::render" debug marker.
+	void MeshDebugRenderer::render(video::IGPUCommandBuffer* cmdbuf, nbl::hlsl::float32_t4x4 const& mvp) const
+	{
+		EXPOSE_NABLA_NAMESPACES;
+
+		// push-constant payload, built up front; it is only consumed by pushConstants below
+		SInstance::SPushConstants pushData{
+			.viewProj = mvp,
+			.vertCount = 36
+		};
+		const auto* const pipelineLayout = m_params.pipe_layout.get();
+		const auto* meshSet = m_params.meshDescriptor.get();
+
+		cmdbuf->beginDebugMarker("MeshDebugRenderer::render");
+
+		cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS, pipelineLayout, 0, 1u, &meshSet);
+		cmdbuf->bindMeshPipeline(m_params.pipeline.get());
+		cmdbuf->pushConstants(pipelineLayout, hlsl::ShaderStage::ESS_TASK | hlsl::ShaderStage::ESS_MESH | hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(pushData), &pushData);
+		cmdbuf->drawMeshTasks(1, 1, 1);
+
+		cmdbuf->endDebugMarker();
+	}
+}//namespace nbl::examples
\ No newline at end of file
diff --git a/MeshShader/src/SampleApp.cpp b/MeshShader/src/SampleApp.cpp
new file mode 100644
index 000000000..969b07fcd
--- /dev/null
+++ b/MeshShader/src/SampleApp.cpp
@@ -0,0 +1,801 @@
+#include "SampleApp.h"
+
+#include "transform.hpp"
+
+#include <glm/gtc/matrix_transform.hpp>
+
+
+
+// Returns the positions of a cube with corners at +/-0.5 on each axis as a non-indexed
+// triangle list: 6 faces x 2 triangles x 3 vertices = 36 positions.
+std::vector<hlsl::vector<float, 3>> GetCubeData()
+{
+    using position_t = hlsl::vector<float, 3>;
+
+    // the eight cube corners, referenced by the index table below
+    const std::array<position_t, 8> corners{
+        position_t{-0.5f, -0.5f, -0.5f},
+        position_t{ 0.5f, -0.5f, -0.5f},
+        position_t{ 0.5f,  0.5f, -0.5f},
+        position_t{-0.5f,  0.5f, -0.5f},
+        position_t{-0.5f, -0.5f,  0.5f},
+        position_t{ 0.5f, -0.5f,  0.5f},
+        position_t{ 0.5f,  0.5f,  0.5f},
+        position_t{-0.5f,  0.5f,  0.5f}
+    };
+
+    // corner indices: three per triangle, two triangles per face
+    constexpr uint32_t triangleCorners[] = {
+        0, 2, 1,    0, 3, 2, // -z
+        4, 5, 6,    4, 6, 7, // +z
+        4, 7, 3,    4, 3, 0, // -x
+        1, 2, 6,    1, 6, 5, // +x
+        4, 0, 1,    4, 1, 5, // -y
+        3, 7, 6,    3, 6, 2  // +y
+    };
+
+    std::vector<position_t> triangleList;
+    for (const uint32_t corner : triangleCorners)
+        triangleList.push_back(corners[corner]);
+
+    return triangleList;
+}
+
+
+
+    // One-time application setup. In order: base-class initialization, timeline semaphore, per-frame
+    // command buffers, the offscreen scene renderpass, the mesh renderer, the ImGUI extension and its
+    // descriptor set, and finally the host-visible mesh buffer that receives the cube vertices.
+    // Returns false (through logFail) as soon as a required resource cannot be created.
+    bool MeshSampleApp::onAppInitialized(smart_refctd_ptr<ISystem>&& system) {
+        if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system)))
+            return false;
+        if (!device_base_t::onAppInitialized(smart_refctd_ptr(system)))
+            return false;
+
+        m_semaphore = m_device->createSemaphore(m_realFrameIx);
+        if (!m_semaphore)
+            return logFail("Failed to Create a Semaphore!");
+
+        auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT);
+        // the pool does not change between iterations, so test it once up front
+        if (!pool)
+            return logFail("Couldn't create Command Pool!");
+        for (auto i = 0u; i<MaxFramesInFlight; i++)
+        {
+            if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_cmdBufs.data()+i,1}))
+                return logFail("Couldn't create Command Buffer!");
+        }
+
+        // for the scene drawing pass
+        {
+            IGPURenderpass::SCreationParams params = {};
+            const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = {
+                {{
+                    {
+                        .format = sceneRenderDepthFormat,
+                        .samples = IGPUImage::ESCF_1_BIT,
+                        .mayAlias = false
+                    },
+                    /*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR},
+                    /*.storeOp = */{IGPURenderpass::STORE_OP::STORE},
+                    /*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED},
+                    /*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}
+                }},
+                IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd
+            };
+            params.depthStencilAttachments = depthAttachments;
+            const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = {
+                {{
+                    {
+                        .format = finalSceneRenderFormat,
+                        .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT,
+                        .mayAlias = false
+                    },
+                    /*.loadOp = */IGPURenderpass::LOAD_OP::CLEAR,
+                    /*.storeOp = */IGPURenderpass::STORE_OP::STORE,
+                    /*.initialLayout = */IGPUImage::LAYOUT::UNDEFINED,
+                    /*.finalLayout = */ IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read
+                }},
+                IGPURenderpass::SCreationParams::ColorAttachmentsEnd
+            };
+            params.colorAttachments = colorAttachments;
+            IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = {
+                {},
+                IGPURenderpass::SCreationParams::SubpassesEnd
+            };
+            subpasses[0].depthStencilAttachment = {{.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}};
+            subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}};
+            params.subpasses = subpasses;
+            params.dependencies = {};
+            m_renderpass = m_device->createRenderpass(std::move(params));
+            if (!m_renderpass)
+                return logFail("Failed to create Scene Renderpass!");
+        }
+
+        m_renderer = MeshDebugRenderer::create(m_assetMgr.get(), m_renderpass.get(), 0);
+        // create() returns nullptr on failure and m_renderer is dereferenced further down
+        if (!m_renderer)
+            return logFail("Failed to create the Mesh Renderer!");
+
+        // Create ImGUI
+        {
+            auto scRes = static_cast<CDefaultSwapchainFramebuffers*>(m_surface->getSwapchainResources());
+            ext::imgui::UI::SCreationParameters params = {};
+            params.resources.texturesInfo = {.setIx=0u,.bindingIx=TexturesImGUIBindingIndex};
+            params.resources.samplersInfo = {.setIx=0u,.bindingIx=1u};
+
+            params.utilities = m_utils;
+            params.transfer = getTransferUpQueue();
+            params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(),params.resources.texturesInfo,params.resources.samplersInfo,MaxImGUITextures);
+            params.assetManager = make_smart_refctd_ptr<IAssetManager>(smart_refctd_ptr(m_system));
+            params.renderpass = smart_refctd_ptr<IGPURenderpass>(scRes->getRenderpass());
+            params.subpassIx = 0u;
+            params.pipelineCache = nullptr;
+            interface.imGUI = ext::imgui::UI::create(std::move(params));
+            if (!interface.imGUI) {
+                return logFail("Failed to create `nbl::ext::imgui::UI` class");
+            }
+        }
+
+        // create rest of User Interface
+        {
+            auto* imgui = interface.imGUI.get();
+            // create the suballocated descriptor set
+            {
+                // note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources
+                const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u);
+                auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,{&layout,1});
+                if (!pool)
+                    return logFail("Failed to create the descriptor pool");
+                auto ds = pool->createDescriptorSet(smart_refctd_ptr<const IGPUDescriptorSetLayout>(layout));
+                if (!ds)
+                    return logFail("Failed to create the descriptor set");
+                interface.subAllocDS = make_smart_refctd_ptr<SubAllocatedDescriptorSet>(std::move(ds));
+                if (!interface.subAllocDS) {
+                    return logFail("Failed to create the descriptor set");
+                }
+                // make sure Texture Atlas slot is taken for eternity
+                {
+                    auto dummy = SubAllocatedDescriptorSet::invalid_value;
+                    interface.subAllocDS->multi_allocate(0,1,&dummy);
+                    assert(dummy==ext::imgui::UI::FontAtlasTexId); //?
+                }
+                // write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout
+                IGPUDescriptorSet::SDescriptorInfo info = {};
+                info.desc = smart_refctd_ptr<nbl::video::IGPUImageView>(interface.imGUI->getFontAtlasView());
+                info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL;
+                const IGPUDescriptorSet::SWriteDescriptorSet write = {
+                    .dstSet = interface.subAllocDS->getDescriptorSet(),
+                    .binding = TexturesImGUIBindingIndex,
+                    .arrayElement = ext::imgui::UI::FontAtlasTexId,
+                    .count = 1,
+                    .info = &info
+                };
+                if (!m_device->updateDescriptorSets({&write,1},{}))
+                    return logFail("Failed to write the descriptor set");
+            }
+
+            nbl::video::IGPUBuffer::SCreationParams gpubuff_params = {};
+            gpubuff_params.size = sizeof(MeshletObjectData) * MeshDataBuffer::MaxObjectCount + sizeof(hlsl::float32_t4x4) * MeshDataBuffer::MaxInstanceCount;
+            // While the usages on `ICPUBuffers` are mere hints to our automated CPU-to-GPU conversion systems which need to be patched up anyway,
+            // the usages on an `IGPUBuffer` are crucial to specify correctly.
+            // This buffer is written below into binding 0 of the mesh descriptor set, which the renderer
+            // declares as ET_STORAGE_BUFFER, hence the storage (not uniform) usage.
+            gpubuff_params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT;
+            // remember the size: `gpubuff_params` is moved from when the buffer is created
+            const auto meshBufferSize = gpubuff_params.size;
+            meshGPUBuffer = m_device->createBuffer(std::move(gpubuff_params));
+            if (!meshGPUBuffer)
+                return logFail("failed to create the mesh data buffer");
+            meshGPUBuffer->setObjectDebugName("mesh data buffer");
+
+            nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = meshGPUBuffer->getMemoryReqs();
+            // you can simply constrain the memory requirements by AND-ing the type bits of the host visible memory types
+            reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits();
+            mesh_allocation = m_device->allocate(reqs, meshGPUBuffer.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE);
+            if (!mesh_allocation.isValid()) {
+                return logFail("failed to allocate device memory");
+            }
+            assert(meshGPUBuffer->getBoundMemory().memory == mesh_allocation.memory.get());
+
+            // This is a cool utility you can use instead of counting up how much of each descriptor type you need to N_i allocate descriptor sets with layout L_i from a single pool
+            smart_refctd_ptr<nbl::video::IDescriptorPool> pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &m_renderer->mesh_layout.get(),1 });
+            if (!pool)
+                return logFail("Failed to create the mesh descriptor pool");
+
+            // createDescriptorSet consumes the smart pointer it is given, so hand it a copy and leave the renderer's own reference intact
+            auto layout_smart_ptr_copy = m_renderer->mesh_layout;
+            m_renderer->getInitParams().meshDescriptor = pool->createDescriptorSet(layout_smart_ptr_copy);
+            if (!m_renderer->getInitParams().meshDescriptor)
+                return logFail("Failed to create the mesh descriptor set");
+            m_renderer->getInitParams().meshDescriptor->setObjectDebugName("mesh descriptor");
+            {
+                IGPUDescriptorSet::SDescriptorInfo info[1];
+                info[0].desc = smart_refctd_ptr(meshGPUBuffer); // bad API, too late to change, should just take raw-pointers since not consumed
+                info[0].info.buffer = { .offset = 0,.size = meshBufferSize };
+                IGPUDescriptorSet::SWriteDescriptorSet writes[1] = {
+                    {
+                        .dstSet = m_renderer->getInitParams().meshDescriptor.get(),
+                        .binding = 0,
+                        .arrayElement = 0,
+                        .count = 1,
+                        .info = info
+                    }
+                };
+                if (!m_device->updateDescriptorSets(writes, {}))
+                    return logFail("Failed to write the mesh descriptor set");
+            }
+
+            // map the whole allocation for CPU writes
+            interface.meshMemoryRange.memory = mesh_allocation.memory.get();
+            interface.meshMemoryRange.offset = 0;
+            interface.meshMemoryRange.length = mesh_allocation.memory->getAllocationSize();
+            interface.meshMemoryRange.range = { interface.meshMemoryRange.offset, interface.meshMemoryRange.length };
+            if (!mesh_allocation.memory->map(interface.meshMemoryRange.range, IDeviceMemoryAllocation::EMCAF_WRITE)) {
+                return logFail("failed to map device memory");
+            }
+            interface.mesh_mapped_memory = mesh_allocation.memory->getMappedPointer();
+            if (!interface.mesh_mapped_memory) {
+                return logFail("failed to map device memory");
+            }
+
+            // upload the cube vertices to the start of the buffer, refusing to write past its end
+            const auto cubeData = GetCubeData();
+            const size_t cubeDataSize = sizeof(hlsl::vector<float, 3>) * cubeData.size();
+            if (cubeDataSize > meshBufferSize)
+                return logFail("cube data does not fit into the mesh data buffer");
+
+            memcpy(interface.mesh_mapped_memory, cubeData.data(), cubeDataSize);
+            // make the CPU write visible to the device
+            m_device->flushMappedMemoryRanges(1, &interface.meshMemoryRange);
+
+            imgui->registerListener([this](){interface();});
+
+        }
+
+        onAppInitializedFinish();
+        return true;
+    }
+
+   
+
+    // Gives back the font-atlas slot reserved in the ImGUI descriptor set, then runs the base-class teardown
+    // and returns its result.
+    bool MeshSampleApp::onAppTerminated() {
+        // the suballocated set only exists once initialization got as far as creating the UI resources
+        if (interface.subAllocDS)
+        {
+            SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId;
+            IGPUDescriptorSet::SDropDescriptorSet dummy[1];
+            interface.subAllocDS->multi_deallocate(dummy,TexturesImGUIBindingIndex,1,&fontAtlasDescIx);
+        }
+        return device_base_t::onAppTerminated();
+    }
+
+    // Records and submits one frame: processes input/UI, (re)creates the offscreen framebuffer when the
+    // scene resolution changed, draws the scene into it, draws ImGUI into the acquired swapchain image,
+    // then submits to the graphics queue.
+    // Returns the semaphore/value pair signalled by the submit; the semaphore is nulled when the submit
+    // fails, and a default-constructed value is returned when ImGUI rendering fails.
+    IQueue::SSubmitInfo::SSemaphoreInfo MeshSampleApp::renderFrame(const std::chrono::microseconds nextPresentationTimestamp) {
+        // CPU events
+        update(nextPresentationTimestamp);
+
+        // recreate the scene framebuffer if it does not exist yet or its size differs from the requested scene resolution
+        const auto& virtualWindowRes = interface.sceneResolution;
+        if (!m_framebuffer || m_framebuffer->getCreationParameters().width!=virtualWindowRes[0] || m_framebuffer->getCreationParameters().height!=virtualWindowRes[1])
+            recreateFramebuffer(virtualWindowRes);
+
+        // command buffers are cycled through by frame index
+        const auto resourceIx = m_realFrameIx % MaxFramesInFlight;
+
+        auto* const cb = m_cmdBufs.data()[resourceIx].get();
+        cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT);
+        cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
+        // clear to black for both things
+        const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} };
+        // scene pass, skipped while there is no framebuffer to render into
+        if (m_framebuffer)
+        {
+            cb->beginDebugMarker("UISampleApp Scene Frame");
+            {
+                // depth is cleared to 0 -- NOTE(review): presumably a reversed depth range, confirm against the camera projection
+                const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f };
+                const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo{
+                    .framebuffer = m_framebuffer.get(),
+                    .colorClearValues = &clearValue,
+                    .depthStencilClearValues = &farValue,
+                    .renderArea = {
+                        .offset = {0,0},
+                        .extent = {virtualWindowRes[0],virtualWindowRes[1]}
+                    }
+                };
+                beginRenderpass(cb, renderpassInfo);
+            }
+            // draw scene
+            UpdateScene(cb);
+            cb->endRenderPass();
+            cb->endDebugMarker();
+        }
+        // UI pass, rendered into the currently acquired swapchain image
+        {
+            cb->beginDebugMarker("UISampleApp IMGUI Frame");
+            { //begin imgui subpass
+                auto scRes = static_cast<CDefaultSwapchainFramebuffers*>(m_surface->getSwapchainResources());
+                const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = {
+                    .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex),
+                    .colorClearValues = &clearValue,
+                    .depthStencilClearValues = nullptr,
+                    .renderArea = {
+                        .offset = {0,0},
+                        .extent = {m_window->getWidth(),m_window->getHeight()}
+                    }
+                };
+                beginRenderpass(cb, renderpassInfo);
+            }
+            // draw ImGUI
+            {
+                auto* imgui = interface.imGUI.get();
+                auto* pipeline = imgui->getPipeline();
+                cb->bindGraphicsPipeline(pipeline);
+                // note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx
+                const auto* ds = interface.subAllocDS->getDescriptorSet();
+                cb->bindDescriptorSets(EPBP_GRAPHICS,pipeline->getLayout(),imgui->getCreationParameters().resources.texturesInfo.setIx,1u,&ds);
+                // a timepoint in the future to release streaming resources for geometry
+                const ISemaphore::SWaitInfo drawFinished = {.semaphore=m_semaphore.get(),.value=m_realFrameIx+1u};
+                if (!imgui->render(cb,drawFinished))
+                {
+                    // NOTE: this early return leaves the renderpass, the debug marker and the command buffer recording open
+                    m_logger->log("TODO: need to present acquired image before bailing because its already acquired.",ILogger::ELL_ERROR);
+                    return {};
+                }
+            }
+            cb->endRenderPass();
+            cb->endDebugMarker();
+        }
+        cb->end();
+
+        // the submit signals m_semaphore with the incremented frame index
+        IQueue::SSubmitInfo::SSemaphoreInfo retval =
+        {
+            .semaphore = m_semaphore.get(),
+            .value = ++m_realFrameIx,
+            .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS
+        };
+        const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] =
+        {
+            {.cmdbuf = cb }
+        };
+        // wait on the swapchain acquire before executing
+        const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = {
+            {
+                .semaphore = device_base_t::getCurrentAcquire().semaphore,
+                .value = device_base_t::getCurrentAcquire().acquireCount,
+                .stageMask = PIPELINE_STAGE_FLAGS::NONE
+            }
+        };
+        const IQueue::SSubmitInfo infos[] =
+        {
+            {
+                .waitSemaphores = acquired,
+                .commandBuffers = commandBuffers,
+                .signalSemaphores = {&retval,1}
+            }
+        };
+        
+        if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS)
+        {
+            retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal
+            // undo the increment made above, nothing was submitted for this frame index
+            m_realFrameIx--;
+        }
+
+
+        m_window->setCaption("[Nabla Engine] Mesh Shader Demo");
+        return retval;
+    }
+
+
+    // Returns a pointer to a static, DependenciesEnd-terminated table with two entries:
+    // External -> subpass 0 and subpass 0 -> External.
+    const video::IGPURenderpass::SCreationParams::SSubpassDependency* MeshSampleApp::getDefaultSubpassDependencies() const {
+        using dependency_t = IGPURenderpass::SCreationParams::SSubpassDependency;
+
+        // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present
+        static const dependency_t dependencyTable[] = {
+            // incoming: no prior writes need to be made available (the attachment is cleared),
+            // the only concern is that the layout transition completes before the color write
+            {
+                .srcSubpass = dependency_t::External,
+                .dstSubpass = 0,
+                .memoryBarrier = {
+                    // the semaphore wait should already provide the synchronization on the source side
+                    .srcStageMask = PIPELINE_STAGE_FLAGS::NONE,
+                    .srcAccessMask = ACCESS_FLAGS::NONE,
+                    .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+                    .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+                }
+                // view offsets and flags stay at their defaults
+            },
+            // outgoing: the layout transition may only begin once all color output is done
+            {
+                .srcSubpass = 0,
+                .dstSubpass = dependency_t::External,
+                .memoryBarrier = {
+                    // color output is the last stage that can modify the color attachment, depth is implicitly earlier
+                    .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+                    // only writes are listed, reads can't be made available
+                    .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+                    // destination masks left empty: the spec requires nothing when presentation is the destination
+                }
+                // view offsets and flags stay at their defaults
+            },
+            IGPURenderpass::SCreationParams::DependenciesEnd
+        };
+        return dependencyTable;
+    }
+
+
+    // Combines the camera's concatenated matrix with `interface.model` into a world-view-projection
+    // matrix and hands it to the mesh renderer, which records its draw into `cb`.
+    void MeshSampleApp::UpdateScene(nbl::video::IGPUCommandBuffer* cb) {
+        const auto& camera = interface.camera;
+
+        float32_t4x4 viewProjMatrix;
+        memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix));
+
+        // only the first sizeof(float32_t3x4) bytes of the model are copied
+        hlsl::float32_t3x4 world;
+        memcpy(&world, &interface.model, sizeof(world));
+
+        // the product is evaluated in double precision and converted back to float
+        const float32_t4x4 worldViewProj = float32_t4x4(math::linalg::promoted_mul(float64_t4x4(viewProjMatrix), float64_t3x4(world)));
+
+        m_renderer->render(cb, worldViewProj);
+    }
+
+
+    // Per-frame CPU update: applies the UI-chosen camera speeds, drains the mouse and keyboard event
+    // channels (feeding the camera only while `interface.move` is set), and forwards the captured
+    // events plus cursor/window state to the ImGUI extension.
+    void MeshSampleApp::update(const std::chrono::microseconds nextPresentationTimestamp)
+    {
+        interface.camera.setMoveSpeed(interface.moveSpeed);
+        interface.camera.setRotateSpeed(interface.rotateSpeed);
+
+        m_inputSystem->getDefaultMouse(&mouse);
+        m_inputSystem->getDefaultKeyboard(&keyboard);
+
+        // events captured this frame, handed to ImGUI at the end
+        std::vector<SMouseEvent> capturedMouse{};
+        std::vector<SKeyboardEvent> capturedKeyboard{};
+
+        // TODO: should be a member really
+        // newest event timestamp seen so far, shared by both channels; older events are not captured
+        static std::chrono::microseconds newestEventTimestamp{};
+
+        // begin/end are always called on the camera, even when no events are fed to it: skipping them
+        // would freeze the up/down state of the keys, and `perActionDt` would become huge the first time
+        // event processing resumes because `timeDiff` is measured from `lastVirtualUpTimeStamp`
+        interface.camera.beginInputProcessing(nextPresentationTimestamp);
+        {
+            mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void
+                {
+                    // the camera handles the events with its own implementation, it does not capture them
+                    if (interface.move)
+                        interface.camera.mouseProcess(events);
+
+                    // capture for the UI
+                    for (const auto& e : events)
+                    {
+                        if (e.timeStamp >= newestEventTimestamp)
+                        {
+                            newestEventTimestamp = e.timeStamp;
+                            capturedMouse.emplace_back(e);
+
+                            // each scroll event moves the index one step in the scroll direction
+                            if (e.type==nbl::ui::SMouseEvent::EET_SCROLL)
+                                interface.gcIndex += int16_t(core::sign(e.scrollEvent.verticalScroll));
+                        }
+                    }
+                },
+                m_logger.get()
+            );
+            keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void
+                {
+                    // the camera handles the events with its own implementation, it does not capture them
+                    if (interface.move)
+                        interface.camera.keyboardProcess(events);
+
+                    // capture for the UI
+                    for (const auto& e : events)
+                    {
+                        if (e.timeStamp >= newestEventTimestamp)
+                        {
+                            newestEventTimestamp = e.timeStamp;
+                            capturedKeyboard.emplace_back(e);
+                        }
+                    }
+                },
+                m_logger.get()
+            );
+        }
+        interface.camera.endInputProcessing(nextPresentationTimestamp);
+
+        const auto cursorPosition = m_window->getCursorControl()->getPosition();
+
+        // the cursor position is made relative to the window's position
+        ext::imgui::UI::SUpdateParameters uiParams =
+        {
+            .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()),
+            .displaySize = {m_window->getWidth(),m_window->getHeight()},
+            .mouseEvents = capturedMouse,
+            .keyboardEvents = capturedKeyboard
+        };
+
+        interface.imGUI->update(uiParams);
+    }
+
+    // (Re)creates the offscreen color+depth framebuffer for `resolution` and rebinds the color view in the
+    // ImGui texture array: the previously used descriptor slot is always released, a new one is allocated
+    // and written only when a color view exists. Oversized resolutions are treated as a minimized window.
+    void MeshSampleApp::recreateFramebuffer(const uint16_t2 resolution)
+    {
+        // makes a single-mip, single-layer 2D image, binds memory to it and returns a view (nullptr on failure)
+        auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr<IGPUImageView>
+        {
+            auto image = m_device->createImage({{
+                .type = IGPUImage::ET_2D,
+                .samples = IGPUImage::ESCF_1_BIT,
+                .format = format,
+                .extent = {resolution.x,resolution.y,1},
+                .mipLevels = 1,
+                .arrayLayers = 1,
+                .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT|IGPUImage::EUF_SAMPLED_BIT
+            }});
+            // image creation can fail, so the pointer is checked before it is dereferenced for its memory reqs
+            if (!image || !m_device->allocate(image->getMemoryReqs(),image.get()).isValid())
+                return nullptr;
+            IGPUImageView::SCreationParams params = {
+                .image = std::move(image),
+                .viewType = IGPUImageView::ET_2D,
+                .format = format
+            };
+            params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT:IGPUImage::EAF_COLOR_BIT;
+            return m_device->createImageView(std::move(params));
+        };
+
+        smart_refctd_ptr<IGPUImageView> colorView;
+        // detect window minimization
+        if (resolution.x<0x4000 && resolution.y<0x4000)
+        {
+            colorView = createImageAndView(finalSceneRenderFormat);
+            auto depthView = createImageAndView(sceneRenderDepthFormat);
+            m_framebuffer = m_device->createFramebuffer({{
+                .renderpass = m_renderpass,
+                .depthStencilAttachments = &depthView.get(),
+                .colorAttachments = &colorView.get(),
+                .width = resolution.x,
+                .height = resolution.y
+            }});
+        }
+        else
+            m_framebuffer = nullptr;
+
+        // release previous slot and its image
+        interface.subAllocDS->multi_deallocate(0,1,&interface.renderColorViewDescIndex,{.semaphore=m_semaphore.get(),.value=m_realFrameIx});
+        if (colorView)
+        {
+            interface.subAllocDS->multi_allocate(0,1,&interface.renderColorViewDescIndex);
+            // update descriptor set
+            IGPUDescriptorSet::SDescriptorInfo info = {};
+            info.desc = colorView;
+            info.info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL;
+            const IGPUDescriptorSet::SWriteDescriptorSet write = {
+                .dstSet = interface.subAllocDS->getDescriptorSet(),
+                .binding = TexturesImGUIBindingIndex,
+                .arrayElement = interface.renderColorViewDescIndex,
+                .count = 1,
+                .info = &info
+            };
+            m_device->updateDescriptorSets({&write,1},{});
+        }
+        interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndex;
+    }
+
+    // Begins the renderpass described by `info` with inline subpass contents, sets scissor 0 to the render
+    // area and viewport 0 to a rectangle of the render area's extent anchored at (0,0).
+    void MeshSampleApp::beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info)
+    {
+        const auto& extent = info.renderArea.extent;
+        const SViewport fullExtent = {.x = 0, .y = 0, .width = static_cast<float>(extent.width), .height = static_cast<float>(extent.height)};
+
+        cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE);
+        cb->setScissor(0,1,&info.renderArea);
+        // the viewport ignores the render area's offset, only its extent is used
+        cb->setViewport(0u,1u,&fullExtent);
+    }
+
+    // Draws caption `topText`, then a `rows` x `columns` ImGui table of the row-major floats at `pointer`, then an optional separator.
+    auto addMatrixTable = [](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true) // capture-less: a capture-default is ill-formed outside block scope
+        {
+            ImGui::TextUnformatted(topText); // the caption is plain text, it must not be parsed as a printf format
+            if (ImGui::BeginTable(tableName, columns))
+            {
+                for (int y = 0; y < rows; ++y)
+                {
+                    ImGui::TableNextRow();
+                    for (int x = 0; x < columns; ++x)
+                    {
+                        ImGui::TableSetColumnIndex(x);
+                        ImGui::Text("%.3f", pointer[y * columns + x]);
+                    }
+                }
+                ImGui::EndTable();
+            }
+            if (withSeparator)
+                ImGui::Separator();
+        };
+
+    // Emits the camera section of the UI: handedness and projection-type radio buttons, movement/speed/fov/
+    // near/far controls, the distance slider (which re-places the camera), gizmo hover status text and a
+    // collapsible dump of the view, projection and model matrices.
+    void MeshSampleApp::CInterface::DrawCameraControls() {
+        const ImGuiIO& io = ImGui::GetIO();
+
+        ImGui::Text("Camera");
+        // a flag is written only when its button is clicked; the second button of a pair sees the new value
+        if (ImGui::RadioButton("LH", isLH))
+            isLH = true;
+        ImGui::SameLine();
+        if (ImGui::RadioButton("RH", !isLH))
+            isLH = false;
+
+        if (ImGui::RadioButton("Perspective", isPerspective))
+            isPerspective = true;
+        ImGui::SameLine();
+        if (ImGui::RadioButton("Orthographic", !isPerspective))
+            isPerspective = false;
+
+        ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate);
+        ImGui::Checkbox("Enable camera movement", &move);
+        ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f);
+        ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f);
+
+        // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case
+
+        ImGui::SliderFloat("Fov", &fov, 20.f, 150.f);
+        // the near and far sliders bound each other: zNear tops out at zFar, zFar bottoms out at zNear
+        ImGui::SliderFloat("zNear", &zNear, 0.1f, zFar);
+        ImGui::SliderFloat("zFar", &zFar, zNear, 10000.f);
+
+        // only the distance slider (or the very first frame) triggers a camera re-placement
+        const bool distanceChanged = ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f);
+        if (distanceChanged || firstFrame)
+        {
+            // the camera sits `camDistance` away from the origin in the direction given by camXAngle/camYAngle
+            const float distance = transformParams.camDistance;
+            const float cosX = cosf(camXAngle);
+            core::vectorSIMDf position(cosf(camYAngle) * cosX * distance, sinf(camXAngle) * distance, sinf(camYAngle) * cosX * distance);
+            core::vectorSIMDf target(0.f, 0.f, 0.f);
+            static const core::vectorSIMDf up(0.f, 1.f, 0.f);
+            camera.setPosition(position);
+            camera.setTarget(target);
+            camera.setBackupUpVector(up);
+            camera.recomputeViewMatrix();
+        }
+        firstFrame = false;
+
+        ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y);
+        if (!ImGuizmo::IsUsing())
+        {
+            // four hover probes on one line; an empty string is printed for those that do not apply
+            ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : "");
+            ImGui::SameLine();
+            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : "");
+            ImGui::SameLine();
+            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : "");
+            ImGui::SameLine();
+            ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : "");
+        }
+        else
+            ImGui::Text("Using gizmo");
+
+        if (ImGui::TreeNode("matrices")) {
+            addMatrixTable("View", "ViewMatrixTable", 3, 4, camera.getViewMatrix().pointer());
+            addMatrixTable("Projection", "ViewProjectionMatrixTable", 4, 4, camera.getProjectionMatrix().pointer(), false);
+            addMatrixTable("model", "transform", 3, 4, model.pointer(), false);
+            ImGui::TreePop();
+        }
+    }
+
+    // Runs the gizmo editing step: hands transposed 4x4 copies of view/projection/model to `EditTransform`,
+    // stores its result in `sceneResolution` and writes only the model back (as 3x4), not the view.
+    void MeshSampleApp::CInterface::UpdateImguizmo() {
+        /*
+            * ImGuizmo expects column-major 4x4 view & projection matrices, whereas Nabla uses row-major
+            * matrices - 3x4 for view & 4x4 for projection - hence the transposes on the way in and out.
+            *
+            * The ViewManipulate final call (inside EditTransform) returns a world space column-major matrix
+            * for an object; note it also modifies the input view matrix but the projection is immutable.
+            */
+        // TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases
+        static struct
+        {
+            core::matrix4SIMD view, projection, model;
+        } gizmoIO;
+
+        ImGuizmo::SetID(0u);
+
+        gizmoIO.view = core::transpose(matrix4SIMD(camera.getViewMatrix()));
+        gizmoIO.projection = core::transpose(camera.getProjectionMatrix());
+        gizmoIO.model = core::transpose(matrix4SIMD(model));
+
+        // note we allow to flip gizmo just to match our coordinates
+        // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/
+        if (flipGizmoY)
+            gizmoIO.projection[1][1] *= -1.f;
+
+        transformParams.editTransformDecomposition = true;
+        sceneResolution = EditTransform(gizmoIO.view.pointer(), gizmoIO.projection.pointer(), gizmoIO.model.pointer(), transformParams);
+
+        model = core::transpose(gizmoIO.model).extractSub3x4();
+    }
+
+    // Builds one frame of UI: refreshes the camera projection from the current UI state, starts the
+    // ImGuizmo frame, then emits the optional detached "camera controls" window and the "Editor" window.
+    void MeshSampleApp::CInterface::operator()() {
+        ImGuiIO& io = ImGui::GetIO();
+        //io.ConfigDebugIsDebuggerPresent = true;
+
+        // NOTE(review): a zero `io.DisplaySize` component makes the ratios below non-finite - confirm this
+        // is never reached with an empty display (e.g. while minimized)
+        camera.setProjectionMatrix([&]() -> matrix4SIMD
+        {
+            if (isPerspective)
+            {
+                const float aspectRatio = io.DisplaySize.x / io.DisplaySize.y;
+                if (isLH)
+                    return matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), aspectRatio, zNear, zFar);
+                return matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), aspectRatio, zNear, zFar);
+            }
+
+            // orthographic: the view volume height follows from `viewWidth` and the display's proportions
+            const float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x;
+            if (isLH)
+                return matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar);
+            return matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar);
+        }());
+
+        // the gizmo must know which kind of projection it is fed, it used to be told "perspective" always
+        ImGuizmo::SetOrthographic(!isPerspective);
+        ImGuizmo::BeginFrame();
+
+        // Placement for whichever window is begun first below. An earlier pos/size pair used to precede
+        // this one, but the next-window state was overwritten before any window could consume it.
+        ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing);
+        ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing);
+
+        if (cameraControlSeparated) {
+            // `Begin` clears the flag when the window's close button is pressed, so the flag may only be
+            // written on an actual "Rejoin" click - assigning the negated button state re-armed it every frame
+            if (ImGui::Begin("camera controls", &cameraControlSeparated)) {
+                if (ImGui::Button("Rejoin camera control"))
+                    cameraControlSeparated = false;
+                DrawCameraControls();
+            }
+            ImGui::End();
+        }
+        if(ImGui::Begin("Editor")) {
+            if (!cameraControlSeparated) {
+                cameraControlSeparated = ImGui::Button("Separate camera controls");
+                DrawCameraControls();
+                ImGui::Separator();
+            }
+
+            // NOTE(review): nothing visible here reads `guizmoEnabled`, the gizmo update runs regardless - confirm intent
+            ImGui::Checkbox("update guizmo", &guizmoEnabled);
+            UpdateImguizmo();
+        } //end editor window
+        ImGui::End();
+    }
diff --git a/MeshShader/src/transform.cpp b/MeshShader/src/transform.cpp
new file mode 100644
index 000000000..e69de29bb
diff --git a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp
index 9a9e5c966..d56cc953d 100644
--- a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp
+++ b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp
@@ -228,8 +228,7 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted
 			core::smart_refctd_ptr<video::IGPUGraphicsPipeline> pipelines[PipelineType::Count];
 		};
 		inline const SInitParams& getInitParams() const {return m_params;}
-
-		//
+		
 		inline bool addGeometries(const std::span<const video::IGPUPolygonGeometry* const> geometries)
 		{
 			EXPOSE_NABLA_NAMESPACES;
@@ -261,8 +260,6 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted
 				};
 				return index;
 			};
-			if (anyFailed)
-				device->getLogger()->log("Failed to allocate a UTB for some geometries, probably ran out of space in Descriptor Set!",system::ILogger::ELL_ERROR);
 
 			auto sizeToSet = m_geoms.size();
 			auto resetGeoms = core::makeRAIIExiter([&]()->void
@@ -309,6 +306,9 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted
 				out.normalView = allocateUTB(geom->getNormalView());
 			}
 
+			if (anyFailed)
+				device->getLogger()->log("Failed to allocate a UTB for some geometries, probably ran out of space in Descriptor Set!", system::ILogger::ELL_ERROR);
+
 			// no geometry
 			if (infos.empty())
 				return false;
@@ -356,7 +356,7 @@ class CSimpleDebugRenderer final : public core::IReferenceCounted
 		//
 		inline void clearGeometries(const video::ISemaphore::SWaitInfo& info)
 		{
-			// back to front to avoid O(n^2) resize
+			// why would you delete element by element instead of just deallocating all then clearing once?
 			while (!m_geoms.empty())
 				removeGeometry(m_geoms.size()-1,info);
 		}