pulp-platform · Victor-Jung · Apr 13, 2026 · Feb 12, 2026 · Feb 15, 2026 · Apr 13, 2026
@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -25,6 +25,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -24,6 +24,8 @@ jobs:
     container:
       image: ${{ inputs.docker-image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -35,6 +35,8 @@ jobs:
     container:
       image: ${{ needs.select-env.outputs.image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:
@@ -49,6 +51,8 @@ jobs:
     container:
       image: ${{ needs.select-env.outputs.image }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -23,6 +23,8 @@ jobs:
     container:
       image: ${{ github.event.inputs.docker_image_deeploy || 'ghcr.io/pulp-platform/deeploy-gap9:devel' }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -22,6 +22,8 @@ jobs:
     container:
       image: ${{ github.event.inputs.docker_image_deeploy || 'ghcr.io/pulp-platform/deeploy:devel' }}
     steps:
+      - name: Mark workspace as safe
+        run: git config --global --add safe.directory '*'
       - name: Checkout Repo
         uses: actions/checkout@v4
         with:

@@ -5,6 +5,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 
 
 ### List of Pull Requests
+- Add Microbenchmarking Infrastructure and CI Using GVSoC CSR [#162](https://github.com/pulp-platform/Deeploy/pull/162)
 - Fix CI Cache Generation [#176](https://github.com/pulp-platform/Deeploy/pull/176)
 - Fix Broken CI [#175](https://github.com/pulp-platform/Deeploy/pull/175)
 - Improve Docstring and Debugging [#160](https://github.com/pulp-platform/Deeploy/pull/160)
@@ -23,6 +24,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - Shell Format pre-commit hook
 - Add integer MaxPool1D for Generic platform and RQSConv1D support for PULPOpen, with corresponding kernel tests.
 - Added GAP9 Platform Support: Deployer, Bindings, Templates, Tiler, DMA (L3Dma/MchanDma), target library, CI workflows
+- Add per-layer microbenchmarking on PULPOpen via `--profileMicrobenchmark`: a new `PULPMicrobenchmark` code-transformation pass and `perf_utils.h` helpers report cycles, instructions, stalls, and cache misses for each layer in `RunNetwork`
 
 ### Changed
 - Use by default `devel` container for GAP9 CI

@@ -53,6 +53,7 @@ class CodeGenVerbosity:
 
     tilingProfiling: Optional[bool] = False  # Specifies if we should profile the tiling code
     untiledProfiling: Optional[bool] = None  #  Specifies if we should profile the untilied code
+    microbenchmarkProfiling: Optional[bool] = False  # Wrap each layer with PULP perf-counter microbenchmark
 
 
 _NoVerbosity = CodeGenVerbosity(None)

@@ -24,6 +24,7 @@
 from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass
 from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling
 from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling
+from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPMicrobenchmark import PULPMicrobenchmark
 from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPProfileUntiled import PULPProfileUntiled
 from Deeploy.Targets.PULPOpen.DataTypes import PULPDMAFuture
 from Deeploy.Targets.PULPOpen.DMA.L3Dma import l3DmaHack
@@ -115,6 +116,7 @@
     MemoryManagementGeneration("L2"),
     MemoryManagementGeneration("L3.*"),
     MemoryManagementGeneration(),
+    PULPMicrobenchmark(),
 ])
 
 ClusterTransformer = CodeTransformation([
@@ -133,6 +135,7 @@
     MemoryManagementGeneration("L2"),
     MemoryManagementGeneration("L3.*"),
     MemoryManagementGeneration(),
+    PULPMicrobenchmark(),
 ])
 
 SimpleTransformer = CodeTransformation([

@@ -0,0 +1,42 @@
+# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Tuple
+
+from Deeploy.DeeployTypes import CodeGenVerbosity, CodeTransformationPass, ExecutionBlock, NetworkContext, \
+    NodeTemplate, _NoVerbosity
+
+
+class PULPMicrobenchmark(CodeTransformationPass):
+
+    _preTemplate = NodeTemplate("""
+    perf_stats_t ${op}_perf_start, ${op}_perf_end, ${op}_perf_total;
+    if (pi_core_id() == 0) {
+        perf_bench_init();
+        perf_bench_start();
+        perf_bench_read(&${op}_perf_start);
+    }
+    """)
+
+    _postTemplate = NodeTemplate("""
+    if (pi_core_id() == 0) {
+        perf_bench_stop();
+        perf_bench_read(&${op}_perf_end);
+        perf_bench_diff(&${op}_perf_total, &${op}_perf_end, &${op}_perf_start);
+        perf_bench_print("${op}", &${op}_perf_total);
+    }
+    """)
+
+    def apply(self,
+              ctxt: NetworkContext,
+              executionBlock: ExecutionBlock,
+              name: str,
+              verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]:
+
+        if not verbose.microbenchmarkProfiling:
+            return ctxt, executionBlock
+
+        executionBlock.addLeft(self._preTemplate, {"op": name})
+        executionBlock.addRight(self._postTemplate, {"op": name})
+        return ctxt, executionBlock
@@ -248,7 +248,8 @@ class PULPStructBuffer(StructBuffer):
 
 # SCHEREMO: stdint is included before pulp_nn_kernels.h because it is supposed to be included in there, but isn't...
 _includeList = [
-    "pmsis.h", "stdint.h", "pulp_nn_kernels.h", "DeeployPULPMath.h", "mchan_siracusa.h", "dory_mem.h", "bsp/ram.h"
+    "pmsis.h", "stdint.h", "pulp_nn_kernels.h", "DeeployPULPMath.h", "mchan_siracusa.h", "dory_mem.h", "bsp/ram.h",
+    "perf_utils.h"
 ]
 
 

@@ -141,6 +141,7 @@ def generateNetwork(args):
     verbosityCfg = _NoVerbosity
     if isinstance(platform, PULPPlatform):
         verbosityCfg.untiledProfiling = args.profileUntiled
+        verbosityCfg.microbenchmarkProfiling = args.profileMicrobenchmark
 
     # Parse graph and infer output levels and signedness
     _ = deployer.prepare(verbosityCfg)
@@ -172,6 +173,11 @@ def generateNetwork(args):
                         dest = 'profileUntiled',
                         default = False,
                         help = 'Profile Untiled for L2\n')
+    parser.add_argument('--profileMicrobenchmark',
+                        action = 'store_true',
+                        dest = 'profileMicrobenchmark',
+                        default = False,
+                        help = 'Wrap each layer with PULP perf-counter microbenchmark\n')
     parser.add_argument('--input-type-map',
                         nargs = '*',
                         default = [],

@@ -202,7 +202,10 @@ def setupDeployer(graph: gs.Graph, memoryHierarchy: MemoryHierarchy, defaultTarg
                             - max: Initalize all variables at their maximal value.
                             - min: Initalize all variables at their minimal value.
                         """)
-    parser.add_argument('--profileTiling', action = "store_true")
+    parser.add_argument('--profileTiling', action = "store_true", help = 'Enable tiling profiling')
+    parser.add_argument('--profileMicrobenchmark',
+                        action = "store_true",
+                        help = 'Wrap each layer with PULP perf-counter microbenchmark instrumentation')
     parser.add_argument('--plotMemAlloc',
                         action = 'store_true',
                         help = 'Turn on plotting of the memory allocation and save it in the deeployState folder\n')
@@ -224,6 +227,9 @@ def setupDeployer(graph: gs.Graph, memoryHierarchy: MemoryHierarchy, defaultTarg
     if args.profileTiling:
         verbosityCfg.tilingProfiling = True
 
+    if args.profileMicrobenchmark:
+        verbosityCfg.microbenchmarkProfiling = True
+
     onnx_graph = onnx.load_model(f'{args.dir}/network.onnx')
     graph = gs.import_onnx(onnx_graph)
 

@@ -94,6 +94,12 @@ def __init__(self,
                           action = 'store_true',
                           default = False,
                           help = 'Enable untiled profiling (Siracusa only)\n')
+        self.add_argument('--profileMicrobenchmark',
+                          '--profile-microbenchmark',
+                          dest = 'profileMicrobenchmark',
+                          action = 'store_true',
+                          default = False,
+                          help = 'Wrap each layer with PULP perf-counter microbenchmark\n')
         self.add_argument('--toolchain',
                           metavar = '<LLVM|GCC>',
                           dest = 'toolchain',
@@ -235,6 +241,9 @@ def create_config_from_args(args: argparse.Namespace,
     if not tiling and getattr(args, 'profileUntiled', False):
         gen_args_list.append("--profileUntiled")
 
+    if getattr(args, 'profileMicrobenchmark', False):
+        gen_args_list.append("--profileMicrobenchmark")
+
     config = DeeployTestConfig(
         test_name = test_name,
         test_dir = test_dir_abs,

@@ -45,6 +45,7 @@ def create_test_config(
     mem_alloc_strategy: str = "MiniMalloc",
     search_strategy: str = "random-max",
     profile_tiling: bool = False,
+    profile_microbenchmark: bool = False,
     plot_mem_alloc: bool = False,
     randomized_mem_scheduler: bool = False,
     profile_untiled: bool = False,
@@ -86,6 +87,8 @@ def create_test_config(
             gen_args_list.append(f"--searchStrategy={search_strategy}")
         if profile_tiling:
             gen_args_list.append("--profileTiling")
+        if profile_microbenchmark:
+            gen_args_list.append("--profileMicrobenchmark")
         if plot_mem_alloc:
             gen_args_list.append("--plotMemAlloc")
         if randomized_mem_scheduler:

@@ -211,6 +211,9 @@ def __init__(self, tiling_arguments: bool, description = None):
                               action = "store_true",
                               help = 'Enable randomized memory scheduler\n')
             self.add_argument('--profileTiling', action = 'store_true', help = 'Enable tiling profiling\n')
+            self.add_argument('--profileMicrobenchmark',
+                              action = 'store_true',
+                              help = 'Wrap each layer with PULP perf-counter microbenchmark\n')
             self.add_argument('--memAllocStrategy',
                               metavar = 'memAllocStrategy',
                               dest = 'memAllocStrategy',
@@ -271,6 +274,8 @@ def generate_cmd_args(self) -> str:
                 command += " --randomizedMemoryScheduler"
             if self.args.profileTiling:
                 command += f" --profileTiling"
+            if self.args.profileMicrobenchmark:
+                command += f" --profileMicrobenchmark"
             if self.args.memAllocStrategy:
                 command += f" --memAllocStrategy={self.args.memAllocStrategy}"
             if self.args.plotMemAlloc: