From 848c58f319163904f0d98d35301dbb22bf03623d Mon Sep 17 00:00:00 2001
From: "jiseong.oh" <jiseong.oh@samsung.com>
Date: Wed, 1 Apr 2026 11:46:42 +0000
Subject: [PATCH 1/6] Add more CI testcases for Exynos Backend

- Support necessary stages for E2E test
- Enable test for float/quant ops and float models

Co-authored-by: chong.checn <chong.chen@samsung.com>
Co-authored-by: jhbb.cha <jhbb.cha@samsung.com>
Co-authored-by: xz.linghu <xz.linghu@samsung.com>
Signed-off-by: jiseong.oh <jiseong.oh@samsung.com>
---
 .github/workflows/pull.yml                    |  1 +
 .../samsung/test/models/test_deeplab_v3.py    |  3 +-
 backends/samsung/test/models/test_edsr.py     |  3 +-
 .../samsung/test/models/test_inception_v3.py  |  3 +-
 .../samsung/test/models/test_inception_v4.py  | 42 +++++++++++++-
 .../samsung/test/models/test_mobilenet_v2.py  |  3 +-
 .../samsung/test/models/test_mobilenet_v3.py  |  3 +-
 backends/samsung/test/models/test_resnet18.py |  3 +-
 backends/samsung/test/models/test_resnet50.py |  3 +-
 .../test/models/test_torchvision_vit.py       |  3 +-
 .../samsung/test/models/test_wav2letter.py    |  3 +-
 backends/samsung/test/ops/test_add.py         | 28 ++++++++-
 backends/samsung/test/ops/test_avg_pool2d.py  | 32 +++++++++-
 backends/samsung/test/ops/test_batch_norm.py  |  3 +-
 backends/samsung/test/ops/test_bmm.py         |  3 +-
 backends/samsung/test/ops/test_cat.py         |  3 +-
 backends/samsung/test/ops/test_clamp.py       | 22 ++++++-
 .../samsung/test/ops/test_constant_pad_nd.py  |  3 +-
 backends/samsung/test/ops/test_conv2d.py      | 39 ++++++++++++-
 backends/samsung/test/ops/test_div.py         | 28 ++++++++-
 backends/samsung/test/ops/test_embedding.py   |  3 +-
 backends/samsung/test/ops/test_expand_copy.py |  3 +-
 backends/samsung/test/ops/test_gelu.py        |  3 +-
 backends/samsung/test/ops/test_leaky_relu.py  |  3 +-
 backends/samsung/test/ops/test_linear.py      |  3 +-
 backends/samsung/test/ops/test_log_softmax.py |  3 +-
 backends/samsung/test/ops/test_max_pool2d.py  | 44 +++++++++++++-
 backends/samsung/test/ops/test_mean_dim.py    | 28 ++++++++-
 backends/samsung/test/ops/test_minimum.py     |  3 +-
 backends/samsung/test/ops/test_mul.py         | 32 +++++++++-
 backends/samsung/test/ops/test_permute.py     |  3 +-
 .../samsung/test/ops/test_pixel_shuffle.py    |  3 +-
 backends/samsung/test/ops/test_relu.py        | 28 ++++++++-
 backends/samsung/test/ops/test_reshape.py     |  3 +-
 backends/samsung/test/ops/test_rsqrt.py       |  3 +-
 backends/samsung/test/ops/test_select.py      |  3 +-
 backends/samsung/test/ops/test_slice_copy.py  |  3 +-
 backends/samsung/test/ops/test_softmax.py     |  3 +-
 backends/samsung/test/ops/test_sqrt.py        |  3 +-
 backends/samsung/test/ops/test_squeeze.py     |  3 +-
 backends/samsung/test/ops/test_sub.py         |  3 +-
 backends/samsung/test/ops/test_to_copy.py     |  3 +-
 backends/samsung/test/ops/test_unsqueeze.py   |  3 +-
 .../test/ops/test_upsample_bilinear2d.py      | 26 ++++++++-
 .../test/ops/test_upsample_nearest2d.py       |  3 +-
 .../samsung/test/tester/samsung_tester.py     | 40 ++++++++++++-
 backends/samsung/test/utils/run_tests.py      | 58 +++++++++++++++++++
 backends/samsung/test/utils/utils.py          | 11 ++++
 48 files changed, 512 insertions(+), 46 deletions(-)
 create mode 100644 backends/samsung/test/utils/run_tests.py
 create mode 100644 backends/samsung/test/utils/utils.py

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 8a5b2f4805a..ca59156108b 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -1247,6 +1247,7 @@ jobs:
         fi
 
         # Test models
+        #python -m executorch.backends.samsung.test.utils.run_tests --chipset E9955
         python -m unittest discover -s backends/samsung/test/models -p "test_*.py"
 
   test-vulkan-models-linux:
diff --git a/backends/samsung/test/models/test_deeplab_v3.py b/backends/samsung/test/models/test_deeplab_v3.py
index a2b3fcb93a0..cd6a6527980 100644
--- a/backends/samsung/test/models/test_deeplab_v3.py
+++ b/backends/samsung/test/models/test_deeplab_v3.py
@@ -10,6 +10,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.deeplab_v3 import DeepLabV3ResNet50Model
 
 
@@ -18,7 +19,7 @@ def test_dl3_fp16(self):
         model = DeepLabV3ResNet50Model().get_eager_model()
         example_input = DeepLabV3ResNet50Model().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/models/test_edsr.py b/backends/samsung/test/models/test_edsr.py
index 326296fc55a..e69d5cc459c 100644
--- a/backends/samsung/test/models/test_edsr.py
+++ b/backends/samsung/test/models/test_edsr.py
@@ -12,6 +12,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.edsr import EdsrModel
 
 
@@ -20,7 +21,7 @@ def test_edsr_fp16(self):
         model = EdsrModel().get_eager_model()
         example_input = EdsrModel().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/models/test_inception_v3.py b/backends/samsung/test/models/test_inception_v3.py
index ef3a94c2a62..faeea4ab4a1 100644
--- a/backends/samsung/test/models/test_inception_v3.py
+++ b/backends/samsung/test/models/test_inception_v3.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.inception_v3 import InceptionV3Model
 
 
@@ -23,7 +24,7 @@ def test_inception_v3_fp16(self):
         model = InceptionV3Model().get_eager_model()
         example_input = InceptionV3Model().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/models/test_inception_v4.py b/backends/samsung/test/models/test_inception_v4.py
index 53bd209d5d2..2998fd894db 100644
--- a/backends/samsung/test/models/test_inception_v4.py
+++ b/backends/samsung/test/models/test_inception_v4.py
@@ -5,22 +5,60 @@
 # except in compliance with the License. See the license file in the root
 # directory of this source tree for more details.
 
-
+import logging
+import os
 import unittest
 
 from executorch.backends.samsung.serialization.compile_options import (
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.inception_v4 import InceptionV4Model
 
 
+def patch_iv4(weight_path: str):
+    assert os.path.isfile(weight_path), "Can not found weight path for iv4"
+    from safetensors import safe_open
+    from timm.models import inception_v4
+
+    def _monkeypatch_get_eager_model(self):
+        tensors = {}
+        with safe_open(weight_path, framework="pt") as st:
+            for k in st.keys():
+                tensors[k] = st.get_tensor(k)
+        logging.info("Loading inception_v4 model")
+        m = inception_v4(pretrained=True, pretrained_cfg={"state_dict": tensors})
+        logging.info("Loaded inception_v4 model")
+        return m
+
+    old_func = InceptionV4Model.get_eager_model
+    InceptionV4Model.get_eager_model = _monkeypatch_get_eager_model
+    return old_func
+
+
+def recover_iv4(old_func):
+    InceptionV4Model.get_eager_model = old_func
+
+
 class TestMilestoneInceptionV4(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        assert (model_cache_dir := os.getenv("MODEL_CACHE")), "MODEL_CACHE not set!"
+        weight_path = os.path.join(
+            model_cache_dir, os.path.join(model_cache_dir, "iv4/model.safetensors")
+        )
+        cls._old_func = patch_iv4(weight_path)
+
+    @classmethod
+    def tearDownClass(cls):
+        recover_iv4(cls._old_func)
+
     def test_inception_v4_fp16(self):
         model = InceptionV4Model().get_eager_model()
         example_input = InceptionV4Model().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/models/test_mobilenet_v2.py b/backends/samsung/test/models/test_mobilenet_v2.py
index 86805e5cbc2..51512be57ee 100644
--- a/backends/samsung/test/models/test_mobilenet_v2.py
+++ b/backends/samsung/test/models/test_mobilenet_v2.py
@@ -10,6 +10,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.mobilenet_v2 import MV2Model
 
 
@@ -18,7 +19,7 @@ def test_mv2_fp16(self):
         model = MV2Model().get_eager_model()
         example_input = MV2Model().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/models/test_mobilenet_v3.py b/backends/samsung/test/models/test_mobilenet_v3.py
index 669cca1db12..fbfc4716b73 100644
--- a/backends/samsung/test/models/test_mobilenet_v3.py
+++ b/backends/samsung/test/models/test_mobilenet_v3.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.mobilenet_v3 import MV3Model
 
 
@@ -23,7 +24,7 @@ def test_mv3_fp16(self):
         model = MV3Model().get_eager_model()
         example_input = MV3Model().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/models/test_resnet18.py b/backends/samsung/test/models/test_resnet18.py
index 429218649b8..b2d14d42303 100644
--- a/backends/samsung/test/models/test_resnet18.py
+++ b/backends/samsung/test/models/test_resnet18.py
@@ -12,6 +12,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.resnet import ResNet18Model
 
 
@@ -20,7 +21,7 @@ def test_resnet18_fp16(self):
         model = ResNet18Model().get_eager_model()
         example_input = ResNet18Model().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/models/test_resnet50.py b/backends/samsung/test/models/test_resnet50.py
index 0c6b32526b1..00d33fe79ea 100644
--- a/backends/samsung/test/models/test_resnet50.py
+++ b/backends/samsung/test/models/test_resnet50.py
@@ -12,6 +12,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.resnet import ResNet50Model
 
 
@@ -20,7 +21,7 @@ def test_resnet50_fp16(self):
         model = ResNet50Model().get_eager_model()
         example_input = ResNet50Model().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/models/test_torchvision_vit.py b/backends/samsung/test/models/test_torchvision_vit.py
index 7cdb4cabada..a32dd6ac9ac 100644
--- a/backends/samsung/test/models/test_torchvision_vit.py
+++ b/backends/samsung/test/models/test_torchvision_vit.py
@@ -11,6 +11,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.torchvision_vit import TorchVisionViTModel
 
 
@@ -20,7 +21,7 @@ def test_torchvision_vit_fp16(self):
         model = TorchVisionViTModel().get_eager_model()
         example_input = TorchVisionViTModel().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/models/test_wav2letter.py b/backends/samsung/test/models/test_wav2letter.py
index 4d016763b2b..569e3decfec 100644
--- a/backends/samsung/test/models/test_wav2letter.py
+++ b/backends/samsung/test/models/test_wav2letter.py
@@ -10,6 +10,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.wav2letter import Wav2LetterModel
 
 
@@ -18,7 +19,7 @@ def test_w2l_fp16(self):
         model = Wav2LetterModel().get_eager_model()
         example_input = Wav2LetterModel().get_example_inputs()
         tester = SamsungTester(
-            model, example_input, [gen_samsung_backend_compile_spec("E9955")]
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_add.py b/backends/samsung/test/ops/test_add.py
index 58e49f7bb10..f0e51888ca5 100644
--- a/backends/samsung/test/ops/test_add.py
+++ b/backends/samsung/test/ops/test_add.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Add(torch.nn.Module):
@@ -38,7 +39,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
@@ -50,6 +51,23 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module,
+            inputs,
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+        )
+        (
+            tester.quantize()
+            .export()
+            .check_count({"torch.ops.aten.add.Tensor": 1})
+            .to_edge_transform_and_lower()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_add_Tensor"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.2)
+        )
+
     def test_fp32_simple_add(self):
         inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8))
         self._test(Add(), inputs)
@@ -61,3 +79,11 @@ def test_fp32_const_add(self):
     def test_fp32_add_broadcast(self):
         inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8))
         self._test(Add(), inputs)
+
+    def test_a8w8_simple_add(self):
+        inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8))
+        self._test_a8w8(Add(), inputs)
+
+    def test_a8w8_const_add(self):
+        inputs = (torch.randn(1, 3, 8, 8),)
+        self._test_a8w8(AddConstant(torch.randn(1, 3, 8, 8)), inputs)
diff --git a/backends/samsung/test/ops/test_avg_pool2d.py b/backends/samsung/test/ops/test_avg_pool2d.py
index e00f49a47fd..8aecb445be6 100644
--- a/backends/samsung/test/ops/test_avg_pool2d.py
+++ b/backends/samsung/test/ops/test_avg_pool2d.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class AvgPool2d(torch.nn.Module):
@@ -41,7 +42,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
@@ -53,6 +54,23 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module,
+            inputs,
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+        )
+        (
+            tester.quantize()
+            .export()
+            .check_count({"torch.ops.aten.avg_pool2d.default": 1})
+            .to_edge_transform_and_lower()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_avg_pool2d_default"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.2)
+        )
+
     def test_fp32_avg_pool2d(self):
         inputs = (torch.randn(1, 16, 24, 24),)
         self._test(AvgPool2d(), inputs)
@@ -64,3 +82,15 @@ def test_fp32_avg_pool2d_with_stride(self):
     def test_fp32_avg_pool2d_with_kernel_size(self):
         inputs = (torch.randn(1, 16, 24, 24),)
         self._test(AvgPool2d(kernel_size=4), inputs)
+
+    def test_a8w8_avg_pool2d(self):
+        inputs = (torch.randn(1, 16, 24, 24),)
+        self._test_a8w8(AvgPool2d(), inputs)
+
+    def test_a8w8_avg_pool2d_with_stride(self):
+        inputs = (torch.randn(1, 16, 24, 24),)
+        self._test_a8w8(AvgPool2d(stride=1), inputs)
+
+    def test_a8w8_avg_pool2d_with_kernel_size(self):
+        inputs = (torch.randn(1, 16, 24, 24),)
+        self._test_a8w8(AvgPool2d(kernel_size=4), inputs)
diff --git a/backends/samsung/test/ops/test_batch_norm.py b/backends/samsung/test/ops/test_batch_norm.py
index 3c73f6d993a..db1b9761e51 100644
--- a/backends/samsung/test/ops/test_batch_norm.py
+++ b/backends/samsung/test/ops/test_batch_norm.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class BatchNorm(torch.nn.Module):
@@ -31,7 +32,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 class TestBatchNorm(unittest.TestCase):
     def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
-            module, inputs, [gen_samsung_backend_compile_spec("E9955")]
+            module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_bmm.py b/backends/samsung/test/ops/test_bmm.py
index f927b051603..b4c32a5ace0 100644
--- a/backends/samsung/test/ops/test_bmm.py
+++ b/backends/samsung/test/ops/test_bmm.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class BatchMatmul(torch.nn.Module):
@@ -34,7 +35,7 @@ def _test(self, module: torch.nn.Module):
         torch.manual_seed(8)
         inputs = module.get_example_inputs()
         tester = SamsungTester(
-            module, inputs, [gen_samsung_backend_compile_spec("E9955")]
+            module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_cat.py b/backends/samsung/test/ops/test_cat.py
index a2d42370da5..9690a5dacc3 100644
--- a/backends/samsung/test/ops/test_cat.py
+++ b/backends/samsung/test/ops/test_cat.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Concat(torch.nn.Module):
@@ -28,7 +29,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 class TestConcat(unittest.TestCase):
     def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
-            module, inputs, [gen_samsung_backend_compile_spec("E9955")]
+            module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_clamp.py b/backends/samsung/test/ops/test_clamp.py
index 3c1ac40539b..8ce7f46b3ce 100644
--- a/backends/samsung/test/ops/test_clamp.py
+++ b/backends/samsung/test/ops/test_clamp.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Clamp(torch.nn.Module):
@@ -33,7 +34,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 class TestClamp(unittest.TestCase):
     def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
-            module, inputs, [gen_samsung_backend_compile_spec("E9955")]
+            module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
@@ -45,6 +46,25 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize()
+            .export()
+            .check_count({"torch.ops.aten.clamp.default": 1})
+            .to_edge_transform_and_lower()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.2)
+        )
+
     def test_fp32_clamp(self):
         inputs = (torch.randn(1, 16, 8, 8),)
         self._test(Clamp(minimum=0, maximum=2.0), inputs)
+
+    def test_a8w8_clamp(self):
+        inputs = (torch.randn(1, 16, 8, 8),)
+        self._test_a8w8(Clamp(minimum=0, maximum=2.0), inputs)
diff --git a/backends/samsung/test/ops/test_constant_pad_nd.py b/backends/samsung/test/ops/test_constant_pad_nd.py
index 5c6c6e4376c..192e3ac1a7b 100644
--- a/backends/samsung/test/ops/test_constant_pad_nd.py
+++ b/backends/samsung/test/ops/test_constant_pad_nd.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class ConstantPadND(torch.nn.Module):
@@ -28,7 +29,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 class TestConstantPadND(unittest.TestCase):
     def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
-            module, inputs, [gen_samsung_backend_compile_spec("E9955")]
+            module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_conv2d.py b/backends/samsung/test/ops/test_conv2d.py
index 39c2b2508e6..f23eed52aa9 100644
--- a/backends/samsung/test/ops/test_conv2d.py
+++ b/backends/samsung/test/ops/test_conv2d.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Conv2d(torch.nn.Module):
@@ -67,7 +68,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
@@ -78,6 +79,22 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module,
+            inputs,
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+        )
+        (
+            tester.quantize()
+            .export()
+            .to_edge_transform_and_lower()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_convolution_default"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.2)
+        )
+
     def test_fp32_conv2d_without_bias(self):
         inputs = (torch.randn(1, 3, 24, 24),)
         self._test(Conv2d(bias=False), inputs)
@@ -93,3 +110,23 @@ def test_fp32_depthwise_conv2d(self):
     def test_fp32_transpose_conv2d(self):
         inputs = (torch.randn(1, 32, 24, 24),)
         self._test(TransposeConv2d(), inputs)
+
+    def test_fp32_conv2d_with_dilation(self):
+        inputs = (torch.randn(1, 3, 24, 24),)
+        self._test(Conv2d(dilation=(2, 2)), inputs)
+
+    def test_a8w8_conv2d_without_bias(self):
+        inputs = (torch.randn(1, 3, 24, 24),)
+        self._test_a8w8(Conv2d(bias=False), inputs)
+
+    def test_a8w8_conv2d_with_bias(self):
+        inputs = (torch.randn(1, 3, 24, 24),)
+        self._test_a8w8(Conv2d(bias=True), inputs)
+
+    def test_a8w8_depthwise_conv2d(self):
+        inputs = (torch.randn(1, 8, 24, 24),)
+        self._test_a8w8(Conv2d(in_channels=8, out_channels=8, groups=8), inputs)
+
+    def test_a8w8_conv2d_with_dilation(self):
+        inputs = (torch.randn(1, 3, 24, 24),)
+        self._test_a8w8(Conv2d(dilation=(2, 2)), inputs)
diff --git a/backends/samsung/test/ops/test_div.py b/backends/samsung/test/ops/test_div.py
index 5a27531a96e..b491b1167a1 100644
--- a/backends/samsung/test/ops/test_div.py
+++ b/backends/samsung/test/ops/test_div.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Div(torch.nn.Module):
@@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
@@ -41,6 +42,23 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module,
+            inputs,
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+        )
+        (
+            tester.quantize()
+            .export()
+            .check_count({"torch.ops.aten.div.Tensor": 1})
+            .to_edge_transform_and_lower()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_div_Tensor"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.2)
+        )
+
     def test_fp32_simple_div(self):
         inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3)
         self._test(Div(), inputs)
@@ -48,3 +66,11 @@ def test_fp32_simple_div(self):
     def test_fp32_div_broadcast(self):
         inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3)
         self._test(Div(), inputs)
+
+    def test_a8w8_simple_div(self):
+        inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3)
+        self._test_a8w8(Div(), inputs)
+
+    def test_a8w8_div_broadcast(self):
+        inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3)
+        self._test_a8w8(Div(), inputs)
diff --git a/backends/samsung/test/ops/test_embedding.py b/backends/samsung/test/ops/test_embedding.py
index ca3899d4c24..17e5fe6bf98 100644
--- a/backends/samsung/test/ops/test_embedding.py
+++ b/backends/samsung/test/ops/test_embedding.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Embedding(torch.nn.Module):
@@ -28,7 +29,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 class TestEmbedding(unittest.TestCase):
     def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
-            module, inputs, [gen_samsung_backend_compile_spec("E9955")]
+            module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_expand_copy.py b/backends/samsung/test/ops/test_expand_copy.py
index de0f36e03d0..ba4067e14d6 100644
--- a/backends/samsung/test/ops/test_expand_copy.py
+++ b/backends/samsung/test/ops/test_expand_copy.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class ExpandCopy(torch.nn.Module):
@@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_gelu.py b/backends/samsung/test/ops/test_gelu.py
index 20f93559fda..5b95c9d00c8 100644
--- a/backends/samsung/test/ops/test_gelu.py
+++ b/backends/samsung/test/ops/test_gelu.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class GELU(torch.nn.Module):
@@ -44,7 +45,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_leaky_relu.py b/backends/samsung/test/ops/test_leaky_relu.py
index 4ad510528f9..edcd2aed62e 100644
--- a/backends/samsung/test/ops/test_leaky_relu.py
+++ b/backends/samsung/test/ops/test_leaky_relu.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class LeakyReLU(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_linear.py b/backends/samsung/test/ops/test_linear.py
index ce1f13d1a1f..e4e24075beb 100644
--- a/backends/samsung/test/ops/test_linear.py
+++ b/backends/samsung/test/ops/test_linear.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Linear(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_log_softmax.py b/backends/samsung/test/ops/test_log_softmax.py
index 2aeb600e977..2f0d1ba1ed9 100644
--- a/backends/samsung/test/ops/test_log_softmax.py
+++ b/backends/samsung/test/ops/test_log_softmax.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class LogSoftmax(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_max_pool2d.py b/backends/samsung/test/ops/test_max_pool2d.py
index d944c38a678..8057ffb5fe3 100644
--- a/backends/samsung/test/ops/test_max_pool2d.py
+++ b/backends/samsung/test/ops/test_max_pool2d.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class MaxPool2d(torch.nn.Module):
@@ -43,7 +44,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
@@ -55,6 +56,23 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module,
+            inputs,
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+        )
+        (
+            tester.quantize()
+            .export()
+            .check_count({"torch.ops.aten.max_pool2d.default": 1})
+            .to_edge_transform_and_lower()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_max_pool2d_default"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.2)
+        )
+
     def test_fp32_max_pool2d(self):
         inputs = (torch.randn(1, 16, 24, 24),)
         self._test(MaxPool2d(), inputs)
@@ -70,3 +88,27 @@ def test_fp32_max_pool2d_with_kernel_size(self):
     def test_fp32_max_pool2d_with_dilation(self):
         inputs = (torch.randn(1, 16, 24, 24),)
         self._test(MaxPool2d(dilation=2), inputs)
+
+    def test_fp32_max_pool2d_with_stride(self):
+        inputs = (torch.randn(1, 16, 24, 24),)
+        self._test(MaxPool2d(stride=1), inputs)
+
+    def test_a8w8_max_pool2d(self):
+        inputs = (torch.randn(1, 16, 24, 24),)
+        self._test_a8w8(MaxPool2d(), inputs)
+
+    def test_a8w8_max_pool2d_with_padding(self):
+        inputs = (torch.randn(1, 16, 24, 24),)
+        self._test_a8w8(MaxPool2d(padding=1), inputs)
+
+    def test_a8w8_max_pool2d_with_kernel_size(self):
+        inputs = (torch.randn(1, 16, 24, 24),)
+        self._test_a8w8(MaxPool2d(kernel_size=4), inputs)
+
+    def test_a8w8_max_pool2d_with_dilation(self):
+        inputs = (torch.randn(1, 16, 24, 24),)
+        self._test_a8w8(MaxPool2d(dilation=2), inputs)
+
+    def test_a8w8_max_pool2d_with_stride(self):
+        inputs = (torch.randn(1, 16, 24, 24),)
+        self._test_a8w8(MaxPool2d(stride=1), inputs)
diff --git a/backends/samsung/test/ops/test_mean_dim.py b/backends/samsung/test/ops/test_mean_dim.py
index 5c6378000bd..6f4166f67a4 100644
--- a/backends/samsung/test/ops/test_mean_dim.py
+++ b/backends/samsung/test/ops/test_mean_dim.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class MeanDim(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
@@ -42,6 +43,23 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module,
+            inputs,
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+        )
+        (
+            tester.quantize()
+            .export()
+            .check_count({"torch.ops.aten.mean.dim": 1})
+            .to_edge_transform_and_lower()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_mean_dim"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.2)
+        )
+
     def test_fp32_mean_with_keep_dims(self):
         inputs = (torch.randn(1, 3, 8, 8),)
         self._test(MeanDim(), inputs)
@@ -49,3 +67,11 @@ def test_fp32_mean_with_keep_dims(self):
     def test_fp32_mean_without_keep_dims(self):
         inputs = (torch.randn(1, 3, 8, 8),)
         self._test(MeanDim(keep_dims=False), inputs)
+
+    def test_a8w8_mean_with_keep_dims(self):
+        inputs = (torch.randn(1, 3, 8, 8),)
+        self._test_a8w8(MeanDim(), inputs)
+
+    def test_a8w8_mean_without_keep_dims(self):
+        inputs = (torch.randn(1, 3, 8, 8),)
+        self._test_a8w8(MeanDim(keep_dims=False), inputs)
diff --git a/backends/samsung/test/ops/test_minimum.py b/backends/samsung/test/ops/test_minimum.py
index e82b2e0c428..43c8a32727c 100644
--- a/backends/samsung/test/ops/test_minimum.py
+++ b/backends/samsung/test/ops/test_minimum.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Minimum(torch.nn.Module):
@@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_mul.py b/backends/samsung/test/ops/test_mul.py
index 0f77a5e8f55..3db6003b482 100644
--- a/backends/samsung/test/ops/test_mul.py
+++ b/backends/samsung/test/ops/test_mul.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Mul(torch.nn.Module):
@@ -38,7 +39,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
@@ -50,6 +51,23 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module,
+            inputs,
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+        )
+        (
+            tester.quantize()
+            .export()
+            .check_count({"torch.ops.aten.mul.Tensor": 1})
+            .to_edge_transform_and_lower()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_mul_Tensor"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.2)
+        )
+
     def test_fp32_simple_mul(self):
         inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8))
         self._test(Mul(), inputs)
@@ -61,3 +79,15 @@ def test_fp32_const_mul(self):
     def test_fp32_mul_broadcast(self):
         inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8))
         self._test(Mul(), inputs)
+
+    def test_a8w8_simple_mul(self):
+        inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8))
+        self._test_a8w8(Mul(), inputs)
+
+    def test_a8w8_const_mul(self):
+        inputs = (torch.randn(1, 3, 8, 8),)
+        self._test_a8w8(MulConstant(torch.randn(1, 3, 8, 8)), inputs)
+
+    def test_a8w8_mul_broadcast(self):
+        inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8))
+        self._test_a8w8(Mul(), inputs)
diff --git a/backends/samsung/test/ops/test_permute.py b/backends/samsung/test/ops/test_permute.py
index e0052c3ec37..7a289b9ef06 100644
--- a/backends/samsung/test/ops/test_permute.py
+++ b/backends/samsung/test/ops/test_permute.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Permute(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_pixel_shuffle.py b/backends/samsung/test/ops/test_pixel_shuffle.py
index f7d86e5b1a9..18672a491d2 100644
--- a/backends/samsung/test/ops/test_pixel_shuffle.py
+++ b/backends/samsung/test/ops/test_pixel_shuffle.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class PixelShuffle(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_relu.py b/backends/samsung/test/ops/test_relu.py
index 20da52cb10f..404064b9a51 100644
--- a/backends/samsung/test/ops/test_relu.py
+++ b/backends/samsung/test/ops/test_relu.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class ReLU(torch.nn.Module):
@@ -44,7 +45,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
@@ -56,6 +57,23 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module,
+            inputs,
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+        )
+        (
+            tester.quantize()
+            .export()
+            .check_count({"torch.ops.aten.relu.default": 1})
+            .to_edge_transform_and_lower()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_relu_default"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.2)
+        )
+
     def test_fp32_single_relu(self):
         inputs = (torch.randn(1, 3, 56, 56),)
         self._test(ReLU(with_conv=False), inputs)
@@ -63,3 +81,11 @@ def test_fp32_single_relu(self):
     def test_fp32_conv_relu(self):
         inputs = (torch.randn(1, 3, 56, 56),)
         self._test(ReLU(with_conv=True), inputs)
+
+    def test_a8w8_single_relu(self):
+        inputs = (torch.randn(1, 3, 56, 56),)
+        self._test_a8w8(ReLU(with_conv=False), inputs)
+
+    def test_a8w8_conv_relu(self):
+        inputs = (torch.randn(1, 3, 56, 56),)
+        self._test_a8w8(ReLU(with_conv=True), inputs)
diff --git a/backends/samsung/test/ops/test_reshape.py b/backends/samsung/test/ops/test_reshape.py
index 148186fb997..d3477f354fc 100644
--- a/backends/samsung/test/ops/test_reshape.py
+++ b/backends/samsung/test/ops/test_reshape.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Reshape(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_rsqrt.py b/backends/samsung/test/ops/test_rsqrt.py
index 9cab9456d64..216e1ed6e0e 100644
--- a/backends/samsung/test/ops/test_rsqrt.py
+++ b/backends/samsung/test/ops/test_rsqrt.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Rsqrt(torch.nn.Module):
@@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_select.py b/backends/samsung/test/ops/test_select.py
index 3d619f37a0f..9909f7653b7 100644
--- a/backends/samsung/test/ops/test_select.py
+++ b/backends/samsung/test/ops/test_select.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class SelectCopy(torch.nn.Module):
@@ -31,7 +32,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_slice_copy.py b/backends/samsung/test/ops/test_slice_copy.py
index 4b3a100f927..17868fac327 100644
--- a/backends/samsung/test/ops/test_slice_copy.py
+++ b/backends/samsung/test/ops/test_slice_copy.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class SliceCopy(torch.nn.Module):
@@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_softmax.py b/backends/samsung/test/ops/test_softmax.py
index 8721df588d1..9c0887f6b66 100644
--- a/backends/samsung/test/ops/test_softmax.py
+++ b/backends/samsung/test/ops/test_softmax.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Softmax(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_sqrt.py b/backends/samsung/test/ops/test_sqrt.py
index 1ed31277dc3..1d3a584c808 100644
--- a/backends/samsung/test/ops/test_sqrt.py
+++ b/backends/samsung/test/ops/test_sqrt.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Sqrt(torch.nn.Module):
@@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_squeeze.py b/backends/samsung/test/ops/test_squeeze.py
index 329053adc8c..a69dc92a067 100644
--- a/backends/samsung/test/ops/test_squeeze.py
+++ b/backends/samsung/test/ops/test_squeeze.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Squeeze(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_sub.py b/backends/samsung/test/ops/test_sub.py
index aea428b34b8..89e90fadc86 100644
--- a/backends/samsung/test/ops/test_sub.py
+++ b/backends/samsung/test/ops/test_sub.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class Sub(torch.nn.Module):
@@ -38,7 +39,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_to_copy.py b/backends/samsung/test/ops/test_to_copy.py
index 002e85801fe..d867ed02655 100644
--- a/backends/samsung/test/ops/test_to_copy.py
+++ b/backends/samsung/test/ops/test_to_copy.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class ToCopy(torch.nn.Module):
@@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_unsqueeze.py b/backends/samsung/test/ops/test_unsqueeze.py
index e10745fa839..125b085d155 100644
--- a/backends/samsung/test/ops/test_unsqueeze.py
+++ b/backends/samsung/test/ops/test_unsqueeze.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class UnSqueeze(torch.nn.Module):
@@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/ops/test_upsample_bilinear2d.py b/backends/samsung/test/ops/test_upsample_bilinear2d.py
index 7bdf3ab4041..1131ac9774e 100644
--- a/backends/samsung/test/ops/test_upsample_bilinear2d.py
+++ b/backends/samsung/test/ops/test_upsample_bilinear2d.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class UpsampleBilinear2d(torch.nn.Module):
@@ -35,7 +36,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
@@ -49,6 +50,29 @@ def _test(self, module: torch.nn.Module, inputs):
             .run_method_and_compare_outputs(inputs=inputs)
         )
 
+    def _test_a8w8(self, module: torch.nn.Module, inputs):
+        tester = SamsungTester(
+            module,
+            inputs,
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+        )
+        (
+            tester.quantize()
+            .export()
+            .check_count({"torch.ops.aten.upsample_bilinear2d.vec": 1})
+            .to_edge_transform_and_lower()
+            .check_not(
+                ["executorch_exir_dialects_edge__ops_aten_upsample_bilinear2d_vec"]
+            )
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=inputs, atol=0.7, rtol=0.1)
+        )
+
     def test_fp32_upsample_bilinear2d(self):
         inputs = (torch.randn(1, 16, 16, 16),)
         self._test(UpsampleBilinear2d(), inputs)
+
+    def test_a8w8_upsample_bilinear2d(self):
+        inputs = (torch.randn(1, 16, 16, 16),)
+        self._test_a8w8(UpsampleBilinear2d(), inputs)
diff --git a/backends/samsung/test/ops/test_upsample_nearest2d.py b/backends/samsung/test/ops/test_upsample_nearest2d.py
index bbdff40a0e9..d59c907e84b 100644
--- a/backends/samsung/test/ops/test_upsample_nearest2d.py
+++ b/backends/samsung/test/ops/test_upsample_nearest2d.py
@@ -14,6 +14,7 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
 
 
 class UpsampleNearest2d(torch.nn.Module):
@@ -34,7 +35,7 @@ def _test(self, module: torch.nn.Module, inputs):
         tester = SamsungTester(
             module,
             inputs,
-            [gen_samsung_backend_compile_spec("E9955")],
+            [gen_samsung_backend_compile_spec(TestConfig.chipset)],
         )
         (
             tester.export()
diff --git a/backends/samsung/test/tester/samsung_tester.py b/backends/samsung/test/tester/samsung_tester.py
index f33d508dfca..7f1f65f0d6c 100644
--- a/backends/samsung/test/tester/samsung_tester.py
+++ b/backends/samsung/test/tester/samsung_tester.py
@@ -18,11 +18,20 @@
 )
 from executorch.backends.test.harness import Tester as TesterBase
 from executorch.backends.test.harness.stages import StageType
+from executorch.backends.transforms.decompose_sdpa import (
+    DecomposeScaledDotProductAttention,
+)
 from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
 from executorch.exir.backend.backend_details import CompileSpec
 
 from executorch.exir.pass_manager import PassType
-from torch.export import ExportedProgram
+from torch.export import export, ExportedProgram
+
+from torchao.quantization.pt2e.quantize_pt2e import (
+    convert_pt2e,
+    prepare_pt2e,
+    prepare_qat_pt2e,
+)
 
 from torchao.quantization.pt2e.quantizer import Quantizer
 
@@ -48,6 +57,35 @@ def __init__(
             is_qat=is_qat,
         )
 
+    def run(
+        self, artifact: torch.nn.Module, inputs: Optional[Tuple[torch.Tensor]]
+    ) -> None:
+        assert inputs is not None
+        if self.is_qat:
+            artifact.train()
+        captured_graph = export(artifact, inputs, strict=True).module()
+
+        assert isinstance(captured_graph, torch.fx.GraphModule)
+
+        DecomposeScaledDotProductAttention()(captured_graph)
+
+        if self.is_qat:
+            prepared = prepare_qat_pt2e(captured_graph, self.quantizer)
+        else:
+            prepared = prepare_pt2e(captured_graph, self.quantizer)
+
+        if self.calibrate:
+            # Calibrate prepared model to provide data to quantization observers.
+            if self.calibration_samples is not None:
+                for inp in self.calibration_samples:
+                    prepared(*inp)
+            else:
+                prepared(*inputs)
+
+        converted = convert_pt2e(prepared, fold_quantize=False)
+
+        self.converted_graph = converted
+
 
 class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
     def __init__(
diff --git a/backends/samsung/test/utils/run_tests.py b/backends/samsung/test/utils/run_tests.py
new file mode 100644
index 00000000000..4dd1c0b021a
--- /dev/null
+++ b/backends/samsung/test/utils/run_tests.py
@@ -0,0 +1,58 @@
+# Copyright (c) Samsung Electronics Co. LTD
+# All rights reserved
+#
+# Licensed under the BSD License (the "License"); you may not use this file
+# except in compliance with the License. See the license file in the root
+# directory of this source tree for more details.
+
+import argparse
+import os
+import sys
+import unittest
+
+from executorch.backends.samsung.test.utils.utils import TestConfig
+
+
+TESTS_SEARCH_DIRS = ["ops", "models"]
+current_dir = os.path.dirname(os.path.abspath(__file__))
+
+
+def setup_env_with_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--chipset",
+        default="E9955",
+        help="Samsung chipset, i.e. E9955, E9965, etc",
+        type=str,
+    )
+    parser.add_argument(
+        "--host",
+        help="Host ip address with device connecting",
+        type=str,
+    )
+    args = parser.parse_args()
+
+    TestConfig.host_ip = args.host
+    TestConfig.chipset = args.chipset
+
+
+if __name__ == "__main__":
+    setup_env_with_args()
+    test_suite = unittest.TestSuite()
+
+    for test_search_dir in TESTS_SEARCH_DIRS:
+        tests = unittest.TestLoader().discover(
+            start_dir=os.path.join(f"{current_dir}/../", test_search_dir),
+            pattern="test*.py",
+            top_level_dir=None,
+        )
+        test_suite.addTest(tests)
+
+    test_runner = unittest.TextTestRunner()
+    result = test_runner.run(test_suite)
+
+    if not result.wasSuccessful():
+        print("----------------------------------------------------------------------")
+        for fail_case in result.failures:
+            print(f"  {fail_case[0]}")
+        sys.exit(1)
diff --git a/backends/samsung/test/utils/utils.py b/backends/samsung/test/utils/utils.py
new file mode 100644
index 00000000000..4385245daf9
--- /dev/null
+++ b/backends/samsung/test/utils/utils.py
@@ -0,0 +1,11 @@
+# Copyright (c) Samsung Electronics Co. LTD
+# All rights reserved
+#
+# Licensed under the BSD License (the "License"); you may not use this file
+# except in compliance with the License. See the license file in the root
+# directory of this source tree for more details.
+
+
+class TestConfig:
+    host_ip: str = "111.111.111.111"
+    chipset: str = "E9965"

From d5f491008a2c2af01b05349ffc4432a10bca9ff7 Mon Sep 17 00:00:00 2001
From: "jiseong.oh" <jiseong.oh@samsung.com>
Date: Thu, 2 Apr 2026 09:44:46 +0000
Subject: [PATCH 2/6] Disable mobileBert test

- this test will be enabled after fixing transformers version issue

Signed-off-by: jiseong.oh <jiseong.oh@samsung.com>
---
 backends/samsung/test/models/test_torchvision_vit.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/backends/samsung/test/models/test_torchvision_vit.py b/backends/samsung/test/models/test_torchvision_vit.py
index a32dd6ac9ac..127bc43b5c8 100644
--- a/backends/samsung/test/models/test_torchvision_vit.py
+++ b/backends/samsung/test/models/test_torchvision_vit.py
@@ -16,6 +16,10 @@
 
 
 class TestMilestoneTorchVisionViT(unittest.TestCase):
+
+    # This model is skipped because transformers=5.0.0rc1. 
+    # it will re-enable after fixing the issue
+    @unittest.skip
     def test_torchvision_vit_fp16(self):
         torch.manual_seed(8)
         model = TorchVisionViTModel().get_eager_model()

From 34f372318a4207f62c557073fc95cb42fbddc11d Mon Sep 17 00:00:00 2001
From: "jiseong.oh" <jiseong.oh@samsung.com>
Date: Wed, 1 Apr 2026 11:46:42 +0000
Subject: [PATCH 3/6] Add more CI testcases for Exynos Backend

- Support necessary stages for E2E test
- Enable test for float/quant ops and float models

Co-authored-by: chong.checn <chong.chen@samsung.com>
Co-authored-by: jhbb.cha <jhbb.cha@samsung.com>
Co-authored-by: xz.linghu <xz.linghu@samsung.com>
Signed-off-by: jiseong.oh <jiseong.oh@samsung.com>
---
 .../test/models/test_mobilebert_finetuning.py | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 backends/samsung/test/models/test_mobilebert_finetuning.py

diff --git a/backends/samsung/test/models/test_mobilebert_finetuning.py b/backends/samsung/test/models/test_mobilebert_finetuning.py
new file mode 100644
index 00000000000..92b52e38c9e
--- /dev/null
+++ b/backends/samsung/test/models/test_mobilebert_finetuning.py
@@ -0,0 +1,65 @@
+# Copyright (c) Samsung Electronics Co. LTD
+# All rights reserved
+#
+# Licensed under the BSD License (the "License"); you may not use this file
+# except in compliance with the License. See the license file in the root
+# directory of this source tree for more details.
+
+import os
+import unittest
+
+from executorch.backends.samsung.serialization.compile_options import (
+    gen_samsung_backend_compile_spec,
+)
+from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.utils import TestConfig
+
+from executorch.examples.samsung.scripts.mobilebert_finetune import MobileBertFinetune
+from transformers import AutoTokenizer
+
+
+def patch_mobilebert_finetuning(model_cache_dir: str):
+    assert os.path.isdir(
+        model_cache_dir
+    ), "Can not found model cache dirrecory for mobilebert finetuning"
+
+    def _monkeypatch_load_tokenizer(self):
+        tokenizer = AutoTokenizer.from_pretrained(model_cache_dir)
+        return tokenizer
+
+    old_func = MobileBertFinetune.load_tokenizer
+    MobileBertFinetune.load_tokenizer = _monkeypatch_load_tokenizer
+    return old_func
+
+
+def recover_mobilebert_finetuning(old_func):
+    MobileBertFinetune.load_tokenizer = old_func
+
+
+class Test_Milestone_MobileBertFinetune(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        assert (model_cache_dir := os.getenv("MODEL_CACHE")), "MODEL_CACHE not set!"
+        cls.model_cache_dir = os.path.join(model_cache_dir, "mobilebert")
+        cls._old_func = patch_mobilebert_finetuning(cls.model_cache_dir)
+
+    @classmethod
+    def tearDownClass(cls):
+        recover_mobilebert_finetuning(cls._old_func)
+
+    # This model need to be fixed according new transformer version
+    @unittest.skip
+    def test_mobilebert_finetuning_fp16(self):
+        mobilebert_finetune = MobileBertFinetune()
+        model, _ = mobilebert_finetune.get_finetune_mobilebert(self.model_cache_dir)
+        example_input = mobilebert_finetune.get_example_inputs()
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+
+        (
+            tester.export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=0.008)
+        )

From f9f988d4bb0a7aee3960473a3e2a758521dc6873 Mon Sep 17 00:00:00 2001
From: "jiseong.oh" <jiseong.oh@samsung.com>
Date: Thu, 2 Apr 2026 10:14:59 +0000
Subject: [PATCH 4/6] Support Quantized MobileBert

- update annotator
- Support quantized mobilebert
- update Quantization strategy

Co-authored-by: chen.zhao <chen03.zhao@samsung.com>
Co-authored-by: Sangsoo.ko <sangsoo.ko@samsung.com>
Signed-off-by: jiseong.oh <jiseong.oh@samsung.com>
---
 backends/samsung/_passes/annotate_qparams.py  |  39 +-
 .../_passes/annotate_scalar_parameters.py     |  50 +-
 .../{fuse_conv_act.py => fuse_activation.py}  |  30 +-
 backends/samsung/_passes/insert_qdq.py        |   9 +
 .../_passes/transform_quantized_mask.py       | 105 +++
 backends/samsung/builders/__init__.py         |   2 +
 .../samsung/builders/op_constant_pad_nd.py    |   2 +-
 backends/samsung/builders/op_embedding.py     |   1 +
 backends/samsung/builders/op_placeholder.py   |  37 +
 backends/samsung/builders/op_slice_copy.py    |   8 +-
 backends/samsung/builders/op_sub.py           |   6 +-
 backends/samsung/enn_preprocess.py            |  15 +-
 backends/samsung/partition/enn_partitioner.py |   1 +
 backends/samsung/quantizer/annotator.py       |  98 +--
 backends/samsung/quantizer/qconfig.py         |  56 +-
 backends/samsung/quantizer/quantizer.py       |   7 +-
 .../samsung/serialization/enn_graph_schema.py |   6 +-
 .../test/models/test_torchvision_vit.py       |   3 -
 .../samsung/test/tester/samsung_tester.py     |   7 +-
 backends/samsung/test/utils/datasets.py       | 261 +++++++
 backends/samsung/test/utils/quant_checkers.py | 240 ++++++
 backends/samsung/utils/export_utils.py        |  17 +-
 .../scripts/mobilebert_finetune_QAT.py        | 686 ++++++++++++++++++
 23 files changed, 1502 insertions(+), 184 deletions(-)
 rename backends/samsung/_passes/{fuse_conv_act.py => fuse_activation.py} (71%)
 create mode 100644 backends/samsung/_passes/transform_quantized_mask.py
 create mode 100644 backends/samsung/builders/op_placeholder.py
 create mode 100644 backends/samsung/test/utils/datasets.py
 create mode 100644 backends/samsung/test/utils/quant_checkers.py
 create mode 100644 examples/samsung/scripts/mobilebert_finetune_QAT.py

diff --git a/backends/samsung/_passes/annotate_qparams.py b/backends/samsung/_passes/annotate_qparams.py
index 663d1fdf5fa..ede71a6ff16 100644
--- a/backends/samsung/_passes/annotate_qparams.py
+++ b/backends/samsung/_passes/annotate_qparams.py
@@ -14,6 +14,7 @@
 from torch._export.utils import get_buffer
 from torch.export import ExportedProgram
 from torch.fx import GraphModule, Node
+from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
 
 
 class AnnotateQparamsPass(ExportPass):
@@ -148,13 +149,34 @@ def _check_same(requant_obj, ori_obj) -> bool:
                 _check_same(ori_quant_attrs[key], requantize_attrs[key])
                 for key in key_map.values()
             ):
-                requantize_map[idx] = requantize_attrs
+                if (
+                    ori_quant_attrs[QuantConstants.QUANT_KEY.quant_dtype]
+                    != requantize_attrs[QuantConstants.QUANT_KEY.quant_dtype]
+                ):
+                    # For Q-DQ who will change quant dtype, we will insert requantization node
+                    requantize_map[idx] = requantize_attrs
+                else:
+                    node.meta["quantize_attrs"] = requantize_attrs
 
     def _annotate(self, graph_module: GraphModule):
         for node in graph_module.graph.nodes:
+            if key_map := QuantConstants.DEQUANT_OPS_KEY_MAP.get(node.target, None):
+                # We will fold node with constant output in the future pass as a constant node
+                # example: Constant->Q->DQ->nodeN->Q->DQ, this seq will be folded to one
+                # We need to store the q-params from last DQ params for quantizing constant value
+                quant_attrs = self.get_quant_attrs(node, key_map)
+                if node.args[0].target in QuantConstants.QUANT_OPS_KEY_MAP:
+                    node.meta["quantize_attrs"] = quant_attrs
+                else:
+                    node.args[0].meta["quantize_attrs"] = quant_attrs
+                continue
             key_map = QuantConstants.QUANT_OPS_KEY_MAP.get(node.target, None)
             if not key_map:
                 continue
+            quant_attrs = self.get_quant_attrs(node, key_map)
+            if node.args[0].target in QuantConstants.QUANT_OPS_KEY_MAP:
+                node.meta["quantize_attrs"] = quant_attrs
+                continue
             source_node = node.args[0]
             if source_node.target in (
                 *QuantConstants.QUANT_OPS_KEY_MAP,
@@ -164,13 +186,26 @@ def _annotate(self, graph_module: GraphModule):
                 continue
             elif source_node.target == operator.getitem:
                 source_node = source_node.args[0]
-            quant_attrs = self.get_quant_attrs(node, key_map)
+
             source_node.meta["quantize_attrs"] = quant_attrs
             self._annotate_requantize(source_node)
             self._propagate_quant_params(source_node)
 
+    def _annotate_decomposed_mm(self, graph_module: GraphModule):
+        for source_list in get_source_partitions(graph_module.graph, ["matmul"]).get(
+            "matmul", {}
+        ):
+            final_view = source_list.output_nodes[0]
+            if not (quantize_attrs := final_view.meta.get("quantize_attrs")):
+                continue
+            for node in source_list.nodes:
+                if node.target == exir_ops.edge.aten.bmm.default:
+                    node.meta["quantize_attrs"] = quantize_attrs
+                    break
+
     def call(self, graph_module: GraphModule):
         self._annotate(graph_module)
+        self._annotate_decomposed_mm(graph_module)
         graph_module.recompile()
         return PassResult(graph_module, True)
 
diff --git a/backends/samsung/_passes/annotate_scalar_parameters.py b/backends/samsung/_passes/annotate_scalar_parameters.py
index 643685bdb25..2d3e9778f7c 100644
--- a/backends/samsung/_passes/annotate_scalar_parameters.py
+++ b/backends/samsung/_passes/annotate_scalar_parameters.py
@@ -5,7 +5,6 @@
 # LICENSE file in the root directory of this source tree.
 
 import torch
-from executorch.backends.samsung.quantizer.quantizer import global_quant_info
 from executorch.backends.samsung.utils.constants import QuantConstants
 from executorch.backends.transforms.utils import get_param_tensor, is_param_node
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -25,6 +24,7 @@ class AnnotateScalarParametersPass(ExportPass):
         exir_ops.edge.aten.mul.Tensor,
         exir_ops.edge.aten.add.Tensor,
         exir_ops.edge.aten.div.Tensor,
+        exir_ops.edge.aten.sub.Tensor,
     }
 
     def __init__(self, edge_program: ExportedProgram):
@@ -35,27 +35,37 @@ def annotate(self, graph_module: torch.fx.GraphModule):
         for node in graph_module.graph.nodes:
             if node.target not in self.TARGET_OPS or "quantize_attrs" not in node.meta:
                 continue
-            torch_quant_dtype = global_quant_info.weight_precison.torch_dtype
-            for input_arg in node.all_input_nodes:
-                if input_arg.op not in ("placeholder", "get_attr") or not is_param_node(
-                    self.edge_program, input_arg
+            input0, input1 = node.all_input_nodes[0], node.all_input_nodes[1]
+            if input0.op not in ("placeholder", "get_attr") or not is_param_node(
+                self.edge_program, input0
+            ):
+                if input1.op not in ("placeholder", "get_attr") or not is_param_node(
+                    self.edge_program, input1
                 ):
                     continue
-                else:
-                    tensor = get_param_tensor(self.edge_program, input_arg)
-                    if not tensor.shape:
-                        qparams = {
-                            QuantConstants.QUANT_KEY.scale: float(tensor),
-                            QuantConstants.QUANT_KEY.quant_dtype: torch_quant_dtype,
-                            QuantConstants.QUANT_KEY.quant_max: torch.iinfo(
-                                torch_quant_dtype
-                            ).max,
-                            QuantConstants.QUANT_KEY.quant_min: torch.iinfo(
-                                torch_quant_dtype
-                            ).min,
-                            QuantConstants.QUANT_KEY.zero_point: 0,
-                        }
-                        input_arg.meta["quantize_attrs"] = qparams
+                ifm_node, param_tensor_node = input0, input1
+            else:
+                ifm_node, param_tensor_node = input1, input0
+            if not (quantize_attrs := ifm_node.meta.get("quantize_attrs")):
+                continue
+            param_tensor = get_param_tensor(self.edge_program, param_tensor_node)
+            if not param_tensor.shape:
+                scale = (
+                    float(param_tensor) if param_tensor > 0 else -float(param_tensor)
+                )
+            else:
+                continue
+            q_dtype = quantize_attrs[QuantConstants.QUANT_KEY.quant_dtype]
+            if scale == 0:
+                scale = 1.0
+            qparams = {
+                QuantConstants.QUANT_KEY.scale: scale,
+                QuantConstants.QUANT_KEY.quant_dtype: q_dtype,
+                QuantConstants.QUANT_KEY.quant_max: torch.iinfo(q_dtype).max,
+                QuantConstants.QUANT_KEY.quant_min: torch.iinfo(q_dtype).min,
+                QuantConstants.QUANT_KEY.zero_point: 0,
+            }
+            param_tensor_node.meta["quantize_attrs"] = qparams
 
     def call(self, graph_module: torch.fx.GraphModule):
         graph = graph_module.graph
diff --git a/backends/samsung/_passes/fuse_conv_act.py b/backends/samsung/_passes/fuse_activation.py
similarity index 71%
rename from backends/samsung/_passes/fuse_conv_act.py
rename to backends/samsung/_passes/fuse_activation.py
index c034c98bb14..54dc3ab3873 100644
--- a/backends/samsung/_passes/fuse_conv_act.py
+++ b/backends/samsung/_passes/fuse_activation.py
@@ -24,7 +24,7 @@ def map_hardtan_relux(tanhnode: torch.fx.node.Node) -> Optional[str]:
     return None
 
 
-class FuseConvActPass(ExportPass):
+class FuseActivationPass(ExportPass):
     TARGET_ACTS_MAP = {
         exir_ops.edge.aten.relu.default: (lambda x: "RELU"),
         exir_ops.edge.aten.relu_.default: (lambda x: "RELU"),
@@ -33,39 +33,40 @@ class FuseConvActPass(ExportPass):
         exir_ops.edge.aten.hardtanh.default: map_hardtan_relux,
         exir_ops.edge.aten.hardtanh_.default: map_hardtan_relux,
     }
+    TARGET_SOURCE_NODES = {
+        exir_ops.edge.aten.convolution.default,
+        exir_ops.edge.aten.linear.default,
+    }
 
     def _fuse(
         self,
         graph_module: GraphModule,
     ):
-        for target_conv, target_act in self.get_target_conv_act(graph_module):
+        for target_src, target_act in self.get_target_src_act(graph_module):
             assert (
                 act_name := self.TARGET_ACTS_MAP.get(target_act.target)(target_act)
             ), f"Not supported {target_act.name} now."
-            target_conv.meta["activation"] = act_name
+            target_src.meta["activation"] = act_name
             if "quantize_attrs" in target_act.meta:
-                target_conv.meta["quantize_attrs"] = target_act.meta["quantize_attrs"]
-
-            # If we merge the real out activation to conv, the conv should be the real out
-            if "real_out" in target_act.meta:
-                target_conv.meta["real_out"] = target_act.meta["real_out"]
+                target_src.meta["quantize_attrs"] = target_act.meta["quantize_attrs"]
+            else:
+                continue
             for user in [user for user in target_act.users.keys()]:  # noqa: C416
-                user.replace_input_with(target_act, target_conv)
+                user.replace_input_with(target_act, target_src)
             graph_module.graph.erase_node(target_act)
 
-    def get_target_conv_act(self, graph_module: GraphModule):
+    def get_target_src_act(self, graph_module: GraphModule):
         for node in graph_module.graph.nodes:
-            if node.target != exir_ops.edge.aten.convolution.default:
+            if node.target not in self.TARGET_SOURCE_NODES:
                 continue
             if len(node.users) != 1:
-                # Such cases couldn't be conv + act
+                # Such cases couldn't be src + act
                 continue
             act_node = list(node.users.keys())[0]
             if act_node.target not in self.TARGET_ACTS_MAP:
                 continue
             if "quantize_attrs" in node.meta:
-                # If the conv's output is quantized
-                # We do not fuse them
+                # If we merge the real out activation to source, the source should be the real out
                 continue
             yield node, act_node
 
@@ -73,5 +74,4 @@ def call(self, graph_module: GraphModule):
         self._fuse(graph_module)
         graph_module.recompile()
         dead_code_elimination_pass(graph_module)
-        _ = super().call(graph_module).graph_module
         return PassResult(graph_module, True)
diff --git a/backends/samsung/_passes/insert_qdq.py b/backends/samsung/_passes/insert_qdq.py
index a59b011ac4b..fb473810c5a 100644
--- a/backends/samsung/_passes/insert_qdq.py
+++ b/backends/samsung/_passes/insert_qdq.py
@@ -156,9 +156,18 @@ def _add_qdq(self, graph_module: GraphModule):
             elif is_graph_output(node):
                 self._add_dq_after(graph_module, node)
 
+    def _add_q_for_cast(self, graph_module: GraphModule):
+        for node in list(graph_module.graph.nodes):
+            if not node.target == exir_ops.edge.aten._to_copy.default:
+                continue
+            if "quantize_attrs" not in node.meta:
+                continue
+            self._add_q_after(graph_module, node)
+
     def call(self, graph_module: GraphModule):
         self._add_qdq(graph_module)
         self._add_qdq_for_requantize(graph_module)
+        self._add_q_for_cast(graph_module)
         graph_module.graph.eliminate_dead_code()
         graph_module.recompile()
         return PassResult(graph_module, True)
diff --git a/backends/samsung/_passes/transform_quantized_mask.py b/backends/samsung/_passes/transform_quantized_mask.py
new file mode 100644
index 00000000000..e5116024eee
--- /dev/null
+++ b/backends/samsung/_passes/transform_quantized_mask.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.samsung.utils.constants import QuantConstants
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+from executorch.exir.passes import dead_code_elimination_pass
+from torch.export import ExportedProgram
+from torch.fx import GraphModule
+
+
+class TransformQuantizedMaskPass(ExportPass):
+    def __init__(self, edge_program: ExportedProgram):
+        super().__init__()
+        self.edge_program = edge_program
+
+    def get_mask_mul(self, graph_module: GraphModule):
+        """
+        Iterator for each patterns in the graph.
+        The obj returned by iterator is the first node of the pattern.
+        """
+        nodes_in_pattern = (
+            exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+            exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            exir_ops.edge.aten.sub.Tensor,
+            exir_ops.edge.aten._to_copy.default,
+            exir_ops.edge.aten.unsqueeze_copy.default,
+            exir_ops.edge.aten.mul.Tensor,
+        )
+        mask_node = None
+        for node in graph_module.graph.nodes:
+            if node.target != "attention_mask":
+                continue
+            else:
+                mask_node = node
+                break
+        if mask_node is None:
+            return None
+        while node.target != exir_ops.edge.aten.mul.Tensor:
+            find_next = False
+            for successor in list(node.users.keys()):
+                if successor.target in nodes_in_pattern:
+                    node = successor
+                    find_next = True
+                    break
+            if not find_next:
+                return None
+        return node
+
+    def transform(
+        self,
+        graph_module: GraphModule,
+    ):
+        mask_mul = self.get_mask_mul(graph_module)
+        if mask_mul is None:
+            return
+        rsub_node = mask_mul.args[0]
+        manual_mul_idx = 0
+        for add in list(mask_mul.users.keys()):
+            custom_tensor_name = f"_custom_tensor_{manual_mul_idx}"
+            div_node = add.args[0]
+            if "quantize_attrs" not in div_node.meta:
+                return
+            div_quant_args = div_node.meta["quantize_attrs"]
+            custom_tensor = torch.tensor(
+                (
+                    div_node.meta["quantize_attrs"][QuantConstants.QUANT_KEY.quant_min]
+                    - div_node.meta["quantize_attrs"][
+                        QuantConstants.QUANT_KEY.zero_point
+                    ]
+                )
+                * div_node.meta["quantize_attrs"][QuantConstants.QUANT_KEY.scale],
+                dtype=torch.float32,
+            )
+            graph_module.register_buffer(custom_tensor_name, custom_tensor)
+            add.meta["quantize_attrs"] = div_quant_args
+            with graph_module.graph.inserting_after(rsub_node):
+                custom_attr = graph_module.graph.get_attr(custom_tensor_name)
+            with graph_module.graph.inserting_after(custom_attr):
+                new_mul = graph_module.graph.create_node(
+                    "call_function",
+                    exir_ops.edge.aten.mul.Tensor,
+                    (mask_mul.args[0], custom_attr),
+                )
+                new_mul.meta["quantize_attrs"] = div_quant_args
+                add.replace_input_with(mask_mul, new_mul)
+
+            rsub_in = rsub_node.args[1]
+            with graph_module.graph.inserting_before(add):
+                new_mul = graph_module.graph.create_node(
+                    "call_function", exir_ops.edge.aten.mul.Tensor, (div_node, rsub_in)
+                )
+                new_mul.meta["quantize_attrs"] = div_quant_args
+                add.replace_input_with(div_node, new_mul)
+            manual_mul_idx += 1
+
+    def call(self, graph_module: GraphModule):
+        self.transform(graph_module)
+        graph_module.recompile()
+        dead_code_elimination_pass(graph_module)
+        return PassResult(graph_module, True)
diff --git a/backends/samsung/builders/__init__.py b/backends/samsung/builders/__init__.py
index 978da82b370..57e181c7200 100644
--- a/backends/samsung/builders/__init__.py
+++ b/backends/samsung/builders/__init__.py
@@ -34,6 +34,7 @@
     op_mul,
     op_permute,
     op_pixel_shuffle,
+    op_placeholder,
     op_quantize,
     op_relu,
     op_reshape,
@@ -80,6 +81,7 @@
     op_mul,
     op_permute,
     op_pixel_shuffle,
+    op_placeholder,
     op_quantize,
     op_relu,
     op_reshape,
diff --git a/backends/samsung/builders/op_constant_pad_nd.py b/backends/samsung/builders/op_constant_pad_nd.py
index cc7cdc5751b..006f52619ff 100644
--- a/backends/samsung/builders/op_constant_pad_nd.py
+++ b/backends/samsung/builders/op_constant_pad_nd.py
@@ -52,5 +52,5 @@ def define_node(
             "padding": "EXPLICIT",
             "padding_type": "CONSTANT",
         }
-
+        self._update_params_qdtype(node, params)
         enn_graph.define_op(node.name, "PAD", [input_id], [output_id], params)
diff --git a/backends/samsung/builders/op_embedding.py b/backends/samsung/builders/op_embedding.py
index f37c46a56d6..c8ef686d3d3 100644
--- a/backends/samsung/builders/op_embedding.py
+++ b/backends/samsung/builders/op_embedding.py
@@ -36,6 +36,7 @@ def define_node(
         output_id = self.define_tensor(node, enn_graph, vals_to_ids)
 
         params = {"axis": 0, "input_type": "indices"}
+        self._update_params_qdtype(node, params)
         enn_graph.define_op(
             node.name, "GATHER", [input_id, weight_id], [output_id], params
         )
diff --git a/backends/samsung/builders/op_placeholder.py b/backends/samsung/builders/op_placeholder.py
new file mode 100644
index 00000000000..b4b606f56ea
--- /dev/null
+++ b/backends/samsung/builders/op_placeholder.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+from executorch.backends.transforms.utils import is_param_node
+
+
+@register_node_visitor
+class PlaceholderVisitor(NodeVisitor):
+    """
+    To define input tensors.
+    This is to make the order of inputs correct.
+    """
+
+    target = "placeholder"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ) -> None:
+        if is_param_node(self.exported_program, node):
+            return
+        self.define_tensor(node, enn_graph, vals_to_ids)
diff --git a/backends/samsung/builders/op_slice_copy.py b/backends/samsung/builders/op_slice_copy.py
index 0d7a23118a0..e85b6bf60c3 100644
--- a/backends/samsung/builders/op_slice_copy.py
+++ b/backends/samsung/builders/op_slice_copy.py
@@ -38,10 +38,14 @@ def define_node(
         dim = cast(int, node.args[1])
         if dim < 0:
             dim = dim + len(in_shape)
-        start_val = cast(int, node.args[2])
+        start_val = cast(int, node.args[2]) if node.args[2] else 0
         if start_val < 0:
             start_val = start_val + in_shape[dim]
-        end_val = min(cast(int, node.args[3]), in_shape[dim])
+        end_val = (
+            in_shape[dim]
+            if len(node.args) < 4
+            else min(cast(int, node.args[3]), in_shape[dim])
+        )
         if end_val < 0:
             end_val = end_val + in_shape[dim]
 
diff --git a/backends/samsung/builders/op_sub.py b/backends/samsung/builders/op_sub.py
index af2931f298e..7dc97bfa7ca 100644
--- a/backends/samsung/builders/op_sub.py
+++ b/backends/samsung/builders/op_sub.py
@@ -36,4 +36,8 @@ def define_node(
         # output
         output_id = self.define_tensor(node, enn_graph, vals_to_ids)
 
-        enn_graph.define_op(node.name, "SUB", [input_id_1, input_id_2], [output_id])
+        params = {}
+        self._update_params_qdtype(node, params)
+        enn_graph.define_op(
+            node.name, "SUB", [input_id_1, input_id_2], [output_id], params
+        )
diff --git a/backends/samsung/enn_preprocess.py b/backends/samsung/enn_preprocess.py
index 0847ec0adeb..e0ccf2d643d 100644
--- a/backends/samsung/enn_preprocess.py
+++ b/backends/samsung/enn_preprocess.py
@@ -18,8 +18,13 @@
     ConstantPropPass,
 )
 from executorch.backends.samsung._passes.fold_qdq import FoldQDQPass
+from executorch.backends.samsung._passes.fuse_activation import FuseActivationPass
 from executorch.backends.samsung._passes.insert_qdq import InsertQDQPass
+from executorch.backends.samsung._passes.remove_useless_ops import RemoveUselessOpPass
 from executorch.backends.samsung._passes.replace_scalar_ops import ReplaceOpsWithScalar
+from executorch.backends.samsung._passes.transform_quantized_mask import (
+    TransformQuantizedMaskPass,
+)
 from executorch.backends.samsung.builders.node_visitor import get_node_visitors
 from executorch.backends.samsung.serialization.compile_options import (
     ENN_COMPILE_OPTION_TITLE,
@@ -30,6 +35,7 @@
 from executorch.backends.transforms.fuse_batch_norm_with_conv import (
     FuseBatchNormWithConvPass,
 )
+from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
 
 from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass
 
@@ -59,9 +65,13 @@ def preprocess(
 
         enn_preprocess_passes = PassManager(
             passes=[
+                RemoveUselessOpPass(),
+                RemoveCloneOpsTransform(),
                 AnnotateQparamsPass(edge_program),
+                FuseActivationPass(),
                 FoldQDQPass(),
                 ConstantPropPass(edge_program),
+                TransformQuantizedMaskPass(edge_program),
                 Conv1dToConv2d(edge_program),
                 FuseBatchNormWithConvPass(edge_program),
                 AddmmToLinearTransform(),
@@ -79,6 +89,7 @@ def preprocess(
         node_visitors = get_node_visitors(edge_program)
 
         vals_to_ids: Dict[torch.fx.Node, int] = {}
+        placeholder_vistor = node_visitors["placeholder"]
         for node in pass_result.graph_module.graph.nodes:
             if node.op == "call_function":
                 logging.info(f"Visiting: {node}, {node.target.__name__}")
@@ -90,9 +101,11 @@ def preprocess(
                     raise RuntimeError(
                         f"{node.target.__name__}" " is not supported in ENN Delegate"
                     )
+            elif node.op == "placeholder":
+                logging.info(f"Visiting input of graph: {node}")
+                placeholder_vistor.define_node(node, enn_graph, vals_to_ids)
             elif node.op in [
                 "get_attr",
-                "placeholder",
                 "output",
             ]:
                 continue
diff --git a/backends/samsung/partition/enn_partitioner.py b/backends/samsung/partition/enn_partitioner.py
index 368d069c380..03fad83b32c 100644
--- a/backends/samsung/partition/enn_partitioner.py
+++ b/backends/samsung/partition/enn_partitioner.py
@@ -38,6 +38,7 @@
     exir_ops.edge.aten.sub.Scalar,
     exir_ops.edge.aten.mul.Scalar,
     exir_ops.edge.aten.div.Scalar,
+    exir_ops.edge.aten.clone.default,
 ]
 
 
diff --git a/backends/samsung/quantizer/annotator.py b/backends/samsung/quantizer/annotator.py
index 31015698006..ea29c0c90c5 100644
--- a/backends/samsung/quantizer/annotator.py
+++ b/backends/samsung/quantizer/annotator.py
@@ -55,11 +55,7 @@ def annotate(graph: Graph, quant_config: QuantizationConfig) -> None:
 
 
 def _is_annotated(nodes: List[Node]):
-    """
-    Given a list of nodes (that represents an operator pattern),
-    return True if any of the node
-    is annotated, otherwise return False
-    """
+    # Checking if nodes are annotated.
     annotated = False
     for node in nodes:
         annotated = annotated or (
@@ -80,10 +76,7 @@ def _is_fake_tensor(node: Node):
 
 
 def _is_float_tensor(node: Node):
-    """Check if the node's tensor is a float tensor,
-    so that we can skip quantization for the node
-    since observers only works with float Tensors
-    """
+    # checking if the node is quantized.
     if not _is_fake_tensor(node):
         return False
     return node.meta["val"].dtype in [torch.float32, torch.float16]
@@ -272,18 +265,7 @@ def annotate_2in1out_with_SharedQuant(
     # skipping quantization if 1st input is not float.
     if _is_annotated([node]) or not _is_float_tensor(input0):
         return
-    if (
-        isinstance(input0, Node)
-        and isinstance(input1, float)
-        and not _get_quantization_annotation(input0)
-    ):
-        return
-    if (
-        isinstance(input0, float)
-        and isinstance(input1, Node)
-        and not _get_quantization_annotation(input1)
-    ):
-        return
+
     if isinstance(input0, Node) and isinstance(input1, Node):
         shared_qspec = SharedQuantizationSpec((input0, node))
         input_qspec_map[input0] = quant_config.input_activation
@@ -322,7 +304,6 @@ def annotate_2in1out_with_SharedQuant(
 def annotate_add_ops_with_SharedQuant(
     node: Node, quant_config: QuantizationConfig
 ) -> None:
-
     input_qspec_map = {}
     input0 = node.args[0]
     input1 = node.args[1]
@@ -578,26 +559,10 @@ def annotate_index(node: Node, quant_config: QuantizationConfig) -> None:
 )
 def annotate_index_put(node: Node, quant_config: QuantizationConfig) -> None:
     input_qspec_map = {}
-    input = node.args[0]  # from KVCache in LLAMA
     value = node.args[2]  # from linear projection layer
-    assert isinstance(input, Node)
-    assert isinstance(value, Node)
-
-    if _is_annotated([node]) or not _is_float_tensor(input):
-        return
 
-    # get QuantAnnot from input path
-    shared_quant_node = _get_quantization_annotation(input)
-    if shared_quant_node:
-        shared_qspec = SharedQuantizationSpec((shared_quant_node, node))
-        input_qspec_map[input] = shared_qspec
-        input_qspec_map[value] = shared_qspec
-        output_qspec = shared_qspec
-    else:
-        # if no QuantAnnot in input path, asign the default QuantAnnot from quant_config.
-        input_qspec_map[input] = quant_config.input_activation
-        input_qspec_map[value] = SharedQuantizationSpec((input, node))
-        output_qspec = SharedQuantizationSpec((input, node))
+    input_qspec_map[value] = quant_config.input_activation
+    output_qspec = SharedQuantizationSpec((value, node))
 
     node.meta["quantization_annotation"] = QuantizationAnnotation(
         input_qspec_map=input_qspec_map,
@@ -686,10 +651,11 @@ def annotate_embedding(node: Node, quant_config: QuantizationConfig) -> None:
         return
 
     input_qspec_map[weight] = quant_config.input_activation
+    shared_qspec = SharedQuantizationSpec((weight, node))
 
     node.meta["quantization_annotation"] = QuantizationAnnotation(
         input_qspec_map=input_qspec_map,
-        output_qspec=quant_config.output_activation,
+        output_qspec=shared_qspec,
         _annotated=True,
     )
 
@@ -822,50 +788,48 @@ def annotate_batch_norm(node: Node, quant_config: QuantizationConfig) -> None:
 # CASE 11: Sigmoid
 @register_annotator([torch.ops.aten.sigmoid, torch.ops.aten.sigmoid.default])
 def annotate_sigmoid(node: Node, quant_config: QuantizationConfig) -> None:
-    if _is_annotated([node]):
+    input_act = node.args[0]
+    # skipping quantization if 1st input is not float.
+    if _is_annotated([node]) or not _is_float_tensor(input_act):
         return
 
+    input_act_qspec = quant_config.input_activation
     input_qspec_map = {}
-    input_act = node.args[0]
-    input_qspec_map[input_act] = quant_config.input_activation
+    if _is_float_tensor(input_act):
+        input_qspec_map[input_act] = input_act_qspec
 
-    assert isinstance(input_act, Node)
+    # bias observer setting
     out_qconf = quant_config.output_activation
+    if out_qconf.quant_max is not None and out_qconf.quant_min is not None:
+        quant_max = out_qconf.quant_max
+        quant_min = out_qconf.quant_min
+    else:
+        quant_max = torch.iinfo(out_qconf.dtype).max
+        quant_min = torch.iinfo(out_qconf.dtype).min
 
-    q_max = (
-        torch.iinfo(out_qconf.dtype).max
-        if out_qconf.quant_max is None
-        else out_qconf.quant_max
-    )
-    q_min = (
-        torch.iinfo(out_qconf.dtype).min
-        if out_qconf.quant_min is None
-        else out_qconf.quant_min
-    )
-
-    scale = 1 / (q_max - q_min + 1)
+    quant_scale = 1 / (quant_max - quant_min + 1)
 
-    bias_obs_ctr = FixedQParamsObserver.with_args(
-        scale=scale,
+    bias_observer_setting = FixedQParamsObserver.with_args(
+        scale=quant_scale,
         zero_point=0,
         dtype=quant_config.output_activation.dtype,
         qscheme=torch.torch.per_tensor_affine,
-        quant_max=q_max,
-        quant_min=q_min,
+        quant_max=quant_max,
+        quant_min=quant_min,
     )
 
-    # make sigmoid map to the range between 0~1
-    out_act_quantization_spec = QuantizationSpec(
+    # output spec with bias oberver
+    output_act_qspec = QuantizationSpec(
         dtype=quant_config.output_activation.dtype,
-        quant_max=q_max,
-        quant_min=q_min,
-        observer_or_fake_quant_ctr=bias_obs_ctr,
+        quant_max=quant_max,
+        quant_min=quant_min,
+        observer_or_fake_quant_ctr=bias_observer_setting,
         qscheme=torch.torch.per_tensor_affine,
     )
 
     if _is_float_tensor(node):
         node.meta["quantization_annotation"] = QuantizationAnnotation(
             input_qspec_map=input_qspec_map,
-            output_qspec=out_act_quantization_spec,
+            output_qspec=output_act_qspec,
             _annotated=True,
         )
diff --git a/backends/samsung/quantizer/qconfig.py b/backends/samsung/quantizer/qconfig.py
index f32c8d39796..03bc9e6f509 100644
--- a/backends/samsung/quantizer/qconfig.py
+++ b/backends/samsung/quantizer/qconfig.py
@@ -10,8 +10,10 @@
 
 import torch
 from torchao.quantization.pt2e import (
-    FakeQuantize,
+    FusedMovingAvgObsFakeQuantize,
     MinMaxObserver,
+    MovingAverageMinMaxObserver,
+    MovingAveragePerChannelMinMaxObserver,
     PerChannelMinMaxObserver,
 )
 from torchao.quantization.pt2e.quantizer import QuantizationSpec
@@ -64,9 +66,7 @@ def _get_activation_qspec(
 
     qscheme = torch.per_tensor_symmetric if is_symmetric else torch.per_tensor_affine
     if is_qat:
-        observer_or_fake_quant = FakeQuantize.with_args(
-            observer=observer_cls, eps=eps_value
-        )
+        observer_or_fake_quant = FusedMovingAvgObsFakeQuantize.with_args(eps=eps_value)
     else:
         observer_or_fake_quant = observer_cls.with_args(eps=eps_value)
 
@@ -103,8 +103,14 @@ def _get_weight_qspec(
         observer_cls = PerChannelMinMaxObserver
 
     if is_qat:
-        observer_or_fake_quant = FakeQuantize.with_args(
-            observer=observer_cls, eps=eps_value
+        observer_cls = FusedMovingAvgObsFakeQuantize
+        if not is_per_channel:
+            weight_qat_observer = MovingAverageMinMaxObserver
+        else:
+            weight_qat_observer = MovingAveragePerChannelMinMaxObserver
+        observer_or_fake_quant = observer_cls.with_args(
+            eps=eps_value,
+            observer=weight_qat_observer,
         )
     else:
         observer_or_fake_quant = observer_cls.with_args(eps=eps_value)
@@ -134,41 +140,3 @@ def get_a8w8_enn_quant_config(
         bias=bias_quantization_spec,
     )
     return quantization_config
-
-
-class QuantInfo:
-    def __init__(self, torch_dtype: torch.dtype, string: str):
-        self._torch_dtype = torch_dtype
-        self._string = string
-
-    @property
-    def torch_dtype(self):
-        return self._torch_dtype
-
-    @property
-    def string(self):
-        return self._string
-
-
-class QuantInfoManager:
-    QUANT_INFO_MAP = {
-        Precision.A8W8: (QuantInfo(torch.int8, "INT8"), QuantInfo(torch.int8, "INT8")),
-    }
-    FP_INFO = (
-        QuantInfo(torch.float32, "FLOAT32"),
-        QuantInfo(torch.float32, "FLOAT32"),
-    )
-
-    def __init__(self):
-        self.precision = None
-
-    def set_precision(self, precision: Precision):
-        self.precision = precision
-
-    @property
-    def weight_precison(self) -> Optional[QuantInfo]:
-        return self.QUANT_INFO_MAP.get(self.precision, self.FP_INFO)[0]
-
-    @property
-    def act_precision(self) -> Optional[QuantInfo]:
-        return self.QUANT_INFO_MAP.get(self.precision, self.FP_INFO)[1]
diff --git a/backends/samsung/quantizer/quantizer.py b/backends/samsung/quantizer/quantizer.py
index cf46677d000..83e43f13956 100644
--- a/backends/samsung/quantizer/quantizer.py
+++ b/backends/samsung/quantizer/quantizer.py
@@ -11,10 +11,7 @@
 from torchao.quantization.pt2e.quantizer import Quantizer
 
 from .annotator import annotate
-from .qconfig import get_quant_config, Precision, QuantInfoManager
-
-
-global_quant_info = QuantInfoManager()
+from .qconfig import get_quant_config, Precision
 
 
 class EnnQuantizer(Quantizer):
@@ -23,7 +20,6 @@ def __init__(self):
         super().__init__()
 
         self._precision = Precision.A8W8
-        global_quant_info.set_precision(self._precision)
         self._is_per_channel = True
         self._is_qat = False
         self.custom_quant_annotations: Sequence[Callable] = []
@@ -31,7 +27,6 @@ def __init__(self):
     def setup_precision(self, quant_dtype: Precision) -> None:
         assert quant_dtype in Precision, f"No support for Precision {quant_dtype}."
         self._precision = quant_dtype
-        global_quant_info.set_precision(self._precision)
 
     def setup_quant_params(
         self, quant_dtype: Precision, is_per_channel=True, is_qat=False
diff --git a/backends/samsung/serialization/enn_graph_schema.py b/backends/samsung/serialization/enn_graph_schema.py
index 5209a8672ee..8448854fe22 100644
--- a/backends/samsung/serialization/enn_graph_schema.py
+++ b/backends/samsung/serialization/enn_graph_schema.py
@@ -90,7 +90,11 @@ def define_tensor(  # noqa: C901
             )
             tensor.AddQuantizeParam(q_dtype, scales, zero_points)
 
-            if need_quantize and data is not None:
+            if (
+                need_quantize
+                and data is not None
+                and data.dtype in (torch.float16, torch.float32, np.float32, np.float16)
+            ):
                 if isinstance(data, np.ndarray):
                     data = torch.tensor(data)
                 data = quantize_tensor(
diff --git a/backends/samsung/test/models/test_torchvision_vit.py b/backends/samsung/test/models/test_torchvision_vit.py
index 127bc43b5c8..bab146d9979 100644
--- a/backends/samsung/test/models/test_torchvision_vit.py
+++ b/backends/samsung/test/models/test_torchvision_vit.py
@@ -17,9 +17,6 @@
 
 class TestMilestoneTorchVisionViT(unittest.TestCase):
 
-    # This model is skipped because transformers=5.0.0rc1. 
-    # it will re-enable after fixing the issue
-    @unittest.skip
     def test_torchvision_vit_fp16(self):
         torch.manual_seed(8)
         model = TorchVisionViTModel().get_eager_model()
diff --git a/backends/samsung/test/tester/samsung_tester.py b/backends/samsung/test/tester/samsung_tester.py
index 7f1f65f0d6c..a6eb170a61b 100644
--- a/backends/samsung/test/tester/samsung_tester.py
+++ b/backends/samsung/test/tester/samsung_tester.py
@@ -12,10 +12,7 @@
 from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner
 from executorch.backends.samsung.quantizer.quantizer import EnnQuantizer, Precision
 from executorch.backends.samsung.test.utils import RuntimeExecutor
-from executorch.backends.samsung.utils.export_utils import (
-    get_edge_compile_config,
-    get_enn_pass_list,
-)
+from executorch.backends.samsung.utils.export_utils import get_edge_compile_config
 from executorch.backends.test.harness import Tester as TesterBase
 from executorch.backends.test.harness.stages import StageType
 from executorch.backends.transforms.decompose_sdpa import (
@@ -97,7 +94,7 @@ def __init__(
         compile_specs = compile_specs or []
         self.partitioners = [EnnPartitioner(compile_specs=compile_specs)]
         self.edge_compile_config = edge_compile_config or get_edge_compile_config()
-        self.transform_passes = transform_passes or get_enn_pass_list()
+        self.transform_passes = transform_passes
         self.edge_dialect_program = None
 
     def run(
diff --git a/backends/samsung/test/utils/datasets.py b/backends/samsung/test/utils/datasets.py
new file mode 100644
index 00000000000..be935dff271
--- /dev/null
+++ b/backends/samsung/test/utils/datasets.py
@@ -0,0 +1,261 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Literal, Optional, Tuple
+
+import torch
+import torchvision.transforms.v2 as vision_transform_v2
+from executorch.backends.samsung.test.utils.utils import GreedyLM
+from torchsr import transforms as sr_transforms
+from torchvision import transforms as vision_transforms
+from torchvision.datasets import ImageFolder, VOCSegmentation
+
+
+def get_quant_test_data_classify(
+    data_dir: str,
+    calinum=100,
+    testnum=500,
+    transform_compose: Optional[vision_transforms.Compose] = None,
+) -> Tuple:
+    """
+    Generate test data for quantization model
+
+    :param data_dir: Dir of dataset. Structure should be imagenet-like
+    :param calinum: Number of calibration data. Default 100
+    :param testnum: Number of test data. Default 500
+    :param transform_compose: Transforms to be applied to data.
+
+        Default:
+        [
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(),
+            transforms.Normalize(
+                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+            ),
+            transforms.Lambda(lambda x: x.unsqueeze(0)),  # Add batch dim
+        ]
+    :type data_dir: str
+    :type calinum: int
+    :type testnum: int
+    :type transform_compose: transforms.Compose | None
+    :return: (example_input, test_data)
+    """
+    if not transform_compose:
+        transform_compose = vision_transforms.Compose(
+            [
+                vision_transforms.Resize((256, 256)),
+                vision_transforms.CenterCrop(224),
+                vision_transforms.ToTensor(),
+                vision_transforms.Normalize(
+                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+                ),
+                vision_transforms.Lambda(lambda x: x.unsqueeze(0)),  # Add batch dim
+            ]
+        )
+    dataset = ImageFolder(root=data_dir, transform=transform_compose)
+    cali_data = [(dataset[i][0],) for i in range(min(calinum, len(dataset)))]
+    test_data = [dataset[i] for i in range(min(testnum, len(dataset)))]
+    example_input = (dataset[0][0],)
+    return example_input, cali_data, test_data
+
+
+def get_quant_test_data_super_resolution(
+    root_dir: str,
+    dataset_name: Literal["B100", "Set5", "Set14", "Urban100"],
+    calinum=100,
+    testnum=500,
+    transform_compose: Optional[sr_transforms.Compose] = None,
+) -> Tuple:
+    """
+    Generate test data for quantization model
+
+    :param root_dir: Dir of dataset. The real dataset should be in root_dir/SRBenchmarks/benchmark/
+    :param dataset_name: data_set name
+    :param testnum: Number of test data. Default 500
+    :param transform_compose: Transforms to be applied to data.
+        Default:
+        transform_compose = transforms.Compose(
+            [transforms.ToTensor()] # Convert Pillows Image to tensor
+        )
+    :type root_dir: str
+    :type dataset_name: "B100"|"Set5"|"Set14"|"Urban100"
+    :type calinum: int
+    :type testnum: int
+    :type transform_compose: transforms.Compose | None
+    :return: (example_input, cali_data, test_data)
+    """
+
+    class SrResize:
+        def __init__(self, expected_size: List[List[int]]):
+            self.expected_size = expected_size
+
+        def __call__(self, x):
+            return (
+                x[0].resize(self.expected_size[0]),
+                x[1].resize(self.expected_size[1]),
+            )
+
+    class SrUnsqueeze:
+        def __call__(self, x):
+            return (
+                x[0].unsqueeze(0),
+                x[1].unsqueeze(0),
+            )
+
+    if not transform_compose:
+        transform_compose = sr_transforms.Compose(
+            [
+                SrResize([[448, 448], [224, 224]]),
+                sr_transforms.ToTensor(),  # Convert Pillows Image to tensor
+                SrUnsqueeze(),
+            ]
+        )
+    from torchsr.datasets import B100, Set14, Set5, Urban100
+
+    dataset_cls_map = {
+        "B100": B100,
+        "Set5": Set5,
+        "Set14": Set14,
+        "Urban100": Urban100,
+    }
+
+    dataset_cls = dataset_cls_map.get(dataset_name)
+    assert dataset_cls
+    dataset = dataset_cls(root=root_dir, transform=transform_compose, scale=2)
+    calib_data = [(dataset[i][1],) for i in range(min(calinum, len(dataset)))]
+    test_data = [
+        (dataset[i][1], dataset[i][0]) for i in range(min(testnum, len(dataset)))
+    ]
+    example_input = (dataset[0][1],)
+    return example_input, calib_data, test_data
+
+
+def get_quant_test_data_segmentation(
+    data_dir: str,
+    calinum=100,
+    testnum=500,
+    input_transform_compose: Optional[vision_transform_v2.Compose] = None,
+    target_transform_compose: Optional[vision_transform_v2.Compose] = None,
+):
+    if not input_transform_compose:
+        input_transform_compose = vision_transform_v2.Compose(
+            [
+                vision_transform_v2.Resize([224, 224]),
+                vision_transform_v2.ToImage(),
+                vision_transform_v2.ToDtype(torch.float32, scale=True),
+                vision_transform_v2.Normalize(
+                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+                ),
+                vision_transform_v2.Lambda(lambda x: x.unsqueeze(0)),  # Add batch dim
+            ]
+        )
+    if not target_transform_compose:
+        target_transform_compose = vision_transform_v2.Compose(
+            [
+                vision_transform_v2.Resize([224, 224]),
+                vision_transform_v2.ToImage(),
+                vision_transform_v2.ToDtype(torch.long, scale=False),
+                vision_transform_v2.Lambda(lambda x: x.unsqueeze(0)),  # Add batch dim
+            ]
+        )
+    voc_dataset = VOCSegmentation(
+        data_dir,
+        "2012",
+        "val",
+        transform=input_transform_compose,
+        target_transform=target_transform_compose,
+    )
+    calib_data = [(voc_dataset[i][0],) for i in range(min(calinum, len(voc_dataset)))]
+    test_data = [voc_dataset[i] for i in range(min(testnum, len(voc_dataset)))]
+    example_input = (voc_dataset[0][0],)
+    return example_input, calib_data, test_data
+
+
+def _get_voice_dataset(
+    data_size: int, data_dir: str, labels: List[str], fixed_token_num: int
+):
+    from torch.utils.data import DataLoader
+    from torchaudio.datasets import LIBRISPEECH
+
+    def collate_fun(batch, encode_fn, mode="train"):
+        waves = []
+        text_ids = []
+        input_lengths = []
+        output_lengths = []
+
+        if mode == "train":
+            shifts = torch.randn(len(batch)) > 0.0
+
+        for i, (wave, _, text, *_) in enumerate(batch):
+            if mode == "train" and shifts[i]:
+                wave = wave[:, 160:]
+            waves.append(wave[0])
+            ids = torch.LongTensor(encode_fn(text))
+            text_ids.append(ids)
+            input_lengths.append(wave.size(1) // 320)
+            output_lengths.append(len(ids))
+
+        waves = torch.nn.utils.rnn.pad_sequence(waves, batch_first=True).unsqueeze(1)
+        labels = torch.nn.utils.rnn.pad_sequence(text_ids, batch_first=True)
+
+        return waves, labels, input_lengths, output_lengths
+
+    lm = GreedyLM(labels)
+
+    testset_url = "test-clean"
+    # testset_url = 'test-clean'
+    dataset = LIBRISPEECH(data_dir, url=testset_url)
+    data_loader = DataLoader(
+        dataset=dataset,
+        batch_size=1,
+        shuffle=True,
+        collate_fn=lambda x: collate_fun(x, lm.encode, "valid"),
+    )
+    # prepare input data
+    inputs, targets = [], []
+    in_lens, tar_lens = [], []
+
+    def _loader():
+        for waves, labels, inputs_len, targets_len in data_loader:
+            if inputs_len[0] >= fixed_token_num:
+                continue
+            zero_padding = torch.zeros([1, 1, fixed_token_num * 320 - waves.shape[2]])
+            waves = torch.concat((waves, zero_padding), axis=2)
+            yield waves, labels, [fixed_token_num + 1], targets_len
+
+    for i, (waves, labels, inputs_len, targets_len) in enumerate(
+        _loader()
+    ):  # waves, labels, input_lens, output_lens
+        inputs.append(waves)
+        targets.append(labels)
+        in_lens.append(inputs_len)
+        tar_lens.append(targets_len)
+        if i >= data_size:
+            break
+
+    return inputs, targets, in_lens, tar_lens
+
+
+def get_quant_test_data_voice(
+    data_dir: str,
+    calinum=100,
+    testnum=500,
+    fixed_out_token=300,
+    labels=None,
+):
+    if labels is None:
+        labels = [" ", *"abcdefghijklmnopqrstuvwxyz", "'", "*"]
+    dataset = _get_voice_dataset(
+        max(testnum, calinum), data_dir, labels, fixed_out_token
+    )
+    calib_data = [(dataset[0][i],) for i in range(min(calinum, len(dataset[0])))]
+    test_data = [
+        (dataset[0][i], (dataset[1][i], dataset[2][i], dataset[3][i]))
+        for i in range(min(testnum, len(dataset[0])))
+    ]
+    example_input = (dataset[0][0],)
+    return example_input, calib_data, test_data
diff --git a/backends/samsung/test/utils/quant_checkers.py b/backends/samsung/test/utils/quant_checkers.py
new file mode 100644
index 00000000000..7ae38d0c186
--- /dev/null
+++ b/backends/samsung/test/utils/quant_checkers.py
@@ -0,0 +1,240 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# Licensed under the BSD License (the "License"); you may not use this file
+# except in compliance with the License. See the license file in the root
+# directory of this source tree for more details.
+
+import dataclasses
+import logging
+from abc import abstractmethod
+
+import numpy as np
+
+import torch
+import torchaudio
+from executorch.backends.samsung.test.utils.utils import GreedyLM
+
+
+@dataclasses.dataclass
+class CheckerConfig:
+    checker: str
+    kargs: dict
+
+
+class CheckerBase:
+    necessary_params = []
+    default_params = {}
+
+    def __init__(
+        self,
+        original_module: torch.nn.Module,
+        current_module: torch.nn.Module,
+        config: CheckerConfig,
+    ):
+        self.origin_module = original_module
+        self.current_module = current_module
+        self.config = config
+        self.check_and_set_params()
+
+    @abstractmethod
+    def check(self, **kwargs):
+        pass
+
+    def check_and_set_params(self):
+        expected_list = []
+        for key in self.necessary_params:
+            if key not in self.config.kargs:
+                expected_list.append(key)
+            else:
+                setattr(self, key, self.config.kargs[key])
+        assert (
+            not expected_list
+        ), f"More args expected for {type(self)} in config.kargs: " + ",".join(
+            expected_list
+        )
+        for key in self.default_params:
+            if key not in self.config.kargs:
+                default_value = self.default_params[key]
+                logging.info(
+                    f"{key} not set in config.kargs for checker {type(self)}, using default value {default_value}"
+                )
+                setattr(self, key, default_value)
+            else:
+                setattr(self, key, self.config.kargs[key])
+
+
+CHECKER_REGISTER = {}
+
+
+def checker_register(checker_name: str):
+    def _wrapper(cls):
+        CHECKER_REGISTER[checker_name] = cls
+
+    return _wrapper
+
+
+def get_checker(origin_module, quantized_module, config) -> CheckerBase:
+    assert config.checker in CHECKER_REGISTER, (
+        f'Could not find checker "{config.checker}", registered checkers: \n\t'
+        + "\n\t".join(CHECKER_REGISTER.keys())
+    )
+    return CHECKER_REGISTER[config.checker](origin_module, quantized_module, config)
+
+
+@checker_register("classifier")
+class ClassifierChecker(CheckerBase):
+    necessary_params = ["dataset"]
+    default_params = {
+        "topktol": {
+            1: 0.9,
+            3: 0.95,
+        },
+    }
+
+    def check(self):
+        assert self.dataset
+        assert min(self.topktol.keys()) > 0, "Topk number must be positive int"
+        max_topk = max(self.topktol.keys())
+
+        print("Check Quantization Classifier...")
+
+        correct = torch.Tensor([0] * max_topk, device="cpu")
+        total = 0
+        for batch_data, _ in self.dataset:
+            batch_size = batch_data.shape[0]
+            total += batch_size
+            # TODO: Use ground truth to replace fp models' result
+            fp_out: torch.Tensor = self.origin_module(batch_data)
+            _, fp_top1 = fp_out.topk(1, dim=-1)
+            fp_top1 = fp_top1.view(1, -1)
+
+            quant_out: torch.Tensor = self.current_module(batch_data)
+            _, quant_topk = quant_out.topk(max_topk, dim=-1)
+            quant_topk = quant_topk.t()
+            for k_idx in range(max_topk):
+                correct[k_idx:] += quant_topk[k_idx].eq(fp_top1).view(-1).sum().float()
+        error_messages = []
+        msg_template = "\tFailed in checking Top{}, Target: {:.2f} vs Current: {:.2f}"
+        for topk_num, topk_tol in self.topktol.items():
+            correct_num = correct[topk_num - 1]
+            accuracy_score = correct_num / total * 100
+            print(accuracy_score)
+            if accuracy_score < topk_tol:
+
+                error_messages.append(
+                    msg_template.format(topk_num, topk_tol, accuracy_score)
+                )
+        assert not error_messages, "\n".join(["\n", *error_messages])
+        print("Check Quantization Classifier Finished.")
+
+
+@checker_register("super_resolution")
+class SRChecker(CheckerBase):
+    necessary_params = ["dataset"]
+    default_params = {"threshold": 35.0}
+
+    def check(self):
+        peak = 1.0  # Images are scaled to 0-1
+
+        def calc_unbatch_mse(x: torch.Tensor, target: torch.Tensor):
+            # We calc PSNR for each single image
+            num = torch.prod(torch.tensor(x.shape)[1:])
+            return (x - target).pow(2).sum(dim=list(range(1, len(x.shape)))).pow(
+                0.5
+            ) / num
+
+        data_num = 0
+        total_psnr = 0
+        for x, target in self.dataset:
+            data_num += len(x)
+            quant_out: torch.Tensor = self.current_module(x)
+            unbatch_mse = calc_unbatch_mse(target, quant_out)
+            unbatch_psnr = 10 * torch.log10(peak * peak / unbatch_mse)
+            total_psnr += unbatch_psnr.sum()
+        avg_psnr = total_psnr / data_num
+        assert (
+            avg_psnr > self.threshold
+        ), "PSNR need to be larger than {:.2f}, but get {:.2f}. ".format(
+            self.threshold, avg_psnr
+        )
+        print("Check Quantization Super Resolution Finished.")
+
+
+@checker_register("segmentation")
+class SegChecker(CheckerBase):
+    necessary_params = ["dataset"]
+    default_params = {"threshold": 0.7}
+
+    def check(self):
+        def calc_miou(target: torch.Tensor, pred: torch.Tensor, class_num=21):
+            target = target.numpy().flatten()
+            mask = target != 255  # Don't consider edge
+            target = target[mask]
+            pred = pred.numpy().flatten()[mask]
+            target *= class_num
+            target += pred
+            # I of class a: mixmat[a, a]
+            # U of class a: mixmat[a, :].sum() + mixmat[:, a].sum - mixmat[a, a]
+            mixmat = np.bincount(target, minlength=class_num**2).reshape(
+                (class_num, class_num)
+            )
+            i = mixmat.diagonal()
+            return np.nanmean((i / (mixmat.sum(0) + mixmat.sum(1) - i)))
+
+        data_num = 0
+        total_miou = 0
+        for x, targets in self.dataset:
+            data_num += len(x)
+            quant_out: torch.Tensor = self.current_module(x)["out"].argmax(1)
+            total_miou += np.sum(
+                [calc_miou(target, pred) for target, pred in zip(targets, quant_out)]
+            )
+        avg_miou_percentage = total_miou / data_num * 100
+        assert (
+            avg_miou_percentage > self.threshold
+        ), "MIOU need to be larger than {:.2f}%, but get {:.2f}%. ".format(
+            self.threshold, avg_miou_percentage
+        )
+        print("Check Quantization Segmentation  Finished.")
+
+
+@checker_register("wave2letter")
+class W2lChecker(CheckerBase):
+    necessary_params = ["dataset", "labels"]
+    default_params = {"threshold": 0.7}
+
+    def check(self):
+        criterion = torch.nn.CTCLoss(blank=len(self.labels) - 1, zero_infinity=True)
+        data_num = 0
+        lm = GreedyLM(self.labels)
+        c_ldist_sum, c_ref_len_sum = 0, 0
+        w_ldist_sum = 0
+        test_loss_sum = 0
+        for x, (targets, input_lens, output_lens) in self.dataset:
+            data_num += len(x)
+            quant_out: torch.Tensor = self.current_module(x)
+            quant_out = quant_out.view((1, 29, quant_out.numel() // 29))
+            loss = criterion(
+                quant_out.permute(2, 0, 1), targets, input_lens, output_lens
+            )
+            test_loss_sum += loss.item()
+            decoded_preds = lm.decode_ctc(quant_out)
+            decoded_targets = lm.decode_ids(targets)
+            decoded_targets = [t[:len] for t, len in zip(decoded_targets, output_lens)]
+
+            for hypo, ref in zip(decoded_preds, decoded_targets):
+                c_ldist_sum += torchaudio.functional.edit_distance(ref, hypo)
+                c_ref_len_sum += len(ref)
+                hypo_words = "".join(hypo).split()
+                ref_words = "".join(ref).split()
+                w_ldist_sum += torchaudio.functional.edit_distance(
+                    ref_words, hypo_words
+                )
+        test_loss = test_loss_sum / len(self.dataset)
+        assert (
+            test_loss < self.threshold
+        ), "CTC need to be smaller than {:.2f}%, but get {:.2f}%. ".format(
+            self.threshold, test_loss
+        )
+        return self
diff --git a/backends/samsung/utils/export_utils.py b/backends/samsung/utils/export_utils.py
index e075f4dca0b..a6d87ba933e 100644
--- a/backends/samsung/utils/export_utils.py
+++ b/backends/samsung/utils/export_utils.py
@@ -9,14 +9,11 @@
 
 import executorch.exir as exir
 import torch
-from executorch.backends.samsung._passes.fuse_conv_act import FuseConvActPass
-from executorch.backends.samsung._passes.remove_useless_ops import RemoveUselessOpPass
 from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner
 from executorch.backends.samsung.quantizer.quantizer import EnnQuantizer, Precision
 from executorch.backends.transforms.decompose_sdpa import (
     DecomposeScaledDotProductAttention,
 )
-from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
 from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.backend_details import CompileSpec
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -44,14 +41,6 @@ def get_edge_compile_config():
     )
 
 
-def get_enn_pass_list() -> List[PassType]:
-    return [
-        RemoveUselessOpPass(),
-        RemoveCloneOpsTransform(),
-        FuseConvActPass(),
-    ]
-
-
 def quantize_module(
     module: torch.nn.Module,
     inputs,
@@ -82,12 +71,8 @@ def to_edge_transform_and_lower_to_enn(
 ) -> exir.ExecutorchProgramManager:
     assert compile_specs is not None, "For now, we must deliver complile specs"
     prog = torch.export.export(module, inputs)
-    pass_list = get_enn_pass_list()
-    if custom_pass_config:
-        pass_list.extend(custom_pass_config)
     return to_edge_transform_and_lower(
         prog,
-        pass_list,
-        {"forward": [EnnPartitioner(compile_specs)]},
+        partitioner={"forward": [EnnPartitioner(compile_specs)]},
         compile_config=get_edge_compile_config(),
     )
diff --git a/examples/samsung/scripts/mobilebert_finetune_QAT.py b/examples/samsung/scripts/mobilebert_finetune_QAT.py
new file mode 100644
index 00000000000..fb23722223d
--- /dev/null
+++ b/examples/samsung/scripts/mobilebert_finetune_QAT.py
@@ -0,0 +1,686 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import argparse
+import os
+from pathlib import Path
+from typing import Optional
+
+import evaluate
+import numpy as np
+import requests
+
+import torch
+import torch.nn as nn
+import torchao
+
+from datasets import ClassLabel, DatasetDict, load_dataset
+
+from executorch.backends.samsung.quantizer import EnnQuantizer, Precision
+from executorch.backends.samsung.serialization.compile_options import (
+    gen_samsung_backend_compile_spec,
+)
+from executorch.backends.samsung.utils.export_utils import (
+    to_edge_transform_and_lower_to_enn,
+)
+from executorch.examples.samsung.utils import save_tensors
+from executorch.exir import ExecutorchBackendConfig
+from executorch.extension.export_util.utils import save_pte_program
+from torch.optim import AdamW
+from torch.utils.data import DataLoader
+from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
+from tqdm import tqdm
+
+from transformers import (
+    AutoTokenizer,
+    MobileBertForSequenceClassification,
+    Trainer,
+    TrainingArguments,
+)
+
+# For removing the tokenizer warning
+os.environ["TOKENIZERS_PARALLELISM"] = "true"
+
+
+class MobileBertFinetune:
+    def __init__(self, metric, args):
+        self.tokenizer = self.load_tokenizer()
+        self.artifact = args.artifact
+        self.max_length = args.max_length
+        self.csv_dataset = args.csv_dataset
+        self.metric = metric if metric is not None else evaluate.load("accuracy")
+        self.batch_size_training = args.batch_size
+        self.num_epochs = args.num_epochs_for_finetune
+
+    def load_tokenizer(self):
+        return AutoTokenizer.from_pretrained("google/mobilebert-uncased")
+
+    def load_CSV_dataset(self):
+        # grab dataset
+        if self.csv_dataset is None:
+            url = "https://raw.githubusercontent.com/susanli2016/NLP-with-Python/master/data/title_conference.csv"
+            print(
+                "Because a CSV file is not assigned, a CSV file is downloaded from ",
+                str(url),
+            )
+            response = requests.get(url, allow_redirects=True)
+            cvs_file_path = os.path.join(self.artifact, "title_conference.csv")
+            if response.status_code == 200:
+                with open(cvs_file_path, "wb") as f:
+                    f.write(response.content)
+                print("CSV file downloaded successfully!\n\n")
+            else:
+                print(
+                    f"Failed to download the file. Status code: {response.status_code}\n\n"
+                )
+        else:
+            cvs_file_path = self.csv_dataset
+
+        # load dataset
+        try:
+            loaded_datasets = load_dataset("csv", data_files=cvs_file_path)
+            raw_labels = loaded_datasets["train"].unique("Conference")
+        except:
+            print(f"Error: the file '{cvs_file_path}' was not avaiable.")
+
+        # Creating ClassLabel
+        class_labels = ClassLabel(names=raw_labels)
+        labels = {key: index for index, key in enumerate(raw_labels)}
+
+        def encode_labels(example):
+            example["label"] = class_labels.str2int(example["Conference"])
+            return example
+
+        loaded_datasets = loaded_datasets.map(encode_labels)
+
+        split_dataset = loaded_datasets["train"].train_test_split(
+            test_size=0.15, seed=51
+        )
+        raw_datasets = DatasetDict(
+            {"train": split_dataset["train"], "validation": split_dataset["test"]}
+        )
+
+        if self.max_length is None:
+
+            def preprocess_function(examples):
+                return self.tokenizer(examples["Title"], truncation=True, padding=True)
+
+        else:
+
+            def preprocess_function(examples):
+                return self.tokenizer(
+                    examples["Title"],
+                    truncation=True,
+                    padding="max_length",
+                    max_length=self.max_length,
+                )
+
+        print("Preprocessing data...")
+        tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)
+        tokenized_datasets.set_format(
+            type="torch", columns=["input_ids", "attention_mask", "label"]
+        )
+        return tokenized_datasets, labels
+
+    # Define compute metrics function
+    def compute_metrics(self, eval_pred):
+        logits, labels = eval_pred
+        predictions = np.argmax(logits, axis=-1)
+        return self.metric.compute(predictions=predictions, references=labels)
+
+    def training(
+        self,
+        model,
+        tokenized_datasets,
+        tokenizer,
+        compute_metrics,
+        batch_size=8,
+        num_epochs=3,
+        device="cpu",
+    ):
+        # Training arguments
+        training_args = TrainingArguments(
+            output_dir="./results",
+            eval_strategy="epoch",
+            save_strategy="epoch",
+            learning_rate=2e-5,
+            per_device_train_batch_size=batch_size,
+            per_device_eval_batch_size=batch_size,
+            num_train_epochs=num_epochs,
+            weight_decay=0.01,
+            logging_dir="./logs",
+            load_best_model_at_end=True,
+            metric_for_best_model="accuracy",
+            dataloader_pin_memory=False if device == torch.device(type="cpu") else True,
+        )
+
+        # Trainer
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=tokenized_datasets["train"],
+            eval_dataset=tokenized_datasets["validation"],
+            processing_class=tokenizer,
+            compute_metrics=compute_metrics,
+        )
+        return trainer
+
+    def get_finetune_mobilebert(self, artifacts_dir):
+        # Pretrained bert's output ranges in a large scale. It is challenge for enn backend to support directly.
+        # Please finetune mobilebert on specific tasks, make sure that bert's output and hidden states are friendly
+        # to resource-constraint device.
+
+        # Load data for classification
+        tokenized_datasets, labels = self.load_CSV_dataset()
+
+        artifacts_dir = artifacts_dir if artifacts_dir is not None else "./mobilebert"
+        need_finetune = True
+        os.makedirs(artifacts_dir, exist_ok=True)
+        pretrained_required_files = ["config.json", "model.safetensors"]
+        path = Path(artifacts_dir)
+        if (path / pretrained_required_files[0]).exists() and (
+            path / pretrained_required_files[1]
+        ).exists():
+            need_finetune = False
+
+        # get pre-trained mobilebert
+        model = MobileBertForSequenceClassification.from_pretrained(
+            "google/mobilebert-uncased" if need_finetune else artifacts_dir,
+            num_labels=len(labels),
+            # return_dict=False,
+        )
+
+        if not need_finetune:
+            return model.eval(), tokenized_datasets
+
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model.to(device)
+
+        trainer = self.training(
+            model,
+            tokenized_datasets,
+            self.tokenizer,
+            self.compute_metrics,
+            self.batch_size_training,
+            self.num_epochs,
+            device,
+        )
+
+        # Train the model
+        print(
+            "\n==== Starting training for fine tuning for ",
+            self.num_epochs,
+            "epochs....",
+        )
+        trainer.train()
+
+        # Evaluate on validation set
+        print("\n==== Starting evaluating the fine tuned model ....")
+        FP_eval_results = trainer.evaluate()
+        print("The eval results of the trained model =", FP_eval_results)
+
+        model.save_pretrained(artifacts_dir)
+
+        return model, tokenized_datasets
+
+
+def get_dataset(data_size, tokenized_datasets, batch_size, num_workers, device):
+    # making dataset for calibrating the model...
+    inputs, labels = [], []
+    for i, (batch) in enumerate(
+        tqdm(
+            DataLoader(
+                tokenized_datasets["validation"],
+                batch_size=batch_size,
+                shuffle=True,
+                num_workers=num_workers,
+                pin_memory=False if device == torch.device(type="cpu") else True,
+            )
+        )
+    ):
+        inputs.append((batch["input_ids"], batch["attention_mask"]))
+        labels.append(batch["label"].tolist())
+        if i >= int(data_size):
+            break
+
+    return inputs, labels
+
+
+class AverageMeter(object):
+    """Computes and stores the average and current value"""
+
+    def __init__(self, name, fmt=":f"):
+        self.name = name
+        self.fmt = fmt
+        self.reset()
+
+    def reset(self):
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+    def __str__(self):
+        fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})"
+        return fmtstr.format(**self.__dict__)
+
+
+def trainingQuantModel_QAT(
+    model, tokenized_datasets, batch_size, workers, device, num_epochs
+):
+    avgloss = AverageMeter("Loss", "1.5f")
+
+    model = torchao.quantization.pt2e.move_exported_model_to_train(model)
+    optimizer = AdamW(model.parameters(), lr=3e-5, weight_decay=0.01)
+
+    criterion = nn.CrossEntropyLoss()
+    model.to(device)
+    print(f"\n=== Starting training on {device} for {num_epochs} epochs...")
+
+    data_loader = DataLoader(
+        tokenized_datasets["train"],
+        batch_size=batch_size,
+        shuffle=True,
+        num_workers=workers,
+        pin_memory=False if device == torch.device(type="cpu") else True,
+    )
+
+    # --- Training and Evaluation Loop ---
+    for nepoch in range(num_epochs):
+        for batch in tqdm(data_loader, desc=f"Training Epoch {nepoch + 1}"):
+            batch_input_ids = batch["input_ids"].to(device)
+            batch_attention_mask = batch["attention_mask"].to(device)
+            batch_label = batch["label"].to(device)
+            logits = model(batch_input_ids, batch_attention_mask).logits
+            loss = criterion(logits, batch_label)
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()
+            avgloss.update(loss, batch["label"].size(0))
+        print(f"Epoch {nepoch + 1} | Average Training Loss: {avgloss.avg:.4f} \n")
+
+    return torchao.quantization.pt2e.move_exported_model_to_eval(model)
+
+
+# Eval a mobileBert model
+def evaluatingQuantModel_mobileBert(
+    quantized_model,
+    tokenized_datasets,
+    device,
+    batch_size_edge,
+    workers,
+    metric=None,
+):
+    if metric is None:
+        metric = evaluate.load("glue", "mrpc")
+
+    # Collect predictions
+    predictions = []
+    labels = []
+
+    for batch in tqdm(
+        DataLoader(
+            tokenized_datasets["validation"],
+            batch_size=batch_size_edge,
+            shuffle=True,
+            num_workers=workers,
+            pin_memory=True,
+        )
+    ):
+        batch_input_ids = batch["input_ids"].to(device)
+        batch_attention_mask = batch["attention_mask"].to(device)
+        outputs = quantized_model(batch_input_ids, batch_attention_mask)
+        logits = outputs.logits
+        preds = torch.argmax(logits, dim=-1)
+        predictions.extend(preds.tolist())
+        labels.extend(batch["label"].tolist())
+
+    # Compute accuracy and F1
+    results = metric.compute(predictions=predictions, references=labels)
+    print("Evaluation results:", results)
+
+    return results
+
+
+def build_aten_to_qat_mobilebert(
+    model,
+    inputs,
+    quant_dtype: Optional[Precision] = None,
+    is_per_channel=True,
+    is_qat=True,
+    tokenized_datasets="",
+    batch_size_training=1,
+    batch_size_edge=1,
+    num_workers=8,
+    num_epochs=100,
+    qat_file_name="mobilebert_qat_model.pt2",
+    qat_file_name_for_cpu="mobilebert_qat_model_for_cpu.pt2",
+    metric=None,
+    device="cpu",
+):
+    # Evaluating a FP32 model
+    print("==================================================")
+    print("\nEvaluation a FP model")
+    FP_results = evaluatingQuantModel_mobileBert(
+        model.eval().to(device),
+        tokenized_datasets,
+        device,
+        batch_size_training,
+        num_workers,
+        metric,
+    )
+
+    # Training a quantized model with QAT
+    print("\n\n==================================================")
+    print("==== Starting QAT(Quantization Aware Training)....")
+    quantizer = EnnQuantizer()
+    quantizer.setup_quant_params(quant_dtype, is_per_channel, is_qat)
+    batch_dim = torch.export.Dim("batch_size", min=1, max=batch_size_training)
+
+    size_input_ids = (batch_size_training, inputs[0].size(1))
+    size_attention_mask = (batch_size_training, inputs[1].size(1))
+    vector_input_ids = torch.randint(0, 256, size_input_ids).to(device)
+    vector_attention_mask = torch.randint(0, 1, size_attention_mask).to(device)
+    example_inputs = (
+        vector_input_ids,
+        vector_attention_mask,
+    )
+
+    exported_model = torch.export.export(
+        model.eval().to(device),
+        example_inputs,
+        dynamic_shapes={"input_ids": {0: batch_dim}, "attention_mask": {0: batch_dim}},
+    ).module()
+    prepared_model = prepare_pt2e(exported_model, quantizer)
+
+    prepared_model = trainingQuantModel_QAT(
+        prepared_model,
+        tokenized_datasets,
+        batch_size=batch_size_training,
+        workers=num_workers,
+        device=device,
+        num_epochs=num_epochs,
+    )
+
+    quantized_model = convert_pt2e(prepared_model)
+
+    # Evaluating a quantized model with QAT
+    print("\nEvaluation a quantized model")
+    results = evaluatingQuantModel_mobileBert(
+        quantized_model.to(device),
+        tokenized_datasets,
+        device,
+        batch_size_training,
+        num_workers,
+        metric,
+    )
+
+    print("\n------------------------------------")
+    print("     FP32 Model, accuracy=", FP_results["accuracy"])
+    print("Quantized Model, accuracy=", results["accuracy"])
+    print(
+        "  Accurarcy drop, accuracy=",
+        (results["accuracy"] / FP_results["accuracy"]) * 100,
+        "%",
+    )
+    print("------------------------------------")
+    print("==== Model Evaluation complete! \n\n")
+
+    # Saving a quantized model for GPU servers
+    size_input_ids = (batch_size_edge, inputs[0].size(1))
+    size_attention_mask = (batch_size_edge, inputs[1].size(1))
+    vector_input_ids = torch.randint(0, 256, size_input_ids).to(device)
+    vector_attention_mask = torch.randint(0, 1, size_attention_mask).to(device)
+    example_inputs = (
+        vector_input_ids,
+        vector_attention_mask,
+    )
+
+    exported_model = torch.export.export(quantized_model, example_inputs)
+    torch.export.save(exported_model, qat_file_name)
+    print(f"QAT model for {device} is saved in ", qat_file_name)
+
+    # Saving a quantized model for CPU servers
+    device_cpu = torch.device(type="cpu")
+    quantized_model = quantized_model.to(device_cpu)
+    quantized_model = removing_gpu_node_in_graph(quantized_model)
+    cpu_vector_input_ids = torch.randint(0, 256, size_input_ids).to(device_cpu)
+    cpu_vector_attention_mask = torch.randint(0, 1, size_attention_mask).to(device_cpu)
+    example_inputs_cpu = (
+        cpu_vector_input_ids,
+        cpu_vector_attention_mask,
+    )
+
+    exported_model = torch.export.export(quantized_model, example_inputs_cpu)
+    torch.export.save(exported_model, qat_file_name_for_cpu)
+    print(f"QAT model for {device_cpu} is saved in ", qat_file_name_for_cpu)
+
+    # Reloading a quantized model for GPU servers
+    exported_model = torch.export.load(qat_file_name)
+    print("==== QAT Training complete! \n\n")
+
+    return exported_model.module()
+
+
+def removing_gpu_node_in_graph(model):
+    graph = model.graph
+    for node in list(graph.nodes):
+        if node.target == torch.ops.aten._assert_tensor_metadata.default:
+            # remove torch.ops.aten._assert_tensor_metadata.default
+            node.replace_all_uses_with(node.args[0])  # bypass
+            graph.erase_node(node)
+        if node.target == torch.ops.aten.zeros.default:
+            # Change torch.ops.aten.zeros.default
+            node.kwargs = {
+                "dtype": torch.int64,
+                "device": torch.device("cpu"),
+                "pin_memory": False,
+            }
+    model.graph.eliminate_dead_code()
+    model.recompile()
+    # complete converting GPU target ops to CPU ones.
+
+    return model
+
+
+def main(args):
+    # ensure the working directory exist.
+    os.makedirs(args.artifact, exist_ok=True)
+
+    # define the metric for the model evaluation
+    metric = evaluate.load("accuracy")
+
+    # Fine tuning model with a csv dataset
+    mobilebert_finetune = MobileBertFinetune(metric, args)
+    model, tokenized_datasets = mobilebert_finetune.get_finetune_mobilebert(
+        args.artifact
+    )
+
+    # Setting for QAT training
+    batch_size_edge = 1  # The batch of the final graph for a target edge device is 1
+    batch_size_training = args.batch_size
+    num_workers = args.num_workers  # Num of dataset loaders
+    num_epochs = args.num_epochs_for_QAT  # Num of epochs in QAT training
+    data_num = args.calibration_number  # Num of dataset for quantization calibration
+
+    # searching an avaiable device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+
+    # making dataset for calibrating the model...
+    print("\n==== Loading calibration dataset for PTQ quantization....")
+    inputs, labels = get_dataset(
+        data_num, tokenized_datasets, batch_size_edge, num_workers, device
+    )
+
+    # running an example
+    example_ref_input_ids = inputs[0][0].to(device)
+    example_ref_attention_mask = inputs[0][1].to(device)
+    example_inputs = (example_ref_input_ids, example_ref_attention_mask)
+    float_out = model(*example_inputs)
+
+    # QAT Training with a csv dataset
+    qat_file_path = os.path.join(args.artifact, "mobilebert_qat_model_csv.pt2")
+    qat_file_path_for_cpu = os.path.join(
+        args.artifact, "mobilebert_qat_model_csv_for_cpu.pt2"
+    )
+    if args.qat and args.precision is not None:
+        model = build_aten_to_qat_mobilebert(
+            model.train(),
+            example_inputs,
+            quant_dtype=getattr(Precision, args.precision),
+            is_qat=True,
+            tokenized_datasets=tokenized_datasets,
+            batch_size_training=batch_size_training,
+            batch_size_edge=batch_size_edge,
+            num_epochs=num_epochs,
+            qat_file_name=qat_file_path,
+            qat_file_name_for_cpu=qat_file_path_for_cpu,
+            metric=metric,
+            device=device,
+        )
+        quant_out = model(*example_inputs)
+    else:
+        # trying to load a pretrained QAT model
+        if device == torch.device(type="cpu"):
+            model_path = qat_file_path_for_cpu
+        else:
+            model_path = qat_file_path
+
+        print(f"\n==== Loading a pretrained QAT model from '{model_path}'....")
+        try:
+            loaded_model = torch.export.load(model_path)
+            model = loaded_model.module().to(device)
+        except:
+            print(f"Error: the file '{model_path}' was not avaiable.")
+
+        quant_out = model(*example_inputs)
+
+    compile_specs = [gen_samsung_backend_compile_spec(args.chipset)]
+    edge = to_edge_transform_and_lower_to_enn(
+        model, example_inputs, compile_specs=compile_specs
+    )
+    model_name = "mobilebert_exynos"
+    exec_prog = edge.to_executorch(
+        config=ExecutorchBackendConfig(extract_delegate_segments=True)
+    )
+    save_pte_program(exec_prog, model_name, args.artifact)
+
+    if args.dump:
+        # Expect example inputs are tuple, including input ids and attn mask
+        save_tensors(example_inputs, prefix="float_input", artifact_dir=args.artifact)
+        save_tensors(float_out, prefix="float_output", artifact_dir=args.artifact)
+        if args.precision:
+            save_tensors(quant_out, "quant_out", artifact_dir=args.artifact)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "-c",
+        "--chipset",
+        required=True,
+        help="Samsung chipset, i.e. E9955, etc",
+        type=str,
+    )
+    parser.add_argument(
+        "-a",
+        "--artifact",
+        help="path for storing generated artifacts by this example.",
+        default="./mobilebert",
+        type=str,
+    )
+    parser.add_argument(
+        "--csv_dataset",
+        default=None,
+        help=(
+            "path of a csv file  "
+            "e.g. --csv_dataset ./mobilebert/title_conference.csv "
+            "If you don't assign a cvs file, a csv file is loaded automatically "
+            "from https://raw.githubusercontent.com/susanli2016/NLP-with-Python/master/data/title_conference.csv"
+        ),
+        type=str,
+    )
+    parser.add_argument(
+        "-p",
+        "--precision",
+        default="A8W8",
+        help=("Quantizaiton precision. If not set, the model will not be quantized."),
+        choices=[None, "A8W8"],
+        type=str,
+    )
+    parser.add_argument(
+        "-cn",
+        "--calibration_number",
+        default=100,
+        help=(
+            "Assign the number of data you want "
+            "to use for calibrating the quant params."
+        ),
+        type=int,
+    )
+    parser.add_argument(
+        "--num-epochs-for-finetune",
+        default=12,
+        type=int,
+        help="# of epochs for finetune training",
+    )
+    parser.add_argument(
+        "-m",
+        "--max-length",
+        default=256,
+        type=int,
+        help="The max length of input tokens",
+    )
+    parser.add_argument(
+        "--batch-size",
+        default=32,
+        type=int,
+        help=(
+            "Batch size for finetuning and QAT training"
+            "The batch of the final graph for a target edge device is 1."
+            "  It is independent on the setting of batch-size. "
+        ),
+    )
+    parser.add_argument(
+        "--num-workers",
+        default=8,
+        type=int,
+        help="# of workers for DataLoader in QAT training",
+    )
+    parser.add_argument(
+        "--qat",
+        default=False,
+        const=True,
+        nargs="?",
+        help=("Whether to train the model with QAT."),
+        type=bool,
+    )
+    parser.add_argument(
+        "--num-epochs-for-QAT",
+        default=12,
+        type=int,
+        help=(
+            "# of epochs for QAT training"
+            ">1000 epochs is recommended to get proper accuracy"
+            " with a GPU server."
+        ),
+    )
+    parser.add_argument(
+        "--dump",
+        default=False,
+        action="store_true",
+        help=("Whether to dump all outputs. If not set, we only dump pte."),
+    )
+    args = parser.parse_args()
+    main(args)

From b2dc6426c492029f877786b94e125342288ac278 Mon Sep 17 00:00:00 2001
From: "jiseong.oh" <jiseong.oh@samsung.com>
Date: Thu, 2 Apr 2026 11:15:38 +0000
Subject: [PATCH 5/6] Add framework for quantization tc

- Add Quantization TestCases

Co-authored-by: chen03.zhao@samsung.com <chen03.zhao@samsung.com>
Signed-off-by: jiseong.oh <jiseong.oh@samsung.com>
---
 .../samsung/test/models/test_deeplab_v3.py    | 30 ++++++++++++++-
 backends/samsung/test/models/test_edsr.py     | 26 ++++++++++++-
 .../samsung/test/models/test_inception_v3.py  | 27 ++++++++++++-
 .../samsung/test/models/test_inception_v4.py  | 38 ++++++++++++++++++-
 .../test/models/test_mobilebert_finetuning.py |  2 +-
 .../samsung/test/models/test_mobilenet_v2.py  | 27 ++++++++++++-
 .../samsung/test/models/test_mobilenet_v3.py  | 28 +++++++++++++-
 backends/samsung/test/models/test_resnet18.py | 27 ++++++++++++-
 backends/samsung/test/models/test_resnet50.py | 27 ++++++++++++-
 .../test/models/test_torchvision_vit.py       | 27 ++++++++++++-
 .../samsung/test/models/test_wav2letter.py    | 34 ++++++++++++++++-
 .../samsung/test/tester/samsung_tester.py     | 19 +++++++++-
 backends/samsung/test/utils/utils.py          | 33 +++++++++++++++-
 13 files changed, 331 insertions(+), 14 deletions(-)

diff --git a/backends/samsung/test/models/test_deeplab_v3.py b/backends/samsung/test/models/test_deeplab_v3.py
index cd6a6527980..634cf69911b 100644
--- a/backends/samsung/test/models/test_deeplab_v3.py
+++ b/backends/samsung/test/models/test_deeplab_v3.py
@@ -1,15 +1,20 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
 # except in compliance with the License. See the license file in the root
 # directory of this source tree for more details.
+import os
 import unittest
 
 from executorch.backends.samsung.serialization.compile_options import (
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import (
+    get_quant_test_data_segmentation,
+)
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.deeplab_v3 import DeepLabV3ResNet50Model
 
@@ -27,3 +32,26 @@ def test_dl3_fp16(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=example_input, atol=0.009)
         )
+
+    def test_dl3_a8w8(self):
+        model = DeepLabV3ResNet50Model().get_eager_model()
+        example_input, cali, testdata = get_quant_test_data_segmentation(
+            os.path.join(os.environ["DATASET_PATH"], "VOC_image")
+        )
+        checker_config = CheckerConfig(
+            "segmentation",
+            {
+                "dataset": testdata,
+                "threshold": 0.7,
+            },
+        )
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize(cali_dataset=cali, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
+        )
diff --git a/backends/samsung/test/models/test_edsr.py b/backends/samsung/test/models/test_edsr.py
index e69d5cc459c..77f6223b9d0 100644
--- a/backends/samsung/test/models/test_edsr.py
+++ b/backends/samsung/test/models/test_edsr.py
@@ -1,4 +1,4 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
@@ -6,12 +6,17 @@
 # directory of this source tree for more details.
 
 
+import os
 import unittest
 
 from executorch.backends.samsung.serialization.compile_options import (
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import (
+    get_quant_test_data_super_resolution,
+)
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.edsr import EdsrModel
 
@@ -29,3 +34,22 @@ def test_edsr_fp16(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=example_input, atol=0.02)
         )
+
+    def test_edsr_a8w8(self):
+        example_input, cali, testdata = get_quant_test_data_super_resolution(
+            os.path.join(os.environ["DATASET_PATH"]), "B100"
+        )
+        model = EdsrModel().get_eager_model()
+        checker_config = CheckerConfig(
+            "super_resolution", {"dataset": testdata, "threshold": 0.7}
+        )
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize(cali_dataset=cali, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
+        )
diff --git a/backends/samsung/test/models/test_inception_v3.py b/backends/samsung/test/models/test_inception_v3.py
index faeea4ab4a1..ce1dba86c7b 100644
--- a/backends/samsung/test/models/test_inception_v3.py
+++ b/backends/samsung/test/models/test_inception_v3.py
@@ -1,4 +1,4 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
@@ -6,6 +6,7 @@
 # directory of this source tree for more details.
 
 
+import os
 import unittest
 
 import torch
@@ -14,6 +15,8 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import get_quant_test_data_classify
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.inception_v3 import InceptionV3Model
 
@@ -32,3 +35,25 @@ def test_inception_v3_fp16(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=example_input, atol=0.02, rtol=0.02)
         )
+
+    def test_inception_v3_a8w8(self):
+        example_input, cali, testdata = get_quant_test_data_classify(
+            os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
+        )
+        checker_config = CheckerConfig(
+            "classifier",
+            {
+                "dataset": testdata,
+            },
+        )
+        model = InceptionV3Model().get_eager_model()
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize(cali_dataset=cali, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
+        )
diff --git a/backends/samsung/test/models/test_inception_v4.py b/backends/samsung/test/models/test_inception_v4.py
index 2998fd894db..ad1def29a5f 100644
--- a/backends/samsung/test/models/test_inception_v4.py
+++ b/backends/samsung/test/models/test_inception_v4.py
@@ -1,4 +1,4 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
@@ -13,8 +13,11 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import get_quant_test_data_classify
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.inception_v4 import InceptionV4Model
+from torchvision import transforms
 
 
 def patch_iv4(weight_path: str):
@@ -66,3 +69,36 @@ def test_inception_v4_fp16(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=example_input, atol=0.02, rtol=0.02)
         )
+
+    def test_inception_v4_a8w8(self):
+        transform_compose = transforms.Compose(
+            [
+                transforms.Resize((299, 299)),
+                transforms.ToTensor(),
+                transforms.Normalize(
+                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+                ),
+                transforms.Lambda(lambda x: x.unsqueeze(0)),  # Add batch dim
+            ]
+        )
+        example_input, cali, testdata = get_quant_test_data_classify(
+            os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset"),
+            transform_compose=transform_compose,
+        )
+        checker_config = CheckerConfig(
+            "classifier",
+            {
+                "dataset": testdata,
+            },
+        )
+        model = InceptionV4Model().get_eager_model()
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize(cali_dataset=cali, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
+        )
diff --git a/backends/samsung/test/models/test_mobilebert_finetuning.py b/backends/samsung/test/models/test_mobilebert_finetuning.py
index 92b52e38c9e..22666aa0e65 100644
--- a/backends/samsung/test/models/test_mobilebert_finetuning.py
+++ b/backends/samsung/test/models/test_mobilebert_finetuning.py
@@ -1,4 +1,4 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
diff --git a/backends/samsung/test/models/test_mobilenet_v2.py b/backends/samsung/test/models/test_mobilenet_v2.py
index 51512be57ee..c9a39e84874 100644
--- a/backends/samsung/test/models/test_mobilenet_v2.py
+++ b/backends/samsung/test/models/test_mobilenet_v2.py
@@ -1,15 +1,18 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
 # except in compliance with the License. See the license file in the root
 # directory of this source tree for more details.
+import os
 import unittest
 
 from executorch.backends.samsung.serialization.compile_options import (
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import get_quant_test_data_classify
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.mobilenet_v2 import MV2Model
 
@@ -27,3 +30,25 @@ def test_mv2_fp16(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=example_input, atol=0.02)
         )
+
+    def test_mv2_a8w8(self):
+        example_input, cali, testdata = get_quant_test_data_classify(
+            os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
+        )
+        checker_config = CheckerConfig(
+            "classifier",
+            {
+                "dataset": testdata,
+            },
+        )
+        model = MV2Model().get_eager_model()
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize(cali_dataset=cali, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
+        )
diff --git a/backends/samsung/test/models/test_mobilenet_v3.py b/backends/samsung/test/models/test_mobilenet_v3.py
index fbfc4716b73..81fe7f1a287 100644
--- a/backends/samsung/test/models/test_mobilenet_v3.py
+++ b/backends/samsung/test/models/test_mobilenet_v3.py
@@ -1,4 +1,4 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
@@ -6,6 +6,7 @@
 # directory of this source tree for more details.
 
 
+import os
 import unittest
 
 import torch
@@ -14,6 +15,8 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import get_quant_test_data_classify
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.mobilenet_v3 import MV3Model
 
@@ -32,3 +35,26 @@ def test_mv3_fp16(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=example_input, atol=0.07, rtol=0.07)
         )
+
+    def test_mv3_a8w8(self):
+        example_input, cali, testdata = get_quant_test_data_classify(
+            os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
+        )
+        checker_config = CheckerConfig(
+            "classifier",
+            {
+                "dataset": testdata,
+                "topktol": {1: 0.0, 2: 0.0},
+            },
+        )
+        model = MV3Model().get_eager_model()
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize(cali_dataset=cali, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=3, rtol=3)
+        )
diff --git a/backends/samsung/test/models/test_resnet18.py b/backends/samsung/test/models/test_resnet18.py
index b2d14d42303..59e68231a0d 100644
--- a/backends/samsung/test/models/test_resnet18.py
+++ b/backends/samsung/test/models/test_resnet18.py
@@ -1,4 +1,4 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
@@ -6,12 +6,15 @@
 # directory of this source tree for more details.
 
 
+import os
 import unittest
 
 from executorch.backends.samsung.serialization.compile_options import (
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import get_quant_test_data_classify
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.resnet import ResNet18Model
 
@@ -29,3 +32,25 @@ def test_resnet18_fp16(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=example_input, atol=0.02, rtol=0.02)
         )
+
+    def test_resnet18_a8w8(self):
+        example_input, cali, testdata = get_quant_test_data_classify(
+            os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
+        )
+        checker_config = CheckerConfig(
+            "classifier",
+            {
+                "dataset": testdata,
+            },
+        )
+        model = ResNet18Model().get_eager_model()
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize(cali_dataset=cali, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
+        )
diff --git a/backends/samsung/test/models/test_resnet50.py b/backends/samsung/test/models/test_resnet50.py
index 00d33fe79ea..88925c742c1 100644
--- a/backends/samsung/test/models/test_resnet50.py
+++ b/backends/samsung/test/models/test_resnet50.py
@@ -1,4 +1,4 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
@@ -6,12 +6,15 @@
 # directory of this source tree for more details.
 
 
+import os
 import unittest
 
 from executorch.backends.samsung.serialization.compile_options import (
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import get_quant_test_data_classify
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.resnet import ResNet50Model
 
@@ -29,3 +32,25 @@ def test_resnet50_fp16(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=example_input, atol=0.02, rtol=0.02)
         )
+
+    def test_resnet50_a8w8(self):
+        example_input, cali, testdata = get_quant_test_data_classify(
+            os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
+        )
+        checker_config = CheckerConfig(
+            "classifier",
+            {
+                "dataset": testdata,
+            },
+        )
+        model = ResNet50Model().get_eager_model()
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize(cali_dataset=cali, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
+        )
diff --git a/backends/samsung/test/models/test_torchvision_vit.py b/backends/samsung/test/models/test_torchvision_vit.py
index bab146d9979..4239755c526 100644
--- a/backends/samsung/test/models/test_torchvision_vit.py
+++ b/backends/samsung/test/models/test_torchvision_vit.py
@@ -1,9 +1,10 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
 # except in compliance with the License. See the license file in the root
 # directory of this source tree for more details.
+import os
 import unittest
 
 import torch
@@ -11,6 +12,8 @@
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import get_quant_test_data_classify
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.torchvision_vit import TorchVisionViTModel
 
@@ -32,3 +35,25 @@ def test_torchvision_vit_fp16(self):
                 inputs=example_input, atol=0.005, rtol=0.005
             )
         )
+
+    def test_torchvision_vit_a8w8(self):
+        example_input, cali, testdata = get_quant_test_data_classify(
+            os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
+        )
+        checker_config = CheckerConfig(
+            "classifier",
+            {
+                "dataset": testdata,
+            },
+        )
+        model = TorchVisionViTModel().get_eager_model()
+        tester = SamsungTester(
+            model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)]
+        )
+        (
+            tester.quantize(cali_dataset=cali, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=example_input, atol=2, rtol=2)
+        )
diff --git a/backends/samsung/test/models/test_wav2letter.py b/backends/samsung/test/models/test_wav2letter.py
index 569e3decfec..6daccabd81e 100644
--- a/backends/samsung/test/models/test_wav2letter.py
+++ b/backends/samsung/test/models/test_wav2letter.py
@@ -1,15 +1,19 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
 # except in compliance with the License. See the license file in the root
 # directory of this source tree for more details.
+import os
 import unittest
 
+import torch
 from executorch.backends.samsung.serialization.compile_options import (
     gen_samsung_backend_compile_spec,
 )
 from executorch.backends.samsung.test.tester import SamsungTester
+from executorch.backends.samsung.test.utils.datasets import get_quant_test_data_voice
+from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig
 from executorch.backends.samsung.test.utils.utils import TestConfig
 from executorch.examples.models.wav2letter import Wav2LetterModel
 
@@ -27,3 +31,31 @@ def test_w2l_fp16(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=example_input, atol=0.009)
         )
+
+    def test_w2l_quant(self):
+        factory = Wav2LetterModel()
+        factory.vocab_size = 29
+        assert (model_cache_dir := os.getenv("MODEL_CACHE")), "MODEL_CACHE not set!"
+        weight_path = os.path.join(model_cache_dir, "w2l/states_fused.pth")
+        state_dict = torch.load(weight_path, weights_only=False)
+        model = factory.get_eager_model()
+        model.load_state_dict(state_dict)
+        example_input, calib_data, quant_test_data = get_quant_test_data_voice(
+            os.path.join(os.environ["DATASET_PATH"], "w2l/wav2letter")
+        )
+        labels = [" ", *"abcdefghijklmnopqrstuvwxyz", "'", "*"]
+        checker_config = CheckerConfig(
+            "wave2letter", {"dataset": quant_test_data, "labels": labels}
+        )
+        (
+            SamsungTester(
+                model,
+                example_input,
+                [gen_samsung_backend_compile_spec(TestConfig.chipset)],
+            )
+            .quantize(cali_dataset=calib_data, checker_config=checker_config)
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .run_method_and_compare_outputs(atol=1.0, rtol=1.0)
+        )
diff --git a/backends/samsung/test/tester/samsung_tester.py b/backends/samsung/test/tester/samsung_tester.py
index a6eb170a61b..258aef191d0 100644
--- a/backends/samsung/test/tester/samsung_tester.py
+++ b/backends/samsung/test/tester/samsung_tester.py
@@ -12,6 +12,7 @@
 from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner
 from executorch.backends.samsung.quantizer.quantizer import EnnQuantizer, Precision
 from executorch.backends.samsung.test.utils import RuntimeExecutor
+from executorch.backends.samsung.test.utils.quant_checkers import get_checker
 from executorch.backends.samsung.utils.export_utils import get_edge_compile_config
 from executorch.backends.test.harness import Tester as TesterBase
 from executorch.backends.test.harness.stages import StageType
@@ -45,6 +46,7 @@ def __init__(
         calibrate: bool = True,
         calibration_samples: Optional[Sequence[Any]] = None,
         is_qat: Optional[bool] = False,
+        checker_config=None,
     ):
         super().__init__(
             quantizer=quantizer,
@@ -53,6 +55,7 @@ def __init__(
             calibration_samples=calibration_samples,
             is_qat=is_qat,
         )
+        self.checker_config = checker_config
 
     def run(
         self, artifact: torch.nn.Module, inputs: Optional[Tuple[torch.Tensor]]
@@ -82,6 +85,9 @@ def run(
         converted = convert_pt2e(prepared, fold_quantize=False)
 
         self.converted_graph = converted
+        if self.checker_config:
+            checker = get_checker(artifact, converted, self.checker_config)
+            checker.check()
 
 
 class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
@@ -141,11 +147,20 @@ def __init__(
         self.example_inputs = example_inputs
         self.compile_specs = compile_specs
 
-    def quantize(self, quantize_stage: Optional[Quantize] = None):
+    def quantize(
+        self,
+        quantize_stage: Optional[Quantize] = None,
+        cali_dataset=None,
+        checker_config=None,
+    ):
         if quantize_stage is None:
             quantizer = EnnQuantizer()
             quantizer.setup_quant_params(Precision.A8W8)
-            quantize_stage = Quantize(quantizer)
+            quantize_stage = Quantize(
+                quantizer,
+                calibration_samples=cali_dataset,
+                checker_config=checker_config,
+            )
 
         return super().quantize(quantize_stage)
 
diff --git a/backends/samsung/test/utils/utils.py b/backends/samsung/test/utils/utils.py
index 4385245daf9..936e2727c96 100644
--- a/backends/samsung/test/utils/utils.py
+++ b/backends/samsung/test/utils/utils.py
@@ -1,11 +1,42 @@
-# Copyright (c) Samsung Electronics Co. LTD
+# Copyright (c) 2025 Samsung Electronics Co. LTD
 # All rights reserved
 #
 # Licensed under the BSD License (the "License"); you may not use this file
 # except in compliance with the License. See the license file in the root
 # directory of this source tree for more details.
 
+import torch
+
 
 class TestConfig:
     host_ip: str = "111.111.111.111"
     chipset: str = "E9965"
+
+
+class GreedyLM:
+    def __init__(self, vocab, blank_label="*"):
+        self.vocab = vocab
+        self.char_to_id = {c: i for i, c in enumerate(vocab)}
+        self.blank_label = blank_label
+
+    def encode(self, text):
+        return [self.char_to_id[c] for c in text.lower()]
+
+    def decode_ids(self, ids):
+        if ids.ndim == 2:  # batch|steps
+            return [self.decode_ids(t) for t in ids]
+
+        decoded_text = "".join([self.vocab[id] for id in ids])
+
+        return decoded_text
+
+    def decode_ctc(self, emissions):
+        if emissions.ndim == 3:  # batch|labels|steps
+            return [self.decode_ctc(t) for t in emissions]
+
+        amax_ids = emissions.argmax(0)
+        amax_ids_collapsed = torch.unique_consecutive(amax_ids)
+        decoded_text = "".join([self.vocab[id] for id in amax_ids_collapsed])
+        decoded_text = decoded_text.replace(self.blank_label, "")
+
+        return decoded_text

From a96dd33ee39b704ef8e45a013457f31ec1882fd1 Mon Sep 17 00:00:00 2001
From: "jiseong.oh" <jiseong.oh@samsung.com>
Date: Mon, 6 Apr 2026 12:01:24 +0000
Subject: [PATCH 6/6] update Litecore version to 1.1.0

Signed-off-by: jiseong.oh <jiseong.oh@samsung.com>
---
 .ci/scripts/setup-samsung-linux-deps.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/scripts/setup-samsung-linux-deps.sh b/.ci/scripts/setup-samsung-linux-deps.sh
index 9aa9c4380a5..e502baa87e5 100644
--- a/.ci/scripts/setup-samsung-linux-deps.sh
+++ b/.ci/scripts/setup-samsung-linux-deps.sh
@@ -29,7 +29,7 @@ while [[ $# -gt 0 ]]; do
   esac
 done
 
-LITECORE_VERSION="v1.0"
+LITECORE_VERSION="v1.1"
 LITECORE_FILE_NAME="ai-litecore-ubuntu2204-${LITECORE_VERSION}.tar.gz"
 DEVICEFARM_CLI_VERSION="beta-v1.1.0"
 DEVICEFARM_FILE_NAME="devicefarmcli-${DEVICEFARM_CLI_VERSION}.zip"