From 848c58f319163904f0d98d35301dbb22bf03623d Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Wed, 1 Apr 2026 11:46:42 +0000 Subject: [PATCH 1/6] Add more CI testcases for Exynos Backend - Support necessary stages for E2E test - Enable test for float/quant ops and float models Co-authored-by: chong.checn Co-authored-by: jhbb.cha Co-authored-by: xz.linghu Signed-off-by: jiseong.oh --- .github/workflows/pull.yml | 1 + .../samsung/test/models/test_deeplab_v3.py | 3 +- backends/samsung/test/models/test_edsr.py | 3 +- .../samsung/test/models/test_inception_v3.py | 3 +- .../samsung/test/models/test_inception_v4.py | 42 +++++++++++++- .../samsung/test/models/test_mobilenet_v2.py | 3 +- .../samsung/test/models/test_mobilenet_v3.py | 3 +- backends/samsung/test/models/test_resnet18.py | 3 +- backends/samsung/test/models/test_resnet50.py | 3 +- .../test/models/test_torchvision_vit.py | 3 +- .../samsung/test/models/test_wav2letter.py | 3 +- backends/samsung/test/ops/test_add.py | 28 ++++++++- backends/samsung/test/ops/test_avg_pool2d.py | 32 +++++++++- backends/samsung/test/ops/test_batch_norm.py | 3 +- backends/samsung/test/ops/test_bmm.py | 3 +- backends/samsung/test/ops/test_cat.py | 3 +- backends/samsung/test/ops/test_clamp.py | 22 ++++++- .../samsung/test/ops/test_constant_pad_nd.py | 3 +- backends/samsung/test/ops/test_conv2d.py | 39 ++++++++++++- backends/samsung/test/ops/test_div.py | 28 ++++++++- backends/samsung/test/ops/test_embedding.py | 3 +- backends/samsung/test/ops/test_expand_copy.py | 3 +- backends/samsung/test/ops/test_gelu.py | 3 +- backends/samsung/test/ops/test_leaky_relu.py | 3 +- backends/samsung/test/ops/test_linear.py | 3 +- backends/samsung/test/ops/test_log_softmax.py | 3 +- backends/samsung/test/ops/test_max_pool2d.py | 44 +++++++++++++- backends/samsung/test/ops/test_mean_dim.py | 28 ++++++++- backends/samsung/test/ops/test_minimum.py | 3 +- backends/samsung/test/ops/test_mul.py | 32 +++++++++- backends/samsung/test/ops/test_permute.py | 3 +- 
.../samsung/test/ops/test_pixel_shuffle.py | 3 +- backends/samsung/test/ops/test_relu.py | 28 ++++++++- backends/samsung/test/ops/test_reshape.py | 3 +- backends/samsung/test/ops/test_rsqrt.py | 3 +- backends/samsung/test/ops/test_select.py | 3 +- backends/samsung/test/ops/test_slice_copy.py | 3 +- backends/samsung/test/ops/test_softmax.py | 3 +- backends/samsung/test/ops/test_sqrt.py | 3 +- backends/samsung/test/ops/test_squeeze.py | 3 +- backends/samsung/test/ops/test_sub.py | 3 +- backends/samsung/test/ops/test_to_copy.py | 3 +- backends/samsung/test/ops/test_unsqueeze.py | 3 +- .../test/ops/test_upsample_bilinear2d.py | 26 ++++++++- .../test/ops/test_upsample_nearest2d.py | 3 +- .../samsung/test/tester/samsung_tester.py | 40 ++++++++++++- backends/samsung/test/utils/run_tests.py | 58 +++++++++++++++++++ backends/samsung/test/utils/utils.py | 11 ++++ 48 files changed, 512 insertions(+), 46 deletions(-) create mode 100644 backends/samsung/test/utils/run_tests.py create mode 100644 backends/samsung/test/utils/utils.py diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 8a5b2f4805a..ca59156108b 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -1247,6 +1247,7 @@ jobs: fi # Test models + #python -m executorch.backends.samsung.test.utils.run_tests --chipset E9955 python -m unittest discover -s backends/samsung/test/models -p "test_*.py" test-vulkan-models-linux: diff --git a/backends/samsung/test/models/test_deeplab_v3.py b/backends/samsung/test/models/test_deeplab_v3.py index a2b3fcb93a0..cd6a6527980 100644 --- a/backends/samsung/test/models/test_deeplab_v3.py +++ b/backends/samsung/test/models/test_deeplab_v3.py @@ -10,6 +10,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.deeplab_v3 import DeepLabV3ResNet50Model @@ -18,7 +19,7 @@ def test_dl3_fp16(self): model 
= DeepLabV3ResNet50Model().get_eager_model() example_input = DeepLabV3ResNet50Model().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/models/test_edsr.py b/backends/samsung/test/models/test_edsr.py index 326296fc55a..e69d5cc459c 100644 --- a/backends/samsung/test/models/test_edsr.py +++ b/backends/samsung/test/models/test_edsr.py @@ -12,6 +12,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.edsr import EdsrModel @@ -20,7 +21,7 @@ def test_edsr_fp16(self): model = EdsrModel().get_eager_model() example_input = EdsrModel().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/models/test_inception_v3.py b/backends/samsung/test/models/test_inception_v3.py index ef3a94c2a62..faeea4ab4a1 100644 --- a/backends/samsung/test/models/test_inception_v3.py +++ b/backends/samsung/test/models/test_inception_v3.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.inception_v3 import InceptionV3Model @@ -23,7 +24,7 @@ def test_inception_v3_fp16(self): model = InceptionV3Model().get_eager_model() example_input = InceptionV3Model().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git 
a/backends/samsung/test/models/test_inception_v4.py b/backends/samsung/test/models/test_inception_v4.py index 53bd209d5d2..2998fd894db 100644 --- a/backends/samsung/test/models/test_inception_v4.py +++ b/backends/samsung/test/models/test_inception_v4.py @@ -5,22 +5,60 @@ # except in compliance with the License. See the license file in the root # directory of this source tree for more details. - +import logging +import os import unittest from executorch.backends.samsung.serialization.compile_options import ( gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.inception_v4 import InceptionV4Model +def patch_iv4(weight_path: str): + assert os.path.isfile(weight_path), "Can not found weight path for iv4" + from safetensors import safe_open + from timm.models import inception_v4 + + def _monkeypatch_get_eager_model(self): + tensors = {} + with safe_open(weight_path, framework="pt") as st: + for k in st.keys(): + tensors[k] = st.get_tensor(k) + logging.info("Loading inception_v4 model") + m = inception_v4(pretrained=True, pretrained_cfg={"state_dict": tensors}) + logging.info("Loaded inception_v4 model") + return m + + old_func = InceptionV4Model.get_eager_model + InceptionV4Model.get_eager_model = _monkeypatch_get_eager_model + return old_func + + +def recover_iv4(old_func): + InceptionV4Model.get_eager_model = old_func + + class TestMilestoneInceptionV4(unittest.TestCase): + @classmethod + def setUpClass(cls): + assert (model_cache_dir := os.getenv("MODEL_CACHE")), "MODEL_CACHE not set!" 
+ weight_path = os.path.join( + model_cache_dir, "iv4/model.safetensors" + ) + cls._old_func = patch_iv4(weight_path) + + @classmethod + def tearDownClass(cls): + recover_iv4(cls._old_func) + def test_inception_v4_fp16(self): model = InceptionV4Model().get_eager_model() example_input = InceptionV4Model().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/models/test_mobilenet_v2.py b/backends/samsung/test/models/test_mobilenet_v2.py index 86805e5cbc2..51512be57ee 100644 --- a/backends/samsung/test/models/test_mobilenet_v2.py +++ b/backends/samsung/test/models/test_mobilenet_v2.py @@ -10,6 +10,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.mobilenet_v2 import MV2Model @@ -18,7 +19,7 @@ def test_mv2_fp16(self): model = MV2Model().get_eager_model() example_input = MV2Model().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/models/test_mobilenet_v3.py b/backends/samsung/test/models/test_mobilenet_v3.py index 669cca1db12..fbfc4716b73 100644 --- a/backends/samsung/test/models/test_mobilenet_v3.py +++ b/backends/samsung/test/models/test_mobilenet_v3.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.mobilenet_v3 import MV3Model @@ -23,7 +24,7 @@ def test_mv3_fp16(self): model = MV3Model().get_eager_model() example_input = 
MV3Model().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/models/test_resnet18.py b/backends/samsung/test/models/test_resnet18.py index 429218649b8..b2d14d42303 100644 --- a/backends/samsung/test/models/test_resnet18.py +++ b/backends/samsung/test/models/test_resnet18.py @@ -12,6 +12,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.resnet import ResNet18Model @@ -20,7 +21,7 @@ def test_resnet18_fp16(self): model = ResNet18Model().get_eager_model() example_input = ResNet18Model().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/models/test_resnet50.py b/backends/samsung/test/models/test_resnet50.py index 0c6b32526b1..00d33fe79ea 100644 --- a/backends/samsung/test/models/test_resnet50.py +++ b/backends/samsung/test/models/test_resnet50.py @@ -12,6 +12,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.resnet import ResNet50Model @@ -20,7 +21,7 @@ def test_resnet50_fp16(self): model = ResNet50Model().get_eager_model() example_input = ResNet50Model().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/models/test_torchvision_vit.py 
b/backends/samsung/test/models/test_torchvision_vit.py index 7cdb4cabada..a32dd6ac9ac 100644 --- a/backends/samsung/test/models/test_torchvision_vit.py +++ b/backends/samsung/test/models/test_torchvision_vit.py @@ -11,6 +11,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.torchvision_vit import TorchVisionViTModel @@ -20,7 +21,7 @@ def test_torchvision_vit_fp16(self): model = TorchVisionViTModel().get_eager_model() example_input = TorchVisionViTModel().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/models/test_wav2letter.py b/backends/samsung/test/models/test_wav2letter.py index 4d016763b2b..569e3decfec 100644 --- a/backends/samsung/test/models/test_wav2letter.py +++ b/backends/samsung/test/models/test_wav2letter.py @@ -10,6 +10,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.wav2letter import Wav2LetterModel @@ -18,7 +19,7 @@ def test_w2l_fp16(self): model = Wav2LetterModel().get_eager_model() example_input = Wav2LetterModel().get_example_inputs() tester = SamsungTester( - model, example_input, [gen_samsung_backend_compile_spec("E9955")] + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/ops/test_add.py b/backends/samsung/test/ops/test_add.py index 58e49f7bb10..f0e51888ca5 100644 --- a/backends/samsung/test/ops/test_add.py +++ b/backends/samsung/test/ops/test_add.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import 
SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Add(torch.nn.Module): @@ -38,7 +39,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() @@ -50,6 +51,23 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec(TestConfig.chipset)], + ) + ( + tester.quantize() + .export() + .check_count({"torch.ops.aten.add.Tensor": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_add_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.2) + ) + def test_fp32_simple_add(self): inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8)) self._test(Add(), inputs) @@ -61,3 +79,11 @@ def test_fp32_const_add(self): def test_fp32_add_broadcast(self): inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8)) self._test(Add(), inputs) + + def test_a8w8_simple_add(self): + inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8)) + self._test_a8w8(Add(), inputs) + + def test_a8w8_const_add(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test_a8w8(AddConstant(torch.randn(1, 3, 8, 8)), inputs) diff --git a/backends/samsung/test/ops/test_avg_pool2d.py b/backends/samsung/test/ops/test_avg_pool2d.py index e00f49a47fd..8aecb445be6 100644 --- a/backends/samsung/test/ops/test_avg_pool2d.py +++ b/backends/samsung/test/ops/test_avg_pool2d.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class AvgPool2d(torch.nn.Module): @@ 
-41,7 +42,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() @@ -53,6 +54,23 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec(TestConfig.chipset)], + ) + ( + tester.quantize() + .export() + .check_count({"torch.ops.aten.avg_pool2d.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_avg_pool2d_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.2) + ) + def test_fp32_avg_pool2d(self): inputs = (torch.randn(1, 16, 24, 24),) self._test(AvgPool2d(), inputs) @@ -64,3 +82,15 @@ def test_fp32_avg_pool2d_with_stride(self): def test_fp32_avg_pool2d_with_kernel_size(self): inputs = (torch.randn(1, 16, 24, 24),) self._test(AvgPool2d(kernel_size=4), inputs) + + def test_a8w8_avg_pool2d(self): + inputs = (torch.randn(1, 16, 24, 24),) + self._test_a8w8(AvgPool2d(), inputs) + + def test_a8w8_avg_pool2d_with_stride(self): + inputs = (torch.randn(1, 16, 24, 24),) + self._test_a8w8(AvgPool2d(stride=1), inputs) + + def test_a8w8_avg_pool2d_with_kernel_size(self): + inputs = (torch.randn(1, 16, 24, 24),) + self._test_a8w8(AvgPool2d(kernel_size=4), inputs) diff --git a/backends/samsung/test/ops/test_batch_norm.py b/backends/samsung/test/ops/test_batch_norm.py index 3c73f6d993a..db1b9761e51 100644 --- a/backends/samsung/test/ops/test_batch_norm.py +++ b/backends/samsung/test/ops/test_batch_norm.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import 
TestConfig class BatchNorm(torch.nn.Module): @@ -31,7 +32,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestBatchNorm(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, [gen_samsung_backend_compile_spec("E9955")] + module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/ops/test_bmm.py b/backends/samsung/test/ops/test_bmm.py index f927b051603..b4c32a5ace0 100644 --- a/backends/samsung/test/ops/test_bmm.py +++ b/backends/samsung/test/ops/test_bmm.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class BatchMatmul(torch.nn.Module): @@ -34,7 +35,7 @@ def _test(self, module: torch.nn.Module): torch.manual_seed(8) inputs = module.get_example_inputs() tester = SamsungTester( - module, inputs, [gen_samsung_backend_compile_spec("E9955")] + module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/ops/test_cat.py b/backends/samsung/test/ops/test_cat.py index a2d42370da5..9690a5dacc3 100644 --- a/backends/samsung/test/ops/test_cat.py +++ b/backends/samsung/test/ops/test_cat.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Concat(torch.nn.Module): @@ -28,7 +29,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: class TestConcat(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, [gen_samsung_backend_compile_spec("E9955")] + module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/ops/test_clamp.py 
b/backends/samsung/test/ops/test_clamp.py index 3c1ac40539b..8ce7f46b3ce 100644 --- a/backends/samsung/test/ops/test_clamp.py +++ b/backends/samsung/test/ops/test_clamp.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Clamp(torch.nn.Module): @@ -33,7 +34,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestClamp(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, [gen_samsung_backend_compile_spec("E9955")] + module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() @@ -45,6 +46,25 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)] + ) + ( + tester.quantize() + .export() + .check_count({"torch.ops.aten.clamp.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.2) + ) + def test_fp32_clamp(self): inputs = (torch.randn(1, 16, 8, 8),) self._test(Clamp(minimum=0, maximum=2.0), inputs) + + def test_a8w8_clamp(self): + inputs = (torch.randn(1, 16, 8, 8),) + self._test_a8w8(Clamp(minimum=0, maximum=2.0), inputs) diff --git a/backends/samsung/test/ops/test_constant_pad_nd.py b/backends/samsung/test/ops/test_constant_pad_nd.py index 5c6c6e4376c..192e3ac1a7b 100644 --- a/backends/samsung/test/ops/test_constant_pad_nd.py +++ b/backends/samsung/test/ops/test_constant_pad_nd.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from 
executorch.backends.samsung.test.utils.utils import TestConfig class ConstantPadND(torch.nn.Module): @@ -28,7 +29,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestConstantPadND(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, [gen_samsung_backend_compile_spec("E9955")] + module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/ops/test_conv2d.py b/backends/samsung/test/ops/test_conv2d.py index 39c2b2508e6..f23eed52aa9 100644 --- a/backends/samsung/test/ops/test_conv2d.py +++ b/backends/samsung/test/ops/test_conv2d.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Conv2d(torch.nn.Module): @@ -67,7 +68,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() @@ -78,6 +79,22 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec(TestConfig.chipset)], + ) + ( + tester.quantize() + .export() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_convolution_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.2) + ) + def test_fp32_conv2d_without_bias(self): inputs = (torch.randn(1, 3, 24, 24),) self._test(Conv2d(bias=False), inputs) @@ -93,3 +110,23 @@ def test_fp32_depthwise_conv2d(self): def test_fp32_transpose_conv2d(self): inputs = (torch.randn(1, 32, 24, 24),) self._test(TransposeConv2d(), 
inputs) + + def test_fp32_conv2d_with_dilation(self): + inputs = (torch.randn(1, 3, 24, 24),) + self._test(Conv2d(dilation=(2, 2)), inputs) + + def test_a8w8_conv2d_without_bias(self): + inputs = (torch.randn(1, 3, 24, 24),) + self._test_a8w8(Conv2d(bias=False), inputs) + + def test_a8w8_conv2d_with_bias(self): + inputs = (torch.randn(1, 3, 24, 24),) + self._test_a8w8(Conv2d(bias=True), inputs) + + def test_a8w8_depthwise_conv2d(self): + inputs = (torch.randn(1, 8, 24, 24),) + self._test_a8w8(Conv2d(in_channels=8, out_channels=8, groups=8), inputs) + + def test_a8w8_conv2d_with_dilation(self): + inputs = (torch.randn(1, 3, 24, 24),) + self._test_a8w8(Conv2d(dilation=(2, 2)), inputs) diff --git a/backends/samsung/test/ops/test_div.py b/backends/samsung/test/ops/test_div.py index 5a27531a96e..b491b1167a1 100644 --- a/backends/samsung/test/ops/test_div.py +++ b/backends/samsung/test/ops/test_div.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Div(torch.nn.Module): @@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() @@ -41,6 +42,23 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec(TestConfig.chipset)], + ) + ( + tester.quantize() + .export() + .check_count({"torch.ops.aten.div.Tensor": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_div_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.2) + ) + def 
test_fp32_simple_div(self): inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3) self._test(Div(), inputs) @@ -48,3 +66,11 @@ def test_fp32_simple_div(self): def test_fp32_div_broadcast(self): inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3) self._test(Div(), inputs) + + def test_a8w8_simple_div(self): + inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3) + self._test_a8w8(Div(), inputs) + + def test_a8w8_div_broadcast(self): + inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8).abs() + 1e-3) + self._test_a8w8(Div(), inputs) diff --git a/backends/samsung/test/ops/test_embedding.py b/backends/samsung/test/ops/test_embedding.py index ca3899d4c24..17e5fe6bf98 100644 --- a/backends/samsung/test/ops/test_embedding.py +++ b/backends/samsung/test/ops/test_embedding.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Embedding(torch.nn.Module): @@ -28,7 +29,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class TestEmbedding(unittest.TestCase): def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( - module, inputs, [gen_samsung_backend_compile_spec("E9955")] + module, inputs, [gen_samsung_backend_compile_spec(TestConfig.chipset)] ) ( tester.export() diff --git a/backends/samsung/test/ops/test_expand_copy.py b/backends/samsung/test/ops/test_expand_copy.py index de0f36e03d0..ba4067e14d6 100644 --- a/backends/samsung/test/ops/test_expand_copy.py +++ b/backends/samsung/test/ops/test_expand_copy.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class ExpandCopy(torch.nn.Module): @@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - 
[gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_gelu.py b/backends/samsung/test/ops/test_gelu.py index 20f93559fda..5b95c9d00c8 100644 --- a/backends/samsung/test/ops/test_gelu.py +++ b/backends/samsung/test/ops/test_gelu.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class GELU(torch.nn.Module): @@ -44,7 +45,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_leaky_relu.py b/backends/samsung/test/ops/test_leaky_relu.py index 4ad510528f9..edcd2aed62e 100644 --- a/backends/samsung/test/ops/test_leaky_relu.py +++ b/backends/samsung/test/ops/test_leaky_relu.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class LeakyReLU(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_linear.py b/backends/samsung/test/ops/test_linear.py index ce1f13d1a1f..e4e24075beb 100644 --- a/backends/samsung/test/ops/test_linear.py +++ b/backends/samsung/test/ops/test_linear.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Linear(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: 
torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_log_softmax.py b/backends/samsung/test/ops/test_log_softmax.py index 2aeb600e977..2f0d1ba1ed9 100644 --- a/backends/samsung/test/ops/test_log_softmax.py +++ b/backends/samsung/test/ops/test_log_softmax.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class LogSoftmax(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_max_pool2d.py b/backends/samsung/test/ops/test_max_pool2d.py index d944c38a678..8057ffb5fe3 100644 --- a/backends/samsung/test/ops/test_max_pool2d.py +++ b/backends/samsung/test/ops/test_max_pool2d.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class MaxPool2d(torch.nn.Module): @@ -43,7 +44,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() @@ -55,6 +56,23 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec(TestConfig.chipset)], + ) + ( + tester.quantize() + .export() + .check_count({"torch.ops.aten.max_pool2d.default": 1}) + 
.to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_max_pool2d_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.2) + ) + def test_fp32_max_pool2d(self): inputs = (torch.randn(1, 16, 24, 24),) self._test(MaxPool2d(), inputs) @@ -70,3 +88,27 @@ def test_fp32_max_pool2d_with_kernel_size(self): def test_fp32_max_pool2d_with_dilation(self): inputs = (torch.randn(1, 16, 24, 24),) self._test(MaxPool2d(dilation=2), inputs) + + def test_fp32_max_pool2d_with_stride(self): + inputs = (torch.randn(1, 16, 24, 24),) + self._test(MaxPool2d(stride=1), inputs) + + def test_a8w8_max_pool2d(self): + inputs = (torch.randn(1, 16, 24, 24),) + self._test_a8w8(MaxPool2d(), inputs) + + def test_a8w8_max_pool2d_with_padding(self): + inputs = (torch.randn(1, 16, 24, 24),) + self._test_a8w8(MaxPool2d(padding=1), inputs) + + def test_a8w8_max_pool2d_with_kernel_size(self): + inputs = (torch.randn(1, 16, 24, 24),) + self._test_a8w8(MaxPool2d(kernel_size=4), inputs) + + def test_a8w8_max_pool2d_with_dilation(self): + inputs = (torch.randn(1, 16, 24, 24),) + self._test_a8w8(MaxPool2d(dilation=2), inputs) + + def test_a8w8_max_pool2d_with_stride(self): + inputs = (torch.randn(1, 16, 24, 24),) + self._test_a8w8(MaxPool2d(stride=1), inputs) diff --git a/backends/samsung/test/ops/test_mean_dim.py b/backends/samsung/test/ops/test_mean_dim.py index 5c6378000bd..6f4166f67a4 100644 --- a/backends/samsung/test/ops/test_mean_dim.py +++ b/backends/samsung/test/ops/test_mean_dim.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class MeanDim(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + 
[gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() @@ -42,6 +43,23 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec(TestConfig.chipset)], + ) + ( + tester.quantize() + .export() + .check_count({"torch.ops.aten.mean.dim": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_mean_dim"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.2) + ) + def test_fp32_mean_with_keep_dims(self): inputs = (torch.randn(1, 3, 8, 8),) self._test(MeanDim(), inputs) @@ -49,3 +67,11 @@ def test_fp32_mean_with_keep_dims(self): def test_fp32_mean_without_keep_dims(self): inputs = (torch.randn(1, 3, 8, 8),) self._test(MeanDim(keep_dims=False), inputs) + + def test_a8w8_mean_with_keep_dims(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test_a8w8(MeanDim(), inputs) + + def test_a8w8_mean_without_keep_dims(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test_a8w8(MeanDim(keep_dims=False), inputs) diff --git a/backends/samsung/test/ops/test_minimum.py b/backends/samsung/test/ops/test_minimum.py index e82b2e0c428..43c8a32727c 100644 --- a/backends/samsung/test/ops/test_minimum.py +++ b/backends/samsung/test/ops/test_minimum.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Minimum(torch.nn.Module): @@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_mul.py 
b/backends/samsung/test/ops/test_mul.py index 0f77a5e8f55..3db6003b482 100644 --- a/backends/samsung/test/ops/test_mul.py +++ b/backends/samsung/test/ops/test_mul.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Mul(torch.nn.Module): @@ -38,7 +39,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() @@ -50,6 +51,23 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec(TestConfig.chipset)], + ) + ( + tester.quantize() + .export() + .check_count({"torch.ops.aten.mul.Tensor": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_mul_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.2) + ) + def test_fp32_simple_mul(self): inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8)) self._test(Mul(), inputs) @@ -61,3 +79,15 @@ def test_fp32_const_mul(self): def test_fp32_mul_broadcast(self): inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8)) self._test(Mul(), inputs) + + def test_a8w8_simple_mul(self): + inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8)) + self._test_a8w8(Mul(), inputs) + + def test_a8w8_const_mul(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test_a8w8(MulConstant(torch.randn(1, 3, 8, 8)), inputs) + + def test_a8w8_mul_broadcast(self): + inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8)) + self._test_a8w8(Mul(), inputs) diff --git a/backends/samsung/test/ops/test_permute.py 
b/backends/samsung/test/ops/test_permute.py index e0052c3ec37..7a289b9ef06 100644 --- a/backends/samsung/test/ops/test_permute.py +++ b/backends/samsung/test/ops/test_permute.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Permute(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_pixel_shuffle.py b/backends/samsung/test/ops/test_pixel_shuffle.py index f7d86e5b1a9..18672a491d2 100644 --- a/backends/samsung/test/ops/test_pixel_shuffle.py +++ b/backends/samsung/test/ops/test_pixel_shuffle.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class PixelShuffle(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_relu.py b/backends/samsung/test/ops/test_relu.py index 20da52cb10f..404064b9a51 100644 --- a/backends/samsung/test/ops/test_relu.py +++ b/backends/samsung/test/ops/test_relu.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class ReLU(torch.nn.Module): @@ -44,7 +45,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], 
) ( tester.export() @@ -56,6 +57,23 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec(TestConfig.chipset)], + ) + ( + tester.quantize() + .export() + .check_count({"torch.ops.aten.relu.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_relu_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.2) + ) + def test_fp32_single_relu(self): inputs = (torch.randn(1, 3, 56, 56),) self._test(ReLU(with_conv=False), inputs) @@ -63,3 +81,11 @@ def test_fp32_single_relu(self): def test_fp32_conv_relu(self): inputs = (torch.randn(1, 3, 56, 56),) self._test(ReLU(with_conv=True), inputs) + + def test_a8w8_single_relu(self): + inputs = (torch.randn(1, 3, 56, 56),) + self._test_a8w8(ReLU(with_conv=False), inputs) + + def test_a8w8_conv_relu(self): + inputs = (torch.randn(1, 3, 56, 56),) + self._test_a8w8(ReLU(with_conv=True), inputs) diff --git a/backends/samsung/test/ops/test_reshape.py b/backends/samsung/test/ops/test_reshape.py index 148186fb997..d3477f354fc 100644 --- a/backends/samsung/test/ops/test_reshape.py +++ b/backends/samsung/test/ops/test_reshape.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Reshape(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_rsqrt.py b/backends/samsung/test/ops/test_rsqrt.py index 9cab9456d64..216e1ed6e0e 100644 --- 
a/backends/samsung/test/ops/test_rsqrt.py +++ b/backends/samsung/test/ops/test_rsqrt.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Rsqrt(torch.nn.Module): @@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_select.py b/backends/samsung/test/ops/test_select.py index 3d619f37a0f..9909f7653b7 100644 --- a/backends/samsung/test/ops/test_select.py +++ b/backends/samsung/test/ops/test_select.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class SelectCopy(torch.nn.Module): @@ -31,7 +32,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_slice_copy.py b/backends/samsung/test/ops/test_slice_copy.py index 4b3a100f927..17868fac327 100644 --- a/backends/samsung/test/ops/test_slice_copy.py +++ b/backends/samsung/test/ops/test_slice_copy.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class SliceCopy(torch.nn.Module): @@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_softmax.py 
b/backends/samsung/test/ops/test_softmax.py index 8721df588d1..9c0887f6b66 100644 --- a/backends/samsung/test/ops/test_softmax.py +++ b/backends/samsung/test/ops/test_softmax.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Softmax(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_sqrt.py b/backends/samsung/test/ops/test_sqrt.py index 1ed31277dc3..1d3a584c808 100644 --- a/backends/samsung/test/ops/test_sqrt.py +++ b/backends/samsung/test/ops/test_sqrt.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Sqrt(torch.nn.Module): @@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_squeeze.py b/backends/samsung/test/ops/test_squeeze.py index 329053adc8c..a69dc92a067 100644 --- a/backends/samsung/test/ops/test_squeeze.py +++ b/backends/samsung/test/ops/test_squeeze.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Squeeze(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff 
--git a/backends/samsung/test/ops/test_sub.py b/backends/samsung/test/ops/test_sub.py index aea428b34b8..89e90fadc86 100644 --- a/backends/samsung/test/ops/test_sub.py +++ b/backends/samsung/test/ops/test_sub.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class Sub(torch.nn.Module): @@ -38,7 +39,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_to_copy.py b/backends/samsung/test/ops/test_to_copy.py index 002e85801fe..d867ed02655 100644 --- a/backends/samsung/test/ops/test_to_copy.py +++ b/backends/samsung/test/ops/test_to_copy.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class ToCopy(torch.nn.Module): @@ -29,7 +30,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_unsqueeze.py b/backends/samsung/test/ops/test_unsqueeze.py index e10745fa839..125b085d155 100644 --- a/backends/samsung/test/ops/test_unsqueeze.py +++ b/backends/samsung/test/ops/test_unsqueeze.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class UnSqueeze(torch.nn.Module): @@ -30,7 +31,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + 
[gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/ops/test_upsample_bilinear2d.py b/backends/samsung/test/ops/test_upsample_bilinear2d.py index 7bdf3ab4041..1131ac9774e 100644 --- a/backends/samsung/test/ops/test_upsample_bilinear2d.py +++ b/backends/samsung/test/ops/test_upsample_bilinear2d.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class UpsampleBilinear2d(torch.nn.Module): @@ -35,7 +36,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() @@ -49,6 +50,29 @@ def _test(self, module: torch.nn.Module, inputs): .run_method_and_compare_outputs(inputs=inputs) ) + def _test_a8w8(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec(TestConfig.chipset)], + ) + ( + tester.quantize() + .export() + .check_count({"torch.ops.aten.upsample_bilinear2d.vec": 1}) + .to_edge_transform_and_lower() + .check_not( + ["executorch_exir_dialects_edge__ops_aten_upsample_bilinear2d_vec"] + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=inputs, atol=0.7, rtol=0.1) + ) + def test_fp32_upsample_bilinear2d(self): inputs = (torch.randn(1, 16, 16, 16),) self._test(UpsampleBilinear2d(), inputs) + + def test_a8w8_upsample_bilinear2d(self): + inputs = (torch.randn(1, 16, 16, 16),) + self._test_a8w8(UpsampleBilinear2d(), inputs) diff --git a/backends/samsung/test/ops/test_upsample_nearest2d.py b/backends/samsung/test/ops/test_upsample_nearest2d.py index bbdff40a0e9..d59c907e84b 100644 --- a/backends/samsung/test/ops/test_upsample_nearest2d.py +++ 
b/backends/samsung/test/ops/test_upsample_nearest2d.py @@ -14,6 +14,7 @@ gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig class UpsampleNearest2d(torch.nn.Module): @@ -34,7 +35,7 @@ def _test(self, module: torch.nn.Module, inputs): tester = SamsungTester( module, inputs, - [gen_samsung_backend_compile_spec("E9955")], + [gen_samsung_backend_compile_spec(TestConfig.chipset)], ) ( tester.export() diff --git a/backends/samsung/test/tester/samsung_tester.py b/backends/samsung/test/tester/samsung_tester.py index f33d508dfca..7f1f65f0d6c 100644 --- a/backends/samsung/test/tester/samsung_tester.py +++ b/backends/samsung/test/tester/samsung_tester.py @@ -18,11 +18,20 @@ ) from executorch.backends.test.harness import Tester as TesterBase from executorch.backends.test.harness.stages import StageType +from executorch.backends.transforms.decompose_sdpa import ( + DecomposeScaledDotProductAttention, +) from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower from executorch.exir.backend.backend_details import CompileSpec from executorch.exir.pass_manager import PassType -from torch.export import ExportedProgram +from torch.export import export, ExportedProgram + +from torchao.quantization.pt2e.quantize_pt2e import ( + convert_pt2e, + prepare_pt2e, + prepare_qat_pt2e, +) from torchao.quantization.pt2e.quantizer import Quantizer @@ -48,6 +57,35 @@ def __init__( is_qat=is_qat, ) + def run( + self, artifact: torch.nn.Module, inputs: Optional[Tuple[torch.Tensor]] + ) -> None: + assert inputs is not None + if self.is_qat: + artifact.train() + captured_graph = export(artifact, inputs, strict=True).module() + + assert isinstance(captured_graph, torch.fx.GraphModule) + + DecomposeScaledDotProductAttention()(captured_graph) + + if self.is_qat: + prepared = prepare_qat_pt2e(captured_graph, self.quantizer) + else: + prepared = 
prepare_pt2e(captured_graph, self.quantizer) + + if self.calibrate: + # Calibrate prepared model to provide data to quantization observers. + if self.calibration_samples is not None: + for inp in self.calibration_samples: + prepared(*inp) + else: + prepared(*inputs) + + converted = convert_pt2e(prepared, fold_quantize=False) + + self.converted_graph = converted + class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower): def __init__( diff --git a/backends/samsung/test/utils/run_tests.py b/backends/samsung/test/utils/run_tests.py new file mode 100644 index 00000000000..4dd1c0b021a --- /dev/null +++ b/backends/samsung/test/utils/run_tests.py @@ -0,0 +1,58 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + +import argparse +import os +import sys +import unittest + +from executorch.backends.samsung.test.utils.utils import TestConfig + + +TESTS_SEARCH_DIRS = ["ops", "models"] +current_dir = os.path.dirname(os.path.abspath(__file__)) + + +def setup_env_with_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--chipset", + default="E9955", + help="Samsung chipset, i.e. 
E9955, E9965, etc", + type=str, + ) + parser.add_argument( + "--host", + help="Host ip address with device connecting", + type=str, + ) + args = parser.parse_args() + + TestConfig.host_ip = args.host + TestConfig.chipset = args.chipset + + +if __name__ == "__main__": + setup_env_with_args() + test_suite = unittest.TestSuite() + + for test_search_dir in TESTS_SEARCH_DIRS: + tests = unittest.TestLoader().discover( + start_dir=os.path.join(f"{current_dir}/../", test_search_dir), + pattern="test*.py", + top_level_dir=None, + ) + test_suite.addTest(tests) + + test_runner = unittest.TextTestRunner() + result = test_runner.run(test_suite) + + if not result.wasSuccessful(): + print("----------------------------------------------------------------------") + for fail_case in result.failures: + print(f" {fail_case[0]}") + sys.exit(1) diff --git a/backends/samsung/test/utils/utils.py b/backends/samsung/test/utils/utils.py new file mode 100644 index 00000000000..4385245daf9 --- /dev/null +++ b/backends/samsung/test/utils/utils.py @@ -0,0 +1,11 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +class TestConfig: + host_ip: str = "111.111.111.111" + chipset: str = "E9965" From d5f491008a2c2af01b05349ffc4432a10bca9ff7 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Thu, 2 Apr 2026 09:44:46 +0000 Subject: [PATCH 2/6] Disable mobileBert test - this test will be enabled after fixing transformers version issue Signed-off-by: jiseong.oh --- backends/samsung/test/models/test_torchvision_vit.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backends/samsung/test/models/test_torchvision_vit.py b/backends/samsung/test/models/test_torchvision_vit.py index a32dd6ac9ac..127bc43b5c8 100644 --- a/backends/samsung/test/models/test_torchvision_vit.py +++ b/backends/samsung/test/models/test_torchvision_vit.py @@ -16,6 +16,10 @@ class TestMilestoneTorchVisionViT(unittest.TestCase): + + # This model is skipped because transformers=5.0.0rc1. + # it will re-enable after fixing the issue + @unittest.skip def test_torchvision_vit_fp16(self): torch.manual_seed(8) model = TorchVisionViTModel().get_eager_model() From 34f372318a4207f62c557073fc95cb42fbddc11d Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Wed, 1 Apr 2026 11:46:42 +0000 Subject: [PATCH 3/6] Add more CI testcases for Exynos Backend - Support necessary stages for E2E test - Enable test for float/quant ops and float models Co-authored-by: chong.checn Co-authored-by: jhbb.cha Co-authored-by: xz.linghu Signed-off-by: jiseong.oh --- .../test/models/test_mobilebert_finetuning.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 backends/samsung/test/models/test_mobilebert_finetuning.py diff --git a/backends/samsung/test/models/test_mobilebert_finetuning.py b/backends/samsung/test/models/test_mobilebert_finetuning.py new file mode 100644 index 00000000000..92b52e38c9e --- /dev/null +++ b/backends/samsung/test/models/test_mobilebert_finetuning.py @@ -0,0 +1,65 @@ +# Copyright (c) Samsung Electronics Co. 
LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + +import os +import unittest + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.utils import TestConfig + +from executorch.examples.samsung.scripts.mobilebert_finetune import MobileBertFinetune +from transformers import AutoTokenizer + + +def patch_mobilebert_finetuning(model_cache_dir: str): + assert os.path.isdir( + model_cache_dir + ), "Can not found model cache dirrecory for mobilebert finetuning" + + def _monkeypatch_load_tokenizer(self): + tokenizer = AutoTokenizer.from_pretrained(model_cache_dir) + return tokenizer + + old_func = MobileBertFinetune.load_tokenizer + MobileBertFinetune.load_tokenizer = _monkeypatch_load_tokenizer + return old_func + + +def recover_mobilebert_finetuning(old_func): + MobileBertFinetune.load_tokenizer = old_func + + +class Test_Milestone_MobileBertFinetune(unittest.TestCase): + @classmethod + def setUpClass(cls): + assert (model_cache_dir := os.getenv("MODEL_CACHE")), "MODEL_CACHE not set!" 
+ cls.model_cache_dir = os.path.join(model_cache_dir, "mobilebert") + cls._old_func = patch_mobilebert_finetuning(cls.model_cache_dir) + + @classmethod + def tearDownClass(cls): + recover_mobilebert_finetuning(cls._old_func) + + # This model need to be fixed according new transformer version + @unittest.skip + def test_mobilebert_finetuning_fp16(self): + mobilebert_finetune = MobileBertFinetune() + model, _ = mobilebert_finetune.get_finetune_mobilebert(self.model_cache_dir) + example_input = mobilebert_finetune.get_example_inputs() + tester = SamsungTester( + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] + ) + + ( + tester.export() + .to_edge_transform_and_lower() + .to_executorch() + .run_method_and_compare_outputs(inputs=example_input, atol=0.008) + ) From f9f988d4bb0a7aee3960473a3e2a758521dc6873 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Thu, 2 Apr 2026 10:14:59 +0000 Subject: [PATCH 4/6] Support Quantized MobileBert - update annotator - Support quantized mobilebert - update Quantization strategy Co-authored-by: chen.zhao Co-authored-by: Sangsoo.ko Signed-off-by: jiseong.oh --- backends/samsung/_passes/annotate_qparams.py | 39 +- .../_passes/annotate_scalar_parameters.py | 50 +- .../{fuse_conv_act.py => fuse_activation.py} | 30 +- backends/samsung/_passes/insert_qdq.py | 9 + .../_passes/transform_quantized_mask.py | 105 +++ backends/samsung/builders/__init__.py | 2 + .../samsung/builders/op_constant_pad_nd.py | 2 +- backends/samsung/builders/op_embedding.py | 1 + backends/samsung/builders/op_placeholder.py | 37 + backends/samsung/builders/op_slice_copy.py | 8 +- backends/samsung/builders/op_sub.py | 6 +- backends/samsung/enn_preprocess.py | 15 +- backends/samsung/partition/enn_partitioner.py | 1 + backends/samsung/quantizer/annotator.py | 98 +-- backends/samsung/quantizer/qconfig.py | 56 +- backends/samsung/quantizer/quantizer.py | 7 +- .../samsung/serialization/enn_graph_schema.py | 6 +- 
.../test/models/test_torchvision_vit.py | 3 - .../samsung/test/tester/samsung_tester.py | 7 +- backends/samsung/test/utils/datasets.py | 261 +++++++ backends/samsung/test/utils/quant_checkers.py | 240 ++++++ backends/samsung/utils/export_utils.py | 17 +- .../scripts/mobilebert_finetune_QAT.py | 686 ++++++++++++++++++ 23 files changed, 1502 insertions(+), 184 deletions(-) rename backends/samsung/_passes/{fuse_conv_act.py => fuse_activation.py} (71%) create mode 100644 backends/samsung/_passes/transform_quantized_mask.py create mode 100644 backends/samsung/builders/op_placeholder.py create mode 100644 backends/samsung/test/utils/datasets.py create mode 100644 backends/samsung/test/utils/quant_checkers.py create mode 100644 examples/samsung/scripts/mobilebert_finetune_QAT.py diff --git a/backends/samsung/_passes/annotate_qparams.py b/backends/samsung/_passes/annotate_qparams.py index 663d1fdf5fa..ede71a6ff16 100644 --- a/backends/samsung/_passes/annotate_qparams.py +++ b/backends/samsung/_passes/annotate_qparams.py @@ -14,6 +14,7 @@ from torch._export.utils import get_buffer from torch.export import ExportedProgram from torch.fx import GraphModule, Node +from torch.fx.passes.utils.source_matcher_utils import get_source_partitions class AnnotateQparamsPass(ExportPass): @@ -148,13 +149,34 @@ def _check_same(requant_obj, ori_obj) -> bool: _check_same(ori_quant_attrs[key], requantize_attrs[key]) for key in key_map.values() ): - requantize_map[idx] = requantize_attrs + if ( + ori_quant_attrs[QuantConstants.QUANT_KEY.quant_dtype] + != requantize_attrs[QuantConstants.QUANT_KEY.quant_dtype] + ): + # For Q-DQ who will change quant dtype, we will insert requantization node + requantize_map[idx] = requantize_attrs + else: + node.meta["quantize_attrs"] = requantize_attrs def _annotate(self, graph_module: GraphModule): for node in graph_module.graph.nodes: + if key_map := QuantConstants.DEQUANT_OPS_KEY_MAP.get(node.target, None): + # We will fold node with constant output in the 
future pass as a constant node + # example: Constant->Q->DQ->nodeN->Q->DQ, this seq will be folded to one + # We need to store the q-params from last DQ params for quantizing constant value + quant_attrs = self.get_quant_attrs(node, key_map) + if node.args[0].target in QuantConstants.QUANT_OPS_KEY_MAP: + node.meta["quantize_attrs"] = quant_attrs + else: + node.args[0].meta["quantize_attrs"] = quant_attrs + continue key_map = QuantConstants.QUANT_OPS_KEY_MAP.get(node.target, None) if not key_map: continue + quant_attrs = self.get_quant_attrs(node, key_map) + if node.args[0].target in QuantConstants.QUANT_OPS_KEY_MAP: + node.meta["quantize_attrs"] = quant_attrs + continue source_node = node.args[0] if source_node.target in ( *QuantConstants.QUANT_OPS_KEY_MAP, @@ -164,13 +186,26 @@ def _annotate(self, graph_module: GraphModule): continue elif source_node.target == operator.getitem: source_node = source_node.args[0] - quant_attrs = self.get_quant_attrs(node, key_map) + source_node.meta["quantize_attrs"] = quant_attrs self._annotate_requantize(source_node) self._propagate_quant_params(source_node) + def _annotate_decomposed_mm(self, graph_module: GraphModule): + for source_list in get_source_partitions(graph_module.graph, ["matmul"]).get( + "matmul", {} + ): + final_view = source_list.output_nodes[0] + if not (quantize_attrs := final_view.meta.get("quantize_attrs")): + continue + for node in source_list.nodes: + if node.target == exir_ops.edge.aten.bmm.default: + node.meta["quantize_attrs"] = quantize_attrs + break + def call(self, graph_module: GraphModule): self._annotate(graph_module) + self._annotate_decomposed_mm(graph_module) graph_module.recompile() return PassResult(graph_module, True) diff --git a/backends/samsung/_passes/annotate_scalar_parameters.py b/backends/samsung/_passes/annotate_scalar_parameters.py index 643685bdb25..2d3e9778f7c 100644 --- a/backends/samsung/_passes/annotate_scalar_parameters.py +++ 
b/backends/samsung/_passes/annotate_scalar_parameters.py @@ -5,7 +5,6 @@ # LICENSE file in the root directory of this source tree. import torch -from executorch.backends.samsung.quantizer.quantizer import global_quant_info from executorch.backends.samsung.utils.constants import QuantConstants from executorch.backends.transforms.utils import get_param_tensor, is_param_node from executorch.exir.dialects._ops import ops as exir_ops @@ -25,6 +24,7 @@ class AnnotateScalarParametersPass(ExportPass): exir_ops.edge.aten.mul.Tensor, exir_ops.edge.aten.add.Tensor, exir_ops.edge.aten.div.Tensor, + exir_ops.edge.aten.sub.Tensor, } def __init__(self, edge_program: ExportedProgram): @@ -35,27 +35,37 @@ def annotate(self, graph_module: torch.fx.GraphModule): for node in graph_module.graph.nodes: if node.target not in self.TARGET_OPS or "quantize_attrs" not in node.meta: continue - torch_quant_dtype = global_quant_info.weight_precison.torch_dtype - for input_arg in node.all_input_nodes: - if input_arg.op not in ("placeholder", "get_attr") or not is_param_node( - self.edge_program, input_arg + input0, input1 = node.all_input_nodes[0], node.all_input_nodes[1] + if input0.op not in ("placeholder", "get_attr") or not is_param_node( + self.edge_program, input0 + ): + if input1.op not in ("placeholder", "get_attr") or not is_param_node( + self.edge_program, input1 ): continue - else: - tensor = get_param_tensor(self.edge_program, input_arg) - if not tensor.shape: - qparams = { - QuantConstants.QUANT_KEY.scale: float(tensor), - QuantConstants.QUANT_KEY.quant_dtype: torch_quant_dtype, - QuantConstants.QUANT_KEY.quant_max: torch.iinfo( - torch_quant_dtype - ).max, - QuantConstants.QUANT_KEY.quant_min: torch.iinfo( - torch_quant_dtype - ).min, - QuantConstants.QUANT_KEY.zero_point: 0, - } - input_arg.meta["quantize_attrs"] = qparams + ifm_node, param_tensor_node = input0, input1 + else: + ifm_node, param_tensor_node = input1, input0 + if not (quantize_attrs := 
ifm_node.meta.get("quantize_attrs")): + continue + param_tensor = get_param_tensor(self.edge_program, param_tensor_node) + if not param_tensor.shape: + scale = ( + float(param_tensor) if param_tensor > 0 else -float(param_tensor) + ) + else: + continue + q_dtype = quantize_attrs[QuantConstants.QUANT_KEY.quant_dtype] + if scale == 0: + scale = 1.0 + qparams = { + QuantConstants.QUANT_KEY.scale: scale, + QuantConstants.QUANT_KEY.quant_dtype: q_dtype, + QuantConstants.QUANT_KEY.quant_max: torch.iinfo(q_dtype).max, + QuantConstants.QUANT_KEY.quant_min: torch.iinfo(q_dtype).min, + QuantConstants.QUANT_KEY.zero_point: 0, + } + param_tensor_node.meta["quantize_attrs"] = qparams def call(self, graph_module: torch.fx.GraphModule): graph = graph_module.graph diff --git a/backends/samsung/_passes/fuse_conv_act.py b/backends/samsung/_passes/fuse_activation.py similarity index 71% rename from backends/samsung/_passes/fuse_conv_act.py rename to backends/samsung/_passes/fuse_activation.py index c034c98bb14..54dc3ab3873 100644 --- a/backends/samsung/_passes/fuse_conv_act.py +++ b/backends/samsung/_passes/fuse_activation.py @@ -24,7 +24,7 @@ def map_hardtan_relux(tanhnode: torch.fx.node.Node) -> Optional[str]: return None -class FuseConvActPass(ExportPass): +class FuseActivationPass(ExportPass): TARGET_ACTS_MAP = { exir_ops.edge.aten.relu.default: (lambda x: "RELU"), exir_ops.edge.aten.relu_.default: (lambda x: "RELU"), @@ -33,39 +33,40 @@ class FuseConvActPass(ExportPass): exir_ops.edge.aten.hardtanh.default: map_hardtan_relux, exir_ops.edge.aten.hardtanh_.default: map_hardtan_relux, } + TARGET_SOURCE_NODES = { + exir_ops.edge.aten.convolution.default, + exir_ops.edge.aten.linear.default, + } def _fuse( self, graph_module: GraphModule, ): - for target_conv, target_act in self.get_target_conv_act(graph_module): + for target_src, target_act in self.get_target_src_act(graph_module): assert ( act_name := self.TARGET_ACTS_MAP.get(target_act.target)(target_act) ), f"Not supported 
{target_act.name} now." - target_conv.meta["activation"] = act_name + target_src.meta["activation"] = act_name if "quantize_attrs" in target_act.meta: - target_conv.meta["quantize_attrs"] = target_act.meta["quantize_attrs"] - - # If we merge the real out activation to conv, the conv should be the real out - if "real_out" in target_act.meta: - target_conv.meta["real_out"] = target_act.meta["real_out"] + target_src.meta["quantize_attrs"] = target_act.meta["quantize_attrs"] + else: + continue for user in [user for user in target_act.users.keys()]: # noqa: C416 - user.replace_input_with(target_act, target_conv) + user.replace_input_with(target_act, target_src) graph_module.graph.erase_node(target_act) - def get_target_conv_act(self, graph_module: GraphModule): + def get_target_src_act(self, graph_module: GraphModule): for node in graph_module.graph.nodes: - if node.target != exir_ops.edge.aten.convolution.default: + if node.target not in self.TARGET_SOURCE_NODES: continue if len(node.users) != 1: - # Such cases couldn't be conv + act + # Such cases couldn't be src + act continue act_node = list(node.users.keys())[0] if act_node.target not in self.TARGET_ACTS_MAP: continue if "quantize_attrs" in node.meta: - # If the conv's output is quantized - # We do not fuse them + # If we merge the real out activation to source, the source should be the real out continue yield node, act_node @@ -73,5 +74,4 @@ def call(self, graph_module: GraphModule): self._fuse(graph_module) graph_module.recompile() dead_code_elimination_pass(graph_module) - _ = super().call(graph_module).graph_module return PassResult(graph_module, True) diff --git a/backends/samsung/_passes/insert_qdq.py b/backends/samsung/_passes/insert_qdq.py index a59b011ac4b..fb473810c5a 100644 --- a/backends/samsung/_passes/insert_qdq.py +++ b/backends/samsung/_passes/insert_qdq.py @@ -156,9 +156,18 @@ def _add_qdq(self, graph_module: GraphModule): elif is_graph_output(node): self._add_dq_after(graph_module, node) + def 
_add_q_for_cast(self, graph_module: GraphModule): + for node in list(graph_module.graph.nodes): + if not node.target == exir_ops.edge.aten._to_copy.default: + continue + if "quantize_attrs" not in node.meta: + continue + self._add_q_after(graph_module, node) + def call(self, graph_module: GraphModule): self._add_qdq(graph_module) self._add_qdq_for_requantize(graph_module) + self._add_q_for_cast(graph_module) graph_module.graph.eliminate_dead_code() graph_module.recompile() return PassResult(graph_module, True) diff --git a/backends/samsung/_passes/transform_quantized_mask.py b/backends/samsung/_passes/transform_quantized_mask.py new file mode 100644 index 00000000000..e5116024eee --- /dev/null +++ b/backends/samsung/_passes/transform_quantized_mask.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import torch +from executorch.backends.samsung.utils.constants import QuantConstants +from executorch.exir.dialects._ops import ops as exir_ops +from executorch.exir.pass_base import ExportPass, PassResult +from executorch.exir.passes import dead_code_elimination_pass +from torch.export import ExportedProgram +from torch.fx import GraphModule + + +class TransformQuantizedMaskPass(ExportPass): + def __init__(self, edge_program: ExportedProgram): + super().__init__() + self.edge_program = edge_program + + def get_mask_mul(self, graph_module: GraphModule): + """ + Iterator for each patterns in the graph. + The obj returned by iterator is the first node of the pattern. 
+ """ + nodes_in_pattern = ( + exir_ops.edge.quantized_decomposed.quantize_per_tensor.default, + exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default, + exir_ops.edge.aten.sub.Tensor, + exir_ops.edge.aten._to_copy.default, + exir_ops.edge.aten.unsqueeze_copy.default, + exir_ops.edge.aten.mul.Tensor, + ) + mask_node = None + for node in graph_module.graph.nodes: + if node.target != "attention_mask": + continue + else: + mask_node = node + break + if mask_node is None: + return None + while node.target != exir_ops.edge.aten.mul.Tensor: + find_next = False + for successor in list(node.users.keys()): + if successor.target in nodes_in_pattern: + node = successor + find_next = True + break + if not find_next: + return None + return node + + def transform( + self, + graph_module: GraphModule, + ): + mask_mul = self.get_mask_mul(graph_module) + if mask_mul is None: + return + rsub_node = mask_mul.args[0] + manual_mul_idx = 0 + for add in list(mask_mul.users.keys()): + custom_tensor_name = f"_custom_tensor_{manual_mul_idx}" + div_node = add.args[0] + if "quantize_attrs" not in div_node.meta: + return + div_quant_args = div_node.meta["quantize_attrs"] + custom_tensor = torch.tensor( + ( + div_node.meta["quantize_attrs"][QuantConstants.QUANT_KEY.quant_min] + - div_node.meta["quantize_attrs"][ + QuantConstants.QUANT_KEY.zero_point + ] + ) + * div_node.meta["quantize_attrs"][QuantConstants.QUANT_KEY.scale], + dtype=torch.float32, + ) + graph_module.register_buffer(custom_tensor_name, custom_tensor) + add.meta["quantize_attrs"] = div_quant_args + with graph_module.graph.inserting_after(rsub_node): + custom_attr = graph_module.graph.get_attr(custom_tensor_name) + with graph_module.graph.inserting_after(custom_attr): + new_mul = graph_module.graph.create_node( + "call_function", + exir_ops.edge.aten.mul.Tensor, + (mask_mul.args[0], custom_attr), + ) + new_mul.meta["quantize_attrs"] = div_quant_args + add.replace_input_with(mask_mul, new_mul) + + rsub_in = 
rsub_node.args[1] + with graph_module.graph.inserting_before(add): + new_mul = graph_module.graph.create_node( + "call_function", exir_ops.edge.aten.mul.Tensor, (div_node, rsub_in) + ) + new_mul.meta["quantize_attrs"] = div_quant_args + add.replace_input_with(div_node, new_mul) + manual_mul_idx += 1 + + def call(self, graph_module: GraphModule): + self.transform(graph_module) + graph_module.recompile() + dead_code_elimination_pass(graph_module) + return PassResult(graph_module, True) diff --git a/backends/samsung/builders/__init__.py b/backends/samsung/builders/__init__.py index 978da82b370..57e181c7200 100644 --- a/backends/samsung/builders/__init__.py +++ b/backends/samsung/builders/__init__.py @@ -34,6 +34,7 @@ op_mul, op_permute, op_pixel_shuffle, + op_placeholder, op_quantize, op_relu, op_reshape, @@ -80,6 +81,7 @@ op_mul, op_permute, op_pixel_shuffle, + op_placeholder, op_quantize, op_relu, op_reshape, diff --git a/backends/samsung/builders/op_constant_pad_nd.py b/backends/samsung/builders/op_constant_pad_nd.py index cc7cdc5751b..006f52619ff 100644 --- a/backends/samsung/builders/op_constant_pad_nd.py +++ b/backends/samsung/builders/op_constant_pad_nd.py @@ -52,5 +52,5 @@ def define_node( "padding": "EXPLICIT", "padding_type": "CONSTANT", } - + self._update_params_qdtype(node, params) enn_graph.define_op(node.name, "PAD", [input_id], [output_id], params) diff --git a/backends/samsung/builders/op_embedding.py b/backends/samsung/builders/op_embedding.py index f37c46a56d6..c8ef686d3d3 100644 --- a/backends/samsung/builders/op_embedding.py +++ b/backends/samsung/builders/op_embedding.py @@ -36,6 +36,7 @@ def define_node( output_id = self.define_tensor(node, enn_graph, vals_to_ids) params = {"axis": 0, "input_type": "indices"} + self._update_params_qdtype(node, params) enn_graph.define_op( node.name, "GATHER", [input_id, weight_id], [output_id], params ) diff --git a/backends/samsung/builders/op_placeholder.py b/backends/samsung/builders/op_placeholder.py new file 
mode 100644 index 00000000000..b4b606f56ea --- /dev/null +++ b/backends/samsung/builders/op_placeholder.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph +from executorch.backends.transforms.utils import is_param_node + + +@register_node_visitor +class PlaceholderVisitor(NodeVisitor): + """ + To define input tensors. + This is to make the order of inputs correct. + """ + + target = "placeholder" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + if is_param_node(self.exported_program, node): + return + self.define_tensor(node, enn_graph, vals_to_ids) diff --git a/backends/samsung/builders/op_slice_copy.py b/backends/samsung/builders/op_slice_copy.py index 0d7a23118a0..e85b6bf60c3 100644 --- a/backends/samsung/builders/op_slice_copy.py +++ b/backends/samsung/builders/op_slice_copy.py @@ -38,10 +38,14 @@ def define_node( dim = cast(int, node.args[1]) if dim < 0: dim = dim + len(in_shape) - start_val = cast(int, node.args[2]) + start_val = cast(int, node.args[2]) if node.args[2] else 0 if start_val < 0: start_val = start_val + in_shape[dim] - end_val = min(cast(int, node.args[3]), in_shape[dim]) + end_val = ( + in_shape[dim] + if len(node.args) < 4 + else min(cast(int, node.args[3]), in_shape[dim]) + ) if end_val < 0: end_val = end_val + in_shape[dim] diff --git a/backends/samsung/builders/op_sub.py b/backends/samsung/builders/op_sub.py index af2931f298e..7dc97bfa7ca 100644 --- a/backends/samsung/builders/op_sub.py +++ 
b/backends/samsung/builders/op_sub.py @@ -36,4 +36,8 @@ def define_node( # output output_id = self.define_tensor(node, enn_graph, vals_to_ids) - enn_graph.define_op(node.name, "SUB", [input_id_1, input_id_2], [output_id]) + params = {} + self._update_params_qdtype(node, params) + enn_graph.define_op( + node.name, "SUB", [input_id_1, input_id_2], [output_id], params + ) diff --git a/backends/samsung/enn_preprocess.py b/backends/samsung/enn_preprocess.py index 0847ec0adeb..e0ccf2d643d 100644 --- a/backends/samsung/enn_preprocess.py +++ b/backends/samsung/enn_preprocess.py @@ -18,8 +18,13 @@ ConstantPropPass, ) from executorch.backends.samsung._passes.fold_qdq import FoldQDQPass +from executorch.backends.samsung._passes.fuse_activation import FuseActivationPass from executorch.backends.samsung._passes.insert_qdq import InsertQDQPass +from executorch.backends.samsung._passes.remove_useless_ops import RemoveUselessOpPass from executorch.backends.samsung._passes.replace_scalar_ops import ReplaceOpsWithScalar +from executorch.backends.samsung._passes.transform_quantized_mask import ( + TransformQuantizedMaskPass, +) from executorch.backends.samsung.builders.node_visitor import get_node_visitors from executorch.backends.samsung.serialization.compile_options import ( ENN_COMPILE_OPTION_TITLE, @@ -30,6 +35,7 @@ from executorch.backends.transforms.fuse_batch_norm_with_conv import ( FuseBatchNormWithConvPass, ) +from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass @@ -59,9 +65,13 @@ def preprocess( enn_preprocess_passes = PassManager( passes=[ + RemoveUselessOpPass(), + RemoveCloneOpsTransform(), AnnotateQparamsPass(edge_program), + FuseActivationPass(), FoldQDQPass(), ConstantPropPass(edge_program), + TransformQuantizedMaskPass(edge_program), Conv1dToConv2d(edge_program), FuseBatchNormWithConvPass(edge_program), AddmmToLinearTransform(), @@ -79,6 +89,7 @@ def 
preprocess( node_visitors = get_node_visitors(edge_program) vals_to_ids: Dict[torch.fx.Node, int] = {} + placeholder_vistor = node_visitors["placeholder"] for node in pass_result.graph_module.graph.nodes: if node.op == "call_function": logging.info(f"Visiting: {node}, {node.target.__name__}") @@ -90,9 +101,11 @@ def preprocess( raise RuntimeError( f"{node.target.__name__}" " is not supported in ENN Delegate" ) + elif node.op == "placeholder": + logging.info(f"Visiting input of graph: {node}") + placeholder_vistor.define_node(node, enn_graph, vals_to_ids) elif node.op in [ "get_attr", - "placeholder", "output", ]: continue diff --git a/backends/samsung/partition/enn_partitioner.py b/backends/samsung/partition/enn_partitioner.py index 368d069c380..03fad83b32c 100644 --- a/backends/samsung/partition/enn_partitioner.py +++ b/backends/samsung/partition/enn_partitioner.py @@ -38,6 +38,7 @@ exir_ops.edge.aten.sub.Scalar, exir_ops.edge.aten.mul.Scalar, exir_ops.edge.aten.div.Scalar, + exir_ops.edge.aten.clone.default, ] diff --git a/backends/samsung/quantizer/annotator.py b/backends/samsung/quantizer/annotator.py index 31015698006..ea29c0c90c5 100644 --- a/backends/samsung/quantizer/annotator.py +++ b/backends/samsung/quantizer/annotator.py @@ -55,11 +55,7 @@ def annotate(graph: Graph, quant_config: QuantizationConfig) -> None: def _is_annotated(nodes: List[Node]): - """ - Given a list of nodes (that represents an operator pattern), - return True if any of the node - is annotated, otherwise return False - """ + # Checking if nodes are annotated. annotated = False for node in nodes: annotated = annotated or ( @@ -80,10 +76,7 @@ def _is_fake_tensor(node: Node): def _is_float_tensor(node: Node): - """Check if the node's tensor is a float tensor, - so that we can skip quantization for the node - since observers only works with float Tensors - """ + # checking if the node is quantized. 
if not _is_fake_tensor(node): return False return node.meta["val"].dtype in [torch.float32, torch.float16] @@ -272,18 +265,7 @@ def annotate_2in1out_with_SharedQuant( # skipping quantization if 1st input is not float. if _is_annotated([node]) or not _is_float_tensor(input0): return - if ( - isinstance(input0, Node) - and isinstance(input1, float) - and not _get_quantization_annotation(input0) - ): - return - if ( - isinstance(input0, float) - and isinstance(input1, Node) - and not _get_quantization_annotation(input1) - ): - return + if isinstance(input0, Node) and isinstance(input1, Node): shared_qspec = SharedQuantizationSpec((input0, node)) input_qspec_map[input0] = quant_config.input_activation @@ -322,7 +304,6 @@ def annotate_2in1out_with_SharedQuant( def annotate_add_ops_with_SharedQuant( node: Node, quant_config: QuantizationConfig ) -> None: - input_qspec_map = {} input0 = node.args[0] input1 = node.args[1] @@ -578,26 +559,10 @@ def annotate_index(node: Node, quant_config: QuantizationConfig) -> None: ) def annotate_index_put(node: Node, quant_config: QuantizationConfig) -> None: input_qspec_map = {} - input = node.args[0] # from KVCache in LLAMA value = node.args[2] # from linear projection layer - assert isinstance(input, Node) - assert isinstance(value, Node) - - if _is_annotated([node]) or not _is_float_tensor(input): - return - # get QuantAnnot from input path - shared_quant_node = _get_quantization_annotation(input) - if shared_quant_node: - shared_qspec = SharedQuantizationSpec((shared_quant_node, node)) - input_qspec_map[input] = shared_qspec - input_qspec_map[value] = shared_qspec - output_qspec = shared_qspec - else: - # if no QuantAnnot in input path, asign the default QuantAnnot from quant_config. 
- input_qspec_map[input] = quant_config.input_activation - input_qspec_map[value] = SharedQuantizationSpec((input, node)) - output_qspec = SharedQuantizationSpec((input, node)) + input_qspec_map[value] = quant_config.input_activation + output_qspec = SharedQuantizationSpec((value, node)) node.meta["quantization_annotation"] = QuantizationAnnotation( input_qspec_map=input_qspec_map, @@ -686,10 +651,11 @@ def annotate_embedding(node: Node, quant_config: QuantizationConfig) -> None: return input_qspec_map[weight] = quant_config.input_activation + shared_qspec = SharedQuantizationSpec((weight, node)) node.meta["quantization_annotation"] = QuantizationAnnotation( input_qspec_map=input_qspec_map, - output_qspec=quant_config.output_activation, + output_qspec=shared_qspec, _annotated=True, ) @@ -822,50 +788,48 @@ def annotate_batch_norm(node: Node, quant_config: QuantizationConfig) -> None: # CASE 11: Sigmoid @register_annotator([torch.ops.aten.sigmoid, torch.ops.aten.sigmoid.default]) def annotate_sigmoid(node: Node, quant_config: QuantizationConfig) -> None: - if _is_annotated([node]): + input_act = node.args[0] + # skipping quantization if 1st input is not float. 
+ if _is_annotated([node]) or not _is_float_tensor(input_act): return + input_act_qspec = quant_config.input_activation input_qspec_map = {} - input_act = node.args[0] - input_qspec_map[input_act] = quant_config.input_activation + if _is_float_tensor(input_act): + input_qspec_map[input_act] = input_act_qspec - assert isinstance(input_act, Node) + # bias observer setting out_qconf = quant_config.output_activation + if out_qconf.quant_max is not None and out_qconf.quant_min is not None: + quant_max = out_qconf.quant_max + quant_min = out_qconf.quant_min + else: + quant_max = torch.iinfo(out_qconf.dtype).max + quant_min = torch.iinfo(out_qconf.dtype).min - q_max = ( - torch.iinfo(out_qconf.dtype).max - if out_qconf.quant_max is None - else out_qconf.quant_max - ) - q_min = ( - torch.iinfo(out_qconf.dtype).min - if out_qconf.quant_min is None - else out_qconf.quant_min - ) - - scale = 1 / (q_max - q_min + 1) + quant_scale = 1 / (quant_max - quant_min + 1) - bias_obs_ctr = FixedQParamsObserver.with_args( - scale=scale, + bias_observer_setting = FixedQParamsObserver.with_args( + scale=quant_scale, zero_point=0, dtype=quant_config.output_activation.dtype, qscheme=torch.torch.per_tensor_affine, - quant_max=q_max, - quant_min=q_min, + quant_max=quant_max, + quant_min=quant_min, ) - # make sigmoid map to the range between 0~1 - out_act_quantization_spec = QuantizationSpec( + # output spec with bias oberver + output_act_qspec = QuantizationSpec( dtype=quant_config.output_activation.dtype, - quant_max=q_max, - quant_min=q_min, - observer_or_fake_quant_ctr=bias_obs_ctr, + quant_max=quant_max, + quant_min=quant_min, + observer_or_fake_quant_ctr=bias_observer_setting, qscheme=torch.torch.per_tensor_affine, ) if _is_float_tensor(node): node.meta["quantization_annotation"] = QuantizationAnnotation( input_qspec_map=input_qspec_map, - output_qspec=out_act_quantization_spec, + output_qspec=output_act_qspec, _annotated=True, ) diff --git a/backends/samsung/quantizer/qconfig.py 
b/backends/samsung/quantizer/qconfig.py index f32c8d39796..03bc9e6f509 100644 --- a/backends/samsung/quantizer/qconfig.py +++ b/backends/samsung/quantizer/qconfig.py @@ -10,8 +10,10 @@ import torch from torchao.quantization.pt2e import ( - FakeQuantize, + FusedMovingAvgObsFakeQuantize, MinMaxObserver, + MovingAverageMinMaxObserver, + MovingAveragePerChannelMinMaxObserver, PerChannelMinMaxObserver, ) from torchao.quantization.pt2e.quantizer import QuantizationSpec @@ -64,9 +66,7 @@ def _get_activation_qspec( qscheme = torch.per_tensor_symmetric if is_symmetric else torch.per_tensor_affine if is_qat: - observer_or_fake_quant = FakeQuantize.with_args( - observer=observer_cls, eps=eps_value - ) + observer_or_fake_quant = FusedMovingAvgObsFakeQuantize.with_args(eps=eps_value) else: observer_or_fake_quant = observer_cls.with_args(eps=eps_value) @@ -103,8 +103,14 @@ def _get_weight_qspec( observer_cls = PerChannelMinMaxObserver if is_qat: - observer_or_fake_quant = FakeQuantize.with_args( - observer=observer_cls, eps=eps_value + observer_cls = FusedMovingAvgObsFakeQuantize + if not is_per_channel: + weight_qat_observer = MovingAverageMinMaxObserver + else: + weight_qat_observer = MovingAveragePerChannelMinMaxObserver + observer_or_fake_quant = observer_cls.with_args( + eps=eps_value, + observer=weight_qat_observer, ) else: observer_or_fake_quant = observer_cls.with_args(eps=eps_value) @@ -134,41 +140,3 @@ def get_a8w8_enn_quant_config( bias=bias_quantization_spec, ) return quantization_config - - -class QuantInfo: - def __init__(self, torch_dtype: torch.dtype, string: str): - self._torch_dtype = torch_dtype - self._string = string - - @property - def torch_dtype(self): - return self._torch_dtype - - @property - def string(self): - return self._string - - -class QuantInfoManager: - QUANT_INFO_MAP = { - Precision.A8W8: (QuantInfo(torch.int8, "INT8"), QuantInfo(torch.int8, "INT8")), - } - FP_INFO = ( - QuantInfo(torch.float32, "FLOAT32"), - QuantInfo(torch.float32, 
"FLOAT32"), - ) - - def __init__(self): - self.precision = None - - def set_precision(self, precision: Precision): - self.precision = precision - - @property - def weight_precison(self) -> Optional[QuantInfo]: - return self.QUANT_INFO_MAP.get(self.precision, self.FP_INFO)[0] - - @property - def act_precision(self) -> Optional[QuantInfo]: - return self.QUANT_INFO_MAP.get(self.precision, self.FP_INFO)[1] diff --git a/backends/samsung/quantizer/quantizer.py b/backends/samsung/quantizer/quantizer.py index cf46677d000..83e43f13956 100644 --- a/backends/samsung/quantizer/quantizer.py +++ b/backends/samsung/quantizer/quantizer.py @@ -11,10 +11,7 @@ from torchao.quantization.pt2e.quantizer import Quantizer from .annotator import annotate -from .qconfig import get_quant_config, Precision, QuantInfoManager - - -global_quant_info = QuantInfoManager() +from .qconfig import get_quant_config, Precision class EnnQuantizer(Quantizer): @@ -23,7 +20,6 @@ def __init__(self): super().__init__() self._precision = Precision.A8W8 - global_quant_info.set_precision(self._precision) self._is_per_channel = True self._is_qat = False self.custom_quant_annotations: Sequence[Callable] = [] @@ -31,7 +27,6 @@ def __init__(self): def setup_precision(self, quant_dtype: Precision) -> None: assert quant_dtype in Precision, f"No support for Precision {quant_dtype}." 
self._precision = quant_dtype - global_quant_info.set_precision(self._precision) def setup_quant_params( self, quant_dtype: Precision, is_per_channel=True, is_qat=False diff --git a/backends/samsung/serialization/enn_graph_schema.py b/backends/samsung/serialization/enn_graph_schema.py index 5209a8672ee..8448854fe22 100644 --- a/backends/samsung/serialization/enn_graph_schema.py +++ b/backends/samsung/serialization/enn_graph_schema.py @@ -90,7 +90,11 @@ def define_tensor( # noqa: C901 ) tensor.AddQuantizeParam(q_dtype, scales, zero_points) - if need_quantize and data is not None: + if ( + need_quantize + and data is not None + and data.dtype in (torch.float16, torch.float32, np.float32, np.float16) + ): if isinstance(data, np.ndarray): data = torch.tensor(data) data = quantize_tensor( diff --git a/backends/samsung/test/models/test_torchvision_vit.py b/backends/samsung/test/models/test_torchvision_vit.py index 127bc43b5c8..bab146d9979 100644 --- a/backends/samsung/test/models/test_torchvision_vit.py +++ b/backends/samsung/test/models/test_torchvision_vit.py @@ -17,9 +17,6 @@ class TestMilestoneTorchVisionViT(unittest.TestCase): - # This model is skipped because transformers=5.0.0rc1. 
- # it will re-enable after fixing the issue - @unittest.skip def test_torchvision_vit_fp16(self): torch.manual_seed(8) model = TorchVisionViTModel().get_eager_model() diff --git a/backends/samsung/test/tester/samsung_tester.py b/backends/samsung/test/tester/samsung_tester.py index 7f1f65f0d6c..a6eb170a61b 100644 --- a/backends/samsung/test/tester/samsung_tester.py +++ b/backends/samsung/test/tester/samsung_tester.py @@ -12,10 +12,7 @@ from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner from executorch.backends.samsung.quantizer.quantizer import EnnQuantizer, Precision from executorch.backends.samsung.test.utils import RuntimeExecutor -from executorch.backends.samsung.utils.export_utils import ( - get_edge_compile_config, - get_enn_pass_list, -) +from executorch.backends.samsung.utils.export_utils import get_edge_compile_config from executorch.backends.test.harness import Tester as TesterBase from executorch.backends.test.harness.stages import StageType from executorch.backends.transforms.decompose_sdpa import ( @@ -97,7 +94,7 @@ def __init__( compile_specs = compile_specs or [] self.partitioners = [EnnPartitioner(compile_specs=compile_specs)] self.edge_compile_config = edge_compile_config or get_edge_compile_config() - self.transform_passes = transform_passes or get_enn_pass_list() + self.transform_passes = transform_passes self.edge_dialect_program = None def run( diff --git a/backends/samsung/test/utils/datasets.py b/backends/samsung/test/utils/datasets.py new file mode 100644 index 00000000000..be935dff271 --- /dev/null +++ b/backends/samsung/test/utils/datasets.py @@ -0,0 +1,261 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import List, Literal, Optional, Tuple + +import torch +import torchvision.transforms.v2 as vision_transform_v2 +from executorch.backends.samsung.test.utils.utils import GreedyLM +from torchsr import transforms as sr_transforms +from torchvision import transforms as vision_transforms +from torchvision.datasets import ImageFolder, VOCSegmentation + + +def get_quant_test_data_classify( + data_dir: str, + calinum=100, + testnum=500, + transform_compose: Optional[vision_transforms.Compose] = None, +) -> Tuple: + """ + Generate test data for quantization model + + :param data_dir: Dir of dataset. Structure should be imagenet-like + :param calinum: Number of calibration data. Default 100 + :param testnum: Number of test data. Default 500 + :param transform_compose: Transforms to be applied to data. + + Default: + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), + transforms.Lambda(lambda x: x.unsqueeze(0)), # Add batch dim + ] + :type data_dir: str + :type calinum: int + :type testnum: int + :type transform_compose: transforms.Compose | None + :return: (example_input, test_data) + """ + if not transform_compose: + transform_compose = vision_transforms.Compose( + [ + vision_transforms.Resize((256, 256)), + vision_transforms.CenterCrop(224), + vision_transforms.ToTensor(), + vision_transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), + vision_transforms.Lambda(lambda x: x.unsqueeze(0)), # Add batch dim + ] + ) + dataset = ImageFolder(root=data_dir, transform=transform_compose) + cali_data = [(dataset[i][0],) for i in range(min(calinum, len(dataset)))] + test_data = [dataset[i] for i in range(min(testnum, len(dataset)))] + example_input = (dataset[0][0],) + return example_input, cali_data, test_data + + +def get_quant_test_data_super_resolution( + root_dir: str, + dataset_name: Literal["B100", "Set5", "Set14", 
"Urban100"], + calinum=100, + testnum=500, + transform_compose: Optional[sr_transforms.Compose] = None, +) -> Tuple: + """ + Generate test data for quantization model + + :param root_dir: Dir of dataset. The real dataset should be in root_dir/SRBenchmarks/benchmark/ + :param dataset_name: data_set name + :param testnum: Number of test data. Default 500 + :param transform_compose: Transforms to be applied to data. + Default: + transform_compose = transforms.Compose( + [transforms.ToTensor()] # Convert Pillows Image to tensor + ) + :type root_dir: str + :type dataset_name: "B100"|"Set5"|"Set14"|"Urban100" + :type calinum: int + :type testnum: int + :type transform_compose: transforms.Compose | None + :return: (example_input, cali_data, test_data) + """ + + class SrResize: + def __init__(self, expected_size: List[List[int]]): + self.expected_size = expected_size + + def __call__(self, x): + return ( + x[0].resize(self.expected_size[0]), + x[1].resize(self.expected_size[1]), + ) + + class SrUnsqueeze: + def __call__(self, x): + return ( + x[0].unsqueeze(0), + x[1].unsqueeze(0), + ) + + if not transform_compose: + transform_compose = sr_transforms.Compose( + [ + SrResize([[448, 448], [224, 224]]), + sr_transforms.ToTensor(), # Convert Pillows Image to tensor + SrUnsqueeze(), + ] + ) + from torchsr.datasets import B100, Set14, Set5, Urban100 + + dataset_cls_map = { + "B100": B100, + "Set5": Set5, + "Set14": Set14, + "Urban100": Urban100, + } + + dataset_cls = dataset_cls_map.get(dataset_name) + assert dataset_cls + dataset = dataset_cls(root=root_dir, transform=transform_compose, scale=2) + calib_data = [(dataset[i][1],) for i in range(min(calinum, len(dataset)))] + test_data = [ + (dataset[i][1], dataset[i][0]) for i in range(min(testnum, len(dataset))) + ] + example_input = (dataset[0][1],) + return example_input, calib_data, test_data + + +def get_quant_test_data_segmentation( + data_dir: str, + calinum=100, + testnum=500, + input_transform_compose: 
Optional[vision_transform_v2.Compose] = None, + target_transform_compose: Optional[vision_transform_v2.Compose] = None, +): + if not input_transform_compose: + input_transform_compose = vision_transform_v2.Compose( + [ + vision_transform_v2.Resize([224, 224]), + vision_transform_v2.ToImage(), + vision_transform_v2.ToDtype(torch.float32, scale=True), + vision_transform_v2.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), + vision_transform_v2.Lambda(lambda x: x.unsqueeze(0)), # Add batch dim + ] + ) + if not target_transform_compose: + target_transform_compose = vision_transform_v2.Compose( + [ + vision_transform_v2.Resize([224, 224]), + vision_transform_v2.ToImage(), + vision_transform_v2.ToDtype(torch.long, scale=False), + vision_transform_v2.Lambda(lambda x: x.unsqueeze(0)), # Add batch dim + ] + ) + voc_dataset = VOCSegmentation( + data_dir, + "2012", + "val", + transform=input_transform_compose, + target_transform=target_transform_compose, + ) + calib_data = [(voc_dataset[i][0],) for i in range(min(calinum, len(voc_dataset)))] + test_data = [voc_dataset[i] for i in range(min(testnum, len(voc_dataset)))] + example_input = (voc_dataset[0][0],) + return example_input, calib_data, test_data + + +def _get_voice_dataset( + data_size: int, data_dir: str, labels: List[str], fixed_token_num: int +): + from torch.utils.data import DataLoader + from torchaudio.datasets import LIBRISPEECH + + def collate_fun(batch, encode_fn, mode="train"): + waves = [] + text_ids = [] + input_lengths = [] + output_lengths = [] + + if mode == "train": + shifts = torch.randn(len(batch)) > 0.0 + + for i, (wave, _, text, *_) in enumerate(batch): + if mode == "train" and shifts[i]: + wave = wave[:, 160:] + waves.append(wave[0]) + ids = torch.LongTensor(encode_fn(text)) + text_ids.append(ids) + input_lengths.append(wave.size(1) // 320) + output_lengths.append(len(ids)) + + waves = torch.nn.utils.rnn.pad_sequence(waves, batch_first=True).unsqueeze(1) + labels = 
torch.nn.utils.rnn.pad_sequence(text_ids, batch_first=True) + + return waves, labels, input_lengths, output_lengths + + lm = GreedyLM(labels) + + testset_url = "test-clean" + # testset_url = 'test-clean' + dataset = LIBRISPEECH(data_dir, url=testset_url) + data_loader = DataLoader( + dataset=dataset, + batch_size=1, + shuffle=True, + collate_fn=lambda x: collate_fun(x, lm.encode, "valid"), + ) + # prepare input data + inputs, targets = [], [] + in_lens, tar_lens = [], [] + + def _loader(): + for waves, labels, inputs_len, targets_len in data_loader: + if inputs_len[0] >= fixed_token_num: + continue + zero_padding = torch.zeros([1, 1, fixed_token_num * 320 - waves.shape[2]]) + waves = torch.concat((waves, zero_padding), axis=2) + yield waves, labels, [fixed_token_num + 1], targets_len + + for i, (waves, labels, inputs_len, targets_len) in enumerate( + _loader() + ): # waves, labels, input_lens, output_lens + inputs.append(waves) + targets.append(labels) + in_lens.append(inputs_len) + tar_lens.append(targets_len) + if i >= data_size: + break + + return inputs, targets, in_lens, tar_lens + + +def get_quant_test_data_voice( + data_dir: str, + calinum=100, + testnum=500, + fixed_out_token=300, + labels=None, +): + if labels is None: + labels = [" ", *"abcdefghijklmnopqrstuvwxyz", "'", "*"] + dataset = _get_voice_dataset( + max(testnum, calinum), data_dir, labels, fixed_out_token + ) + calib_data = [(dataset[0][i],) for i in range(min(calinum, len(dataset[0])))] + test_data = [ + (dataset[0][i], (dataset[1][i], dataset[2][i], dataset[3][i])) + for i in range(min(testnum, len(dataset[0]))) + ] + example_input = (dataset[0][0],) + return example_input, calib_data, test_data diff --git a/backends/samsung/test/utils/quant_checkers.py b/backends/samsung/test/utils/quant_checkers.py new file mode 100644 index 00000000000..7ae38d0c186 --- /dev/null +++ b/backends/samsung/test/utils/quant_checkers.py @@ -0,0 +1,240 @@ +# Copyright (c) 2025 Samsung Electronics Co. 
# Maps a checker name (as used in ``CheckerConfig.checker``) to its class.
CHECKER_REGISTER = {}


def checker_register(checker_name: str):
    """Return a class decorator that registers the class under ``checker_name``.

    The decorated class is stored in ``CHECKER_REGISTER`` and returned
    unchanged, so the class name stays bound to the class itself at module
    level instead of being replaced by ``None``.
    """

    def _wrapper(cls):
        CHECKER_REGISTER[checker_name] = cls
        # A decorator must hand the class back; otherwise the decorated
        # name is rebound to None.
        return cls

    return _wrapper
@checker_register("classifier")
class ClassifierChecker(CheckerBase):
    """Check top-k agreement between the quantized and the float model.

    The float model's top-1 prediction is used as the reference label. For
    every ``k`` in ``topktol`` the share of samples whose reference label is
    among the quantized model's top-k predictions must reach the tolerance.

    Tolerances are fractions in ``[0, 1]`` (the defaults are 0.9 and 0.95).
    A tolerance above 1 is read as a percentage, so percent-style
    configurations keep working.
    """

    necessary_params = ["dataset"]
    default_params = {
        "topktol": {
            1: 0.9,
            3: 0.95,
        },
    }

    def check(self):
        """Run the comparison over ``self.dataset``; raise ``AssertionError`` on failure."""
        assert self.dataset
        assert min(self.topktol.keys()) > 0, "Topk number must be positive int"
        max_topk = max(self.topktol.keys())

        print("Check Quantization Classifier...")

        # correct[k - 1] counts the samples that hit within the top-k.
        correct = torch.zeros(max_topk)
        total = 0
        with torch.no_grad():
            for batch_data, _ in self.dataset:
                batch_size = batch_data.shape[0]
                total += batch_size
                # TODO: Use ground truth to replace fp models' result
                fp_out: torch.Tensor = self.origin_module(batch_data)
                _, fp_top1 = fp_out.topk(1, dim=-1)
                fp_top1 = fp_top1.view(1, -1)

                quant_out: torch.Tensor = self.current_module(batch_data)
                _, quant_topk = quant_out.topk(max_topk, dim=-1)
                quant_topk = quant_topk.t()
                for k_idx in range(max_topk):
                    # A hit at rank k_idx counts for every k > k_idx as well.
                    correct[k_idx:] += (
                        quant_topk[k_idx].eq(fp_top1).view(-1).sum().float()
                    )

        error_messages = []
        msg_template = "\tFailed in checking Top{}, Target: {:.2f} vs Current: {:.2f}"
        for topk_num, topk_tol in self.topktol.items():
            # Compare like with like: accuracy and tolerance both as
            # fractions. The old code scaled accuracy to percent but left
            # the tolerance fractional, so the check could hardly ever fail.
            accuracy_score = float(correct[topk_num - 1]) / total
            target = topk_tol / 100 if topk_tol > 1 else topk_tol
            print(accuracy_score)
            if accuracy_score < target:
                error_messages.append(
                    msg_template.format(topk_num, target, accuracy_score)
                )
        assert not error_messages, "\n".join(["\n", *error_messages])
        print("Check Quantization Classifier Finished.")
@checker_register("segmentation")
class SegChecker(CheckerBase):
    """Check the mean IoU of the quantized segmentation model.

    ``threshold`` is the minimum mean IoU. A value in ``[0, 1]`` (the default
    is 0.7) is read as a fraction; a value above 1 is read as a percentage.
    """

    necessary_params = ["dataset"]
    default_params = {"threshold": 0.7}

    def check(self):
        """Evaluate ``self.dataset``; raise ``AssertionError`` if mIoU is too low."""

        def calc_miou(target: torch.Tensor, pred: torch.Tensor, class_num=21):
            target = target.numpy().flatten()
            mask = target != 255  # Don't consider edge
            # Widen to int64 so ``target * class_num`` cannot overflow a
            # narrow label dtype.
            target = target[mask].astype(np.int64)
            pred = pred.numpy().flatten()[mask].astype(np.int64)
            # Flattened confusion-matrix index: row = target, column = pred.
            # I of class a: mixmat[a, a]
            # U of class a: mixmat[a, :].sum() + mixmat[:, a].sum() - mixmat[a, a]
            mixmat = np.bincount(
                target * class_num + pred, minlength=class_num**2
            ).reshape((class_num, class_num))
            i = mixmat.diagonal()
            # Absent classes give 0/0 = NaN, which nanmean then skips.
            with np.errstate(divide="ignore", invalid="ignore"):
                return np.nanmean(i / (mixmat.sum(0) + mixmat.sum(1) - i))

        assert self.dataset, "Segmentation checker needs a non-empty dataset"
        data_num = 0
        total_miou = 0
        with torch.no_grad():
            for x, targets in self.dataset:
                data_num += len(x)
                quant_out: torch.Tensor = self.current_module(x)["out"].argmax(1)
                total_miou += np.sum(
                    [
                        calc_miou(target, pred)
                        for target, pred in zip(targets, quant_out)
                    ]
                )
        avg_miou_percentage = total_miou / data_num * 100
        # Put the threshold on the same percent scale as the measured value;
        # comparing a percentage with the fractional 0.7 made the check
        # pass for any mIoU above 0.7%.
        threshold_percentage = (
            self.threshold * 100 if self.threshold <= 1 else self.threshold
        )
        assert (
            avg_miou_percentage > threshold_percentage
        ), "MIOU need to be larger than {:.2f}%, but get {:.2f}%. ".format(
            threshold_percentage, avg_miou_percentage
        )
        print("Check Quantization Segmentation Finished.")
".format( + self.threshold, test_loss + ) + return self diff --git a/backends/samsung/utils/export_utils.py b/backends/samsung/utils/export_utils.py index e075f4dca0b..a6d87ba933e 100644 --- a/backends/samsung/utils/export_utils.py +++ b/backends/samsung/utils/export_utils.py @@ -9,14 +9,11 @@ import executorch.exir as exir import torch -from executorch.backends.samsung._passes.fuse_conv_act import FuseConvActPass -from executorch.backends.samsung._passes.remove_useless_ops import RemoveUselessOpPass from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner from executorch.backends.samsung.quantizer.quantizer import EnnQuantizer, Precision from executorch.backends.transforms.decompose_sdpa import ( DecomposeScaledDotProductAttention, ) -from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform from executorch.exir import EdgeCompileConfig from executorch.exir.backend.backend_details import CompileSpec from executorch.exir.dialects._ops import ops as exir_ops @@ -44,14 +41,6 @@ def get_edge_compile_config(): ) -def get_enn_pass_list() -> List[PassType]: - return [ - RemoveUselessOpPass(), - RemoveCloneOpsTransform(), - FuseConvActPass(), - ] - - def quantize_module( module: torch.nn.Module, inputs, @@ -82,12 +71,8 @@ def to_edge_transform_and_lower_to_enn( ) -> exir.ExecutorchProgramManager: assert compile_specs is not None, "For now, we must deliver complile specs" prog = torch.export.export(module, inputs) - pass_list = get_enn_pass_list() - if custom_pass_config: - pass_list.extend(custom_pass_config) return to_edge_transform_and_lower( prog, - pass_list, - {"forward": [EnnPartitioner(compile_specs)]}, + partitioner={"forward": [EnnPartitioner(compile_specs)]}, compile_config=get_edge_compile_config(), ) diff --git a/examples/samsung/scripts/mobilebert_finetune_QAT.py b/examples/samsung/scripts/mobilebert_finetune_QAT.py new file mode 100644 index 00000000000..fb23722223d --- /dev/null +++ 
class MobileBertFinetune:
    """Fine-tune ``google/mobilebert-uncased`` as a title classifier on a CSV dataset.

    The CSV file must provide a ``Title`` column (input text) and a
    ``Conference`` column (class label).
    """

    def __init__(self, metric, args):
        self.tokenizer = self.load_tokenizer()
        self.artifact = args.artifact
        self.max_length = args.max_length
        self.csv_dataset = args.csv_dataset
        self.metric = metric if metric is not None else evaluate.load("accuracy")
        self.batch_size_training = args.batch_size
        self.num_epochs = args.num_epochs_for_finetune

    def load_tokenizer(self):
        """Return the pretrained MobileBERT tokenizer."""
        return AutoTokenizer.from_pretrained("google/mobilebert-uncased")

    def load_CSV_dataset(self):
        """Load, label-encode, split and tokenize the CSV dataset.

        If no CSV path was configured, the default file is downloaded into
        the artifact directory. If that download fails, a copy left there by
        an earlier run is reused.

        Returns:
            ``(tokenized_datasets, labels)``: a ``DatasetDict`` with
            ``train`` / ``validation`` splits in torch format, and a dict
            mapping each raw label to its index.

        Raises:
            RuntimeError: If the CSV file can be neither downloaded nor loaded.
        """
        # grab dataset
        if self.csv_dataset is None:
            url = "https://raw.githubusercontent.com/susanli2016/NLP-with-Python/master/data/title_conference.csv"
            print(
                "Because a CSV file is not assigned, a CSV file is downloaded from ",
                str(url),
            )
            os.makedirs(self.artifact, exist_ok=True)
            cvs_file_path = os.path.join(self.artifact, "title_conference.csv")
            # A timeout keeps the script from hanging on a dead connection.
            response = requests.get(url, allow_redirects=True, timeout=60)
            if response.status_code == 200:
                with open(cvs_file_path, "wb") as f:
                    f.write(response.content)
                print("CSV file downloaded successfully!\n\n")
            else:
                print(
                    f"Failed to download the file. Status code: {response.status_code}\n\n"
                )
                # Reuse an earlier download if there is one; otherwise stop
                # here with a clear error.
                if not os.path.exists(cvs_file_path):
                    raise RuntimeError(
                        f"Failed to download the file. Status code: {response.status_code}"
                    )
        else:
            cvs_file_path = self.csv_dataset

        # load dataset
        try:
            loaded_datasets = load_dataset("csv", data_files=cvs_file_path)
            raw_labels = loaded_datasets["train"].unique("Conference")
        except Exception as err:
            # The old bare ``except`` only printed, and the next statement
            # then failed with an unrelated NameError.
            raise RuntimeError(
                f"Error: the file '{cvs_file_path}' was not avaiable."
            ) from err

        # Creating ClassLabel
        class_labels = ClassLabel(names=raw_labels)
        labels = {key: index for index, key in enumerate(raw_labels)}

        def encode_labels(example):
            example["label"] = class_labels.str2int(example["Conference"])
            return example

        loaded_datasets = loaded_datasets.map(encode_labels)

        split_dataset = loaded_datasets["train"].train_test_split(
            test_size=0.15, seed=51
        )
        raw_datasets = DatasetDict(
            {"train": split_dataset["train"], "validation": split_dataset["test"]}
        )

        if self.max_length is None:

            def preprocess_function(examples):
                return self.tokenizer(examples["Title"], truncation=True, padding=True)

        else:

            def preprocess_function(examples):
                return self.tokenizer(
                    examples["Title"],
                    truncation=True,
                    padding="max_length",
                    max_length=self.max_length,
                )

        print("Preprocessing data...")
        tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)
        tokenized_datasets.set_format(
            type="torch", columns=["input_ids", "attention_mask", "label"]
        )
        return tokenized_datasets, labels

    # Define compute metrics function
    def compute_metrics(self, eval_pred):
        """Turn ``(logits, labels)`` into the configured metric's result."""
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        return self.metric.compute(predictions=predictions, references=labels)

    def training(
        self,
        model,
        tokenized_datasets,
        tokenizer,
        compute_metrics,
        batch_size=8,
        num_epochs=3,
        device="cpu",
    ):
        """Build (but do not run) a ``Trainer`` for fine-tuning ``model``."""
        # ``device`` may be a str or a torch.device; normalise it before
        # deciding whether to pin memory.
        use_pinned_memory = torch.device(device).type != "cpu"

        # Training arguments
        training_args = TrainingArguments(
            output_dir="./results",
            eval_strategy="epoch",
            save_strategy="epoch",
            learning_rate=2e-5,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=num_epochs,
            weight_decay=0.01,
            logging_dir="./logs",
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            dataloader_pin_memory=use_pinned_memory,
        )

        # Trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_datasets["train"],
            eval_dataset=tokenized_datasets["validation"],
            processing_class=tokenizer,
            compute_metrics=compute_metrics,
        )
        return trainer

    def get_finetune_mobilebert(self, artifacts_dir):
        """Return ``(model, tokenized_datasets)``, fine-tuning only when needed.

        If ``artifacts_dir`` already holds ``config.json`` and
        ``model.safetensors``, that checkpoint is loaded and returned in eval
        mode. Otherwise the pretrained model is fine-tuned, evaluated and
        saved to ``artifacts_dir``.
        """
        # Pretrained bert's output ranges in a large scale. It is challenge for enn backend to support directly.
        # Please finetune mobilebert on specific tasks, make sure that bert's output and hidden states are friendly
        # to resource-constraint device.

        # Load data for classification
        tokenized_datasets, labels = self.load_CSV_dataset()

        artifacts_dir = artifacts_dir if artifacts_dir is not None else "./mobilebert"
        os.makedirs(artifacts_dir, exist_ok=True)
        pretrained_required_files = ["config.json", "model.safetensors"]
        path = Path(artifacts_dir)
        need_finetune = not all(
            (path / file_name).exists() for file_name in pretrained_required_files
        )

        # get pre-trained mobilebert
        model = MobileBertForSequenceClassification.from_pretrained(
            "google/mobilebert-uncased" if need_finetune else artifacts_dir,
            num_labels=len(labels),
        )

        if not need_finetune:
            return model.eval(), tokenized_datasets

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)

        trainer = self.training(
            model,
            tokenized_datasets,
            self.tokenizer,
            self.compute_metrics,
            self.batch_size_training,
            self.num_epochs,
            device,
        )

        # Train the model
        print(
            "\n==== Starting training for fine tuning for ",
            self.num_epochs,
            "epochs....",
        )
        trainer.train()

        # Evaluate on validation set
        print("\n==== Starting evaluating the fine tuned model ....")
        FP_eval_results = trainer.evaluate()
        print("The eval results of the trained model =", FP_eval_results)

        model.save_pretrained(artifacts_dir)

        return model, tokenized_datasets
class AverageMeter:
    """Computes and stores the average and current value.

    ``fmt`` is a format spec used when printing ``val`` and ``avg``. It may
    be given with or without the leading colon (``":f"`` or ``"1.5f"``).
    """

    def __init__(self, name, fmt=":f"):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted as ``n`` observations."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        spec = self.fmt
        # A spec without the leading ":" (e.g. "1.5f") would otherwise be
        # glued onto the field name ("{val1.5f}") and raise KeyError.
        if spec and not spec.startswith((":", "!")):
            spec = ":" + spec
        fmtstr = "{name} {val" + spec + "} ({avg" + spec + "})"
        return fmtstr.format(**self.__dict__)
# Eval a mobileBert model
def evaluatingQuantModel_mobileBert(
    quantized_model,
    tokenized_datasets,
    device,
    batch_size_edge,
    workers,
    metric=None,
):
    """Score ``quantized_model`` on the ``validation`` split.

    Args:
        quantized_model: Callable taking ``(input_ids, attention_mask)`` and
            returning an object with a ``logits`` attribute.
        tokenized_datasets: Mapping with a ``"validation"`` dataset that
            yields ``input_ids``, ``attention_mask`` and ``label``.
        device: Device (str or ``torch.device``) the inputs are moved to.
        batch_size_edge: Batch size of the evaluation loader.
        workers: Number of DataLoader worker processes.
        metric: Metric object with a ``compute`` method; defaults to the
            GLUE/MRPC metric.

    Returns:
        The dictionary returned by ``metric.compute``.
    """
    if metric is None:
        metric = evaluate.load("glue", "mrpc")

    data_loader = DataLoader(
        tokenized_datasets["validation"],
        batch_size=batch_size_edge,
        shuffle=True,
        num_workers=workers,
        # Pin memory only when the batches go to an accelerator, as the
        # other loaders in this script do.
        pin_memory=torch.device(device).type != "cpu",
    )

    # Collect predictions
    predictions = []
    labels = []

    # Pure inference: do not build autograd graphs.
    with torch.no_grad():
        for batch in tqdm(data_loader):
            batch_input_ids = batch["input_ids"].to(device)
            batch_attention_mask = batch["attention_mask"].to(device)
            outputs = quantized_model(batch_input_ids, batch_attention_mask)
            preds = torch.argmax(outputs.logits, dim=-1)
            predictions.extend(preds.tolist())
            labels.extend(batch["label"].tolist())

    # Compute accuracy and F1
    results = metric.compute(predictions=predictions, references=labels)
    print("Evaluation results:", results)

    return results
Training)....") + quantizer = EnnQuantizer() + quantizer.setup_quant_params(quant_dtype, is_per_channel, is_qat) + batch_dim = torch.export.Dim("batch_size", min=1, max=batch_size_training) + + size_input_ids = (batch_size_training, inputs[0].size(1)) + size_attention_mask = (batch_size_training, inputs[1].size(1)) + vector_input_ids = torch.randint(0, 256, size_input_ids).to(device) + vector_attention_mask = torch.randint(0, 1, size_attention_mask).to(device) + example_inputs = ( + vector_input_ids, + vector_attention_mask, + ) + + exported_model = torch.export.export( + model.eval().to(device), + example_inputs, + dynamic_shapes={"input_ids": {0: batch_dim}, "attention_mask": {0: batch_dim}}, + ).module() + prepared_model = prepare_pt2e(exported_model, quantizer) + + prepared_model = trainingQuantModel_QAT( + prepared_model, + tokenized_datasets, + batch_size=batch_size_training, + workers=num_workers, + device=device, + num_epochs=num_epochs, + ) + + quantized_model = convert_pt2e(prepared_model) + + # Evaluating a quantized model with QAT + print("\nEvaluation a quantized model") + results = evaluatingQuantModel_mobileBert( + quantized_model.to(device), + tokenized_datasets, + device, + batch_size_training, + num_workers, + metric, + ) + + print("\n------------------------------------") + print(" FP32 Model, accuracy=", FP_results["accuracy"]) + print("Quantized Model, accuracy=", results["accuracy"]) + print( + " Accurarcy drop, accuracy=", + (results["accuracy"] / FP_results["accuracy"]) * 100, + "%", + ) + print("------------------------------------") + print("==== Model Evaluation complete! 
\n\n") + + # Saving a quantized model for GPU servers + size_input_ids = (batch_size_edge, inputs[0].size(1)) + size_attention_mask = (batch_size_edge, inputs[1].size(1)) + vector_input_ids = torch.randint(0, 256, size_input_ids).to(device) + vector_attention_mask = torch.randint(0, 1, size_attention_mask).to(device) + example_inputs = ( + vector_input_ids, + vector_attention_mask, + ) + + exported_model = torch.export.export(quantized_model, example_inputs) + torch.export.save(exported_model, qat_file_name) + print(f"QAT model for {device} is saved in ", qat_file_name) + + # Saving a quantized model for CPU servers + device_cpu = torch.device(type="cpu") + quantized_model = quantized_model.to(device_cpu) + quantized_model = removing_gpu_node_in_graph(quantized_model) + cpu_vector_input_ids = torch.randint(0, 256, size_input_ids).to(device_cpu) + cpu_vector_attention_mask = torch.randint(0, 1, size_attention_mask).to(device_cpu) + example_inputs_cpu = ( + cpu_vector_input_ids, + cpu_vector_attention_mask, + ) + + exported_model = torch.export.export(quantized_model, example_inputs_cpu) + torch.export.save(exported_model, qat_file_name_for_cpu) + print(f"QAT model for {device_cpu} is saved in ", qat_file_name_for_cpu) + + # Reloading a quantized model for GPU servers + exported_model = torch.export.load(qat_file_name) + print("==== QAT Training complete! 
def main(args):
    """Fine-tune MobileBERT, optionally run QAT, and export it for the ENN backend.

    Steps: fine-tune (or reload) the float model, take one validation batch
    as the example input, either train a QAT model or load a previously
    saved one, lower the result to an ExecuTorch program and save it.
    With ``args.dump`` set, the example input and outputs are saved as well.
    """
    # ensure the working directory exist.
    os.makedirs(args.artifact, exist_ok=True)

    # define the metric for the model evaluation
    metric = evaluate.load("accuracy")

    # Fine tuning model with a csv dataset
    mobilebert_finetune = MobileBertFinetune(metric, args)
    model, tokenized_datasets = mobilebert_finetune.get_finetune_mobilebert(
        args.artifact
    )

    # Setting for QAT training
    batch_size_edge = 1  # The batch of the final graph for a target edge device is 1
    batch_size_training = args.batch_size
    num_workers = args.num_workers  # Num of dataset loaders
    num_epochs = args.num_epochs_for_QAT  # Num of epochs in QAT training
    data_num = args.calibration_number  # Num of dataset for quantization calibration

    # searching an avaiable device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # making dataset for calibrating the model...
    print("\n==== Loading calibration dataset for PTQ quantization....")
    inputs, _ = get_dataset(
        data_num, tokenized_datasets, batch_size_edge, num_workers, device
    )

    # running an example
    example_ref_input_ids = inputs[0][0].to(device)
    example_ref_attention_mask = inputs[0][1].to(device)
    example_inputs = (example_ref_input_ids, example_ref_attention_mask)
    float_out = model(*example_inputs)

    # QAT Training with a csv dataset
    qat_file_path = os.path.join(args.artifact, "mobilebert_qat_model_csv.pt2")
    qat_file_path_for_cpu = os.path.join(
        args.artifact, "mobilebert_qat_model_csv_for_cpu.pt2"
    )
    if args.qat and args.precision is not None:
        model = build_aten_to_qat_mobilebert(
            model.train(),
            example_inputs,
            quant_dtype=getattr(Precision, args.precision),
            is_qat=True,
            tokenized_datasets=tokenized_datasets,
            batch_size_training=batch_size_training,
            batch_size_edge=batch_size_edge,
            # Forward --num-workers; it was parsed but never reached the
            # QAT data loaders.
            num_workers=num_workers,
            num_epochs=num_epochs,
            qat_file_name=qat_file_path,
            qat_file_name_for_cpu=qat_file_path_for_cpu,
            metric=metric,
            device=device,
        )
    else:
        # trying to load a pretrained QAT model
        if device.type == "cpu":
            model_path = qat_file_path_for_cpu
        else:
            model_path = qat_file_path

        print(f"\n==== Loading a pretrained QAT model from '{model_path}'....")
        try:
            loaded_model = torch.export.load(model_path)
            model = loaded_model.module().to(device)
        except Exception as err:
            # Best effort: continue with the float model, but say why the
            # load failed. Catching Exception (not a bare except) lets
            # Ctrl-C and SystemExit through.
            print(f"Error: the file '{model_path}' was not avaiable.")
            print(f"Reason: {err!r}")

    quant_out = model(*example_inputs)

    compile_specs = [gen_samsung_backend_compile_spec(args.chipset)]
    edge = to_edge_transform_and_lower_to_enn(
        model, example_inputs, compile_specs=compile_specs
    )
    model_name = "mobilebert_exynos"
    exec_prog = edge.to_executorch(
        config=ExecutorchBackendConfig(extract_delegate_segments=True)
    )
    save_pte_program(exec_prog, model_name, args.artifact)

    if args.dump:
        # Expect example inputs are tuple, including input ids and attn mask
        save_tensors(example_inputs, prefix="float_input", artifact_dir=args.artifact)
        save_tensors(float_out, prefix="float_output", artifact_dir=args.artifact)
        if args.precision:
            save_tensors(quant_out, "quant_out", artifact_dir=args.artifact)
" + ), + ) + parser.add_argument( + "--num-workers", + default=8, + type=int, + help="# of workers for DataLoader in QAT training", + ) + parser.add_argument( + "--qat", + default=False, + const=True, + nargs="?", + help=("Whether to train the model with QAT."), + type=bool, + ) + parser.add_argument( + "--num-epochs-for-QAT", + default=12, + type=int, + help=( + "# of epochs for QAT training" + ">1000 epochs is recommended to get proper accuracy" + " with a GPU server." + ), + ) + parser.add_argument( + "--dump", + default=False, + action="store_true", + help=("Whether to dump all outputs. If not set, we only dump pte."), + ) + args = parser.parse_args() + main(args) From b2dc6426c492029f877786b94e125342288ac278 Mon Sep 17 00:00:00 2001 From: "jiseong.oh" Date: Thu, 2 Apr 2026 11:15:38 +0000 Subject: [PATCH 5/6] Add framework for quantization tc - Add Quantization TestCases Co-authored-by: chen03.zhao@samsung.com Signed-off-by: jiseong.oh --- .../samsung/test/models/test_deeplab_v3.py | 30 ++++++++++++++- backends/samsung/test/models/test_edsr.py | 26 ++++++++++++- .../samsung/test/models/test_inception_v3.py | 27 ++++++++++++- .../samsung/test/models/test_inception_v4.py | 38 ++++++++++++++++++- .../test/models/test_mobilebert_finetuning.py | 2 +- .../samsung/test/models/test_mobilenet_v2.py | 27 ++++++++++++- .../samsung/test/models/test_mobilenet_v3.py | 28 +++++++++++++- backends/samsung/test/models/test_resnet18.py | 27 ++++++++++++- backends/samsung/test/models/test_resnet50.py | 27 ++++++++++++- .../test/models/test_torchvision_vit.py | 27 ++++++++++++- .../samsung/test/models/test_wav2letter.py | 34 ++++++++++++++++- .../samsung/test/tester/samsung_tester.py | 19 +++++++++- backends/samsung/test/utils/utils.py | 33 +++++++++++++++- 13 files changed, 331 insertions(+), 14 deletions(-) diff --git a/backends/samsung/test/models/test_deeplab_v3.py b/backends/samsung/test/models/test_deeplab_v3.py index cd6a6527980..634cf69911b 100644 --- 
a/backends/samsung/test/models/test_deeplab_v3.py +++ b/backends/samsung/test/models/test_deeplab_v3.py @@ -1,15 +1,20 @@ -# Copyright (c) Samsung Electronics Co. LTD +# Copyright (c) 2025 Samsung Electronics Co. LTD # All rights reserved # # Licensed under the BSD License (the "License"); you may not use this file # except in compliance with the License. See the license file in the root # directory of this source tree for more details. +import os import unittest from executorch.backends.samsung.serialization.compile_options import ( gen_samsung_backend_compile_spec, ) from executorch.backends.samsung.test.tester import SamsungTester +from executorch.backends.samsung.test.utils.datasets import ( + get_quant_test_data_segmentation, +) +from executorch.backends.samsung.test.utils.quant_checkers import CheckerConfig from executorch.backends.samsung.test.utils.utils import TestConfig from executorch.examples.models.deeplab_v3 import DeepLabV3ResNet50Model @@ -27,3 +32,26 @@ def test_dl3_fp16(self): .to_executorch() .run_method_and_compare_outputs(inputs=example_input, atol=0.009) ) + + def test_dl3_a8w8(self): + model = DeepLabV3ResNet50Model().get_eager_model() + example_input, cali, testdata = get_quant_test_data_segmentation( + os.path.join(os.environ["DATASET_PATH"], "VOC_image") + ) + checker_config = CheckerConfig( + "segmentation", + { + "dataset": testdata, + "threshold": 0.7, + }, + ) + tester = SamsungTester( + model, example_input, [gen_samsung_backend_compile_spec(TestConfig.chipset)] + ) + ( + tester.quantize(cali_dataset=cali, checker_config=checker_config) + .export() + .to_edge_transform_and_lower() + .to_executorch() + .run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1) + ) diff --git a/backends/samsung/test/models/test_edsr.py b/backends/samsung/test/models/test_edsr.py index e69d5cc459c..77f6223b9d0 100644 --- a/backends/samsung/test/models/test_edsr.py +++ b/backends/samsung/test/models/test_edsr.py @@ -1,4 +1,4 @@ -# Copyright (c) 
# From backends/samsung/test/models/test_edsr.py
def test_edsr_a8w8(self):
    """Quantize EDSR, lower it for the Samsung backend and compare outputs.

    Requires the ``DATASET_PATH`` environment variable; a ``KeyError`` is
    raised when it is not set.
    """
    # NOTE(review): os.path.join() receives a single argument here, so it
    # returns the path unchanged, and "B100" is passed to the helper as a
    # second positional argument. Confirm against the helper's signature
    # that this is intended rather than a misplaced parenthesis.
    dataset_root = os.path.join(os.environ["DATASET_PATH"])
    example_input, calibration_set, checker_dataset = (
        get_quant_test_data_super_resolution(dataset_root, "B100")
    )

    eager_model = EdsrModel().get_eager_model()

    checker_options = {"dataset": checker_dataset, "threshold": 0.7}
    sr_checker = CheckerConfig("super_resolution", checker_options)

    compile_specs = [gen_samsung_backend_compile_spec(TestConfig.chipset)]
    pipeline = SamsungTester(eager_model, example_input, compile_specs)

    quantized = pipeline.quantize(
        cali_dataset=calibration_set, checker_config=sr_checker
    )
    lowered = quantized.export().to_edge_transform_and_lower().to_executorch()
    lowered.run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
# From backends/samsung/test/models/test_inception_v3.py
def test_inception_v3_a8w8(self):
    """Quantize InceptionV3, lower it for the Samsung backend and compare outputs.

    Data is read from the ``imagenet_ptq_subset`` directory under the path
    given by the ``DATASET_PATH`` environment variable; a ``KeyError`` is
    raised when that variable is not set.
    """
    imagenet_dir = os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
    example_input, calibration_set, checker_dataset = get_quant_test_data_classify(
        imagenet_dir
    )

    classifier_checker = CheckerConfig("classifier", {"dataset": checker_dataset})

    eager_model = InceptionV3Model().get_eager_model()
    compile_specs = [gen_samsung_backend_compile_spec(TestConfig.chipset)]
    pipeline = SamsungTester(eager_model, example_input, compile_specs)

    quantized = pipeline.quantize(
        cali_dataset=calibration_set, checker_config=classifier_checker
    )
    lowered = quantized.export().to_edge_transform_and_lower().to_executorch()
    lowered.run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
# From backends/samsung/test/models/test_inception_v4.py
def test_inception_v4_a8w8(self):
    """Quantize InceptionV4, lower it for the Samsung backend and compare outputs.

    Unlike the other classifier tests, a custom preprocessing pipeline
    (resize to 299x299, tensor conversion, normalisation, batch dimension)
    is handed to the dataset helper. Requires the ``DATASET_PATH``
    environment variable; a ``KeyError`` is raised when it is not set.
    """
    preprocessing_steps = [
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        # Prepend a batch dimension to every sample.
        transforms.Lambda(lambda image: image.unsqueeze(0)),
    ]
    transform_compose = transforms.Compose(preprocessing_steps)

    imagenet_dir = os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
    example_input, calibration_set, checker_dataset = get_quant_test_data_classify(
        imagenet_dir,
        transform_compose=transform_compose,
    )

    classifier_checker = CheckerConfig("classifier", {"dataset": checker_dataset})

    eager_model = InceptionV4Model().get_eager_model()
    compile_specs = [gen_samsung_backend_compile_spec(TestConfig.chipset)]
    pipeline = SamsungTester(eager_model, example_input, compile_specs)

    quantized = pipeline.quantize(
        cali_dataset=calibration_set, checker_config=classifier_checker
    )
    lowered = quantized.export().to_edge_transform_and_lower().to_executorch()
    lowered.run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
# From backends/samsung/test/models/test_mobilenet_v2.py
def test_mv2_a8w8(self):
    """Quantize MobileNetV2, lower it for the Samsung backend and compare outputs.

    Data is read from the ``imagenet_ptq_subset`` directory under the path
    given by the ``DATASET_PATH`` environment variable; a ``KeyError`` is
    raised when that variable is not set.
    """
    imagenet_dir = os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
    example_input, calibration_set, checker_dataset = get_quant_test_data_classify(
        imagenet_dir
    )

    classifier_checker = CheckerConfig("classifier", {"dataset": checker_dataset})

    eager_model = MV2Model().get_eager_model()
    compile_specs = [gen_samsung_backend_compile_spec(TestConfig.chipset)]
    pipeline = SamsungTester(eager_model, example_input, compile_specs)

    quantized = pipeline.quantize(
        cali_dataset=calibration_set, checker_config=classifier_checker
    )
    lowered = quantized.export().to_edge_transform_and_lower().to_executorch()
    lowered.run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)


# From backends/samsung/test/models/test_mobilenet_v3.py
def test_mv3_a8w8(self):
    """Quantize MobileNetV3, lower it for the Samsung backend and compare outputs.

    Differs from the MobileNetV2 test in two ways: the checker is given an
    explicit ``topktol`` mapping, and the final output comparison uses
    tolerances of 3 instead of 1. Requires the ``DATASET_PATH`` environment
    variable; a ``KeyError`` is raised when it is not set.
    """
    imagenet_dir = os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
    example_input, calibration_set, checker_dataset = get_quant_test_data_classify(
        imagenet_dir
    )

    checker_options = {
        "dataset": checker_dataset,
        "topktol": {1: 0.0, 2: 0.0},
    }
    classifier_checker = CheckerConfig("classifier", checker_options)

    eager_model = MV3Model().get_eager_model()
    compile_specs = [gen_samsung_backend_compile_spec(TestConfig.chipset)]
    pipeline = SamsungTester(eager_model, example_input, compile_specs)

    quantized = pipeline.quantize(
        cali_dataset=calibration_set, checker_config=classifier_checker
    )
    lowered = quantized.export().to_edge_transform_and_lower().to_executorch()
    lowered.run_method_and_compare_outputs(inputs=example_input, atol=3, rtol=3)
# From backends/samsung/test/models/test_resnet18.py
def test_resnet18_a8w8(self):
    """Quantize ResNet18, lower it for the Samsung backend and compare outputs.

    Data is read from the ``imagenet_ptq_subset`` directory under the path
    given by the ``DATASET_PATH`` environment variable; a ``KeyError`` is
    raised when that variable is not set.
    """
    imagenet_dir = os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
    example_input, calibration_set, checker_dataset = get_quant_test_data_classify(
        imagenet_dir
    )

    classifier_checker = CheckerConfig("classifier", {"dataset": checker_dataset})

    eager_model = ResNet18Model().get_eager_model()
    compile_specs = [gen_samsung_backend_compile_spec(TestConfig.chipset)]
    pipeline = SamsungTester(eager_model, example_input, compile_specs)

    quantized = pipeline.quantize(
        cali_dataset=calibration_set, checker_config=classifier_checker
    )
    lowered = quantized.export().to_edge_transform_and_lower().to_executorch()
    lowered.run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
# From backends/samsung/test/models/test_resnet50.py
def test_resnet50_a8w8(self):
    """Quantize ResNet50, lower it for the Samsung backend and compare outputs.

    Data is read from the ``imagenet_ptq_subset`` directory under the path
    given by the ``DATASET_PATH`` environment variable; a ``KeyError`` is
    raised when that variable is not set.
    """
    imagenet_dir = os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
    example_input, calibration_set, checker_dataset = get_quant_test_data_classify(
        imagenet_dir
    )

    classifier_checker = CheckerConfig("classifier", {"dataset": checker_dataset})

    eager_model = ResNet50Model().get_eager_model()
    compile_specs = [gen_samsung_backend_compile_spec(TestConfig.chipset)]
    pipeline = SamsungTester(eager_model, example_input, compile_specs)

    quantized = pipeline.quantize(
        cali_dataset=calibration_set, checker_config=classifier_checker
    )
    lowered = quantized.export().to_edge_transform_and_lower().to_executorch()
    lowered.run_method_and_compare_outputs(inputs=example_input, atol=1, rtol=1)
# From backends/samsung/test/models/test_torchvision_vit.py
def test_torchvision_vit_a8w8(self):
    """Quantize the torchvision ViT, lower it for the Samsung backend and compare outputs.

    The final output comparison uses tolerances of 2. Data is read from the
    ``imagenet_ptq_subset`` directory under the path given by the
    ``DATASET_PATH`` environment variable; a ``KeyError`` is raised when
    that variable is not set.
    """
    imagenet_dir = os.path.join(os.environ["DATASET_PATH"], "imagenet_ptq_subset")
    example_input, calibration_set, checker_dataset = get_quant_test_data_classify(
        imagenet_dir
    )

    classifier_checker = CheckerConfig("classifier", {"dataset": checker_dataset})

    eager_model = TorchVisionViTModel().get_eager_model()
    compile_specs = [gen_samsung_backend_compile_spec(TestConfig.chipset)]
    pipeline = SamsungTester(eager_model, example_input, compile_specs)

    quantized = pipeline.quantize(
        cali_dataset=calibration_set, checker_config=classifier_checker
    )
    lowered = quantized.export().to_edge_transform_and_lower().to_executorch()
    lowered.run_method_and_compare_outputs(inputs=example_input, atol=2, rtol=2)
# From backends/samsung/test/models/test_wav2letter.py
def test_w2l_quant(self):
    """Quantize Wav2Letter with pre-trained weights, lower it and compare outputs.

    Environment:
        MODEL_CACHE: directory holding ``w2l/states_fused.pth``.
        DATASET_PATH: directory holding ``w2l/wav2letter``.

    Raises:
        AssertionError: if ``MODEL_CACHE`` is unset or empty.
        KeyError: if ``DATASET_PATH`` is unset.
    """
    factory = Wav2LetterModel()
    # 29 output classes, matching the 29 entries of ``labels`` below.
    factory.vocab_size = 29

    # Checked explicitly instead of with ``assert``: assert statements are
    # removed under ``python -O``, which would also drop the assignment and
    # leave ``model_cache_dir`` unbound on the next line.
    model_cache_dir = os.getenv("MODEL_CACHE")
    if not model_cache_dir:
        raise AssertionError("MODEL_CACHE not set!")

    weight_path = os.path.join(model_cache_dir, "w2l/states_fused.pth")
    # NOTE(security): weights_only=False unpickles arbitrary objects, so the
    # checkpoint under MODEL_CACHE must come from a trusted source.
    state_dict = torch.load(weight_path, weights_only=False)
    model = factory.get_eager_model()
    model.load_state_dict(state_dict)

    example_input, calib_data, quant_test_data = get_quant_test_data_voice(
        os.path.join(os.environ["DATASET_PATH"], "w2l/wav2letter")
    )

    # Space, the 26 lowercase letters, apostrophe, and "*".
    labels = [" ", *"abcdefghijklmnopqrstuvwxyz", "'", "*"]
    checker_config = CheckerConfig(
        "wave2letter", {"dataset": quant_test_data, "labels": labels}
    )

    tester = SamsungTester(
        model,
        example_input,
        [gen_samsung_backend_compile_spec(TestConfig.chipset)],
    )
    # NOTE(review): unlike the image-model tests, no ``inputs=`` is passed to
    # the final comparison - confirm the harness default is what is wanted.
    (
        tester.quantize(cali_dataset=calib_data, checker_config=checker_config)
        .export()
        .to_edge_transform_and_lower()
        .to_executorch()
        .run_method_and_compare_outputs(atol=1.0, rtol=1.0)
    )
# From backends/samsung/test/tester/samsung_tester.py (SamsungTester method)
def quantize(
    self,
    quantize_stage: Optional[Quantize] = None,
    cali_dataset=None,
    checker_config=None,
):
    """Run the quantize stage, building a default A8W8 stage when none is given.

    Args:
        quantize_stage: Stage to run. When provided it is used as-is, and
            ``cali_dataset`` / ``checker_config`` are not consulted.
        cali_dataset: Forwarded to the default stage as
            ``calibration_samples``.
        checker_config: Forwarded to the default stage as
            ``checker_config``.

    Returns:
        Whatever the parent class's ``quantize`` returns.
    """
    if quantize_stage is not None:
        return super().quantize(quantize_stage)

    enn_quantizer = EnnQuantizer()
    enn_quantizer.setup_quant_params(Precision.A8W8)
    default_stage = Quantize(
        enn_quantizer,
        calibration_samples=cali_dataset,
        checker_config=checker_config,
    )
    return super().quantize(default_stage)
# From backends/samsung/test/utils/utils.py
# Copyright (c) 2025 Samsung Electronics Co. LTD
# All rights reserved
#
# Licensed under the BSD License (the "License"); you may not use this file
# except in compliance with the License. See the license file in the root
# directory of this source tree for more details.

import torch


class TestConfig:
    """Settings shared by the Samsung backend tests."""

    # NOTE(review): looks like a placeholder address - confirm where the real
    # value is injected.
    host_ip: str = "111.111.111.111"
    # Chipset name handed to gen_samsung_backend_compile_spec() by the tests.
    chipset: str = "E9965"


class GreedyLM:
    """Character-level encoder/decoder with greedy CTC decoding."""

    def __init__(self, vocab, blank_label="*"):
        """Store the vocabulary and build the symbol-to-index lookup.

        Args:
            vocab: Indexable collection of symbols; a symbol's position is
                its id.
            blank_label: Text removed from decoded CTC output.
        """
        self.vocab = vocab
        self.char_to_id = {symbol: index for index, symbol in enumerate(vocab)}
        self.blank_label = blank_label

    def encode(self, text):
        """Return the ids of the lower-cased characters of ``text``.

        Raises:
            KeyError: if a character is not in the vocabulary.
        """
        return list(map(self.char_to_id.__getitem__, text.lower()))

    def decode_ids(self, ids):
        """Turn ids into text; a 2-D input yields a list with one string per row."""
        if ids.ndim == 2:  # batch|steps
            return [self.decode_ids(row) for row in ids]

        return "".join(self.vocab[token_id] for token_id in ids.tolist())

    def decode_ctc(self, emissions):
        """Greedy-decode emissions; a 3-D input yields a list with one string per item.

        For each step the highest-scoring label (argmax over dim 0) is
        taken, consecutive repeats are merged, and ``blank_label`` is then
        removed from the joined text.
        """
        if emissions.ndim == 3:  # batch|labels|steps
            return [self.decode_ctc(sample) for sample in emissions]

        best_per_step = emissions.argmax(0)
        merged_ids = torch.unique_consecutive(best_per_step).tolist()
        text_with_blanks = "".join(self.vocab[label_id] for label_id in merged_ids)
        return text_with_blanks.replace(self.blank_label, "")
DEVICEFARM_CLI_VERSION="beta-v1.1.0" DEVICEFARM_FILE_NAME="devicefarmcli-${DEVICEFARM_CLI_VERSION}.zip"