Skip to content

Commit ddd992f

Browse files
Jiseong-oh, Chen03ZhaoSamsung, and Sangsoo.ko
committed
Support Quantized MobileBert
- update annotator
- Support quantized mobilebert
- update Quantization strategy

Co-authored-by: chen.zhao <chen03.zhao@samsung.com>
Co-authored-by: Sangsoo.ko <sangsoo.ko@samsung.com>
Signed-off-by: jiseong.oh <jiseong.oh@samsung.com>
1 parent b3345fb commit ddd992f

File tree

18 files changed

+158
-246
lines changed

18 files changed

+158
-246
lines changed

backends/samsung/_passes/annotate_qparams.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from torch._export.utils import get_buffer
1515
from torch.export import ExportedProgram
1616
from torch.fx import GraphModule, Node
17+
from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
1718

1819

1920
class AnnotateQparamsPass(ExportPass):
@@ -148,13 +149,34 @@ def _check_same(requant_obj, ori_obj) -> bool:
148149
_check_same(ori_quant_attrs[key], requantize_attrs[key])
149150
for key in key_map.values()
150151
):
151-
requantize_map[idx] = requantize_attrs
152+
if (
153+
ori_quant_attrs[QuantConstants.QUANT_KEY.quant_dtype]
154+
!= requantize_attrs[QuantConstants.QUANT_KEY.quant_dtype]
155+
):
156+
# For Q-DQ who will change quant dtype, we will insert requantization node
157+
requantize_map[idx] = requantize_attrs
158+
else:
159+
node.meta["quantize_attrs"] = requantize_attrs
152160

153161
def _annotate(self, graph_module: GraphModule):
154162
for node in graph_module.graph.nodes:
163+
if key_map := QuantConstants.DEQUANT_OPS_KEY_MAP.get(node.target, None):
164+
# We will fold node with constant output in the future pass as a constant node
165+
# example: Constant->Q->DQ->nodeN->Q->DQ, this seq will be folded to one
166+
# We need to store the q-params from last DQ params for quantizing constant value
167+
quant_attrs = self.get_quant_attrs(node, key_map)
168+
if node.args[0].target in QuantConstants.QUANT_OPS_KEY_MAP:
169+
node.meta["quantize_attrs"] = quant_attrs
170+
else:
171+
node.args[0].meta["quantize_attrs"] = quant_attrs
172+
continue
155173
key_map = QuantConstants.QUANT_OPS_KEY_MAP.get(node.target, None)
156174
if not key_map:
157175
continue
176+
quant_attrs = self.get_quant_attrs(node, key_map)
177+
if node.args[0].target in QuantConstants.QUANT_OPS_KEY_MAP:
178+
node.meta["quantize_attrs"] = quant_attrs
179+
continue
158180
source_node = node.args[0]
159181
if source_node.target in (
160182
*QuantConstants.QUANT_OPS_KEY_MAP,
@@ -164,13 +186,26 @@ def _annotate(self, graph_module: GraphModule):
164186
continue
165187
elif source_node.target == operator.getitem:
166188
source_node = source_node.args[0]
167-
quant_attrs = self.get_quant_attrs(node, key_map)
189+
168190
source_node.meta["quantize_attrs"] = quant_attrs
169191
self._annotate_requantize(source_node)
170192
self._propagate_quant_params(source_node)
171193

194+
def _annotate_decomposed_mm(self, graph_module: GraphModule):
195+
for source_list in get_source_partitions(graph_module.graph, ["matmul"]).get(
196+
"matmul", {}
197+
):
198+
final_view = source_list.output_nodes[0]
199+
if not (quantize_attrs := final_view.meta.get("quantize_attrs")):
200+
continue
201+
for node in source_list.nodes:
202+
if node.target == exir_ops.edge.aten.bmm.default:
203+
node.meta["quantize_attrs"] = quantize_attrs
204+
break
205+
172206
def call(self, graph_module: GraphModule):
173207
self._annotate(graph_module)
208+
self._annotate_decomposed_mm(graph_module)
174209
graph_module.recompile()
175210
return PassResult(graph_module, True)
176211

backends/samsung/_passes/annotate_scalar_parameters.py

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
# LICENSE file in the root directory of this source tree.
66

77
import torch
8-
from executorch.backends.samsung.quantizer.quantizer import global_quant_info
98
from executorch.backends.samsung.utils.constants import QuantConstants
109
from executorch.backends.transforms.utils import get_param_tensor, is_param_node
1110
from executorch.exir.dialects._ops import ops as exir_ops
@@ -25,6 +24,7 @@ class AnnotateScalarParametersPass(ExportPass):
2524
exir_ops.edge.aten.mul.Tensor,
2625
exir_ops.edge.aten.add.Tensor,
2726
exir_ops.edge.aten.div.Tensor,
27+
exir_ops.edge.aten.sub.Tensor,
2828
}
2929

3030
def __init__(self, edge_program: ExportedProgram):
@@ -35,27 +35,37 @@ def annotate(self, graph_module: torch.fx.GraphModule):
3535
for node in graph_module.graph.nodes:
3636
if node.target not in self.TARGET_OPS or "quantize_attrs" not in node.meta:
3737
continue
38-
torch_quant_dtype = global_quant_info.weight_precison.torch_dtype
39-
for input_arg in node.all_input_nodes:
40-
if input_arg.op not in ("placeholder", "get_attr") or not is_param_node(
41-
self.edge_program, input_arg
38+
input0, input1 = node.all_input_nodes[0], node.all_input_nodes[1]
39+
if input0.op not in ("placeholder", "get_attr") or not is_param_node(
40+
self.edge_program, input0
41+
):
42+
if input1.op not in ("placeholder", "get_attr") or not is_param_node(
43+
self.edge_program, input1
4244
):
4345
continue
44-
else:
45-
tensor = get_param_tensor(self.edge_program, input_arg)
46-
if not tensor.shape:
47-
qparams = {
48-
QuantConstants.QUANT_KEY.scale: float(tensor),
49-
QuantConstants.QUANT_KEY.quant_dtype: torch_quant_dtype,
50-
QuantConstants.QUANT_KEY.quant_max: torch.iinfo(
51-
torch_quant_dtype
52-
).max,
53-
QuantConstants.QUANT_KEY.quant_min: torch.iinfo(
54-
torch_quant_dtype
55-
).min,
56-
QuantConstants.QUANT_KEY.zero_point: 0,
57-
}
58-
input_arg.meta["quantize_attrs"] = qparams
46+
ifm_node, param_tensor_node = input0, input1
47+
else:
48+
ifm_node, param_tensor_node = input1, input0
49+
if not (quantize_attrs := ifm_node.meta.get("quantize_attrs")):
50+
continue
51+
param_tensor = get_param_tensor(self.edge_program, param_tensor_node)
52+
if not param_tensor.shape:
53+
scale = (
54+
float(param_tensor) if param_tensor > 0 else -float(param_tensor)
55+
)
56+
else:
57+
continue
58+
q_dtype = quantize_attrs[QuantConstants.QUANT_KEY.quant_dtype]
59+
if scale == 0:
60+
scale = 1.0
61+
qparams = {
62+
QuantConstants.QUANT_KEY.scale: scale,
63+
QuantConstants.QUANT_KEY.quant_dtype: q_dtype,
64+
QuantConstants.QUANT_KEY.quant_max: torch.iinfo(q_dtype).max,
65+
QuantConstants.QUANT_KEY.quant_min: torch.iinfo(q_dtype).min,
66+
QuantConstants.QUANT_KEY.zero_point: 0,
67+
}
68+
param_tensor_node.meta["quantize_attrs"] = qparams
5969

6070
def call(self, graph_module: torch.fx.GraphModule):
6171
graph = graph_module.graph

backends/samsung/_passes/fuse_conv_act.py

Lines changed: 0 additions & 77 deletions
This file was deleted.

backends/samsung/_passes/insert_qdq.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,18 @@ def _add_qdq(self, graph_module: GraphModule):
156156
elif is_graph_output(node):
157157
self._add_dq_after(graph_module, node)
158158

159+
def _add_q_for_cast(self, graph_module: GraphModule):
160+
for node in list(graph_module.graph.nodes):
161+
if not node.target == exir_ops.edge.aten._to_copy.default:
162+
continue
163+
if "quantize_attrs" not in node.meta:
164+
continue
165+
self._add_q_after(graph_module, node)
166+
159167
def call(self, graph_module: GraphModule):
160168
self._add_qdq(graph_module)
161169
self._add_qdq_for_requantize(graph_module)
170+
self._add_q_for_cast(graph_module)
162171
graph_module.graph.eliminate_dead_code()
163172
graph_module.recompile()
164173
return PassResult(graph_module, True)

backends/samsung/builders/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
op_mul,
3535
op_permute,
3636
op_pixel_shuffle,
37+
op_placeholder,
3738
op_quantize,
3839
op_relu,
3940
op_reshape,
@@ -80,6 +81,7 @@
8081
op_mul,
8182
op_permute,
8283
op_pixel_shuffle,
84+
op_placeholder,
8385
op_quantize,
8486
op_relu,
8587
op_reshape,

backends/samsung/builders/op_constant_pad_nd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,5 @@ def define_node(
5252
"padding": "EXPLICIT",
5353
"padding_type": "CONSTANT",
5454
}
55-
55+
self._update_params_qdtype(node, params)
5656
enn_graph.define_op(node.name, "PAD", [input_id], [output_id], params)

backends/samsung/builders/op_embedding.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def define_node(
3636
output_id = self.define_tensor(node, enn_graph, vals_to_ids)
3737

3838
params = {"axis": 0, "input_type": "indices"}
39+
self._update_params_qdtype(node, params)
3940
enn_graph.define_op(
4041
node.name, "GATHER", [input_id, weight_id], [output_id], params
4142
)

backends/samsung/builders/op_slice_copy.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,14 @@ def define_node(
3838
dim = cast(int, node.args[1])
3939
if dim < 0:
4040
dim = dim + len(in_shape)
41-
start_val = cast(int, node.args[2])
41+
start_val = cast(int, node.args[2]) if node.args[2] else 0
4242
if start_val < 0:
4343
start_val = start_val + in_shape[dim]
44-
end_val = min(cast(int, node.args[3]), in_shape[dim])
44+
end_val = (
45+
in_shape[dim]
46+
if len(node.args) < 4
47+
else min(cast(int, node.args[3]), in_shape[dim])
48+
)
4549
if end_val < 0:
4650
end_val = end_val + in_shape[dim]
4751

backends/samsung/builders/op_sub.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,8 @@ def define_node(
3636
# output
3737
output_id = self.define_tensor(node, enn_graph, vals_to_ids)
3838

39-
enn_graph.define_op(node.name, "SUB", [input_id_1, input_id_2], [output_id])
39+
params = {}
40+
self._update_params_qdtype(node, params)
41+
enn_graph.define_op(
42+
node.name, "SUB", [input_id_1, input_id_2], [output_id], params
43+
)

backends/samsung/enn_preprocess.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,13 @@
1818
ConstantPropPass,
1919
)
2020
from executorch.backends.samsung._passes.fold_qdq import FoldQDQPass
21+
from executorch.backends.samsung._passes.fuse_activation import FuseActivationPass
2122
from executorch.backends.samsung._passes.insert_qdq import InsertQDQPass
23+
from executorch.backends.samsung._passes.remove_useless_ops import RemoveUselessOpPass
2224
from executorch.backends.samsung._passes.replace_scalar_ops import ReplaceOpsWithScalar
25+
from executorch.backends.samsung._passes.transform_quantized_mask import (
26+
TransformQuantizedMaskPass,
27+
)
2328
from executorch.backends.samsung.builders.node_visitor import get_node_visitors
2429
from executorch.backends.samsung.serialization.compile_options import (
2530
ENN_COMPILE_OPTION_TITLE,
@@ -30,6 +35,7 @@
3035
from executorch.backends.transforms.fuse_batch_norm_with_conv import (
3136
FuseBatchNormWithConvPass,
3237
)
38+
from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
3339

3440
from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass
3541

@@ -59,9 +65,13 @@ def preprocess(
5965

6066
enn_preprocess_passes = PassManager(
6167
passes=[
68+
RemoveUselessOpPass(),
69+
RemoveCloneOpsTransform(),
6270
AnnotateQparamsPass(edge_program),
71+
FuseActivationPass(),
6372
FoldQDQPass(),
6473
ConstantPropPass(edge_program),
74+
TransformQuantizedMaskPass(edge_program),
6575
Conv1dToConv2d(edge_program),
6676
FuseBatchNormWithConvPass(edge_program),
6777
AddmmToLinearTransform(),
@@ -79,6 +89,7 @@ def preprocess(
7989
node_visitors = get_node_visitors(edge_program)
8090

8191
vals_to_ids: Dict[torch.fx.Node, int] = {}
92+
placeholder_vistor = node_visitors["placeholder"]
8293
for node in pass_result.graph_module.graph.nodes:
8394
if node.op == "call_function":
8495
logging.info(f"Visiting: {node}, {node.target.__name__}")
@@ -90,9 +101,11 @@ def preprocess(
90101
raise RuntimeError(
91102
f"{node.target.__name__}" " is not supported in ENN Delegate"
92103
)
104+
elif node.op == "placeholder":
105+
logging.info(f"Visiting input of graph: {node}")
106+
placeholder_vistor.define_node(node, enn_graph, vals_to_ids)
93107
elif node.op in [
94108
"get_attr",
95-
"placeholder",
96109
"output",
97110
]:
98111
continue

0 commit comments

Comments (0)