Commit 33a47e1

Andrew Grebenisan authored and facebook-github-bot committed
Refactor quantizer: Only replace with per-tensor variants (#14974)
Summary: In our previous flow, we would replace ops with their default variants, run a special fusion pass that constructed singleton tensors for a variety of fused quantized ops, and then run a replace-ops pass to turn those ops into their per-tensor variants. I confirmed this indirection existed only for legacy reasons, so a cleanup was long overdue. This diff also fixes the affected reference implementations as part of the refactor.

Reviewed By: zonglinpeng

Differential Revision: D83873738
1 parent d2672a6 commit 33a47e1
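Illustrative sketch (not part of the diff): the core of the change, using names taken from the fusion-pass hunks below. The default op variants consumed quantization parameters as [1]-shaped tensors, so the fusion pass had to insert aten.full nodes into the graph; the per-tensor variants take plain scalars, so those wrapper nodes go away:

# Before: wrap the scalar zero point in a singleton tensor so the
# default variant can consume it as a tensor argument.
X_zero_point_ = graph_module.graph.call_function(
    torch.ops.aten.full.default,
    ([1], dequants_inputs[0].args[2]),
    {"dtype": torch.int32},
)

# After: forward the scalar from the dequantize node's args straight
# to the per-tensor variant. No graph node is created.
X_zero_point = dequants_inputs[0].args[2]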

File tree

5 files changed: +122 -404 lines changed

backends/cadence/aot/TARGETS

Lines changed: 1 addition & 0 deletions

@@ -425,6 +425,7 @@ python_unittest(
         "//executorch/exir:pass_base",
         "//executorch/exir/dialects:lib",
         "//executorch/exir/passes:lib",
+        ":ref_implementations",
     ],
 )

backends/cadence/aot/quantizer/fusion_pass.py

Lines changed: 30 additions & 147 deletions
@@ -65,33 +65,18 @@ def get_args_and_kwargs_add(
     dequants_inputs: List[fx.Node],
     quant_node: fx.Node,
 ) -> Tuple[Tuple[ArgsType, ...], Dict[str, ArgsType]]:
-    X_scale_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[0].args[1]),
-        {"dtype": torch.float},
-    )
-    X_zero_point_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[0].args[2]),
-        {"dtype": torch.int32},
-    )
-    Y_scale_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[1].args[1]),
-        {"dtype": torch.float},
-    )
-    Y_zero_point_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[1].args[2]),
-        {"dtype": torch.int32},
-    )
+    X_scale = dequants_inputs[0].args[1]
+
+    X_zero_point = dequants_inputs[0].args[2]
+    Y_scale = dequants_inputs[1].args[1]
+    Y_zero_point = dequants_inputs[1].args[2]
     args = (
         inputs_inputs[0],
-        X_scale_,
-        X_zero_point_,
+        X_scale,
+        X_zero_point,
         inputs_inputs[1],
-        Y_scale_,
-        Y_zero_point_,
+        Y_scale,
+        Y_zero_point,
         quant_node.args[1],
         quant_node.args[2],
     )
@@ -129,31 +114,12 @@ def get_args_and_kwargs_linear(
     else:
         bias = bias_inputs[0]

-    # Create single element tensors for weight_zero_point, out_multiplier, out_shift.
-    # Note that the function expects int32_t, when it would default to int64_t, so
-    # we explicitly require that type.
-    weight_zero_point_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_weights[0].args[2]),
-        {"dtype": torch.int32},
-    )
-    out_multiplier_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_multiplier[0].item()),
-        {"dtype": torch.int32},
-    )
-    out_shift_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_shift[0].item()),
-        {"dtype": torch.int32},
-    )
-
     args = tuple(inputs_inputs + weights_inputs + [bias])
     kwargs = {
         "src_zero_point": dequants_inputs[0].args[2],
-        "weight_zero_point": weight_zero_point_,
-        "out_multiplier": out_multiplier_,
-        "out_shift": out_shift_,
+        "weight_zero_point": dequants_weights[0].args[2],
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
         "out_zero_point": quant_node.args[2],
         "offset": None,
     }
@@ -178,22 +144,8 @@ def get_args_and_kwargs_layer_norm(
     ), "per-channel quantization is not supported for layer norm, both scale and zero_point should be scalars"

     # Make the scale and zero_point tensors
-    scale_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            dequants_inputs[0].args[1],
-        ),
-        {"dtype": torch.float32},
-    )
-    zero_point_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            dequants_inputs[0].args[2],
-        ),
-        {"dtype": torch.int32},
-    )
+    scale = dequants_inputs[0].args[1]
+    zero_point = dequants_inputs[0].args[2]

     weight = other_inputs[1] if len(other_inputs) > 1 else None

@@ -220,7 +172,7 @@ def get_args_and_kwargs_layer_norm(
     )

     # Make the args and kwargs for the replacement op
-    args = tuple(inputs_inputs + [scale_tensor] + [zero_point_tensor])
+    args = tuple(inputs_inputs + [scale, zero_point])
     kwargs = {
         "normalized_shape": other_inputs[0],
         "weight": weight,
@@ -308,31 +260,6 @@ def get_args_and_kwargs_conv(

     (out_multiplier, out_shift) = quantize_tensor_multiplier(requantize_scale_t)

-    out_multiplier_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_multiplier[0].item()),
-        {"dtype": torch.int32},
-    )
-    out_shift_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_shift[0].item()),
-        {"dtype": torch.int32},
-    )
-
-    # Create a single element tensor for the weight zero point
-    weight_zero_point_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], weight_zero_point),
-        {"dtype": torch.int32},
-    )
-
-    # Create a single element tensor for the bias scale
-    bias_scale_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], bias_scale),
-        {"dtype": torch.float32},
-    )
-
     # Make the args and kwargs for the replacement op
     args = tuple(inputs_inputs + weights_inputs + [bias])
     kwargs = {
@@ -341,12 +268,12 @@ def get_args_and_kwargs_conv(
         "dilation": dilation,
         "groups": groups,
         "input_zero_point": dequants_inputs[0].args[2],
-        "weight_zero_point": weight_zero_point_tensor,
-        "bias_scale": bias_scale_tensor,
+        "weight_zero_point": weight_zero_point,
+        "bias_scale": bias_scale,
         "out_scale": quant_node.args[1],
         "out_zero_point": quant_node.args[2],
-        "out_multiplier": out_multiplier_,
-        "out_shift": out_shift_,
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
     }
     return args, kwargs

@@ -367,27 +294,11 @@ def get_args_and_kwargs_relu(
     # Make the args and kwargs for the replacement op
     args = tuple(inputs_inputs)

-    X_zero_point = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[0].args[2]),
-        {"dtype": torch.int32},
-    )
-    out_multiplier_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_multiplier[0].item()),
-        {"dtype": torch.int32},
-    )
-    out_shift_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_shift[0].item()),
-        {"dtype": torch.int32},
-    )
-
     kwargs = {
-        "X_zero_point": X_zero_point,
+        "X_zero_point": dequants_inputs[0].args[2],
         "out_zero_point": quant_node.args[2],
-        "out_multiplier": out_multiplier_,
-        "out_shift": out_shift_,
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
     }
     return args, kwargs

@@ -435,48 +346,20 @@ def get_args_and_kwargs_softmax(
         {"dtype": torch.int32},
     )
     # Make the scale and zero_point tensors
-    in_scale_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            dequants_inputs[0].args[1],
-        ),
-        {"dtype": torch.float32},
-    )
-    in_zero_point_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            dequants_inputs[0].args[2],
-        ),
-        {"dtype": torch.int32},
-    )
-    out_scale_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            quant_node.args[1],
-        ),
-        {"dtype": torch.float32},
-    )
-    out_zero_point_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            quant_node.args[2],
-        ),
-        {"dtype": torch.int32},
-    )
+    in_scale = dequants_inputs[0].args[1]
+    in_zero_point = dequants_inputs[0].args[2]
+    out_scale = quant_node.args[1]
+    out_zero_point = quant_node.args[2]

     # Make the args and kwargs for the replacement op
     args = (
         inputs_inputs[0],
         mask_tensor,
         op_node.args[1],
-        in_scale_tensor,
-        in_zero_point_tensor,
-        out_scale_tensor,
-        out_zero_point_tensor,
+        in_scale,
+        in_zero_point,
+        out_scale,
+        out_zero_point,
     )
     kwargs = {}

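Illustrative sketch (not part of the diff): with the helpers above returning scalars, the fused node carries per-tensor parameters directly. Assuming the usual fx plumbing around these helpers (the call_function site for the fused op is not shown in this diff), the fused relu node would now be built roughly like:

# Sketch only: the kwargs mirror get_args_and_kwargs_relu above; every
# quantization parameter is a Python scalar, not an aten.full graph node.
fused = graph_module.graph.call_function(
    torch.ops.cadence.quantized_relu.per_tensor,
    tuple(inputs_inputs),
    {
        "X_zero_point": dequants_inputs[0].args[2],
        "out_zero_point": quant_node.args[2],
        "out_multiplier": out_multiplier[0].item(),
        "out_shift": out_shift[0].item(),
    },
)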

backends/cadence/aot/quantizer/patterns.py

Lines changed: 12 additions & 11 deletions
@@ -112,7 +112,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_linear.default
+        return torch.ops.cadence.quantized_linear.per_tensor


 class AddPattern(QuantizationPattern):

@@ -150,7 +150,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_add.default
+        return torch.ops.cadence.quantized_add.per_tensor


 class BmmPattern(QuantizationPattern):

@@ -265,7 +265,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv2d_nchw.per_tensor


 class Conv2dPattern(QuantizationPattern):

@@ -307,7 +307,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv2d_nchw.per_tensor


 class LayerNormPattern(QuantizationPattern):

@@ -345,7 +345,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_layer_norm.default
+        return torch.ops.cadence.quantized_layer_norm.per_tensor


 class LinearPattern(QuantizationPattern):

@@ -387,7 +387,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_linear.default
+        return torch.ops.cadence.quantized_linear.per_tensor


 class MatmulPattern(QuantizationPattern):

@@ -411,6 +411,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
+        # TODO: T240804887 This is actually a per-tensor variant, we just need to change the name of the op
         return torch.ops.cadence.quantized_matmul.default


@@ -437,7 +438,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_relu.default
+        return torch.ops.cadence.quantized_relu.per_tensor


 # Regular relu op

@@ -496,7 +497,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv2d_nchw.per_tensor


 # Conv1d + regular relu op fusion

@@ -544,7 +545,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_softmax.default
+        return torch.ops.cadence.quantized_softmax.per_tensor


 class MixedW8A32LinearPattern(QuantizationPattern):

@@ -598,7 +599,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_w8a32_linear.default
+        return torch.ops.cadence.quantized_w8a32_linear.per_tensor


 class MixedW8A32ConvPattern(QuantizationPattern):

@@ -660,4 +661,4 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_w8a32_conv.default
+        return torch.ops.cadence.quantized_w8a32_conv.per_tensor
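Illustrative sketch (not part of the diff): after this change, every pattern's replacement_op names a .per_tensor overload directly, with quantized_matmul left as the one pending rename tracked in T240804887. A new pattern written against this convention would look roughly like the following, where MyOpPattern and quantized_my_op are hypothetical names, not ops in this diff:

# Hypothetical pattern: replacement_op returns the per-tensor overload
# up front, so no later replace-ops pass needs to rewrite the node.
class MyOpPattern(QuantizationPattern):
    # get_anchors() and the rest of the pattern are elided for brevity.
    def replacement_op(self) -> OpOverload:
        return torch.ops.cadence.quantized_my_op.per_tensor  # hypothetical op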
