Skip to content

Commit f4b7033

Browse files
Skip load/store scalar in simd_op_check_sve2
1 parent 16de509 commit f4b7033

1 file changed

Lines changed: 7 additions & 39 deletions

File tree

test/correctness/simd_op_check_sve2.cpp

Lines changed: 7 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -687,45 +687,13 @@ class SimdOpCheckArmSve : public SimdOpCheckTest {
687687
continue;
688688
}
689689

690-
// LD/ST - Load/Store
691-
for (float factor : {0.5f, 1.f, 2.f}) {
692-
const int width = base_vec_bits * factor;
693-
const int total_lanes = width / bits;
694-
const int vector_lanes = base_vec_bits / bits;
695-
const int instr_lanes = min(total_lanes, vector_lanes);
696-
if (instr_lanes < 2 || (vector_lanes / vscale < 2)) continue; // bail out scalar and <vscale x 1 x ty>
697-
698-
// In case of arm32, instruction selection looks inconsistent due to optimization by LLVM
699-
AddTestFunctor add(*this, bits, total_lanes, target.bits == 64);
700-
// NOTE: if the expr is too simple, LLVM might generate "bl memcpy"
701-
Expr load_store_1 = in_im(x) * 3;
702-
703-
if (has_sve()) {
704-
// This pattern has changed with LLVM 21, see https://github.com/halide/Halide/issues/8584 for more
705-
// details.
706-
if (Halide::Internal::get_llvm_version() < 210) {
707-
// in native width, ld1b/st1b is used regardless of data type
708-
const bool allow_byte_ls = (width == target.vector_bits);
709-
add({get_sve_ls_instr("ld1", bits, bits, "", allow_byte_ls ? "b" : "")}, total_lanes, load_store_1);
710-
add({get_sve_ls_instr("st1", bits, bits, "", allow_byte_ls ? "b" : "")}, total_lanes, load_store_1);
711-
} else {
712-
if (width == 256 && vscale == 1) {
713-
// Optimized with load/store pair (two registers) instruction
714-
add({{"ldp", R"(q\d\d?)"}}, total_lanes, load_store_1);
715-
add({{"stp", R"(q\d\d?)"}}, total_lanes, load_store_1);
716-
} else {
717-
// There does not seem to be a simple rule to select ldr/str or ld1x/stx
718-
add("ld1_or_ldr", {{R"(ld(1.|r))", R"(z\d\d?)"}}, total_lanes, load_store_1);
719-
add("st1_or_str", {{R"(st(1.|r))", R"(z\d\d?)"}}, total_lanes, load_store_1);
720-
}
721-
}
722-
} else {
723-
// vector register is not used for simple load/store
724-
string reg_prefix = (width <= 64) ? "d" : "q";
725-
add({{"st[rp]", reg_prefix + R"(\d\d?)"}}, total_lanes, load_store_1);
726-
add({{"ld[rp]", reg_prefix + R"(\d\d?)"}}, total_lanes, load_store_1);
727-
}
728-
}
690+
// LD/ST - Load/Store scalar
691+
// We skip the scalar load/store test due to the following challenges.
692+
// The rules by which LLVM selects instructions do not seem simple.
693+
// For example, ld1, ldr, or ldp may be used as the instruction and a z or q register as the operand,
694+
// depending on the data type, vscale, what is performed before/after the load, and the LLVM version.
695+
// In addition, load/store instructions appear in places other than the one we want to check,
696+
// which makes the test prone to false-positive detection, as we only search for strings line by line.
729697

730698
// LDn - Structured Load strided elements
731699
if (Halide::Internal::get_llvm_version() >= 220) {

0 commit comments

Comments
 (0)