@@ -687,45 +687,13 @@ class SimdOpCheckArmSve : public SimdOpCheckTest {
687687 continue ;
688688 }
689689
690- // LD/ST - Load/Store
691- for (float factor : {0 .5f , 1 .f , 2 .f }) {
692- const int width = base_vec_bits * factor;
693- const int total_lanes = width / bits;
694- const int vector_lanes = base_vec_bits / bits;
695- const int instr_lanes = min (total_lanes, vector_lanes);
696- if (instr_lanes < 2 || (vector_lanes / vscale < 2 )) continue ; // bail out scalar and <vscale x 1 x ty>
697-
698- // In case of arm32, instruction selection looks inconsistent due to optimization by LLVM
699- AddTestFunctor add (*this , bits, total_lanes, target.bits == 64 );
700- // NOTE: if the expr is too simple, LLVM might generate "bl memcpy"
701- Expr load_store_1 = in_im (x) * 3 ;
702-
703- if (has_sve ()) {
704- // This pattern has changed with LLVM 21, see https://github.com/halide/Halide/issues/8584 for more
705- // details.
706- if (Halide::Internal::get_llvm_version () < 210 ) {
707- // in native width, ld1b/st1b is used regardless of data type
708- const bool allow_byte_ls = (width == target.vector_bits );
709- add ({get_sve_ls_instr (" ld1" , bits, bits, " " , allow_byte_ls ? " b" : " " )}, total_lanes, load_store_1);
710- add ({get_sve_ls_instr (" st1" , bits, bits, " " , allow_byte_ls ? " b" : " " )}, total_lanes, load_store_1);
711- } else {
712- if (width == 256 && vscale == 1 ) {
713- // Optimized with load/store pair (two registers) instruction
714- add ({{" ldp" , R"( q\d\d?)" }}, total_lanes, load_store_1);
715- add ({{" stp" , R"( q\d\d?)" }}, total_lanes, load_store_1);
716- } else {
717- // There does not seem to be a simple rule to select ldr/str or ld1x/stx
718- add (" ld1_or_ldr" , {{R"( ld(1.|r))" , R"( z\d\d?)" }}, total_lanes, load_store_1);
719- add (" st1_or_str" , {{R"( st(1.|r))" , R"( z\d\d?)" }}, total_lanes, load_store_1);
720- }
721- }
722- } else {
723- // vector register is not used for simple load/store
724- string reg_prefix = (width <= 64 ) ? " d" : " q" ;
725- add ({{" st[rp]" , reg_prefix + R"( \d\d?)" }}, total_lanes, load_store_1);
726- add ({{" ld[rp]" , reg_prefix + R"( \d\d?)" }}, total_lanes, load_store_1);
727- }
728- }
690+ // LD/ST - Load/Store scalar
691+ // We skip the scalar load/store test because of the following challenges.
692+ // The rule by which LLVM selects the instruction does not appear to be simple.
693+ // For example, ld1, ldr, or ldp may be used as the instruction, with a z or q register as the operand,
694+ // depending on the data type, vscale, what is performed before/after the load, and the LLVM version.
695+ // In addition, load/store instructions also appear in places other than the one we want to check,
696+ // which makes the test prone to false-positive detection because we only search strings line by line.
729697
730698 // LDn - Structured Load strided elements
731699 if (Halide::Internal::get_llvm_version () >= 220 ) {
0 commit comments