diff --git a/src/intrinsic/archs.rs b/src/intrinsic/archs.rs index 3c1698df6de..1f0c1c25ff7 100644 --- a/src/intrinsic/archs.rs +++ b/src/intrinsic/archs.rs @@ -24,6 +24,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "gcsss" => "__builtin_arm_gcsss", "isb" => "__builtin_arm_isb", "prefetch" => "__builtin_arm_prefetch", + "prefetch.ir" => "__builtin_arm_prefetch_ir", "range.prefetch" => "__builtin_arm_range_prefetch", "sme.in.streaming.mode" => "__builtin_arm_in_streaming_mode", "sve.aesd" => "__builtin_sve_svaesd_u8", @@ -53,6 +54,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "alignbyte" => "__builtin_amdgcn_alignbyte", "ashr.pk.i8.i32" => "__builtin_amdgcn_ashr_pk_i8_i32", "ashr.pk.u8.i32" => "__builtin_amdgcn_ashr_pk_u8_i32", + "asyncmark" => "__builtin_amdgcn_asyncmark", "buffer.wbinvl1" => "__builtin_amdgcn_buffer_wbinvl1", "buffer.wbinvl1.sc" => "__builtin_amdgcn_buffer_wbinvl1_sc", "buffer.wbinvl1.vol" => "__builtin_amdgcn_buffer_wbinvl1_vol", @@ -270,6 +272,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "fdot2c.f32.bf16" => "__builtin_amdgcn_fdot2c_f32_bf16", "flat.prefetch" => "__builtin_amdgcn_flat_prefetch", "fmul.legacy" => "__builtin_amdgcn_fmul_legacy", + "global.load.async.lds" => "__builtin_amdgcn_global_load_async_lds", "global.load.async.to.lds.b128" => { "__builtin_amdgcn_global_load_async_to_lds_b128" } @@ -361,11 +364,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "perm.pk16.b4.u4" => "__builtin_amdgcn_perm_pk16_b4_u4", "perm.pk16.b6.u4" => "__builtin_amdgcn_perm_pk16_b6_u4", "perm.pk16.b8.u4" => "__builtin_amdgcn_perm_pk16_b8_u4", - "permlane.bcast" => "__builtin_amdgcn_permlane_bcast", - "permlane.down" => "__builtin_amdgcn_permlane_down", "permlane.idx.gen" => "__builtin_amdgcn_permlane_idx_gen", - "permlane.up" => "__builtin_amdgcn_permlane_up", - "permlane.xor" => "__builtin_amdgcn_permlane_xor", "permlane16.var" => "__builtin_amdgcn_permlane16_var", "permlanex16.var" => "__builtin_amdgcn_permlanex16_var", "pk.add.max.i16" => "__builtin_amdgcn_pk_add_max_i16", @@ -375,6 +374,9 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "prng.b32" => "__builtin_amdgcn_prng_b32", "qsad.pk.u16.u8" => "__builtin_amdgcn_qsad_pk_u16_u8", "queue.ptr" => "__builtin_amdgcn_queue_ptr", + "raw.ptr.buffer.load.async.lds" => { + "__builtin_amdgcn_raw_ptr_buffer_load_async_lds" + } "raw.ptr.buffer.load.lds" => "__builtin_amdgcn_raw_ptr_buffer_load_lds", "rcp.legacy" => "__builtin_amdgcn_rcp_legacy", "rsq.legacy" => "__builtin_amdgcn_rsq_legacy", @@ -412,6 +414,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "s.ttracedata" => "__builtin_amdgcn_s_ttracedata", "s.ttracedata.imm" => "__builtin_amdgcn_s_ttracedata_imm", "s.wait.asynccnt" => "__builtin_amdgcn_s_wait_asynccnt", + "s.wait.event" => "__builtin_amdgcn_s_wait_event", "s.wait.event.export.ready" => "__builtin_amdgcn_s_wait_event_export_ready", "s.wait.tensorcnt" => "__builtin_amdgcn_s_wait_tensorcnt", "s.waitcnt" => "__builtin_amdgcn_s_waitcnt", @@ -462,16 +465,18 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "smfmac.i32.16x16x64.i8" => "__builtin_amdgcn_smfmac_i32_16x16x64_i8", "smfmac.i32.32x32x32.i8" => "__builtin_amdgcn_smfmac_i32_32x32x32_i8", "smfmac.i32.32x32x64.i8" => "__builtin_amdgcn_smfmac_i32_32x32x64_i8", + "struct.ptr.buffer.load.async.lds" => { + "__builtin_amdgcn_struct_ptr_buffer_load_async_lds" + } "struct.ptr.buffer.load.lds" => "__builtin_amdgcn_struct_ptr_buffer_load_lds", "sudot4" => "__builtin_amdgcn_sudot4", "sudot8" => "__builtin_amdgcn_sudot8", "tensor.load.to.lds" => "__builtin_amdgcn_tensor_load_to_lds", - "tensor.load.to.lds.d2" => "__builtin_amdgcn_tensor_load_to_lds_d2", "tensor.store.from.lds" => "__builtin_amdgcn_tensor_store_from_lds", - "tensor.store.from.lds.d2" => "__builtin_amdgcn_tensor_store_from_lds_d2", "udot2" => "__builtin_amdgcn_udot2", "udot4" => "__builtin_amdgcn_udot4", "udot8" => "__builtin_amdgcn_udot8", + "wait.asyncmark" => "__builtin_amdgcn_wait_asyncmark", "wave.barrier" => "__builtin_amdgcn_wave_barrier", "wavefrontsize" => "__builtin_amdgcn_wavefrontsize", "workgroup.id.x" => "__builtin_amdgcn_workgroup_id_x", @@ -4844,7 +4849,11 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "add.rn.f" => "__nvvm_add_rn_f", "add.rn.ftz.f" => "__nvvm_add_rn_ftz_f", "add.rn.ftz.sat.f" => "__nvvm_add_rn_ftz_sat_f", + "add.rn.ftz.sat.f16" => "__nvvm_add_rn_ftz_sat_f16", + "add.rn.ftz.sat.v2f16" => "__nvvm_add_rn_ftz_sat_v2f16", "add.rn.sat.f" => "__nvvm_add_rn_sat_f", + "add.rn.sat.f16" => "__nvvm_add_rn_sat_f16", + "add.rn.sat.v2f16" => "__nvvm_add_rn_sat_v2f16", "add.rp.d" => "__nvvm_add_rp_d", "add.rp.f" => "__nvvm_add_rp_f", "add.rp.ftz.f" => "__nvvm_add_rp_ftz_f", @@ -5063,18 +5072,10 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "fma.rn.bf16x2" => "__nvvm_fma_rn_bf16x2", "fma.rn.d" => "__nvvm_fma_rn_d", "fma.rn.f" => "__nvvm_fma_rn_f", - "fma.rn.ftz.bf16" => "__nvvm_fma_rn_ftz_bf16", - "fma.rn.ftz.bf16x2" => "__nvvm_fma_rn_ftz_bf16x2", "fma.rn.ftz.f" => "__nvvm_fma_rn_ftz_f", - "fma.rn.ftz.relu.bf16" => "__nvvm_fma_rn_ftz_relu_bf16", - "fma.rn.ftz.relu.bf16x2" => "__nvvm_fma_rn_ftz_relu_bf16x2", - "fma.rn.ftz.sat.bf16" => "__nvvm_fma_rn_ftz_sat_bf16", - "fma.rn.ftz.sat.bf16x2" => "__nvvm_fma_rn_ftz_sat_bf16x2", "fma.rn.ftz.sat.f" => "__nvvm_fma_rn_ftz_sat_f", "fma.rn.relu.bf16" => "__nvvm_fma_rn_relu_bf16", "fma.rn.relu.bf16x2" => "__nvvm_fma_rn_relu_bf16x2", - "fma.rn.sat.bf16" => "__nvvm_fma_rn_sat_bf16", - "fma.rn.sat.bf16x2" => "__nvvm_fma_rn_sat_bf16x2", "fma.rn.sat.f" => "__nvvm_fma_rn_sat_f", "fma.rp.d" => "__nvvm_fma_rp_d", "fma.rp.f" => "__nvvm_fma_rp_f", @@ -5195,6 +5196,10 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "mul.rn.d" => "__nvvm_mul_rn_d", "mul.rn.f" => "__nvvm_mul_rn_f", "mul.rn.ftz.f" => "__nvvm_mul_rn_ftz_f", + "mul.rn.ftz.sat.f16" => "__nvvm_mul_rn_ftz_sat_f16", + "mul.rn.ftz.sat.v2f16" => "__nvvm_mul_rn_ftz_sat_v2f16", + "mul.rn.sat.f16" => "__nvvm_mul_rn_sat_f16", + "mul.rn.sat.v2f16" => "__nvvm_mul_rn_sat_v2f16", "mul.rp.d" => "__nvvm_mul_rp_d", "mul.rp.f" => "__nvvm_mul_rp_f", "mul.rp.ftz.f" => "__nvvm_mul_rp_ftz_f", @@ -5827,8 +5832,10 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "altivec.vmuleuh" => "__builtin_altivec_vmuleuh", "altivec.vmuleuw" => "__builtin_altivec_vmuleuw", "altivec.vmulhsd" => "__builtin_altivec_vmulhsd", + "altivec.vmulhsh" => "__builtin_altivec_vmulhsh", "altivec.vmulhsw" => "__builtin_altivec_vmulhsw", "altivec.vmulhud" => "__builtin_altivec_vmulhud", + "altivec.vmulhuh" => "__builtin_altivec_vmulhuh", "altivec.vmulhuw" => "__builtin_altivec_vmulhuw", "altivec.vmulosb" => "__builtin_altivec_vmulosb", "altivec.vmulosd" => "__builtin_altivec_vmulosd", @@ -5912,22 +5919,45 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "altivec.vsum4shs" => "__builtin_altivec_vsum4shs", "altivec.vsum4ubs" => "__builtin_altivec_vsum4ubs", "altivec.vsumsws" => "__builtin_altivec_vsumsws", + "altivec.vucmprhb" => "__builtin_altivec_vucmprhb", + "altivec.vucmprhh" => "__builtin_altivec_vucmprhh", + "altivec.vucmprhn" => "__builtin_altivec_vucmprhn", + "altivec.vucmprlb" => "__builtin_altivec_vucmprlb", + "altivec.vucmprlh" => "__builtin_altivec_vucmprlh", + "altivec.vucmprln" => "__builtin_altivec_vucmprln", "altivec.vupkhpx" => "__builtin_altivec_vupkhpx", "altivec.vupkhsb" => "__builtin_altivec_vupkhsb", "altivec.vupkhsh" => "__builtin_altivec_vupkhsh", + "altivec.vupkhsntob" => "__builtin_altivec_vupkhsntob", "altivec.vupkhsw" => "__builtin_altivec_vupkhsw", + "altivec.vupkint4tobf16" => "__builtin_altivec_vupkint4tobf16", + "altivec.vupkint4tofp32" => "__builtin_altivec_vupkint4tofp32", + "altivec.vupkint8tobf16" => "__builtin_altivec_vupkint8tobf16", + "altivec.vupkint8tofp32" => "__builtin_altivec_vupkint8tofp32", "altivec.vupklpx" => "__builtin_altivec_vupklpx", "altivec.vupklsb" => "__builtin_altivec_vupklsb", "altivec.vupklsh" => "__builtin_altivec_vupklsh", + "altivec.vupklsntob" => "__builtin_altivec_vupklsntob", "altivec.vupklsw" => "__builtin_altivec_vupklsw", "amo.ldat" => "__builtin_amo_ldat", + "amo.ldat.cond" => "__builtin_amo_ldat_cond", + "amo.ldat.csne" => "__builtin_amo_ldat_csne", "amo.lwat" => "__builtin_amo_lwat", + "amo.lwat.cond" => "__builtin_amo_lwat_cond", + "amo.lwat.csne" => "__builtin_amo_lwat_csne", + "amo.stdat" => "__builtin_amo_stdat", + "amo.stwat" => "__builtin_amo_stwat", "bcdadd" => "__builtin_ppc_bcdadd", "bcdadd.p" => "__builtin_ppc_bcdadd_p", "bcdcopysign" => "__builtin_ppc_bcdcopysign", "bcdsetsign" => "__builtin_ppc_bcdsetsign", + "bcdshift" => "__builtin_ppc_bcdshift", + "bcdshiftround" => "__builtin_ppc_bcdshiftround", "bcdsub" => "__builtin_ppc_bcdsub", "bcdsub.p" => "__builtin_ppc_bcdsub_p", + "bcdtruncate" => "__builtin_ppc_bcdtruncate", + "bcdunsignedshift" => "__builtin_ppc_bcdunsignedshift", + "bcdunsignedtruncate" => "__builtin_ppc_bcdunsignedtruncate", "bpermd" => "__builtin_bpermd", "cbcdtd" => "__builtin_cbcdtd", "cbcdtdd" => "__builtin_ppc_cbcdtd", @@ -6126,6 +6156,27 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "vsx.xxinsertw" => "__builtin_vsx_xxinsertw", "vsx.xxleqv" => "__builtin_vsx_xxleqv", "vsx.xxpermx" => "__builtin_vsx_xxpermx", + "xsaddaddsuqm" => "__builtin_xsaddaddsuqm", + "xsaddadduqm" => "__builtin_xsaddadduqm", + "xsaddsubsuqm" => "__builtin_xsaddsubsuqm", + "xsaddsubuqm" => "__builtin_xsaddsubuqm", + "xsmerge2t1uqm" => "__builtin_xsmerge2t1uqm", + "xsmerge2t2uqm" => "__builtin_xsmerge2t2uqm", + "xsmerge2t3uqm" => "__builtin_xsmerge2t3uqm", + "xsmerge3t1uqm" => "__builtin_xsmerge3t1uqm", + "xsrebase2t1uqm" => "__builtin_xsrebase2t1uqm", + "xsrebase2t2uqm" => "__builtin_xsrebase2t2uqm", + "xsrebase2t3uqm" => "__builtin_xsrebase2t3uqm", + "xsrebase2t4uqm" => "__builtin_xsrebase2t4uqm", + "xsrebase3t1uqm" => "__builtin_xsrebase3t1uqm", + "xsrebase3t2uqm" => "__builtin_xsrebase3t2uqm", + "xsrebase3t3uqm" => "__builtin_xsrebase3t3uqm", + "xxmulmul" => "__builtin_xxmulmul", + "xxmulmulhiadd" => "__builtin_xxmulmulhiadd", + "xxmulmulloadd" => "__builtin_xxmulmulloadd", + "xxssumudm" => "__builtin_xxssumudm", + "xxssumudmc" => "__builtin_xxssumudmc", + "xxssumudmcext" => "__builtin_xxssumudmcext", "zoned2packed" => "__builtin_ppc_zoned2packed", _ => unimplemented!("***** unsupported LLVM intrinsic {full_name}"), } @@ -6388,13 +6439,13 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { // spv "group.memory.barrier.with.group.sync" => "__builtin_spirv_group_barrier", "num.subgroups" => "__builtin_spirv_num_subgroups", + "subgroup.ballot" => "__builtin_spirv_subgroup_ballot", "subgroup.id" => "__builtin_spirv_subgroup_id", "subgroup.local.invocation.id" => { "__builtin_spirv_subgroup_local_invocation_id" } "subgroup.max.size" => "__builtin_spirv_subgroup_max_size", "subgroup.size" => "__builtin_spirv_subgroup_size", - "wave.ballot" => "__builtin_spirv_subgroup_ballot", _ => unimplemented!("***** unsupported LLVM intrinsic {full_name}"), } }