Skip to content

Add {try_}map_ref #303

@Fuuzetsu

Description

@Fuuzetsu

It seems difficult to efficiently map an ArrayVec, i.e. something that's length preserving. For code like this:

#[inline]
fn map_f(v: &u8) -> f64 {
    f64::from(*v)
}

pub fn map_arr_iter_ix(a: arrayvec::ArrayVec<u8, 24>) -> arrayvec::ArrayVec<f64, 24> {
    arrayvec::ArrayVec::from_iter((0..a.len()).map(|ix| map_f(&a[ix])))
}

pub fn map_arr_iter_value(a: arrayvec::ArrayVec<u8, 24>) -> arrayvec::ArrayVec<f64, 24> {
    arrayvec::ArrayVec::from_iter(a.iter().map(map_f))
}

It generates ASM that includes capacity checks, panic code etc. The ASM is similar enough in both cases that I only include one.

.section .text.models::map_arr_iter_value,"ax",@progbits
	.globl	models::map_arr_iter_value
	.p2align	4
	.type	models::map_arr_iter_value,@function
models::map_arr_iter_value:
		// /home/shana/programming/engine/models/src/lib.rs : 379
		pub fn map_arr_iter_value(a: arrayvec::ArrayVec<u8, 24>) -> arrayvec::ArrayVec<f64, 24> {
	.cfi_startproc
	push rbp
	.cfi_def_cfa_offset 16
	push r15
	.cfi_def_cfa_offset 24
	push r14
	.cfi_def_cfa_offset 32
	push r13
	.cfi_def_cfa_offset 40
	push r12
	.cfi_def_cfa_offset 48
	push rbx
	.cfi_def_cfa_offset 56
	sub rsp, 200
	.cfi_def_cfa_offset 256
	.cfi_offset rbx, -56
	.cfi_offset r12, -48
	.cfi_offset r13, -40
	.cfi_offset r14, -32
	.cfi_offset r15, -24
	.cfi_offset rbp, -16
	mov rbx, rdi
		// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 114
		pub const fn len(&self) -> usize { self.len as usize }
	mov r12d, dword ptr [rsi]
	test r12, r12
		// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/slice/iter/macros.rs : 179
		if ptr == crate::intrinsics::transmute::<$ptr, NonNull<T>>(end_or_len) {
	je .LBB962_1
	mov r14, rsi
		// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/ptr/mut_ptr.rs : 961
		unsafe { intrinsics::offset(self, count) }
	add r14, 4
		// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 1101
		if ptr == end_ptr && CHECK { extend_panic(); }
	mov r13, r12
	shl r13, 3
	xor ebp, ebp
	jmp .LBB962_3
	.p2align	4
.LBB962_5:
	vcvtsi2sd xmm0, xmm15, r15d
	inc r14
		// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/ptr/mod.rs : 1940
		intrinsics::write_via_move(dst, src)
	vmovsd qword ptr [rsp + rbp + 8], xmm0
	add rbp, 8
		// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/ptr/non_null.rs : 1683
		self.as_ptr() == other.as_ptr()
	cmp r13, rbp
		// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/slice/iter/macros.rs : 179
		if ptr == crate::intrinsics::transmute::<$ptr, NonNull<T>>(end_or_len) {
	je .LBB962_6
.LBB962_3:
		// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/ops/function.rs : 310
		(*self).call_mut(args)
	movzx r15d, byte ptr [r14]
		// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 1101
		if ptr == end_ptr && CHECK { extend_panic(); }
	cmp rbp, 192
	jne .LBB962_5
	lea rdi, [rip + .Lanon.acc18acdac17d7dd77cba6a0167c7603.34]
	call qword ptr [rip + arrayvec::arrayvec::extend_panic@GOTPCREL]
	jmp .LBB962_5
.LBB962_1:
	xor r12d, r12d
.LBB962_6:
		// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 1095
		**self_len = len as LenUint;
	mov dword ptr [rsp], r12d
		// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 1148
		array
	mov rax, qword ptr [rsp + 192]
	mov qword ptr [rbx + 192], rax
	vmovups ymm0, ymmword ptr [rsp + 160]
	vmovups ymmword ptr [rbx + 160], ymm0
	vmovups ymm0, ymmword ptr [rsp + 32]
	vmovups ymm1, ymmword ptr [rsp + 64]
	vmovups ymm2, ymmword ptr [rsp + 96]
	vmovups ymm3, ymmword ptr [rsp + 128]
	vmovups ymmword ptr [rbx + 128], ymm3
	vmovups ymmword ptr [rbx + 96], ymm2
	vmovups ymmword ptr [rbx + 64], ymm1
	vmovups ymmword ptr [rbx + 32], ymm0
	mov eax, dword ptr [rsp]
	mov dword ptr [rbx], eax
	vmovups xmm0, xmmword ptr [rsp + 4]
	vmovups xmmword ptr [rbx + 4], xmm0
	mov rax, qword ptr [rsp + 20]
	mov qword ptr [rbx + 20], rax
	mov eax, dword ptr [rsp + 28]
	mov dword ptr [rbx + 28], eax
		// /home/shana/programming/engine/models/src/lib.rs : 381
		}
	mov rax, rbx
	add rsp, 200
	.cfi_def_cfa_offset 56
	pop rbx
	.cfi_def_cfa_offset 48
	pop r12
	.cfi_def_cfa_offset 40
	pop r13
	.cfi_def_cfa_offset 32
	pop r14
	.cfi_def_cfa_offset 24
	pop r15
	.cfi_def_cfa_offset 16
	pop rbp
	.cfi_def_cfa_offset 8
	vzeroupper
	ret

After playing around with various options, I have come up with this as the best option

/// Map from one arrayvec (references) into a new one. If the target capacity is
/// lower than the original one, the mapping will only happen on the part that
/// fits (rather than panicking).
pub fn map_ref<T, U, const N: usize, const M: usize>(
    a: &arrayvec::ArrayVec<T, N>,
    mut f: impl FnMut(&T) -> U,
) -> arrayvec::ArrayVec<U, M> {
    // const M: usize = N;
    let mut arr = arrayvec::ArrayVec::<U, M>::new_const();

    // The .take(N) makes the compiler realise that slice must have at most `N`
    // elements. You can actually achieve the same thing with an
    // `assert!(slice.len() <= N)` and it'll compile that away. Of course _we_
    // know it's true but if you omit both the `.take()` and the assert! then it
    // suddenly forgets to keep track of the fact and adds panic checks in the
    // try_push below.
    //
    // Further, at no loss of expressivity nor runtime cost at all, we can
    // generate a different capacity ArrayVec and simply push as much as we can
    // into it: all it takes is changing `.take(N)` into `.take(N.min(M))` and
    // it'll generate sane code in all the cases.
    let iter = a.as_slice().iter().take(N.min(M));

    for v in iter {
        // unwrap never fails because .take() ensures we're within capacity
        // bounds. Compiler realises this and omits and checks here. If we were
        // using no-panic create, we could put it on this function to ensure
        // this...
        arr.try_push(f(v)).unwrap();
    }

    arr
}

This generates ASM without any panics. But it feels like we should just have method for this.

I threw in support for different sized arrayvecs because it was trivial and we have an actual use-case for this where we map an arrayvec except the last element, so this is prefect).

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions