diff --git a/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64 b/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64 index 0ab9f379a2..22a8d048ce 100644 --- a/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64 +++ b/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64 @@ -3,32 +3,20 @@ bench_new_box_zeroed_with_elems_dynamic_padding: push rbx push rax mov rbx, rdi - mov ecx, 3 - mov rax, rdi - mul rcx - jo .LBB5_6 - mov r14, rax - cmp rax, -10 - ja .LBB5_6 - lea rax, [r14 + 9] - not r14d - and r14d, 3 - add r14, rax - setb al - movabs rcx, 9223372036854775803 - cmp r14, rcx - seta cl - or cl, al - je .LBB5_4 -.LBB5_6: - xor eax, eax - jmp .LBB5_5 -.LBB5_4: + movabs rax, 3074457345618258598 + cmp rdi, rax + ja .LBB5_1 + lea r14, [rbx + 2*rbx] + or r14, 3 + add r14, 9 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] mov esi, 4 mov rdi, r14 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] -.LBB5_5: + jmp .LBB5_3 +.LBB5_1: + xor eax, eax +.LBB5_3: mov rdx, rbx add rsp, 8 pop rbx diff --git a/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64.mca b/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64.mca index f666a03ce9..e6efaeded4 100644 --- a/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64.mca +++ b/benches/new_box_zeroed_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 3200 -Total Cycles: 2989 -Total uOps: 4300 +Instructions: 2100 +Total Cycles: 2990 +Total uOps: 3000 Dispatch Width: 4 -uOps Per Cycle: 1.44 -IPC: 1.07 -Block RThroughput: 10.8 +uOps Per Cycle: 1.00 +IPC: 0.70 +Block RThroughput: 7.5 Instruction Info: @@ -22,29 +22,18 @@ Instruction Info: 2 5 1.00 * push rbx 2 5 1.00 * push rax 1 1 0.33 mov rbx, rdi - 1 1 0.33 mov ecx, 3 - 1 1 0.33 mov rax, rdi - 2 4 1.00 mul rcx - 1 1 1.00 jo .LBB5_6 - 1 1 0.33 mov r14, rax - 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_6 - 1 1 0.50 lea rax, [r14 + 9] - 1 1 0.33 not r14d - 1 1 0.33 and r14d, 3 - 1 1 0.33 add r14, rax - 1 1 0.50 setb al - 1 1 0.33 movabs rcx, 9223372036854775803 - 1 1 0.33 cmp r14, rcx - 2 2 1.00 seta cl - 1 1 0.33 or cl, al - 1 1 1.00 je .LBB5_4 - 1 0 0.25 xor eax, eax - 1 1 1.00 jmp .LBB5_5 + 1 1 0.33 movabs rax, 3074457345618258598 + 1 1 0.33 cmp rdi, rax + 1 1 1.00 ja .LBB5_1 + 1 1 0.50 lea r14, [rbx + 2*rbx] + 1 1 0.33 or r14, 3 + 1 1 0.33 add r14, 9 4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] 1 1 0.33 mov esi, 4 1 1 0.33 mov rdi, r14 4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] + 1 1 1.00 jmp .LBB5_3 + 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rbx 1 1 0.33 add rsp, 8 1 6 0.50 * pop rbx @@ -65,39 +54,28 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 8.99 8.98 5.00 10.03 4.49 4.51 + - - 4.49 4.50 5.00 6.01 4.50 4.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - 1.00 - 0.49 0.51 push r14 - - - - - 1.00 - 0.51 0.49 push rbx - - - - - 1.00 - 0.49 0.51 push rax - - - 0.95 0.04 - 0.01 - - mov rbx, rdi - - - - 0.97 - 0.03 - - mov ecx, 3 - - - 0.02 0.02 - 0.96 - - mov rax, rdi - - - 1.00 1.00 - - - - mul rcx - - - - - - 1.00 - - jo .LBB5_6 - - - 0.02 0.97 - 0.01 - - mov r14, rax - - - 0.97 0.03 - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_6 - - - 0.99 0.01 - - - - lea rax, [r14 + 9] - - - 0.01 0.99 - - - - not r14d - - - 0.97 0.03 - - - - and r14d, 3 - - - 0.01 0.98 - 0.01 - - add r14, rax - - - 1.00 - - - - - setb al - - - 0.02 - - 0.98 - - movabs rcx, 9223372036854775803 - - - - 0.97 - 0.03 - - cmp r14, rcx - - - 2.00 - - - - - seta cl - - - 0.03 0.03 - 0.94 - - or cl, al - - - - - - 1.00 - - je .LBB5_4 + - - - - 1.00 - 0.50 0.50 push r14 + - - - - 1.00 - 0.50 0.50 push rbx + - - - - 1.00 - 0.50 0.50 push rax + - - 0.49 0.50 - 0.01 - - mov rbx, rdi + - - 0.50 0.50 - - - - movabs rax, 3074457345618258598 + - - 0.50 0.50 - - - - cmp rdi, rax + - - - - - 1.00 - - ja .LBB5_1 + - - 0.50 0.50 - - - - lea r14, [rbx + 2*rbx] + - - 0.50 0.50 - - - - or r14, 3 + - - 0.50 - - 0.50 - - add r14, 9 + - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] + - - - 0.50 - 0.50 - - mov esi, 4 + - - 0.50 0.50 - - - - mov rdi, r14 + - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] + - - - - - 1.00 - - jmp .LBB5_3 - - - - - - - - xor eax, eax - - - - - - 1.00 - - jmp .LBB5_5 - - - - - 1.00 1.00 1.02 0.98 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] - - - 0.03 0.97 - - - - mov esi, 4 - - - 0.96 0.01 - 0.03 - - mov rdi, r14 - - - - - 1.00 1.00 0.98 1.02 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] - - - - 0.97 - 0.03 - - mov rdx, rbx - - - 0.01 0.99 - - - - add rsp, 8 + - - 0.51 0.49 - - - - mov rdx, rbx + - - 0.49 0.51 - - - - add rsp, 8 - - - - - - 0.50 0.50 pop rbx - - - - - - 0.50 0.50 pop r14 - - - - - 1.00 - - ret diff --git a/benches/new_box_zeroed_with_elems_dynamic_size.x86-64 b/benches/new_box_zeroed_with_elems_dynamic_size.x86-64 index 175fff0fd3..bff15e55ad 100644 --- a/benches/new_box_zeroed_with_elems_dynamic_size.x86-64 +++ b/benches/new_box_zeroed_with_elems_dynamic_size.x86-64 @@ -3,17 +3,17 @@ bench_new_box_zeroed_with_elems_dynamic_size: push rbx push rax mov rbx, rdi - movabs rax, 4611686018427387900 + movabs rax, 4611686018427387901 cmp rdi, rax - jbe .LBB5_2 - xor eax, eax - jmp .LBB5_3 -.LBB5_2: + ja .LBB5_1 lea r14, [2*rbx + 4] call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] mov esi, 2 mov rdi, r14 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] + jmp .LBB5_3 +.LBB5_1: + xor eax, eax .LBB5_3: mov rdx, rbx add rsp, 8 diff --git a/benches/new_box_zeroed_with_elems_dynamic_size.x86-64.mca b/benches/new_box_zeroed_with_elems_dynamic_size.x86-64.mca index 88b5f84b98..153d36c01c 100644 --- a/benches/new_box_zeroed_with_elems_dynamic_size.x86-64.mca +++ b/benches/new_box_zeroed_with_elems_dynamic_size.x86-64.mca @@ -22,16 +22,16 @@ Instruction Info: 2 5 1.00 * push rbx 2 5 1.00 * push rax 1 1 0.33 mov rbx, rdi - 1 1 0.33 movabs rax, 4611686018427387900 + 1 1 0.33 movabs rax, 4611686018427387901 1 1 0.33 cmp rdi, rax - 1 1 1.00 jbe .LBB5_2 - 1 0 0.25 xor eax, eax - 1 1 1.00 jmp .LBB5_3 + 1 1 1.00 ja .LBB5_1 1 1 0.50 lea r14, [2*rbx + 4] 4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] 1 1 0.33 mov esi, 2 1 1 0.33 mov rdi, r14 4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] + 1 1 1.00 jmp .LBB5_3 + 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rbx 1 1 0.33 add rsp, 8 1 6 0.50 * pop rbx @@ -52,26 +52,26 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 3.97 3.97 5.00 5.06 4.50 4.50 + - - 3.97 3.98 5.00 5.05 4.50 4.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 1.00 - 0.50 0.50 push r14 - - - - 1.00 - 0.50 0.50 push rbx - - - - 1.00 - 0.50 0.50 push rax - - - 0.94 0.05 - 0.01 - - mov rbx, rdi - - - 0.05 0.95 - - - - movabs rax, 4611686018427387900 - - - 0.95 - - 0.05 - - cmp rdi, rax - - - - - - 1.00 - - jbe .LBB5_2 - - - - - - - - - xor eax, eax - - - - - - 1.00 - - jmp .LBB5_3 - - - - 1.00 - - - - lea r14, [2*rbx + 4] + - - 0.05 0.94 - 0.01 - - mov rbx, rdi + - - 0.94 0.06 - - - - movabs rax, 4611686018427387901 + - - 0.06 0.94 - - - - cmp rdi, rax + - - - - - 1.00 - - ja .LBB5_1 + - - 0.94 0.06 - - - - lea r14, [2*rbx + 4] - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] - - - 0.06 0.94 - - - - mov esi, 2 - - - 0.94 0.06 - - - - mov rdi, r14 + - - 0.98 0.02 - - - - mov esi, 2 + - - 0.02 0.94 - 0.04 - - mov rdi, r14 - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] - - - 0.05 0.95 - - - - mov rdx, rbx - - - 0.98 0.02 - - - - add rsp, 8 + - - - - - 1.00 - - jmp .LBB5_3 + - - - - - - - - xor eax, eax + - - 0.94 0.06 - - - - mov rdx, rbx + - - 0.04 0.96 - - - - add rsp, 8 - - - - - - 0.50 0.50 pop rbx - - - - - - 0.50 0.50 pop r14 - - - - - 1.00 - - ret diff --git a/benches/new_vec_zeroed.x86-64 b/benches/new_vec_zeroed.x86-64 index 264fa4a852..b5c083aa0d 100644 --- a/benches/new_vec_zeroed.x86-64 +++ b/benches/new_vec_zeroed.x86-64 @@ -1,44 +1,40 @@ bench_new_vec_zeroed: + mov rax, rdi + movabs rcx, 1537228672809129301 + cmp rsi, rcx + ja .LBB5_5 + test rsi, rsi + je .LBB5_2 push r15 push r14 - push r12 push rbx - push rax - mov rbx, rdi - movabs r12, 9223372036854775805 - mov ecx, 6 - mov rax, rsi - mul rcx - jo .LBB5_6 - cmp rax, r12 - jbe .LBB5_2 -.LBB5_6: - add r12, 3 - mov qword ptr [rbx], r12 - jmp .LBB5_7 -.LBB5_2: - mov r14, rsi - test rax, rax - je .LBB5_3 - mov r15, rax + lea rcx, [rsi + rsi] + lea rbx, [rcx + 2*rcx] + mov r14, rax + mov r15, rsi call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] mov esi, 2 - mov rdi, r15 + mov rdi, rbx call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] - test rax, rax - jne .LBB5_5 - jmp .LBB5_6 -.LBB5_3: - mov eax, 2 -.LBB5_5: - mov qword ptr [rbx], r14 - mov qword ptr [rbx + 8], rax - mov qword ptr [rbx + 16], r14 -.LBB5_7: - mov rax, rbx - add rsp, 8 + mov rsi, r15 + mov rcx, rax + mov rax, r14 + test rcx, rcx pop rbx - pop r12 pop r14 pop r15 + je .LBB5_5 + mov qword ptr [rax], rsi + mov qword ptr [rax + 8], rcx + mov qword ptr [rax + 16], rsi + ret +.LBB5_5: + movabs rcx, -9223372036854775808 + mov qword ptr [rax], rcx + ret +.LBB5_2: + mov ecx, 2 + mov qword ptr [rax], rsi + mov qword ptr [rax + 8], rcx + mov qword ptr [rax + 16], rsi ret diff --git a/benches/new_vec_zeroed.x86-64.mca b/benches/new_vec_zeroed.x86-64.mca index 093bbde096..b4fb4544ec 100644 --- a/benches/new_vec_zeroed.x86-64.mca +++ b/benches/new_vec_zeroed.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 3800 -Total Cycles: 5277 -Total uOps: 5000 +Instructions: 3700 +Total Cycles: 3486 +Total uOps: 4600 Dispatch Width: 4 -uOps Per Cycle: 0.95 -IPC: 0.72 -Block RThroughput: 12.5 +uOps Per Cycle: 1.32 +IPC: 1.06 +Block RThroughput: 12.0 Instruction Info: @@ -18,43 +18,42 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: + 1 1 0.33 mov rax, rdi + 1 1 0.33 movabs rcx, 1537228672809129301 + 1 1 0.33 cmp rsi, rcx + 1 1 1.00 ja .LBB5_5 + 1 1 0.33 test rsi, rsi + 1 1 1.00 je .LBB5_2 2 5 1.00 * push r15 2 5 1.00 * push r14 - 2 5 1.00 * push r12 2 5 1.00 * push rbx - 2 5 1.00 * push rax - 1 1 0.33 mov rbx, rdi - 1 1 0.33 movabs r12, 9223372036854775805 - 1 1 0.33 mov ecx, 6 - 1 1 0.33 mov rax, rsi - 2 4 1.00 mul rcx - 1 1 1.00 jo .LBB5_6 - 1 1 0.33 cmp rax, r12 - 1 1 1.00 jbe .LBB5_2 - 1 1 0.33 add r12, 3 - 1 1 1.00 * mov qword ptr [rbx], r12 - 1 1 1.00 jmp .LBB5_7 - 1 1 0.33 mov r14, rsi - 1 1 0.33 test rax, rax - 1 1 1.00 je .LBB5_3 - 1 1 0.33 mov r15, rax + 1 1 0.50 lea rcx, [rsi + rsi] + 1 1 0.50 lea rbx, [rcx + 2*rcx] + 1 1 0.33 mov r14, rax + 1 1 0.33 mov r15, rsi 4 7 1.00 * call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] 1 1 0.33 mov esi, 2 - 1 1 0.33 mov rdi, r15 + 1 1 0.33 mov rdi, rbx 4 7 1.00 * call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] - 1 1 0.33 test rax, rax - 1 1 1.00 jne .LBB5_5 - 1 1 1.00 jmp .LBB5_6 - 1 1 0.33 mov eax, 2 - 1 1 1.00 * mov qword ptr [rbx], r14 - 1 1 1.00 * mov qword ptr [rbx + 8], rax - 1 1 1.00 * mov qword ptr [rbx + 16], r14 - 1 1 0.33 mov rax, rbx - 1 1 0.33 add rsp, 8 + 1 1 0.33 mov rsi, r15 + 1 1 0.33 mov rcx, rax + 1 1 0.33 mov rax, r14 + 1 1 0.33 test rcx, rcx 1 6 0.50 * pop rbx - 1 6 0.50 * pop r12 1 6 0.50 * pop r14 1 6 0.50 * pop r15 + 1 1 1.00 je .LBB5_5 + 1 1 1.00 * mov qword ptr [rax], rsi + 1 1 1.00 * mov qword ptr [rax + 8], rcx + 1 1 1.00 * mov qword ptr [rax + 16], rsi + 1 1 1.00 U ret + 1 1 0.33 movabs rcx, -9223372036854775808 + 1 1 1.00 * mov qword ptr [rax], rcx + 1 1 1.00 U ret + 1 1 0.33 mov ecx, 2 + 1 1 1.00 * mov qword ptr [rax], rsi + 1 1 1.00 * mov qword ptr [rax + 8], rcx + 1 1 1.00 * mov qword ptr [rax + 16], rsi 1 1 1.00 U ret @@ -71,45 +70,44 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 7.01 7.98 11.00 11.01 8.50 8.50 + - - 6.99 6.99 12.00 10.02 8.00 9.00 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - 1.00 - 0.50 0.50 push r15 - - - - - 1.00 - 0.50 0.50 push r14 - - - - - 1.00 - 0.50 0.50 push r12 - - - - - 1.00 - 0.50 0.50 push rbx - - - - - 1.00 - 0.50 0.50 push rax - - - 0.98 0.01 - 0.01 - - mov rbx, rdi - - - 0.01 0.99 - - - - movabs r12, 9223372036854775805 - - - 0.02 - - 0.98 - - mov ecx, 6 - - - - 0.98 - 0.02 - - mov rax, rsi - - - 1.00 1.00 - - - - mul rcx - - - - - - 1.00 - - jo .LBB5_6 - - - 0.99 0.01 - - - - cmp rax, r12 - - - - - - 1.00 - - jbe .LBB5_2 - - - - - - 1.00 - - add r12, 3 - - - - - 1.00 - 0.50 0.50 mov qword ptr [rbx], r12 - - - - - - 1.00 - - jmp .LBB5_7 - - - 0.98 0.02 - - - - mov r14, rsi - - - 0.01 0.99 - - - - test rax, rax - - - - - - 1.00 - - je .LBB5_3 - - - 0.99 0.01 - - - - mov r15, rax - - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] - - - 0.01 0.99 - - - - mov esi, 2 - - - 0.99 0.01 - - - - mov rdi, r15 - - - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] - - - 0.01 0.99 - - - - test rax, rax - - - - - - 1.00 - - jne .LBB5_5 - - - - - - 1.00 - - jmp .LBB5_6 - - - 0.02 0.98 - - - - mov eax, 2 - - - - - 1.00 - 0.50 0.50 mov qword ptr [rbx], r14 - - - - - 1.00 - 0.50 0.50 mov qword ptr [rbx + 8], rax - - - - - 1.00 - 0.50 0.50 mov qword ptr [rbx + 16], r14 - - - 0.97 0.03 - - - - mov rax, rbx - - - 0.03 0.97 - - - - add rsp, 8 - - - - - - - 0.50 0.50 pop rbx - - - - - - - 0.50 0.50 pop r12 - - - - - - - 0.50 0.50 pop r14 - - - - - - - 0.50 0.50 pop r15 + - - 0.01 0.98 - 0.01 - - mov rax, rdi + - - 0.98 0.02 - - - - movabs rcx, 1537228672809129301 + - - 0.02 0.98 - - - - cmp rsi, rcx + - - - - - 1.00 - - ja .LBB5_5 + - - 0.98 - - 0.02 - - test rsi, rsi + - - - - - 1.00 - - je .LBB5_2 + - - - - 1.00 - - 1.00 push r15 + - - - - 1.00 - 1.00 - push r14 + - - - - 1.00 - - 1.00 push rbx + - - - 1.00 - - - - lea rcx, [rsi + rsi] + - - - 1.00 - - - - lea rbx, [rcx + 2*rcx] + - - 1.00 - - - - - mov r14, rax + - - 1.00 - - - - - mov r15, rsi + - - - - 1.00 1.00 2.00 - call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL] + - - - 0.01 - 0.99 - - mov esi, 2 + - - 0.01 0.99 - - - - mov rdi, rbx + - - - - 1.00 1.00 - 2.00 call qword ptr [rip + __rustc::__rust_alloc_zeroed@GOTPCREL] + - - 0.01 - - 0.99 - - mov rsi, r15 + - - 0.99 0.01 - - - - mov rcx, rax + - - - 0.99 - 0.01 - - mov rax, r14 + - - 0.99 0.01 - - - - test rcx, rcx + - - - - - - - 1.00 pop rbx + - - - - - - 1.00 - pop r14 + - - - - - - - 1.00 pop r15 + - - - - - 1.00 - - je .LBB5_5 + - - - - 1.00 - 1.00 - mov qword ptr [rax], rsi + - - - - 1.00 - - 1.00 mov qword ptr [rax + 8], rcx + - - - - 1.00 - 1.00 - mov qword ptr [rax + 16], rsi + - - - - - 1.00 - - ret + - - 0.01 0.99 - - - - movabs rcx, -9223372036854775808 + - - - - 1.00 - - 1.00 mov qword ptr [rax], rcx + - - - - - 1.00 - - ret + - - 0.99 0.01 - - - - mov ecx, 2 + - - - - 1.00 - 1.00 - mov qword ptr [rax], rsi + - - - - 1.00 - - 1.00 mov qword ptr [rax + 8], rcx + - - - - 1.00 - 1.00 - mov qword ptr [rax + 16], rsi - - - - - 1.00 - - ret diff --git a/benches/ref_from_bytes_dynamic_padding.x86-64 b/benches/ref_from_bytes_dynamic_padding.x86-64 index e844a4608f..5177a4ce95 100644 --- a/benches/ref_from_bytes_dynamic_padding.x86-64 +++ b/benches/ref_from_bytes_dynamic_padding.x86-64 @@ -1,22 +1,24 @@ bench_ref_from_bytes_dynamic_padding: - test dil, 3 - jne .LBB5_3 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jb .LBB5_3 + setb cl + test dil, 3 + setne dl + or dl, cl + jne .LBB5_1 add rax, -9 movabs rcx, -6148914691236517205 mul rcx shr rdx - lea rax, [rdx + 2*rdx] - or rax, 3 - add rax, 9 - cmp rsi, rax - je .LBB5_4 -.LBB5_3: - xor edi, edi - mov rdx, rsi -.LBB5_4: - mov rax, rdi + lea rcx, [rdx + 2*rdx] + or rcx, 3 + add rcx, 9 + xor eax, eax + cmp rsi, rcx + cmovne rdx, rsi + cmove rax, rdi + ret +.LBB5_1: + xor eax, eax ret diff --git a/benches/ref_from_bytes_dynamic_padding.x86-64.mca b/benches/ref_from_bytes_dynamic_padding.x86-64.mca index 423ed38ba2..25a0d3e961 100644 --- a/benches/ref_from_bytes_dynamic_padding.x86-64.mca +++ b/benches/ref_from_bytes_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1900 -Total Cycles: 645 -Total uOps: 2000 +Instructions: 2200 +Total Cycles: 783 +Total uOps: 2500 Dispatch Width: 4 -uOps Per Cycle: 3.10 -IPC: 2.95 -Block RThroughput: 5.0 +uOps Per Cycle: 3.19 +IPC: 2.81 +Block RThroughput: 6.3 Instruction Info: @@ -18,24 +18,27 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 test dil, 3 - 1 1 1.00 jne .LBB5_3 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jb .LBB5_3 + 1 1 0.50 setb cl + 1 1 0.33 test dil, 3 + 1 1 0.50 setne dl + 1 1 0.33 or dl, cl + 1 1 1.00 jne .LBB5_1 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 2 4 1.00 mul rcx 1 1 0.50 shr rdx - 1 1 0.50 lea rax, [rdx + 2*rdx] - 1 1 0.33 or rax, 3 - 1 1 0.33 add rax, 9 - 1 1 0.33 cmp rsi, rax - 1 1 1.00 je .LBB5_4 - 1 0 0.25 xor edi, edi - 1 1 0.33 mov rdx, rsi - 1 1 0.33 mov rax, rdi + 1 1 0.50 lea rcx, [rdx + 2*rdx] + 1 1 0.33 or rcx, 3 + 1 1 0.33 add rcx, 9 + 1 0 0.25 xor eax, eax + 1 1 0.33 cmp rsi, rcx + 2 2 0.67 cmovne rdx, rsi + 2 2 0.67 cmove rax, rdi + 1 1 1.00 U ret + 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -52,26 +55,29 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.32 6.33 - 6.35 - - + - - 7.65 7.67 - 7.68 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.64 0.35 - 0.01 - - test dil, 3 - - - - - - 1.00 - - jne .LBB5_3 - - - 0.34 0.65 - 0.01 - - movabs rax, 9223372036854775804 - - - 0.35 0.65 - - - - and rax, rsi - - - 0.33 0.34 - 0.33 - - cmp rax, 9 - - - - - - 1.00 - - jb .LBB5_3 - - - 0.35 - - 0.65 - - add rax, -9 - - - 0.97 0.01 - 0.02 - - movabs rcx, -6148914691236517205 + - - - 0.99 - 0.01 - - movabs rax, 9223372036854775804 + - - 0.04 0.95 - 0.01 - - and rax, rsi + - - 0.09 0.85 - 0.06 - - cmp rax, 9 + - - 0.50 - - 0.50 - - setb cl + - - 0.01 0.95 - 0.04 - - test dil, 3 + - - 0.36 - - 0.64 - - setne dl + - - 0.47 0.12 - 0.41 - - or dl, cl + - - - - - 1.00 - - jne .LBB5_1 + - - - 0.95 - 0.05 - - add rax, -9 + - - - 0.81 - 0.19 - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - - 0.99 - - 0.01 - - shr rdx - - - 0.33 0.67 - - - - lea rax, [rdx + 2*rdx] - - - 0.34 0.66 - - - - or rax, 3 - - - 0.33 0.66 - 0.01 - - add rax, 9 - - - 0.01 0.99 - - - - cmp rsi, rax - - - - - - 1.00 - - je .LBB5_4 - - - - - - - - - xor edi, edi - - - 0.32 0.01 - 0.67 - - mov rdx, rsi - - - 0.02 0.34 - 0.64 - - mov rax, rdi + - - 0.62 - - 0.38 - - shr rdx + - - 0.62 0.38 - - - - lea rcx, [rdx + 2*rdx] + - - 0.59 0.17 - 0.24 - - or rcx, 3 + - - 0.61 0.19 - 0.20 - - add rcx, 9 + - - - - - - - - xor eax, eax + - - 0.75 0.24 - 0.01 - - cmp rsi, rcx + - - 1.00 0.03 - 0.97 - - cmovne rdx, rsi + - - 0.99 0.04 - 0.97 - - cmove rax, rdi + - - - - - 1.00 - - ret + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/ref_from_bytes_dynamic_size.x86-64 b/benches/ref_from_bytes_dynamic_size.x86-64 index cc905b76c0..2ed6e32b05 100644 --- a/benches/ref_from_bytes_dynamic_size.x86-64 +++ b/benches/ref_from_bytes_dynamic_size.x86-64 @@ -1,20 +1,20 @@ bench_ref_from_bytes_dynamic_size: - mov rdx, rsi cmp rsi, 4 setb al - or al, dil - test al, 1 - je .LBB5_2 + mov ecx, edi + or cl, al + test cl, 1 + jne .LBB5_1 + lea rcx, [rsi - 4] + mov rdx, rcx + shr rdx + and rcx, -2 + add rcx, 4 xor eax, eax + cmp rsi, rcx + cmovne rdx, rsi + cmove rax, rdi ret -.LBB5_2: - lea rcx, [rdx - 4] - mov rsi, rcx - and rsi, -2 - add rsi, 4 - shr rcx +.LBB5_1: xor eax, eax - cmp rdx, rsi - cmove rdx, rcx - cmove rax, rdi ret diff --git a/benches/ref_from_bytes_dynamic_size.x86-64.mca b/benches/ref_from_bytes_dynamic_size.x86-64.mca index 68aea583e4..7c90f65142 100644 --- a/benches/ref_from_bytes_dynamic_size.x86-64.mca +++ b/benches/ref_from_bytes_dynamic_size.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 Instructions: 1800 -Total Cycles: 704 +Total Cycles: 606 Total uOps: 2000 Dispatch Width: 4 -uOps Per Cycle: 2.84 -IPC: 2.56 +uOps Per Cycle: 3.30 +IPC: 2.97 Block RThroughput: 5.0 @@ -18,23 +18,23 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 mov rdx, rsi 1 1 0.33 cmp rsi, 4 1 1 0.50 setb al - 1 1 0.33 or al, dil - 1 1 0.33 test al, 1 - 1 1 1.00 je .LBB5_2 + 1 1 0.33 mov ecx, edi + 1 1 0.33 or cl, al + 1 1 0.33 test cl, 1 + 1 1 1.00 jne .LBB5_1 + 1 1 0.50 lea rcx, [rsi - 4] + 1 1 0.33 mov rdx, rcx + 1 1 0.50 shr rdx + 1 1 0.33 and rcx, -2 + 1 1 0.33 add rcx, 4 1 0 0.25 xor eax, eax + 1 1 0.33 cmp rsi, rcx + 2 2 0.67 cmovne rdx, rsi + 2 2 0.67 cmove rax, rdi 1 1 1.00 U ret - 1 1 0.50 lea rcx, [rdx - 4] - 1 1 0.33 mov rsi, rcx - 1 1 0.33 and rsi, -2 - 1 1 0.33 add rsi, 4 - 1 1 0.50 shr rcx 1 0 0.25 xor eax, eax - 1 1 0.33 cmp rdx, rsi - 2 2 0.67 cmove rdx, rcx - 2 2 0.67 cmove rax, rdi 1 1 1.00 U ret @@ -51,25 +51,25 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 5.97 5.98 - 6.05 - - + - - 6.00 6.00 - 6.00 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.97 0.01 - 0.02 - - mov rdx, rsi - - - 0.01 0.02 - 0.97 - - cmp rsi, 4 - - - 0.03 - - 0.97 - - setb al - - - 0.01 0.02 - 0.97 - - or al, dil - - - - 0.98 - 0.02 - - test al, 1 - - - - - - 1.00 - - je .LBB5_2 + - - 0.99 - - 0.01 - - cmp rsi, 4 + - - 1.00 - - - - - setb al + - - 0.98 0.02 - - - - mov ecx, edi + - - 0.98 0.01 - 0.01 - - or cl, al + - - 0.01 0.99 - - - - test cl, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.99 0.01 - - - - lea rcx, [rsi - 4] + - - 0.02 0.98 - - - - mov rdx, rcx + - - - - - 1.00 - - shr rdx + - - 0.99 0.01 - - - - and rcx, -2 + - - - 1.00 - - - - add rcx, 4 - - - - - - - - xor eax, eax + - - 0.02 0.98 - - - - cmp rsi, rcx + - - 0.01 1.00 - 0.99 - - cmovne rdx, rsi + - - 0.01 1.00 - 0.99 - - cmove rax, rdi - - - - - 1.00 - - ret - - - 0.98 0.02 - - - - lea rcx, [rdx - 4] - - - 0.01 0.99 - - - - mov rsi, rcx - - - - 0.98 - 0.02 - - and rsi, -2 - - - 0.98 0.01 - 0.01 - - add rsi, 4 - - - 0.99 - - 0.01 - - shr rcx - - - - - - - - xor eax, eax - - - 0.02 0.97 - 0.01 - - cmp rdx, rsi - - - 0.99 0.99 - 0.02 - - cmove rdx, rcx - - - 0.98 0.99 - 0.03 - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 index ba9e1a2c78..1ab816b4cc 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 +++ b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 @@ -1,30 +1,17 @@ bench_ref_from_bytes_with_elems_dynamic_padding: - mov rcx, rdx - mov edx, 3 - mov rax, rcx - mul rdx - jo .LBB5_5 - cmp rax, -10 - ja .LBB5_5 - mov edx, eax - not edx - and edx, 3 - add rdx, rax - add rdx, 9 - cmp rsi, rdx - jne .LBB5_5 - mov r8d, edi - and r8d, 3 - jne .LBB5_5 - add rax, 9 - cmp rdx, rax - jb .LBB5_5 + movabs rcx, 3074457345618258598 + cmp rdx, rcx + ja .LBB5_3 mov rax, rdi - mov rdx, rcx + test al, 3 + jne .LBB5_3 + lea rcx, [rdx + 2*rdx] + or rcx, 3 + add rcx, 9 + cmp rsi, rcx + jne .LBB5_3 ret -.LBB5_5: - xor edi, edi - mov rcx, rsi - mov rax, rdi - mov rdx, rcx +.LBB5_3: + xor eax, eax + mov rdx, rsi ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca index 93696305cb..afb0b4c0b1 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca +++ b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2800 -Total Cycles: 944 -Total uOps: 2900 +Instructions: 1500 +Total Cycles: 505 +Total uOps: 1500 Dispatch Width: 4 -uOps Per Cycle: 3.07 +uOps Per Cycle: 2.97 IPC: 2.97 -Block RThroughput: 7.3 +Block RThroughput: 5.0 Instruction Info: @@ -18,33 +18,20 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 mov rcx, rdx - 1 1 0.33 mov edx, 3 - 1 1 0.33 mov rax, rcx - 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_5 - 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_5 - 1 1 0.33 mov edx, eax - 1 1 0.33 not edx - 1 1 0.33 and edx, 3 - 1 1 0.33 add rdx, rax - 1 1 0.33 add rdx, 9 - 1 1 0.33 cmp rsi, rdx - 1 1 1.00 jne .LBB5_5 - 1 1 0.33 mov r8d, edi - 1 1 0.33 and r8d, 3 - 1 1 1.00 jne .LBB5_5 - 1 1 0.33 add rax, 9 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 jb .LBB5_5 + 1 1 0.33 movabs rcx, 3074457345618258598 + 1 1 0.33 cmp rdx, rcx + 1 1 1.00 ja .LBB5_3 1 1 0.33 mov rax, rdi - 1 1 0.33 mov rdx, rcx + 1 1 0.33 test al, 3 + 1 1 1.00 jne .LBB5_3 + 1 1 0.50 lea rcx, [rdx + 2*rdx] + 1 1 0.33 or rcx, 3 + 1 1 0.33 add rcx, 9 + 1 1 0.33 cmp rsi, rcx + 1 1 1.00 jne .LBB5_3 1 1 1.00 U ret - 1 0 0.25 xor edi, edi - 1 1 0.33 mov rcx, rsi - 1 1 0.33 mov rax, rdi - 1 1 0.33 mov rdx, rcx + 1 0 0.25 xor eax, eax + 1 1 0.33 mov rdx, rsi 1 1 1.00 U ret @@ -61,35 +48,22 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 9.32 9.32 - 9.36 - - + - - 4.49 4.49 - 5.02 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.64 0.18 - 0.18 - - mov rcx, rdx - - - 0.17 0.83 - - - - mov edx, 3 - - - 0.50 0.49 - 0.01 - - mov rax, rcx - - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_5 - - - 0.82 0.18 - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_5 - - - 0.02 0.98 - - - - mov edx, eax - - - 0.82 0.02 - 0.16 - - not edx - - - 0.82 0.17 - 0.01 - - and edx, 3 - - - 0.99 - - 0.01 - - add rdx, rax - - - 0.98 0.01 - 0.01 - - add rdx, 9 - - - 1.00 - - - - - cmp rsi, rdx - - - - - - 1.00 - - jne .LBB5_5 - - - 0.16 0.83 - 0.01 - - mov r8d, edi - - - 0.17 0.17 - 0.66 - - and r8d, 3 - - - - - - 1.00 - - jne .LBB5_5 - - - 0.02 0.98 - - - - add rax, 9 - - - - 0.17 - 0.83 - - cmp rdx, rax - - - - - - 1.00 - - jb .LBB5_5 - - - 0.01 0.67 - 0.32 - - mov rax, rdi - - - 0.02 0.98 - - - - mov rdx, rcx + - - 0.97 0.02 - 0.01 - - movabs rcx, 3074457345618258598 + - - 0.50 0.50 - - - - cmp rdx, rcx + - - - - - 1.00 - - ja .LBB5_3 + - - 0.50 0.50 - - - - mov rax, rdi + - - 0.02 0.97 - 0.01 - - test al, 3 + - - - - - 1.00 - - jne .LBB5_3 + - - 0.97 0.03 - - - - lea rcx, [rdx + 2*rdx] + - - 0.50 0.50 - - - - or rcx, 3 + - - 0.03 0.97 - - - - add rcx, 9 + - - 0.03 0.97 - - - - cmp rsi, rcx + - - - - - 1.00 - - jne .LBB5_3 - - - - - 1.00 - - ret - - - - - - - - - xor edi, edi - - - 0.34 0.66 - - - - mov rcx, rsi - - - 0.34 0.50 - 0.16 - - mov rax, rdi - - - 0.50 0.50 - - - - mov rdx, rcx + - - - - - - - - xor eax, eax + - - 0.97 0.03 - - - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 index 6aaff6d066..efee25e23f 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 +++ b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 @@ -1,13 +1,12 @@ bench_ref_from_bytes_with_elems_dynamic_size: - movabs rax, 9223372036854775805 - cmp rdx, rax - seta cl + movabs rcx, 4611686018427387901 + cmp rdx, rcx + ja .LBB5_2 mov rax, rdi - or dil, cl - test dil, 1 - jne .LBB5_2 lea rcx, [2*rdx + 4] - cmp rsi, rcx + and edi, 1 + xor rcx, rsi + or rcx, rdi je .LBB5_3 .LBB5_2: xor eax, eax diff --git a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca index 4a67974f1a..3235e68f2b 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca +++ b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1300 -Total Cycles: 439 -Total uOps: 1400 +Instructions: 1200 +Total Cycles: 371 +Total uOps: 1200 Dispatch Width: 4 -uOps Per Cycle: 3.19 -IPC: 2.96 -Block RThroughput: 3.5 +uOps Per Cycle: 3.23 +IPC: 3.23 +Block RThroughput: 3.0 Instruction Info: @@ -18,15 +18,14 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 9223372036854775805 - 1 1 0.33 cmp rdx, rax - 2 2 1.00 seta cl + 1 1 0.33 movabs rcx, 4611686018427387901 + 1 1 0.33 cmp rdx, rcx + 1 1 1.00 ja .LBB5_2 1 1 0.33 mov rax, rdi - 1 1 0.33 or dil, cl - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_2 1 1 0.50 lea rcx, [2*rdx + 4] - 1 1 0.33 cmp rsi, rcx + 1 1 0.33 and edi, 1 + 1 1 0.33 xor rcx, rsi + 1 1 0.33 or rcx, rdi 1 1 1.00 je .LBB5_3 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rsi @@ -46,20 +45,19 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.32 4.33 - 4.35 - - + - - 3.66 3.66 - 3.68 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 0.99 - 0.01 - - movabs rax, 9223372036854775805 - - - 0.33 0.67 - - - - cmp rdx, rax - - - 1.98 - - 0.02 - - seta cl - - - 0.01 0.99 - - - - mov rax, rdi - - - 1.00 - - - - - or dil, cl - - - 0.99 0.01 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_2 - - - - 1.00 - - - - lea rcx, [2*rdx + 4] - - - 0.01 - - 0.99 - - cmp rsi, rcx + - - - 0.99 - 0.01 - - movabs rcx, 4611686018427387901 + - - 0.35 0.33 - 0.32 - - cmp rdx, rcx + - - - - - 1.00 - - ja .LBB5_2 + - - 0.63 0.37 - - - - mov rax, rdi + - - 0.35 0.65 - - - - lea rcx, [2*rdx + 4] + - - 0.34 0.65 - 0.01 - - and edi, 1 + - - 0.99 0.01 - - - - xor rcx, rsi + - - 1.00 - - - - - or rcx, rdi - - - - - 1.00 - - je .LBB5_3 - - - - - - - - xor eax, eax - - - - 0.67 - 0.33 - - mov rdx, rsi + - - - 0.66 - 0.34 - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_dynamic_padding.x86-64 b/benches/ref_from_prefix_dynamic_padding.x86-64 index a58592a245..01228fbcbc 100644 --- a/benches/ref_from_prefix_dynamic_padding.x86-64 +++ b/benches/ref_from_prefix_dynamic_padding.x86-64 @@ -1,22 +1,24 @@ bench_ref_from_prefix_dynamic_padding: - xor edx, edx - mov eax, 0 - test dil, 3 - je .LBB5_1 - ret -.LBB5_1: movabs rax, 9223372036854775804 - and rsi, rax - cmp rsi, 9 - jae .LBB5_3 + and rax, rsi + cmp rax, 9 + jae .LBB5_2 mov edx, 1 - xor eax, eax + xor ecx, ecx + mov rax, rcx + ret +.LBB5_2: + xor edx, edx + mov ecx, 0 + test dil, 3 + je .LBB5_3 + mov rax, rcx ret .LBB5_3: - add rsi, -9 + add rax, -9 movabs rcx, -6148914691236517205 - mov rax, rsi mul rcx shr rdx - mov rax, rdi + mov rcx, rdi + mov rax, rcx ret diff --git a/benches/ref_from_prefix_dynamic_padding.x86-64.mca b/benches/ref_from_prefix_dynamic_padding.x86-64.mca index 62ea4babaf..6e50e96210 100644 --- a/benches/ref_from_prefix_dynamic_padding.x86-64.mca +++ b/benches/ref_from_prefix_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1900 -Total Cycles: 608 -Total uOps: 2000 +Instructions: 2100 +Total Cycles: 673 +Total uOps: 2200 Dispatch Width: 4 -uOps Per Cycle: 3.29 -IPC: 3.13 -Block RThroughput: 5.0 +uOps Per Cycle: 3.27 +IPC: 3.12 +Block RThroughput: 5.5 Instruction Info: @@ -18,24 +18,26 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_1 - 1 1 1.00 U ret 1 1 0.33 movabs rax, 9223372036854775804 - 1 1 0.33 and rsi, rax - 1 1 0.33 cmp rsi, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 0.33 and rax, rsi + 1 1 0.33 cmp rax, 9 + 1 1 1.00 jae .LBB5_2 1 1 0.33 mov edx, 1 - 1 0 0.25 xor eax, eax + 1 0 0.25 xor ecx, ecx + 1 1 0.33 mov rax, rcx + 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov ecx, 0 + 1 1 0.33 test dil, 3 + 1 1 1.00 je .LBB5_3 + 1 1 0.33 mov rax, rcx 1 1 1.00 U ret - 1 1 0.33 add rsi, -9 + 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 - 1 1 0.33 mov rax, rsi 2 4 1.00 mul rcx 1 1 0.50 shr rdx - 1 1 0.33 mov rax, rdi + 1 1 0.33 mov rcx, rdi + 1 1 0.33 mov rax, rcx 1 1 1.00 U ret @@ -52,26 +54,28 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.00 6.00 - 6.00 - - + - - 6.67 6.66 - 6.67 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - - xor edx, edx - - - 0.01 0.98 - 0.01 - - mov eax, 0 - - - 0.98 0.01 - 0.01 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_1 + - - 0.66 0.33 - 0.01 - - movabs rax, 9223372036854775804 + - - 0.33 0.67 - - - - and rax, rsi + - - - 1.00 - - - - cmp rax, 9 + - - - - - 1.00 - - jae .LBB5_2 + - - 0.67 - - 0.33 - - mov edx, 1 + - - - - - - - - xor ecx, ecx + - - 0.66 - - 0.34 - - mov rax, rcx - - - - - 1.00 - - ret - - - 0.01 0.99 - - - - movabs rax, 9223372036854775804 - - - - 1.00 - - - - and rsi, rax - - - - 1.00 - - - - cmp rsi, 9 - - - - - - 1.00 - - jae .LBB5_3 - - - 1.00 - - - - - mov edx, 1 - - - - - - - - - xor eax, eax + - - - - - - - - xor edx, edx + - - 0.67 0.33 - - - - mov ecx, 0 + - - - 0.67 - 0.33 - - test dil, 3 + - - - - - 1.00 - - je .LBB5_3 + - - 0.34 0.66 - - - - mov rax, rcx - - - - - 1.00 - - ret - - - 0.02 0.02 - 0.96 - - add rsi, -9 - - - 0.99 0.01 - - - - movabs rcx, -6148914691236517205 - - - 0.01 0.99 - - - - mov rax, rsi + - - - 1.00 - - - - add rax, -9 + - - 1.00 - - - - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - 1.00 - - - - - shr rdx - - - 0.98 - - 0.02 - - mov rax, rdi + - - 0.33 0.34 - 0.33 - - mov rcx, rdi + - - 0.01 0.66 - 0.33 - - mov rax, rcx - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_dynamic_size.x86-64 b/benches/ref_from_prefix_dynamic_size.x86-64 index fe6332c910..e402765c33 100644 --- a/benches/ref_from_prefix_dynamic_size.x86-64 +++ b/benches/ref_from_prefix_dynamic_size.x86-64 @@ -1,14 +1,14 @@ bench_ref_from_prefix_dynamic_size: - xor edx, edx - mov eax, 0 - test dil, 1 - jne .LBB5_4 cmp rsi, 4 - jae .LBB5_3 + jae .LBB5_2 mov edx, 1 xor eax, eax ret -.LBB5_3: +.LBB5_2: + xor edx, edx + mov eax, 0 + test dil, 1 + jne .LBB5_4 add rsi, -4 shr rsi mov rdx, rsi diff --git a/benches/ref_from_prefix_dynamic_size.x86-64.mca b/benches/ref_from_prefix_dynamic_size.x86-64.mca index 3900a59461..ce71749bc4 100644 --- a/benches/ref_from_prefix_dynamic_size.x86-64.mca +++ b/benches/ref_from_prefix_dynamic_size.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 Instructions: 1400 -Total Cycles: 405 +Total Cycles: 404 Total uOps: 1400 Dispatch Width: 4 -uOps Per Cycle: 3.46 -IPC: 3.46 +uOps Per Cycle: 3.47 +IPC: 3.47 Block RThroughput: 4.0 @@ -18,15 +18,15 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_4 1 1 0.33 cmp rsi, 4 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB5_2 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test dil, 1 + 1 1 1.00 jne .LBB5_4 1 1 0.33 add rsi, -4 1 1 0.50 shr rsi 1 1 0.33 mov rdx, rsi @@ -47,21 +47,21 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 3.99 3.99 - 4.02 - - + - - 3.99 4.00 - 4.01 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - - xor edx, edx - - - 0.01 0.98 - 0.01 - - mov eax, 0 - - - 0.98 0.02 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.02 0.98 - - - - cmp rsi, 4 - - - - - - 1.00 - - jae .LBB5_3 - - - 0.98 0.01 - 0.01 - - mov edx, 1 + - - 0.99 - - 0.01 - - cmp rsi, 4 + - - - - - 1.00 - - jae .LBB5_2 + - - - 1.00 - - - - mov edx, 1 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.01 0.99 - - - - add rsi, -4 + - - - - - - - - xor edx, edx + - - 1.00 - - - - - mov eax, 0 + - - - 1.00 - - - - test dil, 1 + - - - - - 1.00 - - jne .LBB5_4 + - - 1.00 - - - - - add rsi, -4 - - 1.00 - - - - - shr rsi - - - 1.00 - - - - mov rdx, rsi - - - 0.99 0.01 - - - - mov rax, rdi + - - - 1.00 - - - - mov rax, rdi - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_static_size.x86-64 b/benches/ref_from_prefix_static_size.x86-64 index 7c1bf45bb6..0328ae9719 100644 --- a/benches/ref_from_prefix_static_size.x86-64 +++ b/benches/ref_from_prefix_static_size.x86-64 @@ -1,8 +1,7 @@ bench_ref_from_prefix_static_size: xor eax, eax - cmp rsi, 6 - mov rcx, rdi - cmovb rcx, rax test dil, 1 - cmove rax, rcx + cmovne rdi, rax + cmp rsi, 6 + cmovae rax, rdi ret diff --git a/benches/ref_from_prefix_static_size.x86-64.mca b/benches/ref_from_prefix_static_size.x86-64.mca index 9691b88fe0..d4355bc6e8 100644 --- a/benches/ref_from_prefix_static_size.x86-64.mca +++ b/benches/ref_from_prefix_static_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 700 -Total Cycles: 274 -Total uOps: 900 +Instructions: 600 +Total Cycles: 305 +Total uOps: 800 Dispatch Width: 4 -uOps Per Cycle: 3.28 -IPC: 2.55 -Block RThroughput: 2.3 +uOps Per Cycle: 2.62 +IPC: 1.97 +Block RThroughput: 2.0 Instruction Info: @@ -19,11 +19,10 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 0 0.25 xor eax, eax - 1 1 0.33 cmp rsi, 6 - 1 1 0.33 mov rcx, rdi - 2 2 0.67 cmovb rcx, rax 1 1 0.33 test dil, 1 - 2 2 0.67 cmove rax, rcx + 2 2 0.67 cmovne rdi, rax + 1 1 0.33 cmp rsi, 6 + 2 2 0.67 cmovae rax, rdi 1 1 1.00 U ret @@ -40,14 +39,13 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 2.66 2.67 - 2.67 - - + - - 1.95 2.28 - 2.77 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - xor eax, eax - - - - 0.01 - 0.99 - - cmp rsi, 6 - - - 0.01 0.67 - 0.32 - - mov rcx, rdi - - - 1.00 0.99 - 0.01 - - cmovb rcx, rax - - - 0.66 0.01 - 0.33 - - test dil, 1 - - - 0.99 0.99 - 0.02 - - cmove rax, rcx + - - 0.05 0.06 - 0.89 - - test dil, 1 + - - 0.95 0.94 - 0.11 - - cmovne rdi, rax + - - - 0.34 - 0.66 - - cmp rsi, 6 + - - 0.95 0.94 - 0.11 - - cmovae rax, rdi - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 index c03811bdbe..2552d72393 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 +++ b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 @@ -1,35 +1,21 @@ bench_ref_from_prefix_with_elems_dynamic_padding: mov rcx, rdx - mov edx, 3 - mov rax, rcx - mul rdx mov edx, 1 - jo .LBB5_5 - cmp rax, -10 - ja .LBB5_5 - lea r8, [rax + 9] - not eax - and eax, 3 - add rax, r8 - jae .LBB5_3 -.LBB5_5: - xor r8d, r8d - mov rax, r8 - ret + movabs rax, 3074457345618258598 + cmp rcx, rax + ja .LBB5_3 + lea rax, [rcx + 2*rcx] + or rax, 3 + add rax, 9 + cmp rax, rsi + jbe .LBB5_4 .LBB5_3: - xor edx, edx - mov r8d, 0 - test dil, 3 - je .LBB5_4 - mov rax, r8 + xor eax, eax ret .LBB5_4: - xor edx, edx - cmp rax, rsi - mov eax, 1 - cmova rcx, rax - cmova rdi, rdx + xor eax, eax + test dil, 3 + cmovne rcx, rax + cmove rax, rdi mov rdx, rcx - mov r8, rdi - mov rax, r8 ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca index 6a3968fe9e..d69beeedc4 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 3100 -Total Cycles: 1110 -Total uOps: 3600 +Instructions: 1800 +Total Cycles: 605 +Total uOps: 2000 Dispatch Width: 4 -uOps Per Cycle: 3.24 -IPC: 2.79 -Block RThroughput: 9.0 +uOps Per Cycle: 3.31 +IPC: 2.98 +Block RThroughput: 5.0 Instruction Info: @@ -19,35 +19,22 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 mov rcx, rdx - 1 1 0.33 mov edx, 3 - 1 1 0.33 mov rax, rcx - 2 4 1.00 mul rdx 1 1 0.33 mov edx, 1 - 1 1 1.00 jo .LBB5_5 - 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_5 - 1 1 0.50 lea r8, [rax + 9] - 1 1 0.33 not eax - 1 1 0.33 and eax, 3 - 1 1 0.33 add rax, r8 - 1 1 1.00 jae .LBB5_3 - 1 0 0.25 xor r8d, r8d - 1 1 0.33 mov rax, r8 + 1 1 0.33 movabs rax, 3074457345618258598 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 + 1 1 0.50 lea rax, [rcx + 2*rcx] + 1 1 0.33 or rax, 3 + 1 1 0.33 add rax, 9 + 1 1 0.33 cmp rax, rsi + 1 1 1.00 jbe .LBB5_4 + 1 0 0.25 xor eax, eax 1 1 1.00 U ret - 1 0 0.25 xor edx, edx - 1 1 0.33 mov r8d, 0 + 1 0 0.25 xor eax, eax 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_4 - 1 1 0.33 mov rax, r8 - 1 1 1.00 U ret - 1 0 0.25 xor edx, edx - 1 1 0.33 cmp rax, rsi - 1 1 0.33 mov eax, 1 - 3 3 1.00 cmova rcx, rax - 3 3 1.00 cmova rdi, rdx + 2 2 0.67 cmovne rcx, rax + 2 2 0.67 cmove rax, rdi 1 1 0.33 mov rdx, rcx - 1 1 0.33 mov r8, rdi - 1 1 0.33 mov rax, r8 1 1 1.00 U ret @@ -64,38 +51,25 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 11.01 10.98 - 11.01 - - + - - 5.99 5.99 - 6.02 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.48 0.50 - 0.02 - - mov rcx, rdx - - - 0.02 0.98 - - - - mov edx, 3 - - - 0.51 0.48 - 0.01 - - mov rax, rcx - - - 1.00 1.00 - - - - mul rdx - - - 0.49 0.50 - 0.01 - - mov edx, 1 - - - - - - 1.00 - - jo .LBB5_5 - - - 0.98 0.02 - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_5 - - - 0.02 0.98 - - - - lea r8, [rax + 9] - - - 0.98 0.02 - - - - not eax - - - 0.99 0.01 - - - - and eax, 3 - - - 0.98 0.01 - 0.01 - - add rax, r8 - - - - - - 1.00 - - jae .LBB5_3 - - - - - - - - - xor r8d, r8d - - - 0.01 0.98 - 0.01 - - mov rax, r8 - - - - - - 1.00 - - ret - - - - - - - - - xor edx, edx - - - 0.48 0.52 - - - - mov r8d, 0 - - - 0.02 0.97 - 0.01 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_4 - - - 0.49 0.50 - 0.01 - - mov rax, r8 + - - 0.98 0.01 - 0.01 - - mov rcx, rdx + - - 0.01 0.99 - - - - mov edx, 1 + - - 0.02 0.98 - - - - movabs rax, 3074457345618258598 + - - 0.98 0.01 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - 0.01 0.99 - - - - lea rax, [rcx + 2*rcx] + - - 0.99 0.01 - - - - or rax, 3 + - - 0.99 0.01 - - - - add rax, 9 + - - 0.99 0.01 - - - - cmp rax, rsi + - - - - - 1.00 - - jbe .LBB5_4 + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - - - - - - - xor edx, edx - - - 0.51 0.49 - - - - cmp rax, rsi - - - - 1.00 - - - - mov eax, 1 - - - 1.04 0.97 - 0.99 - - cmova rcx, rax - - - 0.98 0.53 - 1.49 - - cmova rdi, rdx - - - 0.50 0.50 - - - - mov rdx, rcx - - - 0.51 0.01 - 0.48 - - mov r8, rdi - - - 0.02 0.01 - 0.97 - - mov rax, r8 + - - - - - - - - xor eax, eax + - - 0.01 0.98 - 0.01 - - test dil, 3 + - - 0.01 0.99 - 1.00 - - cmovne rcx, rax + - - 0.01 1.00 - 0.99 - - cmove rax, rdi + - - 0.99 0.01 - - - - mov rdx, rcx - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 index 05818b0633..1d6a8e334b 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 +++ b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 @@ -1,22 +1,19 @@ bench_ref_from_prefix_with_elems_dynamic_size: - movabs rax, 9223372036854775805 - cmp rdx, rax - ja .LBB5_1 mov rcx, rdx - xor edx, edx - mov eax, 0 - test dil, 1 - jne .LBB5_4 + mov edx, 1 + movabs rax, 4611686018427387901 + cmp rcx, rax + ja .LBB5_3 lea rax, [2*rcx + 4] - xor r8d, r8d cmp rax, rsi - mov edx, 1 - cmovbe rdx, rcx - cmova rdi, r8 - mov rax, rdi -.LBB5_4: + jbe .LBB5_4 +.LBB5_3: + xor eax, eax ret -.LBB5_1: - mov edx, 1 +.LBB5_4: xor eax, eax + test dil, 1 + cmovne rcx, rax + cmove rax, rdi + mov rdx, rcx ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca index 94c718e22c..da9883ddde 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca +++ b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1900 -Total Cycles: 672 -Total uOps: 2300 +Instructions: 1600 +Total Cycles: 603 +Total uOps: 1800 Dispatch Width: 4 -uOps Per Cycle: 3.42 -IPC: 2.83 -Block RThroughput: 5.8 +uOps Per Cycle: 2.99 +IPC: 2.65 +Block RThroughput: 4.5 Instruction Info: @@ -18,24 +18,21 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 9223372036854775805 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 1 1 0.33 mov rcx, rdx - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_4 + 1 1 0.33 mov edx, 1 + 1 1 0.33 movabs rax, 4611686018427387901 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 1 1 0.50 lea rax, [2*rcx + 4] - 1 0 0.25 xor r8d, r8d 1 1 0.33 cmp rax, rsi - 1 1 0.33 mov edx, 1 - 3 3 1.00 cmovbe rdx, rcx - 3 3 1.00 cmova rdi, r8 - 1 1 0.33 mov rax, rdi + 1 1 1.00 jbe .LBB5_4 + 1 0 0.25 xor eax, eax 1 1 1.00 U ret - 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax + 1 1 0.33 test dil, 1 + 2 2 0.67 cmovne rcx, rax + 2 2 0.67 cmove rax, rdi + 1 1 0.33 mov rdx, rcx 1 1 1.00 U ret @@ -52,26 +49,23 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.66 6.66 - 6.68 - - + - - 5.33 5.33 - 5.34 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 0.99 - 0.01 - - movabs rax, 9223372036854775805 - - - 0.37 0.63 - - - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - 0.63 0.37 - - - - mov rcx, rdx - - - - - - - - - xor edx, edx - - - 0.01 0.98 - 0.01 - - mov eax, 0 - - - 0.98 0.02 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.01 0.99 - - - - lea rax, [2*rcx + 4] - - - - - - - - - xor r8d, r8d - - - 1.00 - - - - - cmp rax, rsi - - - - 0.67 - 0.33 - - mov edx, 1 - - - 0.73 0.98 - 1.29 - - cmovbe rdx, rcx - - - 1.60 0.36 - 1.04 - - cmova rdi, r8 - - - 0.99 0.01 - - - - mov rax, rdi + - - 0.48 0.45 - 0.07 - - mov rcx, rdx + - - 0.45 0.49 - 0.06 - - mov edx, 1 + - - 0.18 0.25 - 0.57 - - movabs rax, 4611686018427387901 + - - 0.24 0.51 - 0.25 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - 0.52 0.48 - - - - lea rax, [2*rcx + 4] + - - 0.47 0.53 - - - - cmp rax, rsi + - - - - - 1.00 - - jbe .LBB5_4 + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.34 0.66 - - - - mov edx, 1 - - - - - - - - xor eax, eax + - - 0.47 0.50 - 0.03 - - test dil, 1 + - - 1.00 1.00 - - - - cmovne rcx, rax + - - 0.99 0.66 - 0.35 - - cmove rax, rdi + - - 0.53 0.46 - 0.01 - - mov rdx, rcx - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_dynamic_padding.x86-64 b/benches/ref_from_suffix_dynamic_padding.x86-64 index 3e05f6023f..9da52dcae0 100644 --- a/benches/ref_from_suffix_dynamic_padding.x86-64 +++ b/benches/ref_from_suffix_dynamic_padding.x86-64 @@ -1,11 +1,11 @@ bench_ref_from_suffix_dynamic_padding: - lea eax, [rsi + rdi] - test al, 3 - jne .LBB5_1 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jae .LBB5_3 + jb .LBB5_1 + lea ecx, [rsi + rdi] + test cl, 3 + je .LBB5_3 .LBB5_1: xor eax, eax ret diff --git a/benches/ref_from_suffix_dynamic_padding.x86-64.mca b/benches/ref_from_suffix_dynamic_padding.x86-64.mca index 73599d5b6a..929873f5e7 100644 --- a/benches/ref_from_suffix_dynamic_padding.x86-64.mca +++ b/benches/ref_from_suffix_dynamic_padding.x86-64.mca @@ -1,10 +1,10 @@ Iterations: 100 Instructions: 2000 -Total Cycles: 682 +Total Cycles: 683 Total uOps: 2100 Dispatch Width: 4 -uOps Per Cycle: 3.08 +uOps Per Cycle: 3.07 IPC: 2.93 Block RThroughput: 5.3 @@ -18,13 +18,13 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] - 1 1 0.33 test al, 3 - 1 1 1.00 jne .LBB5_1 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea ecx, [rsi + rdi] + 1 1 0.33 test cl, 3 + 1 1 1.00 je .LBB5_3 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.33 add rax, -9 @@ -53,27 +53,27 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.65 6.67 - 6.68 - - + - - 6.67 6.65 - 6.68 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.90 0.10 - - - - lea eax, [rsi + rdi] - - - 0.93 - - 0.07 - - test al, 3 - - - - - - 1.00 - - jne .LBB5_1 - - - 0.51 0.47 - 0.02 - - movabs rax, 9223372036854775804 - - - - - - 1.00 - - and rax, rsi - - - - 0.09 - 0.91 - - cmp rax, 9 - - - - - - 1.00 - - jae .LBB5_3 + - - 0.05 0.32 - 0.63 - - movabs rax, 9223372036854775804 + - - 0.63 0.03 - 0.34 - - and rax, rsi + - - 0.94 0.03 - 0.03 - - cmp rax, 9 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.05 0.95 - - - - lea ecx, [rsi + rdi] + - - 0.03 0.97 - - - - test cl, 3 + - - - - - 1.00 - - je .LBB5_3 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.43 0.47 - 0.10 - - add rax, -9 - - - 0.42 0.39 - 0.19 - - movabs rcx, -6148914691236517205 + - - 0.35 0.35 - 0.30 - - add rax, -9 + - - 0.95 0.04 - 0.01 - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - 0.69 - - 0.31 - - shr rdx - - - 0.54 0.46 - - - - lea rax, [rdx + 2*rdx] - - - 0.07 0.91 - 0.02 - - sub rsi, rax - - - 0.91 0.05 - 0.04 - - or rax, -4 - - - 0.08 0.90 - 0.02 - - add rsi, rdi - - - 0.09 0.91 - - - - add rax, rsi - - - 0.08 0.92 - - - - add rax, -8 + - - 0.65 0.35 - - - - lea rax, [rdx + 2*rdx] + - - 0.30 0.35 - 0.35 - - sub rsi, rax + - - 0.66 0.02 - 0.32 - - or rax, -4 + - - 0.02 0.64 - 0.34 - - add rsi, rdi + - - 0.33 0.65 - 0.02 - - add rax, rsi + - - 0.02 0.95 - 0.03 - - add rax, -8 - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_dynamic_size.x86-64 b/benches/ref_from_suffix_dynamic_size.x86-64 index bd4ace8983..13fdcf8624 100644 --- a/benches/ref_from_suffix_dynamic_size.x86-64 +++ b/benches/ref_from_suffix_dynamic_size.x86-64 @@ -1,13 +1,15 @@ bench_ref_from_suffix_dynamic_size: - mov rdx, rsi + cmp rsi, 4 + jb .LBB5_1 + mov rax, rdi lea ecx, [rsi + rdi] - mov eax, edx - and eax, 1 - add rax, rdi - xor esi, esi - sub rdx, 4 - cmovb rax, rsi - shr rdx test cl, 1 - cmovne rax, rsi + jne .LBB5_1 + lea rdx, [rsi - 4] + shr rdx + and esi, 1 + add rax, rsi + ret +.LBB5_1: + xor eax, eax ret diff --git a/benches/ref_from_suffix_dynamic_size.x86-64.mca b/benches/ref_from_suffix_dynamic_size.x86-64.mca index 1398bcfe27..949b83310c 100644 --- a/benches/ref_from_suffix_dynamic_size.x86-64.mca +++ b/benches/ref_from_suffix_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1200 -Total Cycles: 439 -Total uOps: 1400 +Instructions: 1300 +Total Cycles: 405 +Total uOps: 1300 Dispatch Width: 4 -uOps Per Cycle: 3.19 -IPC: 2.73 -Block RThroughput: 3.5 +uOps Per Cycle: 3.21 +IPC: 3.21 +Block RThroughput: 4.0 Instruction Info: @@ -18,17 +18,18 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 mov rdx, rsi + 1 1 0.33 cmp rsi, 4 + 1 1 1.00 jb .LBB5_1 + 1 1 0.33 mov rax, rdi 1 1 0.50 lea ecx, [rsi + rdi] - 1 1 0.33 mov eax, edx - 1 1 0.33 and eax, 1 - 1 1 0.33 add rax, rdi - 1 0 0.25 xor esi, esi - 1 1 0.33 sub rdx, 4 - 2 2 0.67 cmovb rax, rsi - 1 1 0.50 shr rdx 1 1 0.33 test cl, 1 - 2 2 0.67 cmovne rax, rsi + 1 1 1.00 jne .LBB5_1 + 1 1 0.50 lea rdx, [rsi - 4] + 1 1 0.50 shr rdx + 1 1 0.33 and esi, 1 + 1 1 0.33 add rax, rsi + 1 1 1.00 U ret + 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -45,19 +46,20 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.33 4.33 - 4.34 - - + - - 3.99 3.99 - 4.02 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.02 0.32 - 0.66 - - mov rdx, rsi - - - 0.32 0.68 - - - - lea ecx, [rsi + rdi] - - - 0.66 - - 0.34 - - mov eax, edx - - - 0.02 0.33 - 0.65 - - and eax, 1 - - - - 0.99 - 0.01 - - add rax, rdi - - - - - - - - - xor esi, esi - - - 0.65 - - 0.35 - - sub rdx, 4 - - - 1.00 1.00 - - - - cmovb rax, rsi - - - 0.66 - - 0.34 - - shr rdx - - - - 0.01 - 0.99 - - test cl, 1 - - - 1.00 1.00 - - - - cmovne rax, rsi + - - 0.02 0.97 - 0.01 - - cmp rsi, 4 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.97 0.03 - - - - mov rax, rdi + - - 0.01 0.99 - - - - lea ecx, [rsi + rdi] + - - 0.98 0.02 - - - - test cl, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.97 0.03 - - - - lea rdx, [rsi - 4] + - - 1.00 - - - - - shr rdx + - - 0.02 0.98 - - - - and esi, 1 + - - 0.02 0.97 - 0.01 - - add rax, rsi + - - - - - 1.00 - - ret + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_static_size.x86-64 b/benches/ref_from_suffix_static_size.x86-64 index 9e90b9e254..4f003e061d 100644 --- a/benches/ref_from_suffix_static_size.x86-64 +++ b/benches/ref_from_suffix_static_size.x86-64 @@ -1,13 +1,12 @@ bench_ref_from_suffix_static_size: - lea eax, [rsi + rdi] cmp rsi, 6 - setb cl - or cl, al - test cl, 1 - je .LBB5_2 - xor eax, eax - ret -.LBB5_2: + jb .LBB5_1 + lea eax, [rsi + rdi] + test al, 1 + jne .LBB5_1 lea rax, [rdi + rsi] add rax, -6 ret +.LBB5_1: + xor eax, eax + ret diff --git a/benches/ref_from_suffix_static_size.x86-64.mca b/benches/ref_from_suffix_static_size.x86-64.mca index ef5892647b..70da98d6db 100644 --- a/benches/ref_from_suffix_static_size.x86-64.mca +++ b/benches/ref_from_suffix_static_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1100 -Total Cycles: 338 -Total uOps: 1100 +Instructions: 1000 +Total Cycles: 404 +Total uOps: 1000 Dispatch Width: 4 -uOps Per Cycle: 3.25 -IPC: 3.25 -Block RThroughput: 3.0 +uOps Per Cycle: 2.48 +IPC: 2.48 +Block RThroughput: 4.0 Instruction Info: @@ -18,17 +18,16 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] 1 1 0.33 cmp rsi, 6 - 1 1 0.50 setb cl - 1 1 0.33 or cl, al - 1 1 0.33 test cl, 1 - 1 1 1.00 je .LBB5_2 - 1 0 0.25 xor eax, eax - 1 1 1.00 U ret + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea eax, [rsi + rdi] + 1 1 0.33 test al, 1 + 1 1 1.00 jne .LBB5_1 1 1 0.50 lea rax, [rdi + rsi] 1 1 0.33 add rax, -6 1 1 1.00 U ret + 1 0 0.25 xor eax, eax + 1 1 1.00 U ret Resources: @@ -44,18 +43,17 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 3.32 3.33 - 3.35 - - + - - 2.49 2.50 - 4.01 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.97 0.03 - - - - lea eax, [rsi + rdi] - - - 0.33 0.32 - 0.35 - - cmp rsi, 6 - - - 1.00 - - - - - setb cl - - - - 1.00 - - - - or cl, al - - - - 1.00 - - - - test cl, 1 - - - - - - 1.00 - - je .LBB5_2 - - - - - - - - - xor eax, eax - - - - - - 1.00 - - ret + - - 0.49 0.50 - 0.01 - - cmp rsi, 6 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.50 0.50 - - - - lea eax, [rsi + rdi] + - - 0.66 0.34 - - - - test al, 1 + - - - - - 1.00 - - jne .LBB5_1 - - 0.34 0.66 - - - - lea rax, [rdi + rsi] - - - 0.68 0.32 - - - - add rax, -6 + - - 0.50 0.50 - - - - add rax, -6 + - - - - - 1.00 - - ret + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 index b3e239cb75..e1844f6b1e 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 +++ b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 @@ -1,34 +1,26 @@ bench_ref_from_suffix_with_elems_dynamic_padding: mov rcx, rdx - mov edx, 3 - mov rax, rcx - mul rdx - jo .LBB5_1 - cmp rax, -10 - ja .LBB5_1 - lea rdx, [rax + 9] - not eax - and eax, 3 - add rax, rdx - jae .LBB5_4 -.LBB5_1: - xor r8d, r8d mov edx, 1 - mov rax, r8 - ret + movabs rax, 3074457345618258598 + cmp rcx, rax + ja .LBB5_4 + lea rax, [rcx + 2*rcx] + or rax, 3 + add rax, 9 + mov r8, rsi + sub r8, rax + jae .LBB5_2 .LBB5_4: - lea r9d, [rsi + rdi] - xor edx, edx - mov r8d, 0 - test r9b, 3 - je .LBB5_5 - mov rax, r8 - ret + xor eax, eax .LBB5_5: - sub rsi, rax - jb .LBB5_1 - add rdi, rsi + ret +.LBB5_2: + add esi, edi + xor edx, edx + mov eax, 0 + test sil, 3 + jne .LBB5_5 + add rdi, r8 mov rdx, rcx - mov r8, rdi - mov rax, r8 + mov rax, rdi ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca index c7c3c7ec2b..6cde05d596 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 3000 -Total Cycles: 973 -Total uOps: 3100 +Instructions: 2200 +Total Cycles: 671 +Total uOps: 2200 Dispatch Width: 4 -uOps Per Cycle: 3.19 -IPC: 3.08 -Block RThroughput: 8.0 +uOps Per Cycle: 3.28 +IPC: 3.28 +Block RThroughput: 5.5 Instruction Info: @@ -19,34 +19,26 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 mov rcx, rdx - 1 1 0.33 mov edx, 3 - 1 1 0.33 mov rax, rcx - 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_1 - 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_1 - 1 1 0.50 lea rdx, [rax + 9] - 1 1 0.33 not eax - 1 1 0.33 and eax, 3 - 1 1 0.33 add rax, rdx - 1 1 1.00 jae .LBB5_4 - 1 0 0.25 xor r8d, r8d 1 1 0.33 mov edx, 1 - 1 1 0.33 mov rax, r8 + 1 1 0.33 movabs rax, 3074457345618258598 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_4 + 1 1 0.50 lea rax, [rcx + 2*rcx] + 1 1 0.33 or rax, 3 + 1 1 0.33 add rax, 9 + 1 1 0.33 mov r8, rsi + 1 1 0.33 sub r8, rax + 1 1 1.00 jae .LBB5_2 + 1 0 0.25 xor eax, eax 1 1 1.00 U ret - 1 1 0.50 lea r9d, [rsi + rdi] + 1 1 0.33 add esi, edi 1 0 0.25 xor edx, edx - 1 1 0.33 mov r8d, 0 - 1 1 0.33 test r9b, 3 - 1 1 1.00 je .LBB5_5 - 1 1 0.33 mov rax, r8 - 1 1 1.00 U ret - 1 1 0.33 sub rsi, rax - 1 1 1.00 jb .LBB5_1 - 1 1 0.33 add rdi, rsi + 1 1 0.33 mov eax, 0 + 1 1 0.33 test sil, 3 + 1 1 1.00 jne .LBB5_5 + 1 1 0.33 add rdi, r8 1 1 0.33 mov rdx, rcx - 1 1 0.33 mov r8, rdi - 1 1 0.33 mov rax, r8 + 1 1 0.33 mov rax, rdi 1 1 1.00 U ret @@ -63,37 +55,29 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 9.66 9.66 - 9.68 - - + - - 6.66 6.66 - 6.68 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 0.99 - 0.01 - - mov rcx, rdx - - - 0.66 0.34 - - - - mov edx, 3 - - - 0.34 0.66 - - - - mov rax, rcx - - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_1 - - - 1.00 - - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_1 - - - - 1.00 - - - - lea rdx, [rax + 9] - - - 1.00 - - - - - not eax - - - 1.00 - - - - - and eax, 3 - - - 1.00 - - - - - add rax, rdx - - - - - - 1.00 - - jae .LBB5_4 - - - - - - - - - xor r8d, r8d - - - 0.33 0.33 - 0.34 - - mov edx, 1 - - - 0.33 - - 0.67 - - mov rax, r8 + - - 0.66 0.33 - 0.01 - - mov rcx, rdx + - - 0.33 0.67 - - - - mov edx, 1 + - - 0.67 0.33 - - - - movabs rax, 3074457345618258598 + - - 0.99 - - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_4 + - - - 1.00 - - - - lea rax, [rcx + 2*rcx] + - - 0.34 - - 0.66 - - or rax, 3 + - - 1.00 - - - - - add rax, 9 + - - - 0.34 - 0.66 - - mov r8, rsi + - - 1.00 - - - - - sub r8, rax + - - - - - 1.00 - - jae .LBB5_2 + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.33 0.67 - - - - lea r9d, [rsi + rdi] + - - - 1.00 - - - - add esi, edi - - - - - - - - xor edx, edx - - - 0.67 0.33 - - - - mov r8d, 0 - - - 0.33 0.34 - 0.33 - - test r9b, 3 - - - - - - 1.00 - - je .LBB5_5 - - - 0.66 0.01 - 0.33 - - mov rax, r8 - - - - - - 1.00 - - ret - - - 0.33 0.67 - - - - sub rsi, rax - - - - - - 1.00 - - jb .LBB5_1 - - - - 1.00 - - - - add rdi, rsi - - - 0.01 0.99 - - - - mov rdx, rcx - - - - 1.00 - - - - mov r8, rdi - - - 0.67 0.33 - - - - mov rax, r8 + - - - 0.99 - 0.01 - - mov eax, 0 + - - 0.33 0.34 - 0.33 - - test sil, 3 + - - - - - 1.00 - - jne .LBB5_5 + - - 0.67 0.33 - - - - add rdi, r8 + - - 0.33 0.67 - - - - mov rdx, rcx + - - 0.34 0.66 - - - - mov rax, rdi - - - - - 1.00 - - ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 index 5b401e7ca1..b9414b2d4c 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 +++ b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 @@ -1,23 +1,24 @@ bench_ref_from_suffix_with_elems_dynamic_size: - movabs rax, 9223372036854775805 - cmp rdx, rax - ja .LBB5_1 - lea r8d, [rsi + rdi] - xor ecx, ecx - mov eax, 0 - test r8b, 1 - jne .LBB5_5 - lea rax, [2*rdx + 4] - sub rsi, rax + mov rcx, rdx + mov edx, 1 + movabs rax, 4611686018427387901 + cmp rcx, rax + ja .LBB5_3 + lea rax, [2*rcx + 4] + mov r8, rsi + sub r8, rax jae .LBB5_4 -.LBB5_1: +.LBB5_3: xor eax, eax - mov edx, 1 ret .LBB5_4: - add rdi, rsi - mov rcx, rdx - mov rax, rdi -.LBB5_5: + add esi, edi + xor edx, edx + mov eax, 0 + test sil, 1 + jne .LBB5_6 + add rdi, r8 mov rdx, rcx + mov rax, rdi +.LBB5_6: ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca index eac400f3f4..46ce6b7d5e 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca +++ b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 -Instructions: 1900 -Total Cycles: 571 -Total uOps: 1900 +Instructions: 2000 +Total Cycles: 604 +Total uOps: 2000 Dispatch Width: 4 -uOps Per Cycle: 3.33 -IPC: 3.33 +uOps Per Cycle: 3.31 +IPC: 3.31 Block RThroughput: 5.0 @@ -18,24 +18,25 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 9223372036854775805 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 - 1 1 0.50 lea r8d, [rsi + rdi] - 1 0 0.25 xor ecx, ecx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test r8b, 1 - 1 1 1.00 jne .LBB5_5 - 1 1 0.50 lea rax, [2*rdx + 4] - 1 1 0.33 sub rsi, rax + 1 1 0.33 mov rcx, rdx + 1 1 0.33 mov edx, 1 + 1 1 0.33 movabs rax, 4611686018427387901 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 + 1 1 0.50 lea rax, [2*rcx + 4] + 1 1 0.33 mov r8, rsi + 1 1 0.33 sub r8, rax 1 1 1.00 jae .LBB5_4 1 0 0.25 xor eax, eax - 1 1 0.33 mov edx, 1 1 1 1.00 U ret - 1 1 0.33 add rdi, rsi - 1 1 0.33 mov rcx, rdx - 1 1 0.33 mov rax, rdi + 1 1 0.33 add esi, edi + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test sil, 1 + 1 1 1.00 jne .LBB5_6 + 1 1 0.33 add rdi, r8 1 1 0.33 mov rdx, rcx + 1 1 0.33 mov rax, rdi 1 1 1.00 U ret @@ -52,26 +53,27 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 5.66 5.66 - 5.68 - - + - - 5.99 6.00 - 6.01 - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.66 0.33 - 0.01 - - movabs rax, 9223372036854775805 - - - 0.01 0.99 - - - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - 0.99 0.01 - - - - lea r8d, [rsi + rdi] - - - - - - - - - xor ecx, ecx - - - 0.33 0.33 - 0.34 - - mov eax, 0 - - - 0.33 0.34 - 0.33 - - test r8b, 1 - - - - - - 1.00 - - jne .LBB5_5 - - - 0.34 0.66 - - - - lea rax, [2*rdx + 4] - - - - 1.00 - - - - sub rsi, rax + - - 0.04 0.95 - 0.01 - - mov rcx, rdx + - - - 1.00 - - - - mov edx, 1 + - - 1.00 - - - - - movabs rax, 4611686018427387901 + - - - - - 1.00 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - - 1.00 - - - - lea rax, [2*rcx + 4] + - - 1.00 - - - - - mov r8, rsi + - - - 1.00 - - - - sub r8, rax - - - - - 1.00 - - jae .LBB5_4 - - - - - - - - xor eax, eax - - - 1.00 - - - - - mov edx, 1 - - - - - 1.00 - - ret - - - - 1.00 - - - - add rdi, rsi - - - 1.00 - - - - - mov rcx, rdx - - - 0.32 0.68 - - - - mov rax, rdi - - - 0.68 0.32 - - - - mov rdx, rcx + - - 1.00 - - - - - add esi, edi + - - - - - - - - xor edx, edx + - - - 1.00 - - - - mov eax, 0 + - - 1.00 - - - - - test sil, 1 + - - - - - 1.00 - - jne .LBB5_6 + - - - 1.00 - - - - add rdi, r8 + - - 1.00 - - - - - mov rdx, rcx + - - 0.95 0.05 - - - - mov rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_bytes_dynamic_padding.x86-64 b/benches/try_ref_from_bytes_dynamic_padding.x86-64 index 217c5fc617..5b6bc5b189 100644 --- a/benches/try_ref_from_bytes_dynamic_padding.x86-64 +++ b/benches/try_ref_from_bytes_dynamic_padding.x86-64 @@ -1,24 +1,27 @@ bench_try_ref_from_bytes_dynamic_padding: - test dil, 3 - jne .LBB5_4 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jb .LBB5_4 + jb .LBB5_1 add rax, -9 movabs rcx, -6148914691236517205 mul rcx + test dil, 3 + jne .LBB5_1 shr rdx lea rax, [rdx + 2*rdx] or rax, 3 add rax, 9 cmp rsi, rax - jne .LBB5_4 - cmp word ptr [rdi], -16192 - je .LBB5_5 -.LBB5_4: - xor edi, edi + jne .LBB5_1 + movzx ecx, word ptr [rdi] + xor eax, eax + cmp ecx, 49344 + cmove rsi, rdx + cmove rax, rdi + mov rdx, rsi + ret +.LBB5_1: + xor eax, eax mov rdx, rsi -.LBB5_5: - mov rax, rdi ret diff --git a/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca b/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca index 95b993c7e0..ccc679bdcf 100644 --- a/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2100 -Total Cycles: 709 -Total uOps: 2300 +Instructions: 2500 +Total Cycles: 1008 +Total uOps: 2800 Dispatch Width: 4 -uOps Per Cycle: 3.24 -IPC: 2.96 -Block RThroughput: 5.8 +uOps Per Cycle: 2.78 +IPC: 2.48 +Block RThroughput: 7.0 Instruction Info: @@ -18,26 +18,30 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 test dil, 3 - 1 1 1.00 jne .LBB5_4 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jb .LBB5_4 + 1 1 1.00 jb .LBB5_1 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 2 4 1.00 mul rcx + 1 1 0.33 test dil, 3 + 1 1 1.00 jne .LBB5_1 1 1 0.50 shr rdx 1 1 0.50 lea rax, [rdx + 2*rdx] 1 1 0.33 or rax, 3 1 1 0.33 add rax, 9 1 1 0.33 cmp rsi, rax - 1 1 1.00 jne .LBB5_4 - 2 6 0.50 * cmp word ptr [rdi], -16192 - 1 1 1.00 je .LBB5_5 - 1 0 0.25 xor edi, edi + 1 1 1.00 jne .LBB5_1 + 1 5 0.50 * movzx ecx, word ptr [rdi] + 1 0 0.25 xor eax, eax + 1 1 0.33 cmp ecx, 49344 + 2 2 0.67 cmove rsi, rdx + 2 2 0.67 cmove rax, rdi + 1 1 0.33 mov rdx, rsi + 1 1 1.00 U ret + 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rsi - 1 1 0.33 mov rax, rdi 1 1 1.00 U ret @@ -54,28 +58,32 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.98 6.99 - 7.03 0.50 0.50 + - - 8.02 8.01 - 8.97 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.48 0.51 - 0.01 - - test dil, 3 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.51 0.49 - - - - movabs rax, 9223372036854775804 - - - 0.01 0.99 - - - - and rax, rsi - - - 0.51 0.49 - - - - cmp rax, 9 - - - - - - 1.00 - - jb .LBB5_4 - - - 0.98 - - 0.02 - - add rax, -9 - - - 0.98 0.02 - - - - movabs rcx, -6148914691236517205 + - - 0.07 0.04 - 0.89 - - movabs rax, 9223372036854775804 + - - 0.97 0.01 - 0.02 - - and rax, rsi + - - - 0.99 - 0.01 - - cmp rax, 9 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.99 0.01 - - - - add rax, -9 + - - 0.02 0.06 - 0.92 - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - - 0.99 - - 0.01 - - shr rdx - - - - 1.00 - - - - lea rax, [rdx + 2*rdx] - - - - 0.51 - 0.49 - - or rax, 3 - - - 0.01 0.49 - 0.50 - - add rax, 9 - - - - 0.02 - 0.98 - - cmp rsi, rax - - - - - - 1.00 - - jne .LBB5_4 - - - 0.51 0.49 - - 0.50 0.50 cmp word ptr [rdi], -16192 - - - - - - 1.00 - - je .LBB5_5 - - - - - - - - - xor edi, edi - - - 0.50 0.50 - - - - mov rdx, rsi - - - 0.50 0.48 - 0.02 - - mov rax, rdi + - - 0.07 0.91 - 0.02 - - test dil, 3 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.97 - - 0.03 - - shr rdx + - - 0.03 0.97 - - - - lea rax, [rdx + 2*rdx] + - - 0.02 0.95 - 0.03 - - or rax, 3 + - - 0.03 0.96 - 0.01 - - add rax, 9 + - - 0.01 0.98 - 0.01 - - cmp rsi, rax + - - - - - 1.00 - - jne .LBB5_1 + - - - - - - 0.50 0.50 movzx ecx, word ptr [rdi] + - - - - - - - - xor eax, eax + - - 0.91 0.06 - 0.03 - - cmp ecx, 49344 + - - 0.97 0.04 - 0.99 - - cmove rsi, rdx + - - 0.99 0.99 - 0.02 - - cmove rax, rdi + - - 0.96 0.03 - 0.01 - - mov rdx, rsi + - - - - - 1.00 - - ret + - - - - - - - - xor eax, eax + - - 0.01 0.01 - 0.98 - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_bytes_dynamic_size.x86-64 b/benches/try_ref_from_bytes_dynamic_size.x86-64 index cf67afd31c..15d08c143c 100644 --- a/benches/try_ref_from_bytes_dynamic_size.x86-64 +++ b/benches/try_ref_from_bytes_dynamic_size.x86-64 @@ -1,22 +1,22 @@ bench_try_ref_from_bytes_dynamic_size: - mov rdx, rsi - mov rax, rdi cmp rsi, 4 - setb cl - or cl, al - test cl, 1 - jne .LBB5_4 - lea rcx, [rdx - 4] - mov rsi, rcx - and rsi, -2 - add rsi, 4 - cmp rdx, rsi - jne .LBB5_4 - cmp word ptr [rax], -16192 - jne .LBB5_4 + jb .LBB5_1 + test dil, 1 + jne .LBB5_1 + mov rdx, rsi + lea rcx, [rsi - 4] + mov rax, rcx + and rax, -2 + add rax, 4 + cmp rsi, rax + jne .LBB5_1 shr rcx - mov rdx, rcx + movzx esi, word ptr [rdi] + xor eax, eax + cmp esi, 49344 + cmove rdx, rcx + cmove rax, rdi ret -.LBB5_4: +.LBB5_1: xor eax, eax ret diff --git a/benches/try_ref_from_bytes_dynamic_size.x86-64.mca b/benches/try_ref_from_bytes_dynamic_size.x86-64.mca index ecd7a18f6d..99a6a1d9f3 100644 --- a/benches/try_ref_from_bytes_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_bytes_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 Instructions: 2000 -Total Cycles: 639 -Total uOps: 2100 +Total Cycles: 641 +Total uOps: 2200 Dispatch Width: 4 -uOps Per Cycle: 3.29 -IPC: 3.13 -Block RThroughput: 5.3 +uOps Per Cycle: 3.43 +IPC: 3.12 +Block RThroughput: 5.5 Instruction Info: @@ -18,23 +18,23 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 mov rdx, rsi - 1 1 0.33 mov rax, rdi 1 1 0.33 cmp rsi, 4 - 1 1 0.50 setb cl - 1 1 0.33 or cl, al - 1 1 0.33 test cl, 1 - 1 1 1.00 jne .LBB5_4 - 1 1 0.50 lea rcx, [rdx - 4] - 1 1 0.33 mov rsi, rcx - 1 1 0.33 and rsi, -2 - 1 1 0.33 add rsi, 4 - 1 1 0.33 cmp rdx, rsi - 1 1 1.00 jne .LBB5_4 - 2 6 0.50 * cmp word ptr [rax], -16192 - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jb .LBB5_1 + 1 1 0.33 test dil, 1 + 1 1 1.00 jne .LBB5_1 + 1 1 0.33 mov rdx, rsi + 1 1 0.50 lea rcx, [rsi - 4] + 1 1 0.33 mov rax, rcx + 1 1 0.33 and rax, -2 + 1 1 0.33 add rax, 4 + 1 1 0.33 cmp rsi, rax + 1 1 1.00 jne .LBB5_1 1 1 0.50 shr rcx - 1 1 0.33 mov rdx, rcx + 1 5 0.50 * movzx esi, word ptr [rdi] + 1 0 0.25 xor eax, eax + 1 1 0.33 cmp esi, 49344 + 2 2 0.67 cmove rdx, rcx + 2 2 0.67 cmove rax, rdi 1 1 1.00 U ret 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -53,27 +53,27 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.32 6.32 - 6.36 0.50 0.50 + - - 6.31 6.32 - 6.37 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.33 0.66 - 0.01 - - mov rdx, rsi - - - 0.66 0.34 - - - - mov rax, rdi - - - 0.34 0.66 - - - - cmp rsi, 4 - - - 0.99 - - 0.01 - - setb cl - - - 0.01 0.99 - - - - or cl, al - - - - 1.00 - - - - test cl, 1 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.66 0.34 - - - - lea rcx, [rdx - 4] - - - 0.33 0.66 - 0.01 - - mov rsi, rcx - - - 1.00 - - - - - and rsi, -2 - - - 0.66 0.34 - - - - add rsi, 4 - - - - 1.00 - - - - cmp rdx, rsi - - - - - - 1.00 - - jne .LBB5_4 - - - - - - 1.00 0.50 0.50 cmp word ptr [rax], -16192 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.67 - - 0.33 - - shr rcx - - - 0.67 0.33 - - - - mov rdx, rcx + - - 0.95 0.03 - 0.02 - - cmp rsi, 4 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.37 0.63 - - - - test dil, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.66 0.33 - 0.01 - - mov rdx, rsi + - - 0.33 0.67 - - - - lea rcx, [rsi - 4] + - - 0.01 0.99 - - - - mov rax, rcx + - - - 1.00 - - - - and rax, -2 + - - - 0.99 - 0.01 - - add rax, 4 + - - 0.01 0.99 - - - - cmp rsi, rax + - - - - - 1.00 - - jne .LBB5_1 + - - 1.00 - - - - - shr rcx + - - - - - - 0.50 0.50 movzx esi, word ptr [rdi] + - - - - - - - - xor eax, eax + - - 0.99 0.01 - - - - cmp esi, 49344 + - - 0.99 0.02 - 0.99 - - cmove rdx, rcx + - - 1.00 0.66 - 0.34 - - cmove rax, rdi - - - - - 1.00 - - ret - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 index f8a719dd10..2ea5118fa3 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 +++ b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 @@ -1,36 +1,19 @@ bench_try_ref_from_bytes_with_elems_dynamic_padding: - mov rcx, rdx - mov edx, 3 - mov rax, rcx - mul rdx - jo .LBB5_8 - mov rdx, rax - cmp rax, -10 - ja .LBB5_8 - mov eax, edx - not eax - and eax, 3 - lea r8, [rax + rdx] - add r8, 9 - xor eax, eax - cmp rsi, r8 - jne .LBB5_6 - mov r9d, edi - and r9d, 3 - jne .LBB5_6 - add rdx, 9 - cmp r8, rdx - jb .LBB5_6 - movzx edx, word ptr [rdi] - cmp dx, -16192 - cmove rsi, rcx - xor eax, eax - cmp edx, 49344 - cmove rax, rdi -.LBB5_6: - mov rdx, rsi - ret -.LBB5_8: + movabs rcx, 3074457345618258598 + cmp rdx, rcx + ja .LBB5_4 + mov rax, rdi + test al, 3 + jne .LBB5_4 + lea rcx, [rdx + 2*rdx] + or rcx, 3 + add rcx, 9 + cmp rsi, rcx + jne .LBB5_4 + cmp word ptr [rax], -16192 + je .LBB5_5 +.LBB5_4: xor eax, eax mov rdx, rsi +.LBB5_5: ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca index bc48088077..c5d4a2b0d5 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 3300 -Total Cycles: 1082 -Total uOps: 3600 +Instructions: 1600 +Total Cycles: 507 +Total uOps: 1700 Dispatch Width: 4 -uOps Per Cycle: 3.33 -IPC: 3.05 -Block RThroughput: 9.0 +uOps Per Cycle: 3.35 +IPC: 3.16 +Block RThroughput: 5.0 Instruction Info: @@ -18,36 +18,19 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 mov rcx, rdx - 1 1 0.33 mov edx, 3 - 1 1 0.33 mov rax, rcx - 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_8 - 1 1 0.33 mov rdx, rax - 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_8 - 1 1 0.33 mov eax, edx - 1 1 0.33 not eax - 1 1 0.33 and eax, 3 - 1 1 0.50 lea r8, [rax + rdx] - 1 1 0.33 add r8, 9 - 1 0 0.25 xor eax, eax - 1 1 0.33 cmp rsi, r8 - 1 1 1.00 jne .LBB5_6 - 1 1 0.33 mov r9d, edi - 1 1 0.33 and r9d, 3 - 1 1 1.00 jne .LBB5_6 - 1 1 0.33 add rdx, 9 - 1 1 0.33 cmp r8, rdx - 1 1 1.00 jb .LBB5_6 - 1 5 0.50 * movzx edx, word ptr [rdi] - 1 1 0.33 cmp dx, -16192 - 2 2 0.67 cmove rsi, rcx - 1 0 0.25 xor eax, eax - 1 1 0.33 cmp edx, 49344 - 2 2 0.67 cmove rax, rdi - 1 1 0.33 mov rdx, rsi - 1 1 1.00 U ret + 1 1 0.33 movabs rcx, 3074457345618258598 + 1 1 0.33 cmp rdx, rcx + 1 1 1.00 ja .LBB5_4 + 1 1 0.33 mov rax, rdi + 1 1 0.33 test al, 3 + 1 1 1.00 jne .LBB5_4 + 1 1 0.50 lea rcx, [rdx + 2*rdx] + 1 1 0.33 or rcx, 3 + 1 1 0.33 add rcx, 9 + 1 1 0.33 cmp rsi, rcx + 1 1 1.00 jne .LBB5_4 + 2 6 0.50 * cmp word ptr [rax], -16192 + 1 1 1.00 je .LBB5_5 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rsi 1 1 1.00 U ret @@ -66,40 +49,23 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 10.64 10.65 - 10.71 0.50 0.50 + - - 4.98 4.99 - 5.03 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.25 0.73 - 0.02 - - mov rcx, rdx - - - 0.74 0.03 - 0.23 - - mov edx, 3 - - - 0.60 0.40 - - - - mov rax, rcx - - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_8 - - - 0.11 0.89 - - - - mov rdx, rax - - - 0.99 0.01 - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_8 - - - 0.01 0.89 - 0.10 - - mov eax, edx - - - 0.11 0.89 - - - - not eax - - - 0.01 0.88 - 0.11 - - and eax, 3 - - - 0.01 0.99 - - - - lea r8, [rax + rdx] - - - 0.01 0.99 - - - - add r8, 9 - - - - - - - - - xor eax, eax - - - - 0.99 - 0.01 - - cmp rsi, r8 - - - - - - 1.00 - - jne .LBB5_6 - - - 0.42 - - 0.58 - - mov r9d, edi - - - 0.53 0.01 - 0.46 - - and r9d, 3 - - - - - - 1.00 - - jne .LBB5_6 - - - 0.99 0.01 - - - - add rdx, 9 - - - 0.99 0.01 - - - - cmp r8, rdx - - - - - - 1.00 - - jb .LBB5_6 - - - - - - - 0.50 0.50 movzx edx, word ptr [rdi] - - - 0.45 0.01 - 0.54 - - cmp dx, -16192 - - - 1.00 0.35 - 0.65 - - cmove rsi, rcx - - - - - - - - - xor eax, eax - - - 0.75 0.02 - 0.23 - - cmp edx, 49344 - - - 1.00 0.68 - 0.32 - - cmove rax, rdi - - - 0.12 0.54 - 0.34 - - mov rdx, rsi - - - - - - 1.00 - - ret + - - 0.98 0.01 - 0.01 - - movabs rcx, 3074457345618258598 + - - 0.01 0.99 - - - - cmp rdx, rcx + - - - - - 1.00 - - ja .LBB5_4 + - - 0.99 0.01 - - - - mov rax, rdi + - - 0.01 0.98 - 0.01 - - test al, 3 + - - - - - 1.00 - - jne .LBB5_4 + - - 0.98 0.02 - - - - lea rcx, [rdx + 2*rdx] + - - 0.01 0.99 - - - - or rcx, 3 + - - - 1.00 - - - - add rcx, 9 + - - - 0.99 - 0.01 - - cmp rsi, rcx + - - - - - 1.00 - - jne .LBB5_4 + - - 1.00 - - - 0.50 0.50 cmp word ptr [rax], -16192 + - - - - - 1.00 - - je .LBB5_5 - - - - - - - - xor eax, eax - - - 0.55 0.33 - 0.12 - - mov rdx, rsi + - - 1.00 - - - - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 index 791351a659..9054d9c7a1 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 +++ b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 @@ -1,13 +1,13 @@ bench_try_ref_from_bytes_with_elems_dynamic_size: - movabs rax, 9223372036854775805 - cmp rdx, rax - seta cl + movabs rcx, 4611686018427387901 + cmp rdx, rcx + ja .LBB5_3 mov rax, rdi - or dil, cl - test dil, 1 - jne .LBB5_3 lea rcx, [2*rdx + 4] cmp rsi, rcx + setne cl + or cl, al + test cl, 1 jne .LBB5_3 cmp word ptr [rax], -16192 je .LBB5_4 diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca index 76a7caaecf..66d1b87267 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 Instructions: 1500 -Total Cycles: 507 -Total uOps: 1700 +Total Cycles: 474 +Total uOps: 1600 Dispatch Width: 4 -uOps Per Cycle: 3.35 -IPC: 2.96 -Block RThroughput: 4.3 +uOps Per Cycle: 3.38 +IPC: 3.16 +Block RThroughput: 4.0 Instruction Info: @@ -18,15 +18,15 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 9223372036854775805 - 1 1 0.33 cmp rdx, rax - 2 2 1.00 seta cl + 1 1 0.33 movabs rcx, 4611686018427387901 + 1 1 0.33 cmp rdx, rcx + 1 1 1.00 ja .LBB5_3 1 1 0.33 mov rax, rdi - 1 1 0.33 or dil, cl - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_3 1 1 0.50 lea rcx, [2*rdx + 4] 1 1 0.33 cmp rsi, rcx + 1 1 0.50 setne cl + 1 1 0.33 or cl, al + 1 1 0.33 test cl, 1 1 1 1.00 jne .LBB5_3 2 6 0.50 * cmp word ptr [rax], -16192 1 1 1.00 je .LBB5_4 @@ -48,22 +48,22 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.98 4.99 - 5.03 0.50 0.50 + - - 4.66 4.66 - 4.68 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - 0.99 - 0.01 - - movabs rax, 9223372036854775805 - - - 0.50 0.50 - - - - cmp rdx, rax - - - 1.96 - - 0.04 - - seta cl - - - 0.01 0.99 - - - - mov rax, rdi - - - 1.00 - - - - - or dil, cl - - - 0.99 0.01 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_3 - - - 0.01 0.99 - - - - lea rcx, [2*rdx + 4] - - - 0.02 0.49 - 0.49 - - cmp rsi, rcx + - - 0.33 0.66 - 0.01 - - movabs rcx, 4611686018427387901 + - - 1.00 - - - - - cmp rdx, rcx + - - - - - 1.00 - - ja .LBB5_3 + - - 0.66 0.01 - 0.33 - - mov rax, rdi + - - 0.33 0.67 - - - - lea rcx, [2*rdx + 4] + - - 0.01 0.99 - - - - cmp rsi, rcx + - - 0.66 - - 0.34 - - setne cl + - - - 1.00 - - - - or cl, al + - - 0.01 0.99 - - - - test cl, 1 - - - - - 1.00 - - jne .LBB5_3 - - - - 0.51 - 0.49 0.50 0.50 cmp word ptr [rax], -16192 + - - 0.99 0.01 - - 0.50 0.50 cmp word ptr [rax], -16192 - - - - - 1.00 - - je .LBB5_4 - - - - - - - - xor eax, eax - - - 0.49 0.51 - - - - mov rdx, rsi + - - 0.67 0.33 - - - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_dynamic_padding.x86-64 b/benches/try_ref_from_prefix_dynamic_padding.x86-64 index d832cb7ecf..3cb4e6b574 100644 --- a/benches/try_ref_from_prefix_dynamic_padding.x86-64 +++ b/benches/try_ref_from_prefix_dynamic_padding.x86-64 @@ -1,29 +1,31 @@ bench_try_ref_from_prefix_dynamic_padding: - xor edx, edx - mov eax, 0 - test dil, 3 - je .LBB5_1 - ret -.LBB5_1: movabs rax, 9223372036854775804 - and rsi, rax - cmp rsi, 9 - jae .LBB5_3 + and rax, rsi + cmp rax, 9 + jae .LBB5_2 mov edx, 1 - xor eax, eax + xor ecx, ecx + mov rax, rcx + ret +.LBB5_2: + xor edx, edx + mov ecx, 0 + test dil, 3 + je .LBB5_3 + mov rax, rcx ret .LBB5_3: - add rsi, -9 + add rax, -9 movabs rcx, -6148914691236517205 - mov rax, rsi mul rcx mov rax, rdx shr rax - movzx ecx, word ptr [rdi] - cmp cx, -16192 + movzx esi, word ptr [rdi] + cmp si, -16192 mov edx, 2 cmove rdx, rax - xor eax, eax - cmp ecx, 49344 - cmove rax, rdi + xor ecx, ecx + cmp esi, 49344 + cmove rcx, rdi + mov rax, rcx ret diff --git a/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca b/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca index 482112a39b..ef17cbfa30 100644 --- a/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2600 -Total Cycles: 843 -Total uOps: 2900 +Instructions: 2800 +Total Cycles: 910 +Total uOps: 3100 Dispatch Width: 4 -uOps Per Cycle: 3.44 +uOps Per Cycle: 3.41 IPC: 3.08 -Block RThroughput: 7.3 +Block RThroughput: 7.8 Instruction Info: @@ -18,31 +18,33 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_1 - 1 1 1.00 U ret 1 1 0.33 movabs rax, 9223372036854775804 - 1 1 0.33 and rsi, rax - 1 1 0.33 cmp rsi, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 0.33 and rax, rsi + 1 1 0.33 cmp rax, 9 + 1 1 1.00 jae .LBB5_2 1 1 0.33 mov edx, 1 - 1 0 0.25 xor eax, eax + 1 0 0.25 xor ecx, ecx + 1 1 0.33 mov rax, rcx + 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov ecx, 0 + 1 1 0.33 test dil, 3 + 1 1 1.00 je .LBB5_3 + 1 1 0.33 mov rax, rcx 1 1 1.00 U ret - 1 1 0.33 add rsi, -9 + 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 - 1 1 0.33 mov rax, rsi 2 4 1.00 mul rcx 1 1 0.33 mov rax, rdx 1 1 0.50 shr rax - 1 5 0.50 * movzx ecx, word ptr [rdi] - 1 1 0.33 cmp cx, -16192 + 1 5 0.50 * movzx esi, word ptr [rdi] + 1 1 0.33 cmp si, -16192 1 1 0.33 mov edx, 2 2 2 0.67 cmove rdx, rax - 1 0 0.25 xor eax, eax - 1 1 0.33 cmp ecx, 49344 - 2 2 0.67 cmove rax, rdi + 1 0 0.25 xor ecx, ecx + 1 1 0.33 cmp esi, 49344 + 2 2 0.67 cmove rcx, rdi + 1 1 0.33 mov rax, rcx 1 1 1.00 U ret @@ -59,33 +61,35 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 8.33 8.33 - 8.34 0.50 0.50 + - - 9.00 9.00 - 9.00 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - - xor edx, edx - - - 0.32 0.34 - 0.34 - - mov eax, 0 - - - 0.34 0.33 - 0.33 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_1 + - - 0.03 0.96 - 0.01 - - movabs rax, 9223372036854775804 + - - 0.96 0.01 - 0.03 - - and rax, rsi + - - 0.96 0.02 - 0.02 - - cmp rax, 9 + - - - - - 1.00 - - jae .LBB5_2 + - - 0.99 - - 0.01 - - mov edx, 1 + - - - - - - - - xor ecx, ecx + - - 0.01 - - 0.99 - - mov rax, rcx - - - - - 1.00 - - ret - - - 0.35 0.65 - - - - movabs rax, 9223372036854775804 - - - 0.96 0.03 - 0.01 - - and rsi, rax - - - 0.01 0.97 - 0.02 - - cmp rsi, 9 - - - - - - 1.00 - - jae .LBB5_3 - - - 0.67 0.01 - 0.32 - - mov edx, 1 - - - - - - - - - xor eax, eax + - - - - - - - - xor edx, edx + - - 0.01 0.03 - 0.96 - - mov ecx, 0 + - - 0.02 0.97 - 0.01 - - test dil, 3 + - - - - - 1.00 - - je .LBB5_3 + - - 0.02 0.98 - - - - mov rax, rcx - - - - - 1.00 - - ret - - - 0.02 0.34 - 0.64 - - add rsi, -9 - - - 0.33 0.66 - 0.01 - - movabs rcx, -6148914691236517205 - - - 0.66 0.34 - - - - mov rax, rsi + - - - 1.00 - - - - add rax, -9 + - - 1.00 - - - - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - 0.01 0.99 - - - - mov rax, rdx - - 0.99 - - 0.01 - - shr rax - - - - - - - 0.50 0.50 movzx ecx, word ptr [rdi] - - - 0.33 0.03 - 0.64 - - cmp cx, -16192 - - - 0.01 0.31 - 0.68 - - mov edx, 2 + - - - - - - 0.50 0.50 movzx esi, word ptr [rdi] + - - 0.97 0.02 - 0.01 - - cmp si, -16192 + - - 0.01 0.02 - 0.97 - - mov edx, 2 - - 1.00 1.00 - - - - cmove rdx, rax - - - - - - - - - xor eax, eax - - - 0.33 0.33 - 0.34 - - cmp ecx, 49344 - - - 1.00 1.00 - - - - cmove rax, rdi + - - - - - - - - xor ecx, ecx + - - 0.01 0.01 - 0.98 - - cmp esi, 49344 + - - 1.00 1.00 - - - - cmove rcx, rdi + - - 0.01 0.99 - - - - mov rax, rcx - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_dynamic_size.x86-64 b/benches/try_ref_from_prefix_dynamic_size.x86-64 index be7f34b9f8..bca29f5523 100644 --- a/benches/try_ref_from_prefix_dynamic_size.x86-64 +++ b/benches/try_ref_from_prefix_dynamic_size.x86-64 @@ -1,14 +1,14 @@ bench_try_ref_from_prefix_dynamic_size: - xor edx, edx - mov eax, 0 - test dil, 1 - jne .LBB5_4 cmp rsi, 4 - jae .LBB5_3 + jae .LBB5_2 mov edx, 1 xor eax, eax ret -.LBB5_3: +.LBB5_2: + xor edx, edx + mov eax, 0 + test dil, 1 + jne .LBB5_4 add rsi, -4 shr rsi movzx ecx, word ptr [rdi] diff --git a/benches/try_ref_from_prefix_dynamic_size.x86-64.mca b/benches/try_ref_from_prefix_dynamic_size.x86-64.mca index 11706defe1..bdc62c5367 100644 --- a/benches/try_ref_from_prefix_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_prefix_dynamic_size.x86-64.mca @@ -18,15 +18,15 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_4 1 1 0.33 cmp rsi, 4 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB5_2 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test dil, 1 + 1 1 1.00 jne .LBB5_4 1 1 0.33 add rsi, -4 1 1 0.50 shr rsi 1 5 0.50 * movzx ecx, word ptr [rdi] @@ -56,22 +56,22 @@ Resource pressure per iteration: Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - - - - - - - xor edx, edx - - - 0.30 0.37 - 0.33 - - mov eax, 0 - - - 0.35 0.32 - 0.33 - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_4 - - - 0.32 0.33 - 0.35 - - cmp rsi, 4 - - - - - - 1.00 - - jae .LBB5_3 - - - 0.33 0.35 - 0.32 - - mov edx, 1 + - - - 0.35 - 0.65 - - cmp rsi, 4 + - - - - - 1.00 - - jae .LBB5_2 + - - 0.34 0.66 - - - - mov edx, 1 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.34 0.64 - 0.02 - - add rsi, -4 + - - - - - - - - xor edx, edx + - - 0.64 0.34 - 0.02 - - mov eax, 0 + - - 0.33 0.64 - 0.03 - - test dil, 1 + - - - - - 1.00 - - jne .LBB5_4 + - - 0.64 0.34 - 0.02 - - add rsi, -4 - - 1.00 - - - - - shr rsi - - - - - - 0.50 0.50 movzx ecx, word ptr [rdi] - - - 0.60 0.40 - - - - cmp ecx, 49344 - - - 0.05 0.95 - - - - mov edx, 2 + - - 0.32 0.38 - 0.30 - - cmp ecx, 49344 + - - 0.03 0.95 - 0.02 - - mov edx, 2 - - 1.00 1.00 - - - - cmove rdx, rsi - - - - - - - - xor eax, eax - - - 0.37 0.31 - 0.32 - - cmp cx, -16192 + - - 0.36 0.01 - 0.63 - - cmp cx, -16192 - - 1.00 1.00 - - - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_static_size.x86-64 b/benches/try_ref_from_prefix_static_size.x86-64 index 83212f776e..804d65c8d5 100644 --- a/benches/try_ref_from_prefix_static_size.x86-64 +++ b/benches/try_ref_from_prefix_static_size.x86-64 @@ -1,8 +1,9 @@ bench_try_ref_from_prefix_static_size: cmp rsi, 6 setb al - or al, dil - test al, 1 + mov ecx, edi + or cl, al + test cl, 1 jne .LBB5_2 movzx eax, word ptr [rdi] cmp eax, 49344 diff --git a/benches/try_ref_from_prefix_static_size.x86-64.mca b/benches/try_ref_from_prefix_static_size.x86-64.mca index 5d02b863a7..27fa1930fe 100644 --- a/benches/try_ref_from_prefix_static_size.x86-64.mca +++ b/benches/try_ref_from_prefix_static_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1200 -Total Cycles: 374 -Total uOps: 1300 +Instructions: 1300 +Total Cycles: 407 +Total uOps: 1400 Dispatch Width: 4 -uOps Per Cycle: 3.48 -IPC: 3.21 -Block RThroughput: 3.3 +uOps Per Cycle: 3.44 +IPC: 3.19 +Block RThroughput: 3.5 Instruction Info: @@ -20,8 +20,9 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 cmp rsi, 6 1 1 0.50 setb al - 1 1 0.33 or al, dil - 1 1 0.33 test al, 1 + 1 1 0.33 mov ecx, edi + 1 1 0.33 or cl, al + 1 1 0.33 test cl, 1 1 1 1.00 jne .LBB5_2 1 5 0.50 * movzx eax, word ptr [rdi] 1 1 0.33 cmp eax, 49344 @@ -45,18 +46,19 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 3.66 3.65 - 3.69 0.50 0.50 + - - 3.99 3.99 - 4.02 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.35 0.64 - 0.01 - - cmp rsi, 6 + - - 0.05 0.94 - 0.01 - - cmp rsi, 6 - - 1.00 - - - - - setb al - - - 0.02 0.66 - 0.32 - - or al, dil - - - 0.03 0.65 - 0.32 - - test al, 1 + - - 0.93 0.07 - - - - mov ecx, edi + - - 0.03 0.96 - 0.01 - - or cl, al + - - 0.03 0.02 - 0.95 - - test cl, 1 - - - - - 1.00 - - jne .LBB5_2 - - - - - - 0.50 0.50 movzx eax, word ptr [rdi] - - - 0.92 0.07 - 0.01 - - cmp eax, 49344 - - - 0.37 0.63 - - - - mov eax, 2 + - - 0.02 0.97 - 0.01 - - cmp eax, 49344 + - - 0.96 0.03 - 0.01 - - mov eax, 2 - - 0.97 1.00 - 0.03 - - cmove rax, rdi - - - - - 1.00 - - je .LBB5_3 - - - - - - - - xor eax, eax diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 index d7b2ca9ce2..15273eeb08 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 +++ b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 @@ -1,30 +1,23 @@ bench_try_ref_from_prefix_with_elems_dynamic_padding: mov rcx, rdx - mov edx, 3 - mov rax, rcx - mul rdx - jo .LBB5_1 - cmp rax, -10 - ja .LBB5_1 - lea rdx, [rax + 9] - not eax - and eax, 3 - add rax, rdx - jae .LBB5_4 -.LBB5_1: - xor eax, eax mov edx, 1 - ret + movabs rax, 3074457345618258598 + cmp rcx, rax + ja .LBB5_4 + lea rax, [rcx + 2*rcx] + or rax, 3 + add rax, 9 + cmp rax, rsi + jbe .LBB5_2 .LBB5_4: - mov r8, rax + xor eax, eax +.LBB5_5: + ret +.LBB5_2: xor edx, edx mov eax, 0 test dil, 3 - je .LBB5_5 - ret -.LBB5_5: - cmp r8, rsi - ja .LBB5_1 + jne .LBB5_5 movzx esi, word ptr [rdi] cmp si, -16192 mov edx, 2 diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca index 9df1d29761..4fc4306581 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 3100 -Total Cycles: 1008 -Total uOps: 3400 +Instructions: 2400 +Total Cycles: 741 +Total uOps: 2600 Dispatch Width: 4 -uOps Per Cycle: 3.37 -IPC: 3.08 -Block RThroughput: 8.5 +uOps Per Cycle: 3.51 +IPC: 3.24 +Block RThroughput: 6.5 Instruction Info: @@ -19,28 +19,21 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 mov rcx, rdx - 1 1 0.33 mov edx, 3 - 1 1 0.33 mov rax, rcx - 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_1 - 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_1 - 1 1 0.50 lea rdx, [rax + 9] - 1 1 0.33 not eax - 1 1 0.33 and eax, 3 - 1 1 0.33 add rax, rdx - 1 1 1.00 jae .LBB5_4 - 1 0 0.25 xor eax, eax 1 1 0.33 mov edx, 1 + 1 1 0.33 movabs rax, 3074457345618258598 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_4 + 1 1 0.50 lea rax, [rcx + 2*rcx] + 1 1 0.33 or rax, 3 + 1 1 0.33 add rax, 9 + 1 1 0.33 cmp rax, rsi + 1 1 1.00 jbe .LBB5_2 + 1 0 0.25 xor eax, eax 1 1 1.00 U ret - 1 1 0.33 mov r8, rax 1 0 0.25 xor edx, edx 1 1 0.33 mov eax, 0 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_5 - 1 1 1.00 U ret - 1 1 0.33 cmp r8, rsi - 1 1 1.00 ja .LBB5_1 + 1 1 1.00 jne .LBB5_5 1 5 0.50 * movzx esi, word ptr [rdi] 1 1 0.33 cmp si, -16192 1 1 0.33 mov edx, 2 @@ -64,38 +57,31 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 9.98 9.99 - 10.03 0.50 0.50 + - - 7.32 7.33 - 7.35 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.49 0.50 - 0.01 - - mov rcx, rdx - - - 0.01 0.99 - - - - mov edx, 3 - - - 0.99 0.01 - - - - mov rax, rcx - - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_1 - - - 1.00 - - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_1 - - - - 1.00 - - - - lea rdx, [rax + 9] - - - 1.00 - - - - - not eax - - - 0.99 0.01 - - - - and eax, 3 - - - 0.99 0.01 - - - - add rax, rdx - - - - - - 1.00 - - jae .LBB5_4 + - - - 0.99 - 0.01 - - mov rcx, rdx + - - 0.66 0.02 - 0.32 - - mov edx, 1 + - - 0.35 0.32 - 0.33 - - movabs rax, 3074457345618258598 + - - - 0.99 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_4 + - - 0.99 0.01 - - - - lea rax, [rcx + 2*rcx] + - - 0.33 0.67 - - - - or rax, 3 + - - - 1.00 - - - - add rax, 9 + - - - 1.00 - - - - cmp rax, rsi + - - - - - 1.00 - - jbe .LBB5_2 - - - - - - - - xor eax, eax - - - - 0.98 - 0.02 - - mov edx, 1 - - - - - 1.00 - - ret - - - 0.50 0.50 - - - - mov r8, rax - - - - - - - - xor edx, edx - - - 0.02 0.49 - 0.49 - - mov eax, 0 - - - - 0.49 - 0.51 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_5 - - - - - - 1.00 - - ret - - - 0.98 0.02 - - - - cmp r8, rsi - - - - - - 1.00 - - ja .LBB5_1 + - - 0.34 - - 0.66 - - mov eax, 0 + - - 0.99 - - 0.01 - - test dil, 3 + - - - - - 1.00 - - jne .LBB5_5 - - - - - - 0.50 0.50 movzx esi, word ptr [rdi] - - - 0.02 0.98 - - - - cmp si, -16192 - - - 0.98 0.02 - - - - mov edx, 2 - - - 0.50 1.00 - 0.50 - - cmove rdx, rcx + - - 0.66 0.01 - 0.33 - - cmp si, -16192 + - - 0.67 0.32 - 0.01 - - mov edx, 2 + - - 1.00 0.99 - 0.01 - - cmove rdx, rcx - - - - - - - - xor eax, eax - - - 0.01 0.99 - - - - cmp esi, 49344 - - - 0.50 1.00 - 0.50 - - cmove rax, rdi + - - 0.33 0.33 - 0.34 - - cmp esi, 49344 + - - 1.00 0.68 - 0.32 - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 index b659b67b58..c1b444fde9 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 +++ b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 @@ -1,20 +1,20 @@ bench_try_ref_from_prefix_with_elems_dynamic_size: - movabs rax, 9223372036854775805 - cmp rdx, rax - ja .LBB5_1 mov rcx, rdx - xor edx, edx - mov eax, 0 - test dil, 1 - jne .LBB5_5 + mov edx, 1 + movabs rax, 4611686018427387901 + cmp rcx, rax + ja .LBB5_3 lea rax, [2*rcx + 4] cmp rax, rsi jbe .LBB5_4 -.LBB5_1: +.LBB5_3: xor eax, eax - mov edx, 1 ret .LBB5_4: + xor edx, edx + mov eax, 0 + test dil, 1 + jne .LBB5_6 movzx esi, word ptr [rdi] cmp si, -16192 mov edx, 2 @@ -22,5 +22,5 @@ bench_try_ref_from_prefix_with_elems_dynamic_size: xor eax, eax cmp esi, 49344 cmove rax, rdi -.LBB5_5: +.LBB5_6: ret diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca index 7dc6caa16b..c7bcc8ae1d 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca @@ -18,20 +18,20 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 9223372036854775805 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 1 1 0.33 mov rcx, rdx - 1 0 0.25 xor edx, edx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_5 + 1 1 0.33 mov edx, 1 + 1 1 0.33 movabs rax, 4611686018427387901 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 1 1 0.50 lea rax, [2*rcx + 4] 1 1 0.33 cmp rax, rsi 1 1 1.00 jbe .LBB5_4 1 0 0.25 xor eax, eax - 1 1 0.33 mov edx, 1 1 1 1.00 U ret + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test dil, 1 + 1 1 1.00 jne .LBB5_6 1 5 0.50 * movzx esi, word ptr [rdi] 1 1 0.33 cmp si, -16192 1 1 0.33 mov edx, 2 @@ -55,29 +55,29 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.65 6.66 - 6.69 0.50 0.50 + - - 6.66 6.66 - 6.68 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.66 0.33 - 0.01 - - movabs rax, 9223372036854775805 - - - 0.02 0.66 - 0.32 - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - 0.66 0.33 - 0.01 - - mov rcx, rdx - - - - - - - - - xor edx, edx - - - 0.33 0.01 - 0.66 - - mov eax, 0 - - - 0.34 0.65 - 0.01 - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_5 - - - 0.65 0.35 - - - - lea rax, [2*rcx + 4] - - - - 1.00 - - - - cmp rax, rsi + - - 0.01 0.98 - 0.01 - - mov rcx, rdx + - - 0.67 0.01 - 0.32 - - mov edx, 1 + - - 0.33 0.33 - 0.34 - - movabs rax, 4611686018427387901 + - - - 0.99 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - 0.99 0.01 - - - - lea rax, [2*rcx + 4] + - - 0.33 0.67 - - - - cmp rax, rsi - - - - - 1.00 - - jbe .LBB5_4 - - - - - - - - xor eax, eax - - - 0.34 0.01 - 0.65 - - mov edx, 1 - - - - - 1.00 - - ret + - - - - - - - - xor edx, edx + - - 0.34 0.02 - 0.64 - - mov eax, 0 + - - 0.33 0.66 - 0.01 - - test dil, 1 + - - - - - 1.00 - - jne .LBB5_6 - - - - - - 0.50 0.50 movzx esi, word ptr [rdi] - - - 0.65 0.34 - 0.01 - - cmp si, -16192 - - - 0.66 0.34 - - - - mov edx, 2 + - - 0.66 0.34 - - - - cmp si, -16192 + - - 0.33 0.67 - - - - mov edx, 2 - - 1.00 0.99 - 0.01 - - cmove rdx, rcx - - - - - - - - xor eax, eax - - - 0.34 0.66 - - - - cmp esi, 49344 - - - 1.00 0.99 - 0.01 - - cmove rax, rdi + - - 0.67 0.32 - 0.01 - - cmp esi, 49344 + - - 1.00 0.67 - 0.33 - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_dynamic_padding.x86-64 b/benches/try_ref_from_suffix_dynamic_padding.x86-64 index b3e9244428..b265188697 100644 --- a/benches/try_ref_from_suffix_dynamic_padding.x86-64 +++ b/benches/try_ref_from_suffix_dynamic_padding.x86-64 @@ -1,11 +1,11 @@ bench_try_ref_from_suffix_dynamic_padding: - lea eax, [rsi + rdi] - test al, 3 - jne .LBB5_1 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jae .LBB5_3 + jb .LBB5_1 + lea ecx, [rsi + rdi] + test cl, 3 + je .LBB5_3 .LBB5_1: xor eax, eax ret diff --git a/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca b/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca index d56ae56d85..ad9399513b 100644 --- a/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 Instructions: 2300 -Total Cycles: 791 +Total Cycles: 797 Total uOps: 2600 Dispatch Width: 4 -uOps Per Cycle: 3.29 -IPC: 2.91 +uOps Per Cycle: 3.26 +IPC: 2.89 Block RThroughput: 6.5 @@ -18,13 +18,13 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] - 1 1 0.33 test al, 3 - 1 1 1.00 jne .LBB5_1 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea ecx, [rsi + rdi] + 1 1 0.33 test cl, 3 + 1 1 1.00 je .LBB5_3 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.33 add rax, -9 @@ -56,30 +56,30 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 7.70 7.58 - 7.72 0.50 0.50 + - - 7.67 7.62 - 7.71 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.26 0.74 - - - - lea eax, [rsi + rdi] - - - 0.19 0.28 - 0.53 - - test al, 3 - - - - - - 1.00 - - jne .LBB5_1 - - - 0.93 0.06 - 0.01 - - movabs rax, 9223372036854775804 - - - 0.81 0.14 - 0.05 - - and rax, rsi - - - 0.55 0.43 - 0.02 - - cmp rax, 9 - - - - - - 1.00 - - jae .LBB5_3 + - - 0.60 0.24 - 0.16 - - movabs rax, 9223372036854775804 + - - 0.58 0.17 - 0.25 - - and rax, rsi + - - 0.33 0.60 - 0.07 - - cmp rax, 9 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.30 0.70 - - - - lea ecx, [rsi + rdi] + - - 0.13 0.57 - 0.30 - - test cl, 3 + - - - - - 1.00 - - je .LBB5_3 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.42 0.56 - 0.02 - - add rax, -9 - - - 0.67 0.30 - 0.03 - - movabs rcx, -6148914691236517205 + - - 0.72 0.21 - 0.07 - - add rax, -9 + - - 0.69 0.23 - 0.08 - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx - - - 0.71 - - 0.29 - - shr rdx - - - 0.32 0.68 - - - - lea rcx, [rdx + 2*rdx] - - - 0.57 0.04 - 0.39 - - sub rsi, rcx - - - 0.28 0.67 - 0.05 - - or rcx, -4 - - - 0.29 0.29 - 0.42 - - add rsi, rdi - - - 0.02 0.98 - - - - lea rdi, [rcx + rsi] - - - 0.02 0.41 - 0.57 - - add rdi, -8 + - - 0.60 - - 0.40 - - shr rdx + - - 0.50 0.50 - - - - lea rcx, [rdx + 2*rdx] + - - 0.44 0.25 - 0.31 - - sub rsi, rcx + - - 0.52 0.34 - 0.14 - - or rcx, -4 + - - 0.28 0.46 - 0.26 - - add rsi, rdi + - - 0.06 0.94 - - - - lea rdi, [rcx + rsi] + - - - 0.37 - 0.63 - - add rdi, -8 - - - - - - - - xor eax, eax - - - 0.57 0.01 - 0.42 0.50 0.50 cmp word ptr [rcx + rsi - 8], -16192 - - - 0.09 0.99 - 0.92 - - cmove rax, rdi + - - 0.58 0.06 - 0.36 0.50 0.50 cmp word ptr [rcx + rsi - 8], -16192 + - - 0.34 0.98 - 0.68 - - cmove rax, rdi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_dynamic_size.x86-64 b/benches/try_ref_from_suffix_dynamic_size.x86-64 index d51f7817e5..f175802bae 100644 --- a/benches/try_ref_from_suffix_dynamic_size.x86-64 +++ b/benches/try_ref_from_suffix_dynamic_size.x86-64 @@ -1,13 +1,9 @@ bench_try_ref_from_suffix_dynamic_size: - lea eax, [rsi + rdi] cmp rsi, 4 - setb cl - or cl, al - test cl, 1 - je .LBB5_2 - xor eax, eax - ret -.LBB5_2: + jb .LBB5_1 + lea eax, [rsi + rdi] + test al, 1 + jne .LBB5_1 lea rdx, [rsi - 4] shr rdx and esi, 1 @@ -16,3 +12,6 @@ bench_try_ref_from_suffix_dynamic_size: cmp word ptr [rdi + rsi], -16192 cmove rax, rcx ret +.LBB5_1: + xor eax, eax + ret diff --git a/benches/try_ref_from_suffix_dynamic_size.x86-64.mca b/benches/try_ref_from_suffix_dynamic_size.x86-64.mca index 6cf7f8e493..37b19a1fab 100644 --- a/benches/try_ref_from_suffix_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_suffix_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 1600 -Total Cycles: 510 -Total uOps: 1800 +Instructions: 1500 +Total Cycles: 476 +Total uOps: 1700 Dispatch Width: 4 -uOps Per Cycle: 3.53 -IPC: 3.14 -Block RThroughput: 4.5 +uOps Per Cycle: 3.57 +IPC: 3.15 +Block RThroughput: 4.3 Instruction Info: @@ -18,14 +18,11 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] 1 1 0.33 cmp rsi, 4 - 1 1 0.50 setb cl - 1 1 0.33 or cl, al - 1 1 0.33 test cl, 1 - 1 1 1.00 je .LBB5_2 - 1 0 0.25 xor eax, eax - 1 1 1.00 U ret + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea eax, [rsi + rdi] + 1 1 0.33 test al, 1 + 1 1 1.00 jne .LBB5_1 1 1 0.50 lea rdx, [rsi - 4] 1 1 0.50 shr rdx 1 1 0.33 and esi, 1 @@ -34,6 +31,8 @@ Instruction Info: 2 6 0.50 * cmp word ptr [rdi + rsi], -16192 2 2 0.67 cmove rax, rcx 1 1 1.00 U ret + 1 0 0.25 xor eax, eax + 1 1 1.00 U ret Resources: @@ -49,23 +48,22 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.99 5.00 - 5.01 0.50 0.50 + - - 4.66 4.66 - 4.68 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.98 0.02 - - - - lea eax, [rsi + rdi] - - - - 0.98 - 0.02 - - cmp rsi, 4 - - - 1.00 - - - - - setb cl - - - 0.01 0.99 - - - - or cl, al - - - 0.01 0.07 - 0.92 - - test cl, 1 - - - - - - 1.00 - - je .LBB5_2 + - - 0.58 0.41 - 0.01 - - cmp rsi, 4 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.41 0.59 - - - - lea eax, [rsi + rdi] + - - 0.28 0.72 - - - - test al, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.70 0.30 - - - - lea rdx, [rsi - 4] + - - 0.68 - - 0.32 - - shr rdx + - - 0.61 0.32 - 0.07 - - and esi, 1 + - - 0.28 0.72 - - - - lea rcx, [rdi + rsi] - - - - - - - - xor eax, eax + - - 0.12 0.60 - 0.28 0.50 0.50 cmp word ptr [rdi + rsi], -16192 + - - 1.00 1.00 - - - - cmove rax, rcx - - - - - 1.00 - - ret - - - 0.93 0.07 - - - - lea rdx, [rsi - 4] - - - 0.93 - - 0.07 - - shr rdx - - - 0.06 0.93 - 0.01 - - and esi, 1 - - - 0.07 0.93 - - - - lea rcx, [rdi + rsi] - - - - - - - - xor eax, eax - - - - 0.01 - 0.99 0.50 0.50 cmp word ptr [rdi + rsi], -16192 - - - 1.00 1.00 - - - - cmove rax, rcx - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_static_size.x86-64 b/benches/try_ref_from_suffix_static_size.x86-64 index cd39f70931..e917f89bbb 100644 --- a/benches/try_ref_from_suffix_static_size.x86-64 +++ b/benches/try_ref_from_suffix_static_size.x86-64 @@ -1,16 +1,15 @@ bench_try_ref_from_suffix_static_size: - lea eax, [rsi + rdi] cmp rsi, 6 - setb cl - or cl, al - test cl, 1 - je .LBB5_2 - xor eax, eax - ret -.LBB5_2: + jb .LBB5_1 + lea eax, [rsi + rdi] + test al, 1 + jne .LBB5_1 lea rcx, [rdi + rsi] add rcx, -6 xor eax, eax cmp word ptr [rdi + rsi - 6], -16192 cmove rax, rcx ret +.LBB5_1: + xor eax, eax + ret diff --git a/benches/try_ref_from_suffix_static_size.x86-64.mca b/benches/try_ref_from_suffix_static_size.x86-64.mca index 087d1e7ed9..1227e4103d 100644 --- a/benches/try_ref_from_suffix_static_size.x86-64.mca +++ b/benches/try_ref_from_suffix_static_size.x86-64.mca @@ -1,11 +1,11 @@ Iterations: 100 -Instructions: 1400 -Total Cycles: 443 -Total uOps: 1600 +Instructions: 1300 +Total Cycles: 410 +Total uOps: 1500 Dispatch Width: 4 -uOps Per Cycle: 3.61 -IPC: 3.16 +uOps Per Cycle: 3.66 +IPC: 3.17 Block RThroughput: 4.0 @@ -18,20 +18,19 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.50 lea eax, [rsi + rdi] 1 1 0.33 cmp rsi, 6 - 1 1 0.50 setb cl - 1 1 0.33 or cl, al - 1 1 0.33 test cl, 1 - 1 1 1.00 je .LBB5_2 - 1 0 0.25 xor eax, eax - 1 1 1.00 U ret + 1 1 1.00 jb .LBB5_1 + 1 1 0.50 lea eax, [rsi + rdi] + 1 1 0.33 test al, 1 + 1 1 1.00 jne .LBB5_1 1 1 0.50 lea rcx, [rdi + rsi] 1 1 0.33 add rcx, -6 1 0 0.25 xor eax, eax 2 6 0.50 * cmp word ptr [rdi + rsi - 6], -16192 2 2 0.67 cmove rax, rcx 1 1 1.00 U ret + 1 0 0.25 xor eax, eax + 1 1 1.00 U ret Resources: @@ -47,21 +46,20 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 4.33 4.33 - 4.34 0.50 0.50 + - - 3.98 3.98 - 4.04 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.32 0.68 - - - - lea eax, [rsi + rdi] - - - 0.05 0.94 - 0.01 - - cmp rsi, 6 - - - 1.00 - - - - - setb cl - - - 0.95 0.05 - - - - or cl, al - - - 0.95 0.02 - 0.03 - - test cl, 1 - - - - - - 1.00 - - je .LBB5_2 + - - 0.03 0.96 - 0.01 - - cmp rsi, 6 + - - - - - 1.00 - - jb .LBB5_1 + - - 0.95 0.05 - - - - lea eax, [rsi + rdi] + - - 0.06 0.94 - - - - test al, 1 + - - - - - 1.00 - - jne .LBB5_1 + - - 0.94 0.06 - - - - lea rcx, [rdi + rsi] + - - 0.05 0.95 - - - - add rcx, -6 - - - - - - - - xor eax, eax + - - 0.95 0.04 - 0.01 0.50 0.50 cmp word ptr [rdi + rsi - 6], -16192 + - - 1.00 0.98 - 0.02 - - cmove rax, rcx - - - - - 1.00 - - ret - - - 0.04 0.96 - - - - lea rcx, [rdi + rsi] - - - 0.02 0.97 - 0.01 - - add rcx, -6 - - - - - - - - xor eax, eax - - - 0.03 0.66 - 0.31 0.50 0.50 cmp word ptr [rdi + rsi - 6], -16192 - - - 0.97 0.05 - 0.98 - - cmove rax, rcx - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 index a8ceabe11f..91dc7251d3 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 +++ b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 @@ -1,39 +1,31 @@ bench_try_ref_from_suffix_with_elems_dynamic_padding: mov rcx, rdx - mov edx, 3 - mov rax, rcx - mul rdx - jo .LBB5_1 - cmp rax, -10 - ja .LBB5_1 - lea rdx, [rax + 9] - not eax - and eax, 3 - add rax, rdx - jae .LBB5_4 -.LBB5_1: - xor r8d, r8d mov edx, 1 - mov rax, r8 - ret + movabs rax, 3074457345618258598 + cmp rcx, rax + ja .LBB5_4 + lea rax, [rcx + 2*rcx] + or rax, 3 + add rax, 9 + mov r8, rsi + sub r8, rax + jae .LBB5_2 .LBB5_4: - lea r9d, [rsi + rdi] - xor edx, edx - mov r8d, 0 - test r9b, 3 - je .LBB5_5 - mov rax, r8 - ret + xor eax, eax .LBB5_5: - sub rsi, rax - jb .LBB5_1 - lea rax, [rdi + rsi] - movzx esi, word ptr [rdi + rsi] - cmp si, -16192 + ret +.LBB5_2: + add esi, edi + xor edx, edx + mov eax, 0 + test sil, 3 + jne .LBB5_5 + lea rsi, [rdi + r8] + movzx edi, word ptr [rdi + r8] + cmp di, -16192 mov edx, 2 cmove rdx, rcx - xor r8d, r8d - cmp esi, 49344 - cmove r8, rax - mov rax, r8 + xor eax, eax + cmp edi, 49344 + cmove rax, rsi ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca index 4937b556fe..198346b5fb 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 3500 -Total Cycles: 1144 -Total uOps: 3800 +Instructions: 2700 +Total Cycles: 1304 +Total uOps: 2900 Dispatch Width: 4 -uOps Per Cycle: 3.32 -IPC: 3.06 -Block RThroughput: 9.5 +uOps Per Cycle: 2.22 +IPC: 2.07 +Block RThroughput: 7.3 Instruction Info: @@ -19,39 +19,31 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 mov rcx, rdx - 1 1 0.33 mov edx, 3 - 1 1 0.33 mov rax, rcx - 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_1 - 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_1 - 1 1 0.50 lea rdx, [rax + 9] - 1 1 0.33 not eax - 1 1 0.33 and eax, 3 - 1 1 0.33 add rax, rdx - 1 1 1.00 jae .LBB5_4 - 1 0 0.25 xor r8d, r8d 1 1 0.33 mov edx, 1 - 1 1 0.33 mov rax, r8 + 1 1 0.33 movabs rax, 3074457345618258598 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_4 + 1 1 0.50 lea rax, [rcx + 2*rcx] + 1 1 0.33 or rax, 3 + 1 1 0.33 add rax, 9 + 1 1 0.33 mov r8, rsi + 1 1 0.33 sub r8, rax + 1 1 1.00 jae .LBB5_2 + 1 0 0.25 xor eax, eax 1 1 1.00 U ret - 1 1 0.50 lea r9d, [rsi + rdi] + 1 1 0.33 add esi, edi 1 0 0.25 xor edx, edx - 1 1 0.33 mov r8d, 0 - 1 1 0.33 test r9b, 3 - 1 1 1.00 je .LBB5_5 - 1 1 0.33 mov rax, r8 - 1 1 1.00 U ret - 1 1 0.33 sub rsi, rax - 1 1 1.00 jb .LBB5_1 - 1 1 0.50 lea rax, [rdi + rsi] - 1 5 0.50 * movzx esi, word ptr [rdi + rsi] - 1 1 0.33 cmp si, -16192 + 1 1 0.33 mov eax, 0 + 1 1 0.33 test sil, 3 + 1 1 1.00 jne .LBB5_5 + 1 1 0.50 lea rsi, [rdi + r8] + 1 5 0.50 * movzx edi, word ptr [rdi + r8] + 1 1 0.33 cmp di, -16192 1 1 0.33 mov edx, 2 2 2 0.67 cmove rdx, rcx - 1 0 0.25 xor r8d, r8d - 1 1 0.33 cmp esi, 49344 - 2 2 0.67 cmove r8, rax - 1 1 0.33 mov rax, r8 + 1 0 0.25 xor eax, eax + 1 1 0.33 cmp edi, 49344 + 2 2 0.67 cmove rax, rsi 1 1 1.00 U ret @@ -68,42 +60,34 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 11.32 11.32 - 11.36 0.50 0.50 + - - 8.01 8.49 - 8.50 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.32 0.67 - 0.01 - - mov rcx, rdx - - - 0.66 0.18 - 0.16 - - mov edx, 3 - - - 1.00 - - - - - mov rax, rcx - - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_1 - - - 0.01 0.99 - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_1 - - - 0.99 0.01 - - - - lea rdx, [rax + 9] - - - 0.01 0.99 - - - - not eax - - - 0.02 0.98 - - - - and eax, 3 - - - 0.02 0.98 - - - - add rax, rdx - - - - - - 1.00 - - jae .LBB5_4 - - - - - - - - - xor r8d, r8d - - - 0.66 0.01 - 0.33 - - mov edx, 1 - - - 0.50 - - 0.50 - - mov rax, r8 + - - 0.48 0.50 - 0.02 - - mov rcx, rdx + - - 0.02 0.52 - 0.46 - - mov edx, 1 + - - 0.49 0.51 - - - - movabs rax, 3074457345618258598 + - - 0.51 0.48 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_4 + - - 0.48 0.52 - - - - lea rax, [rcx + 2*rcx] + - - 0.52 0.48 - - - - or rax, 3 + - - 0.52 0.47 - 0.01 - - add rax, 9 + - - 0.48 0.52 - - - - mov r8, rsi + - - 0.51 0.01 - 0.48 - - sub r8, rax + - - - - - 1.00 - - jae .LBB5_2 + - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - - 0.99 0.01 - - - - lea r9d, [rsi + rdi] + - - 0.01 0.50 - 0.49 - - add esi, edi - - - - - - - - xor edx, edx - - - 0.50 0.32 - 0.18 - - mov r8d, 0 - - - 0.16 0.17 - 0.67 - - test r9b, 3 - - - - - - 1.00 - - je .LBB5_5 - - - 0.33 0.33 - 0.34 - - mov rax, r8 - - - - - - 1.00 - - ret - - - - 0.51 - 0.49 - - sub rsi, rax - - - - - - 1.00 - - jb .LBB5_1 - - - 0.16 0.84 - - - - lea rax, [rdi + rsi] - - - - - - - 0.50 0.50 movzx esi, word ptr [rdi + rsi] - - - 0.02 0.98 - - - - cmp si, -16192 - - - 1.00 - - - - - mov edx, 2 - - - 0.99 0.84 - 0.17 - - cmove rdx, rcx - - - - - - - - - xor r8d, r8d - - - 0.98 - - 0.02 - - cmp esi, 49344 - - - 0.99 0.52 - 0.49 - - cmove r8, rax - - - 0.01 0.99 - - - - mov rax, r8 + - - 0.04 0.95 - 0.01 - - mov eax, 0 + - - 0.01 0.50 - 0.49 - - test sil, 3 + - - - - - 1.00 - - jne .LBB5_5 + - - 0.50 0.50 - - - - lea rsi, [rdi + r8] + - - - - - - 0.50 0.50 movzx edi, word ptr [rdi + r8] + - - 0.97 0.02 - 0.01 - - cmp di, -16192 + - - 0.48 0.51 - 0.01 - - mov edx, 2 + - - 0.99 0.51 - 0.50 - - cmove rdx, rcx + - - - - - - - - xor eax, eax + - - 0.02 0.48 - 0.50 - - cmp edi, 49344 + - - 0.98 0.51 - 0.51 - - cmove rax, rsi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 index ff25a78945..ee0c7db854 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 +++ b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 @@ -1,28 +1,29 @@ bench_try_ref_from_suffix_with_elems_dynamic_size: - movabs rax, 9223372036854775805 - cmp rdx, rax - ja .LBB5_1 - lea r8d, [rsi + rdi] - xor ecx, ecx - mov eax, 0 - test r8b, 1 - jne .LBB5_5 - lea rax, [2*rdx + 4] - sub rsi, rax + mov rcx, rdx + mov edx, 1 + movabs rax, 4611686018427387901 + cmp rcx, rax + ja .LBB5_3 + lea rax, [2*rcx + 4] + mov r8, rsi + sub r8, rax jae .LBB5_4 -.LBB5_1: +.LBB5_3: xor eax, eax - mov edx, 1 ret .LBB5_4: - lea r8, [rdi + rsi] - movzx esi, word ptr [rdi + rsi] - cmp si, -16192 - mov ecx, 2 - cmove rcx, rdx + add esi, edi + xor edx, edx + mov eax, 0 + test sil, 1 + jne .LBB5_6 + lea rsi, [rdi + r8] + movzx edi, word ptr [rdi + r8] + cmp di, -16192 + mov edx, 2 + cmove rdx, rcx xor eax, eax - cmp esi, 49344 - cmove rax, r8 -.LBB5_5: - mov rdx, rcx + cmp edi, 49344 + cmove rax, rsi +.LBB5_6: ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca index 8b6333bf34..7eb924c596 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca @@ -1,12 +1,12 @@ Iterations: 100 -Instructions: 2400 -Total Cycles: 1107 -Total uOps: 2600 +Instructions: 2500 +Total Cycles: 1105 +Total uOps: 2700 Dispatch Width: 4 -uOps Per Cycle: 2.35 -IPC: 2.17 -Block RThroughput: 6.5 +uOps Per Cycle: 2.44 +IPC: 2.26 +Block RThroughput: 6.8 Instruction Info: @@ -18,29 +18,30 @@ Instruction Info: [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: - 1 1 0.33 movabs rax, 9223372036854775805 - 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 - 1 1 0.50 lea r8d, [rsi + rdi] - 1 0 0.25 xor ecx, ecx - 1 1 0.33 mov eax, 0 - 1 1 0.33 test r8b, 1 - 1 1 1.00 jne .LBB5_5 - 1 1 0.50 lea rax, [2*rdx + 4] - 1 1 0.33 sub rsi, rax + 1 1 0.33 mov rcx, rdx + 1 1 0.33 mov edx, 1 + 1 1 0.33 movabs rax, 4611686018427387901 + 1 1 0.33 cmp rcx, rax + 1 1 1.00 ja .LBB5_3 + 1 1 0.50 lea rax, [2*rcx + 4] + 1 1 0.33 mov r8, rsi + 1 1 0.33 sub r8, rax 1 1 1.00 jae .LBB5_4 1 0 0.25 xor eax, eax - 1 1 0.33 mov edx, 1 1 1 1.00 U ret - 1 1 0.50 lea r8, [rdi + rsi] - 1 5 0.50 * movzx esi, word ptr [rdi + rsi] - 1 1 0.33 cmp si, -16192 - 1 1 0.33 mov ecx, 2 - 2 2 0.67 cmove rcx, rdx + 1 1 0.33 add esi, edi + 1 0 0.25 xor edx, edx + 1 1 0.33 mov eax, 0 + 1 1 0.33 test sil, 1 + 1 1 1.00 jne .LBB5_6 + 1 1 0.50 lea rsi, [rdi + r8] + 1 5 0.50 * movzx edi, word ptr [rdi + r8] + 1 1 0.33 cmp di, -16192 + 1 1 0.33 mov edx, 2 + 2 2 0.67 cmove rdx, rcx 1 0 0.25 xor eax, eax - 1 1 0.33 cmp esi, 49344 - 2 2 0.67 cmove rax, r8 - 1 1 0.33 mov rdx, rcx + 1 1 0.33 cmp edi, 49344 + 2 2 0.67 cmove rax, rsi 1 1 1.00 U ret @@ -57,31 +58,32 @@ Resources: Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6.0] [6.1] - - - 6.99 7.00 - 8.01 0.50 0.50 + - - 7.50 7.52 - 7.98 0.50 0.50 Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.02 0.95 - 0.03 - - movabs rax, 9223372036854775805 - - - 0.93 0.04 - 0.03 - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 - - - 0.96 0.04 - - - - lea r8d, [rsi + rdi] - - - - - - - - - xor ecx, ecx - - - 0.95 0.02 - 0.03 - - mov eax, 0 - - - 0.95 0.05 - - - - test r8b, 1 - - - - - - 1.00 - - jne .LBB5_5 - - - 0.06 0.94 - - - - lea rax, [2*rdx + 4] - - - 0.93 0.07 - - - - sub rsi, rax + - - 0.47 0.52 - 0.01 - - mov rcx, rdx + - - 0.50 0.49 - 0.01 - - mov edx, 1 + - - 0.49 0.49 - 0.02 - - movabs rax, 4611686018427387901 + - - 0.48 0.51 - 0.01 - - cmp rcx, rax + - - - - - 1.00 - - ja .LBB5_3 + - - 0.51 0.49 - - - - lea rax, [2*rcx + 4] + - - 0.49 0.51 - - - - mov r8, rsi + - - 0.48 0.52 - - - - sub r8, rax - - - - - 1.00 - - jae .LBB5_4 - - - - - - - - xor eax, eax - - - 0.03 0.95 - 0.02 - - mov edx, 1 - - - - - 1.00 - - ret - - - 0.97 0.03 - - - - lea r8, [rdi + rsi] - - - - - - - 0.50 0.50 movzx esi, word ptr [rdi + rsi] - - - 0.03 0.97 - - - - cmp si, -16192 - - - 0.05 0.94 - 0.01 - - mov ecx, 2 - - - 0.06 0.98 - 0.96 - - cmove rcx, rdx + - - 0.47 0.47 - 0.06 - - add esi, edi + - - - - - - - - xor edx, edx + - - 0.51 0.49 - - - - mov eax, 0 + - - 0.47 0.47 - 0.06 - - test sil, 1 + - - - - - 1.00 - - jne .LBB5_6 + - - 0.52 0.48 - - - - lea rsi, [rdi + r8] + - - - - - - 0.50 0.50 movzx edi, word ptr [rdi + r8] + - - 0.50 0.04 - 0.46 - - cmp di, -16192 + - - 0.49 0.50 - 0.01 - - mov edx, 2 + - - 0.54 0.52 - 0.94 - - cmove rdx, rcx - - - - - - - - xor eax, eax - - - 0.97 0.03 - - - - cmp esi, 49344 - - - 0.06 0.96 - 0.98 - - cmove rax, r8 - - - 0.02 0.03 - 0.95 - - mov rdx, rcx + - - 0.04 0.49 - 0.47 - - cmp edi, 49344 + - - 0.54 0.53 - 0.93 - - cmove rax, rsi - - - - - 1.00 - - ret diff --git a/src/byte_slice.rs b/src/byte_slice.rs index ace0b5dd6c..6f9ee9ac33 100644 --- a/src/byte_slice.rs +++ b/src/byte_slice.rs @@ -365,7 +365,7 @@ mod proofs { fn any_vec() -> Vec { let len = kani::any(); - kani::assume(len <= isize::MAX as usize); + kani::assume(len <= crate::DstLayout::MAX_SIZE); vec![0u8; len] } diff --git a/src/layout.rs b/src/layout.rs index 6c83676c80..e2b322e8a6 100644 --- a/src/layout.rs +++ b/src/layout.rs @@ -136,6 +136,18 @@ impl DstLayout { None => const_unreachable!(), }; + /// The maximum size of an allocation \[1\]. + /// + /// \[1\] Per : + /// + /// For any allocation with base `address`, `size`, and a set of `addresses`, + /// the following are guaranteed: [..] + /// + /// - `size <= isize::MAX` + /// + #[allow(clippy::as_conversions)] + pub(crate) const MAX_SIZE: usize = isize::MAX as usize; + /// Assumes that this layout lacks static shallow padding. /// /// # Panics @@ -626,37 +638,7 @@ impl DstLayout { addr.checked_add(bytes_len).is_some(), "`addr` + `bytes_len` > usize::MAX" ); - - // Alignment checks go in their own block to avoid introducing variables - // into the top-level scope. - { - // We check alignment for `addr` (for prefix casts) or `addr + - // bytes_len` (for suffix casts). For a prefix cast, the correctness - // of this check is trivial - `addr` is the address the object will - // live at. - // - // For a suffix cast, we know that all valid sizes for the type are - // a multiple of the alignment (and by safety precondition, we know - // `DstLayout` may only describe valid Rust types). Thus, a - // validly-sized instance which lives at a validly-aligned address - // must also end at a validly-aligned address. Thus, if the end - // address for a suffix cast (`addr + bytes_len`) is not aligned, - // then no valid start address will be aligned either. - let offset = match cast_type { - CastType::Prefix => 0, - CastType::Suffix => bytes_len, - }; - - // Addition is guaranteed not to overflow because `offset <= - // bytes_len`, and `addr + bytes_len <= usize::MAX` is a - // precondition of this method. Modulus is guaranteed not to divide - // by 0 because `align` is non-zero. - #[allow(clippy::arithmetic_side_effects)] - if (addr + offset) % self.align.get() != 0 { - return Err(MetadataCastError::Alignment); - } - } - + let (elems, self_bytes) = match size_info { SizeInfo::Sized { size } => { if size > bytes_len { @@ -670,7 +652,7 @@ impl DstLayout { // multiple of the alignment, or will be larger than // `bytes_len`. let max_total_bytes = - util::round_down_to_next_multiple_of_alignment(bytes_len, self.align); + util::round_down_to_next_multiple_of_alignment(bytes_len, self.align); // Calculate the maximum number of bytes that could be consumed // by the trailing slice. // @@ -681,7 +663,7 @@ impl DstLayout { // `bytes_len` too small even for 0 trailing slice elements. None => return Err(MetadataCastError::Size), }; - + // Calculate the number of elements that fit in // `max_slice_and_padding_bytes`; any remaining bytes will be // considered padding. @@ -716,10 +698,40 @@ impl DstLayout { // `self_bytes` up to `max_total_bytes`. #[allow(clippy::arithmetic_side_effects)] let self_bytes = - without_padding + util::padding_needed_for(without_padding, self.align); + without_padding + util::padding_needed_for(without_padding, self.align); (elems, self_bytes) } }; + + // Alignment checks go in their own block to avoid introducing variables + // into the top-level scope. + { + // We check alignment for `addr` (for prefix casts) or `addr + + // bytes_len` (for suffix casts). For a prefix cast, the correctness + // of this check is trivial - `addr` is the address the object will + // live at. + // + // For a suffix cast, we know that all valid sizes for the type are + // a multiple of the alignment (and by safety precondition, we know + // `DstLayout` may only describe valid Rust types). Thus, a + // validly-sized instance which lives at a validly-aligned address + // must also end at a validly-aligned address. Thus, if the end + // address for a suffix cast (`addr + bytes_len`) is not aligned, + // then no valid start address will be aligned either. + let offset = match cast_type { + CastType::Prefix => 0, + CastType::Suffix => bytes_len, + }; + + // Addition is guaranteed not to overflow because `offset <= + // bytes_len`, and `addr + bytes_len <= usize::MAX` is a + // precondition of this method. Modulus is guaranteed not to divide + // by 0 because `align` is non-zero. + #[allow(clippy::arithmetic_side_effects)] + if (addr + offset) % self.align.get() != 0 { + return Err(MetadataCastError::Alignment); + } + } __const_debug_assert!(self_bytes <= bytes_len); @@ -1975,7 +1987,7 @@ mod proofs { true => { let size: usize = kani::any(); - kani::assume(size <= isize::MAX as _); + kani::assume(size <= DstLayout::MAX_SIZE); SizeInfo::Sized { size } } @@ -1989,8 +2001,8 @@ mod proofs { let elem_size: usize = kani::any(); let offset: usize = kani::any(); - kani::assume(elem_size < isize::MAX as _); - kani::assume(offset < isize::MAX as _); + kani::assume(elem_size < DstLayout::MAX_SIZE); + kani::assume(offset < DstLayout::MAX_SIZE); TrailingSliceLayout { elem_size, offset } } @@ -2019,7 +2031,7 @@ mod proofs { loop {} }; - if unpadded_size >= isize::MAX as usize { + if unpadded_size >= DstLayout::MAX_SIZE { // The `unpadded_size` exceeds `isize::MAX`; `meta` is invalid. kani::assume(false); loop {} diff --git a/src/lib.rs b/src/lib.rs index cf6fb4518c..0c66144932 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -863,6 +863,19 @@ pub unsafe trait KnownLayout { fn size_for_metadata(meta: Self::PointerMetadata) -> Option { meta.size_for_metadata(Self::LAYOUT) } + + /// Computes whether `meta` can describe a valid allocation of `Self`. + /// + /// # Safety + /// + /// `is_valid_metadata` promises to return `true` if and only if the size of + /// an allocation of `Self` with `meta` would not overflow an + /// [`isize::MAX`]. + #[doc(hidden)] + #[inline(always)] + fn is_valid_metadata(meta: Self::PointerMetadata) -> bool { + meta.to_elem_count() <= maximum_trailing_slice_len::().to_elem_count() + } } /// Efficiently produces the [`TrailingSliceLayout`] of `T`. @@ -888,9 +901,39 @@ where T::SIZE_INFO } +/// Efficiently produces the maximum trailing slice length `T`. +#[inline(always)] +pub(crate) fn maximum_trailing_slice_len() -> usize +where + T: ?Sized + KnownLayout, +{ + trait LayoutFacts { + const MAX_LEN: usize; + } + + impl LayoutFacts for T + where + T: KnownLayout, + { + const MAX_LEN: usize = match T::LAYOUT.size_info { + SizeInfo::SliceDst(TrailingSliceLayout { elem_size: 0, .. }) => usize::MAX, + _ => match T::LAYOUT.validate_cast_and_convert_metadata( + T::LAYOUT.align.get(), + DstLayout::MAX_SIZE, + CastType::Prefix, + ) { + Ok((elems, _)) => elems, + Err(_) => const_panic!("unreachable"), + }, + }; + } + + T::MAX_LEN +} + /// The metadata associated with a [`KnownLayout`] type. #[doc(hidden)] -pub trait PointerMetadata: Copy + Eq + Debug { +pub trait PointerMetadata: Copy + Eq + Debug + Ord { /// Constructs a `Self` from an element count. /// /// If `Self = ()`, this returns `()`. If `Self = usize`, this returns diff --git a/src/util/macros.rs b/src/util/macros.rs index 59fd2549d9..7dca5410c8 100644 --- a/src/util/macros.rs +++ b/src/util/macros.rs @@ -982,8 +982,37 @@ macro_rules! codegen_preamble { } } +/// Stub for rendering codegen documentation; used to break build dependency +/// between benches and zerocopy when re-blessing codegen tests. +#[allow(unused)] +#[cfg(not(doc))] +macro_rules! codegen_section { + ( + header = $level:expr, + bench = $bench:expr, + format = $format:expr, + arity = $arity:literal, + $([ + $($open:ident)? + @index $index:literal + @title $title:literal + @variant $variant:literal + ]),* + ) => { + "" + }; + ( + header = $level:expr, + bench = $bench:expr, + format = $format:expr, + ) => { + "" + }; +} + /// Generates the HTML for code generation documentation. #[allow(unused)] +#[cfg(doc)] macro_rules! codegen_section { ( header = $level:expr, diff --git a/src/util/mod.rs b/src/util/mod.rs index ccc5166fdd..4016f8f048 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -142,7 +142,7 @@ pub(crate) fn validate_aligned_to(t: T) -> Result<(), Alignment /// on the answer it gives if this is not the case. #[cfg_attr( kani, - kani::requires(len <= isize::MAX as usize), + kani::requires(len <= DstLayout::MAX_SIZE), kani::requires(align.is_power_of_two()), kani::ensures(|&p| (len + p) % align.get() == 0), // Ensures that we add the minimum required padding. @@ -382,29 +382,29 @@ pub(crate) unsafe fn new_box( where T: ?Sized + crate::KnownLayout, { + let align = T::LAYOUT.align.get(); + if !T::is_valid_metadata(meta) { + return Err(AllocError); + } let size = match T::size_for_metadata(meta) { Some(size) => size, + // Thanks to the `!T::is_valid_metadata(meta)` check + // above, this branch is unreachable. Fortunately, the + // optimizer recognizes this, so replacing this branch + // with `unreachable_unchecked` produces no codegen + // improvements. None => return Err(AllocError), }; - - let align = T::LAYOUT.align.get(); - // On stable Rust versions <= 1.64.0, `Layout::from_size_align` has a bug in - // which sufficiently-large allocations (those which, when rounded up to the - // alignment, overflow `isize`) are not rejected, which can cause undefined - // behavior. See #64 for details. - // - // FIXME(#67): Once our MSRV is > 1.64.0, remove this assertion. - #[allow(clippy::as_conversions)] - let max_alloc = (isize::MAX as usize).saturating_sub(align); - if size > max_alloc { - return Err(AllocError); - } - - // FIXME(https://github.com/rust-lang/rust/issues/55724): Use - // `Layout::repeat` once it's stabilized. - let layout = Layout::from_size_align(size, align).or(Err(AllocError))?; - - let ptr = if layout.size() != 0 { + let ptr = if size != 0 { + // SAFETY: + // - `align` is derived from a `NonZeroUsize` and is thus non-zero. + // - `align` is a power of two because, by invariant on + // `KnownLayout::LAYOUT` `::LAYOUT` accurately + // reflects the layout of `T`. + // - `size`, by invariant on `size_for_metadata` is well-aligned for + // `align` and, by the check on `T::is_valid_metadata(meta)`, is less + // than `isize::MAX`. + let layout: Layout = unsafe { Layout::from_size_align_unchecked(size, align) }; // SAFETY: By contract on the caller, `allocate` is either // `alloc::alloc::alloc` or `alloc::alloc::alloc_zeroed`. The above // check ensures their shared safety precondition: that the supplied @@ -420,8 +420,6 @@ where None => return Err(AllocError), } } else { - let align = T::LAYOUT.align.get(); - // We use `transmute` instead of an `as` cast since Miri (with strict // provenance enabled) notices and complains that an `as` cast creates a // pointer with no provenance. Miri isn't smart enough to realize that @@ -435,8 +433,8 @@ where #[allow(unknown_lints)] #[allow(clippy::useless_transmute, integer_to_ptr_transmutes)] let dangling = unsafe { mem::transmute::(align) }; - // SAFETY: `dangling` is constructed from `T::LAYOUT.align`, which is a - // `NonZeroUsize`, which is guaranteed to be non-zero. + // SAFETY: `dangling` is constructed from `align`, which is derived from + // a `NonZeroUsize`, which is guaranteed to be non-zero. // // `Box<[T]>` does not allocate when `T` is zero-sized or when `len` is // zero, but it does require a non-null dangling pointer for its @@ -579,11 +577,19 @@ mod len_of { ) -> Result<(MetadataOf, MetadataOf<[u8]>), MetadataCastError> { let layout = match meta { None => T::LAYOUT, - // This can return `None` if the metadata describes an object - // which can't fit in an `isize`. + // This can return `Err(MetadataCastError::Size)` if the + // metadata describes an object which can't fit in an `isize`. Some(meta) => { + if !T::is_valid_metadata(meta) { + return Err(MetadataCastError::Size); + } let size = match T::size_for_metadata(meta) { Some(size) => size, + // Thanks to the `!T::is_valid_metadata(meta)` check + // above, this branch is unreachable. Fortunately, the + // optimizer recognizes this, so replacing this branch + // with `unreachable_unchecked` produces no codegen + // improvements. None => return Err(MetadataCastError::Size), }; DstLayout {