Skip to content

Commit 826dbcf

Browse files
committed
ZJIT: A64: Remove nop padding after conditional branches
Previously, there were a lot of nops after conditional branches. They come from branch to LIR labels: ./miniruby --zjit-call-threshold=1 --zjit-dump-disasm -e 'Object || String' # Insn: v14 CheckInterrupts # RUBY_VM_CHECK_INTS(ec) ldur w2, [x20, #0x20] tst w2, w2 b.ne #0x120900278 nop nop nop nop nop # Insn: v15 Test v11 tst x0, #-5 mov x2, #0 mov x3, #1 csel x2, x2, x3, eq # Insn: v16 IfTrue v15, bb3(v6, v11) tst x2, x2 b.eq #0x120900198 nop nop nop nop nop They gunk up the disassembly and can't be helpful for speed. This commit removes them. I think they were accidentally inherited from certain YJIT branches that require padding for patching. ZJIT doesn't have these requirements. Use a single branch instruction for conditional branches to labels; Jmp already uses a single `B` instruction. This will work for assemblers that generate less than ~260,000 instructions -- plenty. Let the CodeBlock::label_ref() callback return a failure, so we can fail compilation instead of panicking in case we do get large offsets.
1 parent fa910e2 commit 826dbcf

File tree

6 files changed

+78
-27
lines changed

6 files changed

+78
-27
lines changed

zjit/src/asm/mod.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ pub mod arm64;
2020
pub struct Label(pub usize);
2121

2222
/// The object that knows how to encode the branch instruction.
23-
type BranchEncoder = Box<dyn Fn(&mut CodeBlock, i64, i64)>;
23+
type BranchEncoder = Box<dyn Fn(&mut CodeBlock, i64, i64) -> Result<(), ()>>;
2424

2525
/// Reference to an ASM label
2626
pub struct LabelRef {
@@ -233,7 +233,7 @@ impl CodeBlock {
233233
}
234234

235235
// Add a label reference at the current write position
236-
pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: impl Fn(&mut CodeBlock, i64, i64) + 'static) {
236+
pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: impl Fn(&mut CodeBlock, i64, i64) -> Result<(), ()> + 'static) {
237237
assert!(label.0 < self.label_addrs.len());
238238

239239
// Keep track of the reference
@@ -248,8 +248,9 @@ impl CodeBlock {
248248
}
249249

250250
// Link internal label references
251-
pub fn link_labels(&mut self) {
251+
pub fn link_labels(&mut self) -> Result<(), ()> {
252252
let orig_pos = self.write_pos;
253+
let mut link_result = Ok(());
253254

254255
// For each label reference
255256
for label_ref in mem::take(&mut self.label_refs) {
@@ -261,11 +262,14 @@ impl CodeBlock {
261262
assert!(label_addr < self.mem_size);
262263

263264
self.write_pos = ref_pos;
264-
(label_ref.encode.as_ref())(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
265+
let encode_result = (label_ref.encode.as_ref())(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
266+
link_result = link_result.and(encode_result);
265267

266-
// Assert that we've written the same number of bytes that we
267-
// expected to have written.
268-
assert!(self.write_pos == ref_pos + label_ref.num_bytes);
268+
// Verify number of bytes written when the callback returns Ok
269+
if encode_result.is_ok() {
270+
assert_eq!(self.write_pos, ref_pos + label_ref.num_bytes, "label_ref \
271+
callback didn't write number of bytes it claimed to write upfront");
272+
}
269273
}
270274

271275
self.write_pos = orig_pos;
@@ -274,6 +278,8 @@ impl CodeBlock {
274278
self.label_addrs.clear();
275279
self.label_names.clear();
276280
assert!(self.label_refs.is_empty());
281+
282+
link_result
277283
}
278284

279285
/// Convert a Label to CodePtr

zjit/src/asm/x86_64/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,7 @@ pub fn call_label(cb: &mut CodeBlock, label: Label) {
679679
cb.label_ref(label, 5, |cb, src_addr, dst_addr| {
680680
cb.write_byte(0xE8);
681681
cb.write_int((dst_addr - src_addr) as u64, 32);
682+
Ok(())
682683
});
683684
}
684685

@@ -795,6 +796,7 @@ fn write_jcc<const OP: u8>(cb: &mut CodeBlock, label: Label) {
795796
cb.write_byte(0x0F);
796797
cb.write_byte(OP);
797798
cb.write_int((dst_addr - src_addr) as u64, 32);
799+
Ok(())
798800
});
799801
}
800802

@@ -834,6 +836,7 @@ pub fn jmp_label(cb: &mut CodeBlock, label: Label) {
834836
cb.label_ref(label, 5, |cb, src_addr, dst_addr| {
835837
cb.write_byte(0xE9);
836838
cb.write_int((dst_addr - src_addr) as u64, 32);
839+
Ok(())
837840
});
838841
}
839842

zjit/src/asm/x86_64/tests.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ fn test_call_label() {
136136
let cb = compile(|cb| {
137137
let label_idx = cb.new_label("fn".to_owned());
138138
call_label(cb, label_idx);
139-
cb.link_labels();
139+
cb.link_labels().unwrap();
140140
});
141141
assert_disasm_snapshot!(cb.disasm(), @" 0x0: call 0");
142142
assert_snapshot!(cb.hexdump(), @"e8fbffffff");
@@ -255,7 +255,7 @@ fn test_jge_label() {
255255
let cb = compile(|cb| {
256256
let label_idx = cb.new_label("loop".to_owned());
257257
jge_label(cb, label_idx);
258-
cb.link_labels();
258+
cb.link_labels().unwrap();
259259
});
260260
assert_disasm_snapshot!(cb.disasm(), @" 0x0: jge 0");
261261
assert_snapshot!(cb.hexdump(), @"0f8dfaffffff");
@@ -268,14 +268,14 @@ fn test_jmp_label() {
268268
let label_idx = cb.new_label("next".to_owned());
269269
jmp_label(cb, label_idx);
270270
cb.write_label(label_idx);
271-
cb.link_labels();
271+
cb.link_labels().unwrap();
272272
});
273273
// Backwards jump
274274
let cb2 = compile(|cb| {
275275
let label_idx = cb.new_label("loop".to_owned());
276276
cb.write_label(label_idx);
277277
jmp_label(cb, label_idx);
278-
cb.link_labels();
278+
cb.link_labels().unwrap();
279279
});
280280

281281
assert_disasm_snapshot!(disasms!(cb1, cb2), @r"
@@ -301,7 +301,7 @@ fn test_jo_label() {
301301
let cb = compile(|cb| {
302302
let label_idx = cb.new_label("loop".to_owned());
303303
jo_label(cb, label_idx);
304-
cb.link_labels();
304+
cb.link_labels().unwrap();
305305
});
306306

307307
assert_disasm_snapshot!(cb.disasm(), @" 0x0: jo 0");

zjit/src/backend/arm64/mod.rs

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -998,10 +998,17 @@ impl Assembler {
998998
generate_branch::<CONDITION>(cb, src_addr, dst_addr);
999999
},
10001000
Target::Label(label_idx) => {
1001-
// We save `cb.conditional_jump_insns` number of bytes since we may use up to that amount
1002-
// `generate_branch` will pad the emitted branch instructions with `nop`s for each unused byte.
1003-
cb.label_ref(label_idx, (cb.conditional_jump_insns() * 4) as usize, |cb, src_addr, dst_addr| {
1004-
generate_branch::<CONDITION>(cb, src_addr - (cb.conditional_jump_insns() * 4) as i64, dst_addr);
1001+
// Try to use a single B.cond instruction
1002+
cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
1003+
// +1 since src_addr is after the instruction while A64
1004+
// counts the offset relative to the start.
1005+
let offset = (dst_addr - src_addr) / 4 + 1;
1006+
if bcond_offset_fits_bits(offset) {
1007+
bcond(cb, CONDITION, InstructionOffset::from_insns(offset as i32));
1008+
Ok(())
1009+
} else {
1010+
Err(())
1011+
}
10051012
});
10061013
},
10071014
Target::SideExit { .. } => {
@@ -1399,6 +1406,7 @@ impl Assembler {
13991406
// Set output to the raw address of the label
14001407
cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| {
14011408
adr(cb, Self::EMIT_OPND, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
1409+
Ok(())
14021410
});
14031411

14041412
mov(cb, out.into(), Self::EMIT_OPND);
@@ -1480,14 +1488,17 @@ impl Assembler {
14801488
emit_jmp_ptr(cb, dst_ptr, true);
14811489
},
14821490
Target::Label(label_idx) => {
1483-
// Here we're going to save enough space for
1484-
// ourselves and then come back and write the
1485-
// instruction once we know the offset. We're going
1486-
// to assume we can fit into a single b instruction.
1487-
// It will panic otherwise.
1491+
// Reserve space for a single B instruction
14881492
cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
1489-
let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap();
1490-
b(cb, InstructionOffset::from_bytes(bytes));
1493+
// +1 since src_addr is after the instruction while A64
1494+
// counts the offset relative to the start.
1495+
let offset = (dst_addr - src_addr) / 4 + 1;
1496+
if b_offset_fits_bits(offset) {
1497+
b(cb, InstructionOffset::from_insns(offset as i32));
1498+
Ok(())
1499+
} else {
1500+
Err(())
1501+
}
14911502
});
14921503
},
14931504
Target::SideExit { .. } => {
@@ -1632,7 +1643,7 @@ impl Assembler {
16321643
let gc_offsets = asm.arm64_emit(cb);
16331644

16341645
if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
1635-
cb.link_labels();
1646+
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
16361647

16371648
// Invalidate icache for newly written out region so we don't run stale code.
16381649
unsafe { rb_jit_icache_invalidate(start_ptr.raw_ptr(cb) as _, cb.get_write_ptr().raw_ptr(cb) as _) };
@@ -1748,6 +1759,29 @@ mod tests {
17481759
assert_snapshot!(cb.hexdump(), @"600080d2207d009be10300aa");
17491760
}
17501761

1762+
#[test]
1763+
fn test_conditional_branch_to_label() {
1764+
let (mut asm, mut cb) = setup_asm();
1765+
let start = asm.new_label("start");
1766+
let forward = asm.new_label("forward");
1767+
1768+
let value = asm.load(Opnd::mem(VALUE_BITS, NATIVE_STACK_PTR, 0));
1769+
asm.write_label(start.clone());
1770+
asm.cmp(value, 0.into());
1771+
asm.jg(forward.clone());
1772+
asm.jl(start.clone());
1773+
asm.write_label(forward);
1774+
1775+
asm.compile_with_num_regs(&mut cb, 1);
1776+
assert_disasm_snapshot!(cb.disasm(), @r"
1777+
0x0: ldur x0, [sp]
1778+
0x4: cmp x0, #0
1779+
0x8: b.gt #0x10
1780+
0xc: b.lt #4
1781+
");
1782+
assert_snapshot!(cb.hexdump(), @"e00340f81f0000f14c000054cbffff54");
1783+
}
1784+
17511785
#[test]
17521786
fn sp_movements_are_single_instruction() {
17531787
let (mut asm, mut cb) = setup_asm();
@@ -2571,7 +2605,7 @@ mod tests {
25712605
}
25722606

25732607
#[test]
2574-
fn test_label_branch_generate_bounds() {
2608+
fn test_exceeding_label_branch_generate_bounds() {
25752609
// The immediate in a conditional branch is a 19 bit unsigned integer
25762610
// which has a max value of 2^18 - 1.
25772611
const IMMEDIATE_MAX_VALUE: usize = 2usize.pow(18) - 1;
@@ -2582,6 +2616,7 @@ mod tests {
25822616
let page_size = unsafe { rb_jit_get_page_size() } as usize;
25832617
let memory_required = (IMMEDIATE_MAX_VALUE + 8) * 4 + page_size;
25842618

2619+
crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler
25852620
let mut asm = Assembler::new();
25862621
let mut cb = CodeBlock::new_dummy_sized(memory_required);
25872622

@@ -2595,7 +2630,7 @@ mod tests {
25952630
});
25962631

25972632
asm.write_label(far_label.clone());
2598-
asm.compile_with_num_regs(&mut cb, 1);
2633+
assert_eq!(Err(CompileError::LabelLinkingFailure), asm.compile(&mut cb));
25992634
}
26002635

26012636
#[test]

zjit/src/backend/x86_64/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,7 @@ impl Assembler {
836836
cb.label_ref(*label, 7, move |cb, src_addr, dst_addr| {
837837
let disp = dst_addr - src_addr;
838838
lea(cb, out.into(), mem_opnd(8, RIP, disp.try_into().unwrap()));
839+
Ok(())
839840
});
840841
} else {
841842
// Set output to the jump target's raw address
@@ -1104,7 +1105,7 @@ impl Assembler {
11041105
let gc_offsets = asm.x86_emit(cb);
11051106

11061107
if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
1107-
cb.link_labels();
1108+
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
11081109
Ok((start_ptr, gc_offsets))
11091110
} else {
11101111
cb.clear_labels();

zjit/src/stats.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ make_counters! {
306306
compile_error_iseq_stack_too_large,
307307
compile_error_exception_handler,
308308
compile_error_out_of_memory,
309+
compile_error_label_linking_failure,
309310
compile_error_jit_to_jit_optional,
310311
compile_error_register_spill_on_ccall,
311312
compile_error_register_spill_on_alloc,
@@ -466,6 +467,10 @@ pub enum CompileError {
466467
ExceptionHandler,
467468
OutOfMemory,
468469
ParseError(ParseError),
470+
/// When a ZJIT function is too large, the branches may have
471+
/// offsets that don't fit in one instruction. We error in
472+
/// error that case.
473+
LabelLinkingFailure,
469474
}
470475

471476
/// Return a raw pointer to the exit counter for a given CompileError
@@ -479,6 +484,7 @@ pub fn exit_counter_for_compile_error(compile_error: &CompileError) -> Counter {
479484
IseqStackTooLarge => compile_error_iseq_stack_too_large,
480485
ExceptionHandler => compile_error_exception_handler,
481486
OutOfMemory => compile_error_out_of_memory,
487+
LabelLinkingFailure => compile_error_label_linking_failure,
482488
ParseError(parse_error) => match parse_error {
483489
StackUnderflow(_) => compile_error_parse_stack_underflow,
484490
MalformedIseq(_) => compile_error_parse_malformed_iseq,

0 commit comments

Comments
 (0)