diff --git a/crates/cranelift/src/func_environ/gc/enabled/drc.rs b/crates/cranelift/src/func_environ/gc/enabled/drc.rs index aada2c8f393a..ffa4f8573332 100644 --- a/crates/cranelift/src/func_environ/gc/enabled/drc.rs +++ b/crates/cranelift/src/func_environ/gc/enabled/drc.rs @@ -364,7 +364,7 @@ fn emit_gc_raw_alloc( .ins() .iconst(ir::types::I32, i64::from(kind.as_u32())); - let ty = builder.ins().iconst(ir::types::I32, i64::from(ty.as_u32())); + let ty = func_env.module_interned_to_shared_ty(&mut builder.cursor(), ty); assert!(align.is_power_of_two()); let align = builder.ins().iconst(ir::types::I32, i64::from(align)); diff --git a/crates/environ/src/builtin.rs b/crates/environ/src/builtin.rs index e9d91bd22eef..4d087280aec6 100644 --- a/crates/environ/src/builtin.rs +++ b/crates/environ/src/builtin.rs @@ -86,7 +86,7 @@ macro_rules! foreach_builtin_function { gc_alloc_raw( vmctx: vmctx, kind: u32, - module_interned_type_index: u32, + shared_type_index: u32, size: u32, align: u32 ) -> u32; diff --git a/crates/wasmtime/proptest-regressions/runtime/vm/gc/enabled/free_list.txt b/crates/wasmtime/proptest-regressions/runtime/vm/gc/enabled/free_list.txt new file mode 100644 index 000000000000..c3a26b7b9713 --- /dev/null +++ b/crates/wasmtime/proptest-regressions/runtime/vm/gc/enabled/free_list.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc b26e69fbaf46deb79652859039538e422818fd40b9afff63faa7aacbddecfd3d # shrinks to (capacity, ops) = (219544665809630458, [(10, Alloc(Layout { size: 193045289231815352, align: 8 (1 << 3) })), (10, Dealloc(Layout { size: 193045289231815352, align: 8 (1 << 3) }))]) diff --git a/crates/wasmtime/src/runtime/store.rs b/crates/wasmtime/src/runtime/store.rs index 557428e1e76d..5e7560d85d55 100644 --- a/crates/wasmtime/src/runtime/store.rs +++ b/crates/wasmtime/src/runtime/store.rs @@ -2057,6 +2057,13 @@ impl StoreOpaque { .expect("attempted to access the store's GC heap before it has been allocated") } + /// Returns a mutable reference to the GC store if it has been allocated. + #[inline] + #[cfg(feature = "gc-drc")] + pub(crate) fn try_gc_store_mut(&mut self) -> Option<&mut GcStore> { + self.gc_store.as_mut() + } + #[inline] pub(crate) fn gc_roots(&self) -> &RootSet { &self.gc_roots diff --git a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs index 29558ea308f2..50afcc9b0513 100644 --- a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs +++ b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs @@ -95,6 +95,7 @@ unsafe impl GcRuntime for DrcCollector { } /// How to trace a GC object. +#[derive(Clone)] enum TraceInfo { /// How to trace an array. Array { @@ -111,12 +112,108 @@ enum TraceInfo { }, } +struct TraceInfos { + // SAFETY: Self-borrow from `self.trace_infos`, boxed so it won't move, + // never exposed externally as `'static`. + cache: [Option<(VMSharedTypeIndex, &'static TraceInfo)>; Self::CACHE_CAPACITY], + + trace_infos: HashMap<VMSharedTypeIndex, Box<TraceInfo>>, +} + +impl Default for TraceInfos { + fn default() -> Self { + Self { + cache: [None; Self::CACHE_CAPACITY], + trace_infos: HashMap::default(), + } + } +} + +impl TraceInfos { + const CACHE_CAPACITY: usize = 256; + + /// Is this set of trace information empty?
+ pub fn is_empty(&self) -> bool { + let is_empty = self.trace_infos.is_empty(); + if is_empty { + debug_assert!(self.cache.iter().all(|x| x.is_none())); + } + is_empty + } + + /// Get the trace information associated with the given type index. + pub fn get(&mut self, ty: VMSharedTypeIndex) -> &TraceInfo { + if let Some((ty2, info)) = self.cache[Self::cache_index(ty)] + && ty == ty2 + { + return info; + } + + self.get_slow(ty) + } + + #[inline] + fn cache_index(ty: VMSharedTypeIndex) -> usize { + let bits = ty.bits(); + let bits = usize::try_from(bits).unwrap(); + bits % Self::CACHE_CAPACITY + } + + fn get_slow(&mut self, ty: VMSharedTypeIndex) -> &TraceInfo { + let info = &self.trace_infos[&ty]; + + let index = Self::cache_index(ty); + debug_assert!(self.cache[index].is_none_or(|(ty2, _)| ty != ty2)); + self.cache[index] = Some(( + ty, + // SAFETY: Self-borrow, boxed so it won't move, never exposed + // externally as `'static`. + unsafe { NonNull::from(&**info).as_ref() }, + )); + + info + } + + /// Ensure that we have tracing information for the given type. + #[inline] + pub fn ensure_trace_info( + &mut self, + ty: VMSharedTypeIndex, + make_trace_info: impl FnMut() -> TraceInfo, + ) { + if let Some((ty2, _)) = self.cache[Self::cache_index(ty)] + && ty == ty2 + { + return; + } + + if self.trace_infos.contains_key(&ty) { + return; + } + + self.insert_new_trace_info(ty, make_trace_info); + } + + fn insert_new_trace_info( + &mut self, + ty: VMSharedTypeIndex, + mut make_trace_info: impl FnMut() -> TraceInfo, + ) { + debug_assert!(self.cache[Self::cache_index(ty)].is_none_or(|(ty2, _)| ty != ty2)); + debug_assert!(!self.trace_infos.contains_key(&ty)); + + let info = make_trace_info(); + let old = self.trace_infos.insert(ty, Box::new(info)); + debug_assert!(old.is_none()); + } +} + /// A deferred reference-counting (DRC) heap. struct DrcHeap { engine: EngineWeak, /// For every type that we have allocated in this heap, how do we trace it?
- trace_infos: HashMap, + trace_infos: TraceInfos, /// Count of how many no-gc scopes we are currently within. no_gc_count: u64, @@ -160,7 +257,7 @@ impl DrcHeap { log::trace!("allocating new DRC heap"); Ok(Self { engine: engine.weak(), - trace_infos: HashMap::with_capacity(1), + trace_infos: TraceInfos::default(), no_gc_count: 0, over_approximated_stack_roots: Box::new(None), memory: None, @@ -170,23 +267,23 @@ impl DrcHeap { }) } - fn engine(&self) -> Engine { - self.engine.upgrade().unwrap() - } - fn dealloc(&mut self, gc_ref: VMGcRef) { let drc_ref = drc_ref(&gc_ref); - let size = self.index(drc_ref).object_size(); - let layout = FreeList::layout(size); + let size = self.index(drc_ref).object_size; + let alloc_size = FreeList::aligned_size(size); let index = gc_ref.as_heap_index().unwrap(); // Poison the freed memory so that any stale access is detectable. if cfg!(gc_zeal) { let index = usize::try_from(index.get()).unwrap(); - self.heap_slice_mut()[index..][..layout.size()].fill(POISON); + let alloc_size = usize::try_from(alloc_size).unwrap(); + self.heap_slice_mut()[index..][..alloc_size].fill(POISON); } - self.free_list.as_mut().unwrap().dealloc(index, layout); + self.free_list + .as_mut() + .unwrap() + .dealloc_fast(index, alloc_size); } /// Increment the ref count for the associated object. @@ -197,36 +294,10 @@ impl DrcHeap { let drc_ref = drc_ref(gc_ref); let header = self.index_mut(&drc_ref); - debug_assert_ne!( - header.ref_count, 0, - "{:#p} is supposedly live; should have nonzero ref count", - *gc_ref - ); - header.ref_count += 1; + header.inc_ref(); log::trace!("increment {:#p} ref count -> {}", *gc_ref, header.ref_count); } - /// Decrement the ref count for the associated object. - /// - /// Returns `true` if the ref count reached zero and the object should be - /// deallocated. 
- fn dec_ref(&mut self, gc_ref: &VMGcRef) -> bool { - if gc_ref.is_i31() { - return false; - } - - let drc_ref = drc_ref(gc_ref); - let header = self.index_mut(drc_ref); - debug_assert_ne!( - header.ref_count, 0, - "{:#p} is supposedly live; should have nonzero ref count", - *gc_ref - ); - header.ref_count -= 1; - log::trace!("decrement {:#p} ref count -> {}", *gc_ref, header.ref_count); - header.ref_count == 0 - } - /// Decrement the ref count for the associated object. /// /// If the ref count reached zero, then deallocate the object and remove its @@ -238,6 +309,7 @@ impl DrcHeap { fn dec_ref_and_maybe_dealloc( &mut self, host_data_table: &mut ExternRefHostDataTable, + trace_infos: &mut TraceInfos, gc_ref: &VMGcRef, ) { let mut stack = self.dec_ref_stack.take().unwrap(); @@ -245,137 +317,111 @@ impl DrcHeap { stack.push(gc_ref.unchecked_copy()); while let Some(gc_ref) = stack.pop() { - if self.dec_ref(&gc_ref) { - // The object's reference count reached zero. - // - // Enqueue any other objects it references for dec-ref'ing. - self.trace_gc_ref(&gc_ref, &mut stack); - - // If this object was an `externref`, remove its associated - // entry from the host-data table. - if let Some(externref) = gc_ref.as_typed::(self) { - let host_data_id = self.index(externref).host_data; - host_data_table.dealloc(host_data_id); - } - - // Deallocate this GC object! - self.dealloc(gc_ref.unchecked_copy()); - } - } - - debug_assert!(stack.is_empty()); - debug_assert!(self.dec_ref_stack.is_none()); - self.dec_ref_stack = Some(stack); - } - - /// Ensure that we have tracing information for the given type. 
- fn ensure_trace_info(&mut self, ty: VMSharedTypeIndex) { - if self.trace_infos.contains_key(&ty) { - return; - } - - self.insert_new_trace_info(ty); - } + debug_assert!(!gc_ref.is_i31()); - fn insert_new_trace_info(&mut self, ty: VMSharedTypeIndex) { - debug_assert!(!self.trace_infos.contains_key(&ty)); - - let engine = self.engine(); - let gc_layout = engine - .signatures() - .layout(ty) - .unwrap_or_else(|| panic!("should have a GC layout for {ty:?}")); - - let info = match gc_layout { - GcLayout::Array(l) => { - if l.elems_are_gc_refs { - debug_assert_eq!(l.elem_offset(0), GC_REF_ARRAY_ELEMS_OFFSET,); - } - TraceInfo::Array { - gc_ref_elems: l.elems_are_gc_refs, - } + // Read the DRC header once to get ref_count, type, and object_size. + let drc_header = self.index_mut(drc_ref(&gc_ref)); + log::trace!( + "decrement {:#p} ref count -> {}", + gc_ref, + drc_header.ref_count - 1 + ); + if !drc_header.dec_ref() { + continue; } - GcLayout::Struct(l) => TraceInfo::Struct { - gc_ref_offsets: l - .fields - .iter() - .filter_map(|f| if f.is_gc_ref { Some(f.offset) } else { None }) - .collect(), - }, - }; - - let old_entry = self.trace_infos.insert(ty, info); - debug_assert!(old_entry.is_none()); - } - - /// Enumerate all of the given `VMGcRef`'s outgoing edges. - fn trace_gc_ref(&self, gc_ref: &VMGcRef, stack: &mut Vec) { - debug_assert!(!gc_ref.is_i31()); - - let header = self.header(gc_ref); - let Some(ty) = header.ty() else { - debug_assert!(header.kind().matches(VMGcKind::ExternRef)); - return; - }; - - match self - .trace_infos - .get(&ty) - .expect("should have inserted trace info for every GC type allocated in this heap") - { - TraceInfo::Struct { gc_ref_offsets } => { - stack.reserve(gc_ref_offsets.len()); - let data = self.gc_object_data(gc_ref); - for offset in gc_ref_offsets { - let raw = data.read_u32(*offset); - if let Some(gc_ref) = VMGcRef::from_raw_u32(raw) - && !gc_ref.is_i31() - { - debug_assert!( + // Ref count reached zero. 
+ + // Extract type and size from the header we already read (avoiding + // re-reading from heap). + let ty = drc_header.header.ty(); + let object_size = drc_header.object_size; + + // Trace: enqueue child GC refs for dec-ref'ing. + if let Some(ty) = ty { + match trace_infos.get(ty) { + TraceInfo::Struct { gc_ref_offsets } => { + stack.reserve(gc_ref_offsets.len()); + + let object_start = + usize::try_from(gc_ref.as_heap_index().unwrap().get()).unwrap(); + let heap = self.heap_slice(); + for offset in gc_ref_offsets { + let offset = usize::try_from(*offset).unwrap(); + + // Read gc ref fields directly from the heap slice, + // avoiding the overhead of repeatedly indexing into + // the heap via the object header. + let field_start = object_start + offset; + let field_end = field_start + mem::size_of::(); + debug_assert!( + field_end <= object_start + usize::try_from(object_size).unwrap() + ); + let raw: [u8; 4] = heap[field_start..field_end].try_into().unwrap(); + let raw = u32::from_le_bytes(raw); + + if let Some(child) = VMGcRef::from_raw_u32(raw) + && !child.is_i31() { - let header = self.header(&gc_ref); - let kind = header.kind().as_u32(); - VMGcKind::try_from_u32(kind).is_some() - }, - "trace_gc_ref: struct field at offset {offset} references object \ - with invalid `VMGcKind`", - ); - - stack.push(gc_ref); + debug_assert!({ + let header = self.header(&child); + let kind = header.kind().as_u32(); + VMGcKind::try_from_u32(kind).is_some() + }); + stack.push(child); + } + } + } + TraceInfo::Array { gc_ref_elems } => { + if *gc_ref_elems { + let data = self.gc_object_data(&gc_ref); + let len = self.array_len(gc_ref.as_arrayref_unchecked()); + stack.reserve(usize::try_from(len).unwrap()); + for i in 0..len { + let elem_offset = GC_REF_ARRAY_ELEMS_OFFSET + + i * u32::try_from(mem::size_of::()).unwrap(); + let raw = data.read_u32(elem_offset); + if let Some(child) = VMGcRef::from_raw_u32(raw) + && !child.is_i31() + { + debug_assert!({ + let header = 
self.header(&child); + let kind = header.kind().as_u32(); + VMGcKind::try_from_u32(kind).is_some() + }); + stack.push(child); + } + } + } } } + } else { + // Handle `externref` host data. Only `externref`s have host + // data, and `ty` is `None` only for `externref`s, so we skip + // this for `struct` and `array` objects entirely. + debug_assert!(drc_header.header.kind().matches(VMGcKind::ExternRef)); + let externref = gc_ref.as_typed::(self).unwrap(); + let host_data_id = self.index(externref).host_data; + host_data_table.dealloc(host_data_id); } - TraceInfo::Array { gc_ref_elems } => { - if !*gc_ref_elems { - return; - } + // Deallocate using the object_size we already read. + let alloc_size = FreeList::aligned_size(object_size); + let index = gc_ref.as_heap_index().unwrap(); - let data = self.gc_object_data(gc_ref); - let len = self.array_len(gc_ref.as_arrayref_unchecked()); - stack.reserve(usize::try_from(len).unwrap()); - for i in 0..len { - let elem_offset = GC_REF_ARRAY_ELEMS_OFFSET - + i * u32::try_from(mem::size_of::()).unwrap(); - let raw = data.read_u32(elem_offset); - if let Some(gc_ref) = VMGcRef::from_raw_u32(raw) - && !gc_ref.is_i31() - { - debug_assert!( - { - let header = self.header(&gc_ref); - let kind = header.kind().as_u32(); - VMGcKind::try_from_u32(kind).is_some() - }, - "trace_gc_ref: array element at index {i} references object \ - with invalid `VMGcKind`", - ); - - stack.push(gc_ref); - } - } + if cfg!(gc_zeal) { + let idx = usize::try_from(index.get()).unwrap(); + self.heap_slice_mut()[idx..][..usize::try_from(alloc_size).unwrap()].fill(POISON); } + + self.free_list + .as_mut() + .unwrap() + .dealloc_fast(index, alloc_size); } + + debug_assert!(stack.is_empty()); + debug_assert!(self.dec_ref_stack.is_none()); + self.dec_ref_stack = Some(stack); } /// Iterate over the over-approximated-stack-roots list. 
@@ -569,6 +615,8 @@ impl DrcHeap { .as_ref() .map(|r| r.unchecked_copy()); + let mut trace_infos = mem::take(&mut self.trace_infos); + while let Some(gc_ref) = next { log::trace!("sweeping gc ref: {gc_ref:#p}"); @@ -604,9 +652,12 @@ impl DrcHeap { .index_mut(drc_ref(prev)) .set_next_over_approximated_stack_root(prev_next), } - self.dec_ref_and_maybe_dealloc(host_data_table, &gc_ref); + self.dec_ref_and_maybe_dealloc(host_data_table, &mut trace_infos, &gc_ref); } + debug_assert!(self.trace_infos.is_empty()); + self.trace_infos = trace_infos; + log::trace!("Done sweeping"); if log::log_enabled!(log::Level::Trace) { @@ -616,6 +667,36 @@ impl DrcHeap { ); } } + + fn ensure_trace_info(&mut self, ty: VMSharedTypeIndex) { + let engine = &self.engine; + self.trace_infos.ensure_trace_info(ty, || { + let gc_layout = engine + .upgrade() + .unwrap() + .signatures() + .layout(ty) + .unwrap_or_else(|| panic!("should have a GC layout for {ty:?}")); + + match gc_layout { + GcLayout::Array(l) => { + if l.elems_are_gc_refs { + debug_assert_eq!(l.elem_offset(0), GC_REF_ARRAY_ELEMS_OFFSET); + } + TraceInfo::Array { + gc_ref_elems: l.elems_are_gc_refs, + } + } + GcLayout::Struct(l) => TraceInfo::Struct { + gc_ref_offsets: l + .fields + .iter() + .filter_map(|f| if f.is_gc_ref { Some(f.offset) } else { None }) + .collect(), + }, + } + }); + } } /// Convert the given GC reference as a typed GC reference pointing to a @@ -730,6 +811,22 @@ impl VMDrcHeader { false } } + + /// Increment the ref count for this object. + fn inc_ref(&mut self) { + debug_assert!(self.ref_count > 0); + self.ref_count += 1; + } + + /// Decrement the ref count for this object. + /// + /// Returns `true` if the ref count reached zero and the object should be + /// deallocated. + fn dec_ref(&mut self) -> bool { + debug_assert!(self.ref_count > 0); + self.ref_count -= 1; + self.ref_count == 0 + } } /// The common header for all arrays in the DRC collector. 
@@ -845,7 +942,12 @@ unsafe impl GcHeap for DrcHeap { // Decrement the ref count of the value being overwritten and, if // necessary, deallocate the GC object. if let Some(dest) = destination { - self.dec_ref_and_maybe_dealloc(host_data_table, dest); + let mut trace_infos = mem::take(&mut self.trace_infos); + + self.dec_ref_and_maybe_dealloc(host_data_table, &mut trace_infos, dest); + + debug_assert!(self.trace_infos.is_empty()); + self.trace_infos = trace_infos; } // Do the actual write. @@ -920,6 +1022,7 @@ unsafe impl GcHeap for DrcHeap { fn alloc_raw(&mut self, header: VMGcHeader, layout: Layout) -> Result> { debug_assert!(layout.size() >= core::mem::size_of::()); debug_assert!(layout.align() >= core::mem::align_of::()); + debug_assert!(FreeList::can_align_to(layout.align())); debug_assert_eq!(header.reserved_u26(), 0); // We must have trace info for every GC type that we allocate in this @@ -933,8 +1036,9 @@ unsafe impl GcHeap for DrcHeap { } let object_size = u32::try_from(layout.size()).unwrap(); + let alloc_size = FreeList::aligned_size(object_size); - let gc_ref = match self.free_list.as_mut().unwrap().alloc(layout)? { + let gc_ref = match self.free_list.as_mut().unwrap().alloc_fast(alloc_size) { None => return Ok(Err(u64::try_from(layout.size()).unwrap())), Some(index) => VMGcRef::from_heap_index(index).unwrap(), }; diff --git a/crates/wasmtime/src/runtime/vm/gc/enabled/free_list.rs b/crates/wasmtime/src/runtime/vm/gc/enabled/free_list.rs index e638e5ea90a6..5a0bd4d90c0b 100644 --- a/crates/wasmtime/src/runtime/vm/gc/enabled/free_list.rs +++ b/crates/wasmtime/src/runtime/vm/gc/enabled/free_list.rs @@ -1,8 +1,9 @@ use crate::prelude::*; use alloc::collections::BTreeMap; -use core::{alloc::Layout, num::NonZeroU32, ops::Bound}; +use core::{alloc::Layout, num::NonZeroU32}; -/// A very simple first-fit free list for use by our garbage collectors. 
+/// A free list for use by our garbage collectors: a bump-allocation region +/// backed by a `BTreeMap` of free blocks, keyed by block index. pub(crate) struct FreeList { /// The total capacity of the contiguous range of memory we are managing. /// @@ -25,6 +26,11 @@ pub(crate) struct FreeList { /// Our free blocks, as a map from index to length of the free block at that /// index. free_block_index_to_len: BTreeMap<u32, u32>, + /// Bump allocator: current position in the active free block. + /// Allocations bump this forward. When exhausted, refilled from blocks. + bump_ptr: u32, + /// End of the current bump allocation region. + bump_end: u32, } /// Our minimum and maximum supported alignment. Every allocation is aligned to @@ -40,6 +46,13 @@ impl FreeList { Layout::from_size_align(size, ALIGN_USIZE).unwrap() } + /// Compute the aligned allocation size for a given byte size: `size` rounded + /// up to this free list's alignment. Panics instead of wrapping on overflow. + #[inline] + pub fn aligned_size(size: u32) -> u32 { + size.checked_add(ALIGN_U32 - 1).expect("size overflow") & !(ALIGN_U32 - 1) + } + /// Get the current total capacity this free list manages. pub fn current_capacity(&self) -> usize { self.capacity @@ -53,6 +66,8 @@ impl FreeList { let mut free_list = FreeList { capacity, free_block_index_to_len: BTreeMap::new(), + bump_ptr: 0, + bump_end: 0, }; let end = u32::try_from(free_list.capacity).unwrap_or_else(|_| { @@ -67,13 +82,11 @@ impl FreeList { let len = round_u32_down_to_pow2(end.saturating_sub(start), ALIGN_U32); - let entire_range = if len >= ALIGN_U32 { - Some((start, len)) - } else { - None - }; - - free_list.free_block_index_to_len.extend(entire_range); + if len >= ALIGN_U32 { + // Initialize bump allocator with the entire range. + free_list.bump_ptr = start; + free_list.bump_end = start + len; + } free_list } @@ -146,11 +159,23 @@ impl FreeList { round_usize_down_to_pow2(cap.saturating_sub(ALIGN_USIZE), ALIGN_USIZE) } + /// Total number of free blocks (including bump region if non-empty).
+ #[cfg(test)] + fn num_free_blocks(&self) -> usize { + self.free_block_index_to_len.len() + if self.bump_end > self.bump_ptr { 1 } else { 0 } + } + + /// Can this free list align allocations to the given value? + pub fn can_align_to(align: usize) -> bool { + debug_assert!(align.is_power_of_two()); + align <= ALIGN_USIZE + } + /// Check the given layout for compatibility with this free list and return /// the actual block size we will use for this layout. fn check_layout(&self, layout: Layout) -> Result { ensure!( - layout.align() <= ALIGN_USIZE, + Self::can_align_to(layout.align()), "requested allocation's alignment of {} is greater than max supported \ alignment of {ALIGN_USIZE}", layout.align(), @@ -175,109 +200,154 @@ impl FreeList { }) } - /// Find the first free block that can hold an allocation of the given size - /// and remove it from the free list. - fn first_fit(&mut self, alloc_size: u32) -> Option<(u32, u32)> { - debug_assert_eq!(alloc_size % ALIGN_U32, 0); - - let (&block_index, &block_len) = self - .free_block_index_to_len - .iter() - .find(|(_idx, len)| **len >= alloc_size)?; - - debug_assert_eq!(block_index % ALIGN_U32, 0); - debug_assert_eq!(block_len % ALIGN_U32, 0); - - let entry = self.free_block_index_to_len.remove(&block_index); - debug_assert!(entry.is_some()); + #[cfg(test)] + pub fn alloc(&mut self, layout: Layout) -> Result> { + log::trace!("FreeList::alloc({layout:?})"); + let alloc_size = self.check_layout(layout)?; + Ok(self.alloc_impl(alloc_size)) + } - Some((block_index, block_len)) + /// Fast-path allocation with a pre-computed aligned size, as returned from + /// `Self::aligned_size`. + #[inline] + pub fn alloc_fast(&mut self, alloc_size: u32) -> Option { + debug_assert_eq!(alloc_size % ALIGN_U32, 0); + debug_assert!(alloc_size > 0); + self.alloc_impl(alloc_size) } - /// If the given allocated block is large enough such that we can split it - /// and still have enough space left for future allocations, then split it. 
- /// - /// Returns the new length of the allocated block. - fn maybe_split(&mut self, alloc_size: u32, block_index: u32, block_len: u32) -> u32 { + #[inline] + fn alloc_impl(&mut self, alloc_size: u32) -> Option<NonZeroU32> { + debug_assert_eq!( + Self::layout(usize::try_from(alloc_size).unwrap()).size(), + usize::try_from(alloc_size).unwrap() + ); debug_assert_eq!(alloc_size % ALIGN_U32, 0); - debug_assert_eq!(block_index % ALIGN_U32, 0); - debug_assert_eq!(block_len % ALIGN_U32, 0); - if block_len - alloc_size < ALIGN_U32 { - // The block is not large enough to split. - return block_len; - } + // Fast path: bump allocate from the current region, if enough space remains. + let fits = alloc_size <= self.bump_end - self.bump_ptr; + if fits { + let result = self.bump_ptr; + self.bump_ptr = result + alloc_size; + debug_assert_ne!(result, 0); + debug_assert_eq!(result % ALIGN_U32, 0); - // The block is large enough to split. Split the block at exactly the - // requested allocation size and put the tail back in the free list. - let new_block_len = alloc_size; - let split_start = block_index + alloc_size; - let split_len = block_len - alloc_size; + #[cfg(debug_assertions)] + self.check_integrity(); - debug_assert_eq!(new_block_len % ALIGN_U32, 0); - debug_assert_eq!(split_start % ALIGN_U32, 0); - debug_assert_eq!(split_len % ALIGN_U32, 0); + log::trace!("FreeList::alloc -> {result:#x}"); + return Some(unsafe { NonZeroU32::new_unchecked(result) }); + } - self.free_block_index_to_len.insert(split_start, split_len); + // The fast path did not apply; check integrity before taking the slow path. + #[cfg(debug_assertions)] + self.check_integrity(); - new_block_len + // Slow path: find a block in the blocks list, then set it as bump region. + self.alloc_slow(alloc_size) } - /// Allocate space for an object of the given layout. - /// - /// Returns: - /// - /// * `Ok(Some(_))`: Allocation succeeded. - /// - /// * `Ok(None)`: Can't currently fulfill the allocation request, but might - /// be able to if some stuff was reallocated.
- /// - /// * `Err(_)`: - pub fn alloc(&mut self, layout: Layout) -> Result> { - log::trace!("FreeList::alloc({layout:?})"); - let alloc_size = self.check_layout(layout)?; - debug_assert_eq!(alloc_size % ALIGN_U32, 0); + #[inline(never)] + #[cold] + fn alloc_slow(&mut self, alloc_size: u32) -> Option { + // Put the remaining bump region back into blocks if non-empty. + let remaining_ptr = self.bump_ptr; + let remaining = self.bump_end - self.bump_ptr; + self.bump_ptr = 0; + self.bump_end = 0; + if remaining >= ALIGN_U32 { + self.insert_free_block(remaining_ptr, remaining); + } + + // Find a block big enough. + let (&block_index, &block_len) = self + .free_block_index_to_len + .iter() + .find(|(_, len)| **len >= alloc_size)?; + self.free_block_index_to_len.remove(&block_index); - let (block_index, block_len) = match self.first_fit(alloc_size) { - None => return Ok(None), - Some(tup) => tup, - }; - debug_assert_ne!(block_index, 0); debug_assert_eq!(block_index % ALIGN_U32, 0); - debug_assert!(block_len >= alloc_size); debug_assert_eq!(block_len % ALIGN_U32, 0); - let block_len = self.maybe_split(alloc_size, block_index, block_len); - debug_assert!(block_len >= alloc_size); - debug_assert_eq!(block_len % ALIGN_U32, 0); + // Set this block as the new bump region and allocate from it. + self.bump_ptr = block_index + alloc_size; + self.bump_end = block_index + block_len; - // After we've mutated the free list, double check its integrity. + debug_assert_ne!(block_index, 0); #[cfg(debug_assertions)] self.check_integrity(); - log::trace!("FreeList::alloc({layout:?}) -> {block_index:#x}"); - Ok(Some(unsafe { NonZeroU32::new_unchecked(block_index) })) + Some(unsafe { NonZeroU32::new_unchecked(block_index) }) } /// Deallocate an object with the given layout. 
pub fn dealloc(&mut self, index: NonZeroU32, layout: Layout) { log::trace!("FreeList::dealloc({index:#x}, {layout:?})"); + let alloc_size = self.check_layout(layout).unwrap(); + self.dealloc_impl(index.get(), alloc_size); + } - let index = index.get(); - debug_assert_eq!(index % ALIGN_U32, 0); + /// Fast-path deallocation with a pre-computed aligned size. + #[inline] + pub fn dealloc_fast(&mut self, index: NonZeroU32, alloc_size: u32) { + debug_assert_eq!(alloc_size % ALIGN_U32, 0); + debug_assert_eq!(index.get() % ALIGN_U32, 0); + self.dealloc_impl(index.get(), alloc_size); + } - let alloc_size = self.check_layout(layout).unwrap(); + #[inline] + fn dealloc_impl(&mut self, index: u32, alloc_size: u32) { + debug_assert_eq!( + Self::layout(usize::try_from(alloc_size).unwrap()).size(), + usize::try_from(alloc_size).unwrap() + ); + debug_assert_eq!(index % ALIGN_U32, 0); debug_assert_eq!(alloc_size % ALIGN_U32, 0); + // Check if the freed block is directly below the bump region. + if index + alloc_size == self.bump_ptr { + self.bump_ptr = index; + + // Also check if the last block in the list is now contiguous with + // the extended bump region. + if let Some((&bi, &bl)) = self.free_block_index_to_len.last_key_value() { + if bi + bl == self.bump_ptr { + self.bump_ptr = bi; + self.free_block_index_to_len.pop_last(); + } + } + + #[cfg(debug_assertions)] + self.check_integrity(); + + return; + } + + // Check if the freed block is directly above the bump region. + if self.bump_end == index { + self.bump_end = index + alloc_size; + + // Also check if the first block above the bump region is now + // contiguous. 
+ if let Some(block_len) = self.free_block_index_to_len.remove(&self.bump_end) { + self.bump_end += block_len; + } + + #[cfg(debug_assertions)] + self.check_integrity(); + + return; + } + let prev_block = self .free_block_index_to_len - .range((Bound::Unbounded, Bound::Excluded(index))) + .range(..index) .next_back() .map(|(idx, len)| (*idx, *len)); let next_block = self .free_block_index_to_len - .range((Bound::Excluded(index), Bound::Unbounded)) + .range(index + 1..) .next() .map(|(idx, len)| (*idx, *len)); @@ -291,9 +361,9 @@ impl FreeList { && blocks_are_contiguous(index, alloc_size, next_index) => { log::trace!( - "merging blocks {prev_index:#x}..{prev_len:#x}, {index:#x}..{index_end:#x}, {next_index:#x}..{next_end:#x}", - prev_len = prev_index + prev_len, - index_end = index + u32::try_from(layout.size()).unwrap(), + "merging blocks {prev_index:#x}..{prev_end:#x}, {index:#x}..{index_end:#x}, {next_index:#x}..{next_end:#x}", + prev_end = prev_index + prev_len, + index_end = index + alloc_size, next_end = next_index + next_len, ); self.free_block_index_to_len.remove(&next_index); @@ -307,9 +377,9 @@ impl FreeList { if blocks_are_contiguous(prev_index, prev_len, index) => { log::trace!( - "merging blocks {prev_index:#x}..{prev_len:#x}, {index:#x}..{index_end:#x}", - prev_len = prev_index + prev_len, - index_end = index + u32::try_from(layout.size()).unwrap(), + "merging blocks {prev_index:#x}..{prev_end:#x}, {index:#x}..{index_end:#x}", + prev_end = prev_index + prev_len, + index_end = index + alloc_size, ); let merged_block_len = index + alloc_size - prev_index; debug_assert_eq!(merged_block_len % ALIGN_U32, 0); @@ -322,7 +392,7 @@ impl FreeList { { log::trace!( "merging blocks {index:#x}..{index_end:#x}, {next_index:#x}..{next_end:#x}", - index_end = index + u32::try_from(layout.size()).unwrap(), + index_end = index + alloc_size, next_end = next_index + next_len, ); self.free_block_index_to_len.remove(&next_index); @@ -339,6 +409,15 @@ impl FreeList { } } + // 
After merge, check if the last block is now contiguous with the bump + // region and absorb it. + if let Some((&block_index, &block_len)) = self.free_block_index_to_len.last_key_value() { + if block_index + block_len == self.bump_ptr { + self.bump_ptr = block_index; + self.free_block_index_to_len.pop_last(); + } + } + // After we've added to/mutated the free list, double check its // integrity. #[cfg(debug_assertions)] @@ -347,7 +426,23 @@ impl FreeList { /// Iterate over all free blocks as `(index, len)` pairs. pub fn iter_free_blocks(&self) -> impl Iterator + '_ { - self.free_block_index_to_len.iter().map(|(&i, &l)| (i, l)) + let bump = if self.bump_end > self.bump_ptr { + Some((self.bump_ptr, self.bump_end - self.bump_ptr)) + } else { + None + }; + self.free_block_index_to_len + .iter() + .map(|(idx, len)| (*idx, *len)) + .chain(bump) + } + + /// Insert a free block into the sorted blocks list with merging. + fn insert_free_block(&mut self, index: u32, size: u32) { + debug_assert_eq!(index % ALIGN_U32, 0); + debug_assert_eq!(size % ALIGN_U32, 0); + // Reuse dealloc_impl which handles insertion and merging. + self.dealloc_impl(index, size); } /// Assert that the free list is valid: @@ -385,6 +480,26 @@ impl FreeList { prev_end = Some(end); } + + // Check bump region validity. + assert!(self.bump_ptr <= self.bump_end); + if self.bump_ptr < self.bump_end { + assert_eq!(self.bump_ptr % ALIGN_U32, 0); + assert_eq!(self.bump_end % ALIGN_U32, 0); + assert!(usize::try_from(self.bump_end).unwrap() <= self.capacity); + // Bump region should not overlap with any block. 
+ for (&index, &len) in self.free_block_index_to_len.iter() { + let block_end = index + len; + assert!( + self.bump_end <= index || self.bump_ptr >= block_end, + "bump region [{}, {}) overlaps with block [{}, {})", + self.bump_ptr, + self.bump_end, + index, + block_end + ); + } + } } } @@ -431,12 +546,15 @@ mod tests { use std::num::NonZeroUsize; fn free_list_block_len_and_size(free_list: &FreeList) -> (usize, Option) { - let len = free_list.free_block_index_to_len.len(); - let size = free_list - .free_block_index_to_len - .values() - .next() - .map(|s| usize::try_from(*s).unwrap()); + let len = free_list.num_free_blocks(); + let size = if free_list.bump_end > free_list.bump_ptr { + Some(usize::try_from(free_list.bump_end - free_list.bump_ptr).unwrap()) + } else { + free_list + .free_block_index_to_len + .first_key_value() + .map(|(_, &s)| usize::try_from(s).unwrap()) + }; (len, size) } @@ -598,7 +716,7 @@ mod tests { // `ALIGN_U32`. let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 2); - assert_eq!(free_list.free_block_index_to_len.len(), 1); + assert_eq!(free_list.num_free_blocks(), 1); assert_eq!(free_list.max_size(), ALIGN_USIZE * 2); // Allocate a block such that the remainder is not worth splitting. @@ -608,7 +726,7 @@ mod tests { .expect("have free space available for allocation"); // Should not have split the block. - assert_eq!(free_list.free_block_index_to_len.len(), 0); + assert_eq!(free_list.num_free_blocks(), 0); } #[test] @@ -617,7 +735,7 @@ mod tests { // `ALIGN_U32`. let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 3); - assert_eq!(free_list.free_block_index_to_len.len(), 1); + assert_eq!(free_list.num_free_blocks(), 1); assert_eq!(free_list.max_size(), ALIGN_USIZE * 3); // Allocate a block such that the remainder is not worth splitting. @@ -627,7 +745,7 @@ mod tests { .expect("have free space available for allocation"); // Should have split the block. 
- assert_eq!(free_list.free_block_index_to_len.len(), 1); + assert_eq!(free_list.num_free_blocks(), 1); } #[test] @@ -636,7 +754,7 @@ mod tests { let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 100); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "initially one big free block" ); @@ -646,7 +764,7 @@ mod tests { .expect("allocation within 'static' free list limits") .expect("have free space available for allocation"); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "should have split the block to allocate `a`" ); @@ -656,21 +774,21 @@ mod tests { .expect("allocation within 'static' free list limits") .expect("have free space available for allocation"); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "should have split the block to allocate `b`" ); free_list.dealloc(a, layout); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 2, "should have two non-contiguous free blocks after deallocating `a`" ); free_list.dealloc(b, layout); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "should have merged `a` and `b` blocks with the rest to form a \ single, contiguous free block after deallocating `b`" @@ -683,7 +801,7 @@ mod tests { let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 100); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "initially one big free block" ); @@ -701,21 +819,21 @@ mod tests { .expect("allocation within 'static' free list limits") .expect("have free space available for allocation"); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "should have split the block to allocate `a`, `b`, and `c`" ); free_list.dealloc(a, layout); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 2, "should have two non-contiguous free 
blocks after deallocating `a`" ); free_list.dealloc(b, layout); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 2, "should have merged `a` and `b` blocks, but not merged with the \ rest of the free space" @@ -730,7 +848,7 @@ mod tests { let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 100); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "initially one big free block" ); @@ -748,21 +866,21 @@ mod tests { .expect("allocation within 'static' free list limits") .expect("have free space available for allocation"); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "should have split the block to allocate `a`, `b`, and `c`" ); free_list.dealloc(a, layout); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 2, "should have two non-contiguous free blocks after deallocating `a`" ); free_list.dealloc(c, layout); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 2, "should have merged `c` block with rest of the free space, but not \ with `a` block" @@ -777,7 +895,7 @@ mod tests { let mut free_list = FreeList::new(ALIGN_USIZE + ALIGN_USIZE * 100); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "initially one big free block" ); @@ -799,21 +917,21 @@ mod tests { .expect("allocation within 'static' free list limits") .expect("have free space available for allocation"); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "should have split the block to allocate `a`, `b`, `c`, and `d`" ); free_list.dealloc(a, layout); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 2, "should have two non-contiguous free blocks after deallocating `a`" ); free_list.dealloc(c, layout); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 3, "should not 
have merged `c` block `a` block or rest of the free \ space" @@ -921,14 +1039,14 @@ mod tests { free_list.dealloc(a, layout); free_list.dealloc(b, layout); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "`dealloc` should merge blocks from different `add_capacity` calls together" ); free_list.add_capacity(ALIGN_USIZE); assert_eq!( - free_list.free_block_index_to_len.len(), + free_list.num_free_blocks(), 1, "`add_capacity` should eagerly merge new capacity into the last block \ in the free list, when possible" diff --git a/crates/wasmtime/src/runtime/vm/libcalls.rs b/crates/wasmtime/src/runtime/vm/libcalls.rs index 186d1c02acd8..8d38b3bc35ed 100644 --- a/crates/wasmtime/src/runtime/vm/libcalls.rs +++ b/crates/wasmtime/src/runtime/vm/libcalls.rs @@ -689,30 +689,20 @@ fn grow_gc_heap(store: &mut dyn VMStore, _instance: InstanceId, bytes_needed: u6 #[cfg(feature = "gc-drc")] fn gc_alloc_raw( store: &mut dyn VMStore, - instance: InstanceId, + _instance: InstanceId, kind_and_reserved: u32, - module_interned_type_index: u32, + shared_type_index: u32, size: u32, align: u32, ) -> Result { use crate::vm::VMGcHeader; use core::alloc::Layout; - use wasmtime_environ::{ModuleInternedTypeIndex, VMGcKind}; + use wasmtime_environ::{VMGcKind, VMSharedTypeIndex}; let kind = VMGcKind::from_high_bits_of_u32(kind_and_reserved); log::trace!("gc_alloc_raw(kind={kind:?}, size={size}, align={align})"); - let module = store - .instance(instance) - .runtime_module() - .expect("should never allocate GC types defined in a dummy module"); - - let module_interned_type_index = ModuleInternedTypeIndex::from_u32(module_interned_type_index); - let shared_type_index = module - .signatures() - .shared_type(module_interned_type_index) - .expect("should have engine type index for module type index"); - + let shared_type_index = VMSharedTypeIndex::from_u32(shared_type_index); let mut header = VMGcHeader::from_kind_and_index(kind, shared_type_index); 
header.set_reserved_u26(kind_and_reserved & VMGcKind::UNUSED_MASK); @@ -724,6 +714,16 @@ fn gc_alloc_raw( err.context(e) })?; + // Fast path: when the GC store already exists, try to allocate directly to + // skip the async/fiber machinery. + let opaque = store.store_opaque_mut(); + if let Some(gc_store) = opaque.try_gc_store_mut() { + if let Ok(gc_ref) = gc_store.alloc_raw(header, layout)? { + let raw = gc_store.expose_gc_ref_to_wasm(gc_ref); + return Ok(raw); + } + } + let (mut limiter, store) = store.resource_limiter_and_store_opaque(); block_on!(store, async |store, asyncness| { let gc_ref = store diff --git a/tests/disas/gc/drc/array-new-fixed-of-gc-refs.wat b/tests/disas/gc/drc/array-new-fixed-of-gc-refs.wat index bd7ea5b1ff71..52f55279c88c 100644 --- a/tests/disas/gc/drc/array-new-fixed-of-gc-refs.wat +++ b/tests/disas/gc/drc/array-new-fixed-of-gc-refs.wat @@ -25,101 +25,103 @@ ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64, v2: i32, v3: i32, v4: i32): -;; v135 = stack_addr.i64 ss2 -;; store notrap v2, v135 -;; v134 = stack_addr.i64 ss1 -;; store notrap v3, v134 -;; v133 = stack_addr.i64 ss0 -;; store notrap v4, v133 +;; v137 = stack_addr.i64 ss2 +;; store notrap v2, v137 +;; v136 = stack_addr.i64 ss1 +;; store notrap v3, v136 +;; v135 = stack_addr.i64 ss0 +;; store notrap v4, v135 ;; @0025 v14 = iconst.i32 -1476395008 -;; @0025 v15 = iconst.i32 0 -;; v148 = iconst.i32 40 -;; @0025 v16 = iconst.i32 8 -;; @0025 v17 = call fn0(v0, v14, v15, v148, v16), stack_map=[i32 @ ss2+0, i32 @ ss1+0, i32 @ ss0+0] ; v14 = -1476395008, v15 = 0, v148 = 40, v16 = 8 +;; @0025 v16 = load.i64 notrap aligned readonly can_move v0+40 +;; @0025 v17 = load.i32 notrap aligned readonly can_move v16 +;; v150 = iconst.i32 40 +;; @0025 v18 = iconst.i32 8 +;; @0025 v19 = call fn0(v0, v14, v17, v150, v18), stack_map=[i32 @ ss2+0, i32 @ ss1+0, i32 @ ss0+0] ; v14 = -1476395008, v150 = 40, v18 = 8 ;; @0025 v6 = iconst.i32 3 -;; @0025 v129 = load.i64 notrap aligned readonly can_move v0+8 
-;; @0025 v18 = load.i64 notrap aligned readonly can_move v129+32 -;; @0025 v19 = uextend.i64 v17 -;; @0025 v20 = iadd v18, v19 -;; v128 = iconst.i64 24 -;; @0025 v21 = iadd v20, v128 ; v128 = 24 -;; @0025 store notrap aligned v6, v21 ; v6 = 3 -;; v91 = load.i32 notrap v135 -;; v126 = iconst.i32 1 -;; @0025 v26 = band v91, v126 ; v126 = 1 -;; @0025 v27 = icmp eq v91, v15 ; v15 = 0 -;; @0025 v28 = uextend.i32 v27 -;; @0025 v29 = bor v26, v28 -;; @0025 brif v29, block3, block2 +;; @0025 v131 = load.i64 notrap aligned readonly can_move v0+8 +;; @0025 v20 = load.i64 notrap aligned readonly can_move v131+32 +;; @0025 v21 = uextend.i64 v19 +;; @0025 v22 = iadd v20, v21 +;; v130 = iconst.i64 24 +;; @0025 v23 = iadd v22, v130 ; v130 = 24 +;; @0025 store notrap aligned v6, v23 ; v6 = 3 +;; v93 = load.i32 notrap v137 +;; v128 = iconst.i32 1 +;; @0025 v28 = band v93, v128 ; v128 = 1 +;; v126 = iconst.i32 0 +;; @0025 v29 = icmp eq v93, v126 ; v126 = 0 +;; @0025 v30 = uextend.i32 v29 +;; @0025 v31 = bor v28, v30 +;; @0025 brif v31, block3, block2 ;; ;; block2: -;; @0025 v30 = uextend.i64 v91 -;; @0025 v32 = iadd.i64 v18, v30 -;; v166 = iconst.i64 8 -;; @0025 v34 = iadd v32, v166 ; v166 = 8 -;; @0025 v35 = load.i64 notrap aligned v34 -;; v96 = iconst.i64 1 -;; @0025 v36 = iadd v35, v96 ; v96 = 1 -;; @0025 store notrap aligned v36, v34 +;; @0025 v32 = uextend.i64 v93 +;; @0025 v34 = iadd.i64 v20, v32 +;; v168 = iconst.i64 8 +;; @0025 v36 = iadd v34, v168 ; v168 = 8 +;; @0025 v37 = load.i64 notrap aligned v36 +;; v98 = iconst.i64 1 +;; @0025 v38 = iadd v37, v98 ; v98 = 1 +;; @0025 store notrap aligned v38, v36 ;; @0025 jump block3 ;; ;; block3: -;; v87 = load.i32 notrap v135 -;; v150 = iconst.i64 28 -;; v156 = iadd.i64 v20, v150 ; v150 = 28 -;; @0025 store notrap aligned little v87, v156 -;; v86 = load.i32 notrap v134 -;; v256 = iconst.i32 1 -;; v257 = band v86, v256 ; v256 = 1 -;; v258 = iconst.i32 0 -;; v259 = icmp eq v86, v258 ; v258 = 0 -;; @0025 v45 = uextend.i32 v259 -;; 
@0025 v46 = bor v257, v45 -;; @0025 brif v46, block5, block4 +;; v89 = load.i32 notrap v137 +;; v152 = iconst.i64 28 +;; v158 = iadd.i64 v22, v152 ; v152 = 28 +;; @0025 store notrap aligned little v89, v158 +;; v88 = load.i32 notrap v136 +;; v258 = iconst.i32 1 +;; v259 = band v88, v258 ; v258 = 1 +;; v260 = iconst.i32 0 +;; v261 = icmp eq v88, v260 ; v260 = 0 +;; @0025 v47 = uextend.i32 v261 +;; @0025 v48 = bor v259, v47 +;; @0025 brif v48, block5, block4 ;; ;; block4: -;; @0025 v47 = uextend.i64 v86 -;; @0025 v49 = iadd.i64 v18, v47 -;; v260 = iconst.i64 8 -;; @0025 v51 = iadd v49, v260 ; v260 = 8 -;; @0025 v52 = load.i64 notrap aligned v51 -;; v261 = iconst.i64 1 -;; @0025 v53 = iadd v52, v261 ; v261 = 1 -;; @0025 store notrap aligned v53, v51 +;; @0025 v49 = uextend.i64 v88 +;; @0025 v51 = iadd.i64 v20, v49 +;; v262 = iconst.i64 8 +;; @0025 v53 = iadd v51, v262 ; v262 = 8 +;; @0025 v54 = load.i64 notrap aligned v53 +;; v263 = iconst.i64 1 +;; @0025 v55 = iadd v54, v263 ; v263 = 1 +;; @0025 store notrap aligned v55, v53 ;; @0025 jump block5 ;; ;; block5: -;; v82 = load.i32 notrap v134 -;; v131 = iconst.i64 32 -;; v164 = iadd.i64 v20, v131 ; v131 = 32 -;; @0025 store notrap aligned little v82, v164 -;; v81 = load.i32 notrap v133 -;; v262 = iconst.i32 1 -;; v263 = band v81, v262 ; v262 = 1 -;; v264 = iconst.i32 0 -;; v265 = icmp eq v81, v264 ; v264 = 0 -;; @0025 v62 = uextend.i32 v265 -;; @0025 v63 = bor v263, v62 -;; @0025 brif v63, block7, block6 +;; v84 = load.i32 notrap v136 +;; v133 = iconst.i64 32 +;; v166 = iadd.i64 v22, v133 ; v133 = 32 +;; @0025 store notrap aligned little v84, v166 +;; v83 = load.i32 notrap v135 +;; v264 = iconst.i32 1 +;; v265 = band v83, v264 ; v264 = 1 +;; v266 = iconst.i32 0 +;; v267 = icmp eq v83, v266 ; v266 = 0 +;; @0025 v64 = uextend.i32 v267 +;; @0025 v65 = bor v265, v64 +;; @0025 brif v65, block7, block6 ;; ;; block6: -;; @0025 v64 = uextend.i64 v81 -;; @0025 v66 = iadd.i64 v18, v64 -;; v266 = iconst.i64 8 -;; @0025 v68 = iadd 
v66, v266 ; v266 = 8 -;; @0025 v69 = load.i64 notrap aligned v68 -;; v267 = iconst.i64 1 -;; @0025 v70 = iadd v69, v267 ; v267 = 1 -;; @0025 store notrap aligned v70, v68 +;; @0025 v66 = uextend.i64 v83 +;; @0025 v68 = iadd.i64 v20, v66 +;; v268 = iconst.i64 8 +;; @0025 v70 = iadd v68, v268 ; v268 = 8 +;; @0025 v71 = load.i64 notrap aligned v70 +;; v269 = iconst.i64 1 +;; @0025 v72 = iadd v71, v269 ; v269 = 1 +;; @0025 store notrap aligned v72, v70 ;; @0025 jump block7 ;; ;; block7: -;; v77 = load.i32 notrap v133 -;; v182 = iconst.i64 36 -;; v188 = iadd.i64 v20, v182 ; v182 = 36 -;; @0025 store notrap aligned little v77, v188 +;; v79 = load.i32 notrap v135 +;; v184 = iconst.i64 36 +;; v190 = iadd.i64 v22, v184 ; v184 = 36 +;; @0025 store notrap aligned little v79, v190 ;; @0029 jump block1 ;; ;; block1: -;; @0029 return v17 +;; @0029 return v19 ;; } diff --git a/tests/disas/gc/drc/array-new-fixed.wat b/tests/disas/gc/drc/array-new-fixed.wat index 14e2cf718478..ebcfcfdfa366 100644 --- a/tests/disas/gc/drc/array-new-fixed.wat +++ b/tests/disas/gc/drc/array-new-fixed.wat @@ -22,29 +22,30 @@ ;; ;; block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64): ;; @0025 v14 = iconst.i32 -1476395008 -;; @0025 v15 = iconst.i32 0 -;; v45 = iconst.i32 56 -;; @0025 v16 = iconst.i32 8 -;; @0025 v17 = call fn0(v0, v14, v15, v45, v16) ; v14 = -1476395008, v15 = 0, v45 = 56, v16 = 8 +;; @0025 v16 = load.i64 notrap aligned readonly can_move v0+40 +;; @0025 v17 = load.i32 notrap aligned readonly can_move v16 +;; v47 = iconst.i32 56 +;; @0025 v18 = iconst.i32 8 +;; @0025 v19 = call fn0(v0, v14, v17, v47, v18) ; v14 = -1476395008, v47 = 56, v18 = 8 ;; @0025 v6 = iconst.i32 3 -;; @0025 v30 = load.i64 notrap aligned readonly can_move v0+8 -;; @0025 v18 = load.i64 notrap aligned readonly can_move v30+32 -;; @0025 v19 = uextend.i64 v17 -;; @0025 v20 = iadd v18, v19 -;; v35 = iconst.i64 24 -;; @0025 v21 = iadd v20, v35 ; v35 = 24 -;; @0025 store notrap aligned v6, v21 ; v6 = 3 -;; v32 = iconst.i64 
32 -;; v52 = iadd v20, v32 ; v32 = 32 -;; @0025 store notrap aligned little v2, v52 -;; v55 = iconst.i64 40 -;; v61 = iadd v20, v55 ; v55 = 40 -;; @0025 store notrap aligned little v3, v61 -;; v79 = iconst.i64 48 -;; v85 = iadd v20, v79 ; v79 = 48 -;; @0025 store notrap aligned little v4, v85 +;; @0025 v32 = load.i64 notrap aligned readonly can_move v0+8 +;; @0025 v20 = load.i64 notrap aligned readonly can_move v32+32 +;; @0025 v21 = uextend.i64 v19 +;; @0025 v22 = iadd v20, v21 +;; v37 = iconst.i64 24 +;; @0025 v23 = iadd v22, v37 ; v37 = 24 +;; @0025 store notrap aligned v6, v23 ; v6 = 3 +;; v34 = iconst.i64 32 +;; v54 = iadd v22, v34 ; v34 = 32 +;; @0025 store notrap aligned little v2, v54 +;; v57 = iconst.i64 40 +;; v63 = iadd v22, v57 ; v57 = 40 +;; @0025 store notrap aligned little v3, v63 +;; v81 = iconst.i64 48 +;; v87 = iadd v22, v81 ; v81 = 48 +;; @0025 store notrap aligned little v4, v87 ;; @0029 jump block1 ;; ;; block1: -;; @0029 return v17 +;; @0029 return v19 ;; } diff --git a/tests/disas/gc/drc/array-new.wat b/tests/disas/gc/drc/array-new.wat index f571e9ac214a..0f09e1c0fca9 100644 --- a/tests/disas/gc/drc/array-new.wat +++ b/tests/disas/gc/drc/array-new.wat @@ -22,45 +22,46 @@ ;; ;; block0(v0: i64, v1: i64, v2: i64, v3: i32): ;; @0022 v6 = uextend.i64 v3 -;; v35 = iconst.i64 3 -;; v36 = ishl v6, v35 ; v35 = 3 -;; v33 = iconst.i64 32 -;; @0022 v8 = ushr v36, v33 ; v33 = 32 +;; v37 = iconst.i64 3 +;; v38 = ishl v6, v37 ; v37 = 3 +;; v35 = iconst.i64 32 +;; @0022 v8 = ushr v38, v35 ; v35 = 32 ;; @0022 trapnz v8, user17 ;; @0022 v5 = iconst.i32 32 -;; v42 = iconst.i32 3 -;; v43 = ishl v3, v42 ; v42 = 3 -;; @0022 v10 = uadd_overflow_trap v5, v43, user17 ; v5 = 32 +;; v44 = iconst.i32 3 +;; v45 = ishl v3, v44 ; v44 = 3 +;; @0022 v10 = uadd_overflow_trap v5, v45, user17 ; v5 = 32 ;; @0022 v12 = iconst.i32 -1476395008 -;; @0022 v13 = iconst.i32 0 -;; v40 = iconst.i32 8 -;; @0022 v15 = call fn0(v0, v12, v13, v10, v40) ; v12 = -1476395008, v13 = 0, v40 = 8 
-;; @0022 v31 = load.i64 notrap aligned readonly can_move v0+8 -;; @0022 v16 = load.i64 notrap aligned readonly can_move v31+32 -;; @0022 v17 = uextend.i64 v15 -;; @0022 v18 = iadd v16, v17 -;; v30 = iconst.i64 24 -;; @0022 v19 = iadd v18, v30 ; v30 = 24 -;; @0022 store notrap aligned v3, v19 -;; v52 = iadd v18, v33 ; v33 = 32 -;; @0022 v25 = uextend.i64 v10 -;; @0022 v26 = iadd v18, v25 -;; v34 = iconst.i64 8 -;; @0022 jump block2(v52) +;; @0022 v14 = load.i64 notrap aligned readonly can_move v0+40 +;; @0022 v15 = load.i32 notrap aligned readonly can_move v14 +;; v42 = iconst.i32 8 +;; @0022 v17 = call fn0(v0, v12, v15, v10, v42) ; v12 = -1476395008, v42 = 8 +;; @0022 v33 = load.i64 notrap aligned readonly can_move v0+8 +;; @0022 v18 = load.i64 notrap aligned readonly can_move v33+32 +;; @0022 v19 = uextend.i64 v17 +;; @0022 v20 = iadd v18, v19 +;; v32 = iconst.i64 24 +;; @0022 v21 = iadd v20, v32 ; v32 = 24 +;; @0022 store notrap aligned v3, v21 +;; v54 = iadd v20, v35 ; v35 = 32 +;; @0022 v27 = uextend.i64 v10 +;; @0022 v28 = iadd v20, v27 +;; v36 = iconst.i64 8 +;; @0022 jump block2(v54) ;; -;; block2(v27: i64): -;; @0022 v28 = icmp eq v27, v26 -;; @0022 brif v28, block4, block3 +;; block2(v29: i64): +;; @0022 v30 = icmp eq v29, v28 +;; @0022 brif v30, block4, block3 ;; ;; block3: -;; @0022 store.i64 notrap aligned little v2, v27 -;; v57 = iconst.i64 8 -;; v58 = iadd.i64 v27, v57 ; v57 = 8 -;; @0022 jump block2(v58) +;; @0022 store.i64 notrap aligned little v2, v29 +;; v59 = iconst.i64 8 +;; v60 = iadd.i64 v29, v59 ; v59 = 8 +;; @0022 jump block2(v60) ;; ;; block4: ;; @0025 jump block1 ;; ;; block1: -;; @0025 return v15 +;; @0025 return v17 ;; } diff --git a/tests/disas/gc/drc/funcref-in-gc-heap-new.wat b/tests/disas/gc/drc/funcref-in-gc-heap-new.wat index 55c1c80ca7a9..6c17ce97741f 100644 --- a/tests/disas/gc/drc/funcref-in-gc-heap-new.wat +++ b/tests/disas/gc/drc/funcref-in-gc-heap-new.wat @@ -25,24 +25,25 @@ ;; ;; block0(v0: i64, v1: i64, v2: i64): ;; @0020 
v6 = iconst.i32 -1342177280 -;; @0020 v7 = iconst.i32 0 +;; @0020 v8 = load.i64 notrap aligned readonly can_move v0+40 +;; @0020 v9 = load.i32 notrap aligned readonly can_move v8 ;; @0020 v4 = iconst.i32 32 -;; @0020 v8 = iconst.i32 8 -;; @0020 v9 = call fn0(v0, v6, v7, v4, v8) ; v6 = -1342177280, v7 = 0, v4 = 32, v8 = 8 -;; v24 = stack_addr.i64 ss0 -;; store notrap v9, v24 -;; @0020 v15 = call fn1(v0, v2), stack_map=[i32 @ ss0+0] -;; @0020 v16 = ireduce.i32 v15 -;; @0020 v22 = load.i64 notrap aligned readonly can_move v0+8 -;; @0020 v10 = load.i64 notrap aligned readonly can_move v22+32 -;; @0020 v11 = uextend.i64 v9 -;; @0020 v12 = iadd v10, v11 -;; v20 = iconst.i64 24 -;; @0020 v13 = iadd v12, v20 ; v20 = 24 -;; @0020 store notrap aligned little v16, v13 -;; v17 = load.i32 notrap v24 +;; @0020 v10 = iconst.i32 8 +;; @0020 v11 = call fn0(v0, v6, v9, v4, v10) ; v6 = -1342177280, v4 = 32, v10 = 8 +;; v26 = stack_addr.i64 ss0 +;; store notrap v11, v26 +;; @0020 v17 = call fn1(v0, v2), stack_map=[i32 @ ss0+0] +;; @0020 v18 = ireduce.i32 v17 +;; @0020 v24 = load.i64 notrap aligned readonly can_move v0+8 +;; @0020 v12 = load.i64 notrap aligned readonly can_move v24+32 +;; @0020 v13 = uextend.i64 v11 +;; @0020 v14 = iadd v12, v13 +;; v22 = iconst.i64 24 +;; @0020 v15 = iadd v14, v22 ; v22 = 24 +;; @0020 store notrap aligned little v18, v15 +;; v19 = load.i32 notrap v26 ;; @0023 jump block1 ;; ;; block1: -;; @0023 return v17 +;; @0023 return v19 ;; } diff --git a/tests/disas/gc/drc/struct-new-default.wat b/tests/disas/gc/drc/struct-new-default.wat index 5213951ba4d2..ea29bf2de328 100644 --- a/tests/disas/gc/drc/struct-new-default.wat +++ b/tests/disas/gc/drc/struct-new-default.wat @@ -25,40 +25,42 @@ ;; ;; block0(v0: i64, v1: i64): ;; @0021 v8 = iconst.i32 -1342177280 -;; @0021 v4 = iconst.i32 0 +;; @0021 v10 = load.i64 notrap aligned readonly can_move v0+40 +;; @0021 v11 = load.i32 notrap aligned readonly can_move v10 ;; @0021 v6 = iconst.i32 40 -;; @0021 v10 = 
iconst.i32 8 -;; @0021 v11 = call fn0(v0, v8, v4, v6, v10) ; v8 = -1342177280, v4 = 0, v6 = 40, v10 = 8 +;; @0021 v12 = iconst.i32 8 +;; @0021 v13 = call fn0(v0, v8, v11, v6, v12) ; v8 = -1342177280, v6 = 40, v12 = 8 ;; @0021 v3 = f32const 0.0 -;; @0021 v44 = load.i64 notrap aligned readonly can_move v0+8 -;; @0021 v12 = load.i64 notrap aligned readonly can_move v44+32 -;; @0021 v13 = uextend.i64 v11 -;; @0021 v14 = iadd v12, v13 -;; v43 = iconst.i64 24 -;; @0021 v15 = iadd v14, v43 ; v43 = 24 -;; @0021 store notrap aligned little v3, v15 ; v3 = 0.0 -;; v42 = iconst.i64 28 -;; @0021 v16 = iadd v14, v42 ; v42 = 28 -;; @0021 istore8 notrap aligned little v4, v16 ; v4 = 0 -;; v40 = iconst.i32 1 -;; @0021 brif v40, block3, block2 ; v40 = 1 +;; @0021 v46 = load.i64 notrap aligned readonly can_move v0+8 +;; @0021 v14 = load.i64 notrap aligned readonly can_move v46+32 +;; @0021 v15 = uextend.i64 v13 +;; @0021 v16 = iadd v14, v15 +;; v45 = iconst.i64 24 +;; @0021 v17 = iadd v16, v45 ; v45 = 24 +;; @0021 store notrap aligned little v3, v17 ; v3 = 0.0 +;; @0021 v4 = iconst.i32 0 +;; v44 = iconst.i64 28 +;; @0021 v18 = iadd v16, v44 ; v44 = 28 +;; @0021 istore8 notrap aligned little v4, v18 ; v4 = 0 +;; v42 = iconst.i32 1 +;; @0021 brif v42, block3, block2 ; v42 = 1 ;; ;; block2: -;; @0021 v25 = iconst.i64 8 -;; @0021 v26 = iadd.i64 v12, v25 ; v25 = 8 -;; @0021 v27 = load.i64 notrap aligned v26 -;; v36 = iconst.i64 1 -;; @0021 v28 = iadd v27, v36 ; v36 = 1 -;; @0021 store notrap aligned v28, v26 +;; @0021 v27 = iconst.i64 8 +;; @0021 v28 = iadd.i64 v14, v27 ; v27 = 8 +;; @0021 v29 = load.i64 notrap aligned v28 +;; v38 = iconst.i64 1 +;; @0021 v30 = iadd v29, v38 ; v38 = 1 +;; @0021 store notrap aligned v30, v28 ;; @0021 jump block3 ;; ;; block3: -;; v66 = iconst.i32 0 -;; v41 = iconst.i64 32 -;; @0021 v17 = iadd.i64 v14, v41 ; v41 = 32 -;; @0021 store notrap aligned little v66, v17 ; v66 = 0 +;; v68 = iconst.i32 0 +;; v43 = iconst.i64 32 +;; @0021 v19 = iadd.i64 v16, v43 ; 
v43 = 32 +;; @0021 store notrap aligned little v68, v19 ; v68 = 0 ;; @0024 jump block1 ;; ;; block1: -;; @0024 return v11 +;; @0024 return v13 ;; } diff --git a/tests/disas/gc/drc/struct-new.wat b/tests/disas/gc/drc/struct-new.wat index 621fd0c2fc2e..f1cbdf51d536 100644 --- a/tests/disas/gc/drc/struct-new.wat +++ b/tests/disas/gc/drc/struct-new.wat @@ -25,49 +25,51 @@ ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64, v2: f32, v3: i32, v4: i32): -;; v56 = stack_addr.i64 ss0 -;; store notrap v4, v56 +;; v58 = stack_addr.i64 ss0 +;; store notrap v4, v58 ;; @002a v8 = iconst.i32 -1342177280 -;; @002a v9 = iconst.i32 0 +;; @002a v10 = load.i64 notrap aligned readonly can_move v0+40 +;; @002a v11 = load.i32 notrap aligned readonly can_move v10 ;; @002a v6 = iconst.i32 40 -;; @002a v10 = iconst.i32 8 -;; @002a v11 = call fn0(v0, v8, v9, v6, v10), stack_map=[i32 @ ss0+0] ; v8 = -1342177280, v9 = 0, v6 = 40, v10 = 8 -;; @002a v54 = load.i64 notrap aligned readonly can_move v0+8 -;; @002a v12 = load.i64 notrap aligned readonly can_move v54+32 -;; @002a v13 = uextend.i64 v11 -;; @002a v14 = iadd v12, v13 -;; v53 = iconst.i64 24 -;; @002a v15 = iadd v14, v53 ; v53 = 24 -;; @002a store notrap aligned little v2, v15 -;; v52 = iconst.i64 28 -;; @002a v16 = iadd v14, v52 ; v52 = 28 -;; @002a istore8 notrap aligned little v3, v16 -;; v38 = load.i32 notrap v56 -;; v49 = iconst.i32 1 -;; @002a v18 = band v38, v49 ; v49 = 1 -;; @002a v19 = icmp eq v38, v9 ; v9 = 0 -;; @002a v20 = uextend.i32 v19 -;; @002a v21 = bor v18, v20 -;; @002a brif v21, block3, block2 +;; @002a v12 = iconst.i32 8 +;; @002a v13 = call fn0(v0, v8, v11, v6, v12), stack_map=[i32 @ ss0+0] ; v8 = -1342177280, v6 = 40, v12 = 8 +;; @002a v56 = load.i64 notrap aligned readonly can_move v0+8 +;; @002a v14 = load.i64 notrap aligned readonly can_move v56+32 +;; @002a v15 = uextend.i64 v13 +;; @002a v16 = iadd v14, v15 +;; v55 = iconst.i64 24 +;; @002a v17 = iadd v16, v55 ; v55 = 24 +;; @002a store notrap aligned little 
v2, v17 +;; v54 = iconst.i64 28 +;; @002a v18 = iadd v16, v54 ; v54 = 28 +;; @002a istore8 notrap aligned little v3, v18 +;; v40 = load.i32 notrap v58 +;; v51 = iconst.i32 1 +;; @002a v20 = band v40, v51 ; v51 = 1 +;; v49 = iconst.i32 0 +;; @002a v21 = icmp eq v40, v49 ; v49 = 0 +;; @002a v22 = uextend.i32 v21 +;; @002a v23 = bor v20, v22 +;; @002a brif v23, block3, block2 ;; ;; block2: -;; @002a v22 = uextend.i64 v38 -;; @002a v24 = iadd.i64 v12, v22 -;; @002a v25 = iconst.i64 8 -;; @002a v26 = iadd v24, v25 ; v25 = 8 -;; @002a v27 = load.i64 notrap aligned v26 -;; v43 = iconst.i64 1 -;; @002a v28 = iadd v27, v43 ; v43 = 1 -;; @002a store notrap aligned v28, v26 +;; @002a v24 = uextend.i64 v40 +;; @002a v26 = iadd.i64 v14, v24 +;; @002a v27 = iconst.i64 8 +;; @002a v28 = iadd v26, v27 ; v27 = 8 +;; @002a v29 = load.i64 notrap aligned v28 +;; v45 = iconst.i64 1 +;; @002a v30 = iadd v29, v45 ; v45 = 1 +;; @002a store notrap aligned v30, v28 ;; @002a jump block3 ;; ;; block3: -;; v34 = load.i32 notrap v56 -;; v51 = iconst.i64 32 -;; @002a v17 = iadd.i64 v14, v51 ; v51 = 32 -;; @002a store notrap aligned little v34, v17 +;; v36 = load.i32 notrap v58 +;; v53 = iconst.i64 32 +;; @002a v19 = iadd.i64 v16, v53 ; v53 = 32 +;; @002a store notrap aligned little v36, v19 ;; @002d jump block1 ;; ;; block1: -;; @002d return v11 +;; @002d return v13 ;; } diff --git a/tests/disas/gc/issue-11753.wat b/tests/disas/gc/issue-11753.wat index 0690f3670283..6658c9c06fe8 100644 --- a/tests/disas/gc/issue-11753.wat +++ b/tests/disas/gc/issue-11753.wat @@ -29,21 +29,22 @@ ;; movq 0x18(%r10), %r10 ;; addq $0x60, %r10 ;; cmpq %rsp, %r10 -;; ja 0xbe +;; ja 0xc2 ;; 19: subq $0x50, %rsp ;; movq %rbx, 0x20(%rsp) ;; movq %r12, 0x28(%rsp) ;; movq %r13, 0x30(%rsp) ;; movq %r14, 0x38(%rsp) ;; movq %r15, 0x40(%rsp) -;; movq %rdi, 8(%rsp) ;; movl $0xb0000000, %esi -;; xorl %edx, %edx +;; movq 0x28(%rdi), %rax +;; movq %rdi, 8(%rsp) +;; movl (%rax), %edx ;; movl $0x20, %ecx ;; movl $8, %r8d ;; movq 
8(%rsp), %rbx ;; movq %rbx, %rdi -;; callq 0x215 +;; callq 0x219 ;; movl %eax, (%rsp) ;; movq 8(%rbx), %rcx ;; movq 0x20(%rcx), %rcx @@ -56,12 +57,12 @@ ;; movq %rbx, 8(%rsp) ;; callq *%rax ;; ├─╼ exception frame offset: SP = FP - 0x50 -;; ╰─╼ exception handler: default handler, context at [SP+0x8], handler=0x86 +;; ╰─╼ exception handler: default handler, context at [SP+0x8], handler=0x8a ;; movl (%rsp), %eax ;; ╰─╼ stack_map: frame_size=80, frame_offsets=[0] ;; testl %eax, %eax -;; je 0xc0 -;; 91: movl %eax, %eax +;; je 0xc4 +;; 95: movl %eax, %eax ;; movq 0x10(%rsp), %rcx ;; movl 0x18(%rcx, %rax), %eax ;; movq 0x20(%rsp), %rbx @@ -73,7 +74,7 @@ ;; movq %rbp, %rsp ;; popq %rbp ;; retq -;; be: ud2 +;; c2: ud2 ;; ╰─╼ trap: StackOverflow -;; c0: ud2 +;; c4: ud2 ;; ╰─╼ trap: NullReference diff --git a/tests/disas/gc/struct-new-default.wat b/tests/disas/gc/struct-new-default.wat index 795bba74b8f5..1317c27b9b6d 100644 --- a/tests/disas/gc/struct-new-default.wat +++ b/tests/disas/gc/struct-new-default.wat @@ -27,44 +27,46 @@ ;; ;; block0(v0: i64, v1: i64): ;; @0023 v9 = iconst.i32 -1342177280 -;; @0023 v4 = iconst.i32 0 +;; @0023 v11 = load.i64 notrap aligned readonly can_move v0+40 +;; @0023 v12 = load.i32 notrap aligned readonly can_move v11 ;; @0023 v7 = iconst.i32 64 -;; @0023 v11 = iconst.i32 16 -;; @0023 v12 = call fn0(v0, v9, v4, v7, v11) ; v9 = -1342177280, v4 = 0, v7 = 64, v11 = 16 +;; @0023 v13 = iconst.i32 16 +;; @0023 v14 = call fn0(v0, v9, v12, v7, v13) ; v9 = -1342177280, v7 = 64, v13 = 16 ;; @0023 v3 = f32const 0.0 -;; @0023 v47 = load.i64 notrap aligned readonly can_move v0+8 -;; @0023 v13 = load.i64 notrap aligned readonly can_move v47+32 -;; @0023 v14 = uextend.i64 v12 -;; @0023 v15 = iadd v13, v14 -;; v46 = iconst.i64 24 -;; @0023 v16 = iadd v15, v46 ; v46 = 24 -;; @0023 store notrap aligned little v3, v16 ; v3 = 0.0 -;; v45 = iconst.i64 28 -;; @0023 v17 = iadd v15, v45 ; v45 = 28 -;; @0023 istore8 notrap aligned little v4, v17 ; v4 = 0 -;; v43 = 
iconst.i32 1 -;; @0023 brif v43, block3, block2 ; v43 = 1 +;; @0023 v49 = load.i64 notrap aligned readonly can_move v0+8 +;; @0023 v15 = load.i64 notrap aligned readonly can_move v49+32 +;; @0023 v16 = uextend.i64 v14 +;; @0023 v17 = iadd v15, v16 +;; v48 = iconst.i64 24 +;; @0023 v18 = iadd v17, v48 ; v48 = 24 +;; @0023 store notrap aligned little v3, v18 ; v3 = 0.0 +;; @0023 v4 = iconst.i32 0 +;; v47 = iconst.i64 28 +;; @0023 v19 = iadd v17, v47 ; v47 = 28 +;; @0023 istore8 notrap aligned little v4, v19 ; v4 = 0 +;; v45 = iconst.i32 1 +;; @0023 brif v45, block3, block2 ; v45 = 1 ;; ;; block2: -;; @0023 v26 = iconst.i64 8 -;; @0023 v27 = iadd.i64 v13, v26 ; v26 = 8 -;; @0023 v28 = load.i64 notrap aligned v27 -;; v39 = iconst.i64 1 -;; @0023 v29 = iadd v28, v39 ; v39 = 1 -;; @0023 store notrap aligned v29, v27 +;; @0023 v28 = iconst.i64 8 +;; @0023 v29 = iadd.i64 v15, v28 ; v28 = 8 +;; @0023 v30 = load.i64 notrap aligned v29 +;; v41 = iconst.i64 1 +;; @0023 v31 = iadd v30, v41 ; v41 = 1 +;; @0023 store notrap aligned v31, v29 ;; @0023 jump block3 ;; ;; block3: -;; v69 = iconst.i32 0 -;; v44 = iconst.i64 32 -;; @0023 v18 = iadd.i64 v15, v44 ; v44 = 32 -;; @0023 store notrap aligned little v69, v18 ; v69 = 0 +;; v71 = iconst.i32 0 +;; v46 = iconst.i64 32 +;; @0023 v20 = iadd.i64 v17, v46 ; v46 = 32 +;; @0023 store notrap aligned little v71, v20 ; v71 = 0 ;; @0023 v6 = vconst.i8x16 const0 -;; v36 = iconst.i64 48 -;; @0023 v35 = iadd.i64 v15, v36 ; v36 = 48 -;; @0023 store notrap aligned little v6, v35 ; v6 = const0 +;; v38 = iconst.i64 48 +;; @0023 v37 = iadd.i64 v17, v38 ; v38 = 48 +;; @0023 store notrap aligned little v6, v37 ; v6 = const0 ;; @0026 jump block1 ;; ;; block1: -;; @0026 return v12 +;; @0026 return v14 ;; } diff --git a/tests/disas/gc/struct-new-stack-map.wat b/tests/disas/gc/struct-new-stack-map.wat index a00f86460538..0d8118b7eafd 100644 --- a/tests/disas/gc/struct-new-stack-map.wat +++ b/tests/disas/gc/struct-new-stack-map.wat @@ -18,7 +18,7 @@ ;; 
movq 0x18(%r10), %r10 ;; addq $0x40, %r10 ;; cmpq %rsp, %r10 -;; ja 0xaf +;; ja 0xb3 ;; 19: subq $0x30, %rsp ;; movq %rbx, 0x20(%rsp) ;; movq %r12, 0x28(%rsp) @@ -26,11 +26,12 @@ ;; movdqu %xmm0, 8(%rsp) ;; movl %ecx, (%rsp) ;; movl $0xb0000000, %esi -;; xorl %edx, %edx +;; movq 0x28(%rdi), %rax +;; movq %rdi, %rbx +;; movl (%rax), %edx ;; movl $0x28, %ecx ;; movl $8, %r8d -;; movq %rdi, %rbx -;; callq 0x125 +;; callq 0x129 ;; movq 8(%rbx), %rcx ;; ╰─╼ stack_map: frame_size=48, frame_offsets=[0] ;; movq 0x20(%rcx), %rcx @@ -47,8 +48,8 @@ ;; movzbl %r8b, %r8d ;; orl %r8d, %edi ;; testl %edi, %edi -;; jne 0x95 -;; 89: movl %esi, %esi +;; jne 0x99 +;; 8d: movl %esi, %esi ;; leaq (%rcx, %rsi), %rdi ;; addq $1, 8(%rcx, %rsi) ;; movl (%rsp), %esi @@ -59,4 +60,4 @@ ;; movq %rbp, %rsp ;; popq %rbp ;; retq -;; af: ud2 +;; b3: ud2 diff --git a/tests/disas/gc/struct-new.wat b/tests/disas/gc/struct-new.wat index 3020d440a05d..8777f9a196e9 100644 --- a/tests/disas/gc/struct-new.wat +++ b/tests/disas/gc/struct-new.wat @@ -25,49 +25,51 @@ ;; stack_limit = gv2 ;; ;; block0(v0: i64, v1: i64, v2: f32, v3: i32, v4: i32): -;; v56 = stack_addr.i64 ss0 -;; store notrap v4, v56 +;; v58 = stack_addr.i64 ss0 +;; store notrap v4, v58 ;; @002a v8 = iconst.i32 -1342177280 -;; @002a v9 = iconst.i32 0 +;; @002a v10 = load.i64 notrap aligned readonly can_move v0+40 +;; @002a v11 = load.i32 notrap aligned readonly can_move v10 ;; @002a v6 = iconst.i32 40 -;; @002a v10 = iconst.i32 8 -;; @002a v11 = call fn0(v0, v8, v9, v6, v10), stack_map=[i32 @ ss0+0] ; v8 = -1342177280, v9 = 0, v6 = 40, v10 = 8 -;; @002a v54 = load.i64 notrap aligned readonly can_move v0+8 -;; @002a v12 = load.i64 notrap aligned readonly can_move v54+32 -;; @002a v13 = uextend.i64 v11 -;; @002a v14 = iadd v12, v13 -;; v53 = iconst.i64 24 -;; @002a v15 = iadd v14, v53 ; v53 = 24 -;; @002a store notrap aligned little v2, v15 -;; v52 = iconst.i64 28 -;; @002a v16 = iadd v14, v52 ; v52 = 28 -;; @002a istore8 notrap aligned little 
v3, v16 -;; v38 = load.i32 notrap v56 -;; v49 = iconst.i32 1 -;; @002a v18 = band v38, v49 ; v49 = 1 -;; @002a v19 = icmp eq v38, v9 ; v9 = 0 -;; @002a v20 = uextend.i32 v19 -;; @002a v21 = bor v18, v20 -;; @002a brif v21, block3, block2 +;; @002a v12 = iconst.i32 8 +;; @002a v13 = call fn0(v0, v8, v11, v6, v12), stack_map=[i32 @ ss0+0] ; v8 = -1342177280, v6 = 40, v12 = 8 +;; @002a v56 = load.i64 notrap aligned readonly can_move v0+8 +;; @002a v14 = load.i64 notrap aligned readonly can_move v56+32 +;; @002a v15 = uextend.i64 v13 +;; @002a v16 = iadd v14, v15 +;; v55 = iconst.i64 24 +;; @002a v17 = iadd v16, v55 ; v55 = 24 +;; @002a store notrap aligned little v2, v17 +;; v54 = iconst.i64 28 +;; @002a v18 = iadd v16, v54 ; v54 = 28 +;; @002a istore8 notrap aligned little v3, v18 +;; v40 = load.i32 notrap v58 +;; v51 = iconst.i32 1 +;; @002a v20 = band v40, v51 ; v51 = 1 +;; v49 = iconst.i32 0 +;; @002a v21 = icmp eq v40, v49 ; v49 = 0 +;; @002a v22 = uextend.i32 v21 +;; @002a v23 = bor v20, v22 +;; @002a brif v23, block3, block2 ;; ;; block2: -;; @002a v22 = uextend.i64 v38 -;; @002a v24 = iadd.i64 v12, v22 -;; @002a v25 = iconst.i64 8 -;; @002a v26 = iadd v24, v25 ; v25 = 8 -;; @002a v27 = load.i64 notrap aligned v26 -;; v43 = iconst.i64 1 -;; @002a v28 = iadd v27, v43 ; v43 = 1 -;; @002a store notrap aligned v28, v26 +;; @002a v24 = uextend.i64 v40 +;; @002a v26 = iadd.i64 v14, v24 +;; @002a v27 = iconst.i64 8 +;; @002a v28 = iadd v26, v27 ; v27 = 8 +;; @002a v29 = load.i64 notrap aligned v28 +;; v45 = iconst.i64 1 +;; @002a v30 = iadd v29, v45 ; v45 = 1 +;; @002a store notrap aligned v30, v28 ;; @002a jump block3 ;; ;; block3: -;; v34 = load.i32 notrap v56 -;; v51 = iconst.i64 32 -;; @002a v17 = iadd.i64 v14, v51 ; v51 = 32 -;; @002a store notrap aligned little v34, v17 +;; v36 = load.i32 notrap v58 +;; v53 = iconst.i64 32 +;; @002a v19 = iadd.i64 v16, v53 ; v53 = 32 +;; @002a store notrap aligned little v36, v19 ;; @002d jump block1 ;; ;; block1: -;; @002d 
return v11 +;; @002d return v13 ;; }