diff --git a/src/boxed.rs b/src/boxed.rs index 081ffee..df8186c 100644 --- a/src/boxed.rs +++ b/src/boxed.rs @@ -6,15 +6,15 @@ use alloc::{alloc::Layout, string::String}; use core::{ mem::align_of, ops::{Deref, DerefMut}, - ptr::NonNull, }; use crate::{ops::GenericString, MAX_INLINE}; +use crate::TaggedPtr; #[cfg(target_endian = "little")] #[repr(C)] pub(crate) struct BoxedString { - ptr: NonNull, + ptr: TaggedPtr, cap: usize, len: usize, } @@ -24,16 +24,7 @@ pub(crate) struct BoxedString { pub(crate) struct BoxedString { len: usize, cap: usize, - ptr: NonNull, -} - -/// Checks if a pointer is aligned to an even address (good) -/// or an odd address (either actually an InlineString or very, very bad). -/// -/// Returns `true` if aligned to an odd address, `false` if even. The sense of -/// the boolean is "does this look like an InlineString? true/false" -fn check_alignment(ptr: *const u8) -> bool { - ptr.align_offset(2) > 0 + ptr: TaggedPtr, } impl GenericString for BoxedString { @@ -45,7 +36,7 @@ impl GenericString for BoxedString { fn as_mut_capacity_slice(&mut self) -> &mut [u8] { #[allow(unsafe_code)] unsafe { - core::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.capacity()) + core::slice::from_raw_parts_mut(self.ptr.as_non_null().as_ptr(), self.capacity()) } } } @@ -53,15 +44,11 @@ impl GenericString for BoxedString { impl BoxedString { const MINIMAL_CAPACITY: usize = MAX_INLINE * 2; - pub(crate) fn check_alignment(this: &Self) -> bool { - check_alignment(this.ptr.as_ptr()) - } - fn layout_for(cap: usize) -> Layout { - // Always request memory that is specifically aligned to at least 2, so - // the least significant bit is guaranteed to be 0. + // Always request memory that is specifically aligned to at least 4, so + // the least significant two bits are guaranteed to be 0. let layout = Layout::array::(cap) - .and_then(|layout| layout.align_to(align_of::())) + .and_then(|layout| layout.align_to(align_of::())) .unwrap(); assert!( layout.size() <= isize::MAX as usize, @@ -70,29 +57,29 @@ impl BoxedString { layout } - fn alloc(cap: usize) -> NonNull { + fn alloc(cap: usize) -> TaggedPtr { let layout = Self::layout_for(cap); #[allow(unsafe_code)] - let ptr = match NonNull::new(unsafe { alloc::alloc::alloc(layout) }) { + let ptr = match TaggedPtr::new(unsafe { alloc::alloc::alloc(layout) }) { Some(ptr) => ptr, None => alloc::alloc::handle_alloc_error(layout), }; - debug_assert!(ptr.as_ptr().align_offset(2) == 0); + debug_assert!(ptr.as_non_null().as_ptr().align_offset(4) == 0); ptr } fn realloc(&mut self, cap: usize) { let layout = Self::layout_for(cap); let old_layout = Self::layout_for(self.cap); - let old_ptr = self.ptr.as_ptr(); + let old_ptr = self.ptr.as_non_null().as_ptr(); #[allow(unsafe_code)] let ptr = unsafe { alloc::alloc::realloc(old_ptr, old_layout, layout.size()) }; - self.ptr = match NonNull::new(ptr) { + self.ptr = match TaggedPtr::new(ptr) { Some(ptr) => ptr, None => alloc::alloc::handle_alloc_error(layout), }; self.cap = cap; - debug_assert!(self.ptr.as_ptr().align_offset(2) == 0); + debug_assert!(self.ptr.as_non_null().as_ptr().align_offset(4) == 0); } pub(crate) fn ensure_capacity(&mut self, target_cap: usize) { @@ -132,7 +119,7 @@ impl Drop for BoxedString { fn drop(&mut self) { #[allow(unsafe_code)] unsafe { - alloc::alloc::dealloc(self.ptr.as_ptr(), Self::layout_for(self.cap)) + alloc::alloc::dealloc(self.ptr.as_non_null().as_ptr(), Self::layout_for(self.cap)) } } } @@ -149,7 +136,7 @@ impl Deref for BoxedString { fn deref(&self) -> &Self::Target { #[allow(unsafe_code)] unsafe { - core::str::from_utf8_unchecked(core::slice::from_raw_parts(self.ptr.as_ptr(), self.len)) + core::str::from_utf8_unchecked(core::slice::from_raw_parts(self.ptr.as_non_null().as_ptr(), self.len)) } } } @@ -159,7 +146,7 @@ impl DerefMut for BoxedString { #[allow(unsafe_code)] unsafe { core::str::from_utf8_unchecked_mut(core::slice::from_raw_parts_mut( - self.ptr.as_ptr(), + self.ptr.as_non_null().as_ptr(), self.len, )) } @@ -174,6 +161,8 @@ impl From for BoxedString { } else { #[cfg(has_allocator)] { + use core::ptr::NonNull; + // TODO: Use String::into_raw_parts when stabilised, meanwhile let's get unsafe let len = s.len(); let cap = s.capacity(); @@ -190,7 +179,7 @@ impl From for BoxedString { Self { cap, len, - ptr: aligned_ptr.cast(), + ptr: TaggedPtr::new(aligned_ptr.as_ptr() as *mut _).unwrap(), } } else { Self::from_str(cap, &s) @@ -215,7 +204,7 @@ impl From for String { use alloc::alloc::Allocator; let allocator = alloc::alloc::Global; if let Ok(aligned_ptr) = - unsafe { allocator.grow(ptr, BoxedString::layout_for(cap), new_layout) } + unsafe { allocator.grow(ptr.as_non_null(), BoxedString::layout_for(cap), new_layout) } { core::mem::forget(s); unsafe { String::from_raw_parts(aligned_ptr.as_ptr().cast(), len, cap) } diff --git a/src/lib.rs b/src/lib.rs index 33e0023..6423e13 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -117,7 +117,7 @@ use core::{ hash::{Hash, Hasher}, iter::FromIterator, marker::PhantomData, - mem::{forget, MaybeUninit}, + mem::forget, ops::{ Add, Deref, DerefMut, Index, IndexMut, Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, @@ -126,8 +126,7 @@ use core::{ str::FromStr, }; -#[cfg(feature = "std")] -use std::borrow::Cow; +use alloc::borrow::Cow; mod config; pub use config::{Compact, LazyCompact, SmartStringMode, MAX_INLINE}; @@ -135,6 +134,9 @@ pub use config::{Compact, LazyCompact, SmartStringMode, MAX_INLINE}; mod marker_byte; use marker_byte::Discriminant; +mod tagged_ptr; +use tagged_ptr::TaggedPtr; + mod inline; use inline::InlineString; @@ -189,7 +191,7 @@ pub mod alias { /// one - not without also storing that state in the inline representation, which /// would waste precious bytes for inline string data. pub struct SmartString { - data: MaybeUninit, + data: InlineString, mode: PhantomData, } @@ -248,7 +250,7 @@ impl SmartString { /// once this happens. pub const fn new_const() -> Self { Self { - data: MaybeUninit::new(InlineString::new()), + data: InlineString::new(), mode: PhantomData, } } @@ -263,7 +265,7 @@ impl SmartString { /// once this happens. pub const fn new_const() -> Self { Self { - data: MaybeUninit::new(InlineString::new()), + data: InlineString::new(), mode: PhantomData, } } @@ -278,10 +280,10 @@ impl SmartString { fn from_boxed(boxed: BoxedString) -> Self { let mut out = Self { - data: MaybeUninit::uninit(), + data: InlineString::new(), mode: PhantomData, }; - let data_ptr: *mut BoxedString = out.data.as_mut_ptr().cast(); + let data_ptr: *mut BoxedString = &mut out.data as *mut _ as *mut BoxedString; #[allow(unsafe_code)] unsafe { data_ptr.write(boxed) @@ -291,33 +293,29 @@ impl SmartString { fn from_inline(inline: InlineString) -> Self { Self { - data: MaybeUninit::new(inline), + data: inline, mode: PhantomData, } } fn discriminant(&self) -> Discriminant { - // unsafe { self.data.assume_init() }.marker.discriminant() - let str_ptr: *const BoxedString = - self.data.as_ptr().cast() as *const _ as *const BoxedString; - #[allow(unsafe_code)] - Discriminant::from_bit(BoxedString::check_alignment(unsafe { &*str_ptr })) + self.data.marker.discriminant() } fn cast(&self) -> StringCast<'_> { #[allow(unsafe_code)] match self.discriminant() { - Discriminant::Inline => StringCast::Inline(unsafe { &*self.data.as_ptr() }), - Discriminant::Boxed => StringCast::Boxed(unsafe { &*self.data.as_ptr().cast() }), + Discriminant::Inline => StringCast::Inline(&self.data), + Discriminant::Boxed => StringCast::Boxed(unsafe { &*(&self.data as *const _ as *const BoxedString) }), } } fn cast_mut(&mut self) -> StringCastMut<'_> { #[allow(unsafe_code)] match self.discriminant() { - Discriminant::Inline => StringCastMut::Inline(unsafe { &mut *self.data.as_mut_ptr() }), + Discriminant::Inline => StringCastMut::Inline(&mut self.data), Discriminant::Boxed => { - StringCastMut::Boxed(unsafe { &mut *self.data.as_mut_ptr().cast() }) + StringCastMut::Boxed(unsafe { &mut *(&mut self.data as *mut _ as *mut BoxedString) }) } } } @@ -325,9 +323,9 @@ impl SmartString { fn cast_into(mut self) -> StringCastInto { #[allow(unsafe_code)] match self.discriminant() { - Discriminant::Inline => StringCastInto::Inline(unsafe { self.data.assume_init() }), + Discriminant::Inline => StringCastInto::Inline(self.data), Discriminant::Boxed => StringCastInto::Boxed(unsafe { - let boxed_ptr: *mut BoxedString = self.data.as_mut_ptr().cast(); + let boxed_ptr: *mut BoxedString = &mut self.data as *mut _ as *mut BoxedString; let string = boxed_ptr.read(); forget(self); string @@ -337,7 +335,7 @@ impl SmartString { fn promote_from(&mut self, string: BoxedString) { debug_assert!(self.discriminant() == Discriminant::Inline); - let data: *mut BoxedString = self.data.as_mut_ptr().cast(); + let data: *mut BoxedString = &mut self.data as *mut _ as *mut BoxedString; #[allow(unsafe_code)] unsafe { data.write(string) @@ -362,11 +360,11 @@ impl SmartString { false } else { let s: &str = string.deref(); - let inlined = s.into(); + let inlined: InlineString = s.into(); #[allow(unsafe_code)] unsafe { drop_in_place(string); - self.data.as_mut_ptr().write(inlined); + core::ptr::write(&mut self.data, inlined); } true } @@ -693,7 +691,6 @@ impl From> for SmartString { } } -#[cfg(feature = "std")] impl From> for SmartString { fn from(string: Cow<'_, str>) -> Self { if string.len() > MAX_INLINE { diff --git a/src/marker_byte.rs b/src/marker_byte.rs index ba6cbab..6561c9d 100644 --- a/src/marker_byte.rs +++ b/src/marker_byte.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. +use core::num::NonZeroU8; + #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub(crate) enum Discriminant { Boxed, @@ -27,13 +29,27 @@ impl Discriminant { } } +/// We now use this type to facilitate Option size optimization. +/// The low two bits are used to determine both the discriminant and the None state. +/// +/// 00000000 - None +/// xxxxxx01 - unused +/// xxxxxx10 - BoxedString +/// xxxxxx11 - InlineString +/// +/// BoxedString now uses TaggedPtr to ensure the low two bits form the 10 pattern. +/// This guarantees the in-memory NonZeroU8 value is always in a valid state and that it matches the +/// tagging convention of Marker. #[derive(Clone, Copy, Debug)] -pub(crate) struct Marker(u8); +pub(crate) struct Marker(NonZeroU8); impl Marker { #[inline(always)] - const fn assemble(discriminant: Discriminant, data: u8) -> u8 { - data << 1 | discriminant.bit() + const fn assemble(discriminant: Discriminant, data: u8) -> NonZeroU8 { + debug_assert!(data < 0x40); + + #[allow(unsafe_code)] + unsafe { NonZeroU8::new_unchecked((data << 2) | 2 | discriminant.bit()) } // SAFETY: (2 | x) != 0 is guaranteed for all x } #[inline(always)] @@ -43,23 +59,21 @@ impl Marker { #[inline(always)] pub(crate) const fn new_inline(data: u8) -> Self { - debug_assert!(data < 0x80); Self(Self::assemble(Discriminant::Inline, data)) } #[inline(always)] pub(crate) const fn discriminant(self) -> Discriminant { - Discriminant::from_bit(self.0 & 0x01 != 0) + Discriminant::from_bit(self.0.get() & 0x01 != 0) } #[inline(always)] pub(crate) const fn data(self) -> u8 { - self.0 >> 1 + self.0.get() >> 2 } #[inline(always)] pub(crate) fn set_data(&mut self, byte: u8) { - debug_assert!(byte < 0x80); self.0 = Self::assemble(self.discriminant(), byte); } } diff --git a/src/tagged_ptr.rs b/src/tagged_ptr.rs new file mode 100644 index 0000000..db49062 --- /dev/null +++ b/src/tagged_ptr.rs @@ -0,0 +1,42 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +use core::ptr::NonNull; +use core::num::NonZeroUsize; +use core::marker::PhantomData; + +#[derive(Debug, Clone, Copy)] +pub(crate) struct GenericTaggedPtr(NonZeroUsize, PhantomData); +impl GenericTaggedPtr { + pub(crate) fn new(v: *mut T) -> Option { + // this will be optimized away at compile time + if PATTERN == 0 || PATTERN & MASK != PATTERN { + panic!("fill must not be zero"); + } + + if v as usize == 0 || v as usize & MASK != 0 { + None + } else { + #[allow(unsafe_code)] + let ptr = unsafe { NonZeroUsize::new_unchecked((v as usize & !MASK) | PATTERN) }; // SAFETY: v | PATTERN != 0 because PATTERN != 0 + Some(Self(ptr, PhantomData)) + } + } + pub(crate) fn as_non_null(self) -> NonNull { + #[allow(unsafe_code)] + unsafe { NonNull::new_unchecked((self.0.get() & !MASK) as *mut T) } // SAFETY: v & !MASK != 0 guaranteed from Self::new + } +} + +pub(crate) type TaggedPtr = GenericTaggedPtr; + +#[test] +fn check_filled_ptr() { + for i in 1..=1024 { + let v = i << 2; + let p = TaggedPtr::new(v as *mut _).unwrap(); + assert_eq!(p.0.get(), v | 2); + assert_eq!(p.as_non_null().as_ptr() as usize, v); + } +} \ No newline at end of file diff --git a/src/test.rs b/src/test.rs index 8abba06..56d4f49 100644 --- a/src/test.rs +++ b/src/test.rs @@ -527,19 +527,14 @@ mod tests { #[test] fn check_alignment() { use crate::boxed::BoxedString; - use crate::inline::InlineString; use crate::marker_byte::Discriminant; - let inline = InlineString::new(); - let inline_ptr: *const InlineString = &inline; - let boxed_ptr: *const BoxedString = inline_ptr.cast(); - #[allow(unsafe_code)] - let discriminant = - Discriminant::from_bit(BoxedString::check_alignment(unsafe { &*boxed_ptr })); - assert_eq!(Discriminant::Inline, discriminant); + let boxed = BoxedString::from_str(32, "welp"); + let discriminant = SmartString::::from_boxed(boxed).discriminant(); + assert_eq!(Discriminant::Boxed, discriminant); let boxed = BoxedString::from_str(32, "welp"); - let discriminant = Discriminant::from_bit(BoxedString::check_alignment(&boxed)); + let discriminant = SmartString::::from_boxed(boxed).discriminant(); assert_eq!(Discriminant::Boxed, discriminant); let mut s = SmartString::::new(); @@ -552,6 +547,73 @@ mod tests { assert_eq!(Discriminant::Inline, s.discriminant()); } + #[test] + fn check_sizes() { + assert_eq!(core::mem::size_of::>(), core::mem::size_of::()); + assert_eq!(core::mem::size_of::>>(), core::mem::size_of::>()); + + assert_eq!(core::mem::size_of::>(), core::mem::size_of::()); + assert_eq!(core::mem::size_of::>>(), core::mem::size_of::>()); + + // ------------------------- + + let mut empty_bucket: Vec>> = vec![]; + let mut short_bucket: Vec>> = vec![]; + let mut long_bucket: Vec>> = vec![]; + let mut none_bucket: Vec>> = vec![]; + + // make sure we shift around the allocation addresses by doing multiple of them and keeping the results + for _ in 0..1024 { + empty_bucket.push(Some(SmartString::new())); + empty_bucket.push(Some(SmartString::from(""))); + short_bucket.push(Some(SmartString::from("hello"))); + long_bucket.push(Some(SmartString::from("this is a really long line of text that will not be able to be inlined"))); + none_bucket.push(None); + } + + for v in empty_bucket { + assert_eq!(v.unwrap().as_str(), ""); + } + for v in short_bucket { + assert_eq!(v.unwrap().as_str(), "hello"); + } + for v in long_bucket { + assert_eq!(v.unwrap().as_str(), "this is a really long line of text that will not be able to be inlined"); + } + for v in none_bucket { + assert!(v.is_none()); + } + + // ------------------------- + + let mut empty_bucket: Vec>> = vec![]; + let mut short_bucket: Vec>> = vec![]; + let mut long_bucket: Vec>> = vec![]; + let mut none_bucket: Vec>> = vec![]; + + // make sure we shift around the allocation addresses by doing multiple of them and keeping the results + for _ in 0..1024 { + empty_bucket.push(Some(SmartString::new())); + empty_bucket.push(Some(SmartString::from(""))); + short_bucket.push(Some(SmartString::from("hello"))); + long_bucket.push(Some(SmartString::from("this is a really long line of text that will not be able to be inlined"))); + none_bucket.push(None); + } + + for v in empty_bucket { + assert_eq!(v.unwrap().as_str(), ""); + } + for v in short_bucket { + assert_eq!(v.unwrap().as_str(), "hello"); + } + for v in long_bucket { + assert_eq!(v.unwrap().as_str(), "this is a really long line of text that will not be able to be inlined"); + } + for v in none_bucket { + assert!(v.is_none()); + } + } + #[test] fn from_string() { let std_s =