diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index c10446b75b40..c63309fbd3f9 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -128,6 +128,52 @@ macro_rules! isle_common_prelude_methods { Imm64::new((x >> y) & ty_mask) } + #[inline] + fn imm64_rotl(&mut self, ty: Type, x: Imm64, y: Imm64) -> Imm64 { + let bits = ty.bits(); + assert!(bits <= 64); + // This holds for all Cranelift types ({u/i}{8,16,32,64}) + debug_assert!(bits.is_power_of_two()); + + let ty_mask = self.ty_mask(ty); + let x = (x.bits() as u64) & ty_mask; + + // Mask off any excess rotate bits so the rotate stays within `ty`. + let shift_mask = bits - 1; + let y = ((y.bits() as u64) & u64::from(shift_mask)) as u32; + + // In Rust, x >> 64 or x << 64 panics. + let result = if y == 0 { + x + } else { + (x << y) | (x >> (u32::from(bits) - y)) + }; + + Imm64::new((result & ty_mask) as i64) + } + + #[inline] + fn imm64_rotr(&mut self, ty: Type, x: Imm64, y: Imm64) -> Imm64 { + let bits = ty.bits(); + assert!(bits <= 64); + debug_assert!(bits.is_power_of_two()); + + let ty_mask = self.ty_mask(ty); + let x = (x.bits() as u64) & ty_mask; + + // Mask off any excess rotate bits so the rotate stays within `ty`. 
+ let shift_mask = bits - 1; + let y = ((y.bits() as u64) & u64::from(shift_mask)) as u32; + + let result = if y == 0 { + x + } else { + (x >> y) | (x << (u32::from(bits) - y)) + }; + + Imm64::new((result & ty_mask) as i64) + } + #[inline] fn i64_sextend_u64(&mut self, ty: Type, x: u64) -> i64 { let shift_amt = core::cmp::max(0, 64 - ty.bits()); diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index e6ec5c84db40..a8654496109a 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -91,6 +91,17 @@ (iconst _ k2))) (subsume (iconst ty (imm64_sshr ty k1 k2)))) +(rule (simplify (rotr (fits_in_64 ty) + (iconst ty k1) + (iconst _ k2))) + (subsume (iconst ty (imm64_rotr ty k1 k2)))) + +(rule (simplify (rotl (fits_in_64 ty) + (iconst ty k1) + (iconst _ k2))) + (subsume (iconst ty (imm64_rotl ty k1 k2)))) + + (rule (simplify (ireduce narrow (iconst (fits_in_64 _) (u64_from_imm64 imm)))) (subsume (iconst narrow (imm64_masked narrow imm)))) diff --git a/cranelift/codegen/src/opts/shifts.isle b/cranelift/codegen/src/opts/shifts.isle index 82992c26c6cf..f3cfe7700c86 100644 --- a/cranelift/codegen/src/opts/shifts.isle +++ b/cranelift/codegen/src/opts/shifts.isle @@ -266,6 +266,10 @@ (rule (simplify (rotl ty (rotr ty x y @ (value_type kty)) z @ (value_type kty))) (rotr ty x (isub_uextend y z))) +(rule + (simplify (rotr ty (iconst ty p) (select ty x (iconst ty y) (iconst ty z)))) + (select ty x (iconst ty (imm64_rotr ty p y)) (iconst ty (imm64_rotr ty p z)))) + ;; Convert shifts into rotates. We always normalize into a rotate left. 
;; ;; (bor (ishl x k1) (ushr x k2)) == (rotl x k1) if k2 == ty_bits - k1 @@ -311,4 +315,3 @@ (rule (simplify (iadd ty (ishl ty x z) (ishl ty y z))) (ishl ty (iadd ty x y) z)) (rule (simplify (ushr ty (band ty (ishl ty x y) z) y)) (band ty x (ushr ty z y))) - diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index f6493c7987f7..76a9773f4299 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -85,6 +85,12 @@ (decl pure imm64_sshr (Type Imm64 Imm64) Imm64) (extern constructor imm64_sshr imm64_sshr) +(decl pure imm64_rotl (Type Imm64 Imm64) Imm64) +(extern constructor imm64_rotl imm64_rotl) + +(decl pure imm64_rotr (Type Imm64 Imm64) Imm64) +(extern constructor imm64_rotr imm64_rotr) + ;; Sign extends a u64 from ty bits up to 64bits (decl pure i64_sextend_u64 (Type u64) i64) (extern constructor i64_sextend_u64 i64_sextend_u64) diff --git a/cranelift/filetests/filetests/egraph/cprop.clif b/cranelift/filetests/filetests/egraph/cprop.clif index fd3d88cb2202..fe3741aae739 100644 --- a/cranelift/filetests/filetests/egraph/cprop.clif +++ b/cranelift/filetests/filetests/egraph/cprop.clif @@ -158,6 +158,55 @@ block0: ; check: v3 = iconst.i16 -4 ; check: return v3 +;; 0x0f == 0b00001111 +function %rotr() -> i8 { +block0: + v0 = iconst.i8 0x0f + v1 = iconst.i8 2 + v2 = rotr v0, v1 + return v2 +} + +;; 0b11000011 == -61 +; check: v3 = iconst.i8 -61 +; check: return v3 + +function %rotr_i16() -> i16 { +block0: + v0 = iconst.i16 0x0f + v1 = iconst.i8 2 + v2 = rotr v0, v1 + return v2 +} + +;; 0b1100000000000011 == -16381 +; check: v3 = iconst.i16 -16381 +; check: return v3 + +function %rotl() -> i8 { +block0: + v0 = iconst.i8 0xf0 + v1 = iconst.i8 2 + v2 = rotl v0, v1 + return v2 +} + +;; 0b11000011 == -61 +; check: v3 = iconst.i8 -61 +; check: return v3 + +function %rotl_i16() -> i16 { +block0: + v0 = iconst.i16 0xf000 + v1 = iconst.i8 2 + v2 = rotl v0, v1 + return v2 +} + +;; 0b1100000000000011 == -16381 +; check: v3 
= iconst.i16 -16381 +; check: return v3 + function %icmp_eq_i32() -> i8 { block0: v0 = iconst.i32 1 diff --git a/cranelift/filetests/filetests/egraph/shifts.clif b/cranelift/filetests/filetests/egraph/shifts.clif index f0c0e0754dee..c0319a9b8286 100644 --- a/cranelift/filetests/filetests/egraph/shifts.clif +++ b/cranelift/filetests/filetests/egraph/shifts.clif @@ -758,6 +758,21 @@ block0(v0: i8, v1: i8): ; check: return v0 +function %rotr_iconst_select_iconsts(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = iconst.i8 1 + v3 = iconst.i8 2 + v4 = select v0, v2, v3 + v5 = rotr.i8 v1, v4 + return v5 +} + +; check: v6 = iconst.i8 -128 +; check: v7 = iconst.i8 64 +; check: v8 = select v0, v6, v7 +; check: return v8 + function %shifts_to_rotl(i64) -> i64 { block0(v0: i64): v1 = iconst.i16 3 diff --git a/cranelift/filetests/filetests/runtests/rotr.clif b/cranelift/filetests/filetests/runtests/rotr.clif index 2065f0220ef5..f23152dfd4d7 100644 --- a/cranelift/filetests/filetests/runtests/rotr.clif +++ b/cranelift/filetests/filetests/runtests/rotr.clif @@ -231,6 +231,30 @@ block0(v0: i8, v1: i8): ; run: %rotr_i8_i8(0xe0, 65) == 0x70 ; run: %rotr_i8_i8(0xe0, 66) == 0x38 +function %rotr_iconst_select_iconsts_i8(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = iconst.i8 1 + v3 = iconst.i8 2 + v4 = select v0, v2, v3 + v5 = rotr.i8 v1, v4 + return v5 +} +; run: %rotr_iconst_select_iconsts_i8(0) == 0x40 +; run: %rotr_iconst_select_iconsts_i8(1) == 0x80 + +function %rotr_iconst_select_iconsts_i32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = iconst.i32 1 + v3 = iconst.i32 2 + v4 = select v0, v2, v3 + v5 = rotr.i32 v1, v4 + return v5 +} +; run: %rotr_iconst_select_iconsts_i32(0) == 0x40000000 +; run: %rotr_iconst_select_iconsts_i32(1) == 0x80000000 + ;; This is a regression test for rotates on x64