diff --git a/crates/core_arch/src/loongarch64/lasx/generated.rs b/crates/core_arch/src/loongarch64/lasx/generated.rs index 703d0822eb..3114797506 100644 --- a/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -257,22 +257,6 @@ unsafe extern "unadjusted" { fn __lasx_xvreplve_d(a: __v4i64, b: i32) -> __v4i64; #[link_name = "llvm.loongarch.lasx.xvpermi.w"] fn __lasx_xvpermi_w(a: __v8i32, b: __v8i32, c: u32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvmuh.b"] - fn __lasx_xvmuh_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvmuh.h"] - fn __lasx_xvmuh_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvmuh.w"] - fn __lasx_xvmuh_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvmuh.d"] - fn __lasx_xvmuh_d(a: __v4i64, b: __v4i64) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvmuh.bu"] - fn __lasx_xvmuh_bu(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvmuh.hu"] - fn __lasx_xvmuh_hu(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvmuh.wu"] - fn __lasx_xvmuh_wu(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvmuh.du"] - fn __lasx_xvmuh_du(a: __v4u64, b: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvsllwil.h.b"] fn __lasx_xvsllwil_h_b(a: __v32i8, b: u32) -> __v16i16; #[link_name = "llvm.loongarch.lasx.xvsllwil.w.h"] @@ -1898,62 +1882,6 @@ pub fn lasx_xvpermi_w(a: m256i, b: m256i) -> m256i { unsafe { transmute(__lasx_xvpermi_w(transmute(a), transmute(b), IMM8)) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvmuh_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvmuh_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvmuh_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvmuh_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvmuh_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvmuh_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvmuh_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvmuh_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvmuh_bu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvmuh_bu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvmuh_hu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvmuh_hu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvmuh_wu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvmuh_wu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvmuh_du(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvmuh_du(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lasx")] #[rustc_legacy_const_generics(1)] diff --git a/crates/core_arch/src/loongarch64/lasx/portable.rs b/crates/core_arch/src/loongarch64/lasx/portable.rs index 08391fa676..110a98cfb0 100644 --- a/crates/core_arch/src/loongarch64/lasx/portable.rs +++ b/crates/core_arch/src/loongarch64/lasx/portable.rs @@ -492,6 +492,14 @@ impl_vvv!("lasx", lasx_xvabsd_bu, ls::simd_absd, m256i, u8x32); impl_vvv!("lasx", lasx_xvabsd_hu, ls::simd_absd, m256i, u16x16); impl_vvv!("lasx", lasx_xvabsd_wu, ls::simd_absd, m256i, u32x8); impl_vvv!("lasx", lasx_xvabsd_du, ls::simd_absd, m256i, u64x4); +impl_vvv!("lasx", lasx_xvmuh_b, simd_muh, m256i, i8x32, i16x32); +impl_vvv!("lasx", lasx_xvmuh_h, simd_muh, m256i, i16x16, i32x16); +impl_vvv!("lasx", lasx_xvmuh_w, simd_muh, m256i, i32x8, i64x8); +impl_vvv!("lasx", lasx_xvmuh_d, simd_muh, m256i, i64x4, i128x4); +impl_vvv!("lasx", lasx_xvmuh_bu, simd_muh, m256i, u8x32, u16x32); +impl_vvv!("lasx", lasx_xvmuh_hu, simd_muh, m256i, u16x16, u32x16); +impl_vvv!("lasx", lasx_xvmuh_wu, simd_muh, m256i, u32x8, u64x8); +impl_vvv!("lasx", lasx_xvmuh_du, simd_muh, m256i, u64x4, u128x4); impl_vvv!("lasx", lasx_xvpickev_b, simd_pickev_b, m256i, i8x32); impl_vvv!("lasx", lasx_xvpickev_h, simd_pickev_h, m256i, i16x16); impl_vvv!("lasx", lasx_xvpickev_w, simd_pickev_w, m256i, i32x8); diff --git a/crates/core_arch/src/loongarch64/lsx/generated.rs b/crates/core_arch/src/loongarch64/lsx/generated.rs index 49176c0193..6483c6296a 100644 --- a/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -253,22 +253,6 @@ unsafe extern "unadjusted" { fn __lsx_vffint_s_wu(a: __v4u32) -> __v4f32; #[link_name = "llvm.loongarch.lsx.vffint.d.lu"] fn __lsx_vffint_d_lu(a: __v2u64) -> __v2f64; - #[link_name = "llvm.loongarch.lsx.vmuh.b"] - fn __lsx_vmuh_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vmuh.h"] - fn __lsx_vmuh_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vmuh.w"] - fn __lsx_vmuh_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vmuh.d"] - fn __lsx_vmuh_d(a: __v2i64, b: __v2i64) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vmuh.bu"] - fn __lsx_vmuh_bu(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vmuh.hu"] - fn __lsx_vmuh_hu(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vmuh.wu"] - fn __lsx_vmuh_wu(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vmuh.du"] - fn __lsx_vmuh_du(a: __v2u64, b: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vsllwil.h.b"] fn __lsx_vsllwil_h_b(a: __v16i8, b: u32) -> __v8i16; #[link_name = "llvm.loongarch.lsx.vsllwil.w.h"] @@ -1802,62 +1786,6 @@ pub fn lsx_vffint_d_lu(a: m128i) -> m128d { unsafe { transmute(__lsx_vffint_d_lu(transmute(a))) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vmuh_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vmuh_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vmuh_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vmuh_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vmuh_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vmuh_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vmuh_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vmuh_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vmuh_bu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vmuh_bu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vmuh_hu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vmuh_hu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vmuh_wu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vmuh_wu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vmuh_du(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vmuh_du(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lsx")] #[rustc_legacy_const_generics(1)] diff --git a/crates/core_arch/src/loongarch64/lsx/portable.rs b/crates/core_arch/src/loongarch64/lsx/portable.rs index 10b6b0a616..2ceed73283 100644 --- a/crates/core_arch/src/loongarch64/lsx/portable.rs +++ b/crates/core_arch/src/loongarch64/lsx/portable.rs @@ -386,6 +386,14 @@ impl_vvv!("lsx", lsx_vabsd_bu, ls::simd_absd, m128i, u8x16); impl_vvv!("lsx", lsx_vabsd_hu, ls::simd_absd, m128i, u16x8); impl_vvv!("lsx", lsx_vabsd_wu, ls::simd_absd, m128i, u32x4); impl_vvv!("lsx", lsx_vabsd_du, ls::simd_absd, m128i, u64x2); +impl_vvv!("lsx", lsx_vmuh_b, simd_muh, m128i, i8x16, i16x16); +impl_vvv!("lsx", lsx_vmuh_h, simd_muh, m128i, i16x8, i32x8); +impl_vvv!("lsx", lsx_vmuh_w, simd_muh, m128i, i32x4, i64x4); +impl_vvv!("lsx", lsx_vmuh_d, simd_muh, m128i, i64x2, i128x2); +impl_vvv!("lsx", lsx_vmuh_bu, simd_muh, m128i, u8x16, u16x16); +impl_vvv!("lsx", lsx_vmuh_hu, simd_muh, m128i, u16x8, u32x8); +impl_vvv!("lsx", lsx_vmuh_wu, simd_muh, m128i, u32x4, u64x4); +impl_vvv!("lsx", lsx_vmuh_du, simd_muh, m128i, u64x2, u128x2); impl_vvv!("lsx", lsx_vpickev_b, simd_pickev_b, m128i, i8x16); impl_vvv!("lsx", lsx_vpickev_h, simd_pickev_h, m128i, i16x8); impl_vvv!("lsx", lsx_vpickev_w, simd_pickev_w, m128i, i32x4); diff --git a/crates/core_arch/src/loongarch64/simd.rs b/crates/core_arch/src/loongarch64/simd.rs index 0521b1b839..58dd016400 100644 --- a/crates/core_arch/src/loongarch64/simd.rs +++ b/crates/core_arch/src/loongarch64/simd.rs @@ -35,16 +35,26 @@ impl_simd_ext!(u8x16, u8); impl_simd_ext!(u8x32, u8); impl_simd_ext!(i16x8, i16); impl_simd_ext!(i16x16, i16); +impl_simd_ext!(i16x32, i16); impl_simd_ext!(u16x8, u16); impl_simd_ext!(u16x16, u16); +impl_simd_ext!(u16x32, u16); impl_simd_ext!(i32x4, i32); impl_simd_ext!(i32x8, i32); +impl_simd_ext!(i32x16, i32); impl_simd_ext!(u32x4, u32); impl_simd_ext!(u32x8, u32); +impl_simd_ext!(u32x16, u32); impl_simd_ext!(i64x2, i64); impl_simd_ext!(i64x4, i64); +impl_simd_ext!(i64x8, i64); impl_simd_ext!(u64x2, u64); impl_simd_ext!(u64x4, u64); +impl_simd_ext!(u64x8, u64); +impl_simd_ext!(i128x2, i128); +impl_simd_ext!(u128x2, u128); +impl_simd_ext!(i128x4, i128); +impl_simd_ext!(u128x4, u128); #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] @@ -156,6 +166,18 @@ pub(super) const unsafe fn simd_msub(a: T, b: T, c: T) -> T { is::simd_sub(a, is::simd_mul(b, c)) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_muh(a: T, b: T) -> T { + let a: W = is::simd_cast(a); + let b: W = is::simd_cast(b); + let p = is::simd_mul(a, b); + is::simd_cast(is::simd_shr( + p, + ls::simd_splat((size_of::() * 8 / 2) as i64), + )) +} + #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] pub(super) const unsafe fn simd_nor(a: T, b: T) -> T { @@ -312,6 +334,19 @@ macro_rules! impl_vvv { } } }; + ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ty, $wty:ty) => { + #[inline] + #[target_feature(enable = $ft)] + #[unstable(feature = "stdarch_loongarch", issue = "117427")] + pub fn $name(a: $oty, b: $oty) -> $oty { + unsafe { + let a: $ity = transmute(a); + let b: $ity = transmute(b); + let r: $ity = $op::<$ity, $wty>(a, b); + transmute(r) + } + } + }; } pub(super) use impl_vvv; diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs index 30c3125f5f..08ee17548a 100644 --- a/crates/core_arch/src/simd.rs +++ b/crates/core_arch/src/simd.rs @@ -28,11 +28,13 @@ unsafe impl SimdElement for u8 {} unsafe impl SimdElement for u16 {} unsafe impl SimdElement for u32 {} unsafe impl SimdElement for u64 {} +unsafe impl SimdElement for u128 {} unsafe impl SimdElement for i8 {} unsafe impl SimdElement for i16 {} unsafe impl SimdElement for i32 {} unsafe impl SimdElement for i64 {} +unsafe impl SimdElement for i128 {} unsafe impl SimdElement for f16 {} unsafe impl SimdElement for f32 {} @@ -371,11 +373,13 @@ pub(crate) type u8x32 = Simd; pub(crate) type u16x16 = Simd; pub(crate) type u32x8 = Simd; pub(crate) type u64x4 = Simd; +pub(crate) type u128x2 = Simd; pub(crate) type i8x32 = Simd; pub(crate) type i16x16 = Simd; pub(crate) type i32x8 = Simd; pub(crate) type i64x4 = Simd; +pub(crate) type i128x2 = Simd; pub(crate) type f16x16 = Simd; pub(crate) type f32x8 = Simd; @@ -391,11 +395,13 @@ pub(crate) type u8x64 = Simd; pub(crate) type u16x32 = Simd; pub(crate) type u32x16 = Simd; pub(crate) type u64x8 = Simd; +pub(crate) type u128x4 = Simd; pub(crate) type i8x64 = Simd; pub(crate) type i16x32 = Simd; pub(crate) type i32x16 = Simd; pub(crate) type i64x8 = Simd; +pub(crate) type i128x4 = Simd; pub(crate) type f16x32 = Simd; pub(crate) type f32x16 = Simd; diff --git a/crates/stdarch-gen-loongarch/lasx.spec b/crates/stdarch-gen-loongarch/lasx.spec index 2ca1a32eab..d3efb6c960 100644 --- a/crates/stdarch-gen-loongarch/lasx.spec +++ b/crates/stdarch-gen-loongarch/lasx.spec @@ -2071,41 +2071,49 @@ asm-fmts = xd, xj data-types = V4DI, V4DI /// lasx_xvmuh_b +impl = portable name = lasx_xvmuh_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvmuh_h +impl = portable name = lasx_xvmuh_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvmuh_w +impl = portable name = lasx_xvmuh_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvmuh_d +impl = portable name = lasx_xvmuh_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI /// lasx_xvmuh_bu +impl = portable name = lasx_xvmuh_bu asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvmuh_hu +impl = portable name = lasx_xvmuh_hu asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvmuh_wu +impl = portable name = lasx_xvmuh_wu asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvmuh_du +impl = portable name = lasx_xvmuh_du asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI diff --git a/crates/stdarch-gen-loongarch/lsx.spec b/crates/stdarch-gen-loongarch/lsx.spec index 855094cfb6..79dc4217da 100644 --- a/crates/stdarch-gen-loongarch/lsx.spec +++ b/crates/stdarch-gen-loongarch/lsx.spec @@ -2133,41 +2133,49 @@ asm-fmts = vd, vj data-types = V2DI, V2DI /// lsx_vmuh_b +impl = portable name = lsx_vmuh_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vmuh_h +impl = portable name = lsx_vmuh_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vmuh_w +impl = portable name = lsx_vmuh_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vmuh_d +impl = portable name = lsx_vmuh_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI /// lsx_vmuh_bu +impl = portable name = lsx_vmuh_bu asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vmuh_hu +impl = portable name = lsx_vmuh_hu asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vmuh_wu +impl = portable name = lsx_vmuh_wu asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vmuh_du +impl = portable name = lsx_vmuh_du asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI diff --git a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt index 2c54ce23e6..0bd33c248b 100644 --- a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt +++ b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt @@ -285,6 +285,14 @@ lsx_vfrintrm_s lsx_vfrintrm_d lsx_vfrintrz_s lsx_vfrintrz_d +lsx_vmuh_b +lsx_vmuh_h +lsx_vmuh_w +lsx_vmuh_d +lsx_vmuh_bu +lsx_vmuh_hu +lsx_vmuh_wu +lsx_vmuh_du # LASX intrinsics lasx_xvsll_b @@ -571,3 +579,11 @@ lasx_xvfrintrm_s lasx_xvfrintrm_d lasx_xvfrintrz_s lasx_xvfrintrz_d +lasx_xvmuh_b +lasx_xvmuh_h +lasx_xvmuh_w +lasx_xvmuh_d +lasx_xvmuh_bu +lasx_xvmuh_hu +lasx_xvmuh_wu +lasx_xvmuh_du