core/stdarch/crates/core_arch/src/x86/
ssse3.rs

1//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
2
3use crate::{
4    core_arch::{simd::*, x86::*},
5    intrinsics::simd::*,
6};
7
8#[cfg(test)]
9use stdarch_test::assert_instr;
10
11/// Computes the absolute value of packed 8-bit signed integers in `a` and
12/// return the unsigned results.
13///
14/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
15#[inline]
16#[target_feature(enable = "ssse3")]
17#[cfg_attr(test, assert_instr(pabsb))]
18#[stable(feature = "simd_x86", since = "1.27.0")]
19pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
20    let a = a.as_i8x16();
21    let zero = i8x16::ZERO;
22    let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
23    transmute(r)
24}
25
26/// Computes the absolute value of each of the packed 16-bit signed integers in
27/// `a` and
28/// return the 16-bit unsigned integer
29///
30/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
31#[inline]
32#[target_feature(enable = "ssse3")]
33#[cfg_attr(test, assert_instr(pabsw))]
34#[stable(feature = "simd_x86", since = "1.27.0")]
35pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
36    let a = a.as_i16x8();
37    let zero = i16x8::ZERO;
38    let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
39    transmute(r)
40}
41
42/// Computes the absolute value of each of the packed 32-bit signed integers in
43/// `a` and
44/// return the 32-bit unsigned integer
45///
46/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
47#[inline]
48#[target_feature(enable = "ssse3")]
49#[cfg_attr(test, assert_instr(pabsd))]
50#[stable(feature = "simd_x86", since = "1.27.0")]
51pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
52    let a = a.as_i32x4();
53    let zero = i32x4::ZERO;
54    let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
55    transmute(r)
56}
57
58/// Shuffles bytes from `a` according to the content of `b`.
59///
60/// The last 4 bits of each byte of `b` are used as addresses
61/// into the 16 bytes of `a`.
62///
63/// In addition, if the highest significant bit of a byte of `b`
64/// is set, the respective destination byte is set to 0.
65///
66/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
67/// logically equivalent to:
68///
69/// ```
70/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
71///     let mut r = [0u8; 16];
72///     for i in 0..16 {
73///         // if the most significant bit of b is set,
74///         // then the destination byte is set to 0.
75///         if b[i] & 0x80 == 0u8 {
76///             r[i] = a[(b[i] % 16) as usize];
77///         }
78///     }
79///     r
80/// }
81/// ```
82///
83/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
84#[inline]
85#[target_feature(enable = "ssse3")]
86#[cfg_attr(test, assert_instr(pshufb))]
87#[stable(feature = "simd_x86", since = "1.27.0")]
88pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
89    transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
90}
91
/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and returns the low 16 bytes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 > 32 {
        return _mm_setzero_si128();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm_setzero_si128(), a)
    } else {
        (a, b)
    };
    // Byte `i` of the result is byte `shift + i` of the 32-byte concatenation
    // `[b, a]`: indices 0..16 address the first shuffle operand (`b`) and
    // indices 16..32 address the second (`a`).
    const fn mask(shift: u32, i: u32) -> u32 {
        if shift > 32 {
            // Unused, but needs to be a valid index.
            i
        } else if shift > 16 {
            // `a` was replaced by zero above, so fold the shift back into
            // the 0..=16 range.
            shift - 16 + i
        } else {
            shift + i
        }
    }
    // The shuffle indices are const-evaluated from IMM8, which lets the
    // backend lower this to a single `palignr` with an immediate operand.
    let r: i8x16 = simd_shuffle!(
        b.as_i8x16(),
        a.as_i8x16(),
        [
            mask(IMM8 as u32, 0),
            mask(IMM8 as u32, 1),
            mask(IMM8 as u32, 2),
            mask(IMM8 as u32, 3),
            mask(IMM8 as u32, 4),
            mask(IMM8 as u32, 5),
            mask(IMM8 as u32, 6),
            mask(IMM8 as u32, 7),
            mask(IMM8 as u32, 8),
            mask(IMM8 as u32, 9),
            mask(IMM8 as u32, 10),
            mask(IMM8 as u32, 11),
            mask(IMM8 as u32, 12),
            mask(IMM8 as u32, 13),
            mask(IMM8 as u32, 14),
            mask(IMM8 as u32, 15),
        ],
    );
    transmute(r)
}
149
150/// Horizontally adds the adjacent pairs of values contained in 2 packed
151/// 128-bit vectors of `[8 x i16]`.
152///
153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
154#[inline]
155#[target_feature(enable = "ssse3")]
156#[cfg_attr(test, assert_instr(phaddw))]
157#[stable(feature = "simd_x86", since = "1.27.0")]
158pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
159    transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
160}
161
162/// Horizontally adds the adjacent pairs of values contained in 2 packed
163/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
164/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
165///
166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
167#[inline]
168#[target_feature(enable = "ssse3")]
169#[cfg_attr(test, assert_instr(phaddsw))]
170#[stable(feature = "simd_x86", since = "1.27.0")]
171pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
172    transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
173}
174
175/// Horizontally adds the adjacent pairs of values contained in 2 packed
176/// 128-bit vectors of `[4 x i32]`.
177///
178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
179#[inline]
180#[target_feature(enable = "ssse3")]
181#[cfg_attr(test, assert_instr(phaddd))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
184    transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
185}
186
187/// Horizontally subtract the adjacent pairs of values contained in 2
188/// packed 128-bit vectors of `[8 x i16]`.
189///
190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
191#[inline]
192#[target_feature(enable = "ssse3")]
193#[cfg_attr(test, assert_instr(phsubw))]
194#[stable(feature = "simd_x86", since = "1.27.0")]
195pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
196    transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
197}
198
199/// Horizontally subtract the adjacent pairs of values contained in 2
200/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
201/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
202/// saturated to 8000h.
203///
204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
205#[inline]
206#[target_feature(enable = "ssse3")]
207#[cfg_attr(test, assert_instr(phsubsw))]
208#[stable(feature = "simd_x86", since = "1.27.0")]
209pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
210    transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
211}
212
213/// Horizontally subtract the adjacent pairs of values contained in 2
214/// packed 128-bit vectors of `[4 x i32]`.
215///
216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
217#[inline]
218#[target_feature(enable = "ssse3")]
219#[cfg_attr(test, assert_instr(phsubd))]
220#[stable(feature = "simd_x86", since = "1.27.0")]
221pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
222    transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
223}
224
225/// Multiplies corresponding pairs of packed 8-bit unsigned integer
226/// values contained in the first source operand and packed 8-bit signed
227/// integer values contained in the second source operand, add pairs of
228/// contiguous products with signed saturation, and writes the 16-bit sums to
229/// the corresponding bits in the destination.
230///
231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
232#[inline]
233#[target_feature(enable = "ssse3")]
234#[cfg_attr(test, assert_instr(pmaddubsw))]
235#[stable(feature = "simd_x86", since = "1.27.0")]
236pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
237    transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
238}
239
240/// Multiplies packed 16-bit signed integer values, truncate the 32-bit
241/// product to the 18 most significant bits by right-shifting, round the
242/// truncated value by adding 1, and write bits `[16:1]` to the destination.
243///
244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
245#[inline]
246#[target_feature(enable = "ssse3")]
247#[cfg_attr(test, assert_instr(pmulhrsw))]
248#[stable(feature = "simd_x86", since = "1.27.0")]
249pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
250    transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
251}
252
253/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
254/// integer in `b` is negative, and returns the result.
255/// Elements in result are zeroed out when the corresponding element in `b`
256/// is zero.
257///
258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
259#[inline]
260#[target_feature(enable = "ssse3")]
261#[cfg_attr(test, assert_instr(psignb))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
264    transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
265}
266
267/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
268/// integer in `b` is negative, and returns the results.
269/// Elements in result are zeroed out when the corresponding element in `b`
270/// is zero.
271///
272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
273#[inline]
274#[target_feature(enable = "ssse3")]
275#[cfg_attr(test, assert_instr(psignw))]
276#[stable(feature = "simd_x86", since = "1.27.0")]
277pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
278    transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
279}
280
281/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
282/// integer in `b` is negative, and returns the results.
283/// Element in result are zeroed out when the corresponding element in `b`
284/// is zero.
285///
286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
287#[inline]
288#[target_feature(enable = "ssse3")]
289#[cfg_attr(test, assert_instr(psignd))]
290#[stable(feature = "simd_x86", since = "1.27.0")]
291pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
292    transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
293}
294
// Declarations of the LLVM SSSE3 intrinsics that the wrappers above lower
// to. The `link_name` strings are LLVM intrinsic names, not C symbols;
// `improper_ctypes` is allowed because SIMD vector types are not FFI-safe
// in the usual C ABI sense but are understood by the LLVM backend.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    // Asymmetric operand types: unsigned `a` times signed `b`.
    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}
333
// Runtime tests for the SSSE3 intrinsics. `#[simd_test(enable = "ssse3")]`
// only runs each test when the host CPU supports SSSE3. Expected vectors
// were computed by hand from the Intel instruction semantics.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // 128 has the MSB set, so lane 1 of the result must be zeroed.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);

        // Test indices greater than 15 wrapping around
        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        // Shift > 32 bytes: everything is shifted out, result is zero.
        let r = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        // 16 < shift <= 32: zeroes are shifted in past the end of `a`.
        let r = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        // Shift of exactly one lane returns `a` unchanged.
        let r = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(r, a);

        let r = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        // Shift of zero returns `b` unchanged.
        let r = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test widening and saturation
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test extreme values
        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}