1use crate::{
4 core_arch::{simd::*, x86::*},
5 intrinsics::simd::*,
6};
7
8#[cfg(test)]
9use stdarch_test::assert_instr;
10
11#[inline]
16#[target_feature(enable = "ssse3")]
17#[cfg_attr(test, assert_instr(pabsb))]
18#[stable(feature = "simd_x86", since = "1.27.0")]
19pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
20 let a = a.as_i8x16();
21 let zero = i8x16::ZERO;
22 let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
23 transmute(r)
24}
25
26#[inline]
32#[target_feature(enable = "ssse3")]
33#[cfg_attr(test, assert_instr(pabsw))]
34#[stable(feature = "simd_x86", since = "1.27.0")]
35pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
36 let a = a.as_i16x8();
37 let zero = i16x8::ZERO;
38 let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
39 transmute(r)
40}
41
42#[inline]
48#[target_feature(enable = "ssse3")]
49#[cfg_attr(test, assert_instr(pabsd))]
50#[stable(feature = "simd_x86", since = "1.27.0")]
51pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
52 let a = a.as_i32x4();
53 let zero = i32x4::ZERO;
54 let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
55 transmute(r)
56}
57
58#[inline]
85#[target_feature(enable = "ssse3")]
86#[cfg_attr(test, assert_instr(pshufb))]
87#[stable(feature = "simd_x86", since = "1.27.0")]
88pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
89 transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
90}
91
92#[inline]
97#[target_feature(enable = "ssse3")]
98#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
99#[rustc_legacy_const_generics(2)]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
102 static_assert_uimm_bits!(IMM8, 8);
103 if IMM8 > 32 {
106 return _mm_setzero_si128();
107 }
108 let (a, b) = if IMM8 > 16 {
111 (_mm_setzero_si128(), a)
112 } else {
113 (a, b)
114 };
115 const fn mask(shift: u32, i: u32) -> u32 {
116 if shift > 32 {
117 i
119 } else if shift > 16 {
120 shift - 16 + i
121 } else {
122 shift + i
123 }
124 }
125 let r: i8x16 = simd_shuffle!(
126 b.as_i8x16(),
127 a.as_i8x16(),
128 [
129 mask(IMM8 as u32, 0),
130 mask(IMM8 as u32, 1),
131 mask(IMM8 as u32, 2),
132 mask(IMM8 as u32, 3),
133 mask(IMM8 as u32, 4),
134 mask(IMM8 as u32, 5),
135 mask(IMM8 as u32, 6),
136 mask(IMM8 as u32, 7),
137 mask(IMM8 as u32, 8),
138 mask(IMM8 as u32, 9),
139 mask(IMM8 as u32, 10),
140 mask(IMM8 as u32, 11),
141 mask(IMM8 as u32, 12),
142 mask(IMM8 as u32, 13),
143 mask(IMM8 as u32, 14),
144 mask(IMM8 as u32, 15),
145 ],
146 );
147 transmute(r)
148}
149
150#[inline]
155#[target_feature(enable = "ssse3")]
156#[cfg_attr(test, assert_instr(phaddw))]
157#[stable(feature = "simd_x86", since = "1.27.0")]
158pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
159 transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
160}
161
162#[inline]
168#[target_feature(enable = "ssse3")]
169#[cfg_attr(test, assert_instr(phaddsw))]
170#[stable(feature = "simd_x86", since = "1.27.0")]
171pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
172 transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
173}
174
175#[inline]
180#[target_feature(enable = "ssse3")]
181#[cfg_attr(test, assert_instr(phaddd))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
184 transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
185}
186
187#[inline]
192#[target_feature(enable = "ssse3")]
193#[cfg_attr(test, assert_instr(phsubw))]
194#[stable(feature = "simd_x86", since = "1.27.0")]
195pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
196 transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
197}
198
199#[inline]
206#[target_feature(enable = "ssse3")]
207#[cfg_attr(test, assert_instr(phsubsw))]
208#[stable(feature = "simd_x86", since = "1.27.0")]
209pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
210 transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
211}
212
213#[inline]
218#[target_feature(enable = "ssse3")]
219#[cfg_attr(test, assert_instr(phsubd))]
220#[stable(feature = "simd_x86", since = "1.27.0")]
221pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
222 transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
223}
224
225#[inline]
233#[target_feature(enable = "ssse3")]
234#[cfg_attr(test, assert_instr(pmaddubsw))]
235#[stable(feature = "simd_x86", since = "1.27.0")]
236pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
237 transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
238}
239
240#[inline]
246#[target_feature(enable = "ssse3")]
247#[cfg_attr(test, assert_instr(pmulhrsw))]
248#[stable(feature = "simd_x86", since = "1.27.0")]
249pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
250 transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
251}
252
253#[inline]
260#[target_feature(enable = "ssse3")]
261#[cfg_attr(test, assert_instr(psignb))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
264 transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
265}
266
267#[inline]
274#[target_feature(enable = "ssse3")]
275#[cfg_attr(test, assert_instr(psignw))]
276#[stable(feature = "simd_x86", since = "1.27.0")]
277pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
278 transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
279}
280
281#[inline]
288#[target_feature(enable = "ssse3")]
289#[cfg_attr(test, assert_instr(psignd))]
290#[stable(feature = "simd_x86", since = "1.27.0")]
291pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
292 transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
293}
294
295#[allow(improper_ctypes)]
296extern "C" {
297 #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
298 fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
299
300 #[link_name = "llvm.x86.ssse3.phadd.w.128"]
301 fn phaddw128(a: i16x8, b: i16x8) -> i16x8;
302
303 #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
304 fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
305
306 #[link_name = "llvm.x86.ssse3.phadd.d.128"]
307 fn phaddd128(a: i32x4, b: i32x4) -> i32x4;
308
309 #[link_name = "llvm.x86.ssse3.phsub.w.128"]
310 fn phsubw128(a: i16x8, b: i16x8) -> i16x8;
311
312 #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
313 fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
314
315 #[link_name = "llvm.x86.ssse3.phsub.d.128"]
316 fn phsubd128(a: i32x4, b: i32x4) -> i32x4;
317
318 #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
319 fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
320
321 #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
322 fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
323
324 #[link_name = "llvm.x86.ssse3.psign.b.128"]
325 fn psignb128(a: i8x16, b: i8x16) -> i8x16;
326
327 #[link_name = "llvm.x86.ssse3.psign.w.128"]
328 fn psignw128(a: i16x8, b: i16x8) -> i16x8;
329
330 #[link_name = "llvm.x86.ssse3.psign.d.128"]
331 fn psignd128(a: i32x4, b: i32x4) -> i32x4;
332}
333
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Every lane holds -5, so every result lane must hold 5.
    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let input = _mm_set1_epi8(-5);
        let want = _mm_set1_epi8(5);
        assert_eq_m128i(_mm_abs_epi8(input), want);
    }

    // Same check as above for 16-bit lanes.
    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let input = _mm_set1_epi16(-5);
        let want = _mm_set1_epi16(5);
        assert_eq_m128i(_mm_abs_epi16(input), want);
    }

    // Same check as above for 32-bit lanes.
    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let input = _mm_set1_epi32(-5);
        let want = _mm_set1_epi32(5);
        assert_eq_m128i(_mm_abs_epi32(input), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let source = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // The second control byte has its high bit set and must produce zero.
        #[rustfmt::skip]
        let control = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let want = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        assert_eq_m128i(_mm_shuffle_epi8(source, control), want);

        // Adding 32 pushes every index past 15; the selection must wrap
        // around and give the same result.
        let control = _mm_add_epi8(control, _mm_set1_epi8(32));
        assert_eq_m128i(_mm_shuffle_epi8(source, control), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let high = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let low = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );

        // Shifting by more than 32 bytes leaves nothing but zeroes.
        let got = _mm_alignr_epi8::<33>(high, low);
        assert_eq_m128i(got, _mm_set1_epi8(0));

        // 17 bytes: `low` is gone and one zero byte is shifted in at the top.
        let got = _mm_alignr_epi8::<17>(high, low);
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(got, want);

        // Exactly 16 bytes: the result is the first operand.
        let got = _mm_alignr_epi8::<16>(high, low);
        assert_eq_m128i(got, high);

        // 15 bytes: the last byte of `low` followed by the first 15 of `high`.
        let got = _mm_alignr_epi8::<15>(high, low);
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(got, want);

        // No shift: the result is the second operand.
        let got = _mm_alignr_epi8::<0>(high, low);
        assert_eq_m128i(got, low);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let want = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        assert_eq_m128i(_mm_hadd_epi16(lhs, rhs), want);

        // Sums that leave the i16 range must wrap around.
        let lhs = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let rhs = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let want = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        assert_eq_m128i(_mm_hadd_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let want = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        assert_eq_m128i(_mm_hadds_epi16(lhs, rhs), want);

        // Sums that leave the i16 range must saturate.
        let lhs = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let rhs = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let want = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        assert_eq_m128i(_mm_hadds_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let lhs = _mm_setr_epi32(1, 2, 3, 4);
        let rhs = _mm_setr_epi32(4, 128, 4, 3);
        let want = _mm_setr_epi32(3, 7, 132, 7);
        assert_eq_m128i(_mm_hadd_epi32(lhs, rhs), want);

        // Sums that leave the i32 range must wrap around.
        let lhs = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let rhs = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let want = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        assert_eq_m128i(_mm_hadd_epi32(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let want = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        assert_eq_m128i(_mm_hsub_epi16(lhs, rhs), want);

        // Differences that leave the i16 range must wrap around.
        let lhs = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let rhs = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let want = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        assert_eq_m128i(_mm_hsub_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let want = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        assert_eq_m128i(_mm_hsubs_epi16(lhs, rhs), want);

        // Differences that leave the i16 range must saturate.
        let lhs = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let rhs = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let want = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        assert_eq_m128i(_mm_hsubs_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let lhs = _mm_setr_epi32(1, 2, 3, 4);
        let rhs = _mm_setr_epi32(4, 128, 4, 3);
        let want = _mm_setr_epi32(-1, -1, -124, 1);
        assert_eq_m128i(_mm_hsub_epi32(lhs, rhs), want);

        // Differences that leave the i32 range must wrap around.
        let lhs = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let rhs = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let want = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        assert_eq_m128i(_mm_hsub_epi32(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let unsigned_bytes = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let signed_bytes = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let want = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        assert_eq_m128i(_mm_maddubs_epi16(unsigned_bytes, signed_bytes), want);

        // Extreme operands: the pairwise sums must saturate in both
        // directions, and the first operand must be read as unsigned.
        #[rustfmt::skip]
        let unsigned_bytes = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let signed_bytes = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let want = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        assert_eq_m128i(_mm_maddubs_epi16(unsigned_bytes, signed_bytes), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let want = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        assert_eq_m128i(_mm_mulhrs_epi16(lhs, rhs), want);

        // Products of the extreme i16 values.
        let lhs = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let rhs = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let want = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        assert_eq_m128i(_mm_mulhrs_epi16(lhs, rhs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let values = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        // Mix of positive, negative and zero sign lanes.
        #[rustfmt::skip]
        let signs = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        assert_eq_m128i(_mm_sign_epi8(values, signs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let values = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let signs = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let want = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        assert_eq_m128i(_mm_sign_epi16(values, signs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let values = _mm_setr_epi32(-1, 2, 3, 4);
        let signs = _mm_setr_epi32(1, -1, 1, 0);
        let want = _mm_setr_epi32(-1, -2, 3, 0);
        assert_eq_m128i(_mm_sign_epi32(values, signs), want);
    }
}