1use crate::{
4 core_arch::{simd::*, x86::*},
5 intrinsics::simd::*,
6 intrinsics::sqrtf32,
7 mem, ptr,
8};
9
10#[cfg(test)]
11use stdarch_test::assert_instr;
12
13#[inline]
18#[target_feature(enable = "sse")]
19#[cfg_attr(test, assert_instr(addss))]
20#[stable(feature = "simd_x86", since = "1.27.0")]
21#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22pub const fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
23 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b)) }
24}
25
/// Adds `a` and `b` lane by lane via `simd_add` and returns the resulting
/// vector.
///
/// Test builds assert that this compiles to `addps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(addps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_add(a, b) }
}
38
39#[inline]
44#[target_feature(enable = "sse")]
45#[cfg_attr(test, assert_instr(subss))]
46#[stable(feature = "simd_x86", since = "1.27.0")]
47#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
48pub const fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
49 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b)) }
50}
51
/// Subtracts `b` from `a` lane by lane via `simd_sub` and returns the
/// resulting vector.
///
/// Test builds assert that this compiles to `subps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(subps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_sub(a, b) }
}
64
65#[inline]
70#[target_feature(enable = "sse")]
71#[cfg_attr(test, assert_instr(mulss))]
72#[stable(feature = "simd_x86", since = "1.27.0")]
73#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
74pub const fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
75 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b)) }
76}
77
/// Multiplies `a` and `b` lane by lane via `simd_mul` and returns the
/// resulting vector.
///
/// Test builds assert that this compiles to `mulps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(mulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_mul(a, b) }
}
90
91#[inline]
96#[target_feature(enable = "sse")]
97#[cfg_attr(test, assert_instr(divss))]
98#[stable(feature = "simd_x86", since = "1.27.0")]
99#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
100pub const fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
101 unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b)) }
102}
103
/// Divides `a` by `b` lane by lane via `simd_div` and returns the resulting
/// vector.
///
/// Test builds assert that this compiles to `divps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(divps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_div(a, b) }
}
116
117#[inline]
122#[target_feature(enable = "sse")]
123#[cfg_attr(test, assert_instr(sqrtss))]
124#[stable(feature = "simd_x86", since = "1.27.0")]
125pub fn _mm_sqrt_ss(a: __m128) -> __m128 {
126 unsafe { simd_insert!(a, 0, sqrtf32(_mm_cvtss_f32(a))) }
127}
128
/// Applies `simd_fsqrt` to `a` and returns the resulting vector.
///
/// Test builds assert that this compiles to `sqrtps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_ps(a: __m128) -> __m128 {
    unsafe { simd_fsqrt(a) }
}
140
/// Forwards `a` to the `rcpss` intrinsic and returns its result.
///
/// Test builds assert that this compiles to `rcpss`.
/// NOTE(review): by name this is a reciprocal of lane 0 only; the exact
/// semantics are defined by the intrinsic, not visible here — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_rcp_ss(a: __m128) -> __m128 {
    unsafe { rcpss(a) }
}
152
/// Forwards `a` to the `rcpps` intrinsic and returns its result.
///
/// Test builds assert that this compiles to `rcpps`.
/// NOTE(review): by name this is a per-lane reciprocal; the exact semantics
/// are defined by the intrinsic, not visible here — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_rcp_ps(a: __m128) -> __m128 {
    unsafe { rcpps(a) }
}
164
/// Forwards `a` to the `rsqrtss` intrinsic and returns its result.
///
/// Test builds assert that this compiles to `rsqrtss`.
/// NOTE(review): by name this is a reciprocal square root of lane 0 only; the
/// exact semantics are defined by the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_rsqrt_ss(a: __m128) -> __m128 {
    unsafe { rsqrtss(a) }
}
176
/// Forwards `a` to the `rsqrtps` intrinsic and returns its result.
///
/// Test builds assert that this compiles to `rsqrtps`.
/// NOTE(review): by name this is a per-lane reciprocal square root; the exact
/// semantics are defined by the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_rsqrt_ps(a: __m128) -> __m128 {
    unsafe { rsqrtps(a) }
}
188
/// Forwards `a` and `b` to the `minss` intrinsic and returns its result.
///
/// Test builds assert that this compiles to `minss`.
/// NOTE(review): by name this is a minimum of the lowest lanes; handling of
/// NaN/signed zero and of lanes 1..3 is defined by the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { minss(a, b) }
}
201
/// Forwards `a` and `b` to the `minps` intrinsic and returns its result.
///
/// Test builds assert that this compiles to `minps`.
/// NOTE(review): by name this is a per-lane minimum; NaN/signed-zero handling
/// is defined by the intrinsic, not visible here — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { minps(a, b) }
}
214
/// Forwards `a` and `b` to the `maxss` intrinsic and returns its result.
///
/// Test builds assert that this compiles to `maxss`.
/// NOTE(review): by name this is a maximum of the lowest lanes; handling of
/// NaN/signed zero and of lanes 1..3 is defined by the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { maxss(a, b) }
}
227
/// Forwards `a` and `b` to the `maxps` intrinsic and returns its result.
///
/// Test builds assert that this compiles to `maxps`.
/// NOTE(review): by name this is a per-lane maximum; NaN/signed-zero handling
/// is defined by the intrinsic, not visible here — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { maxps(a, b) }
}
240
241#[inline]
245#[target_feature(enable = "sse")]
246#[cfg_attr(
248 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
249 assert_instr(andps)
250)]
251#[stable(feature = "simd_x86", since = "1.27.0")]
252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
253pub const fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
254 unsafe {
255 let a: __m128i = mem::transmute(a);
256 let b: __m128i = mem::transmute(b);
257 mem::transmute(simd_and(a, b))
258 }
259}
260
261#[inline]
268#[target_feature(enable = "sse")]
269#[cfg_attr(
272 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
273 assert_instr(andnps)
274)]
275#[stable(feature = "simd_x86", since = "1.27.0")]
276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
277pub const fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
278 unsafe {
279 let a: __m128i = mem::transmute(a);
280 let b: __m128i = mem::transmute(b);
281 let mask: __m128i = mem::transmute(i32x4::splat(-1));
282 mem::transmute(simd_and(simd_xor(mask, a), b))
283 }
284}
285
286#[inline]
290#[target_feature(enable = "sse")]
291#[cfg_attr(
293 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
294 assert_instr(orps)
295)]
296#[stable(feature = "simd_x86", since = "1.27.0")]
297#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
298pub const fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
299 unsafe {
300 let a: __m128i = mem::transmute(a);
301 let b: __m128i = mem::transmute(b);
302 mem::transmute(simd_or(a, b))
303 }
304}
305
306#[inline]
311#[target_feature(enable = "sse")]
312#[cfg_attr(
314 all(test, any(target_arch = "x86_64", target_feature = "sse2")),
315 assert_instr(xorps)
316)]
317#[stable(feature = "simd_x86", since = "1.27.0")]
318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
319pub const fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
320 unsafe {
321 let a: __m128i = mem::transmute(a);
322 let b: __m128i = mem::transmute(b);
323 mem::transmute(simd_xor(a, b))
324 }
325}
326
/// Compares `a` and `b` through the `cmpss` intrinsic with predicate
/// immediate `0`.
///
/// Test builds assert that this assembles to `cmpeqss` (an "equal" compare).
/// NOTE(review): the layout of the result lanes is defined by `cmpss`, not
/// visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 0) }
}
339
/// Compares `a` and `b` through the `cmpss` intrinsic with predicate
/// immediate `1`.
///
/// Test builds assert that this assembles to `cmpltss` (a "less than" compare).
/// NOTE(review): the layout of the result lanes is defined by `cmpss`, not
/// visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 1) }
}
353
/// Compares `a` and `b` through the `cmpss` intrinsic with predicate
/// immediate `2`.
///
/// Test builds assert that this assembles to `cmpless` (a "less than or equal"
/// compare).
/// NOTE(review): the layout of the result lanes is defined by `cmpss`, not
/// visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 2) }
}
367
368#[inline]
375#[target_feature(enable = "sse")]
376#[cfg_attr(test, assert_instr(cmpltss))]
377#[stable(feature = "simd_x86", since = "1.27.0")]
378pub fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
379 unsafe { simd_shuffle!(a, cmpss(b, a, 1), [4, 1, 2, 3]) }
380}
381
382#[inline]
389#[target_feature(enable = "sse")]
390#[cfg_attr(test, assert_instr(cmpless))]
391#[stable(feature = "simd_x86", since = "1.27.0")]
392pub fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 {
393 unsafe { simd_shuffle!(a, cmpss(b, a, 2), [4, 1, 2, 3]) }
394}
395
/// Compares `a` and `b` through the `cmpss` intrinsic with predicate
/// immediate `4`.
///
/// Test builds assert that this assembles to `cmpneqss` (a "not equal"
/// compare).
/// NOTE(review): the layout of the result lanes is defined by `cmpss`, not
/// visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 4) }
}
409
/// Compares `a` and `b` through the `cmpss` intrinsic with predicate
/// immediate `5`.
///
/// Test builds assert that this assembles to `cmpnltss` (a "not less than"
/// compare).
/// NOTE(review): the layout of the result lanes is defined by `cmpss`, not
/// visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 5) }
}
423
/// Compares `a` and `b` through the `cmpss` intrinsic with predicate
/// immediate `6`.
///
/// Test builds assert that this assembles to `cmpnless` (a "not less than or
/// equal" compare).
/// NOTE(review): the layout of the result lanes is defined by `cmpss`, not
/// visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 6) }
}
437
438#[inline]
445#[target_feature(enable = "sse")]
446#[cfg_attr(test, assert_instr(cmpnltss))]
447#[stable(feature = "simd_x86", since = "1.27.0")]
448pub fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
449 unsafe { simd_shuffle!(a, cmpss(b, a, 5), [4, 1, 2, 3]) }
450}
451
452#[inline]
459#[target_feature(enable = "sse")]
460#[cfg_attr(test, assert_instr(cmpnless))]
461#[stable(feature = "simd_x86", since = "1.27.0")]
462pub fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 {
463 unsafe { simd_shuffle!(a, cmpss(b, a, 6), [4, 1, 2, 3]) }
464}
465
/// Compares `a` and `b` through the `cmpss` intrinsic with predicate
/// immediate `7`.
///
/// Test builds assert that this assembles to `cmpordss` (an "ordered" check).
/// NOTE(review): the layout of the result lanes is defined by `cmpss`, not
/// visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 7) }
}
479
/// Compares `a` and `b` through the `cmpss` intrinsic with predicate
/// immediate `3`.
///
/// Test builds assert that this assembles to `cmpunordss` (an "unordered"
/// check).
/// NOTE(review): the layout of the result lanes is defined by `cmpss`, not
/// visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpss(a, b, 3) }
}
493
/// Compares `a` and `b` through the `cmpps` intrinsic with predicate
/// immediate `0`.
///
/// Test builds assert that this assembles to `cmpeqps` (an "equal" compare).
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 0) }
}
506
/// Compares `a` and `b` through the `cmpps` intrinsic with predicate
/// immediate `1`.
///
/// Test builds assert that this assembles to `cmpltps` (a "less than" compare).
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 1) }
}
519
/// Compares `a` and `b` through the `cmpps` intrinsic with predicate
/// immediate `2`.
///
/// Test builds assert that this assembles to `cmpleps` (a "less than or equal"
/// compare).
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 2) }
}
533
/// "Greater than" compare of `a` and `b`, expressed as `b < a`.
///
/// Calls `cmpps` with the operands swapped and predicate immediate `1`; test
/// builds assert that this assembles to `cmpltps`.
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 1) }
}
546
/// "Greater than or equal" compare of `a` and `b`, expressed as `b <= a`.
///
/// Calls `cmpps` with the operands swapped and predicate immediate `2`; test
/// builds assert that this assembles to `cmpleps`.
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 2) }
}
560
/// Compares `a` and `b` through the `cmpps` intrinsic with predicate
/// immediate `4`.
///
/// Test builds assert that this assembles to `cmpneqps` (a "not equal"
/// compare).
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 4) }
}
573
/// Compares `a` and `b` through the `cmpps` intrinsic with predicate
/// immediate `5`.
///
/// Test builds assert that this assembles to `cmpnltps` (a "not less than"
/// compare).
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 5) }
}
587
/// Compares `a` and `b` through the `cmpps` intrinsic with predicate
/// immediate `6`.
///
/// Test builds assert that this assembles to `cmpnleps` (a "not less than or
/// equal" compare).
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(a, b, 6) }
}
601
/// "Not greater than" compare of `a` and `b`, expressed as `!(b < a)`.
///
/// Calls `cmpps` with the operands swapped and predicate immediate `5`; test
/// builds assert that this assembles to `cmpnltps`.
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 5) }
}
615
/// "Not greater than or equal" compare of `a` and `b`, expressed as
/// `!(b <= a)`.
///
/// Calls `cmpps` with the operands swapped and predicate immediate `6`; test
/// builds assert that this assembles to `cmpnleps`.
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 6) }
}
629
/// Compares `a` and `b` through the `cmpps` intrinsic with predicate
/// immediate `7`.
///
/// The operands are passed as `(b, a)`. Test builds assert that this assembles
/// to `cmpordps` (an "ordered" check).
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 7) }
}
643
/// Compares `a` and `b` through the `cmpps` intrinsic with predicate
/// immediate `3`.
///
/// The operands are passed as `(b, a)`. Test builds assert that this assembles
/// to `cmpunordps` (an "unordered" check).
/// NOTE(review): the per-lane mask encoding of the result is defined by
/// `cmpps`, not visible here — confirm against Intel's guide.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { cmpps(b, a, 3) }
}
657
/// Forwards `a` and `b` to the `comieq_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `comiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for equality; the exact
/// return values and NaN behaviour come from the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comieq_ss(a, b) }
}
669
/// Forwards `a` and `b` to the `comilt_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `comiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for "less than"; the
/// exact return values and NaN behaviour come from the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comilt_ss(a, b) }
}
681
/// Forwards `a` and `b` to the `comile_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `comiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for "less than or equal";
/// the exact return values and NaN behaviour come from the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comile_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comile_ss(a, b) }
}
694
/// Forwards `a` and `b` to the `comigt_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `comiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for "greater than"; the
/// exact return values and NaN behaviour come from the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comigt_ss(a, b) }
}
707
/// Forwards `a` and `b` to the `comige_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `comiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for "greater than or
/// equal"; the exact return values and NaN behaviour come from the intrinsic —
/// confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comige_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comige_ss(a, b) }
}
720
/// Forwards `a` and `b` to the `comineq_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `comiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for inequality; the exact
/// return values and NaN behaviour come from the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 {
    unsafe { comineq_ss(a, b) }
}
732
/// Forwards `a` and `b` to the `ucomieq_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `ucomiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for equality; how it
/// differs from `_mm_comieq_ss` is defined by the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomieq_ss(a, b) }
}
745
/// Forwards `a` and `b` to the `ucomilt_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `ucomiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for "less than"; how it
/// differs from `_mm_comilt_ss` is defined by the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomilt_ss(a, b) }
}
759
/// Forwards `a` and `b` to the `ucomile_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `ucomiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for "less than or equal";
/// how it differs from `_mm_comile_ss` is defined by the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomile_ss(a, b) }
}
773
/// Forwards `a` and `b` to the `ucomigt_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `ucomiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for "greater than"; how
/// it differs from `_mm_comigt_ss` is defined by the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomigt_ss(a, b) }
}
787
/// Forwards `a` and `b` to the `ucomige_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `ucomiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for "greater than or
/// equal"; how it differs from `_mm_comige_ss` is defined by the intrinsic —
/// confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomige_ss(a, b) }
}
801
/// Forwards `a` and `b` to the `ucomineq_ss` intrinsic and returns its `i32`
/// result.
///
/// Test builds assert that a `ucomiss` instruction is emitted.
/// NOTE(review): by name this tests the lowest lanes for inequality; how it
/// differs from `_mm_comineq_ss` is defined by the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 {
    unsafe { ucomineq_ss(a, b) }
}
814
/// Forwards `a` to the `cvtss2si` intrinsic and returns its `i32` result.
///
/// Test builds assert that this compiles to `cvtss2si`.
/// NOTE(review): by name this converts the lowest float lane to a 32-bit
/// integer; rounding and out-of-range behaviour come from the intrinsic —
/// confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtss_si32(a: __m128) -> i32 {
    unsafe { cvtss2si(a) }
}
831
/// Alias for [`_mm_cvtss_si32`]: calls it with `a` and returns its result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvt_ss2si(a: __m128) -> i32 {
    _mm_cvtss_si32(a)
}
842
/// Forwards `a` to the `cvttss2si` intrinsic and returns its `i32` result.
///
/// Test builds assert that this compiles to `cvttss2si`.
/// NOTE(review): by name this is the truncating variant of `_mm_cvtss_si32`;
/// out-of-range behaviour comes from the intrinsic — confirm.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttss_si32(a: __m128) -> i32 {
    unsafe { cvttss2si(a) }
}
861
/// Alias for [`_mm_cvttss_si32`]: calls it with `a` and returns its result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtt_ss2si(a: __m128) -> i32 {
    _mm_cvttss_si32(a)
}
872
/// Extracts lane 0 of `a` and returns it as an `f32`.
///
/// No `assert_instr` check is attached to this function.
#[inline]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtss_f32(a: __m128) -> f32 {
    unsafe { simd_extract!(a, 0) }
}
885
/// Converts `b` to `f32` (with an `as` cast) and writes it into lane 0 of `a`;
/// lanes 1..3 of the result are copied from `a`.
///
/// Test builds assert that this compiles to `cvtsi2ss`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
    unsafe { simd_insert!(a, 0, b as f32) }
}
901
/// Alias for [`_mm_cvtsi32_ss`]: calls it with `a` and `b` and returns its
/// result.
///
/// NOTE(review): unlike `_mm_cvtsi32_ss`, this alias is not declared `const`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
    _mm_cvtsi32_ss(a, b)
}
912
/// Builds a vector with `a` in lane 0 and `0.0` in lanes 1..3.
///
/// Test builds assert that this compiles to `movss`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set_ss(a: f32) -> __m128 {
    __m128([a, 0.0, 0.0, 0.0])
}
925
/// Builds a vector with all four lanes set to `a` (via `f32x4::splat`).
///
/// Test builds assert that this compiles to `shufps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set1_ps(a: f32) -> __m128 {
    f32x4::splat(a).as_m128()
}
937
/// Alias for [`_mm_set1_ps`]: calls it with `a` and returns its result.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set_ps1(a: f32) -> __m128 {
    _mm_set1_ps(a)
}
949
/// Builds a vector from four floats given highest lane first.
///
/// The arguments are stored in reverse order: `d` becomes lane 0, `c` lane 1,
/// `b` lane 2 and `a` lane 3. See [`_mm_setr_ps`] for the lane-0-first order.
///
/// Test builds assert that this compiles to `unpcklps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128([d, c, b, a])
}
977
/// Builds a vector from four floats given lowest lane first: `a` becomes
/// lane 0, `b` lane 1, `c` lane 2 and `d` lane 3.
///
/// The expected instruction depends on the target: `unpcklps` on MSVC or
/// x86_64, `movaps` on non-MSVC 32-bit x86.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(test, any(target_env = "msvc", target_arch = "x86_64")),
    assert_instr(unpcklps)
)]
#[cfg_attr(
    all(test, all(not(target_env = "msvc"), target_arch = "x86")),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128([a, b, c, d])
}
1004
/// Returns a `__m128` whose bytes are all zero.
///
/// The value is produced by `mem::zeroed()` inside an inline `const` block, so
/// it is evaluated at compile time. Test builds assert that this compiles to
/// `xorps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_setzero_ps() -> __m128 {
    const { unsafe { mem::zeroed() } }
}
1016
1017#[inline]
1020#[allow(non_snake_case)]
1021#[unstable(feature = "stdarch_x86_mm_shuffle", issue = "111147")]
1022pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
1023 ((z << 6) | (y << 4) | (x << 2) | w) as i32
1024}
1025
/// Shuffles lanes of `a` and `b` according to the const mask `MASK`.
///
/// The mask is read as four 2-bit fields:
/// * bits 0..=1 pick the lane of `a` that becomes result lane 0,
/// * bits 2..=3 pick the lane of `a` that becomes result lane 1,
/// * bits 4..=5 pick the lane of `b` that becomes result lane 2,
/// * bits 6..=7 pick the lane of `b` that becomes result lane 3.
///
/// `rustc_legacy_const_generics(2)` lets callers pass `MASK` as a third
/// positional argument. Test builds assert that `MASK = 3` compiles to
/// `shufps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
    // Compile-time check on MASK (by the macro's name: it must fit in 8 unsigned bits).
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                // Indices 0..=3 address `a`; adding 4 addresses `b`.
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
1060
/// Interleaves the upper halves of `a` and `b`.
///
/// With shuffle indices `[2, 6, 3, 7]` the result is `[a2, b2, a3, b3]`.
/// Test builds assert that this compiles to `unpckhps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
}
1073
/// Interleaves the lower halves of `a` and `b`.
///
/// With shuffle indices `[0, 4, 1, 5]` the result is `[a0, b0, a1, b1]`.
/// Test builds assert that this compiles to `unpcklps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
}
1086
/// Combines the upper half of `b` with the upper half of `a`.
///
/// With shuffle indices `[6, 7, 2, 3]` the result is `[b2, b3, a2, a3]`.
/// Test builds assert that this compiles to `movhlps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movhlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [6, 7, 2, 3]) }
}
1100
/// Combines the lower half of `a` with the lower half of `b`.
///
/// With shuffle indices `[0, 1, 4, 5]` the result is `[a0, a1, b0, b1]`.
/// Test builds assert that this compiles to `movlhps`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [0, 1, 4, 5]) }
}
1113
1114#[inline]
1121#[target_feature(enable = "sse")]
1122#[cfg_attr(test, assert_instr(movmskps))]
1123#[stable(feature = "simd_x86", since = "1.27.0")]
1124#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1125pub const fn _mm_movemask_ps(a: __m128) -> i32 {
1126 unsafe {
1129 let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO);
1130 simd_bitmask::<i32x4, u8>(mask) as i32
1131 }
1132}
1133
1134#[inline]
1141#[target_feature(enable = "sse")]
1142#[cfg_attr(test, assert_instr(movss))]
1143#[stable(feature = "simd_x86", since = "1.27.0")]
1144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1145pub const unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
1146 __m128([*p, 0.0, 0.0, 0.0])
1147}
1148
1149#[inline]
1157#[target_feature(enable = "sse")]
1158#[cfg_attr(test, assert_instr(movss))]
1159#[stable(feature = "simd_x86", since = "1.27.0")]
1160#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1161pub const unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
1162 let a = *p;
1163 __m128([a, a, a, a])
1164}
1165
/// Alias for [`_mm_load1_ps`]: calls it with `p` and returns its result.
///
/// # Safety
///
/// Same requirements as `_mm_load1_ps`, to which `p` is passed unchanged.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
    _mm_load1_ps(p)
}
1177
1178#[inline]
1189#[target_feature(enable = "sse")]
1190#[cfg_attr(
1193 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1194 assert_instr(movaps)
1195)]
1196#[stable(feature = "simd_x86", since = "1.27.0")]
1197#[allow(clippy::cast_ptr_alignment)]
1198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1199pub const unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
1200 *(p as *const __m128)
1201}
1202
1203#[inline]
1213#[target_feature(enable = "sse")]
1214#[cfg_attr(test, assert_instr(movups))]
1215#[stable(feature = "simd_x86", since = "1.27.0")]
1216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1217pub const unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
1218 let mut dst = _mm_undefined_ps();
1221 ptr::copy_nonoverlapping(
1222 p as *const u8,
1223 ptr::addr_of_mut!(dst) as *mut u8,
1224 mem::size_of::<__m128>(),
1225 );
1226 dst
1227}
1228
1229#[inline]
1251#[target_feature(enable = "sse")]
1252#[cfg_attr(
1253 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1254 assert_instr(movaps)
1255)]
1256#[stable(feature = "simd_x86", since = "1.27.0")]
1257#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1258pub const unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
1259 let a = _mm_load_ps(p);
1260 simd_shuffle!(a, a, [3, 2, 1, 0])
1261}
1262
1263#[inline]
1269#[target_feature(enable = "sse")]
1270#[cfg_attr(test, assert_instr(movss))]
1271#[stable(feature = "simd_x86", since = "1.27.0")]
1272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1273pub const unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
1274 *p = simd_extract!(a, 0);
1275}
1276
1277#[inline]
1296#[target_feature(enable = "sse")]
1297#[cfg_attr(
1298 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1299 assert_instr(movaps)
1300)]
1301#[stable(feature = "simd_x86", since = "1.27.0")]
1302#[allow(clippy::cast_ptr_alignment)]
1303#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1304pub const unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
1305 let b: __m128 = simd_shuffle!(a, a, [0, 0, 0, 0]);
1306 *(p as *mut __m128) = b;
1307}
1308
/// Alias for [`_mm_store1_ps`]: calls it with `p` and `a`.
///
/// The `movaps` instruction check is skipped on 32-bit x86 MSVC targets.
///
/// # Safety
///
/// Same requirements as `_mm_store1_ps`, to which `p` is passed unchanged.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
    _mm_store1_ps(p, a);
}
1323
1324#[inline]
1336#[target_feature(enable = "sse")]
1337#[cfg_attr(
1338 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1339 assert_instr(movaps)
1340)]
1341#[stable(feature = "simd_x86", since = "1.27.0")]
1342#[allow(clippy::cast_ptr_alignment)]
1343#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1344pub const unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
1345 *(p as *mut __m128) = a;
1346}
1347
1348#[inline]
1356#[target_feature(enable = "sse")]
1357#[cfg_attr(test, assert_instr(movups))]
1358#[stable(feature = "simd_x86", since = "1.27.0")]
1359#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1360pub const unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
1361 ptr::copy_nonoverlapping(
1362 ptr::addr_of!(a) as *const u8,
1363 p as *mut u8,
1364 mem::size_of::<__m128>(),
1365 );
1366}
1367
1368#[inline]
1385#[target_feature(enable = "sse")]
1386#[cfg_attr(
1387 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1388 assert_instr(movaps)
1389)]
1390#[stable(feature = "simd_x86", since = "1.27.0")]
1391#[allow(clippy::cast_ptr_alignment)]
1392#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1393pub const unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
1394 let b: __m128 = simd_shuffle!(a, a, [3, 2, 1, 0]);
1395 *(p as *mut __m128) = b;
1396}
1397
/// Returns a vector with lane 0 taken from `b` and lanes 1..3 taken from `a`.
///
/// With shuffle indices `[4, 1, 2, 3]` the result is `[b0, a1, a2, a3]`.
/// Test builds assert that this compiles to `movss`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, b, [4, 1, 2, 3]) }
}
1415
/// Invokes the `sfence` intrinsic.
///
/// Test builds assert that this compiles to `sfence`.
/// NOTE(review): by name this is a store fence; the ordering guarantees are
/// defined by the instruction, not visible here — confirm in Intel's SDM.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sfence() {
    unsafe { sfence() }
}
1490
1491#[inline]
1506#[target_feature(enable = "sse")]
1507#[cfg_attr(test, assert_instr(stmxcsr))]
1508#[stable(feature = "simd_x86", since = "1.27.0")]
1509#[deprecated(
1510 since = "1.75.0",
1511 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1512)]
1513pub unsafe fn _mm_getcsr() -> u32 {
1514 unsafe {
1515 let mut result = 0_i32;
1516 stmxcsr(ptr::addr_of_mut!(result) as *mut i8);
1517 result as u32
1518 }
1519}
1520
1521#[inline]
1655#[target_feature(enable = "sse")]
1656#[cfg_attr(test, assert_instr(ldmxcsr))]
1657#[stable(feature = "simd_x86", since = "1.27.0")]
1658#[deprecated(
1659 since = "1.75.0",
1660 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1661)]
1662pub unsafe fn _mm_setcsr(val: u32) {
1663 ldmxcsr(ptr::addr_of!(val) as *const i8);
1664}
1665
// Exception-state flag bits, occupying bits 0..=5 of the value handled by
// `_mm_getcsr`/`_mm_setcsr`.
// NOTE(review): the meaning of each bit is taken from the constant names —
// confirm against the MXCSR layout in Intel's SDM.

/// Exception-state flag `0x0001` ("invalid", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INVALID: u32 = 0x0001;
/// Exception-state flag `0x0002` ("denormal", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DENORM: u32 = 0x0002;
/// Exception-state flag `0x0004` ("divide by zero", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004;
/// Exception-state flag `0x0008` ("overflow", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008;
/// Exception-state flag `0x0010` ("underflow", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010;
/// Exception-state flag `0x0020` ("inexact", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INEXACT: u32 = 0x0020;
/// Bitwise OR of the six `_MM_EXCEPT_*` flags above (`0x003f`).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_MASK: u32 = 0x003f;
1687
// Exception-mask bits, occupying bits 7..=12 of the value handled by
// `_mm_getcsr`/`_mm_setcsr`. Each is its `_MM_EXCEPT_*` counterpart shifted
// left by 7.
// NOTE(review): the meaning of each bit is taken from the constant names —
// confirm against the MXCSR layout in Intel's SDM.

/// Exception-mask bit `0x0080` ("invalid", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INVALID: u32 = 0x0080;
/// Exception-mask bit `0x0100` ("denormal", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DENORM: u32 = 0x0100;
/// Exception-mask bit `0x0200` ("divide by zero", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DIV_ZERO: u32 = 0x0200;
/// Exception-mask bit `0x0400` ("overflow", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_OVERFLOW: u32 = 0x0400;
/// Exception-mask bit `0x0800` ("underflow", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_UNDERFLOW: u32 = 0x0800;
/// Exception-mask bit `0x1000` ("inexact", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INEXACT: u32 = 0x1000;
/// Bitwise OR of the six `_MM_MASK_*` bits above (`0x1f80`).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_MASK: u32 = 0x1f80;
1709
// Rounding-mode values, encoded in bits 13..=14 of the value handled by
// `_mm_getcsr`/`_mm_setcsr`.
// NOTE(review): the meaning of each value is taken from the constant names —
// confirm against the MXCSR layout in Intel's SDM.

/// Rounding-mode value `0x0000` ("nearest", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_NEAREST: u32 = 0x0000;
/// Rounding-mode value `0x2000` ("down", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_DOWN: u32 = 0x2000;
/// Rounding-mode value `0x4000` ("up", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_UP: u32 = 0x4000;
/// Rounding-mode value `0x6000` ("toward zero", per its name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;

/// Mask covering both rounding-mode bits (`0x6000`); numerically equal to
/// `_MM_ROUND_TOWARD_ZERO`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_MASK: u32 = 0x6000;
1726
// Flush-to-zero control, a single bit (bit 15) of the value handled by
// `_mm_getcsr`/`_mm_setcsr`.
// NOTE(review): the meaning of the bit is taken from the constant names —
// confirm against the MXCSR layout in Intel's SDM.

/// Mask selecting the flush-zero bit (`0x8000`).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000;
/// Value with the flush-zero bit set (`0x8000`, same as the mask).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000;
/// Value with the flush-zero bit clear (`0x0000`).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
1736
1737#[inline]
1741#[allow(deprecated)] #[allow(non_snake_case)]
1743#[target_feature(enable = "sse")]
1744#[stable(feature = "simd_x86", since = "1.27.0")]
1745#[deprecated(
1746 since = "1.75.0",
1747 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1748)]
1749pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
1750 _mm_getcsr() & _MM_MASK_MASK
1751}
1752
1753#[inline]
1757#[allow(deprecated)] #[allow(non_snake_case)]
1759#[target_feature(enable = "sse")]
1760#[stable(feature = "simd_x86", since = "1.27.0")]
1761#[deprecated(
1762 since = "1.75.0",
1763 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1764)]
1765pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
1766 _mm_getcsr() & _MM_EXCEPT_MASK
1767}
1768
1769#[inline]
1773#[allow(deprecated)] #[allow(non_snake_case)]
1775#[target_feature(enable = "sse")]
1776#[stable(feature = "simd_x86", since = "1.27.0")]
1777#[deprecated(
1778 since = "1.75.0",
1779 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1780)]
1781pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
1782 _mm_getcsr() & _MM_FLUSH_ZERO_MASK
1783}
1784
1785#[inline]
1789#[allow(deprecated)] #[allow(non_snake_case)]
1791#[target_feature(enable = "sse")]
1792#[stable(feature = "simd_x86", since = "1.27.0")]
1793#[deprecated(
1794 since = "1.75.0",
1795 note = "see `_mm_getcsr` documentation - use inline assembly instead"
1796)]
1797pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
1798 _mm_getcsr() & _MM_ROUND_MASK
1799}
1800
1801#[inline]
1805#[allow(deprecated)] #[allow(non_snake_case)]
1807#[target_feature(enable = "sse")]
1808#[stable(feature = "simd_x86", since = "1.27.0")]
1809#[deprecated(
1810 since = "1.75.0",
1811 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1812)]
1813pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
1814 _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | (x & _MM_MASK_MASK))
1815}
1816
1817#[inline]
1821#[allow(deprecated)] #[allow(non_snake_case)]
1823#[target_feature(enable = "sse")]
1824#[stable(feature = "simd_x86", since = "1.27.0")]
1825#[deprecated(
1826 since = "1.75.0",
1827 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1828)]
1829pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
1830 _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | (x & _MM_EXCEPT_MASK))
1831}
1832
1833#[inline]
1837#[allow(deprecated)] #[allow(non_snake_case)]
1839#[target_feature(enable = "sse")]
1840#[stable(feature = "simd_x86", since = "1.27.0")]
1841#[deprecated(
1842 since = "1.75.0",
1843 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1844)]
1845pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
1846 _mm_setcsr((_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | (x & _MM_FLUSH_ZERO_MASK))
1847}
1848
1849#[inline]
1853#[allow(deprecated)] #[allow(non_snake_case)]
1855#[target_feature(enable = "sse")]
1856#[stable(feature = "simd_x86", since = "1.27.0")]
1857#[deprecated(
1858 since = "1.75.0",
1859 note = "see `_mm_setcsr` documentation - use inline assembly instead"
1860)]
1861pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) {
1862 _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | (x & _MM_ROUND_MASK))
1863}
1864
/// `STRATEGY` value for [`_mm_prefetch`]; that function's instruction test
/// pairs it with `prefetcht0`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T0: i32 = 3;

/// `STRATEGY` value for [`_mm_prefetch`]; that function's instruction test
/// pairs it with `prefetcht1`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T1: i32 = 2;

/// `STRATEGY` value for [`_mm_prefetch`]; that function's instruction test
/// pairs it with `prefetcht2`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T2: i32 = 1;

/// `STRATEGY` value for [`_mm_prefetch`]; that function's instruction test
/// pairs it with `prefetchnta`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_NTA: i32 = 0;

/// `STRATEGY` value for [`_mm_prefetch`]: same low two bits as
/// [`_MM_HINT_T0`], plus bit 2, which `_mm_prefetch` forwards as the `rw`
/// argument of the prefetch intrinsic.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_ET0: i32 = 7;

/// `STRATEGY` value for [`_mm_prefetch`]: same low two bits as
/// [`_MM_HINT_T1`], plus bit 2, which `_mm_prefetch` forwards as the `rw`
/// argument of the prefetch intrinsic.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_ET1: i32 = 6;
1888
/// Issues a prefetch request for the address `p` using the hint encoded in
/// `STRATEGY` (one of the `_MM_HINT_*` constants).
///
/// `STRATEGY` is decoded as follows before being handed to the
/// `llvm.prefetch` binding:
///
/// * bit 2 becomes the `rw` argument,
/// * bits 0-1 become the `loc` argument,
/// * the `ty` argument is always `1`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(prefetcht0, STRATEGY = _MM_HINT_T0))]
#[cfg_attr(test, assert_instr(prefetcht1, STRATEGY = _MM_HINT_T1))]
#[cfg_attr(test, assert_instr(prefetcht2, STRATEGY = _MM_HINT_T2))]
#[cfg_attr(test, assert_instr(prefetchnta, STRATEGY = _MM_HINT_NTA))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
    // presumably rejects, at compile time, any STRATEGY that does not fit in
    // 3 unsigned bits - confirm against the macro definition
    static_assert_uimm_bits!(STRATEGY, 3);
    unsafe {
        // NOTE(review): the arguments are written inline rather than bound to
        // locals; the intrinsic presumably needs them to stay compile-time
        // constants - confirm before refactoring.
        prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1);
    }
}
1949
1950#[inline]
1957#[target_feature(enable = "sse")]
1958#[stable(feature = "simd_x86", since = "1.27.0")]
1959#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1960pub const fn _mm_undefined_ps() -> __m128 {
1961 const { unsafe { mem::zeroed() } }
1962}
1963
1964#[inline]
1968#[allow(non_snake_case)]
1969#[target_feature(enable = "sse")]
1970#[stable(feature = "simd_x86", since = "1.27.0")]
1971#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1972pub const fn _MM_TRANSPOSE4_PS(
1973 row0: &mut __m128,
1974 row1: &mut __m128,
1975 row2: &mut __m128,
1976 row3: &mut __m128,
1977) {
1978 let tmp0 = _mm_unpacklo_ps(*row0, *row1);
1979 let tmp2 = _mm_unpacklo_ps(*row2, *row3);
1980 let tmp1 = _mm_unpackhi_ps(*row0, *row1);
1981 let tmp3 = _mm_unpackhi_ps(*row2, *row3);
1982
1983 *row0 = _mm_movelh_ps(tmp0, tmp2);
1984 *row1 = _mm_movehl_ps(tmp2, tmp0);
1985 *row2 = _mm_movelh_ps(tmp1, tmp3);
1986 *row3 = _mm_movehl_ps(tmp3, tmp1);
1987}
1988
// Bindings to LLVM intrinsics; each declaration is resolved through its
// `link_name` rather than through the Rust-side function name.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Reciprocal and reciprocal-square-root (scalar `ss` / packed `ps`).
    #[link_name = "llvm.x86.sse.rcp.ss"]
    fn rcpss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rcp.ps"]
    fn rcpps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ss"]
    fn rsqrtss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ps"]
    fn rsqrtps(a: __m128) -> __m128;
    // Minimum / maximum.
    #[link_name = "llvm.x86.sse.min.ss"]
    fn minss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.min.ps"]
    fn minps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ss"]
    fn maxss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ps"]
    fn maxps(a: __m128, b: __m128) -> __m128;
    // Packed comparison selected by `imm8`.
    #[link_name = "llvm.x86.sse.cmp.ps"]
    fn cmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
    // `comi*` scalar comparisons returning an `i32` result.
    #[link_name = "llvm.x86.sse.comieq.ss"]
    fn comieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comilt.ss"]
    fn comilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comile.ss"]
    fn comile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comigt.ss"]
    fn comigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comige.ss"]
    fn comige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comineq.ss"]
    fn comineq_ss(a: __m128, b: __m128) -> i32;
    // `ucomi*` scalar comparisons returning an `i32` result.
    #[link_name = "llvm.x86.sse.ucomieq.ss"]
    fn ucomieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomilt.ss"]
    fn ucomilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomile.ss"]
    fn ucomile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomigt.ss"]
    fn ucomigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomige.ss"]
    fn ucomige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomineq.ss"]
    fn ucomineq_ss(a: __m128, b: __m128) -> i32;
    // Scalar float -> `i32` conversions.
    #[link_name = "llvm.x86.sse.cvtss2si"]
    fn cvtss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvttss2si"]
    fn cvttss2si(a: __m128) -> i32;
    // Store fence.
    #[link_name = "llvm.x86.sse.sfence"]
    fn sfence();
    // Store / load of the MXCSR register through a pointer.
    #[link_name = "llvm.x86.sse.stmxcsr"]
    fn stmxcsr(p: *mut i8);
    #[link_name = "llvm.x86.sse.ldmxcsr"]
    fn ldmxcsr(p: *const i8);
    // Generic (non-x86-specific) LLVM prefetch intrinsic used by `_mm_prefetch`.
    #[link_name = "llvm.prefetch"]
    fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
    // Scalar comparison selected by `imm8`.
    #[link_name = "llvm.x86.sse.cmp.ss"]
    fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
}
2048
/// Stores `a` to the memory at `mem_addr` using the `movntps` instruction,
/// emitted via inline assembly.
///
/// # Safety
///
/// `mem_addr` must be valid for a 16-byte write.
/// NOTE(review): `movntps` presumably also requires 16-byte alignment and is
/// a non-temporal store that needs an `sfence` before other threads rely on
/// the data - confirm against the Intel reference.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
    crate::arch::asm!(
        // `vps!` builds the instruction string; presumably it supplies the
        // `{p}` memory operand and the VEX/non-VEX spelling - see its definition
        vps!("movntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2078
2079#[cfg(test)]
2080mod tests {
2081 use crate::core_arch::assert_eq_const as assert_eq;
2082 use crate::{hint::black_box, ptr};
2083 use std::boxed;
2084 use stdarch_test::simd_test;
2085
2086 use crate::core_arch::{simd::*, x86::*};
2087
    // Shorthand for `f32::NAN`, used as the unordered operand in the comparison tests.
    const NAN: f32 = f32::NAN;
2089
2090 #[simd_test(enable = "sse")]
2091 const fn test_mm_add_ps() {
2092 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2093 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2094 let r = _mm_add_ps(a, b);
2095 assert_eq_m128(r, _mm_setr_ps(-101.0, 25.0, 0.0, -15.0));
2096 }
2097
2098 #[simd_test(enable = "sse")]
2099 const fn test_mm_add_ss() {
2100 let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
2101 let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
2102 let r = _mm_add_ss(a, b);
2103 assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
2104 }
2105
2106 #[simd_test(enable = "sse")]
2107 const fn test_mm_sub_ps() {
2108 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2109 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2110 let r = _mm_sub_ps(a, b);
2111 assert_eq_m128(r, _mm_setr_ps(99.0, -15.0, 0.0, -5.0));
2112 }
2113
2114 #[simd_test(enable = "sse")]
2115 const fn test_mm_sub_ss() {
2116 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2117 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2118 let r = _mm_sub_ss(a, b);
2119 assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
2120 }
2121
2122 #[simd_test(enable = "sse")]
2123 const fn test_mm_mul_ps() {
2124 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2125 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2126 let r = _mm_mul_ps(a, b);
2127 assert_eq_m128(r, _mm_setr_ps(100.0, 100.0, 0.0, 50.0));
2128 }
2129
2130 #[simd_test(enable = "sse")]
2131 const fn test_mm_mul_ss() {
2132 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2133 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2134 let r = _mm_mul_ss(a, b);
2135 assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
2136 }
2137
2138 #[simd_test(enable = "sse")]
2139 const fn test_mm_div_ps() {
2140 let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0);
2141 let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0);
2142 let r = _mm_div_ps(a, b);
2143 assert_eq_m128(r, _mm_setr_ps(0.01, 0.25, 10.0, 2.0));
2144 }
2145
2146 #[simd_test(enable = "sse")]
2147 const fn test_mm_div_ss() {
2148 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2149 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2150 let r = _mm_div_ss(a, b);
2151 assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
2152 }
2153
2154 #[simd_test(enable = "sse")]
2155 fn test_mm_sqrt_ss() {
2156 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2157 let r = _mm_sqrt_ss(a);
2158 let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
2159 assert_eq_m128(r, e);
2160 }
2161
2162 #[simd_test(enable = "sse")]
2163 fn test_mm_sqrt_ps() {
2164 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2165 let r = _mm_sqrt_ps(a);
2166 let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
2167 assert_eq_m128(r, e);
2168 }
2169
2170 #[simd_test(enable = "sse")]
2171 fn test_mm_rcp_ss() {
2172 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2173 let r = _mm_rcp_ss(a);
2174 let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
2175 let rel_err = 0.00048828125;
2176 assert_approx_eq!(get_m128(r, 0), get_m128(e, 0), 2. * rel_err);
2177 for i in 1..4 {
2178 assert_eq!(get_m128(r, i), get_m128(e, i));
2179 }
2180 }
2181
2182 #[simd_test(enable = "sse")]
2183 fn test_mm_rcp_ps() {
2184 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2185 let r = _mm_rcp_ps(a);
2186 let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
2187 let rel_err = 0.00048828125;
2188 for i in 0..4 {
2189 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2190 }
2191 }
2192
2193 #[simd_test(enable = "sse")]
2194 fn test_mm_rsqrt_ss() {
2195 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2196 let r = _mm_rsqrt_ss(a);
2197 let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
2198 let rel_err = 0.00048828125;
2199 for i in 0..4 {
2200 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2201 }
2202 }
2203
2204 #[simd_test(enable = "sse")]
2205 fn test_mm_rsqrt_ps() {
2206 let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2207 let r = _mm_rsqrt_ps(a);
2208 let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
2209 let rel_err = 0.00048828125;
2210 for i in 0..4 {
2211 assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2212 }
2213 }
2214
2215 #[simd_test(enable = "sse")]
2216 fn test_mm_min_ss() {
2217 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2218 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2219 let r = _mm_min_ss(a, b);
2220 assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2221 }
2222
2223 #[simd_test(enable = "sse")]
2224 fn test_mm_min_ps() {
2225 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2226 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2227 let r = _mm_min_ps(a, b);
2228 assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2229
2230 let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2236 let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2237 let r1 = _mm_min_ps(a, b).as_f32x4().to_bits();
2238 let r2 = _mm_min_ps(b, a).as_f32x4().to_bits();
2239 let a = a.as_f32x4().to_bits();
2240 let b = b.as_f32x4().to_bits();
2241 assert_eq!(r1, b);
2242 assert_eq!(r2, a);
2243 assert_ne!(a, b); }
2245
2246 #[simd_test(enable = "sse")]
2247 fn test_mm_max_ss() {
2248 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2249 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2250 let r = _mm_max_ss(a, b);
2251 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
2252 }
2253
2254 #[simd_test(enable = "sse")]
2255 fn test_mm_max_ps() {
2256 let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2257 let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2258 let r = _mm_max_ps(a, b);
2259 assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
2260
2261 let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2263 let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2264 let r1 = _mm_max_ps(a, b).as_f32x4().to_bits();
2265 let r2 = _mm_max_ps(b, a).as_f32x4().to_bits();
2266 let a = a.as_f32x4().to_bits();
2267 let b = b.as_f32x4().to_bits();
2268 assert_eq!(r1, b);
2269 assert_eq!(r2, a);
2270 assert_ne!(a, b); }
2272
2273 #[simd_test(enable = "sse")]
2274 const fn test_mm_and_ps() {
2275 let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
2276 let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
2277 let r = _mm_and_ps(*black_box(&a), *black_box(&b));
2278 let e = f32x4::from_bits(u32x4::splat(0b0001)).as_m128();
2279 assert_eq_m128(r, e);
2280 }
2281
2282 #[simd_test(enable = "sse")]
2283 const fn test_mm_andnot_ps() {
2284 let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
2285 let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
2286 let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
2287 let e = f32x4::from_bits(u32x4::splat(0b0100)).as_m128();
2288 assert_eq_m128(r, e);
2289 }
2290
2291 #[simd_test(enable = "sse")]
2292 const fn test_mm_or_ps() {
2293 let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
2294 let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
2295 let r = _mm_or_ps(*black_box(&a), *black_box(&b));
2296 let e = f32x4::from_bits(u32x4::splat(0b0111)).as_m128();
2297 assert_eq_m128(r, e);
2298 }
2299
2300 #[simd_test(enable = "sse")]
2301 const fn test_mm_xor_ps() {
2302 let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
2303 let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
2304 let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
2305 let e = f32x4::from_bits(u32x4::splat(0b0110)).as_m128();
2306 assert_eq_m128(r, e);
2307 }
2308
2309 #[simd_test(enable = "sse")]
2310 fn test_mm_cmpeq_ss() {
2311 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2312 let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
2313 let r = _mm_cmpeq_ss(a, b).as_f32x4().to_bits();
2314 let e = f32x4::new(f32::from_bits(0), 2.0, 3.0, 4.0).to_bits();
2315 assert_eq!(r, e);
2316
2317 let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2318 let r2 = _mm_cmpeq_ss(a, b2).as_f32x4().to_bits();
2319 let e2 = f32x4::new(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0).to_bits();
2320 assert_eq!(r2, e2);
2321 }
2322
2323 #[simd_test(enable = "sse")]
2324 fn test_mm_cmplt_ss() {
2325 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2326 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2327 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2328 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2329
2330 let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb = _mm_cmplt_ss(a, b).as_f32x4().to_bits();
2335 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2336 assert_eq!(rb, eb);
2337
2338 let rc = _mm_cmplt_ss(a, c).as_f32x4().to_bits();
2339 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2340 assert_eq!(rc, ec);
2341
2342 let rd = _mm_cmplt_ss(a, d).as_f32x4().to_bits();
2343 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2344 assert_eq!(rd, ed);
2345 }
2346
2347 #[simd_test(enable = "sse")]
2348 fn test_mm_cmple_ss() {
2349 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2350 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2351 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2352 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2353
2354 let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb = _mm_cmple_ss(a, b).as_f32x4().to_bits();
2359 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2360 assert_eq!(rb, eb);
2361
2362 let rc = _mm_cmple_ss(a, c).as_f32x4().to_bits();
2363 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2364 assert_eq!(rc, ec);
2365
2366 let rd = _mm_cmple_ss(a, d).as_f32x4().to_bits();
2367 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2368 assert_eq!(rd, ed);
2369 }
2370
2371 #[simd_test(enable = "sse")]
2372 fn test_mm_cmpgt_ss() {
2373 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2374 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2375 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2376 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2377
2378 let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb = _mm_cmpgt_ss(a, b).as_f32x4().to_bits();
2383 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2384 assert_eq!(rb, eb);
2385
2386 let rc = _mm_cmpgt_ss(a, c).as_f32x4().to_bits();
2387 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2388 assert_eq!(rc, ec);
2389
2390 let rd = _mm_cmpgt_ss(a, d).as_f32x4().to_bits();
2391 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2392 assert_eq!(rd, ed);
2393 }
2394
2395 #[simd_test(enable = "sse")]
2396 fn test_mm_cmpge_ss() {
2397 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2398 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2399 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2400 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2401
2402 let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb = _mm_cmpge_ss(a, b).as_f32x4().to_bits();
2407 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2408 assert_eq!(rb, eb);
2409
2410 let rc = _mm_cmpge_ss(a, c).as_f32x4().to_bits();
2411 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2412 assert_eq!(rc, ec);
2413
2414 let rd = _mm_cmpge_ss(a, d).as_f32x4().to_bits();
2415 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2416 assert_eq!(rd, ed);
2417 }
2418
2419 #[simd_test(enable = "sse")]
2420 fn test_mm_cmpneq_ss() {
2421 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2422 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2423 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2424 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2425
2426 let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb = _mm_cmpneq_ss(a, b).as_f32x4().to_bits();
2431 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2432 assert_eq!(rb, eb);
2433
2434 let rc = _mm_cmpneq_ss(a, c).as_f32x4().to_bits();
2435 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2436 assert_eq!(rc, ec);
2437
2438 let rd = _mm_cmpneq_ss(a, d).as_f32x4().to_bits();
2439 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2440 assert_eq!(rd, ed);
2441 }
2442
2443 #[simd_test(enable = "sse")]
2444 fn test_mm_cmpnlt_ss() {
2445 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2451 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2452 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2453 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2454
2455 let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb = _mm_cmpnlt_ss(a, b).as_f32x4().to_bits();
2460 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2461 assert_eq!(rb, eb);
2462
2463 let rc = _mm_cmpnlt_ss(a, c).as_f32x4().to_bits();
2464 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2465 assert_eq!(rc, ec);
2466
2467 let rd = _mm_cmpnlt_ss(a, d).as_f32x4().to_bits();
2468 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2469 assert_eq!(rd, ed);
2470 }
2471
2472 #[simd_test(enable = "sse")]
2473 fn test_mm_cmpnle_ss() {
2474 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2480 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2481 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2482 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2483
2484 let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb = _mm_cmpnle_ss(a, b).as_f32x4().to_bits();
2489 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2490 assert_eq!(rb, eb);
2491
2492 let rc = _mm_cmpnle_ss(a, c).as_f32x4().to_bits();
2493 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2494 assert_eq!(rc, ec);
2495
2496 let rd = _mm_cmpnle_ss(a, d).as_f32x4().to_bits();
2497 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2498 assert_eq!(rd, ed);
2499 }
2500
2501 #[simd_test(enable = "sse")]
2502 fn test_mm_cmpngt_ss() {
2503 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2509 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2510 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2511 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2512
2513 let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb = _mm_cmpngt_ss(a, b).as_f32x4().to_bits();
2518 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2519 assert_eq!(rb, eb);
2520
2521 let rc = _mm_cmpngt_ss(a, c).as_f32x4().to_bits();
2522 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2523 assert_eq!(rc, ec);
2524
2525 let rd = _mm_cmpngt_ss(a, d).as_f32x4().to_bits();
2526 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2527 assert_eq!(rd, ed);
2528 }
2529
2530 #[simd_test(enable = "sse")]
2531 fn test_mm_cmpnge_ss() {
2532 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2538 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2539 let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2540 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2541
2542 let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb = _mm_cmpnge_ss(a, b).as_f32x4().to_bits();
2547 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2548 assert_eq!(rb, eb);
2549
2550 let rc = _mm_cmpnge_ss(a, c).as_f32x4().to_bits();
2551 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2552 assert_eq!(rc, ec);
2553
2554 let rd = _mm_cmpnge_ss(a, d).as_f32x4().to_bits();
2555 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2556 assert_eq!(rd, ed);
2557 }
2558
2559 #[simd_test(enable = "sse")]
2560 fn test_mm_cmpord_ss() {
2561 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2562 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2563 let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2564 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2565
2566 let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb = _mm_cmpord_ss(a, b).as_f32x4().to_bits();
2571 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2572 assert_eq!(rb, eb);
2573
2574 let rc = _mm_cmpord_ss(a, c).as_f32x4().to_bits();
2575 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2576 assert_eq!(rc, ec);
2577
2578 let rd = _mm_cmpord_ss(a, d).as_f32x4().to_bits();
2579 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2580 assert_eq!(rd, ed);
2581 }
2582
2583 #[simd_test(enable = "sse")]
2584 fn test_mm_cmpunord_ss() {
2585 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2586 let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2587 let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2588 let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2589
2590 let b1 = 0u32; let c1 = !0u32; let d1 = 0u32; let rb = _mm_cmpunord_ss(a, b).as_f32x4().to_bits();
2595 let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
2596 assert_eq!(rb, eb);
2597
2598 let rc = _mm_cmpunord_ss(a, c).as_f32x4().to_bits();
2599 let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
2600 assert_eq!(rc, ec);
2601
2602 let rd = _mm_cmpunord_ss(a, d).as_f32x4().to_bits();
2603 let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
2604 assert_eq!(rd, ed);
2605 }
2606
2607 #[simd_test(enable = "sse")]
2608 fn test_mm_cmpeq_ps() {
2609 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2610 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2611 let tru = !0u32;
2612 let fls = 0u32;
2613
2614 let e = u32x4::new(fls, fls, tru, fls);
2615 let r = _mm_cmpeq_ps(a, b).as_f32x4().to_bits();
2616 assert_eq!(r, e);
2617 }
2618
2619 #[simd_test(enable = "sse")]
2620 fn test_mm_cmplt_ps() {
2621 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2622 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2623 let tru = !0u32;
2624 let fls = 0u32;
2625
2626 let e = u32x4::new(tru, fls, fls, fls);
2627 let r = _mm_cmplt_ps(a, b).as_f32x4().to_bits();
2628 assert_eq!(r, e);
2629 }
2630
2631 #[simd_test(enable = "sse")]
2632 fn test_mm_cmple_ps() {
2633 let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
2634 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2635 let tru = !0u32;
2636 let fls = 0u32;
2637
2638 let e = u32x4::new(tru, fls, tru, fls);
2639 let r = _mm_cmple_ps(a, b).as_f32x4().to_bits();
2640 assert_eq!(r, e);
2641 }
2642
2643 #[simd_test(enable = "sse")]
2644 fn test_mm_cmpgt_ps() {
2645 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2646 let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2647 let tru = !0u32;
2648 let fls = 0u32;
2649
2650 let e = u32x4::new(fls, tru, fls, fls);
2651 let r = _mm_cmpgt_ps(a, b).as_f32x4().to_bits();
2652 assert_eq!(r, e);
2653 }
2654
2655 #[simd_test(enable = "sse")]
2656 fn test_mm_cmpge_ps() {
2657 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2658 let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2659 let tru = !0u32;
2660 let fls = 0u32;
2661
2662 let e = u32x4::new(fls, tru, tru, fls);
2663 let r = _mm_cmpge_ps(a, b).as_f32x4().to_bits();
2664 assert_eq!(r, e);
2665 }
2666
2667 #[simd_test(enable = "sse")]
2668 fn test_mm_cmpneq_ps() {
2669 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2670 let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2671 let tru = !0u32;
2672 let fls = 0u32;
2673
2674 let e = u32x4::new(tru, tru, fls, tru);
2675 let r = _mm_cmpneq_ps(a, b).as_f32x4().to_bits();
2676 assert_eq!(r, e);
2677 }
2678
2679 #[simd_test(enable = "sse")]
2680 fn test_mm_cmpnlt_ps() {
2681 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2682 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2683 let tru = !0u32;
2684 let fls = 0u32;
2685
2686 let e = u32x4::new(fls, tru, tru, tru);
2687 let r = _mm_cmpnlt_ps(a, b).as_f32x4().to_bits();
2688 assert_eq!(r, e);
2689 }
2690
2691 #[simd_test(enable = "sse")]
2692 fn test_mm_cmpnle_ps() {
2693 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2694 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2695 let tru = !0u32;
2696 let fls = 0u32;
2697
2698 let e = u32x4::new(fls, tru, fls, tru);
2699 let r = _mm_cmpnle_ps(a, b).as_f32x4().to_bits();
2700 assert_eq!(r, e);
2701 }
2702
2703 #[simd_test(enable = "sse")]
2704 fn test_mm_cmpngt_ps() {
2705 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2706 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2707 let tru = !0u32;
2708 let fls = 0u32;
2709
2710 let e = u32x4::new(tru, fls, tru, tru);
2711 let r = _mm_cmpngt_ps(a, b).as_f32x4().to_bits();
2712 assert_eq!(r, e);
2713 }
2714
2715 #[simd_test(enable = "sse")]
2716 fn test_mm_cmpnge_ps() {
2717 let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2718 let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2719 let tru = !0u32;
2720 let fls = 0u32;
2721
2722 let e = u32x4::new(tru, fls, fls, tru);
2723 let r = _mm_cmpnge_ps(a, b).as_f32x4().to_bits();
2724 assert_eq!(r, e);
2725 }
2726
2727 #[simd_test(enable = "sse")]
2728 fn test_mm_cmpord_ps() {
2729 let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2730 let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2731 let tru = !0u32;
2732 let fls = 0u32;
2733
2734 let e = u32x4::new(tru, fls, fls, fls);
2735 let r = _mm_cmpord_ps(a, b).as_f32x4().to_bits();
2736 assert_eq!(r, e);
2737 }
2738
2739 #[simd_test(enable = "sse")]
2740 fn test_mm_cmpunord_ps() {
2741 let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2742 let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2743 let tru = !0u32;
2744 let fls = 0u32;
2745
2746 let e = u32x4::new(fls, tru, tru, tru);
2747 let r = _mm_cmpunord_ps(a, b).as_f32x4().to_bits();
2748 assert_eq!(r, e);
2749 }
2750
2751 #[simd_test(enable = "sse")]
2752 fn test_mm_comieq_ss() {
2753 let aa = &[3.0f32, 12.0, 23.0, NAN];
2754 let bb = &[3.0f32, 47.5, 1.5, NAN];
2755
2756 let ee = &[1i32, 0, 0, 0];
2757
2758 for i in 0..4 {
2759 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2760 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2761
2762 let r = _mm_comieq_ss(a, b);
2763
2764 assert_eq!(
2765 ee[i], r,
2766 "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2767 a, b, r, ee[i], i
2768 );
2769 }
2770 }
2771
2772 #[simd_test(enable = "sse")]
2773 fn test_mm_comilt_ss() {
2774 let aa = &[3.0f32, 12.0, 23.0, NAN];
2775 let bb = &[3.0f32, 47.5, 1.5, NAN];
2776
2777 let ee = &[0i32, 1, 0, 0];
2778
2779 for i in 0..4 {
2780 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2781 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2782
2783 let r = _mm_comilt_ss(a, b);
2784
2785 assert_eq!(
2786 ee[i], r,
2787 "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2788 a, b, r, ee[i], i
2789 );
2790 }
2791 }
2792
2793 #[simd_test(enable = "sse")]
2794 fn test_mm_comile_ss() {
2795 let aa = &[3.0f32, 12.0, 23.0, NAN];
2796 let bb = &[3.0f32, 47.5, 1.5, NAN];
2797
2798 let ee = &[1i32, 1, 0, 0];
2799
2800 for i in 0..4 {
2801 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2802 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2803
2804 let r = _mm_comile_ss(a, b);
2805
2806 assert_eq!(
2807 ee[i], r,
2808 "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2809 a, b, r, ee[i], i
2810 );
2811 }
2812 }
2813
2814 #[simd_test(enable = "sse")]
2815 fn test_mm_comigt_ss() {
2816 let aa = &[3.0f32, 12.0, 23.0, NAN];
2817 let bb = &[3.0f32, 47.5, 1.5, NAN];
2818
2819 let ee = &[1i32, 0, 1, 0];
2820
2821 for i in 0..4 {
2822 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2823 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2824
2825 let r = _mm_comige_ss(a, b);
2826
2827 assert_eq!(
2828 ee[i], r,
2829 "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2830 a, b, r, ee[i], i
2831 );
2832 }
2833 }
2834
2835 #[simd_test(enable = "sse")]
2836 fn test_mm_comineq_ss() {
2837 let aa = &[3.0f32, 12.0, 23.0, NAN];
2838 let bb = &[3.0f32, 47.5, 1.5, NAN];
2839
2840 let ee = &[0i32, 1, 1, 1];
2841
2842 for i in 0..4 {
2843 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2844 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2845
2846 let r = _mm_comineq_ss(a, b);
2847
2848 assert_eq!(
2849 ee[i], r,
2850 "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2851 a, b, r, ee[i], i
2852 );
2853 }
2854 }
2855
2856 #[simd_test(enable = "sse")]
2857 fn test_mm_ucomieq_ss() {
2858 let aa = &[3.0f32, 12.0, 23.0, NAN];
2859 let bb = &[3.0f32, 47.5, 1.5, NAN];
2860
2861 let ee = &[1i32, 0, 0, 0];
2862
2863 for i in 0..4 {
2864 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2865 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2866
2867 let r = _mm_ucomieq_ss(a, b);
2868
2869 assert_eq!(
2870 ee[i], r,
2871 "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2872 a, b, r, ee[i], i
2873 );
2874 }
2875 }
2876
2877 #[simd_test(enable = "sse")]
2878 fn test_mm_ucomilt_ss() {
2879 let aa = &[3.0f32, 12.0, 23.0, NAN];
2880 let bb = &[3.0f32, 47.5, 1.5, NAN];
2881
2882 let ee = &[0i32, 1, 0, 0];
2883
2884 for i in 0..4 {
2885 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2886 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2887
2888 let r = _mm_ucomilt_ss(a, b);
2889
2890 assert_eq!(
2891 ee[i], r,
2892 "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2893 a, b, r, ee[i], i
2894 );
2895 }
2896 }
2897
2898 #[simd_test(enable = "sse")]
2899 fn test_mm_ucomile_ss() {
2900 let aa = &[3.0f32, 12.0, 23.0, NAN];
2901 let bb = &[3.0f32, 47.5, 1.5, NAN];
2902
2903 let ee = &[1i32, 1, 0, 0];
2904
2905 for i in 0..4 {
2906 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2907 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2908
2909 let r = _mm_ucomile_ss(a, b);
2910
2911 assert_eq!(
2912 ee[i], r,
2913 "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2914 a, b, r, ee[i], i
2915 );
2916 }
2917 }
2918
2919 #[simd_test(enable = "sse")]
2920 fn test_mm_ucomigt_ss() {
2921 let aa = &[3.0f32, 12.0, 23.0, NAN];
2922 let bb = &[3.0f32, 47.5, 1.5, NAN];
2923
2924 let ee = &[0i32, 0, 1, 0];
2925
2926 for i in 0..4 {
2927 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2928 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2929
2930 let r = _mm_ucomigt_ss(a, b);
2931
2932 assert_eq!(
2933 ee[i], r,
2934 "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2935 a, b, r, ee[i], i
2936 );
2937 }
2938 }
2939
2940 #[simd_test(enable = "sse")]
2941 fn test_mm_ucomige_ss() {
2942 let aa = &[3.0f32, 12.0, 23.0, NAN];
2943 let bb = &[3.0f32, 47.5, 1.5, NAN];
2944
2945 let ee = &[1i32, 0, 1, 0];
2946
2947 for i in 0..4 {
2948 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2949 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2950
2951 let r = _mm_ucomige_ss(a, b);
2952
2953 assert_eq!(
2954 ee[i], r,
2955 "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2956 a, b, r, ee[i], i
2957 );
2958 }
2959 }
2960
2961 #[simd_test(enable = "sse")]
2962 fn test_mm_ucomineq_ss() {
2963 let aa = &[3.0f32, 12.0, 23.0, NAN];
2964 let bb = &[3.0f32, 47.5, 1.5, NAN];
2965
2966 let ee = &[0i32, 1, 1, 1];
2967
2968 for i in 0..4 {
2969 let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2970 let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2971
2972 let r = _mm_ucomineq_ss(a, b);
2973
2974 assert_eq!(
2975 ee[i], r,
2976 "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2977 a, b, r, ee[i], i
2978 );
2979 }
2980 }
2981
2982 #[simd_test(enable = "sse")]
2983 fn test_mm_cvtss_si32() {
2984 let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
2985 let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
2986 for i in 0..inputs.len() {
2987 let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
2988 let e = result[i];
2989 let r = _mm_cvtss_si32(x);
2990 assert_eq!(
2991 e, r,
2992 "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
2993 i, x, r, e
2994 );
2995 }
2996 }
2997
2998 #[simd_test(enable = "sse")]
2999 fn test_mm_cvttss_si32() {
3000 let inputs = &[
3001 (42.0f32, 42i32),
3002 (-31.4, -31),
3003 (-33.5, -33),
3004 (-34.5, -34),
3005 (10.999, 10),
3006 (-5.99, -5),
3007 (4.0e10, i32::MIN),
3008 (4.0e-10, 0),
3009 (NAN, i32::MIN),
3010 (2147483500.1, 2147483520),
3011 ];
3012 for (i, &(xi, e)) in inputs.iter().enumerate() {
3013 let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
3014 let r = _mm_cvttss_si32(x);
3015 assert_eq!(
3016 e, r,
3017 "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
3018 i, x, r, e
3019 );
3020 }
3021 }
3022
3023 #[simd_test(enable = "sse")]
3024 const fn test_mm_cvtsi32_ss() {
3025 let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3026
3027 let r = _mm_cvtsi32_ss(a, 4555);
3028 let e = _mm_setr_ps(4555.0, 6.0, 7.0, 8.0);
3029 assert_eq_m128(e, r);
3030
3031 let r = _mm_cvtsi32_ss(a, 322223333);
3032 let e = _mm_setr_ps(322223333.0, 6.0, 7.0, 8.0);
3033 assert_eq_m128(e, r);
3034
3035 let r = _mm_cvtsi32_ss(a, -432);
3036 let e = _mm_setr_ps(-432.0, 6.0, 7.0, 8.0);
3037 assert_eq_m128(e, r);
3038
3039 let r = _mm_cvtsi32_ss(a, -322223333);
3040 let e = _mm_setr_ps(-322223333.0, 6.0, 7.0, 8.0);
3041 assert_eq_m128(e, r);
3042 }
3043
3044 #[simd_test(enable = "sse")]
3045 const fn test_mm_cvtss_f32() {
3046 let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
3047 assert_eq!(_mm_cvtss_f32(a), 312.0134);
3048 }
3049
3050 #[simd_test(enable = "sse")]
3051 const fn test_mm_set_ss() {
3052 let r = _mm_set_ss(black_box(4.25));
3053 assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0));
3054 }
3055
3056 #[simd_test(enable = "sse")]
3057 const fn test_mm_set1_ps() {
3058 let r1 = _mm_set1_ps(black_box(4.25));
3059 let r2 = _mm_set_ps1(black_box(4.25));
3060 assert_eq!(get_m128(r1, 0), 4.25);
3061 assert_eq!(get_m128(r1, 1), 4.25);
3062 assert_eq!(get_m128(r1, 2), 4.25);
3063 assert_eq!(get_m128(r1, 3), 4.25);
3064 assert_eq!(get_m128(r2, 0), 4.25);
3065 assert_eq!(get_m128(r2, 1), 4.25);
3066 assert_eq!(get_m128(r2, 2), 4.25);
3067 assert_eq!(get_m128(r2, 3), 4.25);
3068 }
3069
3070 #[simd_test(enable = "sse")]
3071 const fn test_mm_set_ps() {
3072 let r = _mm_set_ps(
3073 black_box(1.0),
3074 black_box(2.0),
3075 black_box(3.0),
3076 black_box(4.0),
3077 );
3078 assert_eq!(get_m128(r, 0), 4.0);
3079 assert_eq!(get_m128(r, 1), 3.0);
3080 assert_eq!(get_m128(r, 2), 2.0);
3081 assert_eq!(get_m128(r, 3), 1.0);
3082 }
3083
3084 #[simd_test(enable = "sse")]
3085 const fn test_mm_setr_ps() {
3086 let r = _mm_setr_ps(
3087 black_box(1.0),
3088 black_box(2.0),
3089 black_box(3.0),
3090 black_box(4.0),
3091 );
3092 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3093 }
3094
3095 #[simd_test(enable = "sse")]
3096 const fn test_mm_setzero_ps() {
3097 let r = *black_box(&_mm_setzero_ps());
3098 assert_eq_m128(r, _mm_set1_ps(0.0));
3099 }
3100
3101 #[simd_test]
3102 #[allow(non_snake_case)]
3103 const fn test_MM_SHUFFLE() {
3104 assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
3105 assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
3106 assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);
3107 }
3108
3109 #[simd_test(enable = "sse")]
3110 const fn test_mm_shuffle_ps() {
3111 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3112 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3113 let r = _mm_shuffle_ps::<0b00_01_01_11>(a, b);
3114 assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
3115 }
3116
3117 #[simd_test(enable = "sse")]
3118 const fn test_mm_unpackhi_ps() {
3119 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3120 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3121 let r = _mm_unpackhi_ps(a, b);
3122 assert_eq_m128(r, _mm_setr_ps(3.0, 7.0, 4.0, 8.0));
3123 }
3124
3125 #[simd_test(enable = "sse")]
3126 const fn test_mm_unpacklo_ps() {
3127 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3128 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3129 let r = _mm_unpacklo_ps(a, b);
3130 assert_eq_m128(r, _mm_setr_ps(1.0, 5.0, 2.0, 6.0));
3131 }
3132
3133 #[simd_test(enable = "sse")]
3134 const fn test_mm_movehl_ps() {
3135 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3136 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3137 let r = _mm_movehl_ps(a, b);
3138 assert_eq_m128(r, _mm_setr_ps(7.0, 8.0, 3.0, 4.0));
3139 }
3140
3141 #[simd_test(enable = "sse")]
3142 const fn test_mm_movelh_ps() {
3143 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3144 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3145 let r = _mm_movelh_ps(a, b);
3146 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
3147 }
3148
3149 #[simd_test(enable = "sse")]
3150 const fn test_mm_load_ss() {
3151 let a = 42.0f32;
3152 let r = unsafe { _mm_load_ss(ptr::addr_of!(a)) };
3153 assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
3154 }
3155
3156 #[simd_test(enable = "sse")]
3157 const fn test_mm_load1_ps() {
3158 let a = 42.0f32;
3159 let r = unsafe { _mm_load1_ps(ptr::addr_of!(a)) };
3160 assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
3161 }
3162
3163 #[simd_test(enable = "sse")]
3164 const fn test_mm_load_ps() {
3165 let vals = Memory {
3166 data: [1.0f32, 2.0, 3.0, 4.0],
3167 };
3168
3169 let p = vals.data.as_ptr();
3171
3172 let r = unsafe { _mm_load_ps(p) };
3173 let e = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3174 assert_eq_m128(r, e);
3175 }
3176
3177 #[simd_test(enable = "sse")]
3178 const fn test_mm_loadu_ps() {
3179 let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3180 let p = unsafe { vals.as_ptr().add(3) };
3181 let r = unsafe { _mm_loadu_ps(black_box(p)) };
3182 assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
3183 }
3184
3185 #[simd_test(enable = "sse")]
3186 const fn test_mm_loadr_ps() {
3187 let vals = Memory {
3188 data: [1.0f32, 2.0, 3.0, 4.0],
3189 };
3190
3191 let p = vals.data.as_ptr();
3193
3194 let r = unsafe { _mm_loadr_ps(p) };
3195 let e = _mm_setr_ps(4.0, 3.0, 2.0, 1.0);
3196 assert_eq_m128(r, e);
3197 }
3198
3199 #[simd_test(enable = "sse")]
3200 const fn test_mm_store_ss() {
3201 let mut vals = [0.0f32; 8];
3202 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3203 unsafe {
3204 _mm_store_ss(vals.as_mut_ptr().add(1), a);
3205 }
3206
3207 assert_eq!(vals[0], 0.0);
3208 assert_eq!(vals[1], 1.0);
3209 assert_eq!(vals[2], 0.0);
3210 }
3211
3212 #[simd_test(enable = "sse")]
3213 const fn test_mm_store1_ps() {
3214 let mut vals = Memory { data: [0.0f32; 4] };
3215 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3216
3217 let p = vals.data.as_mut_ptr();
3219
3220 unsafe {
3221 _mm_store1_ps(p, *black_box(&a));
3222 }
3223
3224 assert_eq!(vals.data, [1.0, 1.0, 1.0, 1.0]);
3225 }
3226
3227 #[simd_test(enable = "sse")]
3228 const fn test_mm_store_ps() {
3229 let mut vals = Memory { data: [0.0f32; 4] };
3230 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3231
3232 let p = vals.data.as_mut_ptr();
3234
3235 unsafe {
3236 _mm_store_ps(p, *black_box(&a));
3237 }
3238
3239 assert_eq!(vals.data, [1.0, 2.0, 3.0, 4.0]);
3240 }
3241
3242 #[simd_test(enable = "sse")]
3243 const fn test_mm_storer_ps() {
3244 let mut vals = Memory { data: [0.0f32; 4] };
3245 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3246
3247 let p = vals.data.as_mut_ptr();
3249
3250 unsafe {
3251 _mm_storer_ps(p, *black_box(&a));
3252 }
3253
3254 assert_eq!(vals.data, [4.0, 3.0, 2.0, 1.0]);
3255 }
3256
3257 #[simd_test(enable = "sse")]
3258 const fn test_mm_storeu_ps() {
3259 #[repr(align(16))]
3260 struct Memory8 {
3261 data: [f32; 8],
3262 }
3263
3264 let mut vals = Memory8 { data: [0.0f32; 8] };
3266 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3267
3268 let p = unsafe { vals.data.as_mut_ptr().offset(1) };
3270
3271 unsafe {
3272 _mm_storeu_ps(p, *black_box(&a));
3273 }
3274
3275 assert_eq!(vals.data, [0.0, 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0]);
3276 }
3277
3278 #[simd_test(enable = "sse")]
3279 const fn test_mm_move_ss() {
3280 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3281 let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3282
3283 let r = _mm_move_ss(a, b);
3284 let e = _mm_setr_ps(5.0, 2.0, 3.0, 4.0);
3285 assert_eq_m128(e, r);
3286 }
3287
3288 #[simd_test(enable = "sse")]
3289 const fn test_mm_movemask_ps() {
3290 let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0));
3291 assert_eq!(r, 0b0101);
3292
3293 let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0));
3294 assert_eq!(r, 0b0111);
3295 }
3296
// Smoke test: only checks that `_mm_sfence` can be called; nothing is asserted.
// The test is marked `ignore` when built under Miri.
#[simd_test(enable = "sse")]
#[cfg_attr(miri, ignore)]
fn test_mm_sfence() {
    _mm_sfence();
}
3303
3304 #[simd_test(enable = "sse")]
3305 const fn test_MM_TRANSPOSE4_PS() {
3306 let mut a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3307 let mut b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3308 let mut c = _mm_setr_ps(9.0, 10.0, 11.0, 12.0);
3309 let mut d = _mm_setr_ps(13.0, 14.0, 15.0, 16.0);
3310
3311 _MM_TRANSPOSE4_PS(&mut a, &mut b, &mut c, &mut d);
3312
3313 assert_eq_m128(a, _mm_setr_ps(1.0, 5.0, 9.0, 13.0));
3314 assert_eq_m128(b, _mm_setr_ps(2.0, 6.0, 10.0, 14.0));
3315 assert_eq_m128(c, _mm_setr_ps(3.0, 7.0, 11.0, 15.0));
3316 assert_eq_m128(d, _mm_setr_ps(4.0, 8.0, 12.0, 16.0));
3317 }
3318
// 16-byte-aligned storage for four `f32` values. Tests in this module pass a
// pointer to `data` to the aligned load, store and stream intrinsics.
#[repr(align(16))]
struct Memory {
    // The four values; the only field, so it sits at the aligned start of the struct.
    pub data: [f32; 4],
}
3323
3324 #[simd_test(enable = "sse")]
3325 #[cfg_attr(miri, ignore)]
3328 fn test_mm_stream_ps() {
3329 let a = _mm_set1_ps(7.0);
3330 let mut mem = Memory { data: [-1.0; 4] };
3331
3332 unsafe {
3333 _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
3334 }
3335 _mm_sfence();
3336 for i in 0..4 {
3337 assert_eq!(mem.data[i], get_m128(a, i));
3338 }
3339 }
3340}