Skip to content

Commit fbad8d4

Browse files
authored
Merge pull request #86 from Freaky/sse2-intrinsics
Avoid use of SSE4.1 intrinsic for SSE2
2 parents 3281c83 + 1e34249 commit fbad8d4

1 file changed

Lines changed: 5 additions & 3 deletions

File tree

src/simd/x86_sse2.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@ use std::arch::x86::{
33
__m128i,
44
_mm_and_si128,
55
_mm_cmpeq_epi8,
6-
_mm_extract_epi32,
6+
_mm_cvtsi128_si32,
77
_mm_loadu_si128,
88
_mm_sad_epu8,
99
_mm_set1_epi8,
1010
_mm_setzero_si128,
11+
_mm_shuffle_epi32,
1112
_mm_sub_epi8,
1213
_mm_xor_si128,
1314
};
@@ -17,11 +18,12 @@ use std::arch::x86_64::{
1718
__m128i,
1819
_mm_and_si128,
1920
_mm_cmpeq_epi8,
20-
_mm_extract_epi32,
21+
_mm_cvtsi128_si32,
2122
_mm_loadu_si128,
2223
_mm_sad_epu8,
2324
_mm_set1_epi8,
2425
_mm_setzero_si128,
26+
_mm_shuffle_epi32,
2527
_mm_sub_epi8,
2628
_mm_xor_si128,
2729
};
@@ -49,7 +51,7 @@ unsafe fn mm_from_offset(slice: &[u8], offset: usize) -> __m128i {
4951
#[target_feature(enable = "sse2")]
5052
unsafe fn sum(u8s: &__m128i) -> usize {
5153
let sums = _mm_sad_epu8(*u8s, _mm_setzero_si128());
52-
(_mm_extract_epi32(sums, 0) + _mm_extract_epi32(sums, 2)) as usize
54+
(_mm_cvtsi128_si32(sums) + _mm_cvtsi128_si32(_mm_shuffle_epi32(sums, 0xaa))) as usize
5355
}
5456

5557
#[target_feature(enable = "sse2")]

0 commit comments

Comments
 (0)