1- extern crate packed_simd ;
1+
22
33#[ cfg( not( feature = "runtime-dispatch-simd" ) ) ]
4- use core:: mem;
4+ use core:: { mem, simd} ;
5+
56#[ cfg( feature = "runtime-dispatch-simd" ) ]
6- use std:: mem;
7+ use std:: { mem, simd } ;
78
8- use self :: packed_simd :: { u8x32, u8x64, FromCast } ;
9+ use simd :: { u8x32, u8x64, cmp :: SimdPartialEq , num :: SimdInt } ;
910
1011const MASK : [ u8 ; 64 ] = [
1112 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
@@ -14,20 +15,20 @@ const MASK: [u8; 64] = [
1415] ;
1516
1617unsafe fn u8x64_from_offset ( slice : & [ u8 ] , offset : usize ) -> u8x64 {
17- u8x64:: from_slice_unaligned_unchecked ( slice. get_unchecked ( offset..) )
18+ u8x64:: from_slice ( slice. get_unchecked ( offset..) )
1819}
1920unsafe fn u8x32_from_offset ( slice : & [ u8 ] , offset : usize ) -> u8x32 {
20- u8x32:: from_slice_unaligned_unchecked ( slice. get_unchecked ( offset..) )
21+ u8x32:: from_slice ( slice. get_unchecked ( offset..) )
2122}
2223
2324fn sum_x64 ( u8s : & u8x64 ) -> usize {
2425 let mut store = [ 0 ; mem:: size_of :: < u8x64 > ( ) ] ;
25- u8s. write_to_slice_unaligned ( & mut store) ;
26+ u8s. copy_to_slice ( & mut store) ;
2627 store. iter ( ) . map ( |& e| e as usize ) . sum ( )
2728}
2829fn sum_x32 ( u8s : & u8x32 ) -> usize {
2930 let mut store = [ 0 ; mem:: size_of :: < u8x32 > ( ) ] ;
30- u8s. write_to_slice_unaligned ( & mut store) ;
31+ u8s. copy_to_slice ( & mut store) ;
3132 store. iter ( ) . map ( |& e| e as usize ) . sum ( )
3233}
3334
@@ -44,7 +45,7 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
4445 while haystack. len ( ) >= offset + 64 * 255 {
4546 let mut counts = u8x64:: splat ( 0 ) ;
4647 for _ in 0 ..255 {
47- counts -= u8x64 :: from_cast ( u8x64_from_offset ( haystack, offset) . eq ( needles_x64) ) ;
48+ counts -= u8x64_from_offset ( haystack, offset) . simd_eq ( needles_x64) . to_int ( ) . cast ( ) ;
4849 offset += 64 ;
4950 }
5051 count += sum_x64 ( & counts) ;
@@ -54,7 +55,7 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
5455 if haystack. len ( ) >= offset + 64 * 128 {
5556 let mut counts = u8x64:: splat ( 0 ) ;
5657 for _ in 0 ..128 {
57- counts -= u8x64 :: from_cast ( u8x64_from_offset ( haystack, offset) . eq ( needles_x64) ) ;
58+ counts -= u8x64_from_offset ( haystack, offset) . simd_eq ( needles_x64) . to_int ( ) . cast ( ) ;
5859 offset += 64 ;
5960 }
6061 count += sum_x64 ( & counts) ;
@@ -66,15 +67,15 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
6667 let mut counts = u8x32:: splat ( 0 ) ;
6768 for i in 0 ..( haystack. len ( ) - offset) / 32 {
6869 counts -=
69- u8x32 :: from_cast ( u8x32_from_offset ( haystack, offset + i * 32 ) . eq ( needles_x32) ) ;
70+ u8x32_from_offset ( haystack, offset + i * 32 ) . simd_eq ( needles_x32) . to_int ( ) . cast ( ) ;
7071 }
7172 count += sum_x32 ( & counts) ;
7273
7374 // Straggler; need to reset counts because prior loop can run 255 times
7475 counts = u8x32:: splat ( 0 ) ;
7576 if haystack. len ( ) % 32 != 0 {
7677 counts -=
77- u8x32 :: from_cast ( u8x32_from_offset ( haystack, haystack. len ( ) - 32 ) . eq ( needles_x32) )
78+ u8x32_from_offset ( haystack, haystack. len ( ) - 32 ) . simd_eq ( needles_x32) . to_int ( ) . cast ( )
7879 & u8x32_from_offset ( & MASK , haystack. len ( ) % 32 ) ;
7980 }
8081 count += sum_x32 ( & counts) ;
@@ -84,11 +85,11 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
8485}
8586
8687fn is_leading_utf8_byte_x64 ( u8s : u8x64 ) -> u8x64 {
87- u8x64 :: from_cast ( ( u8s & u8x64:: splat ( 0b1100_0000 ) ) . ne ( u8x64:: splat ( 0b1000_0000 ) ) )
88+ ( u8s & u8x64:: splat ( 0b1100_0000 ) ) . simd_ne ( u8x64:: splat ( 0b1000_0000 ) ) . to_int ( ) . cast ( )
8889}
8990
9091fn is_leading_utf8_byte_x32 ( u8s : u8x32 ) -> u8x32 {
91- u8x32 :: from_cast ( ( u8s & u8x32:: splat ( 0b1100_0000 ) ) . ne ( u8x32:: splat ( 0b1000_0000 ) ) )
92+ ( u8s & u8x32:: splat ( 0b1100_0000 ) ) . simd_ne ( u8x32:: splat ( 0b1000_0000 ) ) . to_int ( ) . cast ( )
9293}
9394
9495pub fn chunk_num_chars ( utf8_chars : & [ u8 ] ) -> usize {
0 commit comments