Skip to content

Commit c36db9f

Browse files
committed
remove packed_simd in favor of std::simd
Fixes #91. As mentioned in the `packed_simd` README, the crate is superseded by `#![feature(portable_simd)]`.
1 parent 934ea0e commit c36db9f

3 files changed

Lines changed: 18 additions & 16 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,11 @@ appveyor = { repository = "llogiq/bytecount" }
1818
bench = false
1919

2020
[features]
21-
generic-simd = ["packed_simd"]
21+
generic-simd = []
2222
runtime-dispatch-simd = []
2323
html_report = []
2424

2525
[dependencies]
26-
packed_simd = { version = "0.3.8", optional = true }
2726

2827
[dev-dependencies]
2928
quickcheck = "1.0"

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
//! [`naive_count_32`](fn.naive_count_32.html) method can be faster
3232
//! still on small strings.
3333
34+
#![cfg_attr(feature = "generic-simd", feature(portable_simd))]
35+
3436
#![deny(missing_docs)]
3537
#![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)]
3638

src/simd/generic.rs

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
extern crate packed_simd;
1+
22

33
#[cfg(not(feature = "runtime-dispatch-simd"))]
4-
use core::mem;
4+
use core::{mem, simd};
5+
56
#[cfg(feature = "runtime-dispatch-simd")]
6-
use std::mem;
7+
use std::{mem, simd};
78

8-
use self::packed_simd::{u8x32, u8x64, FromCast};
9+
use simd::{u8x32, u8x64, cmp::SimdPartialEq, num::SimdInt};
910

1011
const MASK: [u8; 64] = [
1112
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -14,20 +15,20 @@ const MASK: [u8; 64] = [
1415
];
1516

1617
unsafe fn u8x64_from_offset(slice: &[u8], offset: usize) -> u8x64 {
17-
u8x64::from_slice_unaligned_unchecked(slice.get_unchecked(offset..))
18+
u8x64::from_slice(slice.get_unchecked(offset..))
1819
}
1920
unsafe fn u8x32_from_offset(slice: &[u8], offset: usize) -> u8x32 {
20-
u8x32::from_slice_unaligned_unchecked(slice.get_unchecked(offset..))
21+
u8x32::from_slice(slice.get_unchecked(offset..))
2122
}
2223

2324
fn sum_x64(u8s: &u8x64) -> usize {
2425
let mut store = [0; mem::size_of::<u8x64>()];
25-
u8s.write_to_slice_unaligned(&mut store);
26+
u8s.copy_to_slice(&mut store);
2627
store.iter().map(|&e| e as usize).sum()
2728
}
2829
fn sum_x32(u8s: &u8x32) -> usize {
2930
let mut store = [0; mem::size_of::<u8x32>()];
30-
u8s.write_to_slice_unaligned(&mut store);
31+
u8s.copy_to_slice(&mut store);
3132
store.iter().map(|&e| e as usize).sum()
3233
}
3334

@@ -44,7 +45,7 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
4445
while haystack.len() >= offset + 64 * 255 {
4546
let mut counts = u8x64::splat(0);
4647
for _ in 0..255 {
47-
counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64));
48+
counts -= u8x64_from_offset(haystack, offset).simd_eq(needles_x64).to_int().cast();
4849
offset += 64;
4950
}
5051
count += sum_x64(&counts);
@@ -54,7 +55,7 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
5455
if haystack.len() >= offset + 64 * 128 {
5556
let mut counts = u8x64::splat(0);
5657
for _ in 0..128 {
57-
counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64));
58+
counts -= u8x64_from_offset(haystack, offset).simd_eq(needles_x64).to_int().cast();
5859
offset += 64;
5960
}
6061
count += sum_x64(&counts);
@@ -66,15 +67,15 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
6667
let mut counts = u8x32::splat(0);
6768
for i in 0..(haystack.len() - offset) / 32 {
6869
counts -=
69-
u8x32::from_cast(u8x32_from_offset(haystack, offset + i * 32).eq(needles_x32));
70+
u8x32_from_offset(haystack, offset + i * 32).simd_eq(needles_x32).to_int().cast();
7071
}
7172
count += sum_x32(&counts);
7273

7374
// Straggler; need to reset counts because prior loop can run 255 times
7475
counts = u8x32::splat(0);
7576
if haystack.len() % 32 != 0 {
7677
counts -=
77-
u8x32::from_cast(u8x32_from_offset(haystack, haystack.len() - 32).eq(needles_x32))
78+
u8x32_from_offset(haystack, haystack.len() - 32).simd_eq(needles_x32).to_int().cast()
7879
& u8x32_from_offset(&MASK, haystack.len() % 32);
7980
}
8081
count += sum_x32(&counts);
@@ -84,11 +85,11 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
8485
}
8586

8687
fn is_leading_utf8_byte_x64(u8s: u8x64) -> u8x64 {
87-
u8x64::from_cast((u8s & u8x64::splat(0b1100_0000)).ne(u8x64::splat(0b1000_0000)))
88+
(u8s & u8x64::splat(0b1100_0000)).simd_ne(u8x64::splat(0b1000_0000)).to_int().cast()
8889
}
8990

9091
fn is_leading_utf8_byte_x32(u8s: u8x32) -> u8x32 {
91-
u8x32::from_cast((u8s & u8x32::splat(0b1100_0000)).ne(u8x32::splat(0b1000_0000)))
92+
(u8s & u8x32::splat(0b1100_0000)).simd_ne(u8x32::splat(0b1000_0000)).to_int().cast()
9293
}
9394

9495
pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize {

0 commit comments

Comments
 (0)