Skip to content

Commit 19edf30

Browse files
committed
version 0.6.6: fix for wasm32
1 parent 2d41959 commit 19edf30

3 files changed

Lines changed: 42 additions & 20 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ authors = ["Andre Bogus <bogusandre@gmail.de>", "Joshua Landau <joshua@landau.ws
33
description = "count occurrences of a given byte, or the number of UTF-8 code points, in a byte slice, fast"
44
edition = "2018"
55
name = "bytecount"
6-
version = "0.6.5"
6+
version = "0.6.6"
77
license = "Apache-2.0/MIT"
88
repository = "https://github.com/llogiq/bytecount"
99
categories = ["algorithms", "no-std"]

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ The [newlinebench](https://github.com/llogiq/newlinebench) repository has furthe
1212

1313
To use bytecount in your crate, if you have [cargo-edit](https://github.com/killercup/cargo-edit), just type
1414
`cargo add bytecount` in a terminal with the crate root as the current path. Otherwise you can manually edit your
15-
`Cargo.toml` to add `bytecount = 0.6.5` to your `[dependencies]` section.
15+
`Cargo.toml` to add `bytecount = "0.6.6"` to your `[dependencies]` section.
1616

1717
In your crate root (`lib.rs` or `main.rs`, depending on whether you are writing a
1818
library or application), add `extern crate bytecount;`. Now you can simply use

src/simd/wasm.rs

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,21 @@ unsafe fn sum(u8s: v128) -> usize {
5151
unsafe fn sum4(u1: v128, u2: v128, u3: v128, u4: v128) -> usize {
5252
// sum < (2^2 * 2^3 * 2^8 = 2^13) < 2^16, therefore no overflow here
5353
let u16s = u16x8_add(
54-
u16x8_add(u16x8_extadd_pairwise_u8x16(u1), u16x8_extadd_pairwise_u8x16(u2)),
55-
u16x8_add(u16x8_extadd_pairwise_u8x16(u3), u16x8_extadd_pairwise_u8x16(u4)),
54+
u16x8_add(
55+
u16x8_extadd_pairwise_u8x16(u1),
56+
u16x8_extadd_pairwise_u8x16(u2),
57+
),
58+
u16x8_add(
59+
u16x8_extadd_pairwise_u8x16(u3),
60+
u16x8_extadd_pairwise_u8x16(u4),
61+
),
5662
);
5763
let u32s = u32x4_extadd_pairwise_u16x8(u16s);
5864
let (u1, u2, u3, u4) = (
65+
u32x4_extract_lane::<0>(u32s),
5966
u32x4_extract_lane::<1>(u32s),
6067
u32x4_extract_lane::<2>(u32s),
6168
u32x4_extract_lane::<3>(u32s),
62-
u32x4_extract_lane::<4>(u32s),
6369
);
6470
((u1 + u2) + (u3 + u4)) as usize
6571
}
@@ -69,10 +75,14 @@ pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize {
6975
let needles = u8x16_splat(needle);
7076
let mut count = 0;
7177
let mut offset = 0;
72-
78+
7379
while haystack.len() >= offset + 16 * 255 {
74-
let (mut count1, mut count2, mut count3, mut count4) =
75-
(u8x16_splat(0), u8x16_splat(0), u8x16_splat(0), u8x16_splat(0));
80+
let (mut count1, mut count2, mut count3, mut count4) = (
81+
u8x16_splat(0),
82+
u8x16_splat(0),
83+
u8x16_splat(0),
84+
u8x16_splat(0),
85+
);
7686
for _ in 0..255 {
7787
let (h1, h2, h3, h4) = u8x16x4_from_offset(haystack, offset);
7888
count1 = u8x16_sub(count1, u8x16_eq(h1, needles));
@@ -83,10 +93,14 @@ pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize {
8393
}
8494
count += sum4(count1, count2, count3, count4);
8595
}
86-
96+
8797
// 64
88-
let (mut count1, mut count2, mut count3, mut count4) =
89-
(u8x16_splat(0), u8x16_splat(0), u8x16_splat(0), u8x16_splat(0));
98+
let (mut count1, mut count2, mut count3, mut count4) = (
99+
u8x16_splat(0),
100+
u8x16_splat(0),
101+
u8x16_splat(0),
102+
u8x16_splat(0),
103+
);
90104
for _ in 0..(haystack.len() - offset) / 64 {
91105
let (h1, h2, h3, h4) = u8x16x4_from_offset(haystack, offset);
92106
count1 = u8x16_sub(count1, u8x16_eq(h1, needles));
@@ -114,7 +128,7 @@ pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize {
114128
),
115129
);
116130
}
117-
count + sum(counts)
131+
count + sum(counts)
118132
}
119133

120134
#[target_feature(enable = "simd128")]
@@ -134,23 +148,31 @@ pub unsafe fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
134148

135149
// 4080
136150
while utf8_chars.len() >= offset + 64 * 255 {
137-
let (mut count1, mut count2, mut count3, mut count4) =
138-
(u8x16_splat(0), u8x16_splat(0), u8x16_splat(0), u8x16_splat(0));
151+
let (mut count1, mut count2, mut count3, mut count4) = (
152+
u8x16_splat(0),
153+
u8x16_splat(0),
154+
u8x16_splat(0),
155+
u8x16_splat(0),
156+
);
139157

140158
for _ in 0..255 {
141159
let (h1, h2, h3, h4) = u8x16x4_from_offset(utf8_chars, offset);
142-
count1 = u8x16_sub(count1,is_leading_utf8_byte(h1));
143-
count2 = u8x16_sub(count2,is_leading_utf8_byte(h2));
144-
count3 = u8x16_sub(count3,is_leading_utf8_byte(h3));
145-
count4 = u8x16_sub(count4,is_leading_utf8_byte(h4));
160+
count1 = u8x16_sub(count1, is_leading_utf8_byte(h1));
161+
count2 = u8x16_sub(count2, is_leading_utf8_byte(h2));
162+
count3 = u8x16_sub(count3, is_leading_utf8_byte(h3));
163+
count4 = u8x16_sub(count4, is_leading_utf8_byte(h4));
146164
offset += 64;
147165
}
148166
count += sum4(count1, count2, count3, count4);
149167
}
150168

151169
// 64
152-
let (mut count1, mut count2, mut count3, mut count4) =
153-
(u8x16_splat(0), u8x16_splat(0), u8x16_splat(0), u8x16_splat(0));
170+
let (mut count1, mut count2, mut count3, mut count4) = (
171+
u8x16_splat(0),
172+
u8x16_splat(0),
173+
u8x16_splat(0),
174+
u8x16_splat(0),
175+
);
154176
for _ in 0..(utf8_chars.len() - offset) / 64 {
155177
let (h1, h2, h3, h4) = u8x16x4_from_offset(utf8_chars, offset);
156178
count1 = u8x16_sub(count1, is_leading_utf8_byte(h1));

0 commit comments

Comments
 (0)