Skip to content

Commit 2005354

Browse files
committed
add bench and speed things up
1 parent 7e50f35 commit 2005354

6 files changed

Lines changed: 137 additions & 105 deletions

File tree

crates/bpe/benchmarks/performance.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use bpe_benchmarks::*;
99
use criterion::{
1010
criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration,
1111
};
12-
use rand::{thread_rng, Rng};
12+
use rand::{rng, Rng};
1313

1414
fn counting_benchmark(c: &mut Criterion) {
1515
for (name, bpe, _, _) in TOKENIZERS.iter() {
@@ -22,7 +22,7 @@ fn counting_benchmark(c: &mut Criterion) {
2222
group.throughput(criterion::Throughput::Bytes(bytes as u64));
2323
group.bench_with_input(BenchmarkId::new("interval", bytes), &bytes, |b, bytes| {
2424
b.iter_batched(
25-
|| thread_rng().gen_range(0..input.len() - bytes),
25+
|| rng().random_range(0..input.len() - bytes),
2626
|start| fast.count(start..start + bytes),
2727
criterion::BatchSize::SmallInput,
2828
)
@@ -32,7 +32,7 @@ fn counting_benchmark(c: &mut Criterion) {
3232
&bytes,
3333
|b, bytes| {
3434
b.iter_batched(
35-
|| thread_rng().gen_range(0..input.len() - bytes),
35+
|| rng().random_range(0..input.len() - bytes),
3636
|start| bpe.bpe.count(&input.as_bytes()[start..start + bytes]),
3737
criterion::BatchSize::SmallInput,
3838
)

crates/string-offsets/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,9 @@ wasm-bindgen = { version = "0.2", optional = true }
2222
[dev-dependencies]
2323
rand = "0.9"
2424
rand_chacha = "0.9"
25+
criterion = "0.5"
26+
27+
[[bench]]
28+
name = "performance"
29+
path = "benchmarks/performance.rs"
30+
harness = false
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[package]
2+
name = "string-offsets-benchmarks"
3+
edition = "2021"
4+
5+
[[bench]]
6+
name = "performance"
7+
path = "performance.rs"
8+
harness = false
9+
test = false
10+
11+
[dependencies]
12+
bpe = { path = "../../string-offsets" }
13+
criterion = "0.5"
14+
rand = "0.9"
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
2+
use rand::{rng, Rng};
3+
use string_offsets::StringOffsets;
4+
5+
fn construction_benchmark(c: &mut Criterion) {
6+
// Create one benchmark group; a random input string is generated per size below
7+
let mut group = c.benchmark_group("construction");
8+
for size in [1000, 10000, 100000] {
9+
let mut rng = rng();
10+
let random_input: String = (0..size).map(|_| rng.random::<u8>() as char).collect();
11+
12+
// Create benchmark group for throughput measurement
13+
14+
// Set throughput based on input size in bytes
15+
group.throughput(criterion::Throughput::Bytes(random_input.len() as u64));
16+
17+
// Run the benchmark
18+
group.bench_with_input(
19+
BenchmarkId::from_parameter(size),
20+
&random_input,
21+
|b, input| b.iter(|| black_box(StringOffsets::new(input))),
22+
);
23+
}
24+
group.finish();
25+
}
26+
27+
criterion_group!(
28+
name = benches;
29+
config = Criterion::default();
30+
targets = construction_benchmark
31+
);
32+
criterion_main!(benches);

crates/string-offsets/src/bitrank.rs

Lines changed: 42 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,12 @@ impl Block {
4444
/// In debug builds this panics if the bit was already set, because that indicates that the
4545
/// original positions list is invalid/had duplicates. Release builds skip the check.
4646
fn set(&mut self, index: usize) {
47-
assert!(index < BITS_PER_BLOCK);
47+
debug_assert!(index < BITS_PER_BLOCK);
4848
let chunk_idx = index / BITS_PER_SUB_BLOCK;
4949
let bit_idx = index % BITS_PER_SUB_BLOCK;
5050
let mask = 1 << ((BITS_PER_SUB_BLOCK - 1) - bit_idx);
51-
assert_eq!(self.bits[chunk_idx] & mask, 0, "toggling bits off indicates that the original data was incorrect, most likely containing duplicate values.");
52-
self.bits[chunk_idx] ^= mask;
51+
debug_assert_eq!(self.bits[chunk_idx] & mask, 0, "toggling bits off indicates that the original data was incorrect, most likely containing duplicate values.");
52+
self.bits[chunk_idx] |= mask;
5353
}
5454

5555
/// The **total rank** of the block relative local index, and the index of the one
@@ -107,62 +107,37 @@ pub struct BitRankBuilder {
107107
}
108108

109109
impl BitRankBuilder {
110-
/// Returns a new builder.
111-
#[cfg(test)]
112-
pub fn new() -> Self {
113-
Self::default()
114-
}
115-
116110
/// Returns a builder that can hold integers with values `0..cap`.
117111
pub fn with_capacity(cap: usize) -> Self {
112+
const ZERO_BLOCK: Block = Block {
113+
rank: 0,
114+
sub_blocks: [0; SUB_BLOCKS_PER_BLOCK],
115+
bits: [0; SUB_BLOCKS_PER_BLOCK],
116+
};
118117
Self {
119-
blocks: Vec::with_capacity(cap.div_ceil(BITS_PER_BLOCK)),
120-
}
121-
}
122-
123-
fn finish_last_block(&mut self) -> u64 {
124-
if let Some(block) = self.blocks.last_mut() {
125-
let mut local_rank = 0;
126-
for (i, chunk) in block.bits.iter().enumerate() {
127-
block.sub_blocks[i] = local_rank;
128-
local_rank += chunk.count_ones() as u16;
129-
}
130-
block.rank + local_rank as u64
131-
} else {
132-
0
118+
blocks: vec![ZERO_BLOCK; cap.div_ceil(BITS_PER_BLOCK)],
133119
}
134120
}
135121

136122
/// Adds a bit. Positions may arrive in any order, but each must fall inside the blocks allocated by `with_capacity`; indexing past them panics.
137123
pub fn push(&mut self, position: usize) {
138124
let block_id = position / BITS_PER_BLOCK;
139-
assert!(
140-
self.blocks.len() <= block_id + 1,
141-
"positions must be increasing!"
142-
);
143-
if block_id >= self.blocks.len() {
144-
let curr_rank = self.finish_last_block();
145-
while block_id >= self.blocks.len() {
146-
// Without this declared as a `const`, rustc 1.82 creates the Block value on the
147-
// stack first, then `memcpy`s it into `self.blocks`.
148-
const ZERO_BLOCK: Block = Block {
149-
rank: 0,
150-
sub_blocks: [0; SUB_BLOCKS_PER_BLOCK],
151-
bits: [0; SUB_BLOCKS_PER_BLOCK],
152-
};
153-
self.blocks.push(ZERO_BLOCK);
154-
self.blocks.last_mut().expect("just inserted").rank = curr_rank;
155-
}
156-
}
157-
self.blocks
158-
.last_mut()
159-
.expect("just ensured there are enough blocks")
160-
.set(position % BITS_PER_BLOCK);
125+
self.blocks[block_id].set(position % BITS_PER_BLOCK);
161126
}
162127

163128
/// Finishes the `BitRank` by computing the cumulative rank of every block and sub-block.
164129
pub fn finish(mut self) -> BitRank {
165-
self.finish_last_block();
130+
let mut total_rank = 0;
131+
for block in &mut self.blocks {
132+
block.rank = total_rank;
133+
let mut local_rank = 0;
134+
for (i, chunk) in block.bits.iter().enumerate() {
135+
block.sub_blocks[i] = local_rank;
136+
local_rank += chunk.count_ones() as u16;
137+
}
138+
total_rank += local_rank as u64
139+
}
140+
//self.finish_last_block();
166141
BitRank {
167142
blocks: self.blocks,
168143
}
@@ -222,8 +197,8 @@ mod tests {
222197

223198
/// Creates a `BitRank` containing the integers in `iter` (which should be strictly
224199
/// increasing).
225-
pub fn bitrank<I: IntoIterator<Item = usize>>(iter: I) -> BitRank {
226-
let mut builder = BitRankBuilder::new();
200+
pub fn bitrank<I: IntoIterator<Item = usize>>(capacity: usize, iter: I) -> BitRank {
201+
let mut builder = BitRankBuilder::with_capacity(capacity);
227202
for position in iter {
228203
builder.push(position);
229204
}
@@ -232,32 +207,32 @@ mod tests {
232207

233208
#[test]
234209
fn test_rank_zero() {
235-
let br = bitrank([0]);
210+
let br = bitrank(1, [0]);
236211
assert_eq!(br.rank(0), 0);
237212
assert_eq!(br.rank(1), 1);
238213
}
239214

240215
#[test]
241216
fn test_empty() {
242-
let br = bitrank([]);
217+
let br = bitrank(0, []);
243218
assert!(br.blocks.is_empty());
244219
}
245220

246221
#[test]
247222
fn test_index_out_of_bounds() {
248-
let br = bitrank([BITS_PER_BLOCK - 1]);
223+
let br = bitrank(BITS_PER_BLOCK, [BITS_PER_BLOCK - 1]);
249224
assert_eq!(br.rank(BITS_PER_BLOCK), 1);
250225
}
251226

252227
#[test]
253228
#[should_panic]
254229
fn test_duplicate_position() {
255-
bitrank([64, 66, 68, 68, 90]);
230+
bitrank(91, [64, 66, 68, 68, 90]);
256231
}
257232

258233
#[test]
259234
fn test_rank_exclusive() {
260-
let br = bitrank(0..132);
235+
let br = bitrank(133, 0..132);
261236
assert_eq!(br.blocks.len(), 1);
262237
assert_eq!(br.rank(64), 64);
263238
assert_eq!(br.rank(132), 132);
@@ -267,37 +242,37 @@ mod tests {
267242
fn test_rank() {
268243
let mut positions: Vec<usize> = (0..132).collect();
269244
positions.append(&mut vec![138usize, 140, 146]);
270-
let br = bitrank(positions);
245+
let br = bitrank(146, positions);
271246
assert_eq!(br.rank(135), 132);
272247

273-
let br2 = bitrank(0..BITS_PER_BLOCK - 5);
248+
let br2 = bitrank(BITS_PER_BLOCK, 0..BITS_PER_BLOCK - 5);
274249
assert_eq!(br2.rank(169), 169);
275250

276-
let br3 = bitrank(0..BITS_PER_BLOCK + 5);
251+
let br3 = bitrank(BITS_PER_BLOCK + 6, 0..BITS_PER_BLOCK + 5);
277252
assert_eq!(br3.rank(BITS_PER_BLOCK), BITS_PER_BLOCK);
278253
}
279254

280255
#[test]
281256
fn test_rank_idx() {
282257
let mut positions: Vec<usize> = (0..132).collect();
283258
positions.append(&mut vec![138usize, 140, 146]);
284-
let br = bitrank(positions);
259+
let br = bitrank(147, positions);
285260
assert_eq!(br.rank_select(135), (132, Some(131)));
286261

287262
let bits2: Vec<usize> = (0..BITS_PER_BLOCK - 5).collect();
288-
let br2 = bitrank(bits2);
263+
let br2 = bitrank(BITS_PER_BLOCK, bits2);
289264
assert_eq!(br2.rank_select(169), (169, Some(168)));
290265

291266
let bits3: Vec<usize> = (0..BITS_PER_BLOCK + 5).collect();
292-
let br3 = bitrank(bits3);
267+
let br3 = bitrank(BITS_PER_BLOCK + 6, bits3);
293268
assert_eq!(br3.rank_select(BITS_PER_BLOCK), (BITS_PER_BLOCK, None));
294269

295270
let bits4: Vec<usize> = vec![1, 1000, 9999, BITS_PER_BLOCK + 1];
296-
let br4 = bitrank(bits4);
271+
let br4 = bitrank(BITS_PER_BLOCK + 1, bits4);
297272
assert_eq!(br4.rank_select(10000), (3, Some(9999)));
298273

299274
let bits5: Vec<usize> = vec![1, 1000, 9999, BITS_PER_BLOCK + 1];
300-
let br5 = bitrank(bits5);
275+
let br5 = bitrank(BITS_PER_BLOCK + 1, bits5);
301276
assert_eq!(br5.rank_select(BITS_PER_BLOCK), (3, None));
302277
}
303278

@@ -313,7 +288,7 @@ mod tests {
313288
// Duplicates would trip the `debug_assert_eq!` in `Block::set` in debug builds, so dedup to
314289
// ensure that we are meeting the contract.
315290
random_bits.dedup();
316-
let br = bitrank(random_bits.iter().copied());
291+
let br = bitrank(1_000_000, random_bits.iter().copied());
317292
let mut rank = 0;
318293
let mut select = None;
319294
for i in 0..random_bits.capacity() {
@@ -333,7 +308,7 @@ mod tests {
333308
#[test]
334309
fn test_rank_out_of_bounds() {
335310
for i in 1..30 {
336-
let br = bitrank([BITS_PER_BLOCK * i - 1]);
311+
let br = bitrank(BITS_PER_BLOCK * i, [BITS_PER_BLOCK * i - 1]);
337312
assert_eq!(br.max_rank(), 1);
338313
assert_eq!(br.rank(BITS_PER_BLOCK * i - 1), 0);
339314
for j in 0..10 {
@@ -344,7 +319,10 @@ mod tests {
344319

345320
#[test]
346321
fn test_large_gap() {
347-
let br = bitrank((3..4).chain(BITS_PER_BLOCK * 15..BITS_PER_BLOCK * 15 + 17));
322+
let br = bitrank(
323+
BITS_PER_BLOCK * 16,
324+
(3..4).chain(BITS_PER_BLOCK * 15..BITS_PER_BLOCK * 15 + 17),
325+
);
348326
for i in 1..15 {
349327
assert_eq!(br.rank(BITS_PER_BLOCK * i), 1);
350328
}

0 commit comments

Comments
 (0)