Skip to content

Commit

Permalink
perf: EXC-1942: Add BitVec create/grow benchmarks (#4030)
Browse files Browse the repository at this point in the history
Creating 768GiB worth of pages (192M bits) takes 2.9ms:

```
test bitvec_from_elem_false/bits:256GiB/ops:1 ... bench:      144149 ns/iter (+/- 1961)
test bitvec_from_elem_false/bits:512GiB/ops:1 ... bench:     1336148 ns/iter (+/- 50658)
test bitvec_from_elem_false/bits:768GiB/ops:1 ... bench:     2917326 ns/iter (+/- 22253)
```

There is a significant performance improvement for 1TiB worth of pages
(256M bits or 32MiB):

```
test bitvec_from_elem_false/bits:1008GiB/ops:1 ... bench:     4069684 ns/iter (+/- 17428)
test bitvec_from_elem_false/bits:1TiB/ops:1 ... bench:          11312 ns/iter (+/- 53)
test bitvec_from_elem_false/bits:2TiB/ops:1 ... bench:          12974 ns/iter (+/- 209)
```

The standard vector behaves similarly, so this is likely due to
optimizations in the underlying memory allocator:

```
test vec_from_elem_0/bits:768GiB/ops:1 ... bench:      2709736 ns/iter (+/- 27170)
test vec_from_elem_0/bits:1008GiB/ops:1 ... bench:     3843070 ns/iter (+/- 111132)
test vec_from_elem_0/bits:1TiB/ops:1 ... bench:          11379 ns/iter (+/- 564)
test vec_from_elem_0/bits:2TiB/ops:1 ... bench:          12687 ns/iter (+/- 110)
```

Growing 768GiB worth of pages (192M bits) takes 2.6ms:

```
test bitvec_grow_false/bits:256GiB/ops:1 ... bench:      186167 ns/iter (+/- 2079)
test bitvec_grow_false/bits:512GiB/ops:1 ... bench:     1169816 ns/iter (+/- 20458)
test bitvec_grow_false/bits:768GiB/ops:1 ... bench:     2617347 ns/iter (+/- 22551)
```

There is a significant performance degradation for 1TiB worth of pages
(256M bits or 32MiB). The root cause is likely the underlying memory
allocator behavior and the fact that `BitVec::grow` initializes all the
data.

```
test bitvec_grow_false/bits:1008GiB/ops:1 ... bench:     3568375 ns/iter (+/- 54736)
test bitvec_grow_false/bits:1TiB/ops:1 ... bench:       23986994 ns/iter (+/- 463857)
test bitvec_grow_false/bits:2TiB/ops:1 ... bench:       51976114 ns/iter (+/- 1330421)
```

The benchmark also confirms that allocating uninitialized memory
(`bitvec_with_capacity` or `bitvec_reserve`) always takes constant
time:

```
test bitvec_with_capacity/bits:1TiB/ops:1 ... bench:       11705 ns/iter (+/- 246)
test bitvec_with_capacity/bits:2TiB/ops:1 ... bench:       12682 ns/iter (+/- 48)
test bitvec_reserve/bits:1TiB/ops:1 ... bench:       11379 ns/iter (+/- 84)
test bitvec_reserve/bits:2TiB/ops:1 ... bench:       13149 ns/iter (+/- 112)
```

The benchmark shows that the canister scheduling overhead should be a
function of state size. Also, for large memory allocations, `BitVec`
itself should be optimized.
  • Loading branch information
berestovskyy authored Feb 21, 2025
1 parent 2bbbfd1 commit 98fa250
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 0 deletions.
12 changes: 12 additions & 0 deletions rs/memory_tracker/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,15 @@ rust_bench(
"@crate_index//:rayon",
] + DEPENDENCIES,
)

# Criterion micro-benchmarks for creating and growing `bit_vec::BitVec`.
# Only `criterion` is listed explicitly: `benches/bit_vec.rs` does not use
# `memmap2` or `rayon`, and the remaining crates come from DEPENDENCIES.
rust_bench(
    name = "bit_vec",
    testonly = True,
    srcs = ["benches/bit_vec.rs"],
    deps = [
        # Keep sorted.
        "@crate_index//:criterion",
    ] + DEPENDENCIES,
)
4 changes: 4 additions & 0 deletions rs/memory_tracker/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,7 @@ name = "traps"
[[bench]]
harness = false
name = "memory_ops"

[[bench]]
harness = false
name = "bit_vec"
108 changes: 108 additions & 0 deletions rs/memory_tracker/benches/bit_vec.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use std::hint::black_box;

use criterion::{criterion_group, criterion_main, Criterion, Throughput};

/// Size of one memory page in bytes; each page is represented by one bit.
const PAGE_SIZE: usize = 4096;
/// Bytes in one mebibyte.
const MIB: usize = 1024 * 1024;
/// Bytes in one gibibyte.
const GIB: usize = 1024 * MIB;
/// Bytes in one tebibyte.
const TIB: usize = 1024 * GIB;
/// Number of operations each routine performs per benchmark iteration.
const OPS: u64 = 1;

/// Number of bits needed to cover `bytes` of memory at one bit per page.
const fn bits_for(bytes: usize) -> usize {
    bytes / PAGE_SIZE
}

/// Benchmarked sizes as `(label, number of bits)` pairs, where the label is
/// the amount of memory those bits cover.
const NUM_BITS: &[(&str, usize)] = &[
    ("64MiB", bits_for(64 * MIB)),
    ("512MiB", bits_for(512 * MIB)),
    ("1GiB", bits_for(GIB)),
    ("2GiB", bits_for(2 * GIB)),
    ("4GiB", bits_for(4 * GIB)),
    ("8GiB", bits_for(8 * GIB)),
    ("64GiB", bits_for(64 * GIB)),
    ("256GiB", bits_for(256 * GIB)),
    ("512GiB", bits_for(512 * GIB)),
    ("768GiB", bits_for(768 * GIB)),
    ("1008GiB", bits_for(1008 * GIB)),
    ("1TiB", bits_for(TIB)),
    ("2TiB", bits_for(2 * TIB)),
];

/// Builds a `BitVec` of `num_bits` bits, all `false`, `OPS` times.
///
/// Both the requested size and the resulting vector pass through
/// `black_box` so the construction is not optimized away.
fn bitvec_from_elem_false(num_bits: usize) {
    (0..OPS).for_each(|_| {
        let bits = bit_vec::BitVec::from_elem(black_box(num_bits), false);
        drop(black_box(bits));
    });
}

/// Builds a `BitVec` of `num_bits` bits, all `true`, `OPS` times.
///
/// Both the requested size and the resulting vector pass through
/// `black_box` so the construction is not optimized away.
fn bitvec_from_elem_true(num_bits: usize) {
    (0..OPS).for_each(|_| {
        let bits = bit_vec::BitVec::from_elem(black_box(num_bits), true);
        drop(black_box(bits));
    });
}

/// Creates a `BitVec` with capacity for `num_bits` bits and pushes a single
/// `true` bit into it, `OPS` times.
fn bitvec_with_capacity(num_bits: usize) {
    (0..OPS).for_each(|_| {
        let capacity = black_box(num_bits);
        let mut bits = black_box(bit_vec::BitVec::with_capacity(capacity));
        // NOTE(review): the single push presumably makes sure the reserved
        // storage is actually used — confirm intent with the author.
        bits.push(true);
        drop(black_box(bits));
    });
}

/// Baseline: builds a zero-filled `Vec<u32>` covering `num_bits` bits, `OPS`
/// times, for comparison with the `BitVec` constructors.
///
/// The element count is `num_bits / 32` (integer division), so any trailing
/// bits that do not fill a whole `u32` block are not allocated.
fn vec_from_elem_0(num_bits: usize) {
    let num_blocks = num_bits / u32::BITS as usize;
    for _ in 0..OPS {
        // `vec![elem; n]` is the public spelling of the doc-hidden
        // `std::vec::from_elem` helper and expands to the same call.
        let _vec = black_box(vec![0_u32; black_box(num_blocks)]);
    }
}

/// Registers the construction benchmarks: the three `BitVec` constructors
/// and the plain `Vec<u32>` baseline, in that order.
fn from_elem_bench(c: &mut Criterion) {
    let routines: [(&str, fn(usize)); 4] = [
        ("bitvec_from_elem_false", bitvec_from_elem_false),
        ("bitvec_from_elem_true", bitvec_from_elem_true),
        ("bitvec_with_capacity", bitvec_with_capacity),
        ("vec_from_elem_0", vec_from_elem_0),
    ];
    for (group_name, routine) in routines {
        bench(c, group_name, routine);
    }
}

/// Starts from an empty `BitVec` and grows it by `num_bits` bits set to
/// `false`, `OPS` times.
fn bitvec_grow_false(num_bits: usize) {
    (0..OPS).for_each(|_| {
        let mut bits = bit_vec::BitVec::new();
        bits.grow(black_box(num_bits), false);
        drop(black_box(bits));
    });
}

/// Starts from an empty `BitVec` and grows it by `num_bits` bits set to
/// `true`, `OPS` times.
fn bitvec_grow_true(num_bits: usize) {
    (0..OPS).for_each(|_| {
        let mut bits = bit_vec::BitVec::new();
        bits.grow(black_box(num_bits), true);
        drop(black_box(bits));
    });
}

/// Starts from an empty `BitVec`, reserves room for `num_bits` more bits and
/// pushes a single `true` bit, `OPS` times.
fn bitvec_reserve(num_bits: usize) {
    (0..OPS).for_each(|_| {
        let mut bits = bit_vec::BitVec::new();
        bits.reserve(black_box(num_bits));
        // NOTE(review): the single push presumably makes sure the reserved
        // storage is actually used — confirm intent with the author.
        bits.push(true);
        drop(black_box(bits));
    });
}

/// Registers the resizing benchmarks: growing with `false`, growing with
/// `true`, and reserving capacity, in that order.
fn resize_bench(c: &mut Criterion) {
    let routines: [(&str, fn(usize)); 3] = [
        ("bitvec_grow_false", bitvec_grow_false),
        ("bitvec_grow_true", bitvec_grow_true),
        ("bitvec_reserve", bitvec_reserve),
    ];
    for (group_name, routine) in routines {
        bench(c, group_name, routine);
    }
}

/// Runs `routine` under Criterion for every size listed in `NUM_BITS`.
///
/// All measurements are collected in one benchmark group named `group_name`.
/// Each benchmark in the group is labelled `bits:<size label>/ops:<OPS>`,
/// and `routine` receives the number of bits for that size.
fn bench(c: &mut Criterion, group_name: &str, routine: fn(usize)) {
    let mut group = c.benchmark_group(group_name);
    // The throughput does not depend on the size, so configure it once for
    // the whole group instead of re-setting it on every loop iteration.
    group.throughput(Throughput::Elements(OPS));

    for &(id, num_bits) in NUM_BITS {
        group.bench_function(format!("bits:{id}/ops:{OPS}"), |b| {
            b.iter(|| routine(num_bits))
        });
    }

    group.finish();
}

// Collect both benchmark functions into the `benches` group and hand that
// group to Criterion's generated entry point (the bench target is declared
// with `harness = false`, so Criterion supplies `main`).
criterion_group!(benches, from_elem_bench, resize_bench);
criterion_main!(benches);

0 comments on commit 98fa250

Please sign in to comment.