Last active
January 23, 2021 16:33
-
-
Save oxalica/3360eec9376f22533fcecff02798b698 to your computer and use it in GitHub Desktop.
Vec::retain: swap+truncate vs. drop+move
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
retain_std u64 almost_kept | |
time: [123.44 us 127.97 us 134.19 us] | |
Found 14 outliers among 100 measurements (14.00%) | |
2 (2.00%) low mild | |
5 (5.00%) high mild | |
7 (7.00%) high severe | |
retain_early_drop u64 almost_kept | |
time: [98.345 us 100.49 us 103.22 us] | |
Found 15 outliers among 100 measurements (15.00%) | |
1 (1.00%) low severe | |
1 (1.00%) low mild | |
3 (3.00%) high mild | |
10 (10.00%) high severe | |
retain_std_black u64 almost_kept | |
time: [215.45 us 221.39 us 229.35 us] | |
Found 16 outliers among 100 measurements (16.00%) | |
7 (7.00%) high mild | |
9 (9.00%) high severe | |
retain_early_drop_black u64 almost_kept | |
time: [211.25 us 215.49 us 221.38 us] | |
Found 9 outliers among 100 measurements (9.00%) | |
3 (3.00%) high mild | |
6 (6.00%) high severe | |
retain_std u64 almost_drop | |
time: [109.32 us 113.41 us 118.71 us] | |
Found 13 outliers among 100 measurements (13.00%) | |
2 (2.00%) low severe | |
3 (3.00%) high mild | |
8 (8.00%) high severe | |
retain_early_drop u64 almost_drop | |
time: [54.413 us 56.216 us 58.747 us] | |
Found 11 outliers among 100 measurements (11.00%) | |
3 (3.00%) high mild | |
8 (8.00%) high severe | |
retain_std_black u64 almost_drop | |
time: [201.42 us 204.76 us 210.15 us] | |
Found 8 outliers among 100 measurements (8.00%) | |
3 (3.00%) low mild | |
1 (1.00%) high mild | |
4 (4.00%) high severe | |
retain_early_drop_black u64 almost_drop | |
time: [208.82 us 212.38 us 216.96 us] | |
Found 9 outliers among 100 measurements (9.00%) | |
1 (1.00%) high mild | |
8 (8.00%) high severe | |
retain_std u64 drop_head | |
time: [120.17 us 125.81 us 133.32 us] | |
Found 16 outliers among 100 measurements (16.00%) | |
6 (6.00%) high mild | |
10 (10.00%) high severe | |
retain_early_drop u64 drop_head | |
time: [100.77 us 104.96 us 110.49 us] | |
Found 22 outliers among 100 measurements (22.00%) | |
3 (3.00%) low severe | |
2 (2.00%) low mild | |
2 (2.00%) high mild | |
15 (15.00%) high severe | |
retain_std_black u64 drop_head | |
time: [217.34 us 223.07 us 229.90 us] | |
Found 9 outliers among 100 measurements (9.00%) | |
4 (4.00%) high mild | |
5 (5.00%) high severe | |
retain_early_drop_black u64 drop_head | |
time: [229.91 us 235.38 us 242.05 us] | |
Found 6 outliers among 100 measurements (6.00%) | |
1 (1.00%) high mild | |
5 (5.00%) high severe | |
retain_std u64 drop_tail | |
time: [108.65 us 111.25 us 115.37 us] | |
Found 16 outliers among 100 measurements (16.00%) | |
3 (3.00%) high mild | |
13 (13.00%) high severe | |
retain_early_drop u64 drop_tail | |
time: [60.681 us 61.226 us 62.334 us] | |
Found 11 outliers among 100 measurements (11.00%) | |
2 (2.00%) high mild | |
9 (9.00%) high severe | |
retain_std_black u64 drop_tail | |
time: [188.84 us 191.98 us 196.50 us] | |
Found 16 outliers among 100 measurements (16.00%) | |
5 (5.00%) high mild | |
11 (11.00%) high severe | |
retain_early_drop_black u64 drop_tail | |
time: [191.12 us 193.76 us 197.41 us] | |
Found 10 outliers among 100 measurements (10.00%) | |
3 (3.00%) high mild | |
7 (7.00%) high severe | |
retain_std box_u64 almost_kept | |
time: [522.42 us 536.95 us 556.43 us] | |
Found 11 outliers among 100 measurements (11.00%) | |
3 (3.00%) high mild | |
8 (8.00%) high severe | |
retain_early_drop box_u64 almost_kept | |
time: [336.46 us 345.96 us 358.06 us] | |
Found 9 outliers among 100 measurements (9.00%) | |
4 (4.00%) high mild | |
5 (5.00%) high severe | |
retain_std_black box_u64 almost_kept | |
time: [551.75 us 567.97 us 589.73 us] | |
Found 11 outliers among 100 measurements (11.00%) | |
5 (5.00%) high mild | |
6 (6.00%) high severe | |
retain_early_drop_black box_u64 almost_kept | |
time: [350.19 us 355.36 us 361.61 us] | |
Found 8 outliers among 100 measurements (8.00%) | |
3 (3.00%) high mild | |
5 (5.00%) high severe | |
retain_std box_u64 almost_drop | |
time: [1.3071 ms 1.3337 ms 1.3697 ms] | |
Found 9 outliers among 100 measurements (9.00%) | |
2 (2.00%) high mild | |
7 (7.00%) high severe | |
retain_early_drop box_u64 almost_drop | |
time: [1.0219 ms 1.0687 ms 1.1472 ms] | |
Found 10 outliers among 100 measurements (10.00%) | |
3 (3.00%) high mild | |
7 (7.00%) high severe | |
retain_std_black box_u64 almost_drop | |
time: [1.3579 ms 1.3644 ms 1.3754 ms] | |
Found 11 outliers among 100 measurements (11.00%) | |
1 (1.00%) low mild | |
3 (3.00%) high mild | |
7 (7.00%) high severe | |
retain_early_drop_black box_u64 almost_drop | |
time: [1.1125 ms 1.1491 ms 1.1969 ms] | |
Found 5 outliers among 100 measurements (5.00%) | |
1 (1.00%) high mild | |
4 (4.00%) high severe | |
retain_std box_u64 drop_head | |
time: [375.30 us 378.73 us 383.46 us] | |
Found 7 outliers among 100 measurements (7.00%) | |
1 (1.00%) low mild | |
2 (2.00%) high mild | |
4 (4.00%) high severe | |
retain_early_drop box_u64 drop_head | |
time: [338.84 us 346.42 us 357.24 us] | |
Found 6 outliers among 100 measurements (6.00%) | |
2 (2.00%) high mild | |
4 (4.00%) high severe | |
retain_std_black box_u64 drop_head | |
time: [416.23 us 429.75 us 449.05 us] | |
Found 11 outliers among 100 measurements (11.00%) | |
3 (3.00%) high mild | |
8 (8.00%) high severe | |
retain_early_drop_black box_u64 drop_head | |
time: [374.24 us 387.10 us 403.51 us] | |
Found 6 outliers among 100 measurements (6.00%) | |
1 (1.00%) high mild | |
5 (5.00%) high severe | |
retain_std box_u64 drop_tail | |
time: [318.67 us 322.47 us 328.94 us] | |
Found 7 outliers among 100 measurements (7.00%) | |
3 (3.00%) low mild | |
1 (1.00%) high mild | |
3 (3.00%) high severe | |
retain_early_drop box_u64 drop_tail | |
time: [299.08 us 305.63 us 315.34 us] | |
Found 9 outliers among 100 measurements (9.00%) | |
3 (3.00%) high mild | |
6 (6.00%) high severe | |
retain_std_black box_u64 drop_tail | |
time: [350.63 us 360.51 us 375.18 us] | |
Found 5 outliers among 100 measurements (5.00%) | |
5 (5.00%) high severe | |
retain_early_drop_black box_u64 drop_tail | |
time: [331.23 us 335.57 us 342.68 us] | |
Found 9 outliers among 100 measurements (9.00%) | |
2 (2.00%) high mild | |
7 (7.00%) high severe | |
retain_std 64byte almost_kept | |
time: [784.09 us 792.22 us 802.28 us] | |
Found 14 outliers among 100 measurements (14.00%) | |
3 (3.00%) high mild | |
11 (11.00%) high severe | |
retain_early_drop 64byte almost_kept | |
time: [668.95 us 677.15 us 686.92 us] | |
Found 13 outliers among 100 measurements (13.00%) | |
2 (2.00%) high mild | |
11 (11.00%) high severe | |
retain_std_black 64byte almost_kept | |
time: [784.50 us 793.33 us 804.71 us] | |
Found 11 outliers among 100 measurements (11.00%) | |
2 (2.00%) high mild | |
9 (9.00%) high severe | |
retain_early_drop_black 64byte almost_kept | |
time: [695.69 us 703.90 us 714.98 us] | |
Found 13 outliers among 100 measurements (13.00%) | |
5 (5.00%) high mild | |
8 (8.00%) high severe | |
retain_std 64byte almost_drop | |
time: [457.75 us 465.60 us 475.24 us] | |
Found 11 outliers among 100 measurements (11.00%) | |
11 (11.00%) high severe | |
retain_early_drop 64byte almost_drop | |
time: [337.76 us 341.31 us 346.20 us] | |
Found 10 outliers among 100 measurements (10.00%) | |
3 (3.00%) high mild | |
7 (7.00%) high severe | |
retain_std_black 64byte almost_drop | |
time: [493.88 us 501.65 us 511.77 us] | |
Found 8 outliers among 100 measurements (8.00%) | |
1 (1.00%) high mild | |
7 (7.00%) high severe | |
retain_early_drop_black 64byte almost_drop | |
time: [455.96 us 472.64 us 490.19 us] | |
Found 2 outliers among 100 measurements (2.00%) | |
2 (2.00%) high mild | |
retain_std 64byte drop_head | |
time: [831.50 us 850.35 us 871.02 us] | |
Found 5 outliers among 100 measurements (5.00%) | |
3 (3.00%) high mild | |
2 (2.00%) high severe | |
retain_early_drop 64byte drop_head | |
time: [689.15 us 696.21 us 704.81 us] | |
Found 15 outliers among 100 measurements (15.00%) | |
2 (2.00%) high mild | |
13 (13.00%) high severe | |
retain_std_black 64byte drop_head | |
time: [777.10 us 787.47 us 800.61 us] | |
Found 17 outliers among 100 measurements (17.00%) | |
4 (4.00%) high mild | |
13 (13.00%) high severe | |
retain_early_drop_black 64byte drop_head | |
time: [708.77 us 722.93 us 742.72 us] | |
Found 15 outliers among 100 measurements (15.00%) | |
6 (6.00%) high mild | |
9 (9.00%) high severe | |
retain_std 64byte drop_tail | |
time: [266.76 us 273.20 us 281.11 us] | |
Found 13 outliers among 100 measurements (13.00%) | |
3 (3.00%) high mild | |
10 (10.00%) high severe | |
retain_early_drop 64byte drop_tail | |
time: [267.17 us 271.99 us 278.05 us] | |
Found 7 outliers among 100 measurements (7.00%) | |
1 (1.00%) high mild | |
6 (6.00%) high severe | |
retain_std_black 64byte drop_tail | |
time: [313.70 us 319.00 us 326.10 us] | |
Found 10 outliers among 100 measurements (10.00%) | |
2 (2.00%) high mild | |
8 (8.00%) high severe | |
retain_early_drop_black 64byte drop_tail | |
time: [314.71 us 322.80 us 333.55 us] | |
Found 8 outliers among 100 measurements (8.00%) | |
3 (3.00%) high mild | |
5 (5.00%) high severe |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion}; | |
use testt::retain; | |
#[rustfmt::skip] | |
fn bench_retain(c: &mut Criterion) { | |
const N: usize = 100_000; | |
fn black_fn<T>(f: fn(&T) -> bool) -> fn(&T) -> bool { | |
black_box(f) | |
} | |
macro_rules! bench { | |
($name:literal, $gen:expr, $f:expr) => { | |
c.bench_function(concat!("retain_std ", $name), move |b| { | |
b.iter_batched($gen, move |mut v| { | |
v.retain($f); | |
v | |
}, BatchSize::NumBatches(1)); | |
}); | |
c.bench_function(concat!("retain_early_drop ", $name), move |b| { | |
b.iter_batched($gen, move |mut v| { | |
retain(&mut v, $f); | |
v | |
}, BatchSize::NumBatches(1)); | |
}); | |
c.bench_function(concat!("retain_std_black ", $name), move |b| { | |
b.iter_batched($gen, move |mut v| { | |
v.retain(black_fn($f)); | |
v | |
}, BatchSize::NumBatches(1)); | |
}); | |
c.bench_function(concat!("retain_early_drop_black ", $name), move |b| { | |
b.iter_batched($gen, move |mut v| { | |
retain(&mut v, black_fn($f)); | |
v | |
}, BatchSize::NumBatches(1)); | |
}); | |
}; | |
} | |
let gen_u64_seq = || (0..N as u64).collect::<Vec<_>>(); | |
bench!("u64 almost_kept", gen_u64_seq, |x| *x & 7 != 0); | |
bench!("u64 almost_drop", gen_u64_seq, |x| *x & 7 == 0); | |
bench!("u64 drop_head", gen_u64_seq, |x| *x > 10000); | |
bench!("u64 drop_tail", gen_u64_seq, |x| *x < N as u64 - 10000); | |
let gen_boxed_u64 = || (0..N as u64).map(Box::new).collect::<Vec<_>>(); | |
bench!("box_u64 almost_kept", gen_boxed_u64, |x: &Box<u64>| **x & 7 != 0); | |
bench!("box_u64 almost_drop", gen_boxed_u64, |x: &Box<u64>| **x & 7 == 0); | |
bench!("box_u64 drop_head", gen_boxed_u64, |x: &Box<u64>| **x > 10000); | |
bench!("box_u64 drop_tail", gen_boxed_u64, |x: &Box<u64>| **x < N as u64 - 10000); | |
let gen_arr = || (0..N as u64).map(|x| [x; 8]).collect::<Vec<_>>(); | |
bench!("64byte almost_kept", gen_arr, |x: &[u64; 8]| x[0] & 7 != 0); | |
bench!("64byte almost_drop", gen_arr, |x: &[u64; 8]| x[0] & 7 == 0); | |
bench!("64byte drop_head", gen_arr, |x: &[u64; 8]| x[0] > 10000); | |
bench!("64byte drop_tail", gen_arr, |x: &[u64; 8]| x[0] < N as u64 - 10000); | |
} | |
criterion_group!(benches, bench_retain); | |
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::ptr; | |
pub fn retain<T, F>(self_: &mut Vec<T>, mut f: F) | |
where | |
F: FnMut(&T) -> bool, | |
{ | |
let original_len = self_.len(); | |
// Avoid double drop if the drop guard is not executed, | |
// since we may make some holes during the process. | |
unsafe { self_.set_len(0) }; | |
// Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked] | |
// |<- processed len ->| ^- next to check | |
// |<- deleted cnt ->| | |
// |<- original_len ->| | |
// Kept: Elements which predicate returns true on. | |
// Hole: Moved or dropped element slot. | |
// Unchecked: Unchecked valid elements. | |
// | |
// This drop guard will be invoked when predicate or `drop` of element panicked. | |
// It shifts unchecked elements to cover holes and `set_len` to the correct length. | |
// In cases when predicate and `drop` never panick, it will be optimized out. | |
struct BackshiftOnDrop<'a, T> { | |
v: &'a mut Vec<T>, | |
processed_len: usize, | |
deleted_cnt: usize, | |
original_len: usize, | |
} | |
impl<T> Drop for BackshiftOnDrop<'_, T> { | |
fn drop(&mut self) { | |
if self.deleted_cnt > 0 { | |
// SAFETY: Trailing unchecked items must be valid since we never touch them. | |
unsafe { | |
ptr::copy( | |
self.v.as_ptr().add(self.processed_len), | |
self.v | |
.as_mut_ptr() | |
.add(self.processed_len - self.deleted_cnt), | |
self.original_len - self.processed_len, | |
); | |
} | |
} | |
// SAFETY: After filling holes, all items are in contiguous memory. | |
unsafe { | |
self.v.set_len(self.original_len - self.deleted_cnt); | |
} | |
} | |
} | |
let mut g = BackshiftOnDrop { | |
v: self_, | |
processed_len: 0, | |
deleted_cnt: 0, | |
original_len, | |
}; | |
while g.processed_len < original_len { | |
// SAFETY: Unchecked element must be valid. | |
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) }; | |
if !f(cur) { | |
// Advance early to avoid double drop if `drop_in_place` panicked. | |
g.processed_len += 1; | |
g.deleted_cnt += 1; | |
// SAFETY: We never touch this element again after dropped. | |
unsafe { ptr::drop_in_place(cur) }; | |
// We already advanced the counter. | |
continue; | |
} | |
if g.deleted_cnt > 0 { | |
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element. | |
// We use copy for move, and never touch this element again. | |
unsafe { | |
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt); | |
ptr::copy_nonoverlapping(cur, hole_slot, 1); | |
} | |
} | |
g.processed_len += 1; | |
} | |
// All item are processed. This can be optimized to `set_len` by LLVM. | |
drop(g); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment