Skip to content

Commit ec47f3e

Browse files
Auto merge of #147508 - nnethercote:TaskDeps-improvements, r=<try>
`TaskDeps` improvements
2 parents 61efd19 + 5d00076 commit ec47f3e

File tree

2 files changed

+40
-27
lines changed

2 files changed

+40
-27
lines changed

compiler/rustc_query_system/src/dep_graph/edges.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::dep_graph::DepNodeIndex;
88
#[derive(Default, Debug)]
99
pub(crate) struct EdgesVec {
1010
max: u32,
11-
edges: SmallVec<[DepNodeIndex; EdgesVec::INLINE_CAPACITY]>,
11+
edges: SmallVec<[DepNodeIndex; 8]>,
1212
}
1313

1414
impl Hash for EdgesVec {
@@ -19,8 +19,6 @@ impl Hash for EdgesVec {
1919
}
2020

2121
impl EdgesVec {
22-
pub(crate) const INLINE_CAPACITY: usize = 8;
23-
2422
#[inline]
2523
pub(crate) fn new() -> Self {
2624
Self::default()

compiler/rustc_query_system/src/dep_graph/graph.rs

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -339,13 +339,11 @@ impl<D: Deps> DepGraphData<D> {
339339
let (result, edges) = if cx.dep_context().is_eval_always(key.kind) {
340340
(with_deps(TaskDepsRef::EvalAlways), EdgesVec::new())
341341
} else {
342-
let task_deps = Lock::new(TaskDeps {
342+
let task_deps = Lock::new(TaskDeps::new(
343343
#[cfg(debug_assertions)]
344-
node: Some(key),
345-
reads: EdgesVec::new(),
346-
read_set: Default::default(),
347-
phantom_data: PhantomData,
348-
});
344+
Some(key),
345+
0,
346+
));
349347
(with_deps(TaskDepsRef::Allow(&task_deps)), task_deps.into_inner().reads)
350348
};
351349

@@ -377,12 +375,18 @@ impl<D: Deps> DepGraphData<D> {
377375
{
378376
debug_assert!(!cx.is_eval_always(dep_kind));
379377

380-
let task_deps = Lock::new(TaskDeps::default());
378+
// Large numbers of reads are common enough here that pre-sizing `read_set`
379+
// to 128 actually helps perf on some benchmarks.
380+
let task_deps = Lock::new(TaskDeps::new(
381+
#[cfg(debug_assertions)]
382+
None,
383+
128,
384+
));
381385
let result = D::with_deps(TaskDepsRef::Allow(&task_deps), op);
382386
let task_deps = task_deps.into_inner();
383-
let task_deps = task_deps.reads;
387+
let reads = task_deps.reads;
384388

385-
let dep_node_index = match task_deps.len() {
389+
let dep_node_index = match reads.len() {
386390
0 => {
387391
// Because the dep-node id of anon nodes is computed from the sets of its
388392
// dependencies we already know what the ID of this dependency-less node is
@@ -393,7 +397,7 @@ impl<D: Deps> DepGraphData<D> {
393397
}
394398
1 => {
395399
// When there is only one dependency, don't bother creating a node.
396-
task_deps[0]
400+
reads[0]
397401
}
398402
_ => {
399403
// The dep node indices are hashed here instead of hashing the dep nodes of the
@@ -402,7 +406,7 @@ impl<D: Deps> DepGraphData<D> {
402406
// combining it with the per session random number `anon_id_seed`. This hash only need
403407
// to map the dependencies to a single value on a per session basis.
404408
let mut hasher = StableHasher::new();
405-
task_deps.hash(&mut hasher);
409+
reads.hash(&mut hasher);
406410

407411
let target_dep_node = DepNode {
408412
kind: dep_kind,
@@ -420,7 +424,7 @@ impl<D: Deps> DepGraphData<D> {
420424
// memory impact of this `anon_node_to_index` map remains tolerable, and helps
421425
// us avoid useless growth of the graph with almost-equivalent nodes.
422426
self.current.anon_node_to_index.get_or_insert_with(target_dep_node, || {
423-
self.current.alloc_new_node(target_dep_node, task_deps, Fingerprint::ZERO)
427+
self.current.alloc_new_node(target_dep_node, reads, Fingerprint::ZERO)
424428
})
425429
}
426430
};
@@ -471,18 +475,17 @@ impl<D: Deps> DepGraph<D> {
471475
data.current.total_read_count.fetch_add(1, Ordering::Relaxed);
472476
}
473477

474-
// As long as we only have a low number of reads we can avoid doing a hash
475-
// insert and potentially allocating/reallocating the hashmap
476-
let new_read = if task_deps.reads.len() < EdgesVec::INLINE_CAPACITY {
477-
task_deps.reads.iter().all(|other| *other != dep_node_index)
478+
// Has `dep_node_index` been seen before? Use either a linear scan or a hashset
479+
// lookup to determine this. See `TaskDeps::read_set` for details.
480+
let new_read = if task_deps.reads.len() <= TaskDeps::LINEAR_SCAN_MAX {
481+
!task_deps.reads.contains(&dep_node_index)
478482
} else {
479483
task_deps.read_set.insert(dep_node_index)
480484
};
481485
if new_read {
482486
task_deps.reads.push(dep_node_index);
483-
if task_deps.reads.len() == EdgesVec::INLINE_CAPACITY {
484-
// Fill `read_set` with what we have so far so we can use the hashset
485-
// next time
487+
if task_deps.reads.len() == TaskDeps::LINEAR_SCAN_MAX + 1 {
488+
// Fill `read_set` with what we have so far. Future lookups will use it.
486489
task_deps.read_set.extend(task_deps.reads.iter().copied());
487490
}
488491

@@ -1292,18 +1295,30 @@ pub enum TaskDepsRef<'a> {
12921295
pub struct TaskDeps {
12931296
#[cfg(debug_assertions)]
12941297
node: Option<DepNode>,
1298+
1299+
/// A vector of `DepNodeIndex`, basically.
12951300
reads: EdgesVec,
1301+
1302+
/// When adding new edges to `reads` in `DepGraph::read_index` we need to determine if the edge
1303+
/// has been seen before. If the number of elements in `reads` is small, we just do a linear
1304+
/// scan. If the number is higher, a hashset has better perf. This field is that hashset. It's
1305+
/// only used if the number of elements in `reads` exceeds `LINEAR_SCAN_MAX`.
12961306
read_set: FxHashSet<DepNodeIndex>,
1307+
12971308
phantom_data: PhantomData<DepNode>,
12981309
}
12991310

1300-
impl Default for TaskDeps {
1301-
fn default() -> Self {
1302-
Self {
1311+
impl TaskDeps {
1312+
/// See `TaskDeps::read_set` above.
1313+
const LINEAR_SCAN_MAX: usize = 16;
1314+
1315+
#[inline]
1316+
fn new(#[cfg(debug_assertions)] node: Option<DepNode>, read_set_capacity: usize) -> Self {
1317+
TaskDeps {
13031318
#[cfg(debug_assertions)]
1304-
node: None,
1319+
node,
13051320
reads: EdgesVec::new(),
1306-
read_set: FxHashSet::with_capacity_and_hasher(128, Default::default()),
1321+
read_set: FxHashSet::with_capacity_and_hasher(read_set_capacity, Default::default()),
13071322
phantom_data: PhantomData,
13081323
}
13091324
}

0 commit comments

Comments
 (0)