Skip to content

Commit

Permalink
Use Hashbrown's raw entry API to reduce hashes and clone in priority (#…
Browse files Browse the repository at this point in the history
…10881)

## Summary

I'm open to not merging this -- I was kind of just interested in what
the API looked like. But the idea is: we can avoid hashing values twice
and unnecessarily cloning within the priority map by using the raw entry
API.
  • Loading branch information
charliermarsh authored Jan 23, 2025
1 parent db4ab9d commit b2dac99
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 39 deletions.
2 changes: 2 additions & 0 deletions crates/uv-resolver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub use yanks::AllowedYanks;
/// `ConflictItemRef`. i.e., We can avoid allocs on lookups.
type FxHashbrownSet<T> = hashbrown::HashSet<T, rustc_hash::FxBuildHasher>;

type FxHashbrownMap<K, V> = hashbrown::HashMap<K, V, rustc_hash::FxBuildHasher>;

mod candidate_selector;
mod dependency_mode;
mod dependency_provider;
Expand Down
6 changes: 6 additions & 0 deletions crates/uv-resolver/src/pubgrub/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,3 +354,9 @@ impl std::fmt::Display for PubGrubPackageInner {
}
}
}

impl From<&PubGrubPackage> for PubGrubPackage {
fn from(package: &PubGrubPackage) -> Self {
package.clone()
}
}
79 changes: 40 additions & 39 deletions crates/uv-resolver/src/pubgrub/priority.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use std::cmp::Reverse;
use std::collections::hash_map::OccupiedEntry;

use hashbrown::hash_map::{EntryRef, OccupiedEntry};
use pubgrub::Range;
use rustc_hash::FxHashMap;
use rustc_hash::FxBuildHasher;

use uv_normalize::PackageName;
use uv_pep440::Version;

use crate::fork_urls::ForkUrls;
use crate::pubgrub::package::PubGrubPackage;
use crate::pubgrub::PubGrubPackageInner;
use crate::SentinelRange;
use crate::{FxHashbrownMap, SentinelRange};

/// A prioritization map to guide the PubGrub resolution process.
///
Expand All @@ -23,8 +23,8 @@ use crate::SentinelRange;
/// See: <https://github.com/pypa/pip/blob/ef78c129b1a966dbbbdb8ebfffc43723e89110d1/src/pip/_internal/resolution/resolvelib/provider.py#L120>
#[derive(Clone, Debug, Default)]
pub(crate) struct PubGrubPriorities {
package_priority: FxHashMap<PackageName, PubGrubPriority>,
virtual_package_tiebreaker: FxHashMap<PubGrubPackage, PubGrubTiebreaker>,
package_priority: FxHashbrownMap<PackageName, PubGrubPriority>,
virtual_package_tiebreaker: FxHashbrownMap<PubGrubPackage, PubGrubTiebreaker>,
}

impl PubGrubPriorities {
Expand All @@ -35,27 +35,24 @@ impl PubGrubPriorities {
version: &Range<Version>,
urls: &ForkUrls,
) {
if !self.virtual_package_tiebreaker.contains_key(package) {
self.virtual_package_tiebreaker.insert(
package.clone(),
PubGrubTiebreaker::from(
u32::try_from(self.virtual_package_tiebreaker.len())
.expect("Less than 2**32 packages"),
),
);
}
let len = self.virtual_package_tiebreaker.len();
self.virtual_package_tiebreaker
.entry_ref(package)
.or_insert_with(|| {
PubGrubTiebreaker::from(u32::try_from(len).expect("Less than 2**32 packages"))
});

let next = self.package_priority.len();
// The root package and Python constraints have no explicit priority, the root package is
// always first and the Python version (range) is fixed.
let Some(name) = package.name_no_root() else {
return;
};

match self.package_priority.entry(name.clone()) {
std::collections::hash_map::Entry::Occupied(mut entry) => {
let len = self.package_priority.len();
match self.package_priority.entry_ref(name) {
EntryRef::Occupied(mut entry) => {
// Preserve the original index.
let index = Self::get_index(&entry).unwrap_or(next);
let index = Self::get_index(&entry).unwrap_or(len);

// Compute the priority.
let priority = if urls.get(name).is_some() {
Expand All @@ -81,16 +78,16 @@ impl PubGrubPriorities {
entry.insert(priority);
}
}
std::collections::hash_map::Entry::Vacant(entry) => {
EntryRef::Vacant(entry) => {
// Compute the priority.
let priority = if urls.get(name).is_some() {
PubGrubPriority::DirectUrl(Reverse(next))
PubGrubPriority::DirectUrl(Reverse(len))
} else if version.as_singleton().is_some()
|| SentinelRange::from(version).is_sentinel()
{
PubGrubPriority::Singleton(Reverse(next))
PubGrubPriority::Singleton(Reverse(len))
} else {
PubGrubPriority::Unspecified(Reverse(next))
PubGrubPriority::Unspecified(Reverse(len))
};

// Insert the priority.
Expand All @@ -99,7 +96,9 @@ impl PubGrubPriorities {
}
}

fn get_index(entry: &OccupiedEntry<PackageName, PubGrubPriority>) -> Option<usize> {
fn get_index(
entry: &OccupiedEntry<'_, PackageName, PubGrubPriority, FxBuildHasher>,
) -> Option<usize> {
match entry.get() {
PubGrubPriority::ConflictLate(Reverse(index))
| PubGrubPriority::Unspecified(Reverse(index))
Expand Down Expand Up @@ -133,7 +132,6 @@ impl PubGrubPriorities {
/// Returns whether the priority was changed, i.e., it's the first time we hit this condition
/// for the package.
pub(crate) fn mark_conflict_early(&mut self, package: &PubGrubPackage) -> bool {
let next = self.package_priority.len();
let Some(name) = package.name_no_root() else {
// Not a correctness bug
if cfg!(debug_assertions) {
Expand All @@ -142,20 +140,22 @@ impl PubGrubPriorities {
return false;
}
};
match self.package_priority.entry(name.clone()) {
std::collections::hash_map::Entry::Occupied(mut entry) => {

let len = self.package_priority.len();
match self.package_priority.entry_ref(name) {
EntryRef::Vacant(entry) => {
entry.insert(PubGrubPriority::ConflictEarly(Reverse(len)));
true
}
EntryRef::Occupied(mut entry) => {
if matches!(entry.get(), PubGrubPriority::ConflictEarly(_)) {
// Already in the right category
return false;
};
let index = Self::get_index(&entry).unwrap_or(next);
let index = Self::get_index(&entry).unwrap_or(len);
entry.insert(PubGrubPriority::ConflictEarly(Reverse(index)));
true
}
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(PubGrubPriority::ConflictEarly(Reverse(next)));
true
}
}
}

Expand All @@ -165,7 +165,6 @@ impl PubGrubPriorities {
/// Returns whether the priority was changed, i.e., it's the first time this package was
/// marked as conflicting above the threshold.
pub(crate) fn mark_conflict_late(&mut self, package: &PubGrubPackage) -> bool {
let next = self.package_priority.len();
let Some(name) = package.name_no_root() else {
// Not a correctness bug
if cfg!(debug_assertions) {
Expand All @@ -174,8 +173,14 @@ impl PubGrubPriorities {
return false;
}
};
match self.package_priority.entry(name.clone()) {
std::collections::hash_map::Entry::Occupied(mut entry) => {

let len = self.package_priority.len();
match self.package_priority.entry_ref(name) {
EntryRef::Vacant(entry) => {
entry.insert(PubGrubPriority::ConflictLate(Reverse(len)));
true
}
EntryRef::Occupied(mut entry) => {
// The ConflictEarly` match avoids infinite loops.
if matches!(
entry.get(),
Expand All @@ -184,14 +189,10 @@ impl PubGrubPriorities {
// Already in the right category
return false;
};
let index = Self::get_index(&entry).unwrap_or(next);
let index = Self::get_index(&entry).unwrap_or(len);
entry.insert(PubGrubPriority::ConflictLate(Reverse(index)));
true
}
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(PubGrubPriority::ConflictLate(Reverse(next)));
true
}
}
}
}
Expand Down

0 comments on commit b2dac99

Please sign in to comment.