mirror of https://github.com/rust-lang/cargo
149 lines
5.5 KiB
Rust
149 lines
5.5 KiB
Rust
//! Utility for capturing a global cache last-use database based on the files
//! on a real-world system.
//!
//! This will look in the CARGO_HOME of the current system and record last-use
//! data for all files in the cache. This is intended to provide a real-world
//! example for a benchmark that should be close to what a real set of data
//! should look like.
//!
//! See `benches/global_cache_tracker.rs` for the benchmark that uses this
//! data.
//!
//! The database is kept in git. It usually shouldn't need to be re-generated
//! unless there is a change in the schema or the benchmark.
|
|
|
|
use cargo::core::global_cache_tracker::{self, DeferredGlobalLastUse, GlobalCacheTracker};
|
|
use cargo::util::cache_lock::CacheLockMode;
|
|
use cargo::util::interning::InternedString;
|
|
use cargo::Config;
|
|
use rand::prelude::SliceRandom;
|
|
use std::collections::HashMap;
|
|
use std::fs;
|
|
use std::fs::File;
|
|
use std::io::Write;
|
|
use std::path::Path;
|
|
|
|
fn main() {
|
|
// Set up config.
|
|
let shell = cargo::core::Shell::new();
|
|
let homedir = Path::new(env!("CARGO_MANIFEST_DIR")).join("global-cache-tracker");
|
|
let cwd = homedir.clone();
|
|
let mut config = Config::new(shell, cwd, homedir.clone());
|
|
config
|
|
.configure(
|
|
0,
|
|
false,
|
|
None,
|
|
false,
|
|
false,
|
|
false,
|
|
&None,
|
|
&["gc".to_string()],
|
|
&[],
|
|
)
|
|
.unwrap();
|
|
let db_path = GlobalCacheTracker::db_path(&config).into_path_unlocked();
|
|
if db_path.exists() {
|
|
fs::remove_file(&db_path).unwrap();
|
|
}
|
|
|
|
let _lock = config
|
|
.acquire_package_cache_lock(CacheLockMode::DownloadExclusive)
|
|
.unwrap();
|
|
let mut deferred = DeferredGlobalLastUse::new();
|
|
let mut tracker = GlobalCacheTracker::new(&config).unwrap();
|
|
|
|
let real_home = cargo::util::homedir(&std::env::current_dir().unwrap()).unwrap();
|
|
|
|
let cache_dir = real_home.join("registry/cache");
|
|
for dir_ent in fs::read_dir(cache_dir).unwrap() {
|
|
let registry = dir_ent.unwrap();
|
|
let encoded_registry_name = InternedString::new(®istry.file_name().to_string_lossy());
|
|
for krate in fs::read_dir(registry.path()).unwrap() {
|
|
let krate = krate.unwrap();
|
|
let meta = krate.metadata().unwrap();
|
|
deferred.mark_registry_crate_used_stamp(
|
|
global_cache_tracker::RegistryCrate {
|
|
encoded_registry_name,
|
|
crate_filename: krate.file_name().to_string_lossy().as_ref().into(),
|
|
size: meta.len(),
|
|
},
|
|
Some(&meta.modified().unwrap()),
|
|
);
|
|
}
|
|
}
|
|
|
|
let mut src_entries = Vec::new();
|
|
|
|
let cache_dir = real_home.join("registry/src");
|
|
for dir_ent in fs::read_dir(cache_dir).unwrap() {
|
|
let registry = dir_ent.unwrap();
|
|
let encoded_registry_name = InternedString::new(®istry.file_name().to_string_lossy());
|
|
for krate in fs::read_dir(registry.path()).unwrap() {
|
|
let krate = krate.unwrap();
|
|
let meta = krate.metadata().unwrap();
|
|
let src = global_cache_tracker::RegistrySrc {
|
|
encoded_registry_name,
|
|
package_dir: krate.file_name().to_string_lossy().as_ref().into(),
|
|
size: Some(cargo_util::du(&krate.path(), &[]).unwrap()),
|
|
};
|
|
src_entries.push(src.clone());
|
|
let timestamp = meta.modified().unwrap();
|
|
deferred.mark_registry_src_used_stamp(src, Some(×tamp));
|
|
}
|
|
}
|
|
|
|
let git_co_dir = real_home.join("git/checkouts");
|
|
for dir_ent in fs::read_dir(git_co_dir).unwrap() {
|
|
let git_source = dir_ent.unwrap();
|
|
let encoded_git_name = InternedString::new(&git_source.file_name().to_string_lossy());
|
|
for co in fs::read_dir(git_source.path()).unwrap() {
|
|
let co = co.unwrap();
|
|
let meta = co.metadata().unwrap();
|
|
deferred.mark_git_checkout_used_stamp(
|
|
global_cache_tracker::GitCheckout {
|
|
encoded_git_name,
|
|
short_name: co.file_name().to_string_lossy().as_ref().into(),
|
|
size: Some(cargo_util::du(&co.path(), &[]).unwrap()),
|
|
},
|
|
Some(&meta.modified().unwrap()),
|
|
);
|
|
}
|
|
}
|
|
|
|
deferred.save(&mut tracker).unwrap();
|
|
drop(deferred);
|
|
drop(tracker);
|
|
fs::rename(&db_path, homedir.join("global-cache-sample")).unwrap();
|
|
// Clean up the lock file created above.
|
|
fs::remove_file(homedir.join(".package-cache")).unwrap();
|
|
|
|
// Save a random sample of crates that the benchmark should update.
|
|
// Pick whichever registry has the most entries. This is to be somewhat
|
|
// realistic for the common case that all dependencies come from one
|
|
// registry (crates.io).
|
|
let mut counts = HashMap::new();
|
|
for src in &src_entries {
|
|
let c: &mut u32 = counts.entry(src.encoded_registry_name).or_default();
|
|
*c += 1;
|
|
}
|
|
let mut counts: Vec<_> = counts.into_iter().map(|(k, v)| (v, k)).collect();
|
|
counts.sort();
|
|
let biggest = counts.last().unwrap().1;
|
|
|
|
src_entries.retain(|src| src.encoded_registry_name == biggest);
|
|
let mut rng = &mut rand::thread_rng();
|
|
let sample: Vec<_> = src_entries.choose_multiple(&mut rng, 500).collect();
|
|
let mut f = File::create(homedir.join("random-sample")).unwrap();
|
|
for src in sample {
|
|
writeln!(
|
|
f,
|
|
"{},{},{}",
|
|
src.encoded_registry_name,
|
|
src.package_dir,
|
|
src.size.unwrap()
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|