mirror of https://github.com/rust-lang/cargo
1032 lines
39 KiB
Rust
1032 lines
39 KiB
Rust
//! Utilities for handling git repositories, mainly around
|
|
//! authentication/cloning.
|
|
|
|
use crate::core::GitReference;
|
|
use crate::util::errors::{CargoResult, CargoResultExt};
|
|
use crate::util::paths;
|
|
use crate::util::{network, Config, IntoUrl, Progress};
|
|
use anyhow::{anyhow, Context};
|
|
use cargo_util::ProcessBuilder;
|
|
use curl::easy::List;
|
|
use git2::{self, ErrorClass, ObjectType};
|
|
use log::{debug, info};
|
|
use serde::ser;
|
|
use serde::Serialize;
|
|
use std::env;
|
|
use std::fmt;
|
|
use std::path::{Path, PathBuf};
|
|
use std::process::Command;
|
|
use url::Url;
|
|
|
|
fn serialize_str<T, S>(t: &T, s: S) -> Result<S::Ok, S::Error>
|
|
where
|
|
T: fmt::Display,
|
|
S: ser::Serializer,
|
|
{
|
|
s.collect_str(t)
|
|
}
|
|
|
|
pub struct GitShortID(git2::Buf);
|
|
|
|
impl GitShortID {
|
|
pub fn as_str(&self) -> &str {
|
|
self.0.as_str().unwrap()
|
|
}
|
|
}
|
|
|
|
/// `GitRemote` represents a remote repository. It gets cloned into a local
|
|
/// `GitDatabase`.
|
|
#[derive(PartialEq, Clone, Debug, Serialize)]
|
|
pub struct GitRemote {
|
|
#[serde(serialize_with = "serialize_str")]
|
|
url: Url,
|
|
}
|
|
|
|
/// `GitDatabase` is a local clone of a remote repository's database. Multiple
|
|
/// `GitCheckouts` can be cloned from this `GitDatabase`.
|
|
#[derive(Serialize)]
|
|
pub struct GitDatabase {
|
|
remote: GitRemote,
|
|
path: PathBuf,
|
|
#[serde(skip_serializing)]
|
|
repo: git2::Repository,
|
|
}
|
|
|
|
/// `GitCheckout` is a local checkout of a particular revision. Calling
|
|
/// `clone_into` with a reference will resolve the reference into a revision,
|
|
/// and return a `anyhow::Error` if no revision for that reference was found.
|
|
#[derive(Serialize)]
|
|
pub struct GitCheckout<'a> {
|
|
database: &'a GitDatabase,
|
|
location: PathBuf,
|
|
#[serde(serialize_with = "serialize_str")]
|
|
revision: git2::Oid,
|
|
#[serde(skip_serializing)]
|
|
repo: git2::Repository,
|
|
}
|
|
|
|
// Implementations
|
|
|
|
impl GitRemote {
|
|
pub fn new(url: &Url) -> GitRemote {
|
|
GitRemote { url: url.clone() }
|
|
}
|
|
|
|
pub fn url(&self) -> &Url {
|
|
&self.url
|
|
}
|
|
|
|
pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult<git2::Oid> {
|
|
reference.resolve(&self.db_at(path)?.repo)
|
|
}
|
|
|
|
pub fn checkout(
|
|
&self,
|
|
into: &Path,
|
|
db: Option<GitDatabase>,
|
|
reference: &GitReference,
|
|
locked_rev: Option<git2::Oid>,
|
|
cargo_config: &Config,
|
|
) -> CargoResult<(GitDatabase, git2::Oid)> {
|
|
// If we have a previous instance of `GitDatabase` then fetch into that
|
|
// if we can. If that can successfully load our revision then we've
|
|
// populated the database with the latest version of `reference`, so
|
|
// return that database and the rev we resolve to.
|
|
if let Some(mut db) = db {
|
|
fetch(&mut db.repo, self.url.as_str(), reference, cargo_config)
|
|
.context(format!("failed to fetch into: {}", into.display()))?;
|
|
match locked_rev {
|
|
Some(rev) => {
|
|
if db.contains(rev) {
|
|
return Ok((db, rev));
|
|
}
|
|
}
|
|
None => {
|
|
if let Ok(rev) = reference.resolve(&db.repo) {
|
|
return Ok((db, rev));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Otherwise start from scratch to handle corrupt git repositories.
|
|
// After our fetch (which is interpreted as a clone now) we do the same
|
|
// resolution to figure out what we cloned.
|
|
if into.exists() {
|
|
paths::remove_dir_all(into)?;
|
|
}
|
|
paths::create_dir_all(into)?;
|
|
let mut repo = init(into, true)?;
|
|
fetch(&mut repo, self.url.as_str(), reference, cargo_config)
|
|
.context(format!("failed to clone into: {}", into.display()))?;
|
|
let rev = match locked_rev {
|
|
Some(rev) => rev,
|
|
None => reference.resolve(&repo)?,
|
|
};
|
|
|
|
Ok((
|
|
GitDatabase {
|
|
remote: self.clone(),
|
|
path: into.to_path_buf(),
|
|
repo,
|
|
},
|
|
rev,
|
|
))
|
|
}
|
|
|
|
pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
|
|
let repo = git2::Repository::open(db_path)?;
|
|
Ok(GitDatabase {
|
|
remote: self.clone(),
|
|
path: db_path.to_path_buf(),
|
|
repo,
|
|
})
|
|
}
|
|
}
|
|
|
|
impl GitDatabase {
|
|
pub fn copy_to(
|
|
&self,
|
|
rev: git2::Oid,
|
|
dest: &Path,
|
|
cargo_config: &Config,
|
|
) -> CargoResult<GitCheckout<'_>> {
|
|
let mut checkout = None;
|
|
if let Ok(repo) = git2::Repository::open(dest) {
|
|
let mut co = GitCheckout::new(dest, self, rev, repo);
|
|
if !co.is_fresh() {
|
|
// After a successful fetch operation the subsequent reset can
|
|
// fail sometimes for corrupt repositories where the fetch
|
|
// operation succeeds but the object isn't actually there in one
|
|
// way or another. In these situations just skip the error and
|
|
// try blowing away the whole repository and trying with a
|
|
// clone.
|
|
co.fetch(cargo_config)?;
|
|
match co.reset(cargo_config) {
|
|
Ok(()) => {
|
|
assert!(co.is_fresh());
|
|
checkout = Some(co);
|
|
}
|
|
Err(e) => debug!("failed reset after fetch {:?}", e),
|
|
}
|
|
} else {
|
|
checkout = Some(co);
|
|
}
|
|
};
|
|
let checkout = match checkout {
|
|
Some(c) => c,
|
|
None => GitCheckout::clone_into(dest, self, rev, cargo_config)?,
|
|
};
|
|
checkout.update_submodules(cargo_config)?;
|
|
Ok(checkout)
|
|
}
|
|
|
|
pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
|
|
let obj = self.repo.find_object(revision, None)?;
|
|
Ok(GitShortID(obj.short_id()?))
|
|
}
|
|
|
|
pub fn contains(&self, oid: git2::Oid) -> bool {
|
|
self.repo.revparse_single(&oid.to_string()).is_ok()
|
|
}
|
|
|
|
pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
|
|
r.resolve(&self.repo)
|
|
}
|
|
}
|
|
|
|
impl GitReference {
|
|
pub fn resolve(&self, repo: &git2::Repository) -> CargoResult<git2::Oid> {
|
|
let id = match self {
|
|
// Note that we resolve the named tag here in sync with where it's
|
|
// fetched into via `fetch` below.
|
|
GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
|
|
let refname = format!("refs/remotes/origin/tags/{}", s);
|
|
let id = repo.refname_to_id(&refname)?;
|
|
let obj = repo.find_object(id, None)?;
|
|
let obj = obj.peel(ObjectType::Commit)?;
|
|
Ok(obj.id())
|
|
})()
|
|
.chain_err(|| format!("failed to find tag `{}`", s))?,
|
|
|
|
// Resolve the remote name since that's all we're configuring in
|
|
// `fetch` below.
|
|
GitReference::Branch(s) => {
|
|
let name = format!("origin/{}", s);
|
|
let b = repo
|
|
.find_branch(&name, git2::BranchType::Remote)
|
|
.chain_err(|| format!("failed to find branch `{}`", s))?;
|
|
b.get()
|
|
.target()
|
|
.ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
|
|
}
|
|
|
|
// We'll be using the HEAD commit
|
|
GitReference::DefaultBranch => {
|
|
let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
|
|
let head = repo.find_object(head_id, None)?;
|
|
head.peel(ObjectType::Commit)?.id()
|
|
}
|
|
|
|
GitReference::Rev(s) => {
|
|
let obj = repo.revparse_single(s)?;
|
|
match obj.as_tag() {
|
|
Some(tag) => tag.target_id(),
|
|
None => obj.id(),
|
|
}
|
|
}
|
|
};
|
|
Ok(id)
|
|
}
|
|
}
|
|
|
|
impl<'a> GitCheckout<'a> {
|
|
fn new(
|
|
path: &Path,
|
|
database: &'a GitDatabase,
|
|
revision: git2::Oid,
|
|
repo: git2::Repository,
|
|
) -> GitCheckout<'a> {
|
|
GitCheckout {
|
|
location: path.to_path_buf(),
|
|
database,
|
|
revision,
|
|
repo,
|
|
}
|
|
}
|
|
|
|
fn clone_into(
|
|
into: &Path,
|
|
database: &'a GitDatabase,
|
|
revision: git2::Oid,
|
|
config: &Config,
|
|
) -> CargoResult<GitCheckout<'a>> {
|
|
let dirname = into.parent().unwrap();
|
|
paths::create_dir_all(&dirname)?;
|
|
if into.exists() {
|
|
paths::remove_dir_all(into)?;
|
|
}
|
|
|
|
// we're doing a local filesystem-to-filesystem clone so there should
|
|
// be no need to respect global configuration options, so pass in
|
|
// an empty instance of `git2::Config` below.
|
|
let git_config = git2::Config::new()?;
|
|
|
|
// Clone the repository, but make sure we use the "local" option in
|
|
// libgit2 which will attempt to use hardlinks to set up the database.
|
|
// This should speed up the clone operation quite a bit if it works.
|
|
//
|
|
// Note that we still use the same fetch options because while we don't
|
|
// need authentication information we may want progress bars and such.
|
|
let url = database.path.into_url()?;
|
|
let mut repo = None;
|
|
with_fetch_options(&git_config, url.as_str(), config, &mut |fopts| {
|
|
let mut checkout = git2::build::CheckoutBuilder::new();
|
|
checkout.dry_run(); // we'll do this below during a `reset`
|
|
|
|
let r = git2::build::RepoBuilder::new()
|
|
// use hard links and/or copy the database, we're doing a
|
|
// filesystem clone so this'll speed things up quite a bit.
|
|
.clone_local(git2::build::CloneLocal::Local)
|
|
.with_checkout(checkout)
|
|
.fetch_options(fopts)
|
|
.clone(url.as_str(), into)?;
|
|
repo = Some(r);
|
|
Ok(())
|
|
})?;
|
|
let repo = repo.unwrap();
|
|
|
|
let checkout = GitCheckout::new(into, database, revision, repo);
|
|
checkout.reset(config)?;
|
|
Ok(checkout)
|
|
}
|
|
|
|
fn is_fresh(&self) -> bool {
|
|
match self.repo.revparse_single("HEAD") {
|
|
Ok(ref head) if head.id() == self.revision => {
|
|
// See comments in reset() for why we check this
|
|
self.location.join(".cargo-ok").exists()
|
|
}
|
|
_ => false,
|
|
}
|
|
}
|
|
|
|
fn fetch(&mut self, cargo_config: &Config) -> CargoResult<()> {
|
|
info!("fetch {}", self.repo.path().display());
|
|
let url = self.database.path.into_url()?;
|
|
let reference = GitReference::Rev(self.revision.to_string());
|
|
fetch(&mut self.repo, url.as_str(), &reference, cargo_config)?;
|
|
Ok(())
|
|
}
|
|
|
|
fn reset(&self, config: &Config) -> CargoResult<()> {
|
|
// If we're interrupted while performing this reset (e.g., we die because
|
|
// of a signal) Cargo needs to be sure to try to check out this repo
|
|
// again on the next go-round.
|
|
//
|
|
// To enable this we have a dummy file in our checkout, .cargo-ok, which
|
|
// if present means that the repo has been successfully reset and is
|
|
// ready to go. Hence if we start to do a reset, we make sure this file
|
|
// *doesn't* exist, and then once we're done we create the file.
|
|
let ok_file = self.location.join(".cargo-ok");
|
|
let _ = paths::remove_file(&ok_file);
|
|
info!("reset {} to {}", self.repo.path().display(), self.revision);
|
|
|
|
// Ensure libgit2 won't mess with newlines when we vendor.
|
|
if let Ok(mut git_config) = self.repo.config() {
|
|
git_config.set_bool("core.autocrlf", false)?;
|
|
}
|
|
|
|
let object = self.repo.find_object(self.revision, None)?;
|
|
reset(&self.repo, &object, config)?;
|
|
paths::create(ok_file)?;
|
|
Ok(())
|
|
}
|
|
|
|
fn update_submodules(&self, cargo_config: &Config) -> CargoResult<()> {
|
|
return update_submodules(&self.repo, cargo_config);
|
|
|
|
fn update_submodules(repo: &git2::Repository, cargo_config: &Config) -> CargoResult<()> {
|
|
info!("update submodules for: {:?}", repo.workdir().unwrap());
|
|
|
|
for mut child in repo.submodules()? {
|
|
update_submodule(repo, &mut child, cargo_config).chain_err(|| {
|
|
format!(
|
|
"failed to update submodule `{}`",
|
|
child.name().unwrap_or("")
|
|
)
|
|
})?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn update_submodule(
|
|
parent: &git2::Repository,
|
|
child: &mut git2::Submodule<'_>,
|
|
cargo_config: &Config,
|
|
) -> CargoResult<()> {
|
|
child.init(false)?;
|
|
let url = child.url().ok_or_else(|| {
|
|
anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
|
|
})?;
|
|
|
|
// A submodule which is listed in .gitmodules but not actually
|
|
// checked out will not have a head id, so we should ignore it.
|
|
let head = match child.head_id() {
|
|
Some(head) => head,
|
|
None => return Ok(()),
|
|
};
|
|
|
|
// If the submodule hasn't been checked out yet, we need to
|
|
// clone it. If it has been checked out and the head is the same
|
|
// as the submodule's head, then we can skip an update and keep
|
|
// recursing.
|
|
let head_and_repo = child.open().and_then(|repo| {
|
|
let target = repo.head()?.target();
|
|
Ok((target, repo))
|
|
});
|
|
let mut repo = match head_and_repo {
|
|
Ok((head, repo)) => {
|
|
if child.head_id() == head {
|
|
return update_submodules(&repo, cargo_config);
|
|
}
|
|
repo
|
|
}
|
|
Err(..) => {
|
|
let path = parent.workdir().unwrap().join(child.path());
|
|
let _ = paths::remove_dir_all(&path);
|
|
init(&path, false)?
|
|
}
|
|
};
|
|
// Fetch data from origin and reset to the head commit
|
|
let reference = GitReference::Rev(head.to_string());
|
|
cargo_config
|
|
.shell()
|
|
.status("Updating", format!("git submodule `{}`", url))?;
|
|
fetch(&mut repo, url, &reference, cargo_config).chain_err(|| {
|
|
format!(
|
|
"failed to fetch submodule `{}` from {}",
|
|
child.name().unwrap_or(""),
|
|
url
|
|
)
|
|
})?;
|
|
|
|
let obj = repo.find_object(head, None)?;
|
|
reset(&repo, &obj, cargo_config)?;
|
|
update_submodules(&repo, cargo_config)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Prepare the authentication callbacks for cloning a git repository.
|
|
///
|
|
/// The main purpose of this function is to construct the "authentication
|
|
/// callback" which is used to clone a repository. This callback will attempt to
|
|
/// find the right authentication on the system (without user input) and will
|
|
/// guide libgit2 in doing so.
|
|
///
|
|
/// The callback is provided `allowed` types of credentials, and we try to do as
|
|
/// much as possible based on that:
|
|
///
|
|
/// * Prioritize SSH keys from the local ssh agent as they're likely the most
|
|
/// reliable. The username here is prioritized from the credential
|
|
/// callback, then from whatever is configured in git itself, and finally
|
|
/// we fall back to the generic user of `git`.
|
|
///
|
|
/// * If a username/password is allowed, then we fallback to git2-rs's
|
|
/// implementation of the credential helper. This is what is configured
|
|
/// with `credential.helper` in git, and is the interface for the macOS
|
|
/// keychain, for example.
|
|
///
|
|
/// * After the above two have failed, we just kinda grapple attempting to
|
|
/// return *something*.
|
|
///
|
|
/// If any form of authentication fails, libgit2 will repeatedly ask us for
|
|
/// credentials until we give it a reason to not do so. To ensure we don't
|
|
/// just sit here looping forever we keep track of authentications we've
|
|
/// attempted and we don't try the same ones again.
|
|
fn with_authentication<T, F>(url: &str, cfg: &git2::Config, mut f: F) -> CargoResult<T>
|
|
where
|
|
F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
|
|
{
|
|
let mut cred_helper = git2::CredentialHelper::new(url);
|
|
cred_helper.config(cfg);
|
|
|
|
let mut ssh_username_requested = false;
|
|
let mut cred_helper_bad = None;
|
|
let mut ssh_agent_attempts = Vec::new();
|
|
let mut any_attempts = false;
|
|
let mut tried_sshkey = false;
|
|
let mut url_attempt = None;
|
|
|
|
let orig_url = url;
|
|
let mut res = f(&mut |url, username, allowed| {
|
|
any_attempts = true;
|
|
if url != orig_url {
|
|
url_attempt = Some(url.to_string());
|
|
}
|
|
// libgit2's "USERNAME" authentication actually means that it's just
|
|
// asking us for a username to keep going. This is currently only really
|
|
// used for SSH authentication and isn't really an authentication type.
|
|
// The logic currently looks like:
|
|
//
|
|
// let user = ...;
|
|
// if (user.is_null())
|
|
// user = callback(USERNAME, null, ...);
|
|
//
|
|
// callback(SSH_KEY, user, ...)
|
|
//
|
|
// So if we're being called here then we know that (a) we're using ssh
|
|
// authentication and (b) no username was specified in the URL that
|
|
// we're trying to clone. We need to guess an appropriate username here,
|
|
// but that may involve a few attempts. Unfortunately we can't switch
|
|
// usernames during one authentication session with libgit2, so to
|
|
// handle this we bail out of this authentication session after setting
|
|
// the flag `ssh_username_requested`, and then we handle this below.
|
|
if allowed.contains(git2::CredentialType::USERNAME) {
|
|
debug_assert!(username.is_none());
|
|
ssh_username_requested = true;
|
|
return Err(git2::Error::from_str("gonna try usernames later"));
|
|
}
|
|
|
|
// An "SSH_KEY" authentication indicates that we need some sort of SSH
|
|
// authentication. This can currently either come from the ssh-agent
|
|
// process or from a raw in-memory SSH key. Cargo only supports using
|
|
// ssh-agent currently.
|
|
//
|
|
// If we get called with this then the only way that should be possible
|
|
// is if a username is specified in the URL itself (e.g., `username` is
|
|
// Some), hence the unwrap() here. We try custom usernames down below.
|
|
if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
|
|
// If ssh-agent authentication fails, libgit2 will keep
|
|
// calling this callback asking for other authentication
|
|
// methods to try. Make sure we only try ssh-agent once,
|
|
// to avoid looping forever.
|
|
tried_sshkey = true;
|
|
let username = username.unwrap();
|
|
debug_assert!(!ssh_username_requested);
|
|
ssh_agent_attempts.push(username.to_string());
|
|
return git2::Cred::ssh_key_from_agent(username);
|
|
}
|
|
|
|
// Sometimes libgit2 will ask for a username/password in plaintext. This
|
|
// is where Cargo would have an interactive prompt if we supported it,
|
|
// but we currently don't! Right now the only way we support fetching a
|
|
// plaintext password is through the `credential.helper` support, so
|
|
// fetch that here.
|
|
//
|
|
// If ssh-agent authentication fails, libgit2 will keep calling this
|
|
// callback asking for other authentication methods to try. Check
|
|
// cred_helper_bad to make sure we only try the git credentail helper
|
|
// once, to avoid looping forever.
|
|
if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
|
|
{
|
|
let r = git2::Cred::credential_helper(cfg, url, username);
|
|
cred_helper_bad = Some(r.is_err());
|
|
return r;
|
|
}
|
|
|
|
// I'm... not sure what the DEFAULT kind of authentication is, but seems
|
|
// easy to support?
|
|
if allowed.contains(git2::CredentialType::DEFAULT) {
|
|
return git2::Cred::default();
|
|
}
|
|
|
|
// Whelp, we tried our best
|
|
Err(git2::Error::from_str("no authentication available"))
|
|
});
|
|
|
|
// Ok, so if it looks like we're going to be doing ssh authentication, we
|
|
// want to try a few different usernames as one wasn't specified in the URL
|
|
// for us to use. In order, we'll try:
|
|
//
|
|
// * A credential helper's username for this URL, if available.
|
|
// * This account's username.
|
|
// * "git"
|
|
//
|
|
// We have to restart the authentication session each time (due to
|
|
// constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
|
|
// call our callback, `f`, in a loop here.
|
|
if ssh_username_requested {
|
|
debug_assert!(res.is_err());
|
|
let mut attempts = vec![String::from("git")];
|
|
if let Ok(s) = env::var("USER").or_else(|_| env::var("USERNAME")) {
|
|
attempts.push(s);
|
|
}
|
|
if let Some(ref s) = cred_helper.username {
|
|
attempts.push(s.clone());
|
|
}
|
|
|
|
while let Some(s) = attempts.pop() {
|
|
// We should get `USERNAME` first, where we just return our attempt,
|
|
// and then after that we should get `SSH_KEY`. If the first attempt
|
|
// fails we'll get called again, but we don't have another option so
|
|
// we bail out.
|
|
let mut attempts = 0;
|
|
res = f(&mut |_url, username, allowed| {
|
|
if allowed.contains(git2::CredentialType::USERNAME) {
|
|
return git2::Cred::username(&s);
|
|
}
|
|
if allowed.contains(git2::CredentialType::SSH_KEY) {
|
|
debug_assert_eq!(Some(&s[..]), username);
|
|
attempts += 1;
|
|
if attempts == 1 {
|
|
ssh_agent_attempts.push(s.to_string());
|
|
return git2::Cred::ssh_key_from_agent(&s);
|
|
}
|
|
}
|
|
Err(git2::Error::from_str("no authentication available"))
|
|
});
|
|
|
|
// If we made two attempts then that means:
|
|
//
|
|
// 1. A username was requested, we returned `s`.
|
|
// 2. An ssh key was requested, we returned to look up `s` in the
|
|
// ssh agent.
|
|
// 3. For whatever reason that lookup failed, so we were asked again
|
|
// for another mode of authentication.
|
|
//
|
|
// Essentially, if `attempts == 2` then in theory the only error was
|
|
// that this username failed to authenticate (e.g., no other network
|
|
// errors happened). Otherwise something else is funny so we bail
|
|
// out.
|
|
if attempts != 2 {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
let mut err = match res {
|
|
Ok(e) => return Ok(e),
|
|
Err(e) => e,
|
|
};
|
|
|
|
// In the case of an authentication failure (where we tried something) then
|
|
// we try to give a more helpful error message about precisely what we
|
|
// tried.
|
|
if any_attempts {
|
|
let mut msg = "failed to authenticate when downloading \
|
|
repository"
|
|
.to_string();
|
|
|
|
if let Some(attempt) = &url_attempt {
|
|
if url != attempt {
|
|
msg.push_str(": ");
|
|
msg.push_str(attempt);
|
|
}
|
|
}
|
|
msg.push('\n');
|
|
if !ssh_agent_attempts.is_empty() {
|
|
let names = ssh_agent_attempts
|
|
.iter()
|
|
.map(|s| format!("`{}`", s))
|
|
.collect::<Vec<_>>()
|
|
.join(", ");
|
|
msg.push_str(&format!(
|
|
"\n* attempted ssh-agent authentication, but \
|
|
no usernames succeeded: {}",
|
|
names
|
|
));
|
|
}
|
|
if let Some(failed_cred_helper) = cred_helper_bad {
|
|
if failed_cred_helper {
|
|
msg.push_str(
|
|
"\n* attempted to find username/password via \
|
|
git's `credential.helper` support, but failed",
|
|
);
|
|
} else {
|
|
msg.push_str(
|
|
"\n* attempted to find username/password via \
|
|
`credential.helper`, but maybe the found \
|
|
credentials were incorrect",
|
|
);
|
|
}
|
|
}
|
|
msg.push_str("\n\n");
|
|
msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
|
|
msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
|
|
err = err.context(msg);
|
|
|
|
// Otherwise if we didn't even get to the authentication phase them we may
|
|
// have failed to set up a connection, in these cases hint on the
|
|
// `net.git-fetch-with-cli` configuration option.
|
|
} else if let Some(e) = err.downcast_ref::<git2::Error>() {
|
|
match e.class() {
|
|
ErrorClass::Net
|
|
| ErrorClass::Ssl
|
|
| ErrorClass::Submodule
|
|
| ErrorClass::FetchHead
|
|
| ErrorClass::Ssh
|
|
| ErrorClass::Callback
|
|
| ErrorClass::Http => {
|
|
let mut msg = "network failure seems to have happened\n".to_string();
|
|
msg.push_str(
|
|
"if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
|
|
);
|
|
msg.push_str(
|
|
"https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
|
|
);
|
|
err = err.context(msg);
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
Err(err)
|
|
}
|
|
|
|
fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, config: &Config) -> CargoResult<()> {
|
|
let mut pb = Progress::new("Checkout", config);
|
|
let mut opts = git2::build::CheckoutBuilder::new();
|
|
opts.progress(|_, cur, max| {
|
|
drop(pb.tick(cur, max));
|
|
});
|
|
debug!("doing reset");
|
|
repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
|
|
debug!("reset done");
|
|
Ok(())
|
|
}
|
|
|
|
pub fn with_fetch_options(
|
|
git_config: &git2::Config,
|
|
url: &str,
|
|
config: &Config,
|
|
cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
|
|
) -> CargoResult<()> {
|
|
let mut progress = Progress::new("Fetch", config);
|
|
network::with_retry(config, || {
|
|
with_authentication(url, git_config, |f| {
|
|
let mut rcb = git2::RemoteCallbacks::new();
|
|
rcb.credentials(f);
|
|
|
|
rcb.transfer_progress(|stats| {
|
|
progress
|
|
.tick(stats.indexed_objects(), stats.total_objects())
|
|
.is_ok()
|
|
});
|
|
|
|
// Create a local anonymous remote in the repository to fetch the
|
|
// url
|
|
let mut opts = git2::FetchOptions::new();
|
|
opts.remote_callbacks(rcb);
|
|
cb(opts)
|
|
})?;
|
|
Ok(())
|
|
})
|
|
}
|
|
|
|
pub fn fetch(
|
|
repo: &mut git2::Repository,
|
|
url: &str,
|
|
reference: &GitReference,
|
|
config: &Config,
|
|
) -> CargoResult<()> {
|
|
if config.frozen() {
|
|
anyhow::bail!(
|
|
"attempting to update a git repository, but --frozen \
|
|
was specified"
|
|
)
|
|
}
|
|
if !config.network_allowed() {
|
|
anyhow::bail!("can't update a git repository in the offline mode")
|
|
}
|
|
|
|
// If we're fetching from GitHub, attempt GitHub's special fast path for
|
|
// testing if we've already got an up-to-date copy of the repository
|
|
match github_up_to_date(repo, url, reference, config) {
|
|
Ok(true) => return Ok(()),
|
|
Ok(false) => {}
|
|
Err(e) => debug!("failed to check github {:?}", e),
|
|
}
|
|
|
|
// We reuse repositories quite a lot, so before we go through and update the
|
|
// repo check to see if it's a little too old and could benefit from a gc.
|
|
// In theory this shouldn't be too too expensive compared to the network
|
|
// request we're about to issue.
|
|
maybe_gc_repo(repo)?;
|
|
|
|
// Translate the reference desired here into an actual list of refspecs
|
|
// which need to get fetched. Additionally record if we're fetching tags.
|
|
let mut refspecs = Vec::new();
|
|
let mut tags = false;
|
|
match reference {
|
|
// For branches and tags we can fetch simply one reference and copy it
|
|
// locally, no need to fetch other branches/tags.
|
|
GitReference::Branch(b) => {
|
|
refspecs.push(format!("refs/heads/{0}:refs/remotes/origin/{0}", b));
|
|
}
|
|
GitReference::Tag(t) => {
|
|
refspecs.push(format!("refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
|
|
}
|
|
|
|
GitReference::DefaultBranch => {
|
|
refspecs.push(String::from("HEAD:refs/remotes/origin/HEAD"));
|
|
}
|
|
|
|
// For `rev` dependencies we don't know what the rev will point to. To
|
|
// handle this situation we fetch all branches and tags, and then we
|
|
// pray it's somewhere in there.
|
|
GitReference::Rev(_) => {
|
|
refspecs.push(String::from("refs/heads/*:refs/remotes/origin/*"));
|
|
refspecs.push(String::from("HEAD:refs/remotes/origin/HEAD"));
|
|
tags = true;
|
|
}
|
|
}
|
|
|
|
// Unfortunately `libgit2` is notably lacking in the realm of authentication
|
|
// when compared to the `git` command line. As a result, allow an escape
|
|
// hatch for users that would prefer to use `git`-the-CLI for fetching
|
|
// repositories instead of `libgit2`-the-library. This should make more
|
|
// flavors of authentication possible while also still giving us all the
|
|
// speed and portability of using `libgit2`.
|
|
if let Some(true) = config.net_config()?.git_fetch_with_cli {
|
|
return fetch_with_cli(repo, url, &refspecs, tags, config);
|
|
}
|
|
|
|
debug!("doing a fetch for {}", url);
|
|
let git_config = git2::Config::open_default()?;
|
|
with_fetch_options(&git_config, url, config, &mut |mut opts| {
|
|
if tags {
|
|
opts.download_tags(git2::AutotagOption::All);
|
|
}
|
|
// The `fetch` operation here may fail spuriously due to a corrupt
|
|
// repository. It could also fail, however, for a whole slew of other
|
|
// reasons (aka network related reasons). We want Cargo to automatically
|
|
// recover from corrupt repositories, but we don't want Cargo to stomp
|
|
// over other legitimate errors.
|
|
//
|
|
// Consequently we save off the error of the `fetch` operation and if it
|
|
// looks like a "corrupt repo" error then we blow away the repo and try
|
|
// again. If it looks like any other kind of error, or if we've already
|
|
// blown away the repository, then we want to return the error as-is.
|
|
let mut repo_reinitialized = false;
|
|
loop {
|
|
debug!("initiating fetch of {:?} from {}", refspecs, url);
|
|
let res = repo
|
|
.remote_anonymous(url)?
|
|
.fetch(&refspecs, Some(&mut opts), None);
|
|
let err = match res {
|
|
Ok(()) => break,
|
|
Err(e) => e,
|
|
};
|
|
debug!("fetch failed: {}", err);
|
|
|
|
if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
|
|
{
|
|
repo_reinitialized = true;
|
|
debug!(
|
|
"looks like this is a corrupt repository, reinitializing \
|
|
and trying again"
|
|
);
|
|
if reinitialize(repo).is_ok() {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
return Err(err.into());
|
|
}
|
|
Ok(())
|
|
})
|
|
}
|
|
|
|
fn fetch_with_cli(
|
|
repo: &mut git2::Repository,
|
|
url: &str,
|
|
refspecs: &[String],
|
|
tags: bool,
|
|
config: &Config,
|
|
) -> CargoResult<()> {
|
|
let mut cmd = ProcessBuilder::new("git");
|
|
cmd.arg("fetch");
|
|
if tags {
|
|
cmd.arg("--tags");
|
|
}
|
|
cmd.arg("--force") // handle force pushes
|
|
.arg("--update-head-ok") // see discussion in #2078
|
|
.arg(url)
|
|
.args(refspecs)
|
|
// If cargo is run by git (for example, the `exec` command in `git
|
|
// rebase`), the GIT_DIR is set by git and will point to the wrong
|
|
// location (this takes precedence over the cwd). Make sure this is
|
|
// unset so git will look at cwd for the repo.
|
|
.env_remove("GIT_DIR")
|
|
// The reset of these may not be necessary, but I'm including them
|
|
// just to be extra paranoid and avoid any issues.
|
|
.env_remove("GIT_WORK_TREE")
|
|
.env_remove("GIT_INDEX_FILE")
|
|
.env_remove("GIT_OBJECT_DIRECTORY")
|
|
.env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
|
|
.cwd(repo.path());
|
|
config
|
|
.shell()
|
|
.verbose(|s| s.status("Running", &cmd.to_string()))?;
|
|
cmd.exec_with_output()?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Cargo has a bunch of long-lived git repositories in its global cache and
|
|
/// some, like the index, are updated very frequently. Right now each update
|
|
/// creates a new "pack file" inside the git database, and over time this can
|
|
/// cause bad performance and bad current behavior in libgit2.
|
|
///
|
|
/// One pathological use case today is where libgit2 opens hundreds of file
|
|
/// descriptors, getting us dangerously close to blowing out the OS limits of
|
|
/// how many fds we can have open. This is detailed in #4403.
|
|
///
|
|
/// To try to combat this problem we attempt a `git gc` here. Note, though, that
|
|
/// we may not even have `git` installed on the system! As a result we
|
|
/// opportunistically try a `git gc` when the pack directory looks too big, and
|
|
/// failing that we just blow away the repository and start over.
|
|
fn maybe_gc_repo(repo: &mut git2::Repository) -> CargoResult<()> {
|
|
// Here we arbitrarily declare that if you have more than 100 files in your
|
|
// `pack` folder that we need to do a gc.
|
|
let entries = match repo.path().join("objects/pack").read_dir() {
|
|
Ok(e) => e.count(),
|
|
Err(_) => {
|
|
debug!("skipping gc as pack dir appears gone");
|
|
return Ok(());
|
|
}
|
|
};
|
|
let max = env::var("__CARGO_PACKFILE_LIMIT")
|
|
.ok()
|
|
.and_then(|s| s.parse::<usize>().ok())
|
|
.unwrap_or(100);
|
|
if entries < max {
|
|
debug!("skipping gc as there's only {} pack files", entries);
|
|
return Ok(());
|
|
}
|
|
|
|
// First up, try a literal `git gc` by shelling out to git. This is pretty
|
|
// likely to fail though as we may not have `git` installed. Note that
|
|
// libgit2 doesn't currently implement the gc operation, so there's no
|
|
// equivalent there.
|
|
match Command::new("git")
|
|
.arg("gc")
|
|
.current_dir(repo.path())
|
|
.output()
|
|
{
|
|
Ok(out) => {
|
|
debug!(
|
|
"git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
|
|
out.status,
|
|
String::from_utf8_lossy(&out.stdout),
|
|
String::from_utf8_lossy(&out.stderr)
|
|
);
|
|
if out.status.success() {
|
|
let new = git2::Repository::open(repo.path())?;
|
|
*repo = new;
|
|
return Ok(());
|
|
}
|
|
}
|
|
Err(e) => debug!("git-gc failed to spawn: {}", e),
|
|
}
|
|
|
|
// Alright all else failed, let's start over.
|
|
reinitialize(repo)
|
|
}
|
|
|
|
fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
|
|
// Here we want to drop the current repository object pointed to by `repo`,
|
|
// so we initialize temporary repository in a sub-folder, blow away the
|
|
// existing git folder, and then recreate the git repo. Finally we blow away
|
|
// the `tmp` folder we allocated.
|
|
let path = repo.path().to_path_buf();
|
|
debug!("reinitializing git repo at {:?}", path);
|
|
let tmp = path.join("tmp");
|
|
let bare = !repo.path().ends_with(".git");
|
|
*repo = init(&tmp, false)?;
|
|
for entry in path.read_dir()? {
|
|
let entry = entry?;
|
|
if entry.file_name().to_str() == Some("tmp") {
|
|
continue;
|
|
}
|
|
let path = entry.path();
|
|
drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
|
|
}
|
|
*repo = init(&path, bare)?;
|
|
paths::remove_dir_all(&tmp)?;
|
|
Ok(())
|
|
}
|
|
|
|
fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
|
|
let mut opts = git2::RepositoryInitOptions::new();
|
|
// Skip anything related to templates, they just call all sorts of issues as
|
|
// we really don't want to use them yet they insist on being used. See #6240
|
|
// for an example issue that comes up.
|
|
opts.external_template(false);
|
|
opts.bare(bare);
|
|
Ok(git2::Repository::init_opts(&path, &opts)?)
|
|
}
|
|
|
|
/// Updating the index is done pretty regularly so we want it to be as fast as
|
|
/// possible. For registries hosted on GitHub (like the crates.io index) there's
|
|
/// a fast path available to use [1] to tell us that there's no updates to be
|
|
/// made.
|
|
///
|
|
/// This function will attempt to hit that fast path and verify that the `oid`
|
|
/// is actually the current branch of the repository. If `true` is returned then
|
|
/// no update needs to be performed, but if `false` is returned then the
|
|
/// standard update logic still needs to happen.
|
|
///
|
|
/// [1]: https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference
|
|
///
|
|
/// Note that this function should never cause an actual failure because it's
|
|
/// just a fast path. As a result all errors are ignored in this function and we
|
|
/// just return a `bool`. Any real errors will be reported through the normal
|
|
/// update path above.
|
|
fn github_up_to_date(
|
|
repo: &mut git2::Repository,
|
|
url: &str,
|
|
reference: &GitReference,
|
|
config: &Config,
|
|
) -> CargoResult<bool> {
|
|
let url = Url::parse(url)?;
|
|
if url.host_str() != Some("github.com") {
|
|
return Ok(false);
|
|
}
|
|
|
|
let github_branch_name = match reference {
|
|
GitReference::Branch(branch) => branch,
|
|
GitReference::Tag(tag) => tag,
|
|
GitReference::DefaultBranch => "HEAD",
|
|
GitReference::Rev(_) => {
|
|
debug!("can't use github fast path with `rev`");
|
|
return Ok(false);
|
|
}
|
|
};
|
|
|
|
// This expects GitHub urls in the form `github.com/user/repo` and nothing
|
|
// else
|
|
let mut pieces = url
|
|
.path_segments()
|
|
.ok_or_else(|| anyhow!("no path segments on url"))?;
|
|
let username = pieces
|
|
.next()
|
|
.ok_or_else(|| anyhow!("couldn't find username"))?;
|
|
let repository = pieces
|
|
.next()
|
|
.ok_or_else(|| anyhow!("couldn't find repository name"))?;
|
|
if pieces.next().is_some() {
|
|
anyhow::bail!("too many segments on URL");
|
|
}
|
|
|
|
// Trim off the `.git` from the repository, if present, since that's
|
|
// optional for GitHub and won't work when we try to use the API as well.
|
|
let repository = repository.strip_suffix(".git").unwrap_or(repository);
|
|
|
|
let url = format!(
|
|
"https://api.github.com/repos/{}/{}/commits/{}",
|
|
username, repository, github_branch_name,
|
|
);
|
|
let mut handle = config.http()?.borrow_mut();
|
|
debug!("attempting GitHub fast path for {}", url);
|
|
handle.get(true)?;
|
|
handle.url(&url)?;
|
|
handle.useragent("cargo")?;
|
|
let mut headers = List::new();
|
|
headers.append("Accept: application/vnd.github.3.sha")?;
|
|
headers.append(&format!("If-None-Match: \"{}\"", reference.resolve(repo)?))?;
|
|
handle.http_headers(headers)?;
|
|
handle.perform()?;
|
|
Ok(handle.response_code()? == 304)
|
|
}
|