Default to caching git repositories with `git clone --mirror`-like functionality

I first set this up without the "--mirror"-like functionality, but unfortunately
I couldn't get my fetch updates to work properly. A future optimization would be
to remove the mirror refspecs and instead update the refspecs whenever a new
branch is fetched.

Fixes #40
This commit is contained in:
R Tyler Croy 2020-11-26 21:18:31 -08:00
parent 7bf5d5fb8f
commit dfe6cf9376
4 changed files with 185 additions and 16 deletions

1
Cargo.lock generated
View File

@ -1008,6 +1008,7 @@ dependencies = [
"git2",
"otto-agent",
"serde 1.0.117",
"sha2",
"url",
]

View File

@ -9,3 +9,5 @@ git2 = "~0.13.12"
otto-agent = { path = "../../crates/agent" }
serde = {version = "~1.0.117", features = ["derive"]}
url = "~2.2.0"
# Used for managing the cached reference directories
sha2 = "~0.9.2"

View File

@ -4,6 +4,7 @@
use otto_agent::step::*;
use serde::Deserialize;
use std::path::PathBuf;
use url::Url;
#[derive(Clone, Debug, Deserialize)]
@ -26,13 +27,110 @@ fn repo_from_url(repo_url: &Url) -> Option<String> {
None
}
fn main() -> std::io::Result<()> {
use std::path::Path;
/**
 * Compute where the cached reference clone for `url` lives under `cache_dir`
 *
 * The directory name is the lowercase hexadecimal SHA-256 digest of the URL's
 * string form, so a given URL always maps to the same path.
 */
fn locate_reference_for(url: &Url, cache_dir: &PathBuf) -> PathBuf {
    use sha2::{Digest, Sha256};

    // One-shot hashing of the serialized URL
    let digest = Sha256::digest(url.as_str().as_bytes());
    cache_dir.join(format!("{:x}", digest))
}
/**
 * Clone the Git repository found at `repo` into the directory `into`
 *
 * - `branch`: when present, the branch name handed to the clone builder
 * - `bare`: when present, sets the builder's bare flag to the given value and
 *   creates the remote with a fetch refspec that maps every remote ref onto
 *   the local ref of the same name
 *
 * Panics when the clone itself fails; otherwise returns `Ok(())`.
 */
fn clone(
    repo: String,
    into: &PathBuf,
    branch: Option<String>,
    bare: Option<bool>,
) -> std::io::Result<()> {
    let mut cloner = git2::build::RepoBuilder::new();

    if let Some(name) = branch.as_deref() {
        cloner.branch(name);
    }

    if let Some(is_bare) = bare {
        // See https://github.com/rust-lang/git2-rs/issues/521 for why the
        // remote is created by hand with an explicit refspec
        cloner.bare(is_bare);
        cloner.remote_create(|repository, name, url| {
            repository.remote_with_fetch(name, url, "+refs/*:refs/*")
        });
    }

    println!("Cloning {} into {:?}", repo, into);

    if let Err(e) = cloner.clone(&repo, into) {
        panic!("failed to clone {} to {:?}: {}", repo, into, e);
    }
    Ok(())
}
/**
 * Fetch `refs` from every remote of the repository located at `repo_path`
 *
 * `bare` chooses between opening the repository as a bare repository or as a
 * regular one. Panics if the repository cannot be opened or if a fetch fails.
 * Nothing is fetched when the list of remotes cannot be read, and a remote
 * that cannot be looked up by name is skipped.
 */
fn fetch(repo_path: &PathBuf, refs: Vec<String>, bare: bool) {
    println!("Fetching updates for {:?} - {:?}", repo_path, refs);

    let opened = if bare {
        git2::Repository::open_bare(repo_path)
    } else {
        git2::Repository::open(repo_path)
    };
    let repo = opened.expect("Failed to open repo");

    let names = match repo.remotes() {
        Ok(names) => names,
        Err(_) => return,
    };

    for name in names.iter() {
        // Panics when the remote's name is not available from the iterator
        let name = name.unwrap();
        if let Ok(mut remote) = repo.find_remote(name) {
            remote.fetch(&refs, None, None).expect("Failed to fetch");
        }
    }
}
/**
 * Return the location that the working copy should be cloned from
 *
 * When the invocation's configuration carries a cache directory, a bare
 * reference clone of the requested URL is kept inside it: an existing
 * reference clone is updated with a fetch, a missing one is created. The
 * returned string is then the filesystem path of that reference clone.
 *
 * Without a cache directory the upstream URL itself is returned.
 *
 * Panics if the reference clone cannot be created or updated.
 */
fn reference_or_upstream_repo(invoke: &Invocation<Parameters>) -> String {
    let url = &invoke.parameters.url;

    if let Some(cache) = &invoke.configuration.cache {
        let ref_repo = locate_reference_for(url, cache);

        if ref_repo.is_dir() {
            // Only the requested branch is fetched; with no branch requested
            // the empty list is handed to the fetch as-is
            let refs: Vec<String> = invoke.parameters.branch.iter().cloned().collect();
            fetch(&ref_repo, refs, true);
        } else {
            // The result must not be dropped: returning the path of a
            // reference clone that was never created would only move the
            // failure to the caller's subsequent clone
            clone(
                url.as_str().to_string(),
                &ref_repo,
                invoke.parameters.branch.clone(),
                Some(true),
            )
            .expect("Failed to create the cached reference repository");
        }

        ref_repo.to_string_lossy().into_owned()
    } else {
        /*
         * In the cases where the cache directory isn't known, the step is just
         * going to have to clone the source repo
         */
        url.as_str().to_string()
    }
}
fn main() -> std::io::Result<()> {
let args = std::env::args().collect();
let invoke: Invocation<Parameters> =
invocation_from_args(&args).expect("Failed to deserialize the invocation for the step");
let repo_url = reference_or_upstream_repo(&invoke);
let clone_path = match invoke.parameters.into {
Some(into) => into,
None => {
@ -40,18 +138,12 @@ fn main() -> std::io::Result<()> {
}
};
println!("Clone!");
let mut builder = git2::build::RepoBuilder::new();
if let Some(branch) = &invoke.parameters.branch {
builder.branch(&branch);
}
let _repo = match builder.clone(&invoke.parameters.url.into_string(), Path::new(&clone_path)) {
Ok(repo) => repo,
Err(e) => panic!("failed to clone: {}", e),
};
clone(
repo_url,
&PathBuf::from(clone_path),
invoke.parameters.branch,
None,
);
Ok(())
}
@ -71,4 +163,16 @@ mod tests {
let u = Url::parse("https://example.com/repo").expect("Failed to parse");
assert_eq!(repo_from_url(&u).unwrap(), "repo");
}
#[test]
fn test_location_reference_for() {
    use std::path::PathBuf;

    let cache_root = PathBuf::from("/tmp/");
    let upstream = Url::parse("https://example.com").expect("Failed to parse url");

    // The reference directory is expected to be named by a fixed hex digest
    // for this URL, joined onto the cache root
    let located = locate_reference_for(&upstream, &cache_root);
    let expected =
        PathBuf::from("/tmp/0f115db062b7c0dd030b16878c99dea5c354b49dc37b38eb8846179c7783e9d7");

    assert_eq!(expected, located);
}
}

View File

@ -28,7 +28,7 @@ EOF
rm -rf otto-test-repository
}
test_clone_ref_tag() {
test_clone_ref_branch() {
cat > $INVOCATION_FILE<<EOF
{
"configuration" : {
@ -46,7 +46,6 @@ test_clone_ref_tag() {
EOF
output=$(git-step $INVOCATION_FILE)
echo $output
assertTrue "step should be able to clone the given url: ${output}" $?
assertTrue "step should have cloned the repo" "test -d otto-test-repository"
assertTrue "step should have cloned the repo to the branch" "test -f otto-test-repository/this-is-a-branch"
@ -76,7 +75,9 @@ EOF
assertTrue "step should be able to clone the given url: ${output}" $?
assertTrue "step should have cloned the repo into $PWD" "test -f README.adoc"
popd
rm -rf work-dir
}
test_clone_with_cache() {
cache_dir="$PWD/caches"
@ -102,6 +103,67 @@ EOF
assertTrue "step should be able to clone the given url: ${output}" $?
popd
assertTrue "Reference repository should exist", "test -d ${cache_dir}/0884584c5aa4d28cbc4779fbc4cc9566625597528ee92e0092603e823057c1aa"
rm -rf work-dir
}
# Runs the step twice with the same "cache" directory configured: first with
# no branch parameter, then with "branch" set to "test-branch". The second run
# must still produce a checkout that contains the branch-only file.
test_repeat_clone_with_cache() {
# Both invocation files below point at this same cache directory
cache_dir="$PWD/caches"
cat > $INVOCATION_FILE<<EOF
{
"configuration" : {
"pipeline" : "2265b5d0-1f70-46de-bf50-f1050e9fac9a",
"uuid" : "5599cffb-f23a-4e0f-a0b9-f74654641b2b",
"cache" : "${cache_dir}",
"ipc" : "unix:///dev/null",
"endpoints" : {
}
},
"parameters" : {
"url" : "https://git.brokenco.de/rtyler/otto-test-repository"
}
}
EOF
# Clone into one working directory with the "main" refspec
mkdir work-dir
pushd work-dir
output=$(git-step $INVOCATION_FILE)
# $? here is the exit status of git-step from the assignment above, so
# nothing may run between that line and this assertion
assertTrue "step should be able to clone the given url: ${output}" $?
assertTrue "step should have cloned the repo" "test -d otto-test-repository"
popd
# Only the working copy is discarded; the cache directory is left in place
rm -rf work-dir
# Now that we're confident that the cache is primed, try to clone
# a branch from that cached bare reference repo
cat > $INVOCATION_FILE<<EOF
{
"configuration" : {
"pipeline" : "2265b5d0-1f70-46de-bf50-f1050e9fac9a",
"uuid" : "5599cffb-f23a-4e0f-a0b9-f74654641b2b",
"cache" : "${cache_dir}",
"ipc" : "unix:///dev/null",
"endpoints" : {
}
},
"parameters" : {
"url" : "https://git.brokenco.de/rtyler/otto-test-repository",
"branch" : "test-branch"
}
}
EOF
mkdir work-dir
pushd work-dir
output=$(git-step $INVOCATION_FILE)
# As above, $? must be read immediately after the git-step assignment
assertTrue "step should be able to clone the given url: ${output}" $?
assertTrue "step should have cloned the repo" "test -d otto-test-repository"
# This file is what the test uses to tell that the requested branch was checked out
assertTrue "step should have cloned the repo to the branch" "test -f otto-test-repository/this-is-a-branch"
popd
rm -rf work-dir
}
. $(dirname $0)/../../../contrib/shunit2/shunit2