From 2437d14bf33ed90b37a25ed60109161cc8c08384 Mon Sep 17 00:00:00 2001 From: Justin Geibel Date: Thu, 6 May 2021 18:46:39 -0400 Subject: [PATCH] Add a background job for squashing the index This adds a background job that squashes the index into a single commit. The current plan is to manually enqueue this job on a 6-week schedule, roughly aligning with new `rustc` releases. Before deploying this, we will need to make sure that the SSH key is allowed to do a force push to the protected master branch. This job is derived from a [script] that was periodically run by the cargo team. There are a few minor differences relative to the original script: * The push of the snapshot branch is no longer forced. The job will fail if run more than once on the same day. (If the first attempt fails before pushing a new root commit upstream, then retries should succeed as long as the snapshot can be fast-forwarded.) * The push of the new root commit to the origin no longer uses `--force-with-lease` to reject the force push if new commits have been pushed there in parallel. Other than the occasional manual changes to the index (such as deleting crates), background jobs have exclusive write access to the index while running. Given that such manual changes are rare, this job completes quickly, and such manual tasks should be automated too, this is low risk. The alternative would be to shell out to git, because `libgit2` (and thus the `git2` crate) does not yet support this portion of the protocol. 
[script]: https://github.com/rust-lang/crates-io-cargo-teams/issues/47#issuecomment-506236036 --- src/bin/enqueue-job.rs | 3 +- src/git.rs | 63 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/src/bin/enqueue-job.rs b/src/bin/enqueue-job.rs index 07727d9357f..b54c97bfed3 100644 --- a/src/bin/enqueue-job.rs +++ b/src/bin/enqueue-job.rs @@ -1,7 +1,7 @@ #![deny(clippy::all)] use anyhow::{anyhow, Result}; -use cargo_registry::{db, env, tasks}; +use cargo_registry::{db, env, git, tasks}; use diesel::prelude::*; use swirl::schema::background_jobs::dsl::*; use swirl::Job; @@ -36,6 +36,7 @@ fn main() -> Result<()> { Ok(tasks::dump_db(database_url, target_name).enqueue(&conn)?) } "daily_db_maintenance" => Ok(tasks::daily_db_maintenance().enqueue(&conn)?), + "squash_index" => Ok(git::squash_index().enqueue(&conn)?), other => Err(anyhow!("Unrecognized job type `{}`", other)), } } diff --git a/src/git.rs b/src/git.rs index 70aecdf52fb..94ae1a78865 100644 --- a/src/git.rs +++ b/src/git.rs @@ -3,6 +3,8 @@ use std::collections::HashMap; use std::fs::{self, OpenOptions}; use std::path::{Path, PathBuf}; + +use chrono::Utc; use swirl::PerformError; use tempfile::{Builder, TempDir}; use url::Url; @@ -205,12 +207,11 @@ impl Repository { self.repository .commit(Some("HEAD"), &sig, &sig, &msg, &tree, &[&parent])?; - self.push() + self.push("refs/heads/master") } - /// Push the current branch to "refs/heads/master" - fn push(&self) -> Result<(), PerformError> { - let refname = "refs/heads/master"; + /// Push the current branch to the provided refname + fn push(&self, refspec: &str) -> Result<(), PerformError> { let mut ref_status = Ok(()); let mut callback_called = false; { @@ -219,8 +220,7 @@ impl Repository { callbacks.credentials(|_, user_from_url, cred_type| { self.credentials.git2_callback(user_from_url, cred_type) }); - callbacks.push_update_reference(|cb_refname, status| { - assert_eq!(refname, cb_refname); + 
callbacks.push_update_reference(|_, status| { if let Some(s) = status { ref_status = Err(format!("failed to push a ref: {}", s).into()) } @@ -229,7 +229,7 @@ impl Repository { }); let mut opts = git2::PushOptions::new(); opts.remote_callbacks(callbacks); - origin.push(&[refname], Some(&mut opts))?; + origin.push(&[refspec], Some(&mut opts))?; } if !callback_called { @@ -278,6 +278,24 @@ impl Repository { opts.remote_callbacks(callbacks); opts } + + /// Reset `HEAD` to a single commit with all the index contents, but no parent + fn squash_to_single_commit(&self, msg: &str) -> Result<(), PerformError> { + let tree = self.repository.find_commit(self.head_oid()?)?.tree()?; + let sig = self.repository.signature()?; + + // We cannot update an existing `update_ref`, because that requires the + // first parent of this commit to match the ref's current value. + // Instead, create the commit and then do a hard reset. + let commit = self.repository.commit(None, &sig, &sig, &msg, &tree, &[])?; + let commit = self + .repository + .find_object(commit, Some(git2::ObjectType::Commit))?; + self.repository + .reset(&commit, git2::ResetType::Hard, None)?; + + Ok(()) + } } #[swirl::background_job] @@ -360,3 +378,34 @@ pub fn yank( Ok(()) }) } + +/// Collapse the index into a single commit, archiving the current history in a snapshot branch. 
+#[swirl::background_job] +pub fn squash_index(env: &Environment) -> Result<(), PerformError> { + let repo = env.lock_index()?; + println!("Squashing the index into a single commit."); + + let now = Utc::now().format("%Y-%m-%d"); + let head = repo.head_oid()?; + let msg = format!("Collapse index into one commit\n\n\ + + Previous HEAD was {}, now on the `snapshot-{}` branch\n\n\ + + More information about this change can be found [online] and on [this issue].\n\n\ + + [online]: https://internals.rust-lang.org/t/cargos-crate-index-upcoming-squash-into-one-commit/8440\n\ + [this issue]: https://github.com/rust-lang/crates-io-cargo-teams/issues/47", head, now); + + // Create a snapshot branch of current `HEAD`. + repo.push(&format!("HEAD:refs/heads/snapshot-{}", now))?; + + repo.squash_to_single_commit(&msg)?; + + // Because this will not be a fast-forward push, `+` is added to the + // beginning of the refspec to force the push. + repo.push("+HEAD:refs/heads/master")?; + + println!("The index has been successfully squashed."); + + Ok(()) +}