commit 5c96beea07c6174cd9cc3bcb10d8bfcbe5560114 Author: Puck Meerburg Date: Thu May 2 18:01:23 2024 +0000 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..1ac7f76 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,426 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstream" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "cc" +version = "1.0.96" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "065a29261d53ba54260972629f9ca6bffa69bac13cd1fed61420f7fa68b9f8bd" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] + +[[package]] +name = "clap" +version = "4.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "gerrit-filter-branch" +version = "0.1.0" +dependencies = [ + "clap", + "git2", +] + +[[package]] +name = "git2" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" +dependencies = [ + "bitflags", + "libc", + "libgit2-sys", + "log", + "openssl-probe", + "openssl-sys", + "url", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + +[[package]] +name = "libc" +version = "0.2.154" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" + +[[package]] +name = "libgit2-sys" +version = "0.16.2+1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" +dependencies = [ + "cc", + "libc", + "libssh2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", +] + +[[package]] +name = "libssh2-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dc8a030b787e2119a731f1951d6a773e2280c660f8ec4b0f5e1505a386e71ee" +dependencies = [ + "cc", + "libc", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-sys" +version = "1.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "proc-macro2" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..0244b84 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "gerrit-filter-branch" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.5.4", features = ["derive"] } +git2 = "0.18.3" diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..9f33336 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,251 @@ +use std::collections::{HashMap, HashSet}; + +use clap::Parser; +use git2::{Oid, Repository, Signature}; + +#[derive(Parser, Debug)] +#[command(version, about, long_about = None)] +struct Args { + #[arg(short, long)] + /// The path to the repository to rewrite. + repo: String, + + #[arg(short, long)] + /// The email address to look for. + rewrite_email: String, + + #[arg(short, long)] + /// The author/committer name to replace the old one with. + new_name: String, + + #[arg(short, long)] + /// The author/committer email to replace the old one with. + new_email: String, +} + +struct State { + repo: git2::Repository, + rewritten: HashMap, + unrewritten: HashSet, + args: Args, +} + +impl State { + /// Recursively rewrite a non-meta commit. + fn rewrite(&mut self, oid: Oid) -> Oid { + if self.unrewritten.contains(&oid) { + return oid; + } + + if let Some(oid) = self.rewritten.get(&oid) { + return *oid; + } + + let mut commit = self.repo.find_commit(oid).unwrap(); + // If the email address matches, we will have to rewrite this commit either way, even if parent IDs match. + let rewrite_author = commit.author().email().unwrap() == &self.args.rewrite_email; + + let parent_ids: Vec<_> = commit.parent_ids().collect(); + drop(commit); + + let mut new_parent_ids = Vec::new(); + let mut same_parents = true; + // Rewrite the parents of this commit recursively. + for id in parent_ids { + let new_id = self.rewrite(id); + if new_id != id { + // Keep track if the parents are identical cheaply. + same_parents = false; + } + new_parent_ids.push(new_id); + } + + // If this commit has nothing to rewrite, skip it now. + if same_parents && !rewrite_author { + self.unrewritten.insert(oid); + return oid; + } + + commit = self.repo.find_commit(oid).unwrap(); + + // Find the list of new parent commits for this commit. + let new_parents: Vec<_> = new_parent_ids + .into_iter() + .map(|f| self.repo.find_commit(f).unwrap()) + .collect(); + let new_parents_list: Vec<_> = new_parents.iter().collect(); + + // Rewrite author and committer respectively, where necessary. + let mut author = commit.author().to_owned(); + if author.email().unwrap() == &self.args.rewrite_email { + author = + Signature::new(&self.args.new_name, &self.args.new_email, &author.when()).unwrap(); + } + + let mut committer = commit.committer().to_owned(); + if committer.email().unwrap() == &self.args.rewrite_email { + committer = + Signature::new(&self.args.new_name, &self.args.new_email, &committer.when()) + .unwrap(); + } + + // Generate a new commit (Commit::amend doesn't work when changing parents.) + let new_id = self + .repo + .commit( + None, + &author, + &committer, + commit.message_raw().unwrap(), + &commit.tree().unwrap(), + &new_parents_list[..], + ) + .unwrap(); + + self.rewritten.insert(oid, new_id); + + new_id + } + + /// Rewrite a comment/meta commit. + fn rewrite_meta(&mut self, oid: Oid) -> Oid { + let mut commit = self.repo.find_commit(oid).unwrap(); + + // A notedb commit always has one parent; simplify the logic and rewrite the parent if needed. + let mut parent = None; + if commit.parent_count() > 0 { + let parent_oid = commit.parent_id(0).unwrap(); + drop(commit); + let new_meta = self.rewrite_meta(parent_oid); + parent = Some(self.repo.find_commit(new_meta).unwrap()); + commit = self.repo.find_commit(oid).unwrap(); + } + + let tree = commit.tree().unwrap(); + let mut builder = self.repo.treebuilder(None).unwrap(); + + // Notedb comment files are always in the root of the tree. + for entry in &tree { + let name = entry.name().unwrap(); + + // The name of the meta file is a valid OID of a commit. + let oid = Oid::from_str(name).expect("unexpected filename"); + + // Find the new OID to use for the filename. + let newoid = if self.unrewritten.contains(&oid) { + oid + } else { + *self.rewritten.get(&oid).unwrap() + }; + + let blob = self.repo.find_blob(entry.id()).unwrap(); + let content = std::str::from_utf8(blob.content()).unwrap(); + let mut newcontent = content.to_owned(); + // Rewrite each commit ID that we've rewritten so far. + // This is a hack: the data is JSON and we could parse it as such, but this will be likely as reliable, + // and SHA1 hashes are unique enough we don't have to worry about accidental misrewrites causing invalid data. + for (from, to) in &self.rewritten { + newcontent = newcontent.replace(&from.to_string(), &to.to_string()); + } + + // Store as a blob, and insert into the tree using the rewritten OID's name. + let blob = self.repo.blob(newcontent.as_bytes()).unwrap(); + builder + .insert(newoid.to_string(), blob, entry.filemode_raw()) + .unwrap(); + } + + // Write the tree to disk. + let newtree = builder.write().unwrap(); + + // Replace the rewritten OIDs in the update messages too. + // This is a hack: see above for why it's fine. + let mut message = commit.message_raw().unwrap().to_string(); + for (from, to) in &self.rewritten { + message = message.replace(&from.to_string(), &to.to_string()); + } + + // Write the commit back to disk. + if let Some(parent) = parent.as_ref() { + self.repo + .commit( + None, + &commit.author(), + &commit.committer(), + &message, + &self.repo.find_tree(newtree).unwrap(), + &[parent], + ) + .unwrap() + } else { + self.repo + .commit( + None, + &commit.author(), + &commit.committer(), + &message, + &self.repo.find_tree(newtree).unwrap(), + &[], + ) + .unwrap() + } + } +} + +fn main() { + let args = Args::parse(); + let repo = args.repo.clone(); + + let mut state = State { + repo: Repository::open_bare(repo).unwrap(), + rewritten: HashMap::new(), + unrewritten: HashSet::new(), + args, + }; + + let refs: Vec = state + .repo + .references() + .unwrap() + .names() + .map(|f| f.unwrap().to_owned()) + .collect(); + + // First rewrite the non-meta references, as the meta references will always refer to these. + for gref in &refs { + if gref.ends_with("/meta") { + continue; + } + + let refoid = state.repo.find_reference(&gref).unwrap().target().unwrap(); + let newoid = state.rewrite(refoid); + if refoid != newoid { + println!("{:?}: {:?} -> {:?}", gref, refoid, newoid); + state + .repo + .reference(&gref, newoid, true, "filter-branch rewrite") + .unwrap(); + } else { + println!("{:?}: {:?} (unchanged)", gref, refoid); + } + } + + for gref in &refs { + if !gref.ends_with("/meta") { + continue; + } + + let refoid = state.repo.find_reference(&gref).unwrap().target().unwrap(); + let newoid = state.rewrite_meta(refoid); + + if refoid != newoid { + println!("{:?}: {:?} -> {:?}", gref, refoid, newoid); + state + .repo + .reference(&gref, newoid, true, "filter-branch rewrite") + .unwrap(); + } else { + println!("{:?}: {:?} (unchanged)", gref, refoid); + } + } +}