just playing with tangled
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at gvimdiff 2259 lines 86 kB view raw
// Copyright 2020 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![allow(missing_docs)]

use std::any::Any;
use std::collections::HashSet;
use std::fmt::Debug;
use std::fmt::Error;
use std::fmt::Formatter;
use std::fs;
use std::io;
use std::io::Cursor;
use std::io::Read;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use std::process::ExitStatus;
use std::str;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;
use std::time::SystemTime;

use async_trait::async_trait;
use futures::stream::BoxStream;
use gix::bstr::BString;
use gix::objs::CommitRef;
use gix::objs::CommitRefIter;
use gix::objs::WriteTo as _;
use itertools::Itertools as _;
use pollster::FutureExt as _;
use prost::Message as _;
use smallvec::SmallVec;
use thiserror::Error;

use crate::backend::make_root_commit;
use crate::backend::Backend;
use crate::backend::BackendError;
use crate::backend::BackendInitError;
use crate::backend::BackendLoadError;
use crate::backend::BackendResult;
use crate::backend::ChangeId;
use crate::backend::Commit;
use crate::backend::CommitId;
use crate::backend::Conflict;
use crate::backend::ConflictId;
use crate::backend::ConflictTerm;
use crate::backend::CopyRecord;
use crate::backend::FileId;
use crate::backend::MergedTreeId;
use crate::backend::MillisSinceEpoch;
use crate::backend::SecureSig;
use crate::backend::Signature;
use crate::backend::SigningFn;
use crate::backend::SymlinkId;
use crate::backend::Timestamp;
use crate::backend::Tree;
use crate::backend::TreeId;
use crate::backend::TreeValue;
use crate::file_util::IoResultExt as _;
use crate::file_util::PathError;
use crate::index::Index;
use crate::lock::FileLock;
use crate::merge::Merge;
use crate::merge::MergeBuilder;
use crate::object_id::ObjectId;
use crate::repo_path::RepoPath;
use crate::repo_path::RepoPathBuf;
use crate::repo_path::RepoPathComponentBuf;
use crate::settings::UserSettings;
use crate::stacked_table::MutableTable;
use crate::stacked_table::ReadonlyTable;
use crate::stacked_table::TableSegment as _;
use crate::stacked_table::TableStore;
use crate::stacked_table::TableStoreError;

/// Length in bytes of the object ids this backend accepts; also the key size
/// passed to the extra-metadata `TableStore`.
const HASH_LENGTH: usize = 20;
/// Length in bytes of the root change id.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
/// File name suffix that marks a blob tree entry as a conflict when reading
/// trees.
const CONFLICT_SUFFIX: &str = ".jjconflict";

/// Name of the extra commit header that carries the tree ids of a conflicted
/// root tree.
pub const JJ_TREES_COMMIT_HEADER: &[u8] = b"jj:trees";

/// Error that may occur while initializing a [`GitBackend`].
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    #[error("Failed to initialize git repository")]
    InitRepository(#[source] gix::init::Error),
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    #[error(transparent)]
    Path(PathError),
}

// Lets `?` turn a boxed init error into the generic `BackendInitError`.
impl From<Box<GitBackendInitError>> for BackendInitError {
    fn from(err: Box<GitBackendInitError>) -> Self {
        BackendInitError(err)
    }
}

/// Error that may occur while loading an existing [`GitBackend`].
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    #[error("Failed to open git repository")]
    OpenRepository(#[source] gix::open::Error),
    #[error(transparent)]
    Path(PathError),
}

// Lets `?` turn a boxed load error into the generic `BackendLoadError`.
impl From<Box<GitBackendLoadError>> for BackendLoadError {
    fn from(err: Box<GitBackendLoadError>) -> Self {
        BackendLoadError(err)
    }
}

/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    #[error("Failed to read non-git metadata")]
    ReadMetadata(#[source] TableStoreError),
    #[error("Failed to write non-git metadata")]
    WriteMetadata(#[source] TableStoreError),
}

// Wraps the Git-specific error in `BackendError::Other` so that it can be
// propagated with `?` from functions returning `BackendResult`.
impl From<GitBackendError> for BackendError {
    fn from(err: GitBackendError) -> Self {
        BackendError::Other(err.into())
    }
}

/// Error that may occur while running the external `git gc` command.
#[derive(Debug, Error)]
pub enum GitGcError {
    #[error("Failed to run git gc command")]
    GcCommand(#[source] std::io::Error),
    #[error("git gc command exited with an error: {0}")]
    GcCommandErrorStatus(ExitStatus),
}

/// Backend that stores objects in a Git repository (accessed through `gix`)
/// and keeps non-Git metadata in a separate `TableStore`.
pub struct GitBackend {
    // While gix::Repository can be created from gix::ThreadSafeRepository, it's
    // cheaper to cache the thread-local instance behind a mutex than creating
    // one for each backend method call. Our GitBackend is most likely to be
    // used in a single-threaded context.
    base_repo: gix::ThreadSafeRepository,
    repo: Mutex<gix::Repository>,
    root_commit_id: CommitId,
    root_change_id: ChangeId,
    empty_tree_id: TreeId,
    extra_metadata_store: TableStore,
    // Head of the extra-metadata table; `None` until it is first read or saved.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
}

impl GitBackend {
    /// Returns the name of this backend type, `"git"`.
    pub fn name() -> &'static str {
        "git"
    }

    /// Builds the backend from an already opened repo and table store.
    ///
    /// The root commit id and root change id are all-zero ids of
    /// `HASH_LENGTH` and `CHANGE_ID_LENGTH` bytes respectively, and the
    /// metadata cache starts out empty.
    fn new(base_repo: gix::ThreadSafeRepository, extra_metadata_store: TableStore) -> Self {
        let repo = Mutex::new(base_repo.to_thread_local());
        let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
        let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
        let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
        GitBackend {
            base_repo,
            repo,
            root_commit_id,
            root_change_id,
            empty_tree_id,
            extra_metadata_store,
            cached_extra_metadata: Mutex::new(None),
        }
    }

    /// Initializes backend by creating a new bare Git repo at
    /// `<store_path>/git`.
    pub fn init_internal(
        settings: &UserSettings,
        store_path: &Path,
    ) -> Result<Self, Box<GitBackendInitError>> {
        let git_repo_path = Path::new("git");
        let git_repo = gix::ThreadSafeRepository::init_opts(
            store_path.join(git_repo_path),
            gix::create::Kind::Bare,
            gix::create::Options::default(),
            gix_open_opts_from_settings(settings),
        )
        .map_err(GitBackendInitError::InitRepository)?;
        Self::init_with_repo(store_path, git_repo_path, git_repo)
    }

    /// Initializes backend by creating a new Git repo at the specified
    /// workspace path. The workspace directory must exist.
    ///
    /// `workspace_root` is joined onto `store_path` before being
    /// canonicalized; the recorded Git repo path is `<workspace_root>/.git`.
    pub fn init_colocated(
        settings: &UserSettings,
        store_path: &Path,
        workspace_root: &Path,
    ) -> Result<Self, Box<GitBackendInitError>> {
        let canonical_workspace_root = {
            let path = store_path.join(workspace_root);
            dunce::canonicalize(&path)
                .context(&path)
                .map_err(GitBackendInitError::Path)?
        };
        let git_repo = gix::ThreadSafeRepository::init_opts(
            canonical_workspace_root,
            gix::create::Kind::WithWorktree,
            gix::create::Options::default(),
            gix_open_opts_from_settings(settings),
        )
        .map_err(GitBackendInitError::InitRepository)?;
        let git_repo_path = workspace_root.join(".git");
        Self::init_with_repo(store_path, &git_repo_path, git_repo)
    }

    /// Initializes backend with an existing Git repo at the specified path.
    ///
    /// `git_repo_path` is joined onto `store_path` to locate the repo, but it
    /// is recorded as given.
    pub fn init_external(
        settings: &UserSettings,
        store_path: &Path,
        git_repo_path: &Path,
    ) -> Result<Self, Box<GitBackendInitError>> {
        let canonical_git_repo_path = {
            let path = store_path.join(git_repo_path);
            canonicalize_git_repo_path(&path)
                .context(&path)
                .map_err(GitBackendInitError::Path)?
        };
        let git_repo = gix::ThreadSafeRepository::open_opts(
            canonical_git_repo_path,
            gix_open_opts_from_settings(settings),
        )
        .map_err(GitBackendInitError::OpenRepository)?;
        Self::init_with_repo(store_path, git_repo_path, git_repo)
    }

    /// Creates the on-disk state shared by all `init_*` functions and builds
    /// the backend.
    ///
    /// Creates the `<store_path>/extra` directory and writes `git_repo_path`
    /// to the `<store_path>/git_target` file.
    ///
    /// # Panics
    ///
    /// Panics if `git_repo_path` is not valid UTF-8.
    fn init_with_repo(
        store_path: &Path,
        git_repo_path: &Path,
        git_repo: gix::ThreadSafeRepository,
    ) -> Result<Self, Box<GitBackendInitError>> {
        let extra_path = store_path.join("extra");
        fs::create_dir(&extra_path)
            .context(&extra_path)
            .map_err(GitBackendInitError::Path)?;
        let target_path = store_path.join("git_target");
        if cfg!(windows) && git_repo_path.is_relative() {
            // When a repository is created in Windows, format the path with *forward
            // slashes* and not backwards slashes. This makes it possible to use the same
            // repository under Windows Subsystem for Linux.
            //
            // This only works for relative paths. If the path is absolute, there's not much
            // we can do, and it simply won't work inside and outside WSL at the same time.
            let git_repo_path_string = git_repo_path
                .components()
                .map(|component| component.as_os_str().to_str().unwrap().to_owned())
                .join("/");
            fs::write(&target_path, git_repo_path_string.as_bytes())
                .context(&target_path)
                .map_err(GitBackendInitError::Path)?;
        } else {
            fs::write(&target_path, git_repo_path.to_str().unwrap().as_bytes())
                .context(&target_path)
                .map_err(GitBackendInitError::Path)?;
        };
        let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
        Ok(GitBackend::new(git_repo, extra_metadata_store))
    }

    /// Loads a backend that was previously initialized under `store_path`.
    ///
    /// The Git repo location is read from `<store_path>/git_target` and
    /// resolved relative to `store_path`; the extra metadata is loaded from
    /// `<store_path>/extra`.
    pub fn load(
        settings: &UserSettings,
        store_path: &Path,
    ) -> Result<Self, Box<GitBackendLoadError>> {
        let git_repo_path = {
            let target_path = store_path.join("git_target");
            let git_repo_path_str = fs::read_to_string(&target_path)
                .context(&target_path)
                .map_err(GitBackendLoadError::Path)?;
            let git_repo_path = store_path.join(git_repo_path_str);
            canonicalize_git_repo_path(&git_repo_path)
                .context(&git_repo_path)
                .map_err(GitBackendLoadError::Path)?
        };
        let repo = gix::ThreadSafeRepository::open_opts(
            git_repo_path,
            gix_open_opts_from_settings(settings),
        )
        .map_err(GitBackendLoadError::OpenRepository)?;
        let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
        Ok(GitBackend::new(repo, extra_metadata_store))
    }

    /// Locks and returns the cached thread-local repo instance.
    ///
    /// # Panics
    ///
    /// Panics if the mutex is poisoned.
    fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
        self.repo.lock().unwrap()
    }

    /// Returns a new thread-local instance for accessing the underlying Git
    /// repo.
    pub fn git_repo(&self) -> gix::Repository {
        self.base_repo.to_thread_local()
    }

    /// Path to the `.git` directory or the repository itself if it's bare.
    pub fn git_repo_path(&self) -> &Path {
        self.base_repo.path()
    }

    /// Path to the working directory if the repository isn't bare.
    pub fn git_workdir(&self) -> Option<&Path> {
        self.base_repo.work_dir()
    }

    /// Returns the cached head of the extra-metadata table, reading it from
    /// the store and caching it if nothing has been cached yet.
    fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
        let mut locked_head = self.cached_extra_metadata.lock().unwrap();
        match locked_head.as_ref() {
            Some(head) => Ok(head.clone()),
            None => {
                let table = self
                    .extra_metadata_store
                    .get_head()
                    .map_err(GitBackendError::ReadMetadata)?;
                *locked_head = Some(table.clone());
                Ok(table)
            }
        }
    }

    /// Reads the head of the extra-metadata table together with the file lock
    /// returned by the store. This bypasses the in-memory cache.
    fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
        let table = self
            .extra_metadata_store
            .get_head_locked()
            .map_err(GitBackendError::ReadMetadata)?;
        Ok(table)
    }

    /// Saves `mut_table` to the store and caches the resulting table.
    ///
    /// `_table_lock` is not used by the body.
    /// NOTE(review): presumably it is taken only to make callers hold the
    /// table lock while saving; confirm against `TableStore`.
    fn save_extra_metadata_table(
        &self,
        mut_table: MutableTable,
        _table_lock: &FileLock,
    ) -> BackendResult<()> {
        let table = self
            .extra_metadata_store
            .save_table(mut_table)
            .map_err(GitBackendError::WriteMetadata)?;
        // Since the parent table was the head, the saved table is likely to be the
        // new head. If it's not, the cache will be reloaded when an entry can't be
        // found.
        *self.cached_extra_metadata.lock().unwrap() = Some(table);
        Ok(())
    }

    /// Imports the given commits and ancestors from the backing Git repo.
    ///
    /// The `head_ids` may contain commits that have already been imported, but
    /// the caller should filter them out to eliminate redundant I/O processing.
    ///
    /// The root commit id is ignored. Returns without touching the repo if no
    /// other ids remain.
    #[tracing::instrument(skip(self, head_ids))]
    pub fn import_head_commits<'a>(
        &self,
        head_ids: impl IntoIterator<Item = &'a CommitId>,
    ) -> BackendResult<()> {
        let head_ids: HashSet<&CommitId> = head_ids
            .into_iter()
            .filter(|&id| *id != self.root_commit_id)
            .collect();
        if head_ids.is_empty() {
            return Ok(());
        }

        // Create no-gc ref even if known to the extras table. Concurrent GC
        // process might have deleted the no-gc ref.
        let locked_repo = self.lock_git_repo();
        locked_repo
            .edit_references(head_ids.iter().copied().map(to_no_gc_ref_update))
            .map_err(|err| BackendError::Other(Box::new(err)))?;

        // These commits are imported from Git. Make our change ids persist (otherwise
        // future write_commit() could reassign new change id.)
        tracing::debug!(
            heads_count = head_ids.len(),
            "import extra metadata entries"
        );
        let (table, table_lock) = self.read_extra_metadata_table_locked()?;
        let mut mut_table = table.start_mutation();
        import_extra_metadata_entries_from_heads(
            &locked_repo,
            &mut mut_table,
            &table_lock,
            &head_ids,
        )?;
        self.save_extra_metadata_table(mut_table, &table_lock)
    }

    /// Reads the Git blob identified by `id` into memory and returns a reader
    /// over its contents.
    ///
    /// Fails if `id` has the wrong length, the object cannot be found, or the
    /// object is not a blob.
    fn read_file_sync(&self, id: &FileId) -> BackendResult<Box<dyn Read>> {
        let git_blob_id = validate_git_object_id(id)?;
        let locked_repo = self.lock_git_repo();
        let mut blob = locked_repo
            .find_object(git_blob_id)
            .map_err(|err| map_not_found_err(err, id))?
            .try_into_blob()
            .map_err(|err| to_read_object_err(err, id))?;
        Ok(Box::new(Cursor::new(blob.take_data())))
    }

    /// Builds a `gix` blob-diff platform from default options, an empty
    /// attributes stack, and the command context of a fresh thread-local repo.
    fn new_diff_platform(&self) -> BackendResult<gix::diff::blob::Platform> {
        let attributes = gix::worktree::Stack::new(
            Path::new(""),
            gix::worktree::stack::State::AttributesStack(Default::default()),
            gix::worktree::glob::pattern::Case::Sensitive,
            Vec::new(),
            Vec::new(),
        );
        let filter = gix::diff::blob::Pipeline::new(
            Default::default(),
            gix::filter::plumbing::Pipeline::new(
                self.git_repo()
                    .command_context()
                    .map_err(|err| BackendError::Other(Box::new(err)))?,
                Default::default(),
            ),
            Vec::new(),
            Default::default(),
        );
        Ok(gix::diff::blob::Platform::new(
            Default::default(),
            filter,
            gix::diff::blob::pipeline::Mode::ToGit,
            attributes,
        ))
    }

    /// Looks up the Git tree object for the root tree of commit `id` in
    /// `repo`.
    ///
    /// The commit is read by blocking on `read_commit`. Only the first term of
    /// the commit's root tree merge is used.
    fn read_tree_for_commit<'repo>(
        &self,
        repo: &'repo gix::Repository,
        id: &CommitId,
    ) -> BackendResult<gix::Tree<'repo>> {
        let tree = self.read_commit(id).block_on()?.root_tree.to_merge();
        // TODO(kfm): probably want to do something here if it is a merge
        let tree_id = tree.first().clone();
        let gix_id = validate_git_object_id(&tree_id)?;
        repo.find_object(gix_id)
            .map_err(|err| map_not_found_err(err, &tree_id))?
            .try_into_tree()
            .map_err(|err| to_read_object_err(err, &tree_id))
    }
}

/// Canonicalizes the given `path` except for the last `".git"` component.
///
/// The last path component matters when opening a Git repo without `core.bare`
/// config. This config is usually set, but the "repo" tool will set up such
/// repositories and symlinks. Opening such repo with fully-canonicalized path
/// would turn a colocated Git repo into a bare repo.
458pub fn canonicalize_git_repo_path(path: &Path) -> io::Result<PathBuf> { 459 if path.ends_with(".git") { 460 let workdir = path.parent().unwrap(); 461 dunce::canonicalize(workdir).map(|dir| dir.join(".git")) 462 } else { 463 dunce::canonicalize(path) 464 } 465} 466 467fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options { 468 let user_name = settings.user_name(); 469 let user_email = settings.user_email(); 470 gix::open::Options::default() 471 .config_overrides([ 472 // Committer has to be configured to record reflog. Author isn't 473 // needed, but let's copy the same values. 474 format!("author.name={user_name}"), 475 format!("author.email={user_email}"), 476 format!("committer.name={user_name}"), 477 format!("committer.email={user_email}"), 478 ]) 479 // The git_target path should point the repository, not the working directory. 480 .open_path_as_is(true) 481 // Gitoxide recommends this when correctness is preferred 482 .strict_config(true) 483 // This breaks tests and generally seems undesirable 484 .lossy_config(false) 485} 486 487/// Reads the `jj:trees` header from the commit. 488fn root_tree_from_header(git_commit: &CommitRef) -> Result<Option<MergedTreeId>, ()> { 489 for (key, value) in &git_commit.extra_headers { 490 if *key == JJ_TREES_COMMIT_HEADER { 491 let mut tree_ids = SmallVec::new(); 492 for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') { 493 let tree_id = TreeId::try_from_hex(hex).or(Err(()))?; 494 if tree_id.as_bytes().len() != HASH_LENGTH { 495 return Err(()); 496 } 497 tree_ids.push(tree_id); 498 } 499 // It is invalid to use `jj:trees` with a non-conflicted tree. If this were 500 // allowed, it would be possible to construct a commit which appears to have 501 // different contents depending on whether it is viewed using `jj` or `git`. 
502 if tree_ids.len() == 1 || tree_ids.len() % 2 == 0 { 503 return Err(()); 504 } 505 return Ok(Some(MergedTreeId::Merge(Merge::from_vec(tree_ids)))); 506 } 507 } 508 Ok(None) 509} 510 511fn commit_from_git_without_root_parent( 512 id: &CommitId, 513 git_object: &gix::Object, 514 uses_tree_conflict_format: bool, 515 is_shallow: bool, 516) -> BackendResult<Commit> { 517 let commit = git_object 518 .try_to_commit_ref() 519 .map_err(|err| to_read_object_err(err, id))?; 520 521 // We reverse the bits of the commit id to create the change id. We don't want 522 // to use the first bytes unmodified because then it would be ambiguous 523 // if a given hash prefix refers to the commit id or the change id. It 524 // would have been enough to pick the last 16 bytes instead of the 525 // leading 16 bytes to address that. We also reverse the bits to make it less 526 // likely that users depend on any relationship between the two ids. 527 let change_id = ChangeId::new( 528 id.as_bytes()[4..HASH_LENGTH] 529 .iter() 530 .rev() 531 .map(|b| b.reverse_bits()) 532 .collect(), 533 ); 534 // shallow commits don't have parents their parents actually fetched, so we 535 // discard them here 536 // TODO: This causes issues when a shallow repository is deepened/unshallowed 537 let parents = if is_shallow { 538 vec![] 539 } else { 540 commit 541 .parents() 542 .map(|oid| CommitId::from_bytes(oid.as_bytes())) 543 .collect_vec() 544 }; 545 let tree_id = TreeId::from_bytes(commit.tree().as_bytes()); 546 // If this commit is a conflict, we'll update the root tree later, when we read 547 // the extra metadata. 
548 let root_tree = root_tree_from_header(&commit) 549 .map_err(|()| to_read_object_err("Invalid jj:trees header", id))?; 550 let root_tree = root_tree.unwrap_or_else(|| { 551 if uses_tree_conflict_format { 552 MergedTreeId::resolved(tree_id) 553 } else { 554 MergedTreeId::Legacy(tree_id) 555 } 556 }); 557 // Use lossy conversion as commit message with "mojibake" is still better than 558 // nothing. 559 // TODO: what should we do with commit.encoding? 560 let description = String::from_utf8_lossy(commit.message).into_owned(); 561 let author = signature_from_git(commit.author()); 562 let committer = signature_from_git(commit.committer()); 563 564 // If the commit is signed, extract both the signature and the signed data 565 // (which is the commit buffer with the gpgsig header omitted). 566 // We have to re-parse the raw commit data because gix CommitRef does not give 567 // us the sogned data, only the signature. 568 // Ideally, we could use try_to_commit_ref_iter at the beginning of this 569 // function and extract everything from that. For now, this works 570 let secure_sig = commit 571 .extra_headers 572 .iter() 573 // gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too 574 .any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256") 575 .then(|| CommitRefIter::signature(&git_object.data)) 576 .transpose() 577 .map_err(|err| to_read_object_err(err, id))? 578 .flatten() 579 .map(|(sig, data)| SecureSig { 580 data: data.to_bstring().into(), 581 sig: sig.into_owned().into(), 582 }); 583 584 Ok(Commit { 585 parents, 586 predecessors: vec![], 587 // If this commit has associated extra metadata, we may reset this later. 
588 root_tree, 589 change_id, 590 description, 591 author, 592 committer, 593 secure_sig, 594 }) 595} 596 597const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING"; 598 599fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature { 600 let name = signature.name; 601 let name = if name != EMPTY_STRING_PLACEHOLDER { 602 String::from_utf8_lossy(name).into_owned() 603 } else { 604 "".to_string() 605 }; 606 let email = signature.email; 607 let email = if email != EMPTY_STRING_PLACEHOLDER { 608 String::from_utf8_lossy(email).into_owned() 609 } else { 610 "".to_string() 611 }; 612 let timestamp = MillisSinceEpoch(signature.time.seconds * 1000); 613 let tz_offset = signature.time.offset.div_euclid(60); // in minutes 614 Signature { 615 name, 616 email, 617 timestamp: Timestamp { 618 timestamp, 619 tz_offset, 620 }, 621 } 622} 623 624fn signature_to_git(signature: &Signature) -> gix::actor::SignatureRef<'_> { 625 // git does not support empty names or emails 626 let name = if !signature.name.is_empty() { 627 &signature.name 628 } else { 629 EMPTY_STRING_PLACEHOLDER 630 }; 631 let email = if !signature.email.is_empty() { 632 &signature.email 633 } else { 634 EMPTY_STRING_PLACEHOLDER 635 }; 636 let time = gix::date::Time::new( 637 signature.timestamp.timestamp.0.div_euclid(1000), 638 signature.timestamp.tz_offset * 60, // in seconds 639 ); 640 gix::actor::SignatureRef { 641 name: name.into(), 642 email: email.into(), 643 time, 644 } 645} 646 647fn serialize_extras(commit: &Commit) -> Vec<u8> { 648 let mut proto = crate::protos::git_store::Commit { 649 change_id: commit.change_id.to_bytes(), 650 ..Default::default() 651 }; 652 if let MergedTreeId::Merge(tree_ids) = &commit.root_tree { 653 proto.uses_tree_conflict_format = true; 654 if !tree_ids.is_resolved() { 655 // This is done for the sake of jj versions <0.28 (before commit 656 // f7b14be) being able to read the repo. At some point in the 657 // future, we can stop doing it. 
658 proto.root_tree = tree_ids.iter().map(|r| r.to_bytes()).collect(); 659 } 660 } 661 for predecessor in &commit.predecessors { 662 proto.predecessors.push(predecessor.to_bytes()); 663 } 664 proto.encode_to_vec() 665} 666 667fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) { 668 let proto = crate::protos::git_store::Commit::decode(bytes).unwrap(); 669 commit.change_id = ChangeId::new(proto.change_id); 670 if let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree { 671 if proto.uses_tree_conflict_format { 672 if !proto.root_tree.is_empty() { 673 let merge_builder: MergeBuilder<_> = proto 674 .root_tree 675 .iter() 676 .map(|id_bytes| TreeId::from_bytes(id_bytes)) 677 .collect(); 678 commit.root_tree = MergedTreeId::Merge(merge_builder.build()); 679 } else { 680 // uses_tree_conflict_format was set but there was no root_tree override in the 681 // proto, which means we should just promote the tree id from the 682 // git commit to be a known-conflict-free tree 683 commit.root_tree = MergedTreeId::resolved(legacy_tree_id.clone()); 684 } 685 } 686 } 687 for predecessor in &proto.predecessors { 688 commit.predecessors.push(CommitId::from_bytes(predecessor)); 689 } 690} 691 692/// Returns `RefEdit` that will create a ref in `refs/jj/keep` if not exist. 693/// Used for preventing GC of commits we create. 
694fn to_no_gc_ref_update(id: &CommitId) -> gix::refs::transaction::RefEdit { 695 let name = format!("{NO_GC_REF_NAMESPACE}{id}"); 696 let new = gix::refs::Target::Object(gix::ObjectId::from_bytes_or_panic(id.as_bytes())); 697 let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(new.clone()); 698 gix::refs::transaction::RefEdit { 699 change: gix::refs::transaction::Change::Update { 700 log: gix::refs::transaction::LogChange { 701 message: "used by jj".into(), 702 ..Default::default() 703 }, 704 expected, 705 new, 706 }, 707 name: name.try_into().unwrap(), 708 deref: false, 709 } 710} 711 712fn to_ref_deletion(git_ref: gix::refs::Reference) -> gix::refs::transaction::RefEdit { 713 let expected = gix::refs::transaction::PreviousValue::ExistingMustMatch(git_ref.target); 714 gix::refs::transaction::RefEdit { 715 change: gix::refs::transaction::Change::Delete { 716 expected, 717 log: gix::refs::transaction::RefLog::AndReference, 718 }, 719 name: git_ref.name, 720 deref: false, 721 } 722} 723 724/// Recreates `refs/jj/keep` refs for the `new_heads`, and removes the other 725/// unreachable and non-head refs. 726fn recreate_no_gc_refs( 727 git_repo: &gix::Repository, 728 new_heads: impl IntoIterator<Item = CommitId>, 729 keep_newer: SystemTime, 730) -> BackendResult<()> { 731 // Calculate diff between existing no-gc refs and new heads. 
732 let new_heads: HashSet<CommitId> = new_heads.into_iter().collect(); 733 let mut no_gc_refs_to_keep_count: usize = 0; 734 let mut no_gc_refs_to_delete: Vec<gix::refs::Reference> = Vec::new(); 735 let git_references = git_repo 736 .references() 737 .map_err(|err| BackendError::Other(err.into()))?; 738 let no_gc_refs_iter = git_references 739 .prefixed(NO_GC_REF_NAMESPACE) 740 .map_err(|err| BackendError::Other(err.into()))?; 741 for git_ref in no_gc_refs_iter { 742 let git_ref = git_ref.map_err(BackendError::Other)?.detach(); 743 let oid = git_ref.target.try_id().ok_or_else(|| { 744 let name = git_ref.name.as_bstr(); 745 BackendError::Other(format!("Symbolic no-gc ref found: {name}").into()) 746 })?; 747 let id = CommitId::from_bytes(oid.as_bytes()); 748 let name_good = git_ref.name.as_bstr()[NO_GC_REF_NAMESPACE.len()..] == id.hex(); 749 if new_heads.contains(&id) && name_good { 750 no_gc_refs_to_keep_count += 1; 751 continue; 752 } 753 // Check timestamp of loose ref, but this is still racy on re-import 754 // because: 755 // - existing packed ref won't be demoted to loose ref 756 // - existing loose ref won't be touched 757 // 758 // TODO: might be better to switch to a dummy merge, where new no-gc ref 759 // will always have a unique name. Doing that with the current 760 // ref-per-head strategy would increase the number of the no-gc refs. 761 // https://github.com/jj-vcs/jj/pull/2659#issuecomment-1837057782 762 let loose_ref_path = git_repo.path().join(git_ref.name.to_path()); 763 if let Ok(metadata) = loose_ref_path.metadata() { 764 let mtime = metadata.modified().expect("unsupported platform?"); 765 if mtime > keep_newer { 766 tracing::trace!(?git_ref, "not deleting new"); 767 no_gc_refs_to_keep_count += 1; 768 continue; 769 } 770 } 771 // Also deletes no-gc ref of random name created by old jj. 
772 tracing::trace!(?git_ref, ?name_good, "will delete"); 773 no_gc_refs_to_delete.push(git_ref); 774 } 775 tracing::info!( 776 new_heads_count = new_heads.len(), 777 no_gc_refs_to_keep_count, 778 no_gc_refs_to_delete_count = no_gc_refs_to_delete.len(), 779 "collected reachable refs" 780 ); 781 782 // It's slow to delete packed refs one by one, so update refs all at once. 783 let ref_edits = itertools::chain( 784 no_gc_refs_to_delete.into_iter().map(to_ref_deletion), 785 new_heads.iter().map(to_no_gc_ref_update), 786 ); 787 git_repo 788 .edit_references(ref_edits) 789 .map_err(|err| BackendError::Other(err.into()))?; 790 791 Ok(()) 792} 793 794fn run_git_gc(git_dir: &Path) -> Result<(), GitGcError> { 795 let mut git = Command::new("git"); 796 git.arg("--git-dir=."); // turn off discovery 797 git.arg("gc"); 798 // Don't specify it by GIT_DIR/--git-dir. On Windows, the path could be 799 // canonicalized as UNC path, which wouldn't be supported by git. 800 git.current_dir(git_dir); 801 // TODO: pass output to UI layer instead of printing directly here 802 let status = git.status().map_err(GitGcError::GcCommand)?; 803 if !status.success() { 804 return Err(GitGcError::GcCommandErrorStatus(status)); 805 } 806 Ok(()) 807} 808 809fn validate_git_object_id(id: &impl ObjectId) -> BackendResult<gix::ObjectId> { 810 if id.as_bytes().len() != HASH_LENGTH { 811 return Err(BackendError::InvalidHashLength { 812 expected: HASH_LENGTH, 813 actual: id.as_bytes().len(), 814 object_type: id.object_type(), 815 hash: id.hex(), 816 }); 817 } 818 Ok(gix::ObjectId::from_bytes_or_panic(id.as_bytes())) 819} 820 821fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError { 822 if matches!(err, gix::object::find::existing::Error::NotFound { .. 
}) { 823 BackendError::ObjectNotFound { 824 object_type: id.object_type(), 825 hash: id.hex(), 826 source: Box::new(err), 827 } 828 } else { 829 to_read_object_err(err, id) 830 } 831} 832 833fn to_read_object_err( 834 err: impl Into<Box<dyn std::error::Error + Send + Sync>>, 835 id: &impl ObjectId, 836) -> BackendError { 837 BackendError::ReadObject { 838 object_type: id.object_type(), 839 hash: id.hex(), 840 source: err.into(), 841 } 842} 843 844fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendError { 845 BackendError::InvalidUtf8 { 846 object_type: id.object_type(), 847 hash: id.hex(), 848 source, 849 } 850} 851 852fn import_extra_metadata_entries_from_heads( 853 git_repo: &gix::Repository, 854 mut_table: &mut MutableTable, 855 _table_lock: &FileLock, 856 head_ids: &HashSet<&CommitId>, 857) -> BackendResult<()> { 858 let shallow_commits = git_repo 859 .shallow_commits() 860 .map_err(|e| BackendError::Other(Box::new(e)))?; 861 862 let mut work_ids = head_ids 863 .iter() 864 .filter(|&id| mut_table.get_value(id.as_bytes()).is_none()) 865 .map(|&id| id.clone()) 866 .collect_vec(); 867 while let Some(id) = work_ids.pop() { 868 let git_object = git_repo 869 .find_object(validate_git_object_id(&id)?) 870 .map_err(|err| map_not_found_err(err, &id))?; 871 let is_shallow = shallow_commits 872 .as_ref() 873 .is_some_and(|shallow| shallow.contains(&git_object.id)); 874 // TODO(#1624): Should we read the root tree here and check if it has a 875 // `.jjconflict-...` entries? That could happen if the user used `git` to e.g. 876 // change the description of a commit with tree-level conflicts. 
877 let commit = commit_from_git_without_root_parent(&id, &git_object, true, is_shallow)?; 878 mut_table.add_entry(id.to_bytes(), serialize_extras(&commit)); 879 work_ids.extend( 880 commit 881 .parents 882 .into_iter() 883 .filter(|id| mut_table.get_value(id.as_bytes()).is_none()), 884 ); 885 } 886 Ok(()) 887} 888 889impl Debug for GitBackend { 890 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { 891 f.debug_struct("GitBackend") 892 .field("path", &self.git_repo_path()) 893 .finish() 894 } 895} 896 897#[async_trait] 898impl Backend for GitBackend { 899 fn as_any(&self) -> &dyn Any { 900 self 901 } 902 903 fn name(&self) -> &str { 904 Self::name() 905 } 906 907 fn commit_id_length(&self) -> usize { 908 HASH_LENGTH 909 } 910 911 fn change_id_length(&self) -> usize { 912 CHANGE_ID_LENGTH 913 } 914 915 fn root_commit_id(&self) -> &CommitId { 916 &self.root_commit_id 917 } 918 919 fn root_change_id(&self) -> &ChangeId { 920 &self.root_change_id 921 } 922 923 fn empty_tree_id(&self) -> &TreeId { 924 &self.empty_tree_id 925 } 926 927 fn concurrency(&self) -> usize { 928 1 929 } 930 931 async fn read_file(&self, _path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> { 932 self.read_file_sync(id) 933 } 934 935 async fn write_file( 936 &self, 937 _path: &RepoPath, 938 contents: &mut (dyn Read + Send), 939 ) -> BackendResult<FileId> { 940 let mut bytes = Vec::new(); 941 contents.read_to_end(&mut bytes).unwrap(); 942 let locked_repo = self.lock_git_repo(); 943 let oid = locked_repo 944 .write_blob(bytes) 945 .map_err(|err| BackendError::WriteObject { 946 object_type: "file", 947 source: Box::new(err), 948 })?; 949 Ok(FileId::new(oid.as_bytes().to_vec())) 950 } 951 952 async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> BackendResult<String> { 953 let git_blob_id = validate_git_object_id(id)?; 954 let locked_repo = self.lock_git_repo(); 955 let mut blob = locked_repo 956 .find_object(git_blob_id) 957 .map_err(|err| map_not_found_err(err, id))? 
958 .try_into_blob() 959 .map_err(|err| to_read_object_err(err, id))?; 960 let target = String::from_utf8(blob.take_data()) 961 .map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?; 962 Ok(target) 963 } 964 965 async fn write_symlink(&self, _path: &RepoPath, target: &str) -> BackendResult<SymlinkId> { 966 let locked_repo = self.lock_git_repo(); 967 let oid = 968 locked_repo 969 .write_blob(target.as_bytes()) 970 .map_err(|err| BackendError::WriteObject { 971 object_type: "symlink", 972 source: Box::new(err), 973 })?; 974 Ok(SymlinkId::new(oid.as_bytes().to_vec())) 975 } 976 977 async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> { 978 if id == &self.empty_tree_id { 979 return Ok(Tree::default()); 980 } 981 let git_tree_id = validate_git_object_id(id)?; 982 983 let locked_repo = self.lock_git_repo(); 984 let git_tree = locked_repo 985 .find_object(git_tree_id) 986 .map_err(|err| map_not_found_err(err, id))? 987 .try_into_tree() 988 .map_err(|err| to_read_object_err(err, id))?; 989 let mut tree = Tree::default(); 990 for entry in git_tree.iter() { 991 let entry = entry.map_err(|err| to_read_object_err(err, id))?; 992 let name = 993 str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?; 994 let (name, value) = match entry.mode().kind() { 995 gix::object::tree::EntryKind::Tree => { 996 let id = TreeId::from_bytes(entry.oid().as_bytes()); 997 (name, TreeValue::Tree(id)) 998 } 999 gix::object::tree::EntryKind::Blob => { 1000 let id = FileId::from_bytes(entry.oid().as_bytes()); 1001 if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) { 1002 ( 1003 basename, 1004 TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())), 1005 ) 1006 } else { 1007 ( 1008 name, 1009 TreeValue::File { 1010 id, 1011 executable: false, 1012 }, 1013 ) 1014 } 1015 } 1016 gix::object::tree::EntryKind::BlobExecutable => { 1017 let id = FileId::from_bytes(entry.oid().as_bytes()); 1018 ( 1019 name, 1020 TreeValue::File { 1021 
id, 1022 executable: true, 1023 }, 1024 ) 1025 } 1026 gix::object::tree::EntryKind::Link => { 1027 let id = SymlinkId::from_bytes(entry.oid().as_bytes()); 1028 (name, TreeValue::Symlink(id)) 1029 } 1030 gix::object::tree::EntryKind::Commit => { 1031 let id = CommitId::from_bytes(entry.oid().as_bytes()); 1032 (name, TreeValue::GitSubmodule(id)) 1033 } 1034 }; 1035 tree.set(RepoPathComponentBuf::from(name), value); 1036 } 1037 Ok(tree) 1038 } 1039 1040 async fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> { 1041 // Tree entries to be written must be sorted by Entry::filename(), which 1042 // is slightly different from the order of our backend::Tree. 1043 let entries = contents 1044 .entries() 1045 .map(|entry| { 1046 let name = entry.name().as_internal_str(); 1047 match entry.value() { 1048 TreeValue::File { 1049 id, 1050 executable: false, 1051 } => gix::objs::tree::Entry { 1052 mode: gix::object::tree::EntryKind::Blob.into(), 1053 filename: name.into(), 1054 oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()), 1055 }, 1056 TreeValue::File { 1057 id, 1058 executable: true, 1059 } => gix::objs::tree::Entry { 1060 mode: gix::object::tree::EntryKind::BlobExecutable.into(), 1061 filename: name.into(), 1062 oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()), 1063 }, 1064 TreeValue::Symlink(id) => gix::objs::tree::Entry { 1065 mode: gix::object::tree::EntryKind::Link.into(), 1066 filename: name.into(), 1067 oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()), 1068 }, 1069 TreeValue::Tree(id) => gix::objs::tree::Entry { 1070 mode: gix::object::tree::EntryKind::Tree.into(), 1071 filename: name.into(), 1072 oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()), 1073 }, 1074 TreeValue::GitSubmodule(id) => gix::objs::tree::Entry { 1075 mode: gix::object::tree::EntryKind::Commit.into(), 1076 filename: name.into(), 1077 oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()), 1078 }, 1079 TreeValue::Conflict(id) => 
gix::objs::tree::Entry { 1080 mode: gix::object::tree::EntryKind::Blob.into(), 1081 filename: (name.to_owned() + CONFLICT_SUFFIX).into(), 1082 oid: gix::ObjectId::from_bytes_or_panic(id.as_bytes()), 1083 }, 1084 } 1085 }) 1086 .sorted_unstable() 1087 .collect(); 1088 let locked_repo = self.lock_git_repo(); 1089 let oid = locked_repo 1090 .write_object(gix::objs::Tree { entries }) 1091 .map_err(|err| BackendError::WriteObject { 1092 object_type: "tree", 1093 source: Box::new(err), 1094 })?; 1095 Ok(TreeId::from_bytes(oid.as_bytes())) 1096 } 1097 1098 fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> { 1099 let mut file = self.read_file_sync(&FileId::new(id.to_bytes()))?; 1100 let mut data = String::new(); 1101 file.read_to_string(&mut data) 1102 .map_err(|err| BackendError::ReadObject { 1103 object_type: "conflict".to_owned(), 1104 hash: id.hex(), 1105 source: err.into(), 1106 })?; 1107 let json: serde_json::Value = serde_json::from_str(&data).unwrap(); 1108 Ok(Conflict { 1109 removes: conflict_term_list_from_json(json.get("removes").unwrap()), 1110 adds: conflict_term_list_from_json(json.get("adds").unwrap()), 1111 }) 1112 } 1113 1114 fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> { 1115 let json = serde_json::json!({ 1116 "removes": conflict_term_list_to_json(&conflict.removes), 1117 "adds": conflict_term_list_to_json(&conflict.adds), 1118 }); 1119 let json_string = json.to_string(); 1120 let bytes = json_string.as_bytes(); 1121 let locked_repo = self.lock_git_repo(); 1122 let oid = locked_repo 1123 .write_blob(bytes) 1124 .map_err(|err| BackendError::WriteObject { 1125 object_type: "conflict", 1126 source: Box::new(err), 1127 })?; 1128 Ok(ConflictId::from_bytes(oid.as_bytes())) 1129 } 1130 1131 #[tracing::instrument(skip(self))] 1132 async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> { 1133 if *id == self.root_commit_id { 1134 return Ok(make_root_commit( 1135 
self.root_change_id().clone(), 1136 self.empty_tree_id.clone(), 1137 )); 1138 } 1139 let git_commit_id = validate_git_object_id(id)?; 1140 1141 let mut commit = { 1142 let locked_repo = self.lock_git_repo(); 1143 let git_object = locked_repo 1144 .find_object(git_commit_id) 1145 .map_err(|err| map_not_found_err(err, id))?; 1146 let is_shallow = locked_repo 1147 .shallow_commits() 1148 .ok() 1149 .flatten() 1150 .is_some_and(|shallow| shallow.contains(&git_object.id)); 1151 commit_from_git_without_root_parent(id, &git_object, false, is_shallow)? 1152 }; 1153 if commit.parents.is_empty() { 1154 commit.parents.push(self.root_commit_id.clone()); 1155 }; 1156 1157 let table = self.cached_extra_metadata_table()?; 1158 if let Some(extras) = table.get_value(id.as_bytes()) { 1159 deserialize_extras(&mut commit, extras); 1160 } else { 1161 // TODO: Remove this hack and map to ObjectNotFound error if we're sure that 1162 // there are no reachable ancestor commits without extras metadata. Git commits 1163 // imported by jj < 0.8.0 might not have extras (#924). 
1164 // https://github.com/jj-vcs/jj/issues/2343 1165 tracing::info!("unimported Git commit found"); 1166 self.import_head_commits([id])?; 1167 let table = self.cached_extra_metadata_table()?; 1168 let extras = table.get_value(id.as_bytes()).unwrap(); 1169 deserialize_extras(&mut commit, extras); 1170 } 1171 Ok(commit) 1172 } 1173 1174 async fn write_commit( 1175 &self, 1176 mut contents: Commit, 1177 mut sign_with: Option<&mut SigningFn>, 1178 ) -> BackendResult<(CommitId, Commit)> { 1179 assert!(contents.secure_sig.is_none(), "commit.secure_sig was set"); 1180 1181 let locked_repo = self.lock_git_repo(); 1182 let git_tree_id = match &contents.root_tree { 1183 MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?, 1184 MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() { 1185 Some(tree_id) => validate_git_object_id(tree_id)?, 1186 None => write_tree_conflict(&locked_repo, tree_ids)?, 1187 }, 1188 }; 1189 let author = signature_to_git(&contents.author); 1190 let mut committer = signature_to_git(&contents.committer); 1191 let message = &contents.description; 1192 if contents.parents.is_empty() { 1193 return Err(BackendError::Other( 1194 "Cannot write a commit with no parents".into(), 1195 )); 1196 } 1197 let mut parents = SmallVec::new(); 1198 for parent_id in &contents.parents { 1199 if *parent_id == self.root_commit_id { 1200 // Git doesn't have a root commit, so if the parent is the root commit, we don't 1201 // add it to the list of parents to write in the Git commit. We also check that 1202 // there are no other parents since Git cannot represent a merge between a root 1203 // commit and another commit. 1204 if contents.parents.len() > 1 { 1205 return Err(BackendError::Unsupported( 1206 "The Git backend does not support creating merge commits with the root \ 1207 commit as one of the parents." 
1208 .to_owned(), 1209 )); 1210 } 1211 } else { 1212 parents.push(validate_git_object_id(parent_id)?); 1213 } 1214 } 1215 let mut extra_headers = vec![]; 1216 if let MergedTreeId::Merge(tree_ids) = &contents.root_tree { 1217 if !tree_ids.is_resolved() { 1218 let value = tree_ids.iter().map(|id| id.hex()).join(" ").into_bytes(); 1219 extra_headers.push(( 1220 BString::new(JJ_TREES_COMMIT_HEADER.to_vec()), 1221 BString::new(value), 1222 )); 1223 } 1224 } 1225 let extras = serialize_extras(&contents); 1226 1227 // If two writers write commits of the same id with different metadata, they 1228 // will both succeed and the metadata entries will be "merged" later. Since 1229 // metadata entry is keyed by the commit id, one of the entries would be lost. 1230 // To prevent such race condition locally, we extend the scope covered by the 1231 // table lock. This is still racy if multiple machines are involved and the 1232 // repository is rsync-ed. 1233 let (table, table_lock) = self.read_extra_metadata_table_locked()?; 1234 let id = loop { 1235 let mut commit = gix::objs::Commit { 1236 message: message.to_owned().into(), 1237 tree: git_tree_id, 1238 author: author.into(), 1239 committer: committer.into(), 1240 encoding: None, 1241 parents: parents.clone(), 1242 extra_headers: extra_headers.clone(), 1243 }; 1244 1245 if let Some(sign) = &mut sign_with { 1246 // we don't use gix pool, but at least use their heuristic 1247 let mut data = Vec::with_capacity(512); 1248 commit.write_to(&mut data).unwrap(); 1249 1250 let sig = sign(&data).map_err(|err| BackendError::WriteObject { 1251 object_type: "commit", 1252 source: Box::new(err), 1253 })?; 1254 commit 1255 .extra_headers 1256 .push(("gpgsig".into(), sig.clone().into())); 1257 contents.secure_sig = Some(SecureSig { data, sig }); 1258 } 1259 1260 let git_id = 1261 locked_repo 1262 .write_object(&commit) 1263 .map_err(|err| BackendError::WriteObject { 1264 object_type: "commit", 1265 source: Box::new(err), 1266 })?; 1267 1268 
match table.get_value(git_id.as_bytes()) { 1269 Some(existing_extras) if existing_extras != extras => { 1270 // It's possible a commit already exists with the same commit id but different 1271 // change id. Adjust the timestamp until this is no longer the case. 1272 committer.time.seconds -= 1; 1273 } 1274 _ => break CommitId::from_bytes(git_id.as_bytes()), 1275 } 1276 }; 1277 1278 // Everything up to this point had no permanent effect on the repo except 1279 // GC-able objects 1280 locked_repo 1281 .edit_reference(to_no_gc_ref_update(&id)) 1282 .map_err(|err| BackendError::Other(Box::new(err)))?; 1283 1284 // Update the signature to match the one that was actually written to the object 1285 // store 1286 contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000); 1287 let mut mut_table = table.start_mutation(); 1288 mut_table.add_entry(id.to_bytes(), extras); 1289 self.save_extra_metadata_table(mut_table, &table_lock)?; 1290 Ok((id, contents)) 1291 } 1292 1293 fn get_copy_records( 1294 &self, 1295 paths: Option<&[RepoPathBuf]>, 1296 root_id: &CommitId, 1297 head_id: &CommitId, 1298 ) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> { 1299 let repo = self.git_repo(); 1300 let root_tree = self.read_tree_for_commit(&repo, root_id)?; 1301 let head_tree = self.read_tree_for_commit(&repo, head_id)?; 1302 1303 let change_to_copy_record = 1304 |change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> { 1305 let gix::object::tree::diff::Change::Rewrite { 1306 source_location, 1307 source_id, 1308 location: dest_location, 1309 .. 
1310 } = change 1311 else { 1312 return Ok(None); 1313 }; 1314 1315 let source = str::from_utf8(source_location) 1316 .map_err(|err| to_invalid_utf8_err(err, root_id))?; 1317 let dest = str::from_utf8(dest_location) 1318 .map_err(|err| to_invalid_utf8_err(err, head_id))?; 1319 1320 let target = RepoPathBuf::from_internal_string(dest); 1321 if !paths.is_none_or(|paths| paths.contains(&target)) { 1322 return Ok(None); 1323 } 1324 1325 Ok(Some(CopyRecord { 1326 target, 1327 target_commit: head_id.clone(), 1328 source: RepoPathBuf::from_internal_string(source), 1329 source_file: FileId::from_bytes(source_id.as_bytes()), 1330 source_commit: root_id.clone(), 1331 })) 1332 }; 1333 1334 let mut records: Vec<BackendResult<CopyRecord>> = Vec::new(); 1335 root_tree 1336 .changes() 1337 .map_err(|err| BackendError::Other(err.into()))? 1338 .options(|opts| { 1339 opts.track_path().track_rewrites(Some(gix::diff::Rewrites { 1340 copies: Some(gix::diff::rewrites::Copies { 1341 source: gix::diff::rewrites::CopySource::FromSetOfModifiedFiles, 1342 percentage: Some(0.5), 1343 }), 1344 percentage: Some(0.5), 1345 limit: 1000, 1346 track_empty: false, 1347 })); 1348 }) 1349 .for_each_to_obtain_tree_with_cache( 1350 &head_tree, 1351 &mut self.new_diff_platform()?, 1352 |change| -> BackendResult<_> { 1353 match change_to_copy_record(change) { 1354 Ok(None) => {} 1355 Ok(Some(change)) => records.push(Ok(change)), 1356 Err(err) => records.push(Err(err)), 1357 } 1358 Ok(gix::object::tree::diff::Action::Continue) 1359 }, 1360 ) 1361 .map_err(|err| BackendError::Other(err.into()))?; 1362 Ok(Box::pin(futures::stream::iter(records))) 1363 } 1364 1365 #[tracing::instrument(skip(self, index))] 1366 fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> { 1367 let git_repo = self.lock_git_repo(); 1368 let new_heads = index 1369 .all_heads_for_gc() 1370 .map_err(|err| BackendError::Other(err.into()))? 
1371 .filter(|id| *id != self.root_commit_id); 1372 recreate_no_gc_refs(&git_repo, new_heads, keep_newer)?; 1373 // TODO: remove unreachable entries from extras table if segment file 1374 // mtime <= keep_newer? (it won't be consistent with no-gc refs 1375 // preserved by the keep_newer timestamp though) 1376 // TODO: remove unreachable extras table segments 1377 // TODO: pass in keep_newer to "git gc" command 1378 run_git_gc(self.git_repo_path()).map_err(|err| BackendError::Other(err.into()))?; 1379 // Since "git gc" will move loose refs into packed refs, in-memory 1380 // packed-refs cache should be invalidated without relying on mtime. 1381 git_repo.refs.force_refresh_packed_buffer().ok(); 1382 Ok(()) 1383 } 1384} 1385 1386/// Write a tree conflict as a special tree with `.jjconflict-base-N` and 1387/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd. 1388fn write_tree_conflict( 1389 repo: &gix::Repository, 1390 conflict: &Merge<TreeId>, 1391) -> BackendResult<gix::ObjectId> { 1392 // Tree entries to be written must be sorted by Entry::filename(). 1393 let mut entries = itertools::chain( 1394 conflict 1395 .removes() 1396 .enumerate() 1397 .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)), 1398 conflict 1399 .adds() 1400 .enumerate() 1401 .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)), 1402 ) 1403 .map(|(name, tree_id)| gix::objs::tree::Entry { 1404 mode: gix::object::tree::EntryKind::Tree.into(), 1405 filename: name.into(), 1406 oid: gix::ObjectId::from_bytes_or_panic(tree_id.as_bytes()), 1407 }) 1408 .collect_vec(); 1409 let readme_id = repo 1410 .write_blob( 1411 r#"This commit was made by jj, https://github.com/jj-vcs/jj. 1412The commit contains file conflicts, and therefore looks wrong when used with plain 1413Git or other tools that are unfamiliar with jj. 1414 1415The .jjconflict-* directories represent the different inputs to the conflict. 
1416For details, see 1417https://jj-vcs.github.io/jj/prerelease/git-compatibility/#format-mapping-details 1418 1419If you see this file in your working copy, it probably means that you used a 1420regular `git` command to check out a conflicted commit. Use `jj abandon` to 1421recover. 1422"#, 1423 ) 1424 .map_err(|err| { 1425 BackendError::Other(format!("Failed to write README for conflict tree: {err}").into()) 1426 })? 1427 .detach(); 1428 entries.push(gix::objs::tree::Entry { 1429 mode: gix::object::tree::EntryKind::Blob.into(), 1430 filename: "README".into(), 1431 oid: readme_id, 1432 }); 1433 entries.sort_unstable(); 1434 let id = repo 1435 .write_object(gix::objs::Tree { entries }) 1436 .map_err(|err| BackendError::WriteObject { 1437 object_type: "tree", 1438 source: Box::new(err), 1439 })?; 1440 Ok(id.detach()) 1441} 1442 1443fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value { 1444 serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect()) 1445} 1446 1447fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> { 1448 json.as_array() 1449 .unwrap() 1450 .iter() 1451 .map(conflict_term_from_json) 1452 .collect() 1453} 1454 1455fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value { 1456 serde_json::json!({ 1457 "value": tree_value_to_json(&part.value), 1458 }) 1459} 1460 1461fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm { 1462 let json_value = json.get("value").unwrap(); 1463 ConflictTerm { 1464 value: tree_value_from_json(json_value), 1465 } 1466} 1467 1468fn tree_value_to_json(value: &TreeValue) -> serde_json::Value { 1469 match value { 1470 TreeValue::File { id, executable } => serde_json::json!({ 1471 "file": { 1472 "id": id.hex(), 1473 "executable": executable, 1474 }, 1475 }), 1476 TreeValue::Symlink(id) => serde_json::json!({ 1477 "symlink_id": id.hex(), 1478 }), 1479 TreeValue::Tree(id) => serde_json::json!({ 1480 "tree_id": id.hex(), 1481 }), 1482 
TreeValue::GitSubmodule(id) => serde_json::json!({ 1483 "submodule_id": id.hex(), 1484 }), 1485 TreeValue::Conflict(id) => serde_json::json!({ 1486 "conflict_id": id.hex(), 1487 }), 1488 } 1489} 1490 1491fn tree_value_from_json(json: &serde_json::Value) -> TreeValue { 1492 if let Some(json_file) = json.get("file") { 1493 TreeValue::File { 1494 id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())), 1495 executable: json_file.get("executable").unwrap().as_bool().unwrap(), 1496 } 1497 } else if let Some(json_id) = json.get("symlink_id") { 1498 TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id))) 1499 } else if let Some(json_id) = json.get("tree_id") { 1500 TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id))) 1501 } else if let Some(json_id) = json.get("submodule_id") { 1502 TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id))) 1503 } else if let Some(json_id) = json.get("conflict_id") { 1504 TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id))) 1505 } else { 1506 panic!("unexpected json value in conflict: {json:#?}"); 1507 } 1508} 1509 1510fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> { 1511 hex::decode(value.as_str().unwrap()).unwrap() 1512} 1513 1514#[cfg(test)] 1515mod tests { 1516 use assert_matches::assert_matches; 1517 use hex::ToHex as _; 1518 use pollster::FutureExt as _; 1519 1520 use super::*; 1521 use crate::config::StackedConfig; 1522 use crate::content_hash::blake2b_hash; 1523 use crate::tests::new_temp_dir; 1524 1525 const GIT_USER: &str = "Someone"; 1526 const GIT_EMAIL: &str = "someone@example.com"; 1527 1528 fn git_config() -> Vec<bstr::BString> { 1529 vec![ 1530 format!("user.name = {GIT_USER}").into(), 1531 format!("user.email = {GIT_EMAIL}").into(), 1532 "init.defaultBranch = master".into(), 1533 ] 1534 } 1535 1536 fn open_options() -> gix::open::Options { 1537 gix::open::Options::isolated() 1538 .config_overrides(git_config()) 1539 .strict_config(true) 1540 
.lossy_config(false) 1541 } 1542 1543 fn git_init(directory: impl AsRef<Path>) -> gix::Repository { 1544 gix::ThreadSafeRepository::init_opts( 1545 directory, 1546 gix::create::Kind::WithWorktree, 1547 gix::create::Options::default(), 1548 open_options(), 1549 ) 1550 .unwrap() 1551 .to_thread_local() 1552 } 1553 1554 #[test] 1555 fn read_plain_git_commit() { 1556 let settings = user_settings(); 1557 let temp_dir = new_temp_dir(); 1558 let store_path = temp_dir.path(); 1559 let git_repo_path = temp_dir.path().join("git"); 1560 let git_repo = git_init(git_repo_path); 1561 1562 // Add a commit with some files in 1563 let blob1 = git_repo.write_blob(b"content1").unwrap().detach(); 1564 let blob2 = git_repo.write_blob(b"normal").unwrap().detach(); 1565 let mut dir_tree_editor = git_repo.empty_tree().edit().unwrap(); 1566 dir_tree_editor 1567 .upsert("normal", gix::object::tree::EntryKind::Blob, blob1) 1568 .unwrap(); 1569 dir_tree_editor 1570 .upsert("symlink", gix::object::tree::EntryKind::Link, blob2) 1571 .unwrap(); 1572 let dir_tree_id = dir_tree_editor.write().unwrap().detach(); 1573 let mut root_tree_builder = git_repo.empty_tree().edit().unwrap(); 1574 root_tree_builder 1575 .upsert("dir", gix::object::tree::EntryKind::Tree, dir_tree_id) 1576 .unwrap(); 1577 let root_tree_id = root_tree_builder.write().unwrap().detach(); 1578 let git_author = gix::actor::Signature { 1579 name: "git author".into(), 1580 email: "git.author@example.com".into(), 1581 time: gix::date::Time::new(1000, 60 * 60), 1582 }; 1583 let git_committer = gix::actor::Signature { 1584 name: "git committer".into(), 1585 email: "git.committer@example.com".into(), 1586 time: gix::date::Time::new(2000, -480 * 60), 1587 }; 1588 let git_commit_id = git_repo 1589 .commit_as( 1590 &git_committer, 1591 &git_author, 1592 "refs/heads/dummy", 1593 "git commit message", 1594 root_tree_id, 1595 [] as [gix::ObjectId; 0], 1596 ) 1597 .unwrap() 1598 .detach(); 1599 git_repo 1600 .find_reference("refs/heads/dummy") 
1601 .unwrap() 1602 .delete() 1603 .unwrap(); 1604 let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263"); 1605 // The change id is the leading reverse bits of the commit id 1606 let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25"); 1607 // Check that the git commit above got the hash we expect 1608 assert_eq!( 1609 git_commit_id.as_bytes(), 1610 commit_id.as_bytes(), 1611 "{git_commit_id:?} vs {commit_id:?}" 1612 ); 1613 1614 // Add an empty commit on top 1615 let git_commit_id2 = git_repo 1616 .commit_as( 1617 &git_committer, 1618 &git_author, 1619 "refs/heads/dummy2", 1620 "git commit message 2", 1621 root_tree_id, 1622 [git_commit_id], 1623 ) 1624 .unwrap() 1625 .detach(); 1626 git_repo 1627 .find_reference("refs/heads/dummy2") 1628 .unwrap() 1629 .delete() 1630 .unwrap(); 1631 let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes()); 1632 1633 let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap(); 1634 1635 // Import the head commit and its ancestors 1636 backend.import_head_commits([&commit_id2]).unwrap(); 1637 // Ref should be created only for the head commit 1638 let git_refs = backend 1639 .git_repo() 1640 .references() 1641 .unwrap() 1642 .prefixed("refs/jj/keep/") 1643 .unwrap() 1644 .map(|git_ref| git_ref.unwrap().id().detach()) 1645 .collect_vec(); 1646 assert_eq!(git_refs, vec![git_commit_id2]); 1647 1648 let commit = backend.read_commit(&commit_id).block_on().unwrap(); 1649 assert_eq!(&commit.change_id, &change_id); 1650 assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]); 1651 assert_eq!(commit.predecessors, vec![]); 1652 assert_eq!( 1653 commit.root_tree.to_merge(), 1654 Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes())) 1655 ); 1656 assert_matches!(commit.root_tree, MergedTreeId::Merge(_)); 1657 assert_eq!(commit.description, "git commit message"); 1658 assert_eq!(commit.author.name, "git author"); 1659 assert_eq!(commit.author.email, 
"git.author@example.com"); 1660 assert_eq!( 1661 commit.author.timestamp.timestamp, 1662 MillisSinceEpoch(1000 * 1000) 1663 ); 1664 assert_eq!(commit.author.timestamp.tz_offset, 60); 1665 assert_eq!(commit.committer.name, "git committer"); 1666 assert_eq!(commit.committer.email, "git.committer@example.com"); 1667 assert_eq!( 1668 commit.committer.timestamp.timestamp, 1669 MillisSinceEpoch(2000 * 1000) 1670 ); 1671 assert_eq!(commit.committer.timestamp.tz_offset, -480); 1672 1673 let root_tree = backend 1674 .read_tree( 1675 RepoPath::root(), 1676 &TreeId::from_bytes(root_tree_id.as_bytes()), 1677 ) 1678 .block_on() 1679 .unwrap(); 1680 let mut root_entries = root_tree.entries(); 1681 let dir = root_entries.next().unwrap(); 1682 assert_eq!(root_entries.next(), None); 1683 assert_eq!(dir.name().as_internal_str(), "dir"); 1684 assert_eq!( 1685 dir.value(), 1686 &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes())) 1687 ); 1688 1689 let dir_tree = backend 1690 .read_tree( 1691 RepoPath::from_internal_string("dir"), 1692 &TreeId::from_bytes(dir_tree_id.as_bytes()), 1693 ) 1694 .block_on() 1695 .unwrap(); 1696 let mut entries = dir_tree.entries(); 1697 let file = entries.next().unwrap(); 1698 let symlink = entries.next().unwrap(); 1699 assert_eq!(entries.next(), None); 1700 assert_eq!(file.name().as_internal_str(), "normal"); 1701 assert_eq!( 1702 file.value(), 1703 &TreeValue::File { 1704 id: FileId::from_bytes(blob1.as_bytes()), 1705 executable: false 1706 } 1707 ); 1708 assert_eq!(symlink.name().as_internal_str(), "symlink"); 1709 assert_eq!( 1710 symlink.value(), 1711 &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes())) 1712 ); 1713 1714 let commit2 = backend.read_commit(&commit_id2).block_on().unwrap(); 1715 assert_eq!(commit2.parents, vec![commit_id.clone()]); 1716 assert_eq!(commit.predecessors, vec![]); 1717 assert_eq!( 1718 commit.root_tree.to_merge(), 1719 Merge::resolved(TreeId::from_bytes(root_tree_id.as_bytes())) 1720 ); 1721 
assert_matches!(commit.root_tree, MergedTreeId::Merge(_)); 1722 } 1723 1724 #[test] 1725 fn read_git_commit_without_importing() { 1726 let settings = user_settings(); 1727 let temp_dir = new_temp_dir(); 1728 let store_path = temp_dir.path(); 1729 let git_repo_path = temp_dir.path().join("git"); 1730 let git_repo = git_init(&git_repo_path); 1731 1732 let signature = gix::actor::Signature { 1733 name: GIT_USER.into(), 1734 email: GIT_EMAIL.into(), 1735 time: gix::date::Time::now_utc(), 1736 }; 1737 let empty_tree_id = 1738 gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap(); 1739 let git_commit_id = git_repo 1740 .commit_as( 1741 &signature, 1742 &signature, 1743 "refs/heads/main", 1744 "git commit message", 1745 empty_tree_id, 1746 [] as [gix::ObjectId; 0], 1747 ) 1748 .unwrap(); 1749 1750 let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap(); 1751 1752 // read_commit() without import_head_commits() works as of now. This might be 1753 // changed later. 
1754 assert!(backend 1755 .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes())) 1756 .block_on() 1757 .is_ok()); 1758 assert!( 1759 backend 1760 .cached_extra_metadata_table() 1761 .unwrap() 1762 .get_value(git_commit_id.as_bytes()) 1763 .is_some(), 1764 "extra metadata should have been be created" 1765 ); 1766 } 1767 1768 #[test] 1769 fn read_signed_git_commit() { 1770 let settings = user_settings(); 1771 let temp_dir = new_temp_dir(); 1772 let store_path = temp_dir.path(); 1773 let git_repo_path = temp_dir.path().join("git"); 1774 let git_repo = git_init(git_repo_path); 1775 1776 let signature = gix::actor::Signature { 1777 name: GIT_USER.into(), 1778 email: GIT_EMAIL.into(), 1779 time: gix::date::Time::now_utc(), 1780 }; 1781 let empty_tree_id = 1782 gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap(); 1783 1784 let secure_sig = 1785 "here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP\n"; 1786 1787 let mut commit = gix::objs::Commit { 1788 tree: empty_tree_id, 1789 parents: smallvec::SmallVec::new(), 1790 author: signature.clone(), 1791 committer: signature.clone(), 1792 encoding: None, 1793 message: "git commit message".into(), 1794 extra_headers: Vec::new(), 1795 }; 1796 1797 let mut commit_buf = Vec::new(); 1798 commit.write_to(&mut commit_buf).unwrap(); 1799 let commit_str = std::str::from_utf8(&commit_buf).unwrap(); 1800 1801 commit 1802 .extra_headers 1803 .push(("gpgsig".into(), secure_sig.into())); 1804 1805 let git_commit_id = git_repo.write_object(&commit).unwrap(); 1806 1807 let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap(); 1808 1809 let commit = backend 1810 .read_commit(&CommitId::from_bytes(git_commit_id.as_bytes())) 1811 .block_on() 1812 .unwrap(); 1813 1814 let sig = commit.secure_sig.expect("failed to read the signature"); 1815 1816 // converting to string for nicer assert diff 1817 assert_eq!(std::str::from_utf8(&sig.sig).unwrap(), secure_sig); 
1818 assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_str); 1819 } 1820 1821 #[test] 1822 fn read_empty_string_placeholder() { 1823 let git_signature1 = gix::actor::SignatureRef { 1824 name: EMPTY_STRING_PLACEHOLDER.into(), 1825 email: "git.author@example.com".into(), 1826 time: gix::date::Time::new(1000, 60 * 60), 1827 }; 1828 let signature1 = signature_from_git(git_signature1); 1829 assert!(signature1.name.is_empty()); 1830 assert_eq!(signature1.email, "git.author@example.com"); 1831 let git_signature2 = gix::actor::SignatureRef { 1832 name: "git committer".into(), 1833 email: EMPTY_STRING_PLACEHOLDER.into(), 1834 time: gix::date::Time::new(2000, -480 * 60), 1835 }; 1836 let signature2 = signature_from_git(git_signature2); 1837 assert_eq!(signature2.name, "git committer"); 1838 assert!(signature2.email.is_empty()); 1839 } 1840 1841 #[test] 1842 fn write_empty_string_placeholder() { 1843 let signature1 = Signature { 1844 name: "".to_string(), 1845 email: "someone@example.com".to_string(), 1846 timestamp: Timestamp { 1847 timestamp: MillisSinceEpoch(0), 1848 tz_offset: 0, 1849 }, 1850 }; 1851 let git_signature1 = signature_to_git(&signature1); 1852 assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER); 1853 assert_eq!(git_signature1.email, "someone@example.com"); 1854 let signature2 = Signature { 1855 name: "Someone".to_string(), 1856 email: "".to_string(), 1857 timestamp: Timestamp { 1858 timestamp: MillisSinceEpoch(0), 1859 tz_offset: 0, 1860 }, 1861 }; 1862 let git_signature2 = signature_to_git(&signature2); 1863 assert_eq!(git_signature2.name, "Someone"); 1864 assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER); 1865 } 1866 1867 /// Test that parents get written correctly 1868 #[test] 1869 fn git_commit_parents() { 1870 let settings = user_settings(); 1871 let temp_dir = new_temp_dir(); 1872 let store_path = temp_dir.path(); 1873 let git_repo_path = temp_dir.path().join("git"); 1874 let git_repo = git_init(&git_repo_path); 1875 1876 let 
backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap(); 1877 let mut commit = Commit { 1878 parents: vec![], 1879 predecessors: vec![], 1880 root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), 1881 change_id: ChangeId::from_hex("abc123"), 1882 description: "".to_string(), 1883 author: create_signature(), 1884 committer: create_signature(), 1885 secure_sig: None, 1886 }; 1887 1888 let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> { 1889 backend.write_commit(commit, None).block_on() 1890 }; 1891 1892 // No parents 1893 commit.parents = vec![]; 1894 assert_matches!( 1895 write_commit(commit.clone()), 1896 Err(BackendError::Other(err)) if err.to_string().contains("no parents") 1897 ); 1898 1899 // Only root commit as parent 1900 commit.parents = vec![backend.root_commit_id().clone()]; 1901 let first_id = write_commit(commit.clone()).unwrap().0; 1902 let first_commit = backend.read_commit(&first_id).block_on().unwrap(); 1903 assert_eq!(first_commit, commit); 1904 let first_git_commit = git_repo.find_commit(git_id(&first_id)).unwrap(); 1905 assert!(first_git_commit.parent_ids().collect_vec().is_empty()); 1906 1907 // Only non-root commit as parent 1908 commit.parents = vec![first_id.clone()]; 1909 let second_id = write_commit(commit.clone()).unwrap().0; 1910 let second_commit = backend.read_commit(&second_id).block_on().unwrap(); 1911 assert_eq!(second_commit, commit); 1912 let second_git_commit = git_repo.find_commit(git_id(&second_id)).unwrap(); 1913 assert_eq!( 1914 second_git_commit.parent_ids().collect_vec(), 1915 vec![git_id(&first_id)] 1916 ); 1917 1918 // Merge commit 1919 commit.parents = vec![first_id.clone(), second_id.clone()]; 1920 let merge_id = write_commit(commit.clone()).unwrap().0; 1921 let merge_commit = backend.read_commit(&merge_id).block_on().unwrap(); 1922 assert_eq!(merge_commit, commit); 1923 let merge_git_commit = git_repo.find_commit(git_id(&merge_id)).unwrap(); 1924 assert_eq!( 
merge_git_commit.parent_ids().collect_vec(),
            vec![git_id(&first_id), git_id(&second_id)]
        );

        // Merge commit with root as one parent
        commit.parents = vec![first_id, backend.root_commit_id().clone()];
        assert_matches!(
            write_commit(commit),
            Err(BackendError::Unsupported(message)) if message.contains("root commit")
        );
    }

    /// Writes a commit whose root tree is a tree-level conflict, then one whose
    /// root tree is resolved, and inspects the git objects produced for each.
    #[test]
    fn write_tree_conflicts() {
        let settings = user_settings();
        let temp_dir = new_temp_dir();
        let git_dir = temp_dir.path().join("git");
        let git_repo = git_init(&git_dir);
        let store_path = temp_dir.path();

        let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();

        // Writes a tree holding a single blob `file{n}` with the text
        // `content {n}` and returns the id of that tree.
        let create_tree = |n| {
            let blob_id = git_repo.write_blob(format!("content {n}")).unwrap();
            let mut editor = git_repo.empty_tree().edit().unwrap();
            editor
                .upsert(
                    format!("file{n}"),
                    gix::object::tree::EntryKind::Blob,
                    blob_id,
                )
                .unwrap();
            let written_id = editor.write().unwrap();
            TreeId::from_bytes(written_id.as_bytes())
        };

        // Two removes ("bases") and three adds ("sides").
        let removes = vec![create_tree(0), create_tree(1)];
        let adds = vec![create_tree(2), create_tree(3), create_tree(4)];
        let root_tree = Merge::from_removes_adds(removes, adds);
        let mut commit = Commit {
            parents: vec![backend.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: MergedTreeId::Merge(root_tree.clone()),
            change_id: ChangeId::from_hex("abc123"),
            description: "".to_string(),
            author: create_signature(),
            committer: create_signature(),
            secure_sig: None,
        };

        let write_commit = |commit: Commit| -> BackendResult<(CommitId, Commit)> {
            backend.write_commit(commit, None).block_on()
        };

        // A tree-level conflict is stored on the git side as a root tree that
        // contains each individual tree as a subtree.
        let (conflicted_id, _) = write_commit(commit.clone()).unwrap();
        let stored_commit = backend.read_commit(&conflicted_id).block_on().unwrap();
        assert_eq!(stored_commit, commit);
        let git_commit = git_repo.find_commit(git_id(&conflicted_id)).unwrap();
        let git_tree = git_repo.find_tree(git_commit.tree_id().unwrap()).unwrap();

        // Everything except `README` must be a subtree.
        for entry in git_tree.iter().map(Result::unwrap) {
            if entry.filename() != b"README" {
                assert_eq!(entry.mode().0, 0o040000);
            }
        }

        // The entries come back as the bases, then the sides, then `README`,
        // each conflict entry pointing at the corresponding term of the merge.
        let expected_subtrees = [
            (&b".jjconflict-base-0"[..], root_tree.get_remove(0).unwrap()),
            (&b".jjconflict-base-1"[..], root_tree.get_remove(1).unwrap()),
            (&b".jjconflict-side-0"[..], root_tree.get_add(0).unwrap()),
            (&b".jjconflict-side-1"[..], root_tree.get_add(1).unwrap()),
            (&b".jjconflict-side-2"[..], root_tree.get_add(2).unwrap()),
        ];
        let mut entries = git_tree.iter().map(Result::unwrap);
        for (name, tree_id) in expected_subtrees {
            let entry = entries.next().unwrap();
            assert_eq!(entry.filename(), name);
            assert_eq!(entry.id().as_bytes(), tree_id.as_bytes());
        }
        let readme = entries.next().unwrap();
        assert_eq!(readme.filename(), b"README");
        assert_eq!(readme.mode().0, 0o100644);
        assert!(entries.next().is_none());

        // A single (resolved) tree written in the new format is just a
        // regular git tree.
        commit.root_tree = MergedTreeId::resolved(create_tree(5));
        let (resolved_id, _) = write_commit(commit.clone()).unwrap();
        let stored_commit = backend.read_commit(&resolved_id).block_on().unwrap();
        assert_eq!(stored_commit, commit);
        let git_commit = git_repo.find_commit(git_id(&resolved_id)).unwrap();
        let git_tree_id = TreeId::from_bytes(git_commit.tree_id().unwrap().as_bytes());
        assert_eq!(MergedTreeId::resolved(git_tree_id), commit.root_tree);
    }

    /// A written commit gets a ref under `refs/jj/keep/`, and re-importing the
    /// commit recreates that ref after it has been deleted.
    #[test]
    fn commit_has_ref() {
        let settings = user_settings();
        let temp_dir = new_temp_dir();
        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
        let git_repo = backend.git_repo();

        let epoch = Timestamp {
            timestamp: MillisSinceEpoch(0),
            tz_offset: 0,
        };
        let signature = Signature {
            name: "Someone".to_string(),
            email: "someone@example.com".to_string(),
            timestamp: epoch,
        };
        let commit = Commit {
            parents: vec![backend.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
            change_id: ChangeId::new(vec![]),
            description: "initial".to_string(),
            author: signature.clone(),
            committer: signature,
            secure_sig: None,
        };
        let (commit_id, _) = backend.write_commit(commit, None).block_on().unwrap();

        let git_refs = git_repo.references().unwrap();
        let git_ref_ids: Vec<_> = git_refs
            .prefixed("refs/jj/keep/")
            .unwrap()
            .map(|git_ref| git_ref.unwrap().id().detach())
            .collect();
        assert!(git_ref_ids.contains(&git_id(&commit_id)));

        // Simulate a concurrently-running GC that deletes the ref but leaves
        // the extra metadata behind.
        for git_ref in git_refs.prefixed("refs/jj/keep/").unwrap() {
            git_ref.unwrap().delete().unwrap();
        }

        // Importing the commit again should create a new ref for it.
        backend.import_head_commits([&commit_id]).unwrap();
        let git_refs = git_repo.references().unwrap();
        let git_ref_ids: Vec<_> = git_refs
            .prefixed("refs/jj/keep/")
            .unwrap()
            .map(|git_ref| git_ref.unwrap().id().detach())
            .collect();
        assert!(git_ref_ids.contains(&git_id(&commit_id)));
    }

    /// Importing the same head id twice in one call must still succeed and
    /// leave a `refs/jj/keep/` ref pointing at the commit.
    #[test]
    fn import_head_commits_duplicates() {
        let settings = user_settings();
        let temp_dir = new_temp_dir();
        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
        let git_repo = backend.git_repo();

        // Create a parentless commit directly through gix.
        let signature = gix::actor::Signature {
            name: GIT_USER.into(),
            email: GIT_EMAIL.into(),
            time: gix::date::Time::now_utc(),
        };
        let empty_tree_id =
            gix::ObjectId::from_hex(b"4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
        let no_parents: [gix::ObjectId; 0] = [];
        let git_commit_id = git_repo
            .commit_as(
                &signature,
                &signature,
                "refs/heads/main",
                "git commit message",
                empty_tree_id,
                no_parents,
            )
            .unwrap()
            .detach();
        let commit_id = CommitId::from_bytes(git_commit_id.as_bytes());

        // Duplicated head ids must not make ref creation fail.
        backend
            .import_head_commits([&commit_id, &commit_id])
            .unwrap();
        let git_refs = git_repo.references().unwrap();
        let mut keep_refs = git_refs.prefixed("refs/jj/keep/").unwrap();
        assert!(keep_refs.any(|git_ref| git_ref.unwrap().id().detach() == git_commit_id));
    }

    /// Writing a second commit that differs from the first only in its
    /// predecessors must yield a different id; the written commit is expected
    /// to differ from the requested one only in its committer timestamp.
    #[test]
    fn overlapping_git_commit_id() {
        let settings = user_settings();
        let temp_dir = new_temp_dir();
        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();
        let commit1 = Commit {
            parents: vec![backend.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
            change_id: ChangeId::new(vec![]),
            description: "initial".to_string(),
            author: create_signature(),
            committer: create_signature(),
            secure_sig: None,
        };

        let (commit_id1, mut commit2) = backend.write_commit(commit1, None).block_on().unwrap();
        commit2.predecessors.push(commit_id1.clone());
        // `write_commit` should keep the ids apart by changing the committer
        // timestamp of the commit it actually writes.
        let (commit_id2, mut actual_commit2) = backend
            .write_commit(commit2.clone(), None)
            .block_on()
            .unwrap();

        // The returned commit is what gets read back under the returned id.
        let stored_commit2 = backend.read_commit(&commit_id2).block_on().unwrap();
        assert_eq!(stored_commit2, actual_commit2);
        assert_ne!(commit_id2, commit_id1);

        // The committer timestamp should differ from the requested one...
        let requested_time = commit2.committer.timestamp.timestamp;
        assert_ne!(actual_commit2.committer.timestamp.timestamp, requested_time);
        // ...and once it is reset, the rest of the commit should be the same.
        actual_commit2.committer.timestamp.timestamp = requested_time;
        assert_eq!(actual_commit2, commit2);
    }

    /// Writes a commit through a test signing function and checks the raw git
    /// object as well as the signature returned by `write_commit` and by
    /// `read_commit`.
    #[test]
    fn write_signed_commit() {
        let settings = user_settings();
        let temp_dir = new_temp_dir();
        let backend = GitBackend::init_internal(&settings, temp_dir.path()).unwrap();

        let unsigned_commit = Commit {
            parents: vec![backend.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
            change_id: ChangeId::new(vec![]),
            description: "initial".to_string(),
            author: create_signature(),
            committer: create_signature(),
            secure_sig: None,
        };

        // Fake signer: the "signature" is a fixed line followed by the hex
        // blake2b hash of the data handed to it.
        let mut signer = |data: &_| {
            let hash: String = blake2b_hash(data).encode_hex();
            Ok(format!("test sig\nhash={hash}\n").into_bytes())
        };

        let (signed_id, written_commit) = backend
            .write_commit(unsigned_commit, Some(&mut signer as &mut SigningFn))
            .block_on()
            .unwrap();

        let git_repo = backend.git_repo();
        let obj = git_repo.find_object(git_id(&signed_id)).unwrap();
        // NOTE(review): whitespace inside the inline snapshots below was
        // reconstructed. The second `gpgsig` line is assumed to carry the one
        // leading space git uses to continue a multi-line header -- confirm.
        insta::assert_snapshot!(std::str::from_utf8(&obj.data).unwrap(), @r"
        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
        author Someone <someone@example.com> 0 +0000
        committer Someone <someone@example.com> 0 +0000
        gpgsig test sig
         hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518

        initial
        ");

        let returned_sig = written_commit
            .secure_sig
            .expect("failed to return the signature");

        // The signature read back must equal the one `write_commit` returned.
        let reread_commit = backend.read_commit(&signed_id).block_on().unwrap();
        let sig = reread_commit
            .secure_sig
            .expect("failed to read the signature");
        assert_eq!(&sig, &returned_sig);

        insta::assert_snapshot!(std::str::from_utf8(&sig.sig).unwrap(), @r"
        test sig
        hash=9ad9526c3b2103c41a229f2f3c82d107a0ecd902f476a855f0e1dd5f7bef1430663de12749b73e293a877113895a8a2a0f29da4bbc5a5f9a19c3523fb0e53518
        ");
        insta::assert_snapshot!(std::str::from_utf8(&sig.data).unwrap(), @r"
        tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904
        author Someone <someone@example.com> 0 +0000
        committer Someone <someone@example.com> 0 +0000

        initial
        ");
    }

    /// Converts a `CommitId` into the gix object id with the same bytes.
    fn git_id(commit_id: &CommitId) -> gix::ObjectId {
        let raw_bytes = commit_id.as_bytes();
        gix::ObjectId::from_bytes_or_panic(raw_bytes)
    }

    /// Builds the signature used by these tests: `GIT_USER` / `GIT_EMAIL` at
    /// timestamp zero with a zero timezone offset.
    fn create_signature() -> Signature {
        let timestamp = Timestamp {
            timestamp: MillisSinceEpoch(0),
            tz_offset: 0,
        };
        Signature {
            name: GIT_USER.to_string(),
            email: GIT_EMAIL.to_string(),
            timestamp,
        }
    }

    // testutils::user_settings() is deliberately not used here: the dependency
    // cycle 'jj_lib (1) -> testutils -> jj_lib (2)' produces a second, distinct
    // UserSettings type. testutils would hand back jj_lib (2)'s UserSettings,
    // while the UserSettings type in this file is jj_lib (1)'s.
    fn user_settings() -> UserSettings {
        UserSettings::from_config(StackedConfig::with_defaults()).unwrap()
    }
}