just playing with tangled
at tmp-tutorial 1202 lines 47 kB view raw
1// Copyright 2020 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#![allow(missing_docs)] 16 17use std::any::Any; 18use std::fmt::{Debug, Error, Formatter}; 19use std::io::{Cursor, Read}; 20use std::ops::Deref; 21use std::path::Path; 22use std::sync::{Arc, Mutex, MutexGuard}; 23use std::{fs, slice}; 24 25use git2::Oid; 26use itertools::Itertools; 27use prost::Message; 28use thiserror::Error; 29 30use crate::backend::{ 31 make_root_commit, Backend, BackendError, BackendInitError, BackendLoadError, BackendResult, 32 ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, FileId, MillisSinceEpoch, 33 ObjectId, Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue, 34}; 35use crate::conflicts; 36use crate::file_util::{IoResultExt as _, PathError}; 37use crate::lock::FileLock; 38use crate::repo_path::{RepoPath, RepoPathComponent}; 39use crate::stacked_table::{ 40 MutableTable, ReadonlyTable, TableSegment, TableStore, TableStoreError, 41}; 42 43const HASH_LENGTH: usize = 20; 44const CHANGE_ID_LENGTH: usize = 16; 45/// Ref namespace used only for preventing GC. 
46pub const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/"; 47const CONFLICT_SUFFIX: &str = ".jjconflict"; 48 49#[derive(Debug, Error)] 50pub enum GitBackendInitError { 51 #[error("Failed to initialize git repository: {0}")] 52 InitRepository(#[source] git2::Error), 53 #[error("Failed to open git repository: {0}")] 54 OpenRepository(#[source] git2::Error), 55 #[error(transparent)] 56 Path(#[from] PathError), 57} 58 59impl From<GitBackendInitError> for BackendInitError { 60 fn from(err: GitBackendInitError) -> Self { 61 BackendInitError(err.into()) 62 } 63} 64 65#[derive(Debug, Error)] 66pub enum GitBackendLoadError { 67 #[error("Failed to open git repository: {0}")] 68 OpenRepository(#[source] git2::Error), 69 #[error(transparent)] 70 Path(#[from] PathError), 71} 72 73impl From<GitBackendLoadError> for BackendLoadError { 74 fn from(err: GitBackendLoadError) -> Self { 75 BackendLoadError(err.into()) 76 } 77} 78 79/// `GitBackend`-specific error that may occur after the backend is loaded. 80#[derive(Debug, Error)] 81pub enum GitBackendError { 82 #[error("Failed to read non-git metadata: {0}")] 83 ReadMetadata(#[source] TableStoreError), 84 #[error("Failed to write non-git metadata: {0}")] 85 WriteMetadata(#[source] TableStoreError), 86} 87 88impl From<GitBackendError> for BackendError { 89 fn from(err: GitBackendError) -> Self { 90 BackendError::Other(err.into()) 91 } 92} 93 94pub struct GitBackend { 95 repo: Mutex<git2::Repository>, 96 root_commit_id: CommitId, 97 root_change_id: ChangeId, 98 empty_tree_id: TreeId, 99 extra_metadata_store: TableStore, 100 cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>, 101} 102 103impl GitBackend { 104 fn new(repo: git2::Repository, extra_metadata_store: TableStore) -> Self { 105 let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]); 106 let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]); 107 let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904"); 108 GitBackend { 109 repo: 
Mutex::new(repo), 110 root_commit_id, 111 root_change_id, 112 empty_tree_id, 113 extra_metadata_store, 114 cached_extra_metadata: Mutex::new(None), 115 } 116 } 117 118 pub fn init_internal(store_path: &Path) -> Result<Self, GitBackendInitError> { 119 let git_repo = git2::Repository::init_bare(store_path.join("git")) 120 .map_err(GitBackendInitError::InitRepository)?; 121 let extra_path = store_path.join("extra"); 122 fs::create_dir(&extra_path).context(&extra_path)?; 123 let target_path = store_path.join("git_target"); 124 fs::write(&target_path, b"git").context(&target_path)?; 125 let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH); 126 Ok(GitBackend::new(git_repo, extra_metadata_store)) 127 } 128 129 pub fn init_external( 130 store_path: &Path, 131 git_repo_path: &Path, 132 ) -> Result<Self, GitBackendInitError> { 133 let extra_path = store_path.join("extra"); 134 fs::create_dir(&extra_path).context(&extra_path)?; 135 let target_path = store_path.join("git_target"); 136 fs::write(&target_path, git_repo_path.to_str().unwrap().as_bytes()) 137 .context(&target_path)?; 138 let repo = git2::Repository::open(store_path.join(git_repo_path)) 139 .map_err(GitBackendInitError::OpenRepository)?; 140 let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH); 141 Ok(GitBackend::new(repo, extra_metadata_store)) 142 } 143 144 pub fn load(store_path: &Path) -> Result<Self, GitBackendLoadError> { 145 let git_repo_path = { 146 let target_path = store_path.join("git_target"); 147 let git_repo_path_str = fs::read_to_string(&target_path).context(&target_path)?; 148 let git_repo_path = store_path.join(git_repo_path_str); 149 git_repo_path.canonicalize().context(&git_repo_path)? 
150 }; 151 let repo = 152 git2::Repository::open(git_repo_path).map_err(GitBackendLoadError::OpenRepository)?; 153 let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH); 154 Ok(GitBackend::new(repo, extra_metadata_store)) 155 } 156 157 pub fn git_repo(&self) -> MutexGuard<'_, git2::Repository> { 158 self.repo.lock().unwrap() 159 } 160 161 pub fn git_repo_clone(&self) -> git2::Repository { 162 let path = self.repo.lock().unwrap().path().to_owned(); 163 git2::Repository::open(path).unwrap() 164 } 165 166 fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> { 167 let mut locked_head = self.cached_extra_metadata.lock().unwrap(); 168 match locked_head.as_ref() { 169 Some(head) => Ok(head.clone()), 170 None => { 171 let table = self 172 .extra_metadata_store 173 .get_head() 174 .map_err(GitBackendError::ReadMetadata)?; 175 *locked_head = Some(table.clone()); 176 Ok(table) 177 } 178 } 179 } 180 181 fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> { 182 let table = self 183 .extra_metadata_store 184 .get_head_locked() 185 .map_err(GitBackendError::ReadMetadata)?; 186 Ok(table) 187 } 188 189 fn save_extra_metadata_table( 190 &self, 191 mut_table: MutableTable, 192 _table_lock: &FileLock, 193 ) -> BackendResult<()> { 194 let table = self 195 .extra_metadata_store 196 .save_table(mut_table) 197 .map_err(GitBackendError::WriteMetadata)?; 198 // Since the parent table was the head, saved table are likely to be new head. 199 // If it's not, cache will be reloaded when entry can't be found. 200 *self.cached_extra_metadata.lock().unwrap() = Some(table); 201 Ok(()) 202 } 203} 204 205fn commit_from_git_without_root_parent(commit: &git2::Commit) -> Commit { 206 // We reverse the bits of the commit id to create the change id. We don't want 207 // to use the first bytes unmodified because then it would be ambiguous 208 // if a given hash prefix refers to the commit id or the change id. 
It 209 // would have been enough to pick the last 16 bytes instead of the 210 // leading 16 bytes to address that. We also reverse the bits to make it less 211 // likely that users depend on any relationship between the two ids. 212 let change_id = ChangeId::new( 213 commit.id().as_bytes()[4..HASH_LENGTH] 214 .iter() 215 .rev() 216 .map(|b| b.reverse_bits()) 217 .collect(), 218 ); 219 let parents = commit 220 .parent_ids() 221 .map(|oid| CommitId::from_bytes(oid.as_bytes())) 222 .collect_vec(); 223 let tree_id = TreeId::from_bytes(commit.tree_id().as_bytes()); 224 // If this commit is a conflict, we'll update the root tree later, when we read 225 // the extra metadata. 226 let root_tree = conflicts::Conflict::resolved(tree_id); 227 let description = commit.message().unwrap_or("<no message>").to_owned(); 228 let author = signature_from_git(commit.author()); 229 let committer = signature_from_git(commit.committer()); 230 231 Commit { 232 parents, 233 predecessors: vec![], 234 root_tree, 235 // If this commit has associated extra metadata, we may set this later. 
236 uses_tree_conflict_format: false, 237 change_id, 238 description, 239 author, 240 committer, 241 } 242} 243 244fn signature_from_git(signature: git2::Signature) -> Signature { 245 let name = signature.name().unwrap_or("<no name>").to_owned(); 246 let email = signature.email().unwrap_or("<no email>").to_owned(); 247 let timestamp = MillisSinceEpoch(signature.when().seconds() * 1000); 248 let tz_offset = signature.when().offset_minutes(); 249 Signature { 250 name, 251 email, 252 timestamp: Timestamp { 253 timestamp, 254 tz_offset, 255 }, 256 } 257} 258 259fn signature_to_git(signature: &Signature) -> git2::Signature<'static> { 260 let name = &signature.name; 261 let email = &signature.email; 262 let time = git2::Time::new( 263 signature.timestamp.timestamp.0.div_euclid(1000), 264 signature.timestamp.tz_offset, 265 ); 266 git2::Signature::new(name, email, &time).unwrap() 267} 268 269fn serialize_extras(commit: &Commit) -> Vec<u8> { 270 let mut proto = crate::protos::git_store::Commit { 271 change_id: commit.change_id.to_bytes(), 272 uses_tree_conflict_format: commit.uses_tree_conflict_format, 273 ..Default::default() 274 }; 275 if commit.root_tree.as_resolved().is_none() { 276 assert!(commit.uses_tree_conflict_format); 277 let removes = commit 278 .root_tree 279 .removes() 280 .iter() 281 .map(|r| r.to_bytes()) 282 .collect_vec(); 283 let adds = commit 284 .root_tree 285 .adds() 286 .iter() 287 .map(|r| r.to_bytes()) 288 .collect_vec(); 289 let conflict = crate::protos::git_store::TreeConflict { removes, adds }; 290 proto.root_tree = Some(crate::protos::git_store::commit::RootTree::Conflict( 291 conflict, 292 )); 293 } 294 for predecessor in &commit.predecessors { 295 proto.predecessors.push(predecessor.to_bytes()); 296 } 297 proto.encode_to_vec() 298} 299 300fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) { 301 let proto = crate::protos::git_store::Commit::decode(bytes).unwrap(); 302 commit.change_id = ChangeId::new(proto.change_id); 303 
commit.uses_tree_conflict_format = proto.uses_tree_conflict_format; 304 match proto.root_tree { 305 Some(crate::protos::git_store::commit::RootTree::Conflict(proto_conflict)) => { 306 assert!(commit.uses_tree_conflict_format); 307 commit.root_tree = conflicts::Conflict::new( 308 proto_conflict 309 .removes 310 .iter() 311 .map(|id_bytes| TreeId::from_bytes(id_bytes)) 312 .collect(), 313 proto_conflict 314 .adds 315 .iter() 316 .map(|id_bytes| TreeId::from_bytes(id_bytes)) 317 .collect(), 318 ); 319 } 320 Some(crate::protos::git_store::commit::RootTree::Resolved(_)) => { 321 panic!("found resolved root tree in extras (should only be written to git metadata)"); 322 } 323 None => {} 324 } 325 for predecessor in &proto.predecessors { 326 commit.predecessors.push(CommitId::from_bytes(predecessor)); 327 } 328} 329 330/// Creates a random ref in refs/jj/. Used for preventing GC of commits we 331/// create. 332fn create_no_gc_ref() -> String { 333 let random_bytes: [u8; 16] = rand::random(); 334 format!("{NO_GC_REF_NAMESPACE}{}", hex::encode(random_bytes)) 335} 336 337fn validate_git_object_id(id: &impl ObjectId) -> Result<git2::Oid, BackendError> { 338 if id.as_bytes().len() != HASH_LENGTH { 339 return Err(BackendError::InvalidHashLength { 340 expected: HASH_LENGTH, 341 actual: id.as_bytes().len(), 342 object_type: id.object_type(), 343 hash: id.hex(), 344 }); 345 } 346 let oid = git2::Oid::from_bytes(id.as_bytes()).map_err(|err| BackendError::InvalidHash { 347 object_type: id.object_type(), 348 hash: id.hex(), 349 source: Box::new(err), 350 })?; 351 Ok(oid) 352} 353 354fn map_not_found_err(err: git2::Error, id: &impl ObjectId) -> BackendError { 355 if err.code() == git2::ErrorCode::NotFound { 356 BackendError::ObjectNotFound { 357 object_type: id.object_type(), 358 hash: id.hex(), 359 source: Box::new(err), 360 } 361 } else { 362 BackendError::ReadObject { 363 object_type: id.object_type(), 364 hash: id.hex(), 365 source: Box::new(err), 366 } 367 } 368} 369 370fn 
import_extra_metadata_entries_from_heads( 371 git_repo: &git2::Repository, 372 mut_table: &mut MutableTable, 373 _table_lock: &FileLock, 374 head_ids: &[CommitId], 375) -> BackendResult<()> { 376 let mut work_ids = head_ids 377 .iter() 378 .filter(|id| mut_table.get_value(id.as_bytes()).is_none()) 379 .cloned() 380 .collect_vec(); 381 while let Some(id) = work_ids.pop() { 382 let git_commit = git_repo 383 .find_commit(validate_git_object_id(&id)?) 384 .map_err(|err| map_not_found_err(err, &id))?; 385 let commit = commit_from_git_without_root_parent(&git_commit); 386 mut_table.add_entry(id.to_bytes(), serialize_extras(&commit)); 387 work_ids.extend( 388 commit 389 .parents 390 .into_iter() 391 .filter(|id| mut_table.get_value(id.as_bytes()).is_none()), 392 ); 393 } 394 Ok(()) 395} 396 397impl Debug for GitBackend { 398 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { 399 f.debug_struct("GitStore") 400 .field("path", &self.repo.lock().unwrap().path()) 401 .finish() 402 } 403} 404 405impl Backend for GitBackend { 406 fn as_any(&self) -> &dyn Any { 407 self 408 } 409 410 fn name(&self) -> &str { 411 "git" 412 } 413 414 fn commit_id_length(&self) -> usize { 415 HASH_LENGTH 416 } 417 418 fn change_id_length(&self) -> usize { 419 CHANGE_ID_LENGTH 420 } 421 422 fn read_file(&self, _path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> { 423 let git_blob_id = validate_git_object_id(id)?; 424 let locked_repo = self.repo.lock().unwrap(); 425 let blob = locked_repo 426 .find_blob(git_blob_id) 427 .map_err(|err| map_not_found_err(err, id))?; 428 let content = blob.content().to_owned(); 429 Ok(Box::new(Cursor::new(content))) 430 } 431 432 fn write_file(&self, _path: &RepoPath, contents: &mut dyn Read) -> BackendResult<FileId> { 433 let mut bytes = Vec::new(); 434 contents.read_to_end(&mut bytes).unwrap(); 435 let locked_repo = self.repo.lock().unwrap(); 436 let oid = locked_repo 437 .blob(&bytes) 438 .map_err(|err| BackendError::WriteObject { 439 object_type: 
"file", 440 source: Box::new(err), 441 })?; 442 Ok(FileId::new(oid.as_bytes().to_vec())) 443 } 444 445 fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> Result<String, BackendError> { 446 let git_blob_id = validate_git_object_id(id)?; 447 let locked_repo = self.repo.lock().unwrap(); 448 let blob = locked_repo 449 .find_blob(git_blob_id) 450 .map_err(|err| map_not_found_err(err, id))?; 451 let target = String::from_utf8(blob.content().to_owned()).map_err(|err| { 452 BackendError::InvalidUtf8 { 453 object_type: id.object_type(), 454 hash: id.hex(), 455 source: err, 456 } 457 })?; 458 Ok(target) 459 } 460 461 fn write_symlink(&self, _path: &RepoPath, target: &str) -> Result<SymlinkId, BackendError> { 462 let locked_repo = self.repo.lock().unwrap(); 463 let oid = locked_repo 464 .blob(target.as_bytes()) 465 .map_err(|err| BackendError::WriteObject { 466 object_type: "symlink", 467 source: Box::new(err), 468 })?; 469 Ok(SymlinkId::new(oid.as_bytes().to_vec())) 470 } 471 472 fn root_commit_id(&self) -> &CommitId { 473 &self.root_commit_id 474 } 475 476 fn root_change_id(&self) -> &ChangeId { 477 &self.root_change_id 478 } 479 480 fn empty_tree_id(&self) -> &TreeId { 481 &self.empty_tree_id 482 } 483 484 fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> { 485 if id == &self.empty_tree_id { 486 return Ok(Tree::default()); 487 } 488 let git_tree_id = validate_git_object_id(id)?; 489 490 let locked_repo = self.repo.lock().unwrap(); 491 let git_tree = locked_repo.find_tree(git_tree_id).unwrap(); 492 let mut tree = Tree::default(); 493 for entry in git_tree.iter() { 494 let name = entry.name().unwrap(); 495 let (name, value) = match entry.kind().unwrap() { 496 git2::ObjectType::Tree => { 497 let id = TreeId::from_bytes(entry.id().as_bytes()); 498 (entry.name().unwrap(), TreeValue::Tree(id)) 499 } 500 git2::ObjectType::Blob => match entry.filemode() { 501 0o100644 => { 502 let id = FileId::from_bytes(entry.id().as_bytes()); 503 if 
name.ends_with(CONFLICT_SUFFIX) { 504 ( 505 &name[0..name.len() - CONFLICT_SUFFIX.len()], 506 TreeValue::Conflict(ConflictId::from_bytes(entry.id().as_bytes())), 507 ) 508 } else { 509 ( 510 name, 511 TreeValue::File { 512 id, 513 executable: false, 514 }, 515 ) 516 } 517 } 518 0o100755 => { 519 let id = FileId::from_bytes(entry.id().as_bytes()); 520 ( 521 name, 522 TreeValue::File { 523 id, 524 executable: true, 525 }, 526 ) 527 } 528 0o120000 => { 529 let id = SymlinkId::from_bytes(entry.id().as_bytes()); 530 (name, TreeValue::Symlink(id)) 531 } 532 mode => panic!("unexpected file mode {mode:?}"), 533 }, 534 git2::ObjectType::Commit => { 535 let id = CommitId::from_bytes(entry.id().as_bytes()); 536 (name, TreeValue::GitSubmodule(id)) 537 } 538 kind => panic!("unexpected object type {kind:?}"), 539 }; 540 tree.set(RepoPathComponent::from(name), value); 541 } 542 Ok(tree) 543 } 544 545 fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> { 546 let locked_repo = self.repo.lock().unwrap(); 547 let mut builder = locked_repo.treebuilder(None).unwrap(); 548 for entry in contents.entries() { 549 let name = entry.name().string(); 550 let (name, id, filemode) = match entry.value() { 551 TreeValue::File { 552 id, 553 executable: false, 554 } => (name, id.as_bytes(), 0o100644), 555 TreeValue::File { 556 id, 557 executable: true, 558 } => (name, id.as_bytes(), 0o100755), 559 TreeValue::Symlink(id) => (name, id.as_bytes(), 0o120000), 560 TreeValue::Tree(id) => (name, id.as_bytes(), 0o040000), 561 TreeValue::GitSubmodule(id) => (name, id.as_bytes(), 0o160000), 562 TreeValue::Conflict(id) => ( 563 entry.name().string() + CONFLICT_SUFFIX, 564 id.as_bytes(), 565 0o100644, 566 ), 567 }; 568 builder 569 .insert(name, Oid::from_bytes(id).unwrap(), filemode) 570 .unwrap(); 571 } 572 let oid = builder.write().map_err(|err| BackendError::WriteObject { 573 object_type: "tree", 574 source: Box::new(err), 575 })?; 576 Ok(TreeId::from_bytes(oid.as_bytes())) 577 } 
578 579 fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> { 580 let mut file = self.read_file( 581 &RepoPath::from_internal_string("unused"), 582 &FileId::new(id.to_bytes()), 583 )?; 584 let mut data = String::new(); 585 file.read_to_string(&mut data) 586 .map_err(|err| BackendError::ReadObject { 587 object_type: "conflict".to_owned(), 588 hash: id.hex(), 589 source: err.into(), 590 })?; 591 let json: serde_json::Value = serde_json::from_str(&data).unwrap(); 592 Ok(Conflict { 593 removes: conflict_term_list_from_json(json.get("removes").unwrap()), 594 adds: conflict_term_list_from_json(json.get("adds").unwrap()), 595 }) 596 } 597 598 fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> { 599 let json = serde_json::json!({ 600 "removes": conflict_term_list_to_json(&conflict.removes), 601 "adds": conflict_term_list_to_json(&conflict.adds), 602 }); 603 let json_string = json.to_string(); 604 let bytes = json_string.as_bytes(); 605 let locked_repo = self.repo.lock().unwrap(); 606 let oid = locked_repo 607 .blob(bytes) 608 .map_err(|err| BackendError::WriteObject { 609 object_type: "conflict", 610 source: Box::new(err), 611 })?; 612 Ok(ConflictId::from_bytes(oid.as_bytes())) 613 } 614 615 #[tracing::instrument(skip(self))] 616 fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> { 617 if *id == self.root_commit_id { 618 return Ok(make_root_commit( 619 self.root_change_id().clone(), 620 self.empty_tree_id.clone(), 621 )); 622 } 623 let git_commit_id = validate_git_object_id(id)?; 624 625 let locked_repo = self.repo.lock().unwrap(); 626 let commit = locked_repo 627 .find_commit(git_commit_id) 628 .map_err(|err| map_not_found_err(err, id))?; 629 let mut commit = commit_from_git_without_root_parent(&commit); 630 if commit.parents.is_empty() { 631 commit.parents.push(self.root_commit_id.clone()); 632 }; 633 634 let table = self.cached_extra_metadata_table()?; 635 if let Some(extras) = 
table.get_value(id.as_bytes()) { 636 deserialize_extras(&mut commit, extras); 637 } else { 638 let (table, table_lock) = self.read_extra_metadata_table_locked()?; 639 if let Some(extras) = table.get_value(id.as_bytes()) { 640 // Concurrent write_commit() would update extras before taking a lock. 641 deserialize_extras(&mut commit, extras); 642 *self.cached_extra_metadata.lock().unwrap() = Some(table); 643 } else { 644 // This commit is imported from Git. Make our change id persist (otherwise 645 // future write_commit() could reassign new change id.) It's likely that 646 // the commit is a branch head, so bulk-import metadata as much as possible. 647 tracing::debug!("import extra metadata entries"); 648 let mut mut_table = table.start_mutation(); 649 // TODO(#1624): Should we read the root tree here and check if it has a 650 // `.jjconflict-...` entries? That could happen if the user used `git` to e.g. 651 // change the description of a commit with tree-level conflicts. 652 mut_table.add_entry(id.to_bytes(), serialize_extras(&commit)); 653 if commit.parents != slice::from_ref(&self.root_commit_id) { 654 import_extra_metadata_entries_from_heads( 655 &locked_repo, 656 &mut mut_table, 657 &table_lock, 658 &commit.parents, 659 )?; 660 } 661 self.save_extra_metadata_table(mut_table, &table_lock)?; 662 } 663 } 664 665 Ok(commit) 666 } 667 668 fn write_commit(&self, mut contents: Commit) -> BackendResult<(CommitId, Commit)> { 669 let locked_repo = self.repo.lock().unwrap(); 670 let git_tree_id = if let Some(tree_id) = contents.root_tree.as_resolved() { 671 validate_git_object_id(tree_id)? 672 } else { 673 write_tree_conflict(locked_repo.deref(), &contents.root_tree)? 
674 }; 675 let git_tree = locked_repo 676 .find_tree(git_tree_id) 677 .map_err(|err| map_not_found_err(err, &TreeId::from_bytes(git_tree_id.as_bytes())))?; 678 let author = signature_to_git(&contents.author); 679 let mut committer = signature_to_git(&contents.committer); 680 let message = &contents.description; 681 if contents.parents.is_empty() { 682 return Err(BackendError::Other( 683 "Cannot write a commit with no parents".into(), 684 )); 685 } 686 let mut parents = vec![]; 687 for parent_id in &contents.parents { 688 if *parent_id == self.root_commit_id { 689 // Git doesn't have a root commit, so if the parent is the root commit, we don't 690 // add it to the list of parents to write in the Git commit. We also check that 691 // there are no other parents since Git cannot represent a merge between a root 692 // commit and another commit. 693 if contents.parents.len() > 1 { 694 return Err(BackendError::Other( 695 "The Git backend does not support creating merge commits with the root \ 696 commit as one of the parents." 697 .into(), 698 )); 699 } 700 } else { 701 let git_commit_id = validate_git_object_id(parent_id)?; 702 let parent_git_commit = locked_repo 703 .find_commit(git_commit_id) 704 .map_err(|err| map_not_found_err(err, parent_id))?; 705 parents.push(parent_git_commit); 706 } 707 } 708 let parent_refs = parents.iter().collect_vec(); 709 let extras = serialize_extras(&contents); 710 // If two writers write commits of the same id with different metadata, they 711 // will both succeed and the metadata entries will be "merged" later. Since 712 // metadata entry is keyed by the commit id, one of the entries would be lost. 713 // To prevent such race condition locally, we extend the scope covered by the 714 // table lock. This is still racy if multiple machines are involved and the 715 // repository is rsync-ed. 
716 let (table, table_lock) = self.read_extra_metadata_table_locked()?; 717 let id = loop { 718 let git_id = locked_repo 719 .commit( 720 Some(&create_no_gc_ref()), 721 &author, 722 &committer, 723 message, 724 &git_tree, 725 &parent_refs, 726 ) 727 .map_err(|err| BackendError::WriteObject { 728 object_type: "commit", 729 source: Box::new(err), 730 })?; 731 let id = CommitId::from_bytes(git_id.as_bytes()); 732 match table.get_value(id.as_bytes()) { 733 Some(existing_extras) if existing_extras != extras => { 734 // It's possible a commit already exists with the same commit id but different 735 // change id. Adjust the timestamp until this is no longer the case. 736 let new_when = git2::Time::new( 737 committer.when().seconds() - 1, 738 committer.when().offset_minutes(), 739 ); 740 committer = git2::Signature::new( 741 committer.name().unwrap(), 742 committer.email().unwrap(), 743 &new_when, 744 ) 745 .unwrap(); 746 } 747 _ => { 748 break id; 749 } 750 } 751 }; 752 // Update the signatures to match the ones that were actually written to the 753 // object store 754 contents.author.timestamp.timestamp = MillisSinceEpoch(author.when().seconds() * 1000); 755 contents.committer.timestamp.timestamp = 756 MillisSinceEpoch(committer.when().seconds() * 1000); 757 let mut mut_table = table.start_mutation(); 758 mut_table.add_entry(id.to_bytes(), extras); 759 self.save_extra_metadata_table(mut_table, &table_lock)?; 760 Ok((id, contents)) 761 } 762} 763 764/// Write a tree conflict as a special tree with `.jjconflict-base-N` and 765/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd. 
fn write_tree_conflict(
    repo: &git2::Repository,
    conflict: &conflicts::Conflict<TreeId>,
) -> Result<Oid, BackendError> {
    // Every libgit2 failure in this function is reported the same way the
    // final `builder.write()` failure is, instead of panicking.
    let to_write_err = |err: git2::Error| BackendError::WriteObject {
        object_type: "tree",
        source: Box::new(err),
    };
    let mut builder = repo.treebuilder(None).map_err(to_write_err)?;
    let mut add_tree_entry = |name: String, tree_id: &TreeId| -> Result<(), BackendError> {
        // Rejects ids that are not valid git object ids rather than unwrapping.
        let tree_oid = validate_git_object_id(tree_id)?;
        builder
            .insert(name, tree_oid, 0o040000)
            .map_err(to_write_err)?;
        Ok(())
    };
    // Removed terms become `.jjconflict-base-N`, added terms
    // `.jjconflict-side-N`, each numbered by its position.
    for (i, tree_id) in conflict.removes().iter().enumerate() {
        add_tree_entry(format!(".jjconflict-base-{i}"), tree_id)?;
    }
    for (i, tree_id) in conflict.adds().iter().enumerate() {
        add_tree_entry(format!(".jjconflict-side-{i}"), tree_id)?;
    }
    builder.write().map_err(to_write_err)
}

/// Converts conflict terms to a JSON array, one object per term.
fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
    serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
}

/// Inverse of `conflict_term_list_to_json()`.
///
/// Panics if `json` is not an array or if any element is malformed.
fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> {
    json.as_array()
        .unwrap()
        .iter()
        .map(conflict_term_from_json)
        .collect()
}

/// Wraps the JSON form of the term's tree value in a `{"value": ...}` object.
fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value {
    serde_json::json!({
        "value": tree_value_to_json(&part.value),
    })
}

/// Inverse of `conflict_term_to_json()`.
///
/// Panics if the `"value"` key is missing.
fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm {
    let json_value = json.get("value").unwrap();
    ConflictTerm {
        value: tree_value_from_json(json_value),
    }
}

/// Encodes a tree value as a single-key JSON object; the key names the kind
/// of value and ids are written as hex strings.
fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
    match value {
        TreeValue::File { id, executable } => serde_json::json!({
             "file": {
                 "id": id.hex(),
                 "executable": executable,
             },
        }),
        TreeValue::Symlink(id) => serde_json::json!({
            "symlink_id": id.hex(),
        }),
        TreeValue::Tree(id) => serde_json::json!({
            "tree_id": id.hex(),
        }),
        TreeValue::GitSubmodule(id) => serde_json::json!({
            "submodule_id": id.hex(),
        }),
        TreeValue::Conflict(id) => serde_json::json!({
            "conflict_id": id.hex(),
        }),
    }
}

/// Inverse of `tree_value_to_json()`. Keys are probed in a fixed order and the
/// first one present decides the kind of value.
///
/// Panics if none of the known keys is present or if a field is malformed.
fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
    if let Some(json_file) = json.get("file") {
        TreeValue::File {
            id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())),
            executable: json_file.get("executable").unwrap().as_bool().unwrap(),
        }
    } else if let Some(json_id) = json.get("symlink_id") {
        TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id)))
    } else if let Some(json_id) = json.get("tree_id") {
        TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id)))
    } else if let Some(json_id) = json.get("submodule_id") {
        TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id)))
    } else if let Some(json_id) = json.get("conflict_id") {
        TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id)))
    } else {
        panic!("unexpected json value in conflict: {json:#?}");
    }
}

/// Decodes a hex string JSON value into bytes.
///
/// Panics if `value` is not a string or is not valid hex.
fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
    hex::decode(value.as_str().unwrap()).unwrap()
}

#[cfg(test)]
mod tests {
    use assert_matches::assert_matches;

    use super::*;
    use crate::backend::{FileId, MillisSinceEpoch};

    #[test]
    fn read_plain_git_commit() {
        let temp_dir = testutils::new_temp_dir();
        let store_path = temp_dir.path();
        let git_repo_path = temp_dir.path().join("git");
        let git_repo = git2::Repository::init(&git_repo_path).unwrap();

        // Add a commit with some files in
        let blob1 = git_repo.blob(b"content1").unwrap();
        let blob2 = git_repo.blob(b"normal").unwrap();
        let mut dir_tree_builder = git_repo.treebuilder(None).unwrap();
        dir_tree_builder.insert("normal", blob1, 0o100644).unwrap();
        dir_tree_builder.insert("symlink", blob2, 0o120000).unwrap();
        let dir_tree_id = dir_tree_builder.write().unwrap();
        let mut root_tree_builder = git_repo.treebuilder(None).unwrap();
        root_tree_builder
            .insert("dir",
dir_tree_id, 0o040000) 882 .unwrap(); 883 let root_tree_id = root_tree_builder.write().unwrap(); 884 let git_author = git2::Signature::new( 885 "git author", 886 "git.author@example.com", 887 &git2::Time::new(1000, 60), 888 ) 889 .unwrap(); 890 let git_committer = git2::Signature::new( 891 "git committer", 892 "git.committer@example.com", 893 &git2::Time::new(2000, -480), 894 ) 895 .unwrap(); 896 let git_tree = git_repo.find_tree(root_tree_id).unwrap(); 897 let git_commit_id = git_repo 898 .commit( 899 None, 900 &git_author, 901 &git_committer, 902 "git commit message", 903 &git_tree, 904 &[], 905 ) 906 .unwrap(); 907 let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263"); 908 // The change id is the leading reverse bits of the commit id 909 let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25"); 910 // Check that the git commit above got the hash we expect 911 assert_eq!(git_commit_id.as_bytes(), commit_id.as_bytes()); 912 913 let store = GitBackend::init_external(store_path, &git_repo_path).unwrap(); 914 let commit = store.read_commit(&commit_id).unwrap(); 915 assert_eq!(&commit.change_id, &change_id); 916 assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]); 917 assert_eq!(commit.predecessors, vec![]); 918 assert_eq!( 919 commit.root_tree.as_resolved().unwrap().as_bytes(), 920 root_tree_id.as_bytes() 921 ); 922 assert!(!commit.uses_tree_conflict_format); 923 assert_eq!(commit.description, "git commit message"); 924 assert_eq!(commit.author.name, "git author"); 925 assert_eq!(commit.author.email, "git.author@example.com"); 926 assert_eq!( 927 commit.author.timestamp.timestamp, 928 MillisSinceEpoch(1000 * 1000) 929 ); 930 assert_eq!(commit.author.timestamp.tz_offset, 60); 931 assert_eq!(commit.committer.name, "git committer"); 932 assert_eq!(commit.committer.email, "git.committer@example.com"); 933 assert_eq!( 934 commit.committer.timestamp.timestamp, 935 MillisSinceEpoch(2000 * 1000) 936 ); 937 
assert_eq!(commit.committer.timestamp.tz_offset, -480);

        let root_tree = store
            .read_tree(
                &RepoPath::root(),
                &TreeId::from_bytes(root_tree_id.as_bytes()),
            )
            .unwrap();
        let mut root_entries = root_tree.entries();
        let dir = root_entries.next().unwrap();
        assert_eq!(root_entries.next(), None);
        assert_eq!(dir.name().as_str(), "dir");
        assert_eq!(
            dir.value(),
            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
        );

        let dir_tree = store
            .read_tree(
                &RepoPath::from_internal_string("dir"),
                &TreeId::from_bytes(dir_tree_id.as_bytes()),
            )
            .unwrap();
        let mut entries = dir_tree.entries();
        let file = entries.next().unwrap();
        let symlink = entries.next().unwrap();
        assert_eq!(entries.next(), None);
        assert_eq!(file.name().as_str(), "normal");
        assert_eq!(
            file.value(),
            &TreeValue::File {
                id: FileId::from_bytes(blob1.as_bytes()),
                executable: false
            }
        );
        assert_eq!(symlink.name().as_str(), "symlink");
        assert_eq!(
            symlink.value(),
            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
        );
    }

    /// Checks how a commit's parents are written to the git commit object:
    /// an empty parent list and a merge involving the root commit are
    /// rejected, the root commit alone yields a git commit without parents,
    /// and all other parents are stored in order.
    #[test]
    fn git_commit_parents() {
        let temp_dir = testutils::new_temp_dir();
        let store_path = temp_dir.path();
        let git_repo_path = temp_dir.path().join("git");
        let git_repo = git2::Repository::init(&git_repo_path).unwrap();

        let backend = GitBackend::init_external(store_path, &git_repo_path).unwrap();
        // Parent ids as recorded in the git commit object for `id`.
        let git_parent_ids = |id: &CommitId| {
            git_repo
                .find_commit(git_id(id))
                .unwrap()
                .parent_ids()
                .collect_vec()
        };
        let mut commit = Commit {
            parents: vec![],
            predecessors: vec![],
            root_tree: conflicts::Conflict::resolved(backend.empty_tree_id().clone()),
            uses_tree_conflict_format: false,
            change_id: ChangeId::from_hex("abc123"),
            description: "".to_string(),
            author: create_signature(),
            committer: create_signature(),
        };

        // An empty parent list is rejected.
        commit.parents = vec![];
        assert_matches!(
            backend.write_commit(commit.clone()),
            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
        );

        // Root commit as the sole parent: the git commit has no parents.
        commit.parents = vec![backend.root_commit_id().clone()];
        let (first_id, _) = backend.write_commit(commit.clone()).unwrap();
        assert_eq!(backend.read_commit(&first_id).unwrap(), commit);
        assert_eq!(git_parent_ids(&first_id), vec![]);

        // A single non-root parent is stored as-is.
        commit.parents = vec![first_id.clone()];
        let (second_id, _) = backend.write_commit(commit.clone()).unwrap();
        assert_eq!(backend.read_commit(&second_id).unwrap(), commit);
        assert_eq!(git_parent_ids(&second_id), vec![git_id(&first_id)]);

        // A merge keeps both parents, in order.
        commit.parents = vec![first_id.clone(), second_id.clone()];
        let (merge_id, _) = backend.write_commit(commit.clone()).unwrap();
        assert_eq!(backend.read_commit(&merge_id).unwrap(), commit);
        assert_eq!(
            git_parent_ids(&merge_id),
            vec![git_id(&first_id), git_id(&second_id)]
        );

        // A merge that includes the root commit is rejected.
        commit.parents = vec![first_id, backend.root_commit_id().clone()];
        assert_matches!(
            backend.write_commit(commit),
            Err(BackendError::Other(err)) if err.to_string().contains("root commit")
        );
    }

    #[test]
    fn write_tree_conflicts() {
        let temp_dir = testutils::new_temp_dir();
        let store_path = temp_dir.path();
        let git_repo_path = temp_dir.path().join("git");
        let git_repo = git2::Repository::init(&git_repo_path).unwrap();
let backend = GitBackend::init_external(store_path, &git_repo_path).unwrap();
        // Builds a one-file tree whose file name and blob content both depend
        // on `i`. The content goes through `format!` because a byte-string
        // literal such as `b"content {i}"` is never interpolated.
        let create_tree = |i| {
            let blob_id = git_repo.blob(format!("content {i}").as_bytes()).unwrap();
            let mut tree_builder = git_repo.treebuilder(None).unwrap();
            tree_builder
                .insert(format!("file{i}"), blob_id, 0o100644)
                .unwrap();
            TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
        };

        let root_tree = conflicts::Conflict::new(
            vec![create_tree(0), create_tree(1)],
            vec![create_tree(2), create_tree(3), create_tree(4)],
        );
        let mut commit = Commit {
            parents: vec![backend.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: root_tree.clone(),
            uses_tree_conflict_format: true,
            change_id: ChangeId::from_hex("abc123"),
            description: "".to_string(),
            author: create_signature(),
            committer: create_signature(),
        };

        // When writing a tree-level conflict, the root tree on the git side has the
        // individual trees as subtrees.
        let read_commit_id = backend.write_commit(commit.clone()).unwrap().0;
        let read_commit = backend.read_commit(&read_commit_id).unwrap();
        assert_eq!(read_commit, commit);
        let git_commit = git_repo.find_commit(git_id(&read_commit_id)).unwrap();
        let git_tree = git_repo.find_tree(git_commit.tree_id()).unwrap();
        assert!(git_tree.iter().all(|entry| entry.filemode() == 0o040000));
        // Expected subtree entries, in the order the git tree yields them.
        let expected_entries = [
            (".jjconflict-base-0", &root_tree.removes()[0]),
            (".jjconflict-base-1", &root_tree.removes()[1]),
            (".jjconflict-side-0", &root_tree.adds()[0]),
            (".jjconflict-side-1", &root_tree.adds()[1]),
            (".jjconflict-side-2", &root_tree.adds()[2]),
        ];
        let mut iter = git_tree.iter();
        for (name, tree_id) in expected_entries {
            let entry = iter.next().unwrap();
            assert_eq!(entry.name(), Some(name));
            assert_eq!(entry.id().as_bytes(), tree_id.as_bytes());
        }
        assert!(iter.next().is_none());

        // When writing a single tree using the new format, it's represented by a
        // regular git tree.
        commit.root_tree = conflicts::Conflict::resolved(create_tree(5));
        let read_commit_id = backend.write_commit(commit.clone()).unwrap().0;
        let read_commit = backend.read_commit(&read_commit_id).unwrap();
        assert_eq!(read_commit, commit);
        let git_commit = git_repo.find_commit(git_id(&read_commit_id)).unwrap();
        assert_eq!(
            git_commit.tree_id().as_bytes(),
            commit.root_tree.adds()[0].as_bytes()
        );
    }

    /// Writing a commit creates a git ref under `refs/jj/keep/` that points at
    /// the new commit.
    #[test]
    fn commit_has_ref() {
        let temp_dir = testutils::new_temp_dir();
        let store = GitBackend::init_internal(temp_dir.path()).unwrap();
        let commit = Commit {
            parents: vec![store.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: conflicts::Conflict::resolved(store.empty_tree_id().clone()),
            uses_tree_conflict_format: false,
            change_id: ChangeId::new(vec![]),
            description: "initial".to_string(),
            author: create_signature(),
            committer: create_signature(),
        };
        let commit_id = store.write_commit(commit).unwrap().0;
        let git_refs = store
            .git_repo()
            .references_glob("refs/jj/keep/*")
            .unwrap()
            .map(|git_ref| git_ref.unwrap().target().unwrap())
            .collect_vec();
        assert_eq!(git_refs, vec![git_id(&commit_id)]);
    }

    /// Writing a commit whose content would collide with an existing git
    /// commit must still produce a distinct commit id.
    #[test]
    fn overlapping_git_commit_id() {
        let temp_dir = testutils::new_temp_dir();
        let store = GitBackend::init_internal(temp_dir.path()).unwrap();
        let mut commit1 = Commit {
            parents: vec![store.root_commit_id().clone()],
            predecessors: vec![],
            root_tree: conflicts::Conflict::resolved(store.empty_tree_id().clone()),
            uses_tree_conflict_format: false,
            change_id: ChangeId::new(vec![]),
            description: "initial".to_string(),
            author: create_signature(),
            committer: create_signature(),
        };
        // libgit2 doesn't seem to preserve negative timestamps, so set it to at least 1
        // second after the epoch, so the timestamp adjustment can remove 1
        // second and it will still be nonnegative
        commit1.committer.timestamp.timestamp = MillisSinceEpoch(1000);
        let (commit_id1, mut commit2) = store.write_commit(commit1).unwrap();
        commit2.predecessors.push(commit_id1.clone());
        // `write_commit` should prevent the ids from being the same by changing the
        // committer timestamp of the commit it actually writes.
        let (commit_id2, mut actual_commit2) = store.write_commit(commit2.clone()).unwrap();
        // The returned commit matches the one stored under the returned ID
        assert_eq!(store.read_commit(&commit_id2).unwrap(), actual_commit2);
        assert_ne!(commit_id2, commit_id1);
        // The committer timestamp should differ
        assert_ne!(
            actual_commit2.committer.timestamp.timestamp,
            commit2.committer.timestamp.timestamp
        );
        // The rest of the commit should be the same
        actual_commit2.committer.timestamp.timestamp =
            commit2.committer.timestamp.timestamp.clone();
        assert_eq!(actual_commit2, commit2);
    }

    /// Converts a `CommitId` into the `git2::Oid` with the same bytes.
    ///
    /// Panics if the bytes are not a valid object id.
    fn git_id(commit_id: &CommitId) -> Oid {
        Oid::from_bytes(commit_id.as_bytes()).unwrap()
    }

    /// Returns a fixed signature (timestamp 0, UTC offset 0) shared by the
    /// tests in this module.
    fn create_signature() -> Signature {
        Signature {
            name: "Someone".to_string(),
            email: "someone@example.com".to_string(),
            timestamp: Timestamp {
                timestamp: MillisSinceEpoch(0),
                tz_offset: 0,
            },
        }
    }
}