just playing with tangled
1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::any::Any;
18use std::fmt::{Debug, Error, Formatter};
19use std::io::{Cursor, Read};
20use std::ops::Deref;
21use std::path::Path;
22use std::sync::{Arc, Mutex, MutexGuard};
23use std::{fs, slice};
24
25use git2::Oid;
26use itertools::Itertools;
27use prost::Message;
28use thiserror::Error;
29
30use crate::backend::{
31 make_root_commit, Backend, BackendError, BackendInitError, BackendLoadError, BackendResult,
32 ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, FileId, MillisSinceEpoch,
33 ObjectId, Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue,
34};
35use crate::conflicts;
36use crate::file_util::{IoResultExt as _, PathError};
37use crate::lock::FileLock;
38use crate::repo_path::{RepoPath, RepoPathComponent};
39use crate::stacked_table::{
40 MutableTable, ReadonlyTable, TableSegment, TableStore, TableStoreError,
41};
42
/// Length in bytes of the object ids (commits, trees, files, ...) handled by
/// this backend.
const HASH_LENGTH: usize = 20;
/// Length in bytes of the change ids handled by this backend.
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
pub const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
/// Suffix appended to the name of a tree entry that stores a conflict.
const CONFLICT_SUFFIX: &str = ".jjconflict";
48
/// Errors that can occur while initializing a new [`GitBackend`].
#[derive(Debug, Error)]
pub enum GitBackendInitError {
    /// Creating the Git repository failed.
    #[error("Failed to initialize git repository: {0}")]
    InitRepository(#[source] git2::Error),
    /// Opening the Git repository failed.
    #[error("Failed to open git repository: {0}")]
    OpenRepository(#[source] git2::Error),
    /// A filesystem operation on one of the store paths failed.
    #[error(transparent)]
    Path(#[from] PathError),
}
58
59impl From<GitBackendInitError> for BackendInitError {
60 fn from(err: GitBackendInitError) -> Self {
61 BackendInitError(err.into())
62 }
63}
64
/// Errors that can occur while loading an existing [`GitBackend`].
#[derive(Debug, Error)]
pub enum GitBackendLoadError {
    /// Opening the Git repository failed.
    #[error("Failed to open git repository: {0}")]
    OpenRepository(#[source] git2::Error),
    /// A filesystem operation on one of the store paths failed.
    #[error(transparent)]
    Path(#[from] PathError),
}
72
73impl From<GitBackendLoadError> for BackendLoadError {
74 fn from(err: GitBackendLoadError) -> Self {
75 BackendLoadError(err.into())
76 }
77}
78
/// `GitBackend`-specific error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum GitBackendError {
    /// Reading from the extra (non-git) metadata table store failed.
    #[error("Failed to read non-git metadata: {0}")]
    ReadMetadata(#[source] TableStoreError),
    /// Saving to the extra (non-git) metadata table store failed.
    #[error("Failed to write non-git metadata: {0}")]
    WriteMetadata(#[source] TableStoreError),
}
87
88impl From<GitBackendError> for BackendError {
89 fn from(err: GitBackendError) -> Self {
90 BackendError::Other(err.into())
91 }
92}
93
/// Backend that stores objects in a Git repository and keeps additional,
/// non-git commit metadata in a separate table store.
pub struct GitBackend {
    /// The underlying Git repository, guarded by a mutex.
    repo: Mutex<git2::Repository>,
    /// Id of the virtual root commit.
    root_commit_id: CommitId,
    /// Change id of the virtual root commit.
    root_change_id: ChangeId,
    /// Id of the empty tree.
    empty_tree_id: TreeId,
    /// Store holding the extra (non-git) per-commit metadata tables.
    extra_metadata_store: TableStore,
    /// Most recently loaded or saved head of the extra metadata table, if
    /// any.
    cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
}
102
103impl GitBackend {
104 fn new(repo: git2::Repository, extra_metadata_store: TableStore) -> Self {
105 let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
106 let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
107 let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
108 GitBackend {
109 repo: Mutex::new(repo),
110 root_commit_id,
111 root_change_id,
112 empty_tree_id,
113 extra_metadata_store,
114 cached_extra_metadata: Mutex::new(None),
115 }
116 }
117
118 pub fn init_internal(store_path: &Path) -> Result<Self, GitBackendInitError> {
119 let git_repo = git2::Repository::init_bare(store_path.join("git"))
120 .map_err(GitBackendInitError::InitRepository)?;
121 let extra_path = store_path.join("extra");
122 fs::create_dir(&extra_path).context(&extra_path)?;
123 let target_path = store_path.join("git_target");
124 fs::write(&target_path, b"git").context(&target_path)?;
125 let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
126 Ok(GitBackend::new(git_repo, extra_metadata_store))
127 }
128
129 pub fn init_external(
130 store_path: &Path,
131 git_repo_path: &Path,
132 ) -> Result<Self, GitBackendInitError> {
133 let extra_path = store_path.join("extra");
134 fs::create_dir(&extra_path).context(&extra_path)?;
135 let target_path = store_path.join("git_target");
136 fs::write(&target_path, git_repo_path.to_str().unwrap().as_bytes())
137 .context(&target_path)?;
138 let repo = git2::Repository::open(store_path.join(git_repo_path))
139 .map_err(GitBackendInitError::OpenRepository)?;
140 let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
141 Ok(GitBackend::new(repo, extra_metadata_store))
142 }
143
144 pub fn load(store_path: &Path) -> Result<Self, GitBackendLoadError> {
145 let git_repo_path = {
146 let target_path = store_path.join("git_target");
147 let git_repo_path_str = fs::read_to_string(&target_path).context(&target_path)?;
148 let git_repo_path = store_path.join(git_repo_path_str);
149 git_repo_path.canonicalize().context(&git_repo_path)?
150 };
151 let repo =
152 git2::Repository::open(git_repo_path).map_err(GitBackendLoadError::OpenRepository)?;
153 let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
154 Ok(GitBackend::new(repo, extra_metadata_store))
155 }
156
157 pub fn git_repo(&self) -> MutexGuard<'_, git2::Repository> {
158 self.repo.lock().unwrap()
159 }
160
161 pub fn git_repo_clone(&self) -> git2::Repository {
162 let path = self.repo.lock().unwrap().path().to_owned();
163 git2::Repository::open(path).unwrap()
164 }
165
166 fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
167 let mut locked_head = self.cached_extra_metadata.lock().unwrap();
168 match locked_head.as_ref() {
169 Some(head) => Ok(head.clone()),
170 None => {
171 let table = self
172 .extra_metadata_store
173 .get_head()
174 .map_err(GitBackendError::ReadMetadata)?;
175 *locked_head = Some(table.clone());
176 Ok(table)
177 }
178 }
179 }
180
181 fn read_extra_metadata_table_locked(&self) -> BackendResult<(Arc<ReadonlyTable>, FileLock)> {
182 let table = self
183 .extra_metadata_store
184 .get_head_locked()
185 .map_err(GitBackendError::ReadMetadata)?;
186 Ok(table)
187 }
188
189 fn save_extra_metadata_table(
190 &self,
191 mut_table: MutableTable,
192 _table_lock: &FileLock,
193 ) -> BackendResult<()> {
194 let table = self
195 .extra_metadata_store
196 .save_table(mut_table)
197 .map_err(GitBackendError::WriteMetadata)?;
198 // Since the parent table was the head, saved table are likely to be new head.
199 // If it's not, cache will be reloaded when entry can't be found.
200 *self.cached_extra_metadata.lock().unwrap() = Some(table);
201 Ok(())
202 }
203}
204
205fn commit_from_git_without_root_parent(commit: &git2::Commit) -> Commit {
206 // We reverse the bits of the commit id to create the change id. We don't want
207 // to use the first bytes unmodified because then it would be ambiguous
208 // if a given hash prefix refers to the commit id or the change id. It
209 // would have been enough to pick the last 16 bytes instead of the
210 // leading 16 bytes to address that. We also reverse the bits to make it less
211 // likely that users depend on any relationship between the two ids.
212 let change_id = ChangeId::new(
213 commit.id().as_bytes()[4..HASH_LENGTH]
214 .iter()
215 .rev()
216 .map(|b| b.reverse_bits())
217 .collect(),
218 );
219 let parents = commit
220 .parent_ids()
221 .map(|oid| CommitId::from_bytes(oid.as_bytes()))
222 .collect_vec();
223 let tree_id = TreeId::from_bytes(commit.tree_id().as_bytes());
224 // If this commit is a conflict, we'll update the root tree later, when we read
225 // the extra metadata.
226 let root_tree = conflicts::Conflict::resolved(tree_id);
227 let description = commit.message().unwrap_or("<no message>").to_owned();
228 let author = signature_from_git(commit.author());
229 let committer = signature_from_git(commit.committer());
230
231 Commit {
232 parents,
233 predecessors: vec![],
234 root_tree,
235 // If this commit has associated extra metadata, we may set this later.
236 uses_tree_conflict_format: false,
237 change_id,
238 description,
239 author,
240 committer,
241 }
242}
243
244fn signature_from_git(signature: git2::Signature) -> Signature {
245 let name = signature.name().unwrap_or("<no name>").to_owned();
246 let email = signature.email().unwrap_or("<no email>").to_owned();
247 let timestamp = MillisSinceEpoch(signature.when().seconds() * 1000);
248 let tz_offset = signature.when().offset_minutes();
249 Signature {
250 name,
251 email,
252 timestamp: Timestamp {
253 timestamp,
254 tz_offset,
255 },
256 }
257}
258
259fn signature_to_git(signature: &Signature) -> git2::Signature<'static> {
260 let name = &signature.name;
261 let email = &signature.email;
262 let time = git2::Time::new(
263 signature.timestamp.timestamp.0.div_euclid(1000),
264 signature.timestamp.tz_offset,
265 );
266 git2::Signature::new(name, email, &time).unwrap()
267}
268
269fn serialize_extras(commit: &Commit) -> Vec<u8> {
270 let mut proto = crate::protos::git_store::Commit {
271 change_id: commit.change_id.to_bytes(),
272 uses_tree_conflict_format: commit.uses_tree_conflict_format,
273 ..Default::default()
274 };
275 if commit.root_tree.as_resolved().is_none() {
276 assert!(commit.uses_tree_conflict_format);
277 let removes = commit
278 .root_tree
279 .removes()
280 .iter()
281 .map(|r| r.to_bytes())
282 .collect_vec();
283 let adds = commit
284 .root_tree
285 .adds()
286 .iter()
287 .map(|r| r.to_bytes())
288 .collect_vec();
289 let conflict = crate::protos::git_store::TreeConflict { removes, adds };
290 proto.root_tree = Some(crate::protos::git_store::commit::RootTree::Conflict(
291 conflict,
292 ));
293 }
294 for predecessor in &commit.predecessors {
295 proto.predecessors.push(predecessor.to_bytes());
296 }
297 proto.encode_to_vec()
298}
299
300fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
301 let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
302 commit.change_id = ChangeId::new(proto.change_id);
303 commit.uses_tree_conflict_format = proto.uses_tree_conflict_format;
304 match proto.root_tree {
305 Some(crate::protos::git_store::commit::RootTree::Conflict(proto_conflict)) => {
306 assert!(commit.uses_tree_conflict_format);
307 commit.root_tree = conflicts::Conflict::new(
308 proto_conflict
309 .removes
310 .iter()
311 .map(|id_bytes| TreeId::from_bytes(id_bytes))
312 .collect(),
313 proto_conflict
314 .adds
315 .iter()
316 .map(|id_bytes| TreeId::from_bytes(id_bytes))
317 .collect(),
318 );
319 }
320 Some(crate::protos::git_store::commit::RootTree::Resolved(_)) => {
321 panic!("found resolved root tree in extras (should only be written to git metadata)");
322 }
323 None => {}
324 }
325 for predecessor in &proto.predecessors {
326 commit.predecessors.push(CommitId::from_bytes(predecessor));
327 }
328}
329
330/// Creates a random ref in refs/jj/. Used for preventing GC of commits we
331/// create.
332fn create_no_gc_ref() -> String {
333 let random_bytes: [u8; 16] = rand::random();
334 format!("{NO_GC_REF_NAMESPACE}{}", hex::encode(random_bytes))
335}
336
337fn validate_git_object_id(id: &impl ObjectId) -> Result<git2::Oid, BackendError> {
338 if id.as_bytes().len() != HASH_LENGTH {
339 return Err(BackendError::InvalidHashLength {
340 expected: HASH_LENGTH,
341 actual: id.as_bytes().len(),
342 object_type: id.object_type(),
343 hash: id.hex(),
344 });
345 }
346 let oid = git2::Oid::from_bytes(id.as_bytes()).map_err(|err| BackendError::InvalidHash {
347 object_type: id.object_type(),
348 hash: id.hex(),
349 source: Box::new(err),
350 })?;
351 Ok(oid)
352}
353
354fn map_not_found_err(err: git2::Error, id: &impl ObjectId) -> BackendError {
355 if err.code() == git2::ErrorCode::NotFound {
356 BackendError::ObjectNotFound {
357 object_type: id.object_type(),
358 hash: id.hex(),
359 source: Box::new(err),
360 }
361 } else {
362 BackendError::ReadObject {
363 object_type: id.object_type(),
364 hash: id.hex(),
365 source: Box::new(err),
366 }
367 }
368}
369
370fn import_extra_metadata_entries_from_heads(
371 git_repo: &git2::Repository,
372 mut_table: &mut MutableTable,
373 _table_lock: &FileLock,
374 head_ids: &[CommitId],
375) -> BackendResult<()> {
376 let mut work_ids = head_ids
377 .iter()
378 .filter(|id| mut_table.get_value(id.as_bytes()).is_none())
379 .cloned()
380 .collect_vec();
381 while let Some(id) = work_ids.pop() {
382 let git_commit = git_repo
383 .find_commit(validate_git_object_id(&id)?)
384 .map_err(|err| map_not_found_err(err, &id))?;
385 let commit = commit_from_git_without_root_parent(&git_commit);
386 mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
387 work_ids.extend(
388 commit
389 .parents
390 .into_iter()
391 .filter(|id| mut_table.get_value(id.as_bytes()).is_none()),
392 );
393 }
394 Ok(())
395}
396
397impl Debug for GitBackend {
398 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
399 f.debug_struct("GitStore")
400 .field("path", &self.repo.lock().unwrap().path())
401 .finish()
402 }
403}
404
405impl Backend for GitBackend {
406 fn as_any(&self) -> &dyn Any {
407 self
408 }
409
410 fn name(&self) -> &str {
411 "git"
412 }
413
414 fn commit_id_length(&self) -> usize {
415 HASH_LENGTH
416 }
417
418 fn change_id_length(&self) -> usize {
419 CHANGE_ID_LENGTH
420 }
421
422 fn read_file(&self, _path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> {
423 let git_blob_id = validate_git_object_id(id)?;
424 let locked_repo = self.repo.lock().unwrap();
425 let blob = locked_repo
426 .find_blob(git_blob_id)
427 .map_err(|err| map_not_found_err(err, id))?;
428 let content = blob.content().to_owned();
429 Ok(Box::new(Cursor::new(content)))
430 }
431
432 fn write_file(&self, _path: &RepoPath, contents: &mut dyn Read) -> BackendResult<FileId> {
433 let mut bytes = Vec::new();
434 contents.read_to_end(&mut bytes).unwrap();
435 let locked_repo = self.repo.lock().unwrap();
436 let oid = locked_repo
437 .blob(&bytes)
438 .map_err(|err| BackendError::WriteObject {
439 object_type: "file",
440 source: Box::new(err),
441 })?;
442 Ok(FileId::new(oid.as_bytes().to_vec()))
443 }
444
445 fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> Result<String, BackendError> {
446 let git_blob_id = validate_git_object_id(id)?;
447 let locked_repo = self.repo.lock().unwrap();
448 let blob = locked_repo
449 .find_blob(git_blob_id)
450 .map_err(|err| map_not_found_err(err, id))?;
451 let target = String::from_utf8(blob.content().to_owned()).map_err(|err| {
452 BackendError::InvalidUtf8 {
453 object_type: id.object_type(),
454 hash: id.hex(),
455 source: err,
456 }
457 })?;
458 Ok(target)
459 }
460
461 fn write_symlink(&self, _path: &RepoPath, target: &str) -> Result<SymlinkId, BackendError> {
462 let locked_repo = self.repo.lock().unwrap();
463 let oid = locked_repo
464 .blob(target.as_bytes())
465 .map_err(|err| BackendError::WriteObject {
466 object_type: "symlink",
467 source: Box::new(err),
468 })?;
469 Ok(SymlinkId::new(oid.as_bytes().to_vec()))
470 }
471
472 fn root_commit_id(&self) -> &CommitId {
473 &self.root_commit_id
474 }
475
476 fn root_change_id(&self) -> &ChangeId {
477 &self.root_change_id
478 }
479
480 fn empty_tree_id(&self) -> &TreeId {
481 &self.empty_tree_id
482 }
483
484 fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
485 if id == &self.empty_tree_id {
486 return Ok(Tree::default());
487 }
488 let git_tree_id = validate_git_object_id(id)?;
489
490 let locked_repo = self.repo.lock().unwrap();
491 let git_tree = locked_repo.find_tree(git_tree_id).unwrap();
492 let mut tree = Tree::default();
493 for entry in git_tree.iter() {
494 let name = entry.name().unwrap();
495 let (name, value) = match entry.kind().unwrap() {
496 git2::ObjectType::Tree => {
497 let id = TreeId::from_bytes(entry.id().as_bytes());
498 (entry.name().unwrap(), TreeValue::Tree(id))
499 }
500 git2::ObjectType::Blob => match entry.filemode() {
501 0o100644 => {
502 let id = FileId::from_bytes(entry.id().as_bytes());
503 if name.ends_with(CONFLICT_SUFFIX) {
504 (
505 &name[0..name.len() - CONFLICT_SUFFIX.len()],
506 TreeValue::Conflict(ConflictId::from_bytes(entry.id().as_bytes())),
507 )
508 } else {
509 (
510 name,
511 TreeValue::File {
512 id,
513 executable: false,
514 },
515 )
516 }
517 }
518 0o100755 => {
519 let id = FileId::from_bytes(entry.id().as_bytes());
520 (
521 name,
522 TreeValue::File {
523 id,
524 executable: true,
525 },
526 )
527 }
528 0o120000 => {
529 let id = SymlinkId::from_bytes(entry.id().as_bytes());
530 (name, TreeValue::Symlink(id))
531 }
532 mode => panic!("unexpected file mode {mode:?}"),
533 },
534 git2::ObjectType::Commit => {
535 let id = CommitId::from_bytes(entry.id().as_bytes());
536 (name, TreeValue::GitSubmodule(id))
537 }
538 kind => panic!("unexpected object type {kind:?}"),
539 };
540 tree.set(RepoPathComponent::from(name), value);
541 }
542 Ok(tree)
543 }
544
545 fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
546 let locked_repo = self.repo.lock().unwrap();
547 let mut builder = locked_repo.treebuilder(None).unwrap();
548 for entry in contents.entries() {
549 let name = entry.name().string();
550 let (name, id, filemode) = match entry.value() {
551 TreeValue::File {
552 id,
553 executable: false,
554 } => (name, id.as_bytes(), 0o100644),
555 TreeValue::File {
556 id,
557 executable: true,
558 } => (name, id.as_bytes(), 0o100755),
559 TreeValue::Symlink(id) => (name, id.as_bytes(), 0o120000),
560 TreeValue::Tree(id) => (name, id.as_bytes(), 0o040000),
561 TreeValue::GitSubmodule(id) => (name, id.as_bytes(), 0o160000),
562 TreeValue::Conflict(id) => (
563 entry.name().string() + CONFLICT_SUFFIX,
564 id.as_bytes(),
565 0o100644,
566 ),
567 };
568 builder
569 .insert(name, Oid::from_bytes(id).unwrap(), filemode)
570 .unwrap();
571 }
572 let oid = builder.write().map_err(|err| BackendError::WriteObject {
573 object_type: "tree",
574 source: Box::new(err),
575 })?;
576 Ok(TreeId::from_bytes(oid.as_bytes()))
577 }
578
579 fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
580 let mut file = self.read_file(
581 &RepoPath::from_internal_string("unused"),
582 &FileId::new(id.to_bytes()),
583 )?;
584 let mut data = String::new();
585 file.read_to_string(&mut data)
586 .map_err(|err| BackendError::ReadObject {
587 object_type: "conflict".to_owned(),
588 hash: id.hex(),
589 source: err.into(),
590 })?;
591 let json: serde_json::Value = serde_json::from_str(&data).unwrap();
592 Ok(Conflict {
593 removes: conflict_term_list_from_json(json.get("removes").unwrap()),
594 adds: conflict_term_list_from_json(json.get("adds").unwrap()),
595 })
596 }
597
598 fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
599 let json = serde_json::json!({
600 "removes": conflict_term_list_to_json(&conflict.removes),
601 "adds": conflict_term_list_to_json(&conflict.adds),
602 });
603 let json_string = json.to_string();
604 let bytes = json_string.as_bytes();
605 let locked_repo = self.repo.lock().unwrap();
606 let oid = locked_repo
607 .blob(bytes)
608 .map_err(|err| BackendError::WriteObject {
609 object_type: "conflict",
610 source: Box::new(err),
611 })?;
612 Ok(ConflictId::from_bytes(oid.as_bytes()))
613 }
614
615 #[tracing::instrument(skip(self))]
616 fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
617 if *id == self.root_commit_id {
618 return Ok(make_root_commit(
619 self.root_change_id().clone(),
620 self.empty_tree_id.clone(),
621 ));
622 }
623 let git_commit_id = validate_git_object_id(id)?;
624
625 let locked_repo = self.repo.lock().unwrap();
626 let commit = locked_repo
627 .find_commit(git_commit_id)
628 .map_err(|err| map_not_found_err(err, id))?;
629 let mut commit = commit_from_git_without_root_parent(&commit);
630 if commit.parents.is_empty() {
631 commit.parents.push(self.root_commit_id.clone());
632 };
633
634 let table = self.cached_extra_metadata_table()?;
635 if let Some(extras) = table.get_value(id.as_bytes()) {
636 deserialize_extras(&mut commit, extras);
637 } else {
638 let (table, table_lock) = self.read_extra_metadata_table_locked()?;
639 if let Some(extras) = table.get_value(id.as_bytes()) {
640 // Concurrent write_commit() would update extras before taking a lock.
641 deserialize_extras(&mut commit, extras);
642 *self.cached_extra_metadata.lock().unwrap() = Some(table);
643 } else {
644 // This commit is imported from Git. Make our change id persist (otherwise
645 // future write_commit() could reassign new change id.) It's likely that
646 // the commit is a branch head, so bulk-import metadata as much as possible.
647 tracing::debug!("import extra metadata entries");
648 let mut mut_table = table.start_mutation();
649 // TODO(#1624): Should we read the root tree here and check if it has a
650 // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
651 // change the description of a commit with tree-level conflicts.
652 mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
653 if commit.parents != slice::from_ref(&self.root_commit_id) {
654 import_extra_metadata_entries_from_heads(
655 &locked_repo,
656 &mut mut_table,
657 &table_lock,
658 &commit.parents,
659 )?;
660 }
661 self.save_extra_metadata_table(mut_table, &table_lock)?;
662 }
663 }
664
665 Ok(commit)
666 }
667
668 fn write_commit(&self, mut contents: Commit) -> BackendResult<(CommitId, Commit)> {
669 let locked_repo = self.repo.lock().unwrap();
670 let git_tree_id = if let Some(tree_id) = contents.root_tree.as_resolved() {
671 validate_git_object_id(tree_id)?
672 } else {
673 write_tree_conflict(locked_repo.deref(), &contents.root_tree)?
674 };
675 let git_tree = locked_repo
676 .find_tree(git_tree_id)
677 .map_err(|err| map_not_found_err(err, &TreeId::from_bytes(git_tree_id.as_bytes())))?;
678 let author = signature_to_git(&contents.author);
679 let mut committer = signature_to_git(&contents.committer);
680 let message = &contents.description;
681 if contents.parents.is_empty() {
682 return Err(BackendError::Other(
683 "Cannot write a commit with no parents".into(),
684 ));
685 }
686 let mut parents = vec![];
687 for parent_id in &contents.parents {
688 if *parent_id == self.root_commit_id {
689 // Git doesn't have a root commit, so if the parent is the root commit, we don't
690 // add it to the list of parents to write in the Git commit. We also check that
691 // there are no other parents since Git cannot represent a merge between a root
692 // commit and another commit.
693 if contents.parents.len() > 1 {
694 return Err(BackendError::Other(
695 "The Git backend does not support creating merge commits with the root \
696 commit as one of the parents."
697 .into(),
698 ));
699 }
700 } else {
701 let git_commit_id = validate_git_object_id(parent_id)?;
702 let parent_git_commit = locked_repo
703 .find_commit(git_commit_id)
704 .map_err(|err| map_not_found_err(err, parent_id))?;
705 parents.push(parent_git_commit);
706 }
707 }
708 let parent_refs = parents.iter().collect_vec();
709 let extras = serialize_extras(&contents);
710 // If two writers write commits of the same id with different metadata, they
711 // will both succeed and the metadata entries will be "merged" later. Since
712 // metadata entry is keyed by the commit id, one of the entries would be lost.
713 // To prevent such race condition locally, we extend the scope covered by the
714 // table lock. This is still racy if multiple machines are involved and the
715 // repository is rsync-ed.
716 let (table, table_lock) = self.read_extra_metadata_table_locked()?;
717 let id = loop {
718 let git_id = locked_repo
719 .commit(
720 Some(&create_no_gc_ref()),
721 &author,
722 &committer,
723 message,
724 &git_tree,
725 &parent_refs,
726 )
727 .map_err(|err| BackendError::WriteObject {
728 object_type: "commit",
729 source: Box::new(err),
730 })?;
731 let id = CommitId::from_bytes(git_id.as_bytes());
732 match table.get_value(id.as_bytes()) {
733 Some(existing_extras) if existing_extras != extras => {
734 // It's possible a commit already exists with the same commit id but different
735 // change id. Adjust the timestamp until this is no longer the case.
736 let new_when = git2::Time::new(
737 committer.when().seconds() - 1,
738 committer.when().offset_minutes(),
739 );
740 committer = git2::Signature::new(
741 committer.name().unwrap(),
742 committer.email().unwrap(),
743 &new_when,
744 )
745 .unwrap();
746 }
747 _ => {
748 break id;
749 }
750 }
751 };
752 // Update the signatures to match the ones that were actually written to the
753 // object store
754 contents.author.timestamp.timestamp = MillisSinceEpoch(author.when().seconds() * 1000);
755 contents.committer.timestamp.timestamp =
756 MillisSinceEpoch(committer.when().seconds() * 1000);
757 let mut mut_table = table.start_mutation();
758 mut_table.add_entry(id.to_bytes(), extras);
759 self.save_extra_metadata_table(mut_table, &table_lock)?;
760 Ok((id, contents))
761 }
762}
763
764/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
765/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
766fn write_tree_conflict(
767 repo: &git2::Repository,
768 conflict: &conflicts::Conflict<TreeId>,
769) -> Result<Oid, BackendError> {
770 let mut builder = repo.treebuilder(None).unwrap();
771 let mut add_tree_entry = |name, tree_id: &TreeId| {
772 let tree_oid = Oid::from_bytes(tree_id.as_bytes()).unwrap();
773 builder.insert(name, tree_oid, 0o040000).unwrap();
774 };
775 for (i, tree_id) in conflict.removes().iter().enumerate() {
776 add_tree_entry(format!(".jjconflict-base-{i}"), tree_id);
777 }
778 for (i, tree_id) in conflict.adds().iter().enumerate() {
779 add_tree_entry(format!(".jjconflict-side-{i}"), tree_id);
780 }
781 builder.write().map_err(|err| BackendError::WriteObject {
782 object_type: "tree",
783 source: Box::new(err),
784 })
785}
786
787fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
788 serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
789}
790
791fn conflict_term_list_from_json(json: &serde_json::Value) -> Vec<ConflictTerm> {
792 json.as_array()
793 .unwrap()
794 .iter()
795 .map(conflict_term_from_json)
796 .collect()
797}
798
799fn conflict_term_to_json(part: &ConflictTerm) -> serde_json::Value {
800 serde_json::json!({
801 "value": tree_value_to_json(&part.value),
802 })
803}
804
805fn conflict_term_from_json(json: &serde_json::Value) -> ConflictTerm {
806 let json_value = json.get("value").unwrap();
807 ConflictTerm {
808 value: tree_value_from_json(json_value),
809 }
810}
811
812fn tree_value_to_json(value: &TreeValue) -> serde_json::Value {
813 match value {
814 TreeValue::File { id, executable } => serde_json::json!({
815 "file": {
816 "id": id.hex(),
817 "executable": executable,
818 },
819 }),
820 TreeValue::Symlink(id) => serde_json::json!({
821 "symlink_id": id.hex(),
822 }),
823 TreeValue::Tree(id) => serde_json::json!({
824 "tree_id": id.hex(),
825 }),
826 TreeValue::GitSubmodule(id) => serde_json::json!({
827 "submodule_id": id.hex(),
828 }),
829 TreeValue::Conflict(id) => serde_json::json!({
830 "conflict_id": id.hex(),
831 }),
832 }
833}
834
835fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
836 if let Some(json_file) = json.get("file") {
837 TreeValue::File {
838 id: FileId::new(bytes_vec_from_json(json_file.get("id").unwrap())),
839 executable: json_file.get("executable").unwrap().as_bool().unwrap(),
840 }
841 } else if let Some(json_id) = json.get("symlink_id") {
842 TreeValue::Symlink(SymlinkId::new(bytes_vec_from_json(json_id)))
843 } else if let Some(json_id) = json.get("tree_id") {
844 TreeValue::Tree(TreeId::new(bytes_vec_from_json(json_id)))
845 } else if let Some(json_id) = json.get("submodule_id") {
846 TreeValue::GitSubmodule(CommitId::new(bytes_vec_from_json(json_id)))
847 } else if let Some(json_id) = json.get("conflict_id") {
848 TreeValue::Conflict(ConflictId::new(bytes_vec_from_json(json_id)))
849 } else {
850 panic!("unexpected json value in conflict: {json:#?}");
851 }
852}
853
854fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
855 hex::decode(value.as_str().unwrap()).unwrap()
856}
857
858#[cfg(test)]
859mod tests {
860 use assert_matches::assert_matches;
861
862 use super::*;
863 use crate::backend::{FileId, MillisSinceEpoch};
864
    /// Creates a commit with plain `git2` and checks that the backend reads
    /// the commit and its trees back with the expected contents.
    #[test]
    fn read_plain_git_commit() {
        let temp_dir = testutils::new_temp_dir();
        let store_path = temp_dir.path();
        let git_repo_path = temp_dir.path().join("git");
        let git_repo = git2::Repository::init(&git_repo_path).unwrap();

        // Add a commit with some files in
        let blob1 = git_repo.blob(b"content1").unwrap();
        let blob2 = git_repo.blob(b"normal").unwrap();
        let mut dir_tree_builder = git_repo.treebuilder(None).unwrap();
        dir_tree_builder.insert("normal", blob1, 0o100644).unwrap();
        dir_tree_builder.insert("symlink", blob2, 0o120000).unwrap();
        let dir_tree_id = dir_tree_builder.write().unwrap();
        let mut root_tree_builder = git_repo.treebuilder(None).unwrap();
        root_tree_builder
            .insert("dir", dir_tree_id, 0o040000)
            .unwrap();
        let root_tree_id = root_tree_builder.write().unwrap();
        let git_author = git2::Signature::new(
            "git author",
            "git.author@example.com",
            &git2::Time::new(1000, 60),
        )
        .unwrap();
        let git_committer = git2::Signature::new(
            "git committer",
            "git.committer@example.com",
            &git2::Time::new(2000, -480),
        )
        .unwrap();
        let git_tree = git_repo.find_tree(root_tree_id).unwrap();
        // The commit has no parents and is not attached to any ref.
        let git_commit_id = git_repo
            .commit(
                None,
                &git_author,
                &git_committer,
                "git commit message",
                &git_tree,
                &[],
            )
            .unwrap();
        let commit_id = CommitId::from_hex("efdcea5ca4b3658149f899ca7feee6876d077263");
        // The change id is the leading reverse bits of the commit id
        let change_id = ChangeId::from_hex("c64ee0b6e16777fe53991f9281a6cd25");
        // Check that the git commit above got the hash we expect
        assert_eq!(git_commit_id.as_bytes(), commit_id.as_bytes());

        // Read the commit back through the backend and check every field.
        let store = GitBackend::init_external(store_path, &git_repo_path).unwrap();
        let commit = store.read_commit(&commit_id).unwrap();
        assert_eq!(&commit.change_id, &change_id);
        // A parentless Git commit is reported with the root commit as parent.
        assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
        assert_eq!(commit.predecessors, vec![]);
        assert_eq!(
            commit.root_tree.as_resolved().unwrap().as_bytes(),
            root_tree_id.as_bytes()
        );
        assert!(!commit.uses_tree_conflict_format);
        assert_eq!(commit.description, "git commit message");
        assert_eq!(commit.author.name, "git author");
        assert_eq!(commit.author.email, "git.author@example.com");
        // Git timestamps are in seconds; the backend reports milliseconds.
        assert_eq!(
            commit.author.timestamp.timestamp,
            MillisSinceEpoch(1000 * 1000)
        );
        assert_eq!(commit.author.timestamp.tz_offset, 60);
        assert_eq!(commit.committer.name, "git committer");
        assert_eq!(commit.committer.email, "git.committer@example.com");
        assert_eq!(
            commit.committer.timestamp.timestamp,
            MillisSinceEpoch(2000 * 1000)
        );
        assert_eq!(commit.committer.timestamp.tz_offset, -480);

        // The root tree contains only the `dir` subtree.
        let root_tree = store
            .read_tree(
                &RepoPath::root(),
                &TreeId::from_bytes(root_tree_id.as_bytes()),
            )
            .unwrap();
        let mut root_entries = root_tree.entries();
        let dir = root_entries.next().unwrap();
        assert_eq!(root_entries.next(), None);
        assert_eq!(dir.name().as_str(), "dir");
        assert_eq!(
            dir.value(),
            &TreeValue::Tree(TreeId::from_bytes(dir_tree_id.as_bytes()))
        );

        // The `dir` subtree contains the regular file and the symlink.
        let dir_tree = store
            .read_tree(
                &RepoPath::from_internal_string("dir"),
                &TreeId::from_bytes(dir_tree_id.as_bytes()),
            )
            .unwrap();
        let mut entries = dir_tree.entries();
        let file = entries.next().unwrap();
        let symlink = entries.next().unwrap();
        assert_eq!(entries.next(), None);
        assert_eq!(file.name().as_str(), "normal");
        assert_eq!(
            file.value(),
            &TreeValue::File {
                id: FileId::from_bytes(blob1.as_bytes()),
                executable: false
            }
        );
        assert_eq!(symlink.name().as_str(), "symlink");
        assert_eq!(
            symlink.value(),
            &TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
        );
    }
978
/// Checks how `write_commit` maps a commit's parent list onto git parents.
///
/// Covered cases: an empty parent list is rejected, the root commit as the
/// sole parent yields a git commit without parents, ordinary and merge
/// parents are written through unchanged, and a merge that includes the root
/// commit is rejected.
#[test]
fn git_commit_parents() {
    let temp_dir = testutils::new_temp_dir();
    let store_path = temp_dir.path();
    let git_repo_path = store_path.join("git");
    let git_repo = git2::Repository::init(&git_repo_path).unwrap();

    let backend = GitBackend::init_external(store_path, &git_repo_path).unwrap();
    let root_id = backend.root_commit_id().clone();

    // Every commit written below is identical except for its parents.
    let template = Commit {
        parents: vec![],
        predecessors: vec![],
        root_tree: conflicts::Conflict::resolved(backend.empty_tree_id().clone()),
        uses_tree_conflict_format: false,
        change_id: ChangeId::from_hex("abc123"),
        description: "".to_string(),
        author: create_signature(),
        committer: create_signature(),
    };
    let commit_with_parents = |parents: Vec<CommitId>| Commit {
        parents,
        ..template.clone()
    };
    // Reads the parent ids straight from the underlying git repository.
    let git_parents_of = |id: &CommitId| {
        git_repo
            .find_commit(git_id(id))
            .unwrap()
            .parent_ids()
            .collect_vec()
    };

    // No parents
    let orphan = commit_with_parents(vec![]);
    assert_matches!(
        backend.write_commit(orphan),
        Err(BackendError::Other(err)) if err.to_string().contains("no parents")
    );

    // Only root commit as parent
    let first = commit_with_parents(vec![root_id.clone()]);
    let (first_id, _) = backend.write_commit(first.clone()).unwrap();
    assert_eq!(backend.read_commit(&first_id).unwrap(), first);
    assert_eq!(git_parents_of(&first_id), vec![]);

    // Only non-root commit as parent
    let second = commit_with_parents(vec![first_id.clone()]);
    let (second_id, _) = backend.write_commit(second.clone()).unwrap();
    assert_eq!(backend.read_commit(&second_id).unwrap(), second);
    assert_eq!(git_parents_of(&second_id), vec![git_id(&first_id)]);

    // Merge commit
    let merge = commit_with_parents(vec![first_id.clone(), second_id.clone()]);
    let (merge_id, _) = backend.write_commit(merge.clone()).unwrap();
    assert_eq!(backend.read_commit(&merge_id).unwrap(), merge);
    assert_eq!(
        git_parents_of(&merge_id),
        vec![git_id(&first_id), git_id(&second_id)]
    );

    // Merge commit with root as one parent
    let root_merge = commit_with_parents(vec![first_id, root_id]);
    assert_matches!(
        backend.write_commit(root_merge),
        Err(BackendError::Other(err)) if err.to_string().contains("root commit")
    );
}
1043
/// Checks how tree-level conflicts are represented on the git side.
///
/// A conflicted root tree is written as a git tree whose entries are the
/// individual conflict terms, named `.jjconflict-base-N` for removes and
/// `.jjconflict-side-N` for adds. A resolved root tree written with the same
/// format flag is stored as a plain git tree.
#[test]
fn write_tree_conflicts() {
    let temp_dir = testutils::new_temp_dir();
    let store_path = temp_dir.path();
    let git_repo_path = temp_dir.path().join("git");
    let git_repo = git2::Repository::init(&git_repo_path).unwrap();

    let backend = GitBackend::init_external(store_path, &git_repo_path).unwrap();
    // Builds a one-file tree whose file name and content both depend on `i`.
    let create_tree = |i| {
        // A byte-string literal is not interpolated, so the content has to be
        // formatted first to actually differ between trees.
        let blob_id = git_repo.blob(format!("content {i}").as_bytes()).unwrap();
        let mut tree_builder = git_repo.treebuilder(None).unwrap();
        tree_builder
            .insert(format!("file{i}"), blob_id, 0o100644)
            .unwrap();
        TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
    };

    let root_tree = conflicts::Conflict::new(
        vec![create_tree(0), create_tree(1)],
        vec![create_tree(2), create_tree(3), create_tree(4)],
    );
    let mut commit = Commit {
        parents: vec![backend.root_commit_id().clone()],
        predecessors: vec![],
        root_tree: root_tree.clone(),
        uses_tree_conflict_format: true,
        change_id: ChangeId::from_hex("abc123"),
        description: "".to_string(),
        author: create_signature(),
        committer: create_signature(),
    };

    // When writing a tree-level conflict, the root tree on the git side has the
    // individual trees as subtrees.
    let read_commit_id = backend.write_commit(commit.clone()).unwrap().0;
    let read_commit = backend.read_commit(&read_commit_id).unwrap();
    assert_eq!(read_commit, commit);
    let git_commit = git_repo
        .find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap())
        .unwrap();
    let git_tree = git_repo.find_tree(git_commit.tree_id()).unwrap();
    // Every entry must be a directory (git tree mode).
    assert!(git_tree.iter().all(|entry| entry.filemode() == 0o040000));
    let mut iter = git_tree.iter();
    let entry = iter.next().unwrap();
    assert_eq!(entry.name(), Some(".jjconflict-base-0"));
    assert_eq!(entry.id().as_bytes(), root_tree.removes()[0].as_bytes());
    let entry = iter.next().unwrap();
    assert_eq!(entry.name(), Some(".jjconflict-base-1"));
    assert_eq!(entry.id().as_bytes(), root_tree.removes()[1].as_bytes());
    let entry = iter.next().unwrap();
    assert_eq!(entry.name(), Some(".jjconflict-side-0"));
    assert_eq!(entry.id().as_bytes(), root_tree.adds()[0].as_bytes());
    let entry = iter.next().unwrap();
    assert_eq!(entry.name(), Some(".jjconflict-side-1"));
    assert_eq!(entry.id().as_bytes(), root_tree.adds()[1].as_bytes());
    let entry = iter.next().unwrap();
    assert_eq!(entry.name(), Some(".jjconflict-side-2"));
    assert_eq!(entry.id().as_bytes(), root_tree.adds()[2].as_bytes());
    assert!(iter.next().is_none());

    // When writing a single tree using the new format, it's represented by a
    // regular git tree.
    commit.root_tree = conflicts::Conflict::resolved(create_tree(5));
    let read_commit_id = backend.write_commit(commit.clone()).unwrap().0;
    let read_commit = backend.read_commit(&read_commit_id).unwrap();
    assert_eq!(read_commit, commit);
    let git_commit = git_repo
        .find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap())
        .unwrap();
    assert_eq!(
        git_commit.tree_id().as_bytes(),
        commit.root_tree.adds()[0].as_bytes()
    );
}
1118
/// Checks that writing a commit leaves exactly one ref matching
/// `refs/jj/keep/*`, and that this ref targets the newly written commit.
#[test]
fn commit_has_ref() {
    let temp_dir = testutils::new_temp_dir();
    let backend = GitBackend::init_internal(temp_dir.path()).unwrap();

    let epoch = Timestamp {
        timestamp: MillisSinceEpoch(0),
        tz_offset: 0,
    };
    let author = Signature {
        name: "Someone".to_string(),
        email: "someone@example.com".to_string(),
        timestamp: epoch,
    };
    let committer = author.clone();
    let root_id = backend.root_commit_id().clone();
    let empty_tree_id = backend.empty_tree_id().clone();
    let commit = Commit {
        parents: vec![root_id],
        predecessors: vec![],
        root_tree: conflicts::Conflict::resolved(empty_tree_id),
        uses_tree_conflict_format: false,
        change_id: ChangeId::new(vec![]),
        description: "initial".to_string(),
        author,
        committer,
    };
    let (commit_id, _) = backend.write_commit(commit).unwrap();

    // Collect the targets of all refs matching the glob.
    let git_repo = backend.git_repo();
    let kept_targets: Vec<_> = git_repo
        .references_glob("refs/jj/keep/*")
        .unwrap()
        .map(|git_ref| git_ref.unwrap().target().unwrap())
        .collect();
    assert_eq!(kept_targets, vec![git_id(&commit_id)]);
}
1150
/// Checks that writing a second commit that differs from the first only in
/// its predecessors produces a different commit id, and that the commit
/// returned by `write_commit` differs from the requested one only in its
/// committer timestamp.
#[test]
fn overlapping_git_commit_id() {
    let temp_dir = testutils::new_temp_dir();
    let store = GitBackend::init_internal(temp_dir.path()).unwrap();

    // libgit2 doesn't seem to preserve negative timestamps, so start at least
    // 1 second after the epoch; the timestamp adjustment can then remove 1
    // second and the result is still nonnegative.
    let mut committer = create_signature();
    committer.timestamp.timestamp = MillisSinceEpoch(1000);
    let commit1 = Commit {
        parents: vec![store.root_commit_id().clone()],
        predecessors: vec![],
        root_tree: conflicts::Conflict::resolved(store.empty_tree_id().clone()),
        uses_tree_conflict_format: false,
        change_id: ChangeId::new(vec![]),
        description: "initial".to_string(),
        author: create_signature(),
        committer,
    };

    let (commit_id1, mut commit2) = store.write_commit(commit1).unwrap();
    commit2.predecessors.push(commit_id1.clone());
    // `write_commit` should prevent the ids from being the same by changing the
    // committer timestamp of the commit it actually writes.
    let (commit_id2, written2) = store.write_commit(commit2.clone()).unwrap();

    // The returned commit is the one stored under the returned id.
    assert_eq!(store.read_commit(&commit_id2).unwrap(), written2);
    assert_ne!(commit_id2, commit_id1);
    // The committer timestamp should differ from the requested one.
    assert_ne!(
        written2.committer.timestamp.timestamp,
        commit2.committer.timestamp.timestamp
    );

    // Apart from that timestamp, the written commit equals the requested one.
    let mut normalized = written2;
    normalized.committer.timestamp.timestamp = commit2.committer.timestamp.timestamp.clone();
    assert_eq!(normalized, commit2);
}
1187
1188 fn git_id(commit_id: &CommitId) -> Oid {
1189 Oid::from_bytes(commit_id.as_bytes()).unwrap()
1190 }
1191
1192 fn create_signature() -> Signature {
1193 Signature {
1194 name: "Someone".to_string(),
1195 email: "someone@example.com".to_string(),
1196 timestamp: Timestamp {
1197 timestamp: MillisSinceEpoch(0),
1198 tz_offset: 0,
1199 },
1200 }
1201 }
1202}