From c5526a551e0e27e620405385abdbfbf72d015b4f Mon Sep 17 00:00:00 2001 From: Samuel Shuert Date: Thu, 9 Oct 2025 17:10:22 +0000 Subject: [PATCH] lexicons,appview/pulls: upload patches as blobs Change-Id: tyqoqrtkqlontnksmprwzstynxloosqu By using blobs we massively increase our maximum patch size. When stored directly on the record, patches can be at most 2 MiB. By moving the patch to a blob, the limit becomes 50 MiB by default (smaller if the PDS has configured a lower limit). The legacy field name `patch` in the `repo.pull` lexicon is preserved (but not used) to support some level of backward compatibility. Signed-off-by: Samuel Shuert Signed-off-by: Seongmin Lee --- api/tangled/cbor_gen.go | 99 ++++++++++++++++++++++++++++++++-------- api/tangled/repopull.go | 21 +++++---- appview/models/pull.go | 2 +- appview/pulls/pulls.go | 84 +++++++++++++++++++--------------- lexicons/pulls/pull.json | 10 +++- 5 files changed, 148 insertions(+), 68 deletions(-) diff --git a/api/tangled/cbor_gen.go b/api/tangled/cbor_gen.go index 93c51a29..5974f133 100644 --- a/api/tangled/cbor_gen.go +++ b/api/tangled/cbor_gen.go @@ -7934,7 +7934,7 @@ func (t *RepoPull) MarshalCBOR(w io.Writer) error { } cw := cbg.NewCborWriter(w) - fieldCount := 9 + fieldCount := 10 if t.Body == nil { fieldCount-- @@ -7944,6 +7944,10 @@ func (t *RepoPull) MarshalCBOR(w io.Writer) error { fieldCount-- } + if t.Patch == nil { + fieldCount-- + } + if t.References == nil { fieldCount-- } @@ -8008,26 +8012,35 @@ func (t *RepoPull) MarshalCBOR(w io.Writer) error { } // t.Patch (string) (string) - if len("patch") > 1000000 { - return xerrors.Errorf("Value in field \"patch\" was too long") - } + if t.Patch != nil { - if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("patch"))); err != nil { - return err - } - if _, err := cw.WriteString(string("patch")); err != nil { - return err - } + if len("patch") > 1000000 { + return xerrors.Errorf("Value in field \"patch\" was too long") + } - if len(t.Patch) > 1000000 { - return 
xerrors.Errorf("Value in field t.Patch was too long") - } + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("patch"))); err != nil { + return err + } + if _, err := cw.WriteString(string("patch")); err != nil { + return err + } - if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len(t.Patch))); err != nil { - return err - } - if _, err := cw.WriteString(string(t.Patch)); err != nil { - return err + if t.Patch == nil { + if _, err := cw.Write(cbg.CborNull); err != nil { + return err + } + } else { + if len(*t.Patch) > 1000000 { + return xerrors.Errorf("Value in field t.Patch was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len(*t.Patch))); err != nil { + return err + } + if _, err := cw.WriteString(string(*t.Patch)); err != nil { + return err + } + } } // t.Title (string) (string) @@ -8147,6 +8160,22 @@ func (t *RepoPull) MarshalCBOR(w io.Writer) error { return err } + // t.PatchBlob (util.LexBlob) (struct) + if len("patchBlob") > 1000000 { + return xerrors.Errorf("Value in field \"patchBlob\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("patchBlob"))); err != nil { + return err + } + if _, err := cw.WriteString(string("patchBlob")); err != nil { + return err + } + + if err := t.PatchBlob.MarshalCBOR(cw); err != nil { + return err + } + // t.References ([]string) (slice) if t.References != nil { @@ -8262,12 +8291,22 @@ func (t *RepoPull) UnmarshalCBOR(r io.Reader) (err error) { case "patch": { - sval, err := cbg.ReadStringWithMax(cr, 1000000) + b, err := cr.ReadByte() if err != nil { return err } + if b != cbg.CborNull[0] { + if err := cr.UnreadByte(); err != nil { + return err + } + + sval, err := cbg.ReadStringWithMax(cr, 1000000) + if err != nil { + return err + } - t.Patch = string(sval) + t.Patch = (*string)(&sval) + } } // t.Title (string) (string) case "title": @@ -8371,6 +8410,26 @@ func (t *RepoPull) UnmarshalCBOR(r io.Reader) (err error) { t.CreatedAt = 
string(sval) } + // t.PatchBlob (util.LexBlob) (struct) + case "patchBlob": + + { + + b, err := cr.ReadByte() + if err != nil { + return err + } + if b != cbg.CborNull[0] { + if err := cr.UnreadByte(); err != nil { + return err + } + t.PatchBlob = new(util.LexBlob) + if err := t.PatchBlob.UnmarshalCBOR(cr); err != nil { + return xerrors.Errorf("unmarshaling t.PatchBlob pointer: %w", err) + } + } + + } // t.References ([]string) (slice) case "references": diff --git a/api/tangled/repopull.go b/api/tangled/repopull.go index 3292367b..d923855b 100644 --- a/api/tangled/repopull.go +++ b/api/tangled/repopull.go @@ -17,15 +17,18 @@ func init() { } // // RECORDTYPE: RepoPull type RepoPull struct { - LexiconTypeID string `json:"$type,const=sh.tangled.repo.pull" cborgen:"$type,const=sh.tangled.repo.pull"` - Body *string `json:"body,omitempty" cborgen:"body,omitempty"` - CreatedAt string `json:"createdAt" cborgen:"createdAt"` - Mentions []string `json:"mentions,omitempty" cborgen:"mentions,omitempty"` - Patch string `json:"patch" cborgen:"patch"` - References []string `json:"references,omitempty" cborgen:"references,omitempty"` - Source *RepoPull_Source `json:"source,omitempty" cborgen:"source,omitempty"` - Target *RepoPull_Target `json:"target" cborgen:"target"` - Title string `json:"title" cborgen:"title"` + LexiconTypeID string `json:"$type,const=sh.tangled.repo.pull" cborgen:"$type,const=sh.tangled.repo.pull"` + Body *string `json:"body,omitempty" cborgen:"body,omitempty"` + CreatedAt string `json:"createdAt" cborgen:"createdAt"` + Mentions []string `json:"mentions,omitempty" cborgen:"mentions,omitempty"` + // patch: (deprecated) use patchBlob instead + Patch *string `json:"patch,omitempty" cborgen:"patch,omitempty"` + // patchBlob: patch content + PatchBlob *util.LexBlob `json:"patchBlob" cborgen:"patchBlob"` + References []string `json:"references,omitempty" cborgen:"references,omitempty"` + Source *RepoPull_Source `json:"source,omitempty" cborgen:"source,omitempty"` + 
Target *RepoPull_Target `json:"target" cborgen:"target"` + Title string `json:"title" cborgen:"title"` } // RepoPull_Source is a "source" in the sh.tangled.repo.pull schema. diff --git a/appview/models/pull.go b/appview/models/pull.go index af34e9ea..28397911 100644 --- a/appview/models/pull.go +++ b/appview/models/pull.go @@ -83,6 +83,7 @@ type Pull struct { Repo *Repo } +// NOTE: This method does not include patch blob in returned atproto record func (p Pull) AsRecord() tangled.RepoPull { var source *tangled.RepoPull_Source if p.PullSource != nil { @@ -113,7 +114,6 @@ func (p Pull) AsRecord() tangled.RepoPull { Repo: p.RepoAt.String(), Branch: p.TargetBranch, }, - Patch: p.LatestPatch(), Source: source, } return record diff --git a/appview/pulls/pulls.go b/appview/pulls/pulls.go index bce62607..25b3abd7 100644 --- a/appview/pulls/pulls.go +++ b/appview/pulls/pulls.go @@ -1241,6 +1241,13 @@ func (s *Pulls) createPullRequest( return } + blob, err := comatproto.RepoUploadBlob(r.Context(), client, strings.NewReader(patch)) + if err != nil { + log.Println("failed to upload patch", err) + s.pages.Notice(w, "pull", "Failed to create pull request. Try again later.") + return + } + _, err = comatproto.RepoPutRecord(r.Context(), client, &comatproto.RepoPutRecord_Input{ Collection: tangled.RepoPullNSID, Repo: user.Did, @@ -1252,7 +1259,7 @@ func (s *Pulls) createPullRequest( Repo: string(repo.RepoAt()), Branch: targetBranch, }, - Patch: patch, + PatchBlob: blob.Blob, Source: recordPullSource, CreatedAt: time.Now().Format(time.RFC3339), }, @@ -1328,8 +1335,16 @@ func (s *Pulls) createStackedPullRequest( // apply all record creations at once var writes []*comatproto.RepoApplyWrites_Input_Writes_Elem for _, p := range stack { + blob, err := comatproto.RepoUploadBlob(r.Context(), client, strings.NewReader(p.LatestPatch())) + if err != nil { + log.Println("failed to upload patch blob", err) + s.pages.Notice(w, "pull", "Failed to create pull request. 
Try again later.") + return + } + record := p.AsRecord() - write := comatproto.RepoApplyWrites_Input_Writes_Elem{ + record.PatchBlob = blob.Blob + writes = append(writes, &comatproto.RepoApplyWrites_Input_Writes_Elem{ RepoApplyWrites_Create: &comatproto.RepoApplyWrites_Create{ Collection: tangled.RepoPullNSID, Rkey: &p.Rkey, @@ -1337,8 +1352,7 @@ func (s *Pulls) createStackedPullRequest( Val: &record, }, }, - } - writes = append(writes, &write) + }) } _, err = comatproto.RepoApplyWrites(r.Context(), client, &comatproto.RepoApplyWrites_Input{ Repo: user.Did, @@ -1871,21 +1885,15 @@ func (s *Pulls) resubmitPullHelper( return } - var recordPullSource *tangled.RepoPull_Source - if pull.IsBranchBased() { - recordPullSource = &tangled.RepoPull_Source{ - Branch: pull.PullSource.Branch, - Sha: sourceRev, - } - } - if pull.IsForkBased() { - repoAt := pull.PullSource.RepoAt.String() - recordPullSource = &tangled.RepoPull_Source{ - Branch: pull.PullSource.Branch, - Repo: &repoAt, - Sha: sourceRev, - } + blob, err := comatproto.RepoUploadBlob(r.Context(), client, strings.NewReader(patch)) + if err != nil { + log.Println("failed to upload patch blob", err) + s.pages.Notice(w, "resubmit-error", "Failed to update pull request on the PDS. 
Try again later.") + return } + record := pull.AsRecord() + record.PatchBlob = blob.Blob + record.CreatedAt = time.Now().Format(time.RFC3339) _, err = comatproto.RepoPutRecord(r.Context(), client, &comatproto.RepoPutRecord_Input{ Collection: tangled.RepoPullNSID, @@ -1893,16 +1901,7 @@ func (s *Pulls) resubmitPullHelper( Rkey: pull.Rkey, SwapRecord: ex.Cid, Record: &lexutil.LexiconTypeDecoder{ - Val: &tangled.RepoPull{ - Title: pull.Title, - Target: &tangled.RepoPull_Target{ - Repo: string(repo.RepoAt()), - Branch: pull.TargetBranch, - }, - Patch: patch, // new patch - Source: recordPullSource, - CreatedAt: time.Now().Format(time.RFC3339), - }, + Val: &record, }, }) if err != nil { @@ -1988,6 +1987,13 @@ func (s *Pulls) resubmitStackedPullHelper( } defer tx.Rollback() + client, err := s.oauth.AuthorizedClient(r) + if err != nil { + log.Println("failed to authorize client") + s.pages.Notice(w, "resubmit-error", "Failed to create pull request. Try again later.") + return + } + // pds updates to make var writes []*comatproto.RepoApplyWrites_Input_Writes_Elem @@ -2021,7 +2027,14 @@ func (s *Pulls) resubmitStackedPullHelper( return } + blob, err := comatproto.RepoUploadBlob(r.Context(), client, strings.NewReader(patch)) + if err != nil { + log.Println("failed to upload patch blob", err) + s.pages.Notice(w, "resubmit-error", "Failed to update pull request on the PDS. Try again later.") + return + } record := p.AsRecord() + record.PatchBlob = blob.Blob writes = append(writes, &comatproto.RepoApplyWrites_Input_Writes_Elem{ RepoApplyWrites_Create: &comatproto.RepoApplyWrites_Create{ Collection: tangled.RepoPullNSID, @@ -2056,8 +2069,14 @@ func (s *Pulls) resubmitStackedPullHelper( return } + blob, err := comatproto.RepoUploadBlob(r.Context(), client, strings.NewReader(patch)) + if err != nil { + log.Println("failed to upload patch blob", err) + s.pages.Notice(w, "resubmit-error", "Failed to update pull request on the PDS. 
Try again later.") + return + } record := np.AsRecord() - + record.PatchBlob = blob.Blob writes = append(writes, &comatproto.RepoApplyWrites_Input_Writes_Elem{ RepoApplyWrites_Update: &comatproto.RepoApplyWrites_Update{ Collection: tangled.RepoPullNSID, @@ -2094,13 +2113,6 @@ func (s *Pulls) resubmitStackedPullHelper( return } - client, err := s.oauth.AuthorizedClient(r) - if err != nil { - log.Println("failed to authorize client") - s.pages.Notice(w, "resubmit-error", "Failed to create pull request. Try again later.") - return - } - _, err = comatproto.RepoApplyWrites(r.Context(), client, &comatproto.RepoApplyWrites_Input{ Repo: user.Did, Writes: writes, diff --git a/lexicons/pulls/pull.json b/lexicons/pulls/pull.json index e2527e47..8e5e7c9e 100644 --- a/lexicons/pulls/pull.json +++ b/lexicons/pulls/pull.json @@ -12,7 +12,7 @@ "required": [ "target", "title", - "patch", + "patchBlob", "createdAt" ], "properties": { @@ -27,7 +27,13 @@ "type": "string" }, "patch": { - "type": "string" + "type": "string", + "description": "(deprecated) use patchBlob instead" + }, + "patchBlob": { + "type": "blob", + "accept": "text/x-patch", + "description": "patch content" }, "source": { "type": "ref", -- 2.43.0