loading up the forgejo repo on tangled to test page performance
1// Copyright 2023 The Gitea Authors. All rights reserved.
2// SPDX-License-Identifier: MIT
3
4package debian
5
6import (
7 "archive/tar"
8 "bufio"
9 "compress/gzip"
10 "io"
11 "net/mail"
12 "regexp"
13 "strings"
14
15 "forgejo.org/modules/util"
16 "forgejo.org/modules/validation"
17 "forgejo.org/modules/zstd"
18
19 "github.com/blakesmith/ar"
20 "github.com/ulikunitz/xz"
21)
22
// Keys of the Debian-specific properties
// NOTE(review): where these properties are stored and read is not visible in this file
const (
	PropertyDistribution               = "debian.distribution"
	PropertyComponent                  = "debian.component"
	PropertyArchitecture               = "debian.architecture"
	PropertyControl                    = "debian.control"
	PropertyRepositoryIncludeInRelease = "debian.repository.include_in_release"
)

// Keys of the settings holding the private and the public key
// NOTE(review): presumably the repository signing key pair - confirm against the callers
const (
	SettingKeyPrivate = "debian.key.private"
	SettingKeyPublic  = "debian.key.public"
)

// Package name and version reserved for repository data
// NOTE(review): presumably an internal placeholder package - confirm against the callers
const (
	RepositoryPackage = "_debian"
	RepositoryVersion = "_repository"
)

// controlTar is the name prefix of the archive member which contains the control file
const controlTar = "control.tar"
38
39var (
40 ErrMissingControlFile = util.NewInvalidArgumentErrorf("control file is missing")
41 ErrUnsupportedCompression = util.NewInvalidArgumentErrorf("unsupported compression algorithm")
42 ErrInvalidName = util.NewInvalidArgumentErrorf("package name is invalid")
43 ErrInvalidVersion = util.NewInvalidArgumentErrorf("package version is invalid")
44 ErrInvalidArchitecture = util.NewInvalidArgumentErrorf("package architecture is invalid")
45
46 // https://www.debian.org/doc/debian-policy/ch-controlfields.html#source
47 namePattern = regexp.MustCompile(`\A[a-z0-9][a-z0-9+-.]+\z`)
48 // https://www.debian.org/doc/debian-policy/ch-controlfields.html#version
49 versionPattern = regexp.MustCompile(`\A(?:[0-9]:)?[a-zA-Z0-9.+~]+(?:-[a-zA-Z0-9.+-~]+)?\z`)
50)
51
52type Package struct {
53 Name string
54 Version string
55 Architecture string
56 Control string
57 Metadata *Metadata
58}
59
// Metadata holds the descriptive values of a package
// Empty fields are omitted from the JSON representation
type Metadata struct {
	// Maintainer is the display name parsed from the "Maintainer" field, or the
	// raw field value if it can't be parsed as a mail address with a name
	Maintainer string `json:"maintainer,omitempty"`
	// ProjectURL is the value of the "Homepage" field if it is a valid URL
	ProjectURL string `json:"project_url,omitempty"`
	// Description is the value of the "Description" field including its continuation lines
	Description string `json:"description,omitempty"`
	// Dependencies are the comma separated entries of the "Depends" field
	Dependencies []string `json:"dependencies,omitempty"`
}
66
67// ParsePackage parses the Debian package file
68// https://manpages.debian.org/bullseye/dpkg-dev/deb.5.en.html
69func ParsePackage(r io.Reader) (*Package, error) {
70 arr := ar.NewReader(r)
71
72 for {
73 hd, err := arr.Next()
74 if err == io.EOF {
75 break
76 }
77 if err != nil {
78 return nil, err
79 }
80
81 if strings.HasPrefix(hd.Name, controlTar) {
82 var inner io.Reader
83 // https://man7.org/linux/man-pages/man5/deb-split.5.html#FORMAT
84 // The file names might contain a trailing slash (since dpkg 1.15.6).
85 switch strings.TrimSuffix(hd.Name[len(controlTar):], "/") {
86 case "":
87 inner = arr
88 case ".gz":
89 gzr, err := gzip.NewReader(arr)
90 if err != nil {
91 return nil, err
92 }
93 defer gzr.Close()
94
95 inner = gzr
96 case ".xz":
97 xzr, err := xz.NewReader(arr)
98 if err != nil {
99 return nil, err
100 }
101
102 inner = xzr
103 case ".zst":
104 zr, err := zstd.NewReader(arr)
105 if err != nil {
106 return nil, err
107 }
108 defer zr.Close()
109
110 inner = zr
111 default:
112 return nil, ErrUnsupportedCompression
113 }
114
115 tr := tar.NewReader(inner)
116 for {
117 hd, err := tr.Next()
118 if err == io.EOF {
119 break
120 }
121 if err != nil {
122 return nil, err
123 }
124
125 if hd.Typeflag != tar.TypeReg {
126 continue
127 }
128
129 if hd.FileInfo().Name() == "control" {
130 return ParseControlFile(tr)
131 }
132 }
133 }
134 }
135
136 return nil, ErrMissingControlFile
137}
138
139// ParseControlFile parses a Debian control file to retrieve the metadata
140func ParseControlFile(r io.Reader) (*Package, error) {
141 p := &Package{
142 Metadata: &Metadata{},
143 }
144
145 key := ""
146 var depends strings.Builder
147 var control strings.Builder
148
149 s := bufio.NewScanner(io.TeeReader(r, &control))
150 for s.Scan() {
151 line := s.Text()
152
153 trimmed := strings.TrimSpace(line)
154 if trimmed == "" {
155 continue
156 }
157
158 if line[0] == ' ' || line[0] == '\t' {
159 switch key {
160 case "Description":
161 p.Metadata.Description += line
162 case "Depends":
163 depends.WriteString(trimmed)
164 }
165 } else {
166 parts := strings.SplitN(trimmed, ":", 2)
167 if len(parts) < 2 {
168 continue
169 }
170
171 key = parts[0]
172 value := strings.TrimSpace(parts[1])
173 switch key {
174 case "Package":
175 p.Name = value
176 case "Version":
177 p.Version = value
178 case "Architecture":
179 p.Architecture = value
180 case "Maintainer":
181 a, err := mail.ParseAddress(value)
182 if err != nil || a.Name == "" {
183 p.Metadata.Maintainer = value
184 } else {
185 p.Metadata.Maintainer = a.Name
186 }
187 case "Description":
188 p.Metadata.Description = value
189 case "Depends":
190 depends.WriteString(value)
191 case "Homepage":
192 if validation.IsValidURL(value) {
193 p.Metadata.ProjectURL = value
194 }
195 }
196 }
197 }
198 if err := s.Err(); err != nil {
199 return nil, err
200 }
201
202 if !namePattern.MatchString(p.Name) {
203 return nil, ErrInvalidName
204 }
205 if !versionPattern.MatchString(p.Version) {
206 return nil, ErrInvalidVersion
207 }
208 if p.Architecture == "" {
209 return nil, ErrInvalidArchitecture
210 }
211
212 dependencies := strings.Split(depends.String(), ",")
213 for i := range dependencies {
214 dependencies[i] = strings.TrimSpace(dependencies[i])
215 }
216 p.Metadata.Dependencies = dependencies
217
218 p.Control = strings.TrimSpace(control.String())
219
220 return p, nil
221}