forked from
npmx.dev/npmx.dev
[READ-ONLY]
a fast, modern browser for the npm registry
1import { withoutTrailingSlash } from 'ufo'
2
/**
 * Identifier of a git hosting provider this module knows how to parse
 * repository URLs for.
 */
export type ProviderId =
  | 'github' | 'gitlab' | 'bitbucket'
  | 'gitea' | 'forgejo' | 'codeberg'
  | 'sourcehut' | 'gitee'
  | 'tangled' | 'radicle'
14
/** Minimal reference to a repository on a known hosting provider. */
export interface RepoRef {
  /** Provider whose URL conventions apply to this repository */
  provider: ProviderId
  /**
   * Owner part of the repository path. May contain `/` for nested GitLab
   * groups and is an empty string for Radicle repositories.
   */
  owner: string
  /** Repository name without a trailing `.git` (for Radicle, the full `rad:…` ID) */
  repo: string
  /** Lower-cased hostname; only set by `parseRepoUrl` for gitlab, gitea, forgejo and radicle */
  host?: string
}
21
/**
 * A {@link RepoRef} enriched with the URL bases needed to resolve files in
 * the repository.
 */
export interface RepositoryInfo extends RepoRef {
  /** Base URL for raw file contents, e.g. `https://raw.githubusercontent.com/owner/repo/HEAD` */
  rawBaseUrl: string
  /** Base URL for rendered (blob) file views, e.g. `https://github.com/owner/repo/blob/HEAD` */
  blobBaseUrl: string
  /** Subdirectory of the repository that contains the package, e.g. `packages/ai` */
  directory?: string
}
30
/**
 * Hostnames treated as GitLab: the hosted `gitlab.com` service plus a
 * hand-maintained list of other GitLab instances.
 *
 * Matching elsewhere in this file is by exact hostname (optionally prefixed
 * with `www.`), so an instance missing from this list is not recognised as
 * GitLab.
 */
export const GITLAB_HOSTS: string[] = [
  'gitlab.com',
  'gitlab.gnome.org',
  'gitlab.freedesktop.org',
  'invent.kde.org',
  'salsa.debian.org',
  'framagit.org',
]
40
/**
 * Adapter describing how one hosting provider lays out its URLs.
 *
 * `matchHost` and `parsePath` are used to recognise a repository URL; the
 * remaining members build or rewrite URLs for files inside the repository.
 */
interface ProviderConfig {
  /** Identifier stored on the resulting `RepoRef` */
  id: ProviderId
  /** Check whether a (lower-cased) hostname belongs to this provider */
  matchHost(host: string): boolean
  /** Parse the non-empty URL path segments into owner/repo; returns null if invalid */
  parsePath(parts: string[]): { owner: string; repo: string } | null
  /** Get the raw file URL base used for resolving relative paths */
  getRawBaseUrl(ref: RepoRef, branch?: string): string
  /** Get the blob/rendered URL base used for markdown files */
  getBlobBaseUrl(ref: RepoRef, branch?: string): string
  /** Convert file (tree) URLs to raw URLs (for images); optional per provider */
  fileToRaw?(url: string): string
  /** Convert blob URLs to raw URLs (for images); optional per provider */
  blobToRaw?(url: string): string
}
56
/**
 * Parse the flat `/<owner>/<repo>` layout shared by most providers.
 *
 * Both segments are URI-decoded and trimmed, and a trailing `.git` is removed
 * from the repository name. Segments after the second one are ignored.
 *
 * @param parts - Non-empty path segments of the repository URL
 * @returns The owner/repo pair, or `null` when either segment is missing or blank
 * @throws URIError if a segment contains a malformed percent-escape
 */
function parseOwnerRepoPath(parts: string[]): { owner: string; repo: string } | null {
  if (parts.length < 2) return null
  const owner = decodeURIComponent(parts[0] ?? '').trim()
  const repo = decodeURIComponent(parts[1] ?? '')
    .trim()
    .replace(/\.git$/i, '')
  if (!owner || !repo) return null
  return { owner, repo }
}

/** Hostname shapes treated as Forgejo: `forgejo.*` and `*.forgejo.*` */
const FORGEJO_HOST_PATTERNS = [/^forgejo\./i, /\.forgejo\./i]

/** Forgejo instances matched by exact hostname */
const FORGEJO_KNOWN_HOSTS = ['next.forgejo.org', 'try.next.forgejo.org']

/** Check whether a hostname is a known Forgejo instance or matches a Forgejo pattern. */
function isForgejoHost(host: string): boolean {
  return FORGEJO_KNOWN_HOSTS.includes(host) || FORGEJO_HOST_PATTERNS.some(p => p.test(host))
}

/** Hostname shapes commonly used by Gitea installations */
const GITEA_HOST_PATTERNS = [/^git\./i, /^gitea\./i, /^code\./i, /^src\./i, /gitea\.io$/i]

/**
 * Hosts (and their subdomains) that must never be claimed by the generic
 * Gitea adapter. `GITLAB_HOSTS` is checked in addition to this list.
 */
const GITEA_SKIP_HOSTS = [
  'github.com',
  'gitlab.com',
  'codeberg.org',
  'bitbucket.org',
  'gitee.com',
  'sr.ht',
  'git.sr.ht',
  'tangled.sh',
  'tangled.org',
  'next.forgejo.org',
  'try.next.forgejo.org',
]

/**
 * Branch segment for Gitea-style `/raw/branch/<name>` and `/src/branch/<name>`
 * URLs: `HEAD` is replaced with `main`, any other value is used as-is.
 */
function resolveGiteaBranch(branch: string): string {
  return branch === 'HEAD' ? 'main' : branch
}

/**
 * Provider adapters, tried in array order by `parseRepoUrl`.
 *
 * Order matters: the pattern-based `forgejo` and `gitea` adapters come last so
 * that exact-host providers get the first chance to claim a URL.
 */
const providers: ProviderConfig[] = [
  {
    id: 'github',
    matchHost: host => host === 'github.com' || host === 'www.github.com',
    parsePath: parseOwnerRepoPath,
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://raw.githubusercontent.com/${ref.owner}/${ref.repo}/${branch}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://github.com/${ref.owner}/${ref.repo}/blob/${branch}`,
    fileToRaw: url => url.replace('/tree/', '/raw/'),
    blobToRaw: url => url.replace('/blob/', '/raw/'),
  },
  {
    id: 'gitlab',
    matchHost: host => GITLAB_HOSTS.some(h => host === h || host === `www.${h}`),
    parsePath: parts => {
      if (parts.length < 2) return null
      // GitLab supports nested groups: the last segment is the repo,
      // everything before it is the (possibly multi-level) owner
      const repo = decodeURIComponent(parts[parts.length - 1] ?? '')
        .trim()
        .replace(/\.git$/i, '')
      const owner = parts
        .slice(0, -1)
        .map(p => decodeURIComponent(p).trim())
        .join('/')
      if (!owner || !repo) return null
      return { owner, repo }
    },
    getRawBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'gitlab.com'
      return `https://${host}/${ref.owner}/${ref.repo}/-/raw/${branch}`
    },
    getBlobBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'gitlab.com'
      return `https://${host}/${ref.owner}/${ref.repo}/-/blob/${branch}`
    },
    blobToRaw: url => url.replace('/-/blob/', '/-/raw/'),
  },
  {
    id: 'bitbucket',
    matchHost: host => host === 'bitbucket.org' || host === 'www.bitbucket.org',
    parsePath: parseOwnerRepoPath,
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://bitbucket.org/${ref.owner}/${ref.repo}/raw/${branch}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://bitbucket.org/${ref.owner}/${ref.repo}/src/${branch}`,
    blobToRaw: url => url.replace('/src/', '/raw/'),
  },
  {
    id: 'codeberg',
    matchHost: host => host === 'codeberg.org' || host === 'www.codeberg.org',
    parsePath: parseOwnerRepoPath,
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://codeberg.org/${ref.owner}/${ref.repo}/raw/branch/${resolveGiteaBranch(branch)}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://codeberg.org/${ref.owner}/${ref.repo}/src/branch/${resolveGiteaBranch(branch)}`,
    blobToRaw: url => url.replace('/src/', '/raw/'),
  },
  {
    id: 'gitee',
    matchHost: host => host === 'gitee.com' || host === 'www.gitee.com',
    parsePath: parseOwnerRepoPath,
    getRawBaseUrl: (ref, branch = 'master') =>
      `https://gitee.com/${ref.owner}/${ref.repo}/raw/${branch}`,
    getBlobBaseUrl: (ref, branch = 'master') =>
      `https://gitee.com/${ref.owner}/${ref.repo}/blob/${branch}`,
    blobToRaw: url => url.replace('/blob/', '/raw/'),
  },
  {
    id: 'sourcehut',
    matchHost: host => host === 'sr.ht' || host === 'git.sr.ht',
    // Sourcehut uses ~username/repo format; the `~` stays part of the owner
    parsePath: parseOwnerRepoPath,
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://git.sr.ht/${ref.owner}/${ref.repo}/blob/${branch}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://git.sr.ht/${ref.owner}/${ref.repo}/tree/${branch}/item`,
  },
  {
    id: 'tangled',
    matchHost: host =>
      host === 'tangled.sh' ||
      host === 'www.tangled.sh' ||
      host === 'tangled.org' ||
      host === 'www.tangled.org',
    // Tangled uses owner/repo format (owner is a domain-like identifier, e.g., nonbinary.computer)
    parsePath: parseOwnerRepoPath,
    getRawBaseUrl: (ref, branch = 'main') =>
      `https://tangled.sh/${ref.owner}/${ref.repo}/raw/branch/${branch}`,
    getBlobBaseUrl: (ref, branch = 'main') =>
      `https://tangled.sh/${ref.owner}/${ref.repo}/src/branch/${branch}`,
    // NOTE(review): this rewrites '/blob/' although getBlobBaseUrl above emits
    // '/src/branch/' URLs — confirm which path Tangled actually serves.
    blobToRaw: url => url.replace('/blob/', '/raw/branch/'),
  },
  {
    id: 'radicle',
    matchHost: host =>
      host === 'radicle.at' || host === 'app.radicle.at' || host === 'seed.radicle.at',
    parsePath: parts => {
      // Radicle URLs: app.radicle.at/nodes/seed.radicle.at/rad:z3nP4yT1PE3m1PxLEzr173sZtJVnT
      // We extract the rad:... identifier as the "repo" with no owner
      const path = parts.join('/')
      const radMatch = path.match(/rad:[a-zA-Z0-9]+/)
      if (!radMatch?.[0]) return null
      // Use empty owner, store full rad: ID as repo
      return { owner: '', repo: radMatch[0] }
    },
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://seed.radicle.at/api/v1/projects/${ref.repo}/blob/${branch}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://app.radicle.at/nodes/seed.radicle.at/${ref.repo}/tree/${branch}`,
  },
  {
    id: 'forgejo',
    matchHost: isForgejoHost,
    parsePath: parseOwnerRepoPath,
    getRawBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'codeberg.org'
      return `https://${host}/${ref.owner}/${ref.repo}/raw/branch/${resolveGiteaBranch(branch)}`
    },
    getBlobBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'codeberg.org'
      return `https://${host}/${ref.owner}/${ref.repo}/src/branch/${resolveGiteaBranch(branch)}`
    },
    blobToRaw: url => url.replace('/src/', '/raw/'),
  },
  {
    id: 'gitea',
    matchHost: host => {
      // Skip hosts (and subdomains of hosts) owned by a dedicated adapter
      const isHostOrSubdomainOf = (h: string) => host === h || host.endsWith(`.${h}`)
      if (GITEA_SKIP_HOSTS.some(isHostOrSubdomainOf)) return false
      if (GITLAB_HOSTS.some(isHostOrSubdomainOf)) return false
      // Forgejo has its own adapter
      if (isForgejoHost(host)) return false
      // Match common Gitea hosting patterns
      return GITEA_HOST_PATTERNS.some(p => p.test(host))
    },
    parsePath: parseOwnerRepoPath,
    getRawBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'gitea.io'
      return `https://${host}/${ref.owner}/${ref.repo}/raw/branch/${resolveGiteaBranch(branch)}`
    },
    getBlobBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'gitea.io'
      return `https://${host}/${ref.owner}/${ref.repo}/src/branch/${resolveGiteaBranch(branch)}`
    },
    blobToRaw: url => url.replace('/src/', '/raw/'),
  },
]
290
/**
 * Normalize the git URL flavours found in `package.json` to a plain HTTPS URL.
 *
 * Handled inputs:
 * - `git+<scheme>://…` — the `git+` prefix is dropped
 * - `ssh://[user@]host[:port]/path` and `git://host/path` — rewritten to `https://host/path`
 * - `ssh://user@host:owner/repo` — the hybrid form where an SCP-style path follows the scheme
 * - `[user@]host:path` — SCP-style remotes
 *
 * `http(s)://` URLs, and anything that matches none of the forms above, are
 * returned unchanged apart from trimming and `git+` removal.
 *
 * @param input - Raw repository URL
 * @returns The normalized URL, or `null` when `input` is empty or whitespace-only
 */
export function normalizeGitUrl(input: string): string | null {
  const raw = input.trim()
  if (!raw) return null

  const normalized = raw.replace(/^git\+/, '')
  if (/^https?:\/\//i.test(normalized)) return normalized

  // Text handed to the SCP matcher below; differs from `normalized` only when
  // an ssh:// or git:// URL could not be parsed
  let scpCandidate = normalized

  // Handle ssh:// and git:// URLs by converting to https://
  if (/^(?:ssh|git):\/\//i.test(normalized)) {
    try {
      const url = new URL(normalized)
      const path = url.pathname.replace(/^\/*/, '')
      return `https://${url.hostname}/${path}`
    } catch {
      // `ssh://git@host:owner/repo` is rejected by the URL parser because
      // "owner" is not a valid port. Drop the scheme so the remainder can be
      // matched as an SCP-style remote instead of "ssh" being taken for the host.
      scpCandidate = normalized.replace(/^(?:ssh|git):\/\//i, '')
    }
  }

  // Handle SCP-style URLs: [user@]host:path — the user part is discarded,
  // matching the ssh:// branch above, which keeps only the hostname
  const scp = scpCandidate.match(/^(?:[^@/:]+@)?([^:/@]+):(.+)$/)
  if (scp?.[1] && scp[2]) {
    const host = scp[1]
    const path = scp[2].replace(/^\/*/, '')
    return `https://${host}/${path}`
  }

  return normalized
}
324
/**
 * Resolve a repository URL to a provider-specific reference.
 *
 * The input is normalized with `normalizeGitUrl`, then offered to each entry
 * of `providers` in order; the first provider that both accepts the hostname
 * and can parse the path wins.
 *
 * @param input - Repository URL in any form `normalizeGitUrl` understands
 * @returns The repository reference, or `null` when the URL is empty, cannot
 *   be parsed, or belongs to no known provider
 */
export function parseRepoUrl(input: string): RepoRef | null {
  const httpsUrl = normalizeGitUrl(input)
  if (!httpsUrl) return null

  // Providers whose generated URLs depend on the concrete hostname
  const hostAwareProviders = ['gitlab', 'gitea', 'forgejo', 'radicle']

  try {
    const { hostname, pathname } = new URL(httpsUrl)
    const host = hostname.toLowerCase()
    const segments = pathname.split('/').filter(segment => segment.length > 0)

    for (const candidate of providers) {
      const location = candidate.matchHost(host) ? candidate.parsePath(segments) : null
      if (!location) continue

      const ref: RepoRef = {
        provider: candidate.id,
        owner: location.owner,
        repo: location.repo,
        host: hostAwareProviders.includes(candidate.id) ? host : undefined,
      }
      return ref
    }
  } catch {
    // Invalid URL or malformed percent-escape in the path: treated as unknown
    return null
  }

  return null
}
352
/**
 * Turn the `repository` field of a package.json into repository info.
 *
 * Accepts either the object form (`{ type, url, directory }`) or a plain URL
 * string. The raw/blob base URLs are built with the provider's default branch.
 *
 * @param repository - The `repository` value from package.json, if any
 * @returns Repository info, or `undefined` when the field is missing, has no
 *   URL, or the URL does not belong to a known provider
 */
export function parseRepositoryInfo(
  repository?: { type?: string; url?: string; directory?: string } | string,
): RepositoryInfo | undefined {
  if (!repository) return undefined

  const { url, directory } =
    typeof repository === 'string' ? { url: repository, directory: undefined } : repository

  const ref = url ? parseRepoUrl(url) : null
  if (!ref) return undefined

  const config = providers.find(({ id }) => id === ref.provider)
  if (!config) return undefined

  const info: RepositoryInfo = {
    ...ref,
    rawBaseUrl: config.getRawBaseUrl(ref),
    blobBaseUrl: config.getBlobBaseUrl(ref),
    directory: directory ? withoutTrailingSlash(directory) : undefined,
  }
  return info
}
387
/**
 * Look up the adapter for a provider.
 *
 * @param providerId - Provider to look up
 * @returns The matching adapter, or `undefined` if none is registered for the id
 */
export function getProviderConfig(providerId: ProviderId): ProviderConfig | undefined {
  for (const config of providers) {
    if (config.id === providerId) return config
  }
  return undefined
}
391
/**
 * Rewrite a file or blob URL to its raw counterpart for the given provider.
 *
 * The provider's `fileToRaw` rewrite (when defined) runs first and its result
 * is then passed through `blobToRaw` (when defined). Without a matching
 * provider or rewrite, the URL is returned unchanged.
 *
 * @param url - File or blob URL to convert
 * @param providerId - Provider whose rewrite rules apply
 * @returns The rewritten URL
 */
export function convertBlobOrFileToRawUrl(url: string, providerId: ProviderId): string {
  const config = providers.find(({ id }) => id === providerId)
  if (!config) return url

  const afterFileRewrite = config.fileToRaw ? config.fileToRaw(url) : url
  return config.blobToRaw ? config.blobToRaw(afterFileRewrite) : afterFileRewrite
}
403
/**
 * Tell whether a URL points at a repository on one of the known providers.
 *
 * @param url - Repository URL in any form `parseRepoUrl` accepts
 * @returns `true` when `parseRepoUrl` can resolve the URL, `false` otherwise
 */
export function isKnownGitProvider(url: string): boolean {
  const ref = parseRepoUrl(url)
  return ref !== null
}