[READ-ONLY] a fast, modern browser for the npm registry
at main 406 lines 14 kB view raw
import { withoutTrailingSlash } from 'ufo'

/** Identifier of a supported git hosting provider. */
export type ProviderId =
  | 'github'
  | 'gitlab'
  | 'bitbucket'
  | 'gitea'
  | 'forgejo'
  | 'codeberg'
  | 'sourcehut'
  | 'gitee'
  | 'tangled'
  | 'radicle'

/** A repository location resolved from a URL. */
export interface RepoRef {
  provider: ProviderId
  /** Owner / namespace path. Empty for radicle, where `repo` holds the full `rad:` ID. */
  owner: string
  repo: string
  /** Lower-cased hostname; only set by `parseRepoUrl` for gitlab, gitea, forgejo and radicle. */
  host?: string
}

export interface RepositoryInfo extends RepoRef {
  /** Raw file URL base (e.g., https://raw.githubusercontent.com/owner/repo/HEAD) */
  rawBaseUrl: string
  /** Blob/rendered file URL base (e.g., https://github.com/owner/repo/blob/HEAD) */
  blobBaseUrl: string
  /** Subdirectory within repo where package lives (e.g., packages/ai) */
  directory?: string
}

/** Known GitLab instances (gitlab.com plus self-hosted ones) */
export const GITLAB_HOSTS = [
  'gitlab.com',
  'gitlab.gnome.org',
  'gitlab.freedesktop.org',
  'invent.kde.org',
  'salsa.debian.org',
  'framagit.org',
]

interface ProviderConfig {
  id: ProviderId
  /** Check if hostname matches this provider */
  matchHost(host: string): boolean
  /** Parse URL path into owner/repo, returns null if invalid */
  parsePath(parts: string[]): { owner: string; repo: string } | null
  /** Get raw file URL base for resolving relative paths */
  getRawBaseUrl(ref: RepoRef, branch?: string): string
  /** Get blob/rendered URL base for markdown files */
  getBlobBaseUrl(ref: RepoRef, branch?: string): string
  /** Convert file (tree) URLs to raw URLs (for images) */
  fileToRaw?(url: string): string
  /** Convert blob URLs to raw URLs (for images) */
  blobToRaw?(url: string): string
}

/** Host patterns that identify a Forgejo instance. */
const FORGEJO_HOST_PATTERNS = [/^forgejo\./i, /\.forgejo\./i]

/** Forgejo instances matched by exact hostname. */
const FORGEJO_KNOWN_HOSTS = ['next.forgejo.org', 'try.next.forgejo.org']

/** Common Gitea hosting patterns (Forgejo has its own adapter). */
const GITEA_HOST_PATTERNS = [/^git\./i, /^gitea\./i, /^code\./i, /^src\./i, /gitea\.io$/i]

/**
 * Hosts (and their subdomains) owned by other providers that the Gitea
 * heuristics must never claim. `GITLAB_HOSTS` is checked in addition to this list.
 */
const GITEA_SKIP_HOSTS = [
  'github.com',
  'gitlab.com',
  'codeberg.org',
  'bitbucket.org',
  'gitee.com',
  'sr.ht',
  'git.sr.ht',
  'tangled.sh',
  'tangled.org',
  ...FORGEJO_KNOWN_HOSTS,
]

/** Providers whose URL builders need the concrete host recorded on the `RepoRef`. */
const PROVIDERS_NEEDING_HOST: ReadonlySet<ProviderId> = new Set<ProviderId>([
  'gitlab',
  'gitea',
  'forgejo',
  'radicle',
])

/** Matches a leading `ssh://` or `git://` scheme. */
const SSH_OR_GIT_SCHEME = /^(?:ssh|git):\/\//i

/** Build a matcher accepting `domain` and `www.<domain>`. */
function bareOrWww(domain: string): (host: string) => boolean {
  return host => host === domain || host === `www.${domain}`
}

/** True when `host` is `domain` itself or any subdomain of it. */
function isHostOrSubdomain(host: string, domain: string): boolean {
  return host === domain || host.endsWith(`.${domain}`)
}

/** True when `host` matches one of the Forgejo host patterns. */
function looksLikeForgejo(host: string): boolean {
  return FORGEJO_HOST_PATTERNS.some(pattern => pattern.test(host))
}

/** Decode a path segment holding a repository name and drop a trailing `.git`. */
function decodeRepoName(segment: string | undefined): string {
  return decodeURIComponent(segment ?? '')
    .trim()
    .replace(/\.git$/i, '')
}

/**
 * Parse the common `/<owner>/<repo>[/...]` path layout.
 * Only the first two segments are used; returns null when either is missing or empty.
 * May throw `URIError` on malformed percent-encoding (`parseRepoUrl` catches it).
 */
function parseOwnerRepo(parts: string[]): { owner: string; repo: string } | null {
  if (parts.length < 2) return null
  const owner = decodeURIComponent(parts[0] ?? '').trim()
  const repo = decodeRepoName(parts[1])
  if (!owner || !repo) return null
  return { owner, repo }
}

/** Gitea-style URLs need a concrete branch name, so the symbolic `HEAD` is mapped to `main`. */
function headToMain(branch: string): string {
  return branch === 'HEAD' ? 'main' : branch
}

/**
 * Provider adapters, checked in order by `parseRepoUrl`.
 * The pattern-based forgejo and gitea adapters are listed last.
 */
const providers: ProviderConfig[] = [
  {
    id: 'github',
    matchHost: bareOrWww('github.com'),
    parsePath: parseOwnerRepo,
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://raw.githubusercontent.com/${ref.owner}/${ref.repo}/${branch}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://github.com/${ref.owner}/${ref.repo}/blob/${branch}`,
    fileToRaw: url => url.replace('/tree/', '/raw/'),
    blobToRaw: url => url.replace('/blob/', '/raw/'),
  },
  {
    id: 'gitlab',
    matchHost: host => GITLAB_HOSTS.some(h => host === h || host === `www.${h}`),
    parsePath: parts => {
      // GitLab separates the project path from in-project routes with a reserved
      // "-" segment (e.g. /group/project/-/tree/main); only the project path matters here.
      const routeStart = parts.indexOf('-')
      const projectParts = routeStart === -1 ? parts : parts.slice(0, routeStart)
      if (projectParts.length < 2) return null
      // GitLab supports nested groups: last segment is the repo, the rest is the owner path
      const repo = decodeRepoName(projectParts[projectParts.length - 1])
      const owner = projectParts
        .slice(0, -1)
        .map(p => decodeURIComponent(p).trim())
        .join('/')
      if (!owner || !repo) return null
      return { owner, repo }
    },
    getRawBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'gitlab.com'
      return `https://${host}/${ref.owner}/${ref.repo}/-/raw/${branch}`
    },
    getBlobBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'gitlab.com'
      return `https://${host}/${ref.owner}/${ref.repo}/-/blob/${branch}`
    },
    blobToRaw: url => url.replace('/-/blob/', '/-/raw/'),
  },
  {
    id: 'bitbucket',
    matchHost: bareOrWww('bitbucket.org'),
    parsePath: parseOwnerRepo,
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://bitbucket.org/${ref.owner}/${ref.repo}/raw/${branch}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://bitbucket.org/${ref.owner}/${ref.repo}/src/${branch}`,
    blobToRaw: url => url.replace('/src/', '/raw/'),
  },
  {
    id: 'codeberg',
    matchHost: bareOrWww('codeberg.org'),
    parsePath: parseOwnerRepo,
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://codeberg.org/${ref.owner}/${ref.repo}/raw/branch/${headToMain(branch)}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://codeberg.org/${ref.owner}/${ref.repo}/src/branch/${headToMain(branch)}`,
    blobToRaw: url => url.replace('/src/', '/raw/'),
  },
  {
    id: 'gitee',
    matchHost: bareOrWww('gitee.com'),
    parsePath: parseOwnerRepo,
    getRawBaseUrl: (ref, branch = 'master') =>
      `https://gitee.com/${ref.owner}/${ref.repo}/raw/${branch}`,
    getBlobBaseUrl: (ref, branch = 'master') =>
      `https://gitee.com/${ref.owner}/${ref.repo}/blob/${branch}`,
    blobToRaw: url => url.replace('/blob/', '/raw/'),
  },
  {
    id: 'sourcehut',
    matchHost: host => host === 'sr.ht' || host === 'git.sr.ht',
    // Sourcehut uses ~username/repo format; the "~" stays part of the owner
    parsePath: parseOwnerRepo,
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://git.sr.ht/${ref.owner}/${ref.repo}/blob/${branch}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://git.sr.ht/${ref.owner}/${ref.repo}/tree/${branch}/item`,
  },
  {
    id: 'tangled',
    matchHost: host =>
      host === 'tangled.sh' ||
      host === 'www.tangled.sh' ||
      host === 'tangled.org' ||
      host === 'www.tangled.org',
    // Tangled uses owner/repo format (owner is a domain-like identifier, e.g., nonbinary.computer)
    parsePath: parseOwnerRepo,
    getRawBaseUrl: (ref, branch = 'main') =>
      `https://tangled.sh/${ref.owner}/${ref.repo}/raw/branch/${branch}`,
    getBlobBaseUrl: (ref, branch = 'main') =>
      `https://tangled.sh/${ref.owner}/${ref.repo}/src/branch/${branch}`,
    blobToRaw: url => url.replace('/blob/', '/raw/branch/'),
  },
  {
    id: 'radicle',
    matchHost: host =>
      host === 'radicle.at' || host === 'app.radicle.at' || host === 'seed.radicle.at',
    parsePath: parts => {
      // Radicle URLs: app.radicle.at/nodes/seed.radicle.at/rad:z3nP4yT1PE3m1PxLEzr173sZtJVnT
      // We extract the rad:... identifier as the "repo" with no owner
      const radMatch = parts.join('/').match(/rad:[a-zA-Z0-9]+/)
      if (!radMatch?.[0]) return null
      // Use empty owner, store full rad: ID as repo
      return { owner: '', repo: radMatch[0] }
    },
    getRawBaseUrl: (ref, branch = 'HEAD') =>
      `https://seed.radicle.at/api/v1/projects/${ref.repo}/blob/${branch}`,
    getBlobBaseUrl: (ref, branch = 'HEAD') =>
      `https://app.radicle.at/nodes/seed.radicle.at/${ref.repo}/tree/${branch}`,
  },
  {
    id: 'forgejo',
    // Known instances by exact hostname, everything else by pattern
    matchHost: host => FORGEJO_KNOWN_HOSTS.includes(host) || looksLikeForgejo(host),
    parsePath: parseOwnerRepo,
    getRawBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'codeberg.org'
      return `https://${host}/${ref.owner}/${ref.repo}/raw/branch/${headToMain(branch)}`
    },
    getBlobBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'codeberg.org'
      return `https://${host}/${ref.owner}/${ref.repo}/src/branch/${headToMain(branch)}`
    },
    blobToRaw: url => url.replace('/src/', '/raw/'),
  },
  {
    id: 'gitea',
    matchHost: host => {
      // Skip known providers (and their subdomains); GITLAB_HOSTS is read at call time
      const claimedElsewhere =
        GITEA_SKIP_HOSTS.some(h => isHostOrSubdomain(host, h)) ||
        GITLAB_HOSTS.some(h => isHostOrSubdomain(host, h))
      if (claimedElsewhere) return false
      // Skip Forgejo patterns
      if (looksLikeForgejo(host)) return false
      return GITEA_HOST_PATTERNS.some(pattern => pattern.test(host))
    },
    parsePath: parseOwnerRepo,
    getRawBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'gitea.io'
      return `https://${host}/${ref.owner}/${ref.repo}/raw/branch/${headToMain(branch)}`
    },
    getBlobBaseUrl: (ref, branch = 'HEAD') => {
      const host = ref.host ?? 'gitea.io'
      return `https://${host}/${ref.owner}/${ref.repo}/src/branch/${headToMain(branch)}`
    },
    blobToRaw: url => url.replace('/src/', '/raw/'),
  },
]

/**
 * Normalize various git URL formats to a standard HTTPS URL.
 * Handles: git+https://, git://, git@host:path, ssh://git@host/path
 * and the malformed but common ssh://git@host:owner/repo form.
 *
 * @param input Repository URL in any of the formats above.
 * @returns An `https://` URL for ssh/git/SCP-style input, the input (minus a
 *   leading `git+`) when it already is http(s) or is not recognized, or `null`
 *   for blank input and for ssh/git URLs that cannot be parsed at all.
 */
export function normalizeGitUrl(input: string): string | null {
  const raw = input.trim()
  if (!raw) return null

  let candidate = raw.replace(/^git\+/, '')

  // Handle ssh:// and git:// URLs by converting to https://
  const hadGitScheme = SSH_OR_GIT_SCHEME.test(candidate)
  if (hadGitScheme) {
    try {
      const url = new URL(candidate)
      const path = url.pathname.replace(/^\/*/, '')
      return `https://${url.hostname}/${path}`
    } catch {
      // ssh://git@host:owner/repo.git is rejected by the URL parser ("owner" is not
      // a port). Drop the scheme so the SCP handling below sees git@host:owner/repo.git
      // instead of mistaking "ssh" for the host.
      candidate = candidate.replace(SSH_OR_GIT_SCHEME, '')
    }
  }

  if (!/^https?:\/\//i.test(candidate)) {
    // Handle SCP-style URLs: git@host:path
    const scp = candidate.match(/^(?:git@)?([^:/]+):(.+)$/i)
    if (scp?.[1] && scp?.[2]) {
      const host = scp[1]
      const path = scp[2].replace(/^\/*/, '')
      return `https://${host}/${path}`
    }
    // An ssh/git URL that is neither a valid URL nor SCP-like cannot be normalized
    if (hadGitScheme) return null
  }

  return candidate
}

/**
 * Resolve a repository URL to a provider, owner and repo.
 *
 * @param input Repository URL in any format accepted by `normalizeGitUrl`.
 * @returns The parsed reference, or `null` when the URL is invalid or no
 *   provider recognizes its host and path.
 */
export function parseRepoUrl(input: string): RepoRef | null {
  const normalized = normalizeGitUrl(input)
  if (!normalized) return null

  try {
    const url = new URL(normalized)
    const host = url.hostname.toLowerCase()
    const parts = url.pathname.split('/').filter(Boolean)

    for (const provider of providers) {
      if (!provider.matchHost(host)) continue
      const parsed = provider.parsePath(parts)
      if (!parsed) continue
      return {
        provider: provider.id,
        owner: parsed.owner,
        repo: parsed.repo,
        host: PROVIDERS_NEEDING_HOST.has(provider.id) ? host : undefined,
      }
    }
    return null
  } catch {
    // Invalid URL or malformed percent-encoding in the path: not a known repository
    return null
  }
}

/**
 * Parse repository field from package.json into repository info.
 * Supports both full objects and shorthand strings.
 *
 * @returns Repository info with raw/blob base URLs on each provider's default
 *   branch, or `undefined` when the field is missing, has no URL, or the URL
 *   is not recognized.
 */
export function parseRepositoryInfo(
  repository?: { type?: string; url?: string; directory?: string } | string,
): RepositoryInfo | undefined {
  if (!repository) return undefined

  const url = typeof repository === 'string' ? repository : repository.url
  const directory = typeof repository === 'string' ? undefined : repository.directory
  if (!url) return undefined

  const ref = parseRepoUrl(url)
  if (!ref) return undefined

  const provider = getProviderConfig(ref.provider)
  if (!provider) return undefined

  return {
    ...ref,
    rawBaseUrl: provider.getRawBaseUrl(ref),
    blobBaseUrl: provider.getBlobBaseUrl(ref),
    directory: directory ? withoutTrailingSlash(directory) : undefined,
  }
}

/** Look up the adapter for a provider id; `undefined` if none is registered. */
export function getProviderConfig(providerId: ProviderId): ProviderConfig | undefined {
  return providers.find(p => p.id === providerId)
}

/**
 * Rewrite a file (tree) or blob URL into the provider's raw URL form.
 * Applies the provider's `fileToRaw` first, then its `blobToRaw`; the URL is
 * returned unchanged when the provider is unknown or defines neither.
 */
export function convertBlobOrFileToRawUrl(url: string, providerId: ProviderId): string {
  const provider = getProviderConfig(providerId)
  if (!provider) return url
  const afterFile = provider.fileToRaw ? provider.fileToRaw(url) : url
  return provider.blobToRaw ? provider.blobToRaw(afterFile) : afterFile
}

/** True when `url` resolves to a repository on any supported provider. */
export function isKnownGitProvider(url: string): boolean {
  return parseRepoUrl(url) !== null
}