A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go

better logic for relative urls

evan.jarrett.net 8e31137c 023efb05

verified
Changed files
+92 -9
pkg
appview
+50 -9
pkg/appview/readme/fetcher.go
··· 7 7 "io" 8 8 "net/http" 9 9 "net/url" 10 + "regexp" 10 11 "strings" 11 12 "time" 12 13 ··· 192 193 return f.renderMarkdown(content, "") 193 194 } 194 195 196 + // Regex patterns for matching relative URLs that need rewriting 197 + // These match src="..." or href="..." where the URL is relative (not absolute, not data:, not #anchor) 198 + var ( 199 + // Match src="filename" where filename doesn't start with http://, https://, //, /, #, data:, or mailto: 200 + relativeSrcPattern = regexp.MustCompile(`src="([^"/:][^"]*)"`) 201 + // Match href="filename" where filename doesn't start with http://, https://, //, /, #, data:, or mailto: 202 + relativeHrefPattern = regexp.MustCompile(`href="([^"/:][^"]*)"`) 203 + ) 204 + 195 205 // rewriteRelativeURLs converts relative URLs to absolute URLs 196 206 func rewriteRelativeURLs(html, baseURL string) string { 197 207 if baseURL == "" { ··· 203 213 return html 204 214 } 205 215 206 - // Simple string replacement for common patterns 207 - // This is a basic implementation - for production, consider using an HTML parser 216 + // Handle root-relative URLs (starting with /) first 217 + // Must be done before bare relative URLs to avoid double-processing 218 + if base.Scheme != "" && base.Host != "" { 219 + root := fmt.Sprintf("%s://%s/", base.Scheme, base.Host) 220 + // Replace src="/" and href="/" but not src="//" (protocol-relative URLs) 221 + html = strings.ReplaceAll(html, `src="/`, fmt.Sprintf(`src="%s`, root)) 222 + html = strings.ReplaceAll(html, `href="/`, fmt.Sprintf(`href="%s`, root)) 223 + } 224 + 225 + // Handle explicit relative paths (./something and ../something) 208 226 html = strings.ReplaceAll(html, `src="./`, fmt.Sprintf(`src="%s`, baseURL)) 209 227 html = strings.ReplaceAll(html, `href="./`, fmt.Sprintf(`href="%s`, baseURL)) 210 228 html = strings.ReplaceAll(html, `src="../`, fmt.Sprintf(`src="%s../`, baseURL)) 211 229 html = strings.ReplaceAll(html, `href="../`, fmt.Sprintf(`href="%s../`, baseURL)) 212 230 213 - // Handle root-relative URLs (starting with /) 214 - if base.Scheme != "" && base.Host != "" { 215 - root := fmt.Sprintf("%s://%s/", base.Scheme, base.Host) 216 - // Replace src="/" and href="/" but not src="//" (absolute URLs) 217 - html = strings.ReplaceAll(html, `src="/`, fmt.Sprintf(`src="%s`, root)) 218 - html = strings.ReplaceAll(html, `href="/`, fmt.Sprintf(`href="%s`, root)) 219 - } 231 + // Handle bare relative URLs (e.g., src="image.png" without ./ prefix) 232 + // Skip URLs that are already absolute (start with http://, https://, or //) 233 + // Skip anchors (#), data URLs (data:), and mailto links 234 + html = relativeSrcPattern.ReplaceAllStringFunc(html, func(match string) string { 235 + // Extract the URL from src="..." 236 + url := match[5 : len(match)-1] // Remove 'src="' and '"' 237 + 238 + // Skip if already processed or is a special URL type 239 + if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") || 240 + strings.HasPrefix(url, "//") || strings.HasPrefix(url, "#") || 241 + strings.HasPrefix(url, "data:") || strings.HasPrefix(url, "mailto:") { 242 + return match 243 + } 244 + 245 + return fmt.Sprintf(`src="%s%s"`, baseURL, url) 246 + }) 247 + 248 + html = relativeHrefPattern.ReplaceAllStringFunc(html, func(match string) string { 249 + // Extract the URL from href="..." 250 + url := match[6 : len(match)-1] // Remove 'href="' and '"' 251 + 252 + // Skip if already processed or is a special URL type 253 + if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") || 254 + strings.HasPrefix(url, "//") || strings.HasPrefix(url, "#") || 255 + strings.HasPrefix(url, "data:") || strings.HasPrefix(url, "mailto:") { 256 + return match 257 + } 258 + 259 + return fmt.Sprintf(`href="%s%s"`, baseURL, url) 260 + }) 220 261 221 262 return html 222 263 }
+42
pkg/appview/readme/fetcher_test.go
··· 145 145 baseURL: "https://example.com/docs/", 146 146 expected: `<img src="https://example.com//cdn.example.com/image.png">`, 147 147 }, 148 + { 149 + name: "bare relative src (no ./ prefix)", 150 + html: `<img src="image.png">`, 151 + baseURL: "https://example.com/docs/", 152 + expected: `<img src="https://example.com/docs/image.png">`, 153 + }, 154 + { 155 + name: "bare relative href (no ./ prefix)", 156 + html: `<a href="page.html">link</a>`, 157 + baseURL: "https://example.com/docs/", 158 + expected: `<a href="https://example.com/docs/page.html">link</a>`, 159 + }, 160 + { 161 + name: "bare relative with path", 162 + html: `<img src="images/logo.png">`, 163 + baseURL: "https://example.com/docs/", 164 + expected: `<img src="https://example.com/docs/images/logo.png">`, 165 + }, 166 + { 167 + name: "anchor links unchanged", 168 + html: `<a href="#section">link</a>`, 169 + baseURL: "https://example.com/docs/", 170 + expected: `<a href="#section">link</a>`, 171 + }, 172 + { 173 + name: "data URLs unchanged", 174 + html: `<img src="">`, 175 + baseURL: "https://example.com/docs/", 176 + expected: `<img src="">`, 177 + }, 178 + { 179 + name: "mailto links unchanged", 180 + html: `<a href="mailto:test@example.com">email</a>`, 181 + baseURL: "https://example.com/docs/", 182 + expected: `<a href="mailto:test@example.com">email</a>`, 183 + }, 184 + { 185 + name: "mixed bare and prefixed relative URLs", 186 + html: `<img src="slices_and_lucy.png"><a href="./other.md">link</a>`, 187 + baseURL: "https://github.com/user/repo/blob/main/", 188 + expected: `<img src="https://github.com/user/repo/blob/main/slices_and_lucy.png"><a href="https://github.com/user/repo/blob/main/other.md">link</a>`, 189 + }, 148 190 } 149 191 150 192 for _, tt := range tests {