A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go

better logic for relative urls

evan.jarrett.net 8e31137c 023efb05

verified
Changed files
+92 -9
pkg
appview
+50 -9
pkg/appview/readme/fetcher.go
··· 7 "io" 8 "net/http" 9 "net/url" 10 "strings" 11 "time" 12 ··· 192 return f.renderMarkdown(content, "") 193 } 194 195 // rewriteRelativeURLs converts relative URLs to absolute URLs 196 func rewriteRelativeURLs(html, baseURL string) string { 197 if baseURL == "" { ··· 203 return html 204 } 205 206 - // Simple string replacement for common patterns 207 - // This is a basic implementation - for production, consider using an HTML parser 208 html = strings.ReplaceAll(html, `src="./`, fmt.Sprintf(`src="%s`, baseURL)) 209 html = strings.ReplaceAll(html, `href="./`, fmt.Sprintf(`href="%s`, baseURL)) 210 html = strings.ReplaceAll(html, `src="../`, fmt.Sprintf(`src="%s../`, baseURL)) 211 html = strings.ReplaceAll(html, `href="../`, fmt.Sprintf(`href="%s../`, baseURL)) 212 213 - // Handle root-relative URLs (starting with /) 214 - if base.Scheme != "" && base.Host != "" { 215 - root := fmt.Sprintf("%s://%s/", base.Scheme, base.Host) 216 - // Replace src="/" and href="/" but not src="//" (absolute URLs) 217 - html = strings.ReplaceAll(html, `src="/`, fmt.Sprintf(`src="%s`, root)) 218 - html = strings.ReplaceAll(html, `href="/`, fmt.Sprintf(`href="%s`, root)) 219 - } 220 221 return html 222 }
var (
	// urlAttrPattern matches a double-quoted src or href attribute together
	// with its (possibly empty) value, e.g. `src="image.png"`.
	urlAttrPattern = regexp.MustCompile(`(?:src|href)="[^"]*"`)

	// uriSchemePattern matches a leading URI scheme such as "https:", "data:",
	// "mailto:" or "tel:". A relative reference cannot begin with a scheme, so
	// any value that matches is already absolute and is left alone.
	uriSchemePattern = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9+.-]*:`)
)

// rewriteRelativeURLs converts relative URLs found in double-quoted src="..."
// and href="..." attributes of html into absolute URLs based on baseURL.
//
// baseURL is used as a plain string prefix, so it is expected to end in "/".
// html is returned unchanged when baseURL is empty or cannot be parsed.
//
// Every attribute value is classified exactly once, so a rewritten value is
// never rewritten a second time:
//
//   - "/path"            -> scheme://host/ + "path" (only if baseURL has both a
//     scheme and a host; otherwise unchanged)
//   - "./path"           -> baseURL + "path"
//   - "../path", "path"  -> baseURL + the value as written (".." is kept, not
//     collapsed)
//   - "", "#anchor", values starting with ":", and values that start with a
//     URI scheme (http:, https:, data:, mailto:, tel:, ...) are unchanged
//
// NOTE(review): protocol-relative values ("//host/path") start with "/" and
// are therefore rewritten like root-relative ones, producing
// "scheme://host//host/path". The package's existing test table expects this
// output, so it is preserved here; consider fixing it together with that test.
func rewriteRelativeURLs(html, baseURL string) string {
	if baseURL == "" {
		return html
	}

	base, err := url.Parse(baseURL)
	if err != nil {
		return html
	}

	// root stays empty when baseURL has no scheme/host, which disables
	// rewriting of root-relative values.
	var root string
	if base.Scheme != "" && base.Host != "" {
		root = fmt.Sprintf("%s://%s/", base.Scheme, base.Host)
	}

	return urlAttrPattern.ReplaceAllStringFunc(html, func(match string) string {
		// match has the form attr="value"; split it at the first `="`.
		sep := strings.Index(match, `="`)
		attr := match[:sep]
		ref := match[sep+2 : len(match)-1]

		abs, ok := absolutizeRef(ref, baseURL, root)
		if !ok {
			return match
		}
		return fmt.Sprintf(`%s="%s"`, attr, abs)
	})
}

// absolutizeRef returns the absolute form of ref and true when ref is a
// relative reference that should be rewritten, or "" and false when ref must
// be left untouched.
func absolutizeRef(ref, baseURL, root string) (string, bool) {
	switch {
	case ref == "", ref[0] == '#', ref[0] == ':':
		// Empty values, same-document anchors and malformed ":..." values.
		return "", false
	case ref[0] == '/':
		// Root-relative (and, see NOTE on the caller, protocol-relative).
		if root == "" {
			return "", false
		}
		return root + ref[1:], true
	case uriSchemePattern.MatchString(ref):
		// Already absolute: http:, https:, data:, mailto:, tel:, ...
		return "", false
	case strings.HasPrefix(ref, "./"):
		return baseURL + ref[2:], true
	default:
		// Bare relative ("image.png", "images/logo.png") and "../" paths.
		return baseURL + ref, true
	}
}
+42
pkg/appview/readme/fetcher_test.go
··· 145 baseURL: "https://example.com/docs/", 146 expected: `<img src="https://example.com//cdn.example.com/image.png">`, 147 }, 148 } 149 150 for _, tt := range tests {
··· 145 baseURL: "https://example.com/docs/", 146 expected: `<img src="https://example.com//cdn.example.com/image.png">`, 147 }, 148 + { 149 + name: "bare relative src (no ./ prefix)", 150 + html: `<img src="image.png">`, 151 + baseURL: "https://example.com/docs/", 152 + expected: `<img src="https://example.com/docs/image.png">`, 153 + }, 154 + { 155 + name: "bare relative href (no ./ prefix)", 156 + html: `<a href="page.html">link</a>`, 157 + baseURL: "https://example.com/docs/", 158 + expected: `<a href="https://example.com/docs/page.html">link</a>`, 159 + }, 160 + { 161 + name: "bare relative with path", 162 + html: `<img src="images/logo.png">`, 163 + baseURL: "https://example.com/docs/", 164 + expected: `<img src="https://example.com/docs/images/logo.png">`, 165 + }, 166 + { 167 + name: "anchor links unchanged", 168 + html: `<a href="#section">link</a>`, 169 + baseURL: "https://example.com/docs/", 170 + expected: `<a href="#section">link</a>`, 171 + }, 172 + { 173 + name: "data URLs unchanged", 174 + html: `<img src="data:image/png;base64,abc123">`, 175 + baseURL: "https://example.com/docs/", 176 + expected: `<img src="data:image/png;base64,abc123">`, 177 + }, 178 + { 179 + name: "mailto links unchanged", 180 + html: `<a href="mailto:test@example.com">email</a>`, 181 + baseURL: "https://example.com/docs/", 182 + expected: `<a href="mailto:test@example.com">email</a>`, 183 + }, 184 + { 185 + name: "mixed bare and prefixed relative URLs", 186 + html: `<img src="slices_and_lucy.png"><a href="./other.md">link</a>`, 187 + baseURL: "https://github.com/user/repo/blob/main/", 188 + expected: `<img src="https://github.com/user/repo/blob/main/slices_and_lucy.png"><a href="https://github.com/user/repo/blob/main/other.md">link</a>`, 189 + }, 190 } 191 192 for _, tt := range tests {