+50
-9
pkg/appview/readme/fetcher.go
+50
-9
pkg/appview/readme/fetcher.go
···
7
"io"
8
"net/http"
9
"net/url"
10
"strings"
11
"time"
12
···
192
return f.renderMarkdown(content, "")
193
}
194
195
// rewriteRelativeURLs converts relative URLs to absolute URLs
196
func rewriteRelativeURLs(html, baseURL string) string {
197
if baseURL == "" {
···
203
return html
204
}
205
206
-
// Simple string replacement for common patterns
207
-
// This is a basic implementation - for production, consider using an HTML parser
208
html = strings.ReplaceAll(html, `src="./`, fmt.Sprintf(`src="%s`, baseURL))
209
html = strings.ReplaceAll(html, `href="./`, fmt.Sprintf(`href="%s`, baseURL))
210
html = strings.ReplaceAll(html, `src="../`, fmt.Sprintf(`src="%s../`, baseURL))
211
html = strings.ReplaceAll(html, `href="../`, fmt.Sprintf(`href="%s../`, baseURL))
212
213
-
// Handle root-relative URLs (starting with /)
214
-
if base.Scheme != "" && base.Host != "" {
215
-
root := fmt.Sprintf("%s://%s/", base.Scheme, base.Host)
216
-
// Replace src="/" and href="/" but not src="//" (absolute URLs)
217
-
html = strings.ReplaceAll(html, `src="/`, fmt.Sprintf(`src="%s`, root))
218
-
html = strings.ReplaceAll(html, `href="/`, fmt.Sprintf(`href="%s`, root))
219
-
}
220
221
return html
222
}
···
7
"io"
8
"net/http"
9
"net/url"
10
+
"regexp"
11
"strings"
12
"time"
13
···
193
return f.renderMarkdown(content, "")
194
}
195
196
+
// Regex patterns that pick out candidate src="..." / href="..." attribute values for rewriting.
197
+
// They only reject values whose first character is '"', '/' or ':'; values such as http://, https://, #anchor, data: and mailto: still match and are filtered out afterwards by the callbacks in rewriteRelativeURLs.
198
+
var (
199
+
// Matches src="value" where value is non-empty and does not begin with '/' or ':'; group 1 captures value. The pattern is unanchored, so it can also match the tail of a longer attribute name ending in src.
200
+
relativeSrcPattern = regexp.MustCompile(`src="([^"/:][^"]*)"`)
201
+
// Matches href="value" where value is non-empty and does not begin with '/' or ':'; group 1 captures value. The pattern is unanchored, so it can also match the tail of a longer attribute name ending in href.
202
+
relativeHrefPattern = regexp.MustCompile(`href="([^"/:][^"]*)"`)
203
+
)
204
+
205
// rewriteRelativeURLs converts relative URLs to absolute URLs
206
func rewriteRelativeURLs(html, baseURL string) string {
207
if baseURL == "" {
···
213
return html
214
}
215
216
+
// Handle root-relative URLs (starting with /) first
217
+
// Must be done before bare relative URLs to avoid double-processing
218
+
if base.Scheme != "" && base.Host != "" {
219
+
root := fmt.Sprintf("%s://%s/", base.Scheme, base.Host)
220
+
// Replace the src="/ and href="/ prefixes with the site root. NOTE: this plain prefix match also rewrites protocol-relative URLs (src="//...").
221
+
html = strings.ReplaceAll(html, `src="/`, fmt.Sprintf(`src="%s`, root))
222
+
html = strings.ReplaceAll(html, `href="/`, fmt.Sprintf(`href="%s`, root))
223
+
}
224
+
225
+
// Handle explicit relative paths (./something and ../something)
226
html = strings.ReplaceAll(html, `src="./`, fmt.Sprintf(`src="%s`, baseURL))
227
html = strings.ReplaceAll(html, `href="./`, fmt.Sprintf(`href="%s`, baseURL))
228
html = strings.ReplaceAll(html, `src="../`, fmt.Sprintf(`src="%s../`, baseURL))
229
html = strings.ReplaceAll(html, `href="../`, fmt.Sprintf(`href="%s../`, baseURL))
230
231
+
// Handle bare relative URLs (e.g., src="image.png" without ./ prefix)
232
+
// Skip URLs that are already absolute (start with http://, https://, or //)
233
+
// Skip anchors (#), data URLs (data:), and mailto links
234
+
html = relativeSrcPattern.ReplaceAllStringFunc(html, func(match string) string {
235
+
// Extract the URL from src="..."
236
+
url := match[5 : len(match)-1] // Remove 'src="' and '"'
237
+
238
+
// Skip if already processed or is a special URL type
239
+
if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") ||
240
+
strings.HasPrefix(url, "//") || strings.HasPrefix(url, "#") ||
241
+
strings.HasPrefix(url, "data:") || strings.HasPrefix(url, "mailto:") {
242
+
return match
243
+
}
244
+
245
+
return fmt.Sprintf(`src="%s%s"`, baseURL, url)
246
+
})
247
+
248
+
html = relativeHrefPattern.ReplaceAllStringFunc(html, func(match string) string {
249
+
// Extract the URL from href="..."
250
+
url := match[6 : len(match)-1] // Remove 'href="' and '"'
251
+
252
+
// Skip if already processed or is a special URL type
253
+
if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") ||
254
+
strings.HasPrefix(url, "//") || strings.HasPrefix(url, "#") ||
255
+
strings.HasPrefix(url, "data:") || strings.HasPrefix(url, "mailto:") {
256
+
return match
257
+
}
258
+
259
+
return fmt.Sprintf(`href="%s%s"`, baseURL, url)
260
+
})
261
262
return html
263
}
+42
pkg/appview/readme/fetcher_test.go
+42
pkg/appview/readme/fetcher_test.go
···
145
baseURL: "https://example.com/docs/",
146
expected: `<img src="https://example.com//cdn.example.com/image.png">`,
147
},
148
+
{
149
+
name: "bare relative src (no ./ prefix)",
150
+
html: `<img src="image.png">`,
151
+
baseURL: "https://example.com/docs/",
152
+
expected: `<img src="https://example.com/docs/image.png">`,
153
+
},
154
+
{
155
+
name: "bare relative href (no ./ prefix)",
156
+
html: `<a href="page.html">link</a>`,
157
+
baseURL: "https://example.com/docs/",
158
+
expected: `<a href="https://example.com/docs/page.html">link</a>`,
159
+
},
160
+
{
161
+
name: "bare relative with path",
162
+
html: `<img src="images/logo.png">`,
163
+
baseURL: "https://example.com/docs/",
164
+
expected: `<img src="https://example.com/docs/images/logo.png">`,
165
+
},
166
+
{
167
+
name: "anchor links unchanged",
168
+
html: `<a href="#section">link</a>`,
169
+
baseURL: "https://example.com/docs/",
170
+
expected: `<a href="#section">link</a>`,
171
+
},
172
+
{
173
+
name: "data URLs unchanged",
174
+
html: `<img src="data:image/png;base64,abc123">`,
175
+
baseURL: "https://example.com/docs/",
176
+
expected: `<img src="data:image/png;base64,abc123">`,
177
+
},
178
+
{
179
+
name: "mailto links unchanged",
180
+
html: `<a href="mailto:test@example.com">email</a>`,
181
+
baseURL: "https://example.com/docs/",
182
+
expected: `<a href="mailto:test@example.com">email</a>`,
183
+
},
184
+
{
185
+
name: "mixed bare and prefixed relative URLs",
186
+
html: `<img src="slices_and_lucy.png"><a href="./other.md">link</a>`,
187
+
baseURL: "https://github.com/user/repo/blob/main/",
188
+
expected: `<img src="https://github.com/user/repo/blob/main/slices_and_lucy.png"><a href="https://github.com/user/repo/blob/main/other.md">link</a>`,
189
+
},
190
}
191
192
for _, tt := range tests {