1// Copyright 2022 The Gitea Authors. All rights reserved.
2// SPDX-License-Identifier: MIT
3
4package charset
5
6import (
7 "fmt"
8 "io"
9
10 "golang.org/x/net/html"
11)
12
13// HTMLStreamer represents a SAX-like interface for HTML
14type HTMLStreamer interface {
15 Error(err error) error
16 Doctype(data string) error
17 Comment(data string) error
18 StartTag(data string, attrs ...html.Attribute) error
19 SelfClosingTag(data string, attrs ...html.Attribute) error
20 EndTag(data string) error
21 Text(data string) error
22}
23
24// PassthroughHTMLStreamer is a passthrough streamer
25type PassthroughHTMLStreamer struct {
26 next HTMLStreamer
27}
28
29func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer {
30 return &PassthroughHTMLStreamer{next: next}
31}
32
33var _ (HTMLStreamer) = &PassthroughHTMLStreamer{}
34
35// Error tells the next streamer in line that there is an error
36func (p *PassthroughHTMLStreamer) Error(err error) error {
37 return p.next.Error(err)
38}
39
40// Doctype tells the next streamer what the doctype is
41func (p *PassthroughHTMLStreamer) Doctype(data string) error {
42 return p.next.Doctype(data)
43}
44
45// Comment tells the next streamer there is a comment
46func (p *PassthroughHTMLStreamer) Comment(data string) error {
47 return p.next.Comment(data)
48}
49
50// StartTag tells the next streamer there is a starting tag
51func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error {
52 return p.next.StartTag(data, attrs...)
53}
54
55// SelfClosingTag tells the next streamer there is a self-closing tag
56func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error {
57 return p.next.SelfClosingTag(data, attrs...)
58}
59
60// EndTag tells the next streamer there is a end tag
61func (p *PassthroughHTMLStreamer) EndTag(data string) error {
62 return p.next.EndTag(data)
63}
64
65// Text tells the next streamer there is a text
66func (p *PassthroughHTMLStreamer) Text(data string) error {
67 return p.next.Text(data)
68}
69
// HTMLStreamerWriter acts as a writing sink: output is written to the
// embedded io.Writer.
//
// The err field is a latch. While it is nil, Write and WriteString pass data
// through to the embedded writer; once it is non-nil they write nothing and
// return it instead.
type HTMLStreamerWriter struct {
	io.Writer
	err error
}

// Compile-time checks for the io interfaces implemented below.
var (
	_ io.Writer       = (*HTMLStreamerWriter)(nil)
	_ io.StringWriter = (*HTMLStreamerWriter)(nil)
)

// Write implements io.Writer. If an error has already been recorded it
// returns (0, that error) without touching the embedded writer; otherwise it
// returns the result of the embedded writer's Write.
func (h *HTMLStreamerWriter) Write(data []byte) (int, error) {
	if h.err != nil {
		return 0, h.err
	}
	return h.Writer.Write(data)
}

// WriteString implements io.StringWriter. If an error has already been
// recorded it returns (0, that error) without touching the embedded writer.
func (h *HTMLStreamerWriter) WriteString(data string) (int, error) {
	if h.err != nil {
		return 0, h.err
	}
	// io.WriteString uses the embedded writer's own WriteString when it has
	// one and only falls back to Write([]byte(data)) otherwise, which avoids
	// a needless copy for sinks such as bytes.Buffer or bufio.Writer.
	return io.WriteString(h.Writer, data)
}

// Error records err if no error has been recorded yet and returns the
// recorded error; the first non-nil error therefore wins.
func (h *HTMLStreamerWriter) Error(err error) error {
	if h.err == nil {
		h.err = err
	}
	return h.err
}
99
100// Doctype tells the next streamer what the doctype is
101func (h *HTMLStreamerWriter) Doctype(data string) error {
102 _, h.err = h.WriteString("<!DOCTYPE " + data + ">")
103 return h.err
104}
105
106// Comment tells the next streamer there is a comment
107func (h *HTMLStreamerWriter) Comment(data string) error {
108 _, h.err = h.WriteString("<!--" + data + "-->")
109 return h.err
110}
111
112// StartTag tells the next streamer there is a starting tag
113func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error {
114 return h.startTag(data, attrs, false)
115}
116
117// SelfClosingTag tells the next streamer there is a self-closing tag
118func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error {
119 return h.startTag(data, attrs, true)
120}
121
122func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error {
123 if _, h.err = h.WriteString("<" + data); h.err != nil {
124 return h.err
125 }
126 for _, attr := range attrs {
127 if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil {
128 return h.err
129 }
130 }
131 if selfclosing {
132 if _, h.err = h.WriteString("/>"); h.err != nil {
133 return h.err
134 }
135 } else {
136 if _, h.err = h.WriteString(">"); h.err != nil {
137 return h.err
138 }
139 }
140 return h.err
141}
142
143// EndTag tells the next streamer there is a end tag
144func (h *HTMLStreamerWriter) EndTag(data string) error {
145 _, h.err = h.WriteString("</" + data + ">")
146 return h.err
147}
148
149// Text tells the next streamer there is a text
150func (h *HTMLStreamerWriter) Text(data string) error {
151 _, h.err = h.WriteString(html.EscapeString(data))
152 return h.err
153}
154
155// StreamHTML streams an html to a provided streamer
156func StreamHTML(source io.Reader, streamer HTMLStreamer) error {
157 tokenizer := html.NewTokenizer(source)
158 for {
159 tt := tokenizer.Next()
160 switch tt {
161 case html.ErrorToken:
162 if tokenizer.Err() != io.EOF {
163 return tokenizer.Err()
164 }
165 return nil
166 case html.DoctypeToken:
167 token := tokenizer.Token()
168 if err := streamer.Doctype(token.Data); err != nil {
169 return err
170 }
171 case html.CommentToken:
172 token := tokenizer.Token()
173 if err := streamer.Comment(token.Data); err != nil {
174 return err
175 }
176 case html.StartTagToken:
177 token := tokenizer.Token()
178 if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
179 return err
180 }
181 case html.SelfClosingTagToken:
182 token := tokenizer.Token()
183 if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
184 return err
185 }
186 case html.EndTagToken:
187 token := tokenizer.Token()
188 if err := streamer.EndTag(token.Data); err != nil {
189 return err
190 }
191 case html.TextToken:
192 token := tokenizer.Token()
193 if err := streamer.Text(token.Data); err != nil {
194 return err
195 }
196 default:
197 return fmt.Errorf("unknown type of token: %d", tt)
198 }
199 }
200}