Serenity Operating System
1/*
2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <AK/FileSystemPath.h>
28#include <AK/StringBuilder.h>
29#include <AK/URL.h>
30
31namespace AK {
32
// Returns true if `ch` may appear in a URL protocol (scheme) name.
// Only lowercase ASCII letters 'a'..'z' are accepted.
static inline bool is_valid_protocol_character(char ch)
{
    if (ch < 'a')
        return false;
    return ch <= 'z';
}
37
// Returns true if `ch` may appear in a hostname: any character except
// NUL, '/' (start of the path) and ':' (start of the port).
static inline bool is_valid_hostname_character(char ch)
{
    switch (ch) {
    case '\0':
    case '/':
    case ':':
        return false;
    default:
        return true;
    }
}
42
// Returns true if `ch` is an ASCII decimal digit ('0'..'9').
static inline bool is_digit(char ch)
{
    return !(ch < '0' || ch > '9');
}
47
48bool URL::parse(const StringView& string)
49{
50 enum class State {
51 InProtocol,
52 InHostname,
53 InPort,
54 InPath,
55 };
56
57 Vector<char, 256> buffer;
58 State state { State::InProtocol };
59
60 size_t index = 0;
61
62 auto peek = [&] {
63 if (index >= string.length())
64 return '\0';
65 return string[index];
66 };
67
68 auto consume = [&] {
69 if (index >= string.length())
70 return '\0';
71 return string[index++];
72 };
73
74 while (index < string.length()) {
75 switch (state) {
76 case State::InProtocol:
77 if (is_valid_protocol_character(peek())) {
78 buffer.append(consume());
79 continue;
80 }
81 if (consume() != ':')
82 return false;
83 if (consume() != '/')
84 return false;
85 if (consume() != '/')
86 return false;
87 if (buffer.is_empty())
88 return false;
89 m_protocol = String::copy(buffer);
90 if (m_protocol == "http")
91 m_port = 80;
92 else if (m_protocol == "https")
93 m_port = 443;
94 buffer.clear();
95 if (m_protocol == "file")
96 state = State::InPath;
97 else
98 state = State::InHostname;
99 continue;
100 case State::InHostname:
101 if (is_valid_hostname_character(peek())) {
102 buffer.append(consume());
103 continue;
104 }
105 if (buffer.is_empty())
106 return false;
107 m_host = String::copy(buffer);
108 buffer.clear();
109 if (peek() == ':') {
110 consume();
111 state = State::InPort;
112 continue;
113 }
114 if (peek() == '/') {
115 state = State::InPath;
116 continue;
117 }
118 return false;
119 case State::InPort:
120 if (is_digit(peek())) {
121 buffer.append(consume());
122 continue;
123 }
124 if (buffer.is_empty())
125 return false;
126 {
127 bool ok;
128 m_port = String::copy(buffer).to_uint(ok);
129 buffer.clear();
130 if (!ok)
131 return false;
132 }
133 if (peek() == '/') {
134 state = State::InPath;
135 continue;
136 }
137 return false;
138 case State::InPath:
139 buffer.append(consume());
140 continue;
141 }
142 }
143 if (state == State::InHostname) {
144 // We're still in the hostname, so e.g "http://serenityos.org"
145 if (buffer.is_empty())
146 return false;
147 m_host = String::copy(buffer);
148 m_path = "/";
149 return true;
150 }
151 m_path = String::copy(buffer);
152 return true;
153}
154
155URL::URL(const StringView& string)
156{
157 m_valid = parse(string);
158}
159
160String URL::to_string() const
161{
162 StringBuilder builder;
163 builder.append(m_protocol);
164 builder.append("://");
165 if (protocol() != "file") {
166 builder.append(m_host);
167 if (!(protocol() == "http" && port() == 80) && !(protocol() == "https" && port() == 443)) {
168 builder.append(':');
169 builder.append(String::number(m_port));
170 }
171 }
172 builder.append(m_path);
173 if (!m_query.is_empty()) {
174 builder.append('?');
175 builder.append(m_query);
176 }
177 return builder.to_string();
178}
179
180URL URL::complete_url(const String& string) const
181{
182 URL url(string);
183 if (url.is_valid())
184 return url;
185
186 if (string.starts_with("/")) {
187 url = *this;
188 url.set_path(string);
189 return url;
190 }
191
192 StringBuilder builder;
193 FileSystemPath fspath(path());
194 builder.append('/');
195
196 bool document_url_ends_in_slash = path()[path().length() - 1] == '/';
197
198 for (size_t i = 0; i < fspath.parts().size(); ++i) {
199 if (i == fspath.parts().size() - 1 && !document_url_ends_in_slash)
200 break;
201 builder.append(fspath.parts()[i]);
202 builder.append('/');
203 }
204 builder.append(string);
205 auto built = builder.to_string();
206 fspath = FileSystemPath(built);
207
208 url = *this;
209 url.set_path(fspath.string());
210 return url;
211}
212
213}