Serenity Operating System
1/*
2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3 * Copyright (c) 2022, Thomas Keppler <serenity@tkeppler.de>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#include <AK/Base64.h>
9#include <AK/GenericLexer.h>
10#include <AK/LexicalPath.h>
11#include <AK/MaybeOwned.h>
12#include <AK/NumberFormat.h>
13#include <AK/String.h>
14#include <AK/URL.h>
15#include <LibCore/ArgsParser.h>
16#include <LibCore/DeprecatedFile.h>
17#include <LibCore/EventLoop.h>
18#include <LibCore/System.h>
19#include <LibHTTP/HttpResponse.h>
20#include <LibMain/Main.h>
21#include <LibProtocol/Request.h>
22#include <LibProtocol/RequestClient.h>
23#include <ctype.h>
24#include <stdio.h>
25
26// FIXME: Move this somewhere else when it's needed (e.g. in the Browser)
27class ContentDispositionParser {
28public:
29 ContentDispositionParser(StringView value)
30 {
31 GenericLexer lexer(value);
32
33 lexer.ignore_while(isspace);
34
35 if (lexer.consume_specific("inline")) {
36 m_kind = Kind::Inline;
37 if (!lexer.is_eof())
38 m_might_be_wrong = true;
39 return;
40 }
41
42 if (lexer.consume_specific("attachment")) {
43 m_kind = Kind::Attachment;
44 if (lexer.consume_specific(";")) {
45 lexer.ignore_while(isspace);
46 if (lexer.consume_specific("filename=")) {
47 // RFC 2183: "A short (length <= 78 characters)
48 // parameter value containing only non-`tspecials' characters SHOULD be
49 // represented as a single `token'."
50 // Some people seem to take this as generic advice of "if it doesn't have special characters,
51 // it's safe to specify as a single token"
52 // So let's just be as lenient as possible.
53 if (lexer.next_is('"'))
54 m_filename = lexer.consume_quoted_string();
55 else
56 m_filename = lexer.consume_until(is_any_of("()<>@,;:\\\"/[]?= "sv));
57 } else {
58 m_might_be_wrong = true;
59 }
60 }
61 return;
62 }
63
64 if (lexer.consume_specific("form-data")) {
65 m_kind = Kind::FormData;
66 while (lexer.consume_specific(";")) {
67 lexer.ignore_while(isspace);
68 if (lexer.consume_specific("name=")) {
69 m_name = lexer.consume_quoted_string();
70 } else if (lexer.consume_specific("filename=")) {
71 if (lexer.next_is('"'))
72 m_filename = lexer.consume_quoted_string();
73 else
74 m_filename = lexer.consume_until(is_any_of("()<>@,;:\\\"/[]?= "sv));
75 } else {
76 m_might_be_wrong = true;
77 }
78 }
79
80 return;
81 }
82
83 // FIXME: Support 'filename*'
84 m_might_be_wrong = true;
85 }
86
87 enum class Kind {
88 Inline,
89 Attachment,
90 FormData,
91 };
92
93 StringView filename() const { return m_filename; }
94 StringView name() const { return m_name; }
95 Kind kind() const { return m_kind; }
96 bool might_be_wrong() const { return m_might_be_wrong; }
97
98private:
99 StringView m_filename;
100 StringView m_name;
101 Kind m_kind { Kind::Inline };
102 bool m_might_be_wrong { false };
103};
104
105/// Wraps a stream to silently ignore writes when the condition isn't true.
106template<typename ConditionT>
107class ConditionalOutputStream final : public Stream {
108public:
109 ConditionalOutputStream(ConditionT&& condition, MaybeOwned<Stream> stream)
110 : m_stream(move(stream))
111 , m_condition(condition)
112 {
113 }
114
115 virtual ErrorOr<Bytes> read_some(Bytes) override
116 {
117 return Error::from_errno(EBADF);
118 }
119
120 virtual ErrorOr<size_t> write_some(ReadonlyBytes bytes) override
121 {
122 // Pretend that we wrote the whole buffer if the condition is untrue.
123 if (!m_condition())
124 return bytes.size();
125
126 return m_stream->write_some(bytes);
127 }
128
129 virtual bool is_eof() const override
130 {
131 return true;
132 }
133
134 virtual bool is_open() const override
135 {
136 return m_stream->is_open();
137 }
138
139 virtual void close() override
140 {
141 }
142
143private:
144 MaybeOwned<Stream> m_stream;
145 ConditionT m_condition;
146};
147
148ErrorOr<int> serenity_main(Main::Arguments arguments)
149{
150 StringView url_str;
151 bool save_at_provided_name = false;
152 bool should_follow_url = false;
153 bool verbose_output = false;
154 StringView data;
155 StringView proxy_spec;
156 DeprecatedString method = "GET";
157 StringView method_override;
158 HashMap<DeprecatedString, DeprecatedString, CaseInsensitiveStringTraits> request_headers;
159 String credentials;
160
161 Core::ArgsParser args_parser;
162 args_parser.set_general_help(
163 "Request a file from an arbitrary URL. This command uses RequestServer, "
164 "and thus supports at least http, https, and gemini.");
165 args_parser.add_option(save_at_provided_name, "Write to a file named as the remote file", nullptr, 'O');
166 args_parser.add_option(data, "(HTTP only) Send the provided data via an HTTP POST request", "data", 'd', "data");
167 args_parser.add_option(method_override, "(HTTP only) HTTP method to use for the request (eg, GET, POST, etc)", "method", 'm', "method");
168 args_parser.add_option(should_follow_url, "(HTTP only) Follow the Location header if a 3xx status is encountered", "follow", 'l');
169 args_parser.add_option(Core::ArgsParser::Option {
170 .argument_mode = Core::ArgsParser::OptionArgumentMode::Required,
171 .help_string = "Add a header entry to the request",
172 .long_name = "header",
173 .short_name = 'H',
174 .value_name = "key:value",
175 .accept_value = [&](StringView header) {
176 auto split = header.find(':');
177 if (!split.has_value())
178 return false;
179 request_headers.set(header.substring_view(0, split.value()), header.substring_view(split.value() + 1));
180 return true;
181 } });
182 args_parser.add_option(Core::ArgsParser::Option {
183 .argument_mode = Core::ArgsParser::OptionArgumentMode::Required,
184 .help_string = "(HTTP only) Provide basic authentication credentials",
185 .long_name = "auth",
186 .short_name = 'u',
187 .value_name = "username:password",
188 .accept_value = [&](StringView input) {
189 if (!input.contains(':'))
190 return false;
191
192 // NOTE: Input is explicitly not trimmed, but instead taken in raw;
193 // Space prepended usernames and appended passwords might be legal in the user's context.
194 auto maybe_credentials = String::from_utf8(input);
195 if (maybe_credentials.is_error())
196 return false;
197
198 credentials = maybe_credentials.release_value();
199 return true;
200 } });
201 args_parser.add_option(proxy_spec, "Specify a proxy server to use for this request (proto://ip:port)", "proxy", 'p', "proxy");
202 args_parser.add_option(verbose_output, "(HTTP only) Log request and response metadata", "verbose", 'v');
203 args_parser.add_positional_argument(url_str, "URL to download from", "url");
204 args_parser.parse(arguments);
205
206 // If writing to a file was requested, we'll open a new file descriptor with the same number later.
207 // Until then, we just clone the stdout file descriptor, because we shouldn't be reopening the actual stdout.
208 int const output_fd = TRY(Core::System::dup(STDOUT_FILENO));
209
210 if (!method_override.is_empty()) {
211 method = method_override;
212 } else if (!data.is_empty()) {
213 method = "POST";
214 // FIXME: Content-Type?
215 }
216
217 URL url(url_str);
218 if (!url.is_valid()) {
219 warnln("'{}' is not a valid URL", url_str);
220 return 1;
221 }
222
223 bool const is_http_url = url.scheme().is_one_of("http"sv, "https"sv);
224
225 Core::ProxyData proxy_data {};
226 if (!proxy_spec.is_empty())
227 proxy_data = TRY(Core::ProxyData::parse_url(proxy_spec));
228
229 Core::EventLoop loop;
230 bool received_actual_headers = false;
231 bool should_save_stream_data = false;
232 bool following_url = false;
233
234 u32 previous_downloaded_size = 0;
235 u32 const report_time_in_ms = 100;
236 u32 const speed_update_time_in_ms = 4000;
237
238 timeval previous_time, current_time, time_diff;
239 gettimeofday(&previous_time, nullptr);
240
241 RefPtr<Protocol::Request> request;
242 auto protocol_client = TRY(Protocol::RequestClient::try_create());
243 auto output_stream = ConditionalOutputStream { [&] { return should_save_stream_data; }, TRY(Core::File::adopt_fd(output_fd, Core::File::OpenMode::Write)) };
244
245 // https://httpwg.org/specs/rfc9110.html#authentication
246 auto const has_credentials = !credentials.is_empty();
247 auto const has_manual_authorization_header = request_headers.contains("Authorization");
248 if (is_http_url && has_credentials && !has_manual_authorization_header) {
249 // 11.2. Authentication Parameters
250 // The authentication scheme is followed by additional information necessary for achieving authentication via
251 // that scheme as (...) or a single sequence of characters capable of holding base64-encoded information.
252 auto const encoded_credentials = TRY(encode_base64(credentials.bytes()));
253 auto const authorization = TRY(String::formatted("Basic {}", encoded_credentials));
254 request_headers.set("Authorization", authorization.to_deprecated_string());
255 } else {
256 if (is_http_url && has_credentials && has_manual_authorization_header)
257 warnln("* Skipping encoding provided authorization, manual header present.");
258 if (!is_http_url && has_credentials)
259 warnln("* Skipping adding Authorization header, request was not for the HTTP protocol.");
260 }
261
262 Function<void()> setup_request = [&] {
263 if (!request) {
264 warnln("Failed to start request for '{}'", url_str);
265 exit(1);
266 }
267
268 if (verbose_output && is_http_url) {
269 warnln("* Setting up request");
270 warnln("> Method={}, URL={}", method, url);
271 for (auto const& header : request_headers) {
272 warnln("> {}: {}", header.key, header.value);
273 }
274 }
275
276 request->on_progress = [&](Optional<u32> maybe_total_size, u32 downloaded_size) {
277 gettimeofday(¤t_time, nullptr);
278 timersub(¤t_time, &previous_time, &time_diff);
279 auto time_diff_ms = time_diff.tv_sec * 1000 + time_diff.tv_usec / 1000;
280 if (time_diff_ms < report_time_in_ms)
281 return;
282
283 warn("\r\033[2K");
284 if (maybe_total_size.has_value()) {
285 warn("\033]9;{};{};\033\\", downloaded_size, maybe_total_size.value());
286 warn("Download progress: {} / {}", human_readable_size(downloaded_size), human_readable_size(maybe_total_size.value()));
287 } else {
288 warn("Download progress: {} / ???", human_readable_size(downloaded_size));
289 }
290
291 auto size_diff = downloaded_size - previous_downloaded_size;
292 if (time_diff_ms > speed_update_time_in_ms) {
293 previous_time = current_time;
294 previous_downloaded_size = downloaded_size;
295 }
296
297 warn(" at {}/s", human_readable_size(((float)size_diff / (float)time_diff_ms) * 1000));
298 };
299 request->on_headers_received = [&](auto& response_headers, auto status_code) {
300 if (received_actual_headers)
301 return;
302 dbgln("Received headers! response code = {}", status_code.value_or(0));
303 received_actual_headers = true; // And not trailers!
304 should_save_stream_data = true;
305
306 if (verbose_output && is_http_url) {
307 warnln("* Received headers");
308 auto const value = status_code.value_or(0);
309 auto const reason_phrase = (value != 0)
310 ? HTTP::HttpResponse::reason_phrase_for_code(value)
311 : "UNKNOWN"sv;
312 warnln("< Code={}, Reason={}", value, reason_phrase);
313 for (auto const& header : response_headers) {
314 warnln("< {}: {}", header.key, header.value);
315 }
316 }
317
318 if (!following_url && save_at_provided_name) {
319 DeprecatedString output_name;
320 if (auto content_disposition = response_headers.get("Content-Disposition"); content_disposition.has_value()) {
321 auto& value = content_disposition.value();
322 ContentDispositionParser parser(value);
323 output_name = parser.filename();
324 }
325
326 if (output_name.is_empty())
327 output_name = url.path();
328
329 LexicalPath path { output_name };
330 output_name = path.basename();
331
332 // The URL didn't have a name component, e.g. 'serenityos.org'
333 if (output_name.is_empty() || output_name == "/") {
334 int i = -1;
335 do {
336 output_name = url.host();
337 if (i > -1)
338 output_name = DeprecatedString::formatted("{}.{}", output_name, i);
339 ++i;
340 } while (Core::DeprecatedFile::exists(output_name));
341 }
342
343 int target_file_fd = open(output_name.characters(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
344 if (target_file_fd < 0) {
345 perror("target file open");
346 loop.quit(1);
347 return;
348 }
349
350 if (dup2(target_file_fd, output_fd) < 0) {
351 perror("target file dup2");
352 loop.quit(1);
353 return;
354 }
355
356 if (close(target_file_fd) < 0) {
357 perror("target file close");
358 loop.quit(1);
359 return;
360 }
361 }
362
363 auto status_code_value = status_code.value_or(0);
364 if (should_follow_url && status_code_value >= 300 && status_code_value < 400) {
365 if (auto location = response_headers.get("Location"); location.has_value()) {
366 auto was_following_url = following_url;
367 following_url = true;
368 received_actual_headers = false;
369 should_save_stream_data = false;
370 request->on_finish = nullptr;
371 request->on_headers_received = nullptr;
372 request->on_progress = nullptr;
373 request->stop();
374
375 Core::deferred_invoke([&, was_following_url, url = location.value()] {
376 warnln("{}Following to {}", was_following_url ? "" : "\n", url);
377 request = protocol_client->start_request(method, url, request_headers, ReadonlyBytes {}, proxy_data);
378 setup_request();
379 });
380 }
381 } else {
382 following_url = false;
383
384 if (status_code_value >= 400)
385 warnln("Request returned error {}", status_code_value);
386 }
387 };
388 request->on_finish = [&](bool success, auto) {
389 if (following_url)
390 return;
391
392 warn("\033]9;-1;\033\\");
393 warnln();
394 if (!success)
395 warnln("Request failed :(");
396 loop.quit(0);
397 };
398
399 request->stream_into(output_stream);
400 };
401
402 request = protocol_client->start_request(method, url, request_headers, data.bytes(), proxy_data);
403 setup_request();
404
405 dbgln("started request with id {}", request->id());
406
407 return loop.exec();
408}