@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
3/**
4 * Handle request startup, before loading the environment or libraries. This
5 * class bootstraps the request state up to the point where we can enter
6 * Phorge code.
7 *
8 * NOTE: This class MUST NOT have any dependencies. It runs before libraries
9 * load.
10 *
11 * Rate Limiting
12 * =============
13 *
 * Phorge limits the rate at which clients can request pages, and issues HTTP
 * 429 "Too Many Requests" responses if clients request too many pages too
 * quickly. Although this is not a complete defense against high-volume attacks,
 * it can protect an install against aggressive crawlers, security scanners,
 * and some types of malicious activity.
19 *
 * To perform rate limiting, each page increments a score counter for the
 * requesting user's IP. The page can give the IP more points for an expensive
 * request, or fewer for an authenticated request.
23 *
24 * Score counters are kept in buckets, and writes move to a new bucket every
25 * minute. After a few minutes (defined by @{method:getRateLimitBucketCount}),
26 * the oldest bucket is discarded. This provides a simple mechanism for keeping
27 * track of scores without needing to store, access, or read very much data.
28 *
29 * Users are allowed to accumulate up to 1000 points per minute, averaged across
30 * all of the tracked buckets.
31 *
32 * @task info Accessing Request Information
33 * @task hook Startup Hooks
34 * @task apocalypse In Case Of Apocalypse
35 * @task validation Validation
36 * @task ratelimit Rate Limiting
37 * @task phases Startup Phase Timers
38 * @task request-path Request Path
39 */
40final class PhabricatorStartup {
41
42 private static $startTime;
43 private static $debugTimeLimit;
44 private static $accessLog;
45 private static $capturingOutput;
46 private static $rawInput;
47 private static $oldMemoryLimit;
48 private static $phases;
49
50 private static $limits = array();
51 private static $requestPath;
52
53
54/* -( Accessing Request Information )-------------------------------------- */
55
56
/**
 * Get the request start time recorded by @{method:didStartup}.
 *
 * @return float|null Start time as handed to @{method:didStartup}, or `null`
 *   if no start time has been recorded yet.
 * @task info
 */
public static function getStartTime() {
  $start_time = self::$startTime;
  return $start_time;
}
63
64
/**
 * Get the number of microseconds elapsed since the recorded start time.
 *
 * @return int Elapsed time in microseconds, truncated to an integer.
 * @task info
 */
public static function getMicrosecondsSinceStart() {
  // Equivalent to "phutil_microseconds_since()", which may not be available
  // yet because libraries may not have been loaded.
  $elapsed_seconds = microtime(true) - self::getStartTime();

  return (int)($elapsed_seconds * 1000000);
}
73
74
/**
 * Store the access log so that @{method:didFatal} can write a final entry
 * to it if the request dies.
 *
 * @param object|null $access_log Access log, or `null` to clear it.
 * @return void
 * @task info
 */
public static function setAccessLog($access_log) {
  self::$accessLog = $access_log;
  return;
}
81
82
/**
 * Read the complete raw request body, caching it after the first read.
 *
 * The body is read through an @{class:AphrontRequestStream}. If the request
 * carries a "Content-Encoding" header, the trimmed header value is passed to
 * the stream before reading.
 *
 * @return string Raw request body.
 * @task info
 */
public static function getRawInput() {
  // Serve the cached copy if the body was already read.
  if (self::$rawInput !== null) {
    return self::$rawInput;
  }

  $stream = new AphrontRequestStream();

  if (isset($_SERVER['HTTP_CONTENT_ENCODING'])) {
    $stream->setEncoding(trim($_SERVER['HTTP_CONTENT_ENCODING']));
  }

  // The stream signals the end of data by returning null.
  $buffer = '';
  while (($chunk = $stream->readData()) !== null) {
    $buffer .= $chunk;
  }

  self::$rawInput = $buffer;

  return self::$rawInput;
}
109
110
111/* -( Startup Hooks )------------------------------------------------------ */
112
113
/**
 * Bootstrap request state: reset per-request statics, register the shutdown
 * handler (once), configure and verify PHP, then connect rate limits,
 * normalize input, read the request path and begin capturing output.
 *
 * @param float $start_time Request start time, from `microtime(true)`.
 * @return void
 * @task hook
 */
public static function didStartup($start_time) {
  // Reset per-request state.
  self::$startTime = $start_time;
  self::$phases = array();
  self::$accessLog = null;
  self::$requestPath = null;

  // NOTE: The guard protects us against multiple calls to didStartup() in
  // the same request, but also against repeated requests to the same
  // interpreter state, which we may implement in the future.
  static $has_shutdown_handler = false;
  if (!$has_shutdown_handler) {
    register_shutdown_function(array(self::class, 'didShutdown'));
    $has_shutdown_handler = true;
  }

  self::setupPHP();
  self::verifyPHP();

  // Having made it this far, the environment isn't completely broken, so
  // switch over to relying on our own exception recovery mechanisms.
  ini_set('display_errors', 0);

  // The remaining steps run in a fixed order.
  self::connectRateLimits();
  self::normalizeInput();
  self::readRequestPath();
  self::beginOutputCapture();
}
150
151
/**
 * Shutdown handler: release rate limits and, if the last recorded error was
 * an `E_ERROR`, `E_PARSE` or `E_COMPILE_ERROR`, report it through
 * @{method:didFatal}.
 *
 * @return void
 * @task hook
 */
public static function didShutdown() {
  // Disconnect any active rate limits before we shut down. Otherwise,
  // requests which exit early will lock a slot in any active connection
  // limits, and won't count for rate limits.
  self::disconnectRateLimits(array());

  $last_error = error_get_last();
  if (!$last_error) {
    return;
  }

  $fatal_types = array(E_ERROR, E_PARSE, E_COMPILE_ERROR);
  if (!in_array($last_error['type'], $fatal_types, true)) {
    return;
  }

  // Even though this should be emitted as text/plain, escape the details
  // anyway: the program state is unknown when we get here.
  $details = htmlspecialchars(
    $last_error['message']."\n\n".
      $last_error['file'].':'.$last_error['line'],
    ENT_QUOTES,
    'UTF-8');

  // flip dem tables
  $tables =
    "\xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb\x20\xef\xb8\xb5\x20\xc2\xaf".
    "\x5c\x5f\x28\xe3\x83\x84\x29\x5f\x2f\xc2\xaf\x20\xef\xb8\xb5\x20".
    "\xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb";

  $msg = ">>> UNRECOVERABLE FATAL ERROR <<<\n\n".$details."\n\n\n".$tables;

  self::didFatal($msg);
}
193
/**
 * Load the "Arcanist" library and then Phorge itself.
 *
 * The directory containing the Phorge checkout is prepended to the include
 * path. If `$_SERVER['PHUTIL_LIBRARY_ROOT']` is non-empty, it is used as a
 * prefix for the Arcanist init script path. Fatals if Arcanist can not be
 * included.
 *
 * @return void
 */
public static function loadCoreLibraries() {
  // This file lives three directory levels below the Phorge root.
  $phabricator_root = dirname(dirname(__DIR__));
  $libraries_root = dirname($phabricator_root);

  $prefix = '';
  if (!empty($_SERVER['PHUTIL_LIBRARY_ROOT'])) {
    $prefix = $_SERVER['PHUTIL_LIBRARY_ROOT'];
  }

  $include_path = $libraries_root.PATH_SEPARATOR.ini_get('include_path');
  ini_set('include_path', $include_path);

  $loaded = @include_once $prefix.'arcanist/src/init/init-library.php';
  if (!$loaded) {
    self::didFatal(
      'Unable to load the "Arcanist" library. Put "arcanist/" next to '.
      '"phorge/" on disk.');
  }

  // Load Phorge itself using the absolute path, so we never end up doing
  // anything surprising (loading index.php and libraries from different
  // directories).
  phutil_load_library($phabricator_root.'/src');
}
219
220/* -( Output Capture )----------------------------------------------------- */
221
222
/**
 * Start buffering output. Fatals if output is already being captured.
 *
 * @return void
 */
public static function beginOutputCapture() {
  $already_capturing = self::$capturingOutput;
  if ($already_capturing) {
    self::didFatal('Already capturing output!');
  }

  self::$capturingOutput = true;
  ob_start();
}
230
231
/**
 * Stop buffering output and return whatever was captured.
 *
 * @return string|false|null Result of `ob_get_clean()`, or `null` if output
 *   was not being captured.
 */
public static function endOutputCapture() {
  if (self::$capturingOutput) {
    self::$capturingOutput = false;
    return ob_get_clean();
  }

  return null;
}
239
240
241/* -( Debug Time Limit )--------------------------------------------------- */
242
243
/**
 * Set a time limit (in seconds) for the current script. Once the limit has
 * passed, the script fatals.
 *
 * This behaves like `max_execution_time`, except that a useful stack trace
 * is printed when the limit expires. The main use is debugging pages which
 * hang: set a short limit, then read the trace to see where the request is
 * stuck.
 *
 * The limit is implemented with a tick function, so enabling it implies
 * some accounting overhead. The tick function is registered only on the
 * first call; later calls just change the limit.
 *
 * @param int $limit Time limit in seconds.
 * @return void
 */
public static function setDebugTimeLimit($limit) {
  self::$debugTimeLimit = $limit;

  static $tick_registered = false;
  if (!$tick_registered) {
    declare(ticks=1);
    $callback = array(self::class, 'onDebugTick');
    register_tick_function($callback);
    $tick_registered = true;
  }
}
269
270
/**
 * Tick callback installed by @{method:setDebugTimeLimit}.
 *
 * Does nothing while no limit is set or the limit has not been exceeded.
 * Once it has, fatals with a stack trace of one "file:line class->func()"
 * entry per frame.
 *
 * @return void
 */
public static function onDebugTick() {
  $limit = self::$debugTimeLimit;
  if (!$limit) {
    return;
  }

  $elapsed = microtime(true) - self::getStartTime();
  if ($elapsed > $limit) {
    $frames = array();
    foreach (debug_backtrace() as $frame) {
      // Any of these keys may be absent from a frame; use placeholders.
      $file = '-';
      if (isset($frame['file'])) {
        $file = $frame['file'];
      }
      $file = basename($file);

      $line = '-';
      if (isset($frame['line'])) {
        $line = $frame['line'];
      }

      $class = null;
      if (isset($frame['class'])) {
        $class = $frame['class'].'->';
      }

      $func = '?';
      if (isset($frame['function'])) {
        $func = $frame['function'].'()';
      }

      $frames[] = "{$file}:{$line} {$class}{$func}";
    }

    self::didFatal(
      "Request aborted by debug time limit after {$limit} seconds.\n\n".
      "STACK TRACE\n".
      implode("\n", $frames));
  }
}
304
305
306/* -( In Case of Apocalypse )---------------------------------------------- */
307
308
/**
 * Fatal the request completely in response to an exception, sending a plain
 * text message to the client. Delegates to @{method:didFatal}.
 *
 * @param string $note Brief description of the exception context, like
 *   `"Rendering Exception"`.
 * @param Throwable $ex The exception itself.
 * @param bool $show_trace True if it's okay to show the exception's stack
 *   trace to the user. The trace is always passed along as the log message.
 *
 * @task apocalypse
 */
public static function didEncounterFatalException(
  $note,
  $ex,
  $show_trace) {

  $summary = '['.$note.'/'.get_class($ex).'] '.$ex->getMessage();
  $with_trace = $summary."\n\n".$ex->getTraceAsString();

  // The client only sees the trace when explicitly allowed.
  $client_message = $show_trace ? $with_trace : $summary;

  self::didFatal($client_message, $with_trace);
}
339
340
/**
 * Fatal the request completely, sending a plain text message to the client.
 *
 * Ends output capture, writes a final access log entry with code 500 (if an
 * access log has been set), sends an HTTP 500 plain text response, logs the
 * message with `error_log()` and exits with status 1.
 *
 * @param string $message Plain text message to send to the client.
 * @param string $log_message (optional) Plain text message to send to the
 *   error log. If not provided, the client message is used. You can pass a
 *   more detailed message here (e.g., with stack traces) to avoid showing it
 *   to users.
 * @return never-returns This method **does not return**.
 *
 * @task apocalypse
 */
public static function didFatal($message, $log_message = null) {
  if ($log_message === null) {
    $log_message = $message;
  }

  self::endOutputCapture();

  // We may end up here before the access log is initialized, e.g. from
  // verifyPHP(), so only write to it if it exists.
  $log = self::$accessLog;
  if ($log) {
    $log->setData(array('c' => 500));
    $log->write();
  }

  // Replace any earlier Content-Type header and set the response code.
  header('Content-Type: text/plain; charset=utf-8', true, 500);

  error_log($log_message);
  echo $message."\n";

  exit(1);
}
380
381
382/* -( Validation )--------------------------------------------------------- */
383
384
/**
 * Apply the PHP runtime configuration the rest of startup relies on.
 *
 * Enables full error reporting, remembers the configured memory limit (see
 * @{method:getOldMemoryLimit}) before lifting it, disables the libxml entity
 * loader where the function exists, switches to a UTF-8 locale, and applies
 * a small set of ini overrides.
 *
 * @return void
 * @task validation
 */
private static function setupPHP() {
  error_reporting(E_ALL);
  self::$oldMemoryLimit = ini_get('memory_limit');
  ini_set('memory_limit', -1);

  // If we have libxml, disable the incredibly dangerous entity loader.
  // PHP 8 deprecates this function and disables this by default; remove once
  // PHP 7 is no longer supported or a future version has removed the function
  // entirely.
  if (function_exists('libxml_disable_entity_loader')) {
    @libxml_disable_entity_loader(true);
  }

  // See T13060. If the locale for this process (the parent process) is not
  // a UTF-8 locale we can encounter problems when launching subprocesses
  // which receive UTF-8 parameters in their command line argument list.
  @setlocale(LC_ALL, 'en_US.UTF-8');

  $config_map = array(
    // See PHI1894. Keep "args" in exception backtraces.
    'zend.exception_ignore_args' => 0,

    // See T13100. We'd like the regex engine to fail, rather than segfault,
    // if handed a pathological regular expression.
    //
    // NOTE: The key must be spelled "pcre.recursion_limit". "ini_set()"
    // returns false without raising an error for an unknown key, so a
    // misspelled key silently leaves the default limit in place.
    'pcre.backtrack_limit' => 10000,
    'pcre.recursion_limit' => 10000,

    // NOTE: Arcanist applies a similar set of startup options for CLI
    // environments in "init-script.php". Changes here may also be
    // appropriate to apply there.
  );

  foreach ($config_map as $config_key => $config_value) {
    ini_set($config_key, $config_value);
  }
}
424
425
/**
 * Get the `memory_limit` value that was in effect before startup removed
 * the limit.
 *
 * @return string|false|null Value returned by `ini_get('memory_limit')`
 *   during startup, or `null` if it has not been recorded.
 * @task validation
 */
public static function getOldMemoryLimit() {
  $old_limit = self::$oldMemoryLimit;
  return $old_limit;
}
432
/**
 * Merge unfiltered copies of the GET, ENV and COOKIE inputs into the
 * matching superglobals, then rebuild `$_REQUEST`.
 *
 * @return void
 * @task validation
 */
private static function normalizeInput() {
  // Replace superglobals with unfiltered versions, disrespect php.ini (we
  // filter ourselves).

  // NOTE: We don't filter INPUT_SERVER because we don't want to overwrite
  // changes made in "preamble.php".

  // NOTE: We don't filter INPUT_POST because we may be constructing it
  // lazily if "enable_post_data_reading" is disabled.

  $input_types = array(INPUT_GET, INPUT_ENV, INPUT_COOKIE);

  foreach ($input_types as $input_type) {
    $raw = filter_input_array($input_type, FILTER_UNSAFE_RAW);
    if (!is_array($raw)) {
      // Nothing usable for this input type; leave the superglobal alone.
      continue;
    }

    if ($input_type === INPUT_GET) {
      $_GET = array_merge($_GET, $raw);
    } else if ($input_type === INPUT_COOKIE) {
      $_COOKIE = array_merge($_COOKIE, $raw);
    } else if ($input_type === INPUT_ENV) {
      $_ENV = self::filterEnvSuperglobal(array_merge($_ENV, $raw));
    }
  }

  self::rebuildRequest();
}
472
/**
 * Rebuild `$_REQUEST` from `$_GET`, `$_POST` and `$_COOKIE`, respecting the
 * order declared in ".ini" files.
 *
 * The order comes from "request_order", falling back to "variables_order".
 * If neither yields an order, `$_REQUEST` is left untouched.
 *
 * @return void
 * @task validation
 */
public static function rebuildRequest() {
  $order = ini_get('request_order');
  if (!$order) {
    $order = ini_get('variables_order');
  }

  if (!$order) {
    // $_REQUEST will be empty, so leave it alone.
    return;
  }

  $rebuilt = array();
  foreach (str_split($order) as $source) {
    if ($source === 'G') {
      $rebuilt = array_merge($rebuilt, $_GET);
    } else if ($source === 'P') {
      $rebuilt = array_merge($rebuilt, $_POST);
    } else if ($source === 'C') {
      $rebuilt = array_merge($rebuilt, $_COOKIE);
    }
    // Any other letter is ignored: $_ENV and $_SERVER never go into
    // $_REQUEST.
  }

  $_REQUEST = $rebuilt;
}
507
508
/**
 * Adjust `$_ENV` before execution.
 *
 * Adjustments here primarily impact the environment as seen by subprocesses.
 * The environment is forwarded explicitly by @{class:ExecFuture}.
 *
 * @param map<string, mixed> $env Input `$_ENV`.
 * @return map<string, string> Suitable `$_ENV`, with "argc" and "argv"
 *   removed.
 * @task validation
 */
private static function filterEnvSuperglobal(array $env) {
  // In some configurations, "argc" and "argv" show up in $_ENV. They are not
  // real environmental variables, and "argv" may have an array value which
  // can not be forwarded to subprocesses, so drop both if present.
  foreach (array('argc', 'argv') as $pseudo_variable) {
    unset($env[$pseudo_variable]);
  }

  return $env;
}
530
531
/**
 * Check that the PHP environment can run this code, and fatal otherwise.
 *
 * Fatals if PHP is older than the minimum version, if "magic_quotes_gpc" is
 * enabled, or if the request carried a "Proxy:" header (httpoxy).
 *
 * @return void
 * @task validation
 */
private static function verifyPHP() {
  $required_version = '7.2.25';
  if (version_compare(PHP_VERSION, $required_version, '<')) {
    self::didFatal(
      "You are running PHP version '".PHP_VERSION."', which is older than ".
      "the minimum version, '{$required_version}'. Update to at least ".
      "'{$required_version}'.");
  }

  // The function only exists on older PHP versions; probe before calling.
  $has_magic_quotes =
    function_exists('get_magic_quotes_gpc') &&
    @get_magic_quotes_gpc();
  if ($has_magic_quotes) {
    self::didFatal(
      'Your server is configured with the PHP language feature '.
      '"magic_quotes_gpc" enabled.'.
      "\n\n".
      'This feature is "highly discouraged" by PHP\'s developers, and '.
      'has been removed entirely in PHP8.'.
      "\n\n".
      'You must disable "magic_quotes_gpc" to run Phorge. Consult the '.
      'PHP manual for instructions.');
  }

  if (isset($_SERVER['HTTP_PROXY'])) {
    self::didFatal(
      'This HTTP request included a "Proxy:" header, poisoning the '.
      'environment (CVE-2016-5385 / httpoxy). Declining to process this '.
      'request. For details, see: https://secure.phabricator.com/T11359');
  }
}
565
566
/**
 * Locate the request path and store it with @{method:setRequestPath}.
 *
 * See T13575. The request path may be provided in:
 *
 *   - the "__path__" request parameter (normal for Apache and nginx); or
 *   - the "$_SERVER" parameter "REQUEST_URI" (normal for the PHP builtin
 *     webserver).
 *
 * Fatals if no usable path can be found.
 *
 * @return void
 * @task request-path
 */
private static function readRequestPath() {
  // Note that writing to "$_REQUEST" here won't always work, because later
  // code may rebuild "$_REQUEST" from other sources. Store the path instead.

  if (isset($_REQUEST['__path__']) && $_REQUEST['__path__'] !== '') {
    // Carefully crafted urls can supply their own __path__.
    // Harmless normally, but when specified as __path__[],
    // it becomes an array and overwrites the initial __path__.
    // Parse the request uri directly to send the user to the right place.
    if (is_array($_REQUEST['__path__'])) {
      $path = self::readRequestURIPath();
    } else {
      $path = $_REQUEST['__path__'];
    }

    self::setRequestPath($path);
    return;
  }

  // Compatibility with PHP 5.4+ built-in web server.
  if (php_sapi_name() === 'cli-server') {
    self::setRequestPath(self::readRequestURIPath());
    return;
  }

  if (!isset($_REQUEST['__path__'])) {
    self::didFatal(
      "Request parameter '__path__' is not set. Your rewrite rules ".
      "are not configured correctly.");
  }

  if ($_REQUEST['__path__'] === '') {
    self::didFatal(
      "Request parameter '__path__' is set, but empty. Your rewrite rules ".
      "are not configured correctly. The '__path__' should always ".
      "begin with a '/'.");
  }
}

/**
 * Extract the path component of `$_SERVER['REQUEST_URI']`.
 *
 * Fatals if the URI is absent, can not be parsed, or has no path component,
 * so a `null` path is never stored silently.
 *
 * @return string Path component of the request URI.
 * @task request-path
 */
private static function readRequestURIPath() {
  $path = null;

  if (isset($_SERVER['REQUEST_URI']) && is_string($_SERVER['REQUEST_URI'])) {
    // With a component argument, "parse_url()" returns false for seriously
    // malformed URLs and null when the component is missing.
    $path = parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH);
  }

  if (!is_string($path)) {
    self::didFatal(
      "Unable to determine the request path from 'REQUEST_URI'. The ".
      "request URI is missing or malformed.");
  }

  return $path;
}
617
/**
 * Get the stored request path. Fatals if no path has been stored.
 *
 * @return string Request path.
 * @task request-path
 */
public static function getRequestPath() {
  if (self::$requestPath === null) {
    self::didFatal(
      'Request attempted to access request path, but no request path is '.
      'available for this request. You may be calling web request code '.
      'from a non-request context, or your webserver may not be passing '.
      'a request path to Phorge in a format that it understands.');
  }

  return self::$requestPath;
}
634
/**
 * Store the request path returned by @{method:getRequestPath}.
 *
 * @param string $path Request path.
 * @return void
 * @task request-path
 */
public static function setRequestPath($path) {
  self::$requestPath = $path;
  return;
}
642
643
644/* -( Rate Limiting )------------------------------------------------------ */
645
646
/**
 * Add a new client limit.
 *
 * The limit is appended to the list used by @{method:connectRateLimits} and
 * @{method:disconnectRateLimits}.
 *
 * @param PhabricatorClientLimit $limit New limit.
 * @return PhabricatorClientLimit The limit.
 * @task ratelimit
 */
public static function addRateLimit(PhabricatorClientLimit $limit) {
  array_push(self::$limits, $limit);
  return $limit;
}
657
658
/**
 * Apply configured rate limits.
 *
 * Limits are connected in order. If any limit is exceeded, this method
 * disconnects every limit connected so far and terminates the request.
 *
 * @return void
 * @task ratelimit
 */
private static function connectRateLimits() {
  $limits = self::$limits;

  $connected = array();
  foreach ($limits as $limit) {
    $reason = $limit->didConnect();
    $connected[] = $limit;

    if ($reason === null) {
      continue;
    }

    // We're killing the request here, so disconnect every limit we
    // connected (this one included) to try to keep the accounting straight.
    foreach ($connected as $connected_limit) {
      $connected_limit->didDisconnect(array());
    }

    // This exits; no further limits are connected.
    self::didRateLimit($reason);
  }
}
690
691
/**
 * Tear down rate limiting and allow limits to score the request.
 *
 * @param map<string, mixed> $request_state Additional, freeform request
 *   state, handed to every limit.
 * @return void
 * @task ratelimit
 */
public static function disconnectRateLimits(array $request_state) {
  // Empty the list before disconnecting anything, so a second call (this
  // also runs automatically as a shutdown handler) finds nothing to do.
  $active_limits = self::$limits;
  self::$limits = array();

  foreach ($active_limits as $active_limit) {
    $active_limit->didDisconnect($request_state);
  }
}
711
712
713
/**
 * Emit an HTTP 429 "Too Many Requests" response (indicating that the user
 * has exceeded application rate limits) and exit.
 *
 * @param string $reason Text to send as the plain text response body.
 * @return never-returns This method **does not return**.
 * @task ratelimit
 */
private static function didRateLimit($reason) {
  // Replace any earlier Content-Type header and set the response code.
  header('Content-Type: text/plain; charset=utf-8', true, 429);

  echo $reason;

  exit(1);
}
731
732
733/* -( Startup Timers )----------------------------------------------------- */
734
735
/**
 * Record that a new startup phase begins now.
 *
 * For phases which occur before @{class:PhabricatorStartup} loads, save the
 * time and record it with @{method:recordStartupPhase} after the class is
 * available.
 *
 * @param string $phase Phase name.
 * @return void
 * @task phases
 */
public static function beginStartupPhase($phase) {
  $now = microtime(true);
  self::recordStartupPhase($phase, $now);
}
749
750
/**
 * Record the start time of a previously executed startup phase.
 *
 * For startup phases which occur after @{class:PhabricatorStartup} loads,
 * use @{method:beginStartupPhase} instead. This method can be used to
 * record a time before the class loads, then hand it over once the class
 * becomes available. Recording the same phase again replaces its time.
 *
 * @param string $phase Phase name.
 * @param float $time Phase start time, from `microtime(true)`.
 * @return void
 * @task phases
 */
public static function recordStartupPhase($phase, $time) {
  self::$phases[$phase] = $time;
  return;
}
766
767
/**
 * Get information about startup phase timings.
 *
 * Sometimes, performance problems can occur before we start the profiler.
 * Since the profiler can't examine these phases, it isn't useful in
 * understanding their performance costs.
 *
 * Instead, the startup process marks when it enters various phases using
 * @{method:beginStartupPhase}. A later call to this method can retrieve this
 * information, which can be examined to gain greater insight into where
 * time was spent. The output is still crude, but better than nothing.
 *
 * @return map<string, float>|null Map from phase name to recorded start
 *   time, or `null` if no phase list has been initialized.
 * @task phases
 */
public static function getPhases() {
  $phases = self::$phases;
  return $phases;
}
785
786}