@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
at upstream/main 786 lines 22 kB view raw
<?php

/**
 * Handle request startup, before loading the environment or libraries. This
 * class bootstraps the request state up to the point where we can enter
 * Phorge code.
 *
 * NOTE: This class MUST NOT have any dependencies. It runs before libraries
 * load.
 *
 * Rate Limiting
 * =============
 *
 * Phorge limits the rate at which clients can request pages, and issues HTTP
 * 429 "Too Many Requests" responses if clients request too many pages too
 * quickly. Although this is not a complete defense against high-volume attacks,
 * it can protect an install against aggressive crawlers, security scanners,
 * and some types of malicious activity.
 *
 * To perform rate limiting, each page increments a score counter for the
 * requesting user's IP. The page can give the IP more points for an expensive
 * request, or fewer for an authenticated request.
 *
 * Score counters are kept in buckets, and writes move to a new bucket every
 * minute. After a few minutes (defined by @{method:getRateLimitBucketCount}),
 * the oldest bucket is discarded. This provides a simple mechanism for keeping
 * track of scores without needing to store, access, or read very much data.
 *
 * Users are allowed to accumulate up to 1000 points per minute, averaged across
 * all of the tracked buckets.
 *
 * @task info         Accessing Request Information
 * @task hook         Startup Hooks
 * @task apocalypse   In Case Of Apocalypse
 * @task validation   Validation
 * @task ratelimit    Rate Limiting
 * @task phases       Startup Phase Timers
 * @task request-path Request Path
 */
final class PhabricatorStartup {

  private static $startTime;
  private static $debugTimeLimit;
  private static $accessLog;
  private static $capturingOutput;
  private static $rawInput;
  private static $oldMemoryLimit;
  private static $phases;

  private static $limits = array();
  private static $requestPath;


/* -(  Accessing Request Information  )-------------------------------------- */


  /**
   * Get the request start time recorded by @{method:didStartup}.
   *
   * @return float|null Start time as passed to @{method:didStartup}, or
   *                    `null` if startup has not run yet.
   * @task info
   */
  public static function getStartTime() {
    return self::$startTime;
  }


  /**
   * Get the number of microseconds elapsed since the request start time.
   *
   * @return int Elapsed microseconds.
   * @task info
   */
  public static function getMicrosecondsSinceStart() {
    // This is the same as "phutil_microseconds_since()", but we may not have
    // loaded libraries yet.
    return (int)(1000000 * (microtime(true) - self::getStartTime()));
  }


  /**
   * Provide the access log which @{method:didFatal} writes to.
   *
   * @param object|null $access_log Access log. When set, @{method:didFatal}
   *                                calls `setData()` and `write()` on it.
   * @task info
   */
  public static function setAccessLog($access_log) {
    self::$accessLog = $access_log;
  }


  /**
   * Read the complete raw request body.
   *
   * The body is read once and cached, so repeated calls return the same
   * string. If the request carries a "Content-Encoding" header, the encoding
   * is passed to the stream before reading.
   *
   * NOTE(review): "AphrontRequestStream" is not defined in this file, so
   * this method presumably may only be called after libraries have loaded.
   *
   * @return string Raw request body.
   * @task info
   */
  public static function getRawInput() {
    if (self::$rawInput === null) {
      $stream = new AphrontRequestStream();

      if (isset($_SERVER['HTTP_CONTENT_ENCODING'])) {
        $encoding = trim($_SERVER['HTTP_CONTENT_ENCODING']);
        $stream->setEncoding($encoding);
      }

      // Drain the stream: "readData()" returning null marks the end of input.
      $input = '';
      do {
        $bytes = $stream->readData();
        if ($bytes === null) {
          break;
        }
        $input .= $bytes;
      } while (true);

      self::$rawInput = $input;
    }

    return self::$rawInput;
  }


/* -(  Startup Hooks  )------------------------------------------------------ */


  /**
   * Bootstrap the request: reset per-request state, configure and verify
   * PHP, apply rate limits, normalize input, locate the request path, and
   * begin capturing output.
   *
   * @param float $start_time Request start time, from `microtime(true)`.
   * @task hook
   */
  public static function didStartup($start_time) {
    self::$startTime = $start_time;

    self::$phases = array();

    self::$accessLog = null;
    self::$requestPath = null;

    static $registered;
    if (!$registered) {
      // NOTE: This protects us against multiple calls to didStartup() in the
      // same request, but also against repeated requests to the same
      // interpreter state, which we may implement in the future.
      register_shutdown_function(array(self::class, 'didShutdown'));
      $registered = true;
    }

    self::setupPHP();
    self::verifyPHP();

    // If we've made it this far, the environment isn't completely broken so
    // we can switch over to relying on our own exception recovery mechanisms.
    ini_set('display_errors', 0);

    self::connectRateLimits();

    self::normalizeInput();

    self::readRequestPath();

    self::beginOutputCapture();
  }


  /**
   * Shutdown handler registered by @{method:didStartup}.
   *
   * Disconnects rate limits, then reports the last error through
   * @{method:didFatal} if it was an `E_ERROR`, `E_PARSE` or
   * `E_COMPILE_ERROR`.
   *
   * @task hook
   */
  public static function didShutdown() {
    // Disconnect any active rate limits before we shut down. If we don't do
    // this, requests which exit early will lock a slot in any active
    // connection limits, and won't count for rate limits.
    self::disconnectRateLimits(array());

    $event = error_get_last();

    if (!$event) {
      return;
    }

    switch ($event['type']) {
      case E_ERROR:
      case E_PARSE:
      case E_COMPILE_ERROR:
        break;
      default:
        return;
    }

    $msg = ">>> UNRECOVERABLE FATAL ERROR <<<\n\n";
    // Even though we should be emitting this as text-plain, escape things
    // just to be sure since we can't really be sure what the program state
    // is when we get here.
    $msg .= htmlspecialchars(
      $event['message']."\n\n".$event['file'].':'.$event['line'],
      ENT_QUOTES,
      'UTF-8');

    // flip dem tables
    $msg .= "\n\n\n";
    $msg .= "\xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb\x20\xef\xb8\xb5\x20\xc2\xaf".
            "\x5c\x5f\x28\xe3\x83\x84\x29\x5f\x2f\xc2\xaf\x20\xef\xb8\xb5\x20".
            "\xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb";

    self::didFatal($msg);
  }


  /**
   * Load the "Arcanist" library, then load Phorge itself.
   *
   * The directory containing the Phorge checkout is prepended to the PHP
   * include path. If `$_SERVER['PHUTIL_LIBRARY_ROOT']` is nonempty, it is
   * used as a prefix for the Arcanist include.
   *
   * NOTE: The root is concatenated directly with the relative path, so a
   * configured "PHUTIL_LIBRARY_ROOT" must end with a directory separator.
   *
   * Fatals via @{method:didFatal} if Arcanist can not be included.
   *
   * @return void
   */
  public static function loadCoreLibraries() {
    $phabricator_root = dirname(dirname(dirname(__FILE__)));
    $libraries_root = dirname($phabricator_root);

    $root = null;
    if (!empty($_SERVER['PHUTIL_LIBRARY_ROOT'])) {
      $root = $_SERVER['PHUTIL_LIBRARY_ROOT'];
    }

    ini_set(
      'include_path',
      $libraries_root.PATH_SEPARATOR.ini_get('include_path'));

    $ok = @include_once $root.'arcanist/src/init/init-library.php';
    if (!$ok) {
      self::didFatal(
        'Unable to load the "Arcanist" library. Put "arcanist/" next to '.
        '"phorge/" on disk.');
    }

    // Load Phorge itself using the absolute path, so we never end up doing
    // anything surprising (loading index.php and libraries from different
    // directories).
    phutil_load_library($phabricator_root.'/src');
  }


/* -(  Output Capture  )----------------------------------------------------- */


  /**
   * Start buffering output. Fatals if output is already being captured.
   *
   * @return void
   */
  public static function beginOutputCapture() {
    if (self::$capturingOutput) {
      self::didFatal('Already capturing output!');
    }
    self::$capturingOutput = true;
    ob_start();
  }


  /**
   * Stop buffering output and return whatever was captured.
   *
   * @return string|false|null Result of `ob_get_clean()`, or `null` if
   *                           output was not being captured.
   */
  public static function endOutputCapture() {
    if (!self::$capturingOutput) {
      return null;
    }
    self::$capturingOutput = false;
    return ob_get_clean();
  }


/* -(  Debug Time Limit  )--------------------------------------------------- */


  /**
   * Set a time limit (in seconds) for the current script. After time expires,
   * the script fatals.
   *
   * This works like `max_execution_time`, but prints out a useful stack trace
   * when the time limit expires. This is primarily intended to make it easier
   * to debug pages which hang by allowing extraction of a stack trace: set a
   * short debug limit, then use the trace to figure out what's happening.
   *
   * The limit is implemented with a tick function, so enabling it implies
   * some accounting overhead.
   *
   * @param int $limit Time limit in seconds.
   * @return void
   */
  public static function setDebugTimeLimit($limit) {
    self::$debugTimeLimit = $limit;

    // Only register the tick function once, no matter how often the limit
    // is adjusted.
    static $initialized = false;
    if (!$initialized) {
      declare(ticks=1);
      register_tick_function(array(self::class, 'onDebugTick'));
      $initialized = true;
    }
  }


  /**
   * Callback tick function used by @{method:setDebugTimeLimit}.
   *
   * Fatals with a useful stack trace after the time limit expires.
   *
   * @return void
   */
  public static function onDebugTick() {
    $limit = self::$debugTimeLimit;
    if (!$limit) {
      return;
    }

    $elapsed = (microtime(true) - self::getStartTime());
    if ($elapsed > $limit) {
      $frames = array();
      foreach (debug_backtrace() as $frame) {
        $file = isset($frame['file']) ? $frame['file'] : '-';
        $file = basename($file);

        $line = isset($frame['line']) ? $frame['line'] : '-';
        $class = isset($frame['class']) ? $frame['class'].'->' : null;
        $func = isset($frame['function']) ? $frame['function'].'()' : '?';

        $frames[] = "{$file}:{$line} {$class}{$func}";
      }

      self::didFatal(
        "Request aborted by debug time limit after {$limit} seconds.\n\n".
        "STACK TRACE\n".
        implode("\n", $frames));
    }
  }


/* -(  In Case of Apocalypse  )---------------------------------------------- */


  /**
   * Fatal the request completely in response to an exception, sending a plain
   * text message to the client. Calls @{method:didFatal} internally.
   *
   * @param   string    $note       Brief description of the exception
   *                                context, like `"Rendering Exception"`.
   * @param   Throwable $ex         The exception itself.
   * @param   bool      $show_trace True if it's okay to show the exception's
   *                                stack trace to the user. The trace will
   *                                always be logged.
   * @return  never-returns         This method **does not return**.
   *
   * @task apocalypse
   */
  public static function didEncounterFatalException(
    $note,
    $ex,
    $show_trace) {

    $message = '['.$note.'/'.get_class($ex).'] '.$ex->getMessage();

    $full_message = $message;
    $full_message .= "\n\n";
    $full_message .= $ex->getTraceAsString();

    if ($show_trace) {
      $message = $full_message;
    }

    self::didFatal($message, $full_message);
  }


  /**
   * Fatal the request completely, sending a plain text message to the client.
   *
   * @param   string  $message      Plain text message to send to the client.
   * @param   string  $log_message  (optional) Plain text message to send to
   *                                the error log. If not provided, the client
   *                                message is used. You can pass a more
   *                                detailed message here (e.g., with stack
   *                                traces) to avoid showing it to users.
   * @return  never-returns         This method **does not return**.
   *
   * @task apocalypse
   */
  public static function didFatal($message, $log_message = null) {
    if ($log_message === null) {
      $log_message = $message;
    }

    // Discard anything buffered so far; only the fatal message is emitted.
    self::endOutputCapture();
    $access_log = self::$accessLog;
    if ($access_log) {
      // We may end up here before the access log is initialized, e.g. from
      // verifyPHP().
      $access_log->setData(
        array(
          'c' => 500,
        ));
      $access_log->write();
    }

    header(
      'Content-Type: text/plain; charset=utf-8',
      $replace = true,
      $http_error = 500);

    error_log($log_message);
    echo $message."\n";

    exit(1);
  }


/* -(  Validation  )--------------------------------------------------------- */


  /**
   * Apply runtime PHP configuration needed before any other startup work:
   * error reporting, memory limit, XML entity loading, locale and a small
   * set of ini overrides.
   *
   * The previous `memory_limit` is remembered and can be read with
   * @{method:getOldMemoryLimit}.
   *
   * @return void
   * @task validation
   */
  private static function setupPHP() {
    error_reporting(E_ALL);
    self::$oldMemoryLimit = ini_get('memory_limit');
    ini_set('memory_limit', -1);

    // If we have libxml, disable the incredibly dangerous entity loader.
    // PHP 8 deprecates this function and disables this by default; remove once
    // PHP 7 is no longer supported or a future version has removed the function
    // entirely.
    if (function_exists('libxml_disable_entity_loader')) {
      @libxml_disable_entity_loader(true);
    }

    // See T13060. If the locale for this process (the parent process) is not
    // a UTF-8 locale we can encounter problems when launching subprocesses
    // which receive UTF-8 parameters in their command line argument list.
    @setlocale(LC_ALL, 'en_US.UTF-8');

    $config_map = array(
      // See PHI1894. Keep "args" in exception backtraces.
      'zend.exception_ignore_args' => 0,

      // See T13100. We'd like the regex engine to fail, rather than segfault,
      // if handed a pathological regular expression.
      //
      // NOTE: The second key was previously misspelled "pcre.recusion_limit".
      // "ini_set()" fails silently for unknown keys, so the recursion limit
      // was never actually applied.
      'pcre.backtrack_limit' => 10000,
      'pcre.recursion_limit' => 10000,

      // NOTE: Arcanist applies a similar set of startup options for CLI
      // environments in "init-script.php". Changes here may also be
      // appropriate to apply there.
    );

    foreach ($config_map as $config_key => $config_value) {
      ini_set($config_key, $config_value);
    }
  }


  /**
   * Get the `memory_limit` which was configured before @{method:setupPHP}
   * replaced it.
   *
   * @return string|false|null Value of `ini_get('memory_limit')` at startup,
   *                           or `null` if startup has not run yet.
   * @task validation
   */
  public static function getOldMemoryLimit() {
    return self::$oldMemoryLimit;
  }


  /**
   * Merge unfiltered request data into `$_GET`, `$_COOKIE` and `$_ENV`, then
   * rebuild `$_REQUEST`.
   *
   * @return void
   * @task validation
   */
  private static function normalizeInput() {
    // Replace superglobals with unfiltered versions, disrespect php.ini (we
    // filter ourselves).

    // NOTE: We don't filter INPUT_SERVER because we don't want to overwrite
    // changes made in "preamble.php".

    // NOTE: We don't filter INPUT_POST because we may be constructing it
    // lazily if "enable_post_data_reading" is disabled.

    $filter = array(
      INPUT_GET,
      INPUT_ENV,
      INPUT_COOKIE,
    );
    foreach ($filter as $type) {
      $filtered = filter_input_array($type, FILTER_UNSAFE_RAW);
      if (!is_array($filtered)) {
        continue;
      }
      switch ($type) {
        case INPUT_GET:
          $_GET = array_merge($_GET, $filtered);
          break;
        case INPUT_COOKIE:
          $_COOKIE = array_merge($_COOKIE, $filtered);
          break;
        case INPUT_ENV:
          $env = array_merge($_ENV, $filtered);
          $_ENV = self::filterEnvSuperglobal($env);
          break;
      }
    }

    self::rebuildRequest();
  }


  /**
   * Rebuild `$_REQUEST` from `$_GET`, `$_POST` and `$_COOKIE`.
   *
   * Sources are merged in the order given by the `request_order` ini
   * setting, falling back to `variables_order`. Later sources overwrite
   * earlier ones. If neither setting has a value, `$_REQUEST` is left
   * untouched.
   *
   * @return void
   * @task validation
   */
  public static function rebuildRequest() {
    // Rebuild $_REQUEST, respecting order declared in ".ini" files.
    $order = ini_get('request_order');

    if (!$order) {
      $order = ini_get('variables_order');
    }

    if (!$order) {
      // $_REQUEST will be empty, so leave it alone.
      return;
    }

    $_REQUEST = array();
    $length = strlen($order);
    for ($ii = 0; $ii < $length; $ii++) {
      switch ($order[$ii]) {
        case 'G':
          $_REQUEST = array_merge($_REQUEST, $_GET);
          break;
        case 'P':
          $_REQUEST = array_merge($_REQUEST, $_POST);
          break;
        case 'C':
          $_REQUEST = array_merge($_REQUEST, $_COOKIE);
          break;
        default:
          // $_ENV and $_SERVER never go into $_REQUEST.
          break;
      }
    }
  }


  /**
   * Adjust `$_ENV` before execution.
   *
   * Adjustments here primarily impact the environment as seen by subprocesses.
   * The environment is forwarded explicitly by @{class:ExecFuture}.
   *
   * @param map<string, mixed> $env Input `$_ENV`.
   * @return map<string, string> Suitable `$_ENV`.
   * @task validation
   */
  private static function filterEnvSuperglobal(array $env) {

    // In some configurations, we may get "argc" and "argv" set in $_ENV.
    // These are not real environmental variables, and "argv" may have an array
    // value which can not be forwarded to subprocesses. Remove these from the
    // environment if they are present.
    unset($env['argc']);
    unset($env['argv']);

    return $env;
  }


  /**
   * Verify that the PHP runtime and the incoming request are acceptable.
   *
   * Fatals via @{method:didFatal} if PHP is older than the minimum version,
   * if "magic_quotes_gpc" is enabled, or if the request carries a "Proxy:"
   * header.
   *
   * @return void
   * @task validation
   */
  private static function verifyPHP() {
    $required_version = '7.2.25';
    if (version_compare(PHP_VERSION, $required_version) < 0) {
      self::didFatal(
        "You are running PHP version '".PHP_VERSION."', which is older than ".
        "the minimum version, '{$required_version}'. Update to at least ".
        "'{$required_version}'.");
    }

    if (function_exists('get_magic_quotes_gpc')) {
      if (@get_magic_quotes_gpc()) {
        self::didFatal(
          'Your server is configured with the PHP language feature '.
          '"magic_quotes_gpc" enabled.'.
          "\n\n".
          'This feature is "highly discouraged" by PHP\'s developers, and '.
          'has been removed entirely in PHP8.'.
          "\n\n".
          'You must disable "magic_quotes_gpc" to run Phorge. Consult the '.
          'PHP manual for instructions.');
      }
    }

    if (isset($_SERVER['HTTP_PROXY'])) {
      self::didFatal(
        'This HTTP request included a "Proxy:" header, poisoning the '.
        'environment (CVE-2016-5385 / httpoxy). Declining to process this '.
        'request. For details, see: https://secure.phabricator.com/T11359');
    }
  }


/* -(  Request Path  )------------------------------------------------------- */


  /**
   * Locate the request path and store it with @{method:setRequestPath}.
   *
   * Fatals via @{method:didFatal} if no "__path__" parameter is available
   * and the request is not served by the PHP builtin webserver.
   *
   * @return void
   * @task request-path
   */
  private static function readRequestPath() {

    // See T13575. The request path may be provided in:
    //
    //  - the "$_GET" parameter "__path__" (normal for Apache and nginx); or
    //  - the "$_SERVER" parameter "REQUEST_URI" (normal for the PHP builtin
    //    webserver).
    //
    // Locate it wherever it is, and store it for later use. Note that writing
    // to "$_REQUEST" here won't always work, because later code may rebuild
    // "$_REQUEST" from other sources.

    if (isset($_REQUEST['__path__']) && $_REQUEST['__path__'] !== '') {
      // Carefully crafted urls can supply their own __path__.
      // Harmless normally, but when specified as __path__[],
      // it becomes an array and overwrites the initial __path__.
      // Parse the request uri directly to send the user to the right place.
      if (is_array($_REQUEST['__path__'])) {
        $path = self::readRequestURIPath();
      } else {
        $path = $_REQUEST['__path__'];
      }

      self::setRequestPath($path);
      return;
    }

    // Compatibility with PHP 5.4+ built-in web server.
    if (php_sapi_name() === 'cli-server') {
      self::setRequestPath(self::readRequestURIPath());
      return;
    }

    if (!isset($_REQUEST['__path__'])) {
      self::didFatal(
        "Request parameter '__path__' is not set. Your rewrite rules ".
        "are not configured correctly.");
    }

    if ($_REQUEST['__path__'] === '') {
      self::didFatal(
        "Request parameter '__path__' is set, but empty. Your rewrite rules ".
        "are not configured correctly. The '__path__' should always ".
        "begin with a '/'.");
    }
  }


  /**
   * Extract the path component of `$_SERVER['REQUEST_URI']`.
   *
   * `parse_url()` returns `false` for seriously malformed URIs and `null`
   * when the URI has no path component. Both cases are mapped to `null` here
   * so callers never index into a non-array result.
   *
   * @return string|null Path component, or `null` if it is unavailable.
   * @task request-path
   */
  private static function readRequestURIPath() {
    if (!isset($_SERVER['REQUEST_URI'])) {
      return null;
    }

    $path = parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH);
    if (!is_string($path)) {
      return null;
    }

    return $path;
  }


  /**
   * Get the request path. Fatals via @{method:didFatal} if no request path
   * has been set.
   *
   * @return string Request path.
   * @task request-path
   */
  public static function getRequestPath() {
    $path = self::$requestPath;

    if ($path === null) {
      self::didFatal(
        'Request attempted to access request path, but no request path is '.
        'available for this request. You may be calling web request code '.
        'from a non-request context, or your webserver may not be passing '.
        'a request path to Phorge in a format that it understands.');
    }

    return $path;
  }


  /**
   * Set the request path returned by @{method:getRequestPath}.
   *
   * @task request-path
   * @param string $path
   */
  public static function setRequestPath($path) {
    self::$requestPath = $path;
  }


/* -(  Rate Limiting  )------------------------------------------------------ */


  /**
   * Add a new client limit.
   *
   * @param PhabricatorClientLimit $limit New limit.
   * @return PhabricatorClientLimit The limit.
   * @task ratelimit
   */
  public static function addRateLimit(PhabricatorClientLimit $limit) {
    self::$limits[] = $limit;
    return $limit;
  }


  /**
   * Apply configured rate limits.
   *
   * If any limit is exceeded, this method terminates the request.
   *
   * @return void
   * @task ratelimit
   */
  private static function connectRateLimits() {
    $limits = self::$limits;

    // Connect limits in order, stopping at the first one which reports a
    // reason to reject the request.
    $reason = null;
    $connected = array();
    foreach ($limits as $limit) {
      $reason = $limit->didConnect();
      $connected[] = $limit;
      if ($reason !== null) {
        break;
      }
    }

    // If we're killing the request here, disconnect any limits that we
    // connected to try to keep the accounting straight.
    if ($reason !== null) {
      foreach ($connected as $limit) {
        $limit->didDisconnect(array());
      }

      self::didRateLimit($reason);
    }
  }


  /**
   * Tear down rate limiting and allow limits to score the request.
   *
   * @param map<string, mixed> $request_state Additional, freeform request
   *   state.
   * @return void
   * @task ratelimit
   */
  public static function disconnectRateLimits(array $request_state) {
    $limits = self::$limits;

    // Remove all limits before disconnecting them so this works properly if
    // it runs twice. (We run this automatically as a shutdown handler.)
    self::$limits = array();

    foreach ($limits as $limit) {
      $limit->didDisconnect($request_state);
    }
  }


  /**
   * Emit an HTTP 429 "Too Many Requests" response (indicating that the user
   * has exceeded application rate limits) and exit.
   *
   * @param   string $reason  Plain text response body, as returned by the
   *                          limit's `didConnect()`.
   * @return  never-returns   This method **does not return**.
   * @task ratelimit
   */
  private static function didRateLimit($reason) {
    header(
      'Content-Type: text/plain; charset=utf-8',
      $replace = true,
      $http_error = 429);

    echo $reason;

    exit(1);
  }


/* -(  Startup Timers  )----------------------------------------------------- */


  /**
   * Record the beginning of a new startup phase.
   *
   * For phases which occur before @{class:PhabricatorStartup} loads, save the
   * time and record it with @{method:recordStartupPhase} after the class is
   * available.
   *
   * @param string $phase Phase name.
   * @task phases
   */
  public static function beginStartupPhase($phase) {
    self::recordStartupPhase($phase, microtime(true));
  }


  /**
   * Record the start time of a previously executed startup phase.
   *
   * For startup phases which occur after @{class:PhabricatorStartup} loads,
   * use @{method:beginStartupPhase} instead. This method can be used to
   * record a time before the class loads, then hand it over once the class
   * becomes available.
   *
   * @param string $phase Phase name.
   * @param float $time Phase start time, from `microtime(true)`.
   * @task phases
   */
  public static function recordStartupPhase($phase, $time) {
    self::$phases[$phase] = $time;
  }


  /**
   * Get information about startup phase timings.
   *
   * Sometimes, performance problems can occur before we start the profiler.
   * Since the profiler can't examine these phases, it isn't useful in
   * understanding their performance costs.
   *
   * Instead, the startup process marks when it enters various phases using
   * @{method:beginStartupPhase}. A later call to this method can retrieve this
   * information, which can be examined to gain greater insight into where
   * time was spent. The output is still crude, but better than nothing.
   *
   * @return map<string, float>|null Map from phase name to phase start
   *                                 time, or `null` if nothing has been
   *                                 recorded yet.
   * @task phases
   */
  public static function getPhases() {
    return self::$phases;
  }

}