@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator

Allow Phabricator to serve Mercurial repositories over HTTP

Summary: Ref T2230. This is easily the worst thing I've had to write in a while. I'll leave some notes inline.

Test Plan: Ran `hg clone http://...` on a hosted repo. Ran `hg push` on the same. Changes sync'd both ways.

Reviewers: asherkin, btrahan

Reviewed By: btrahan

CC: aran

Maniphest Tasks: T2230

Differential Revision: https://secure.phabricator.com/D7520

+257 -6
+3
src/__phutil_library_map__.php
··· 505 505 'DiffusionMercurialFileContentQuery' => 'applications/diffusion/query/filecontent/DiffusionMercurialFileContentQuery.php', 506 506 'DiffusionMercurialRawDiffQuery' => 'applications/diffusion/query/rawdiff/DiffusionMercurialRawDiffQuery.php', 507 507 'DiffusionMercurialRequest' => 'applications/diffusion/request/DiffusionMercurialRequest.php', 508 + 'DiffusionMercurialResponse' => 'applications/diffusion/response/DiffusionMercurialResponse.php', 509 + 'DiffusionMercurialWireProtocol' => 'applications/diffusion/protocol/DiffusionMercurialWireProtocol.php', 508 510 'DiffusionPathChange' => 'applications/diffusion/data/DiffusionPathChange.php', 509 511 'DiffusionPathChangeQuery' => 'applications/diffusion/query/pathchange/DiffusionPathChangeQuery.php', 510 512 'DiffusionPathCompleteController' => 'applications/diffusion/controller/DiffusionPathCompleteController.php', ··· 2757 2759 'DiffusionMercurialFileContentQuery' => 'DiffusionFileContentQuery', 2758 2760 'DiffusionMercurialRawDiffQuery' => 'DiffusionRawDiffQuery', 2759 2761 'DiffusionMercurialRequest' => 'DiffusionRequest', 2762 + 'DiffusionMercurialResponse' => 'AphrontResponse', 2760 2763 'DiffusionPathCompleteController' => 'DiffusionController', 2761 2764 'DiffusionPathQueryTestCase' => 'PhabricatorTestCase', 2762 2765 'DiffusionPathValidateController' => 'DiffusionController',
+160 -5
src/applications/diffusion/controller/DiffusionServeController.php
··· 176 176 case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: 177 177 $result = $this->serveGitRequest($repository, $viewer); 178 178 break; 179 + case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: 180 + $result = $this->serveMercurialRequest($repository, $viewer); 181 + break; 179 182 default: 180 183 $result = new PhabricatorVCSResponse( 181 184 999, ··· 224 227 break; 225 228 case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: 226 229 $cmd = $request->getStr('cmd'); 227 - switch ($cmd) { 228 - case 'capabilities': 230 + if ($cmd == 'batch') { 231 + // For "batch" we get a "cmds" argument like 232 + // 233 + // heads ;known nodes= 234 + // 235 + // We need to examine the commands (here, "heads" and "known") to 236 + // make sure they're all read-only. 237 + 238 + $args = $this->getMercurialArguments(); 239 + $cmds = idx($args, 'cmds'); 240 + if ($cmds) { 241 + 242 + // NOTE: Mercurial has some code to escape semicolons, but it does 243 + // not actually function for command separation. For example, these 244 + // two batch commands will produce completely different results (the 245 + // former will run the lookup; the latter will fail with a parser 246 + // error): 247 + // 248 + // lookup key=a:xb;lookup key=z* 0 249 + // lookup key=a:;b;lookup key=z* 0 250 + // ^ 251 + // | 252 + // +-- Note semicolon. 253 + // 254 + // So just split unconditionally. 
255 + 256 + $cmds = explode(';', $cmds); 257 + foreach ($cmds as $sub_cmd) { 258 + $name = head(explode(' ', $sub_cmd, 2)); 259 + if (!DiffusionMercurialWireProtocol::isReadOnlyCommand($name)) { 260 + return false; 261 + } 262 + } 229 263 return true; 230 - default: 231 - return false; 264 + } 232 265 } 233 - break; 266 + return DiffusionMercurialWireProtocol::isReadOnlyCommand($cmd); 234 267 case PhabricatorRepositoryType::REPOSITORY_TYPE_SUBVERSION: 235 268 break; 236 269 } ··· 357 390 358 391 return $user; 359 392 } 393 + 394 + private function serveMercurialRequest(PhabricatorRepository $repository) { 395 + $request = $this->getRequest(); 396 + 397 + $bin = Filesystem::resolveBinary('hg'); 398 + if (!$bin) { 399 + throw new Exception("Unable to find `hg` in PATH!"); 400 + } 401 + 402 + $env = array(); 403 + $input = PhabricatorStartup::getRawInput(); 404 + 405 + $cmd = $request->getStr('cmd'); 406 + 407 + $args = $this->getMercurialArguments(); 408 + $args = $this->formatMercurialArguments($cmd, $args); 409 + 410 + if (strlen($input)) { 411 + $input = strlen($input)."\n".$input."0\n"; 412 + } 413 + 414 + list($err, $stdout, $stderr) = id(new ExecFuture('%s serve --stdio', $bin)) 415 + ->setEnv($env, true) 416 + ->setCWD($repository->getLocalPath()) 417 + ->write("{$cmd}\n{$args}{$input}") 418 + ->resolve(); 419 + 420 + if ($err) { 421 + return new PhabricatorVCSResponse( 422 + 500, 423 + pht('Error %d: %s', $err, $stderr)); 424 + } 425 + 426 + if ($cmd == 'getbundle' || 427 + $cmd == 'changegroup' || 428 + $cmd == 'changegroupsubset') { 429 + // We're not completely sure that "changegroup" and "changegroupsubset" 430 + // actually work, they're for very old Mercurial. 431 + $body = gzcompress($stdout); 432 + } else if ($cmd == 'unbundle') { 433 + // This includes diagnostic information and anything echoed by commit 434 + // hooks. We ignore `stdout` since it just has protocol garbage, and 435 + // substitute `stderr`. 
436 + $body = strlen($stderr)."\n".$stderr; 437 + } else { 438 + list($length, $body) = explode("\n", $stdout, 2); 439 + } 440 + 441 + return id(new DiffusionMercurialResponse())->setContent($body); 442 + } 443 + 444 + 445 + private function getMercurialArguments() { 446 + // Mercurial sends arguments in HTTP headers. "Why?", you might wonder, 447 + // "Why would you do this?". 448 + 449 + $args_raw = array(); 450 + for ($ii = 1; ; $ii++) { 451 + $header = 'HTTP_X_HGARG_'.$ii; 452 + if (!array_key_exists($header, $_SERVER)) { 453 + break; 454 + } 455 + $args_raw[] = $_SERVER[$header]; 456 + } 457 + $args_raw = implode('', $args_raw); 458 + 459 + return id(new PhutilQueryStringParser()) 460 + ->parseQueryString($args_raw); 461 + } 462 + 463 + private function formatMercurialArguments($command, array $arguments) { 464 + $spec = DiffusionMercurialWireProtocol::getCommandArgs($command); 465 + 466 + $out = array(); 467 + 468 + // Mercurial takes normal arguments like this: 469 + // 470 + // name <length(value)> 471 + // value 472 + 473 + $has_star = false; 474 + foreach ($spec as $arg_key) { 475 + if ($arg_key == '*') { 476 + $has_star = true; 477 + continue; 478 + } 479 + if (isset($arguments[$arg_key])) { 480 + $value = $arguments[$arg_key]; 481 + $size = strlen($value); 482 + $out[] = "{$arg_key} {$size}\n{$value}"; 483 + unset($arguments[$arg_key]); 484 + } 485 + } 486 + 487 + if ($has_star) { 488 + 489 + // Mercurial takes arguments for variable argument lists roughly like 490 + // this: 491 + // 492 + // * <count(args)> 493 + // argname1 <length(argvalue1)> 494 + // argvalue1 495 + // argname2 <length(argvalue2)> 496 + // argvalue2 497 + 498 + $count = count($arguments); 499 + 500 + $out[] = "* {$count}\n"; 501 + 502 + foreach ($arguments as $key => $value) { 503 + if (in_array($key, $spec)) { 504 + // We already added this argument above, so skip it. 
505 + continue; 506 + } 507 + $size = strlen($value); 508 + $out[] = "{$key} {$size}\n{$value}"; 509 + } 510 + } 511 + 512 + return implode('', $out); 513 + } 514 + 360 515 } 361 516
+62
src/applications/diffusion/protocol/DiffusionMercurialWireProtocol.php
<?php

/**
 * Static knowledge about the Mercurial wire protocol commands: which
 * arguments each command declares, and which commands are read-only.
 */
final class DiffusionMercurialWireProtocol {

  /**
   * Get the declared argument names for a Mercurial wire command.
   *
   * Each entry in the returned list is a single argument name. The special
   * name "*" means the command also accepts a variable argument list.
   *
   * @param string Wire protocol command name, like "heads" or "getbundle".
   * @return list<string> Argument names, in declaration order.
   * @throws Exception If the command is not a known wire protocol command.
   */
  public static function getCommandArgs($command) {
    // We need to enumerate all of the Mercurial wire commands because the
    // argument encoding varies based on the command. "Why?", you might ask,
    // "Why would you do this?".

    // NOTE: Every argument name must be its own list element. Callers look
    // each element up as a key in the request arguments and test for the
    // "*" element by equality, so a combined string like "bases heads" or
    // "one two *" would never match anything.

    $commands = array(
      'batch' => array('cmds', '*'),
      'between' => array('pairs'),
      'branchmap' => array(),
      'branches' => array('nodes'),
      'capabilities' => array(),
      'changegroup' => array('roots'),
      'changegroupsubset' => array('bases', 'heads'),
      'debugwireargs' => array('one', 'two', '*'),
      'getbundle' => array('*'),
      'heads' => array(),
      'hello' => array(),
      'known' => array('nodes', '*'),
      'listkeys' => array('namespace'),
      'lookup' => array('key'),
      'pushkey' => array('namespace', 'key', 'old', 'new'),
      'stream_out' => array(),
      'unbundle' => array('heads'),
    );

    if (!isset($commands[$command])) {
      throw new Exception("Unknown Mercurial command '{$command}'!");
    }

    return $commands[$command];
  }

  /**
   * Test whether a Mercurial wire command is known to be read-only.
   *
   * @param string Wire protocol command name.
   * @return bool True if the command is on the read-only list. Unknown
   *   commands, the write commands, and "batch" all return false.
   */
  public static function isReadOnlyCommand($command) {
    $read_only = array(
      'between' => true,
      'branchmap' => true,
      'branches' => true,
      'capabilities' => true,
      'changegroup' => true,
      'changegroupsubset' => true,
      'debugwireargs' => true,
      'getbundle' => true,
      'heads' => true,
      'hello' => true,
      'known' => true,
      'listkeys' => true,
      'lookup' => true,
      'stream_out' => true,
    );

    // Notably, the write commands are "pushkey" and "unbundle". The
    // "batch" command is theoretically read only, but we require explicit
    // analysis of the actual commands.

    return isset($read_only[$command]);
  }

}
+1 -1
src/applications/diffusion/response/DiffusionGitResponse.php
··· 30 30 } 31 31 32 32 public function getHeaders() { 33 - return $this->headers; 33 + return array_merge(parent::getHeaders(), $this->headers); 34 34 } 35 35 36 36 public function getCacheHeaders() {
+31
src/applications/diffusion/response/DiffusionMercurialResponse.php
<?php

/**
 * Response which carries a preformatted body and declares the Mercurial
 * wire protocol content type.
 */
final class DiffusionMercurialResponse extends AphrontResponse {

  private $body;

  /**
   * Set the raw response body.
   *
   * @param string Body to return from @{method:buildResponseString}.
   * @return this
   */
  public function setContent($content) {
    $this->body = $content;
    return $this;
  }

  /**
   * Return the body exactly as it was provided to @{method:setContent}.
   */
  public function buildResponseString() {
    return $this->body;
  }

  /**
   * Return the parent's headers followed by the Mercurial content type.
   */
  public function getHeaders() {
    $inherited = parent::getHeaders();
    $content_type = array('Content-Type', 'application/mercurial-0.1');

    return array_merge($inherited, array($content_type));
  }

  /**
   * This response contributes no cache headers.
   */
  public function getCacheHeaders() {
    return array();
  }

  /**
   * This response always reports HTTP 200.
   */
  public function getHTTPResponseCode() {
    return 200;
  }

}