@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
at upstream/main 594 lines 19 kB view raw
<?php

/**
 * Workflow which generates documentation for one or more Diviner books.
 *
 * For each book it builds the atom cache, builds the graph cache, and then
 * hands the resulting graph hashes to a @{class:DivinerPublisher}.
 */
final class DivinerGenerateWorkflow extends DivinerWorkflow {

  private $atomCache;

  /**
   * Declare the workflow name, synopsis and command-line arguments.
   */
  protected function didConstruct() {
    $this
      ->setName('generate')
      ->setSynopsis(pht('Generate documentation.'))
      ->setArguments(
        array(
          array(
            'name' => 'clean',
            'help' => pht('Clear the caches before generating documentation.'),
          ),
          array(
            'name' => 'book',
            'param' => 'path',
            'help' => pht('Path to a Diviner book configuration.'),
          ),
          array(
            'name' => 'publisher',
            'param' => 'class',
            'help' => pht('Specify a subclass of %s.', 'DivinerPublisher'),
            'default' => 'DivinerLivePublisher',
          ),
          array(
            'name' => 'repository',
            'param' => 'identifier',
            'help' => pht('Repository that the documentation belongs to.'),
          ),
        ));
  }

  /**
   * Get the atom cache for the current book, creating it on first use.
   *
   * The cache lives in `<root>/.divinercache/<name>`, where `root` and `name`
   * come from the book configuration.
   *
   * @return DivinerAtomCache
   */
  protected function getAtomCache() {
    if (!$this->atomCache) {
      $book_root = $this->getConfig('root');
      $book_name = $this->getConfig('name');
      $cache_directory = $book_root.'/.divinercache/'.$book_name;
      $this->atomCache = new DivinerAtomCache($cache_directory);
    }
    return $this->atomCache;
  }

  /**
   * Write a progress message, followed by a newline, to the console's error
   * stream.
   *
   * @param string $message
   */
  protected function log($message) {
    $console = PhutilConsole::getConsole();
    $console->writeErr($message."\n");
  }

  /**
   * Generate every requested book.
   *
   * If `--book` is given only that book is generated; otherwise the current
   * working directory is searched for files with the `book` suffix.
   *
   * @param PhutilArgumentParser $args
   * @throws PhutilArgumentUsageException If no book is specified and none
   *   can be found beneath the current directory.
   */
  public function execute(PhutilArgumentParser $args) {
    $book = $args->getArg('book');
    if ($book) {
      $books = array($book);
    } else {
      $cwd = getcwd();
      $this->log(pht('FINDING DOCUMENTATION BOOKS'));
      $books = array();

      // getcwd() may return false, in which case there is nothing to search.
      if ($cwd) {
        $books = id(new FileFinder($cwd))
          ->withType('f')
          ->withSuffix('book')
          ->find();
      }

      if (!$books) {
        throw new PhutilArgumentUsageException(
          pht(
            "There are no Diviner '%s' files anywhere beneath the current ".
            "directory. Use '%s' to specify a documentation book to generate.",
            '.book',
            '--book <book>'));
      } else {
        $this->log(pht('Found %s book(s).', phutil_count($books)));
      }
    }

    foreach ($books as $book) {
      $short_name = basename($book);

      $this->log(pht('Generating book "%s"...', $short_name));
      $this->generateBook($book, $args);
      $this->log(pht('Completed generation of "%s".', $short_name)."\n");
    }
  }

  /**
   * Generate a single book: read its configuration, refresh the atom and
   * graph caches, then publish.
   *
   * @param string $book Path to the book configuration.
   * @param PhutilArgumentParser $args
   * @throws PhutilArgumentUsageException If the publisher class is not a
   *   concrete subclass of DivinerPublisher, or the named repository does
   *   not exist.
   */
  private function generateBook($book, PhutilArgumentParser $args) {
    // Each book has its own cache directory, so drop any cache object left
    // over from a previously generated book.
    $this->atomCache = null;

    $this->readBookConfiguration($book);

    $clean = $args->getArg('clean');
    if ($clean) {
      $this->log(pht('CLEARING CACHES'));
      $this->getAtomCache()->delete();
      $this->log(pht('Done.')."\n");
    }

    // The major challenge of documentation generation is one of dependency
    // management. When regenerating documentation, we want to do the smallest
    // amount of work we can, so that regenerating documentation after minor
    // changes is quick.
    //
    // = Atom Cache =
    //
    // In the first stage, we find all the direct changes to source code since
    // the last run. This stage relies on two data structures:
    //
    //  - File Hash Map: `map<file_hash, node_hash>`
    //  - Atom Map: `map<node_hash, true>`
    //
    // First, we hash all the source files in the project to detect any which
    // have changed since the previous run (i.e., their hash is not present in
    // the File Hash Map). If a file's content hash appears in the map, it has
    // not changed, so we don't need to reparse it.
    //
    // We break the contents of each file into "atoms", which represent a unit
    // of source code (like a function, method, class or file). Each atom has a
    // "node hash" based on the content of the atom: if a function definition
    // changes, the node hash of the atom changes too. The primary output of
    // the atom cache is a list of node hashes which exist in the project. This
    // is the Atom Map. The node hash depends only on the definition of the atom
    // and the atomizer implementation. It ends with an "N", for "node".
    //
    // (We need the Atom Map in addition to the File Hash Map because each file
    // may have several atoms in it (e.g., multiple functions, or a class and
    // its methods). The File Hash Map contains an exhaustive list of all atoms
    // with type "file", but not child atoms of those top-level atoms.)
    //
    // = Graph Cache =
    //
    // We now know which atoms exist, and can compare the Atom Map to some
    // existing cache to figure out what has changed. However, this isn't
    // sufficient to figure out which documentation actually needs to be
    // regenerated, because atoms depend on other atoms. For example, if `B
    // extends A` and the definition for `A` changes, we need to regenerate the
    // documentation in `B`. Similarly, if `X` links to `Y` and `Y` changes, we
    // should regenerate `X`. (In both these cases, the documentation for the
    // connected atom may not actually change, but in some cases it will, and
    // the extra work we need to do is generally very small compared to the
    // size of the project.)
    //
    // To figure out which other nodes have changed, we compute a "graph hash"
    // for each node. This hash combines the "node hash" with the node hashes
    // of connected nodes. Our primary output is a list of graph hashes, which
    // a documentation generator can use to easily determine what work needs
    // to be done by comparing the list with a list of cached graph hashes,
    // then generating documentation for new hashes and deleting documentation
    // for missing hashes. The graph hash ends with a "G", for "graph".
    //
    // In this stage, we rely on three data structures:
    //
    //  - Symbol Map: `map<node_hash, symbol_hash>`
    //  - Edge Map: `map<node_hash, list<symbol_hash>>`
    //  - Graph Map: `map<node_hash, graph_hash>`
    //
    // Calculating the graph hash requires several steps, because we need to
    // figure out which nodes an atom is attached to. The atom contains symbolic
    // references to other nodes by name (e.g., `extends SomeClass`) in the form
    // of @{class:DivinerAtomRefs}. We can also build a symbolic reference for
    // any atom from the atom itself. Each @{class:DivinerAtomRef} generates a
    // symbol hash, which ends with an "S", for "symbol".
    //
    // First, we update the symbol map. We remove (and mark dirty) any symbols
    // associated with node hashes which no longer exist (e.g., old/dead nodes).
    // Second, we add (and mark dirty) any symbols associated with new nodes.
    // We also add edges defined by new nodes to the graph.
    //
    // We initialize a list of dirty nodes to the list of new nodes, then find
    // all nodes connected to dirty symbols and add them to the dirty node list.
    // This list now contains every node with a new or changed graph hash.
    //
    // We walk the dirty list and compute the new graph hashes, adding them
    // to the graph hash map. This Graph Map can then be passed to an actual
    // documentation generator, which can compare the graph hashes to a list
    // of already-generated graph hashes and easily assess which documents need
    // to be regenerated and which can be deleted.

    $this->buildAtomCache();
    $this->buildGraphCache();

    $publisher_class = $args->getArg('publisher');
    $symbols = id(new PhutilSymbolLoader())
      ->setName($publisher_class)
      ->setConcreteOnly(true)
      ->setAncestorClass(DivinerPublisher::class)
      ->selectAndLoadSymbols();

    if (!$symbols) {
      throw new PhutilArgumentUsageException(
        pht(
          "Publisher class '%s' must be a concrete subclass of %s.",
          $publisher_class,
          'DivinerPublisher'));
    }
    $publisher = newv($publisher_class, array());

    $identifier = $args->getArg('repository');
    $repository = null;
    if (phutil_nonempty_string($identifier)) {
      $repository = id(new PhabricatorRepositoryQuery())
        ->setViewer(PhabricatorUser::getOmnipotentUser())
        ->withIdentifiers(array($identifier))
        ->executeOne();

      if (!$repository) {
        throw new PhutilArgumentUsageException(
          pht(
            'Repository "%s" does not exist.',
            $identifier));
      }

      $publisher->setRepositoryPHID($repository->getPHID());
    }

    $this->publishDocumentation($clean, $publisher);
  }


/* -(  Atom Cache  )--------------------------------------------------------- */


  /**
   * Bring the atom cache up to date with the files currently in the project,
   * atomizing any file whose hash is not already cached, and save it.
   */
  private function buildAtomCache() {
    $this->log(pht('BUILDING ATOM CACHE'));

    $file_hashes = $this->findFilesInProject();
    $this->log(
      pht(
        'Found %s file(s) in project.',
        phutil_count($file_hashes)));

    $this->deleteDeadAtoms($file_hashes);
    $atomize = $this->getFilesToAtomize($file_hashes);
    $this->log(
      pht(
        'Found %s unatomized, uncached file(s).',
        phutil_count($atomize)));

    $file_atomizers = $this->getAtomizersForFiles($atomize);
    $this->log(
      pht(
        'Found %s file(s) to atomize.',
        phutil_count($file_atomizers)));

    $futures = $this->buildAtomizerFutures($file_atomizers);
    $this->log(
      pht(
        'Atomizing %s file(s).',
        phutil_count($file_atomizers)));

    if ($futures) {
      $this->resolveAtomizerFutures($futures, $file_hashes);
      $this->log(pht('Atomization complete.'));
    } else {
      $this->log(pht('Atom cache is up to date, no files to atomize.'));
    }

    $this->log(pht('Writing atom cache.'));
    $this->getAtomCache()->saveAtoms();
    $this->log(pht('Done.')."\n");
  }

  /**
   * Select an atomizer class for each file.
   *
   * Files matching any exclude pattern are skipped. Files matching no rule
   * are omitted from the result. If several rules match the same file, the
   * rule which appears last in the rule map wins.
   *
   * @param list<string> $files Paths to consider.
   * @return map<string, string> Map from path to atomizer class name.
   * @throws Exception If a rule or exclude pattern is not a valid regular
   *   expression.
   */
  private function getAtomizersForFiles(array $files) {
    $rules = $this->getRules();
    $exclude = $this->getExclude();
    $atomizers = array();

    foreach ($files as $file) {
      foreach ($exclude as $pattern) {
        $excluded = preg_match($pattern, $file);
        // preg_match() returns false on a broken pattern. Treat that the same
        // way as a broken rule instead of silently excluding nothing.
        if ($excluded === false) {
          throw new Exception(
            pht(
              "Exclude pattern '%s' is not a valid regular expression.",
              $pattern));
        }
        if ($excluded) {
          continue 2;
        }
      }

      foreach ($rules as $rule => $atomizer) {
        $ok = preg_match($rule, $file);
        if ($ok === false) {
          throw new Exception(
            pht("Rule '%s' is not a valid regular expression.", $rule));
        }
        if ($ok) {
          // Keep scanning: a later matching rule overrides this one.
          $atomizers[$file] = $atomizer;
        }
      }
    }

    return $atomizers;
  }

  /**
   * Get the map from filename regular expression to atomizer class name,
   * falling back to the built-in rules if the book does not configure any.
   *
   * @return map<string, string>
   */
  private function getRules() {
    return $this->getConfig('rules', array(
      '/\\.diviner$/' => 'DivinerArticleAtomizer',
      '/\\.php$/' => 'DivinerPHPAtomizer',
    ));
  }

  /**
   * Get the configured exclude patterns, always as an array.
   *
   * @return list<string>
   */
  private function getExclude() {
    $exclude = (array)$this->getConfig('exclude', array());
    return $exclude;
  }

  /**
   * Find every file under the book root (ignoring dot-paths) and compute a
   * file hash for each one.
   *
   * @return map<string, string> Map from path relative to the book root to
   *   file hash. File hashes end with "F".
   */
  private function findFilesInProject() {
    $root = $this->getConfig('root');

    $raw_hashes = id(new FileFinder($root))
      ->excludePath('*/.*')
      ->withType('f')
      ->setGenerateChecksums(true)
      ->find();

    $version = $this->getDivinerAtomWorldVersion();

    $file_hashes = array();
    foreach ($raw_hashes as $file => $md5_hash) {
      $rel_file = Filesystem::readablePath($file, $root);
      // We want the hash to change if the file moves or Diviner gets updated,
      // not just if the file content changes. Derive a hash from everything
      // we care about.
      $file_hashes[$rel_file] = md5("{$rel_file}\0{$md5_hash}\0{$version}").'F';
    }

    return $file_hashes;
  }

  /**
   * Remove cached file hashes which no longer correspond to any file in the
   * project.
   *
   * @param map<string, string> $file_hashes Map from path to file hash.
   */
  private function deleteDeadAtoms(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $hash_to_file = array_flip($file_hashes);
    foreach ($atom_cache->getFileHashMap() as $hash => $atom) {
      // Use isset(), not empty(): the flipped value is the file path, and a
      // path of "0" is falsy but still denotes a live file.
      if (!isset($hash_to_file[$hash])) {
        $atom_cache->deleteFileHash($hash);
      }
    }
  }

  /**
   * List the files whose hash is not yet present in the atom cache.
   *
   * @param map<string, string> $file_hashes Map from path to file hash.
   * @return list<string> Paths which need to be atomized.
   */
  private function getFilesToAtomize(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $atomize = array();
    foreach ($file_hashes as $file => $hash) {
      if (!$atom_cache->fileHashExists($hash)) {
        $atomize[] = $file;
      }
    }

    return $atomize;
  }

  /**
   * Build `diviner atomize` subprocess futures for the given files.
   *
   * Files are grouped by atomizer class and split into chunks of at most 32
   * files per subprocess.
   *
   * @param map<string, string> $file_atomizers Map from path to atomizer
   *   class name.
   * @return list<ExecFuture>
   */
  private function buildAtomizerFutures(array $file_atomizers) {
    $atomizers = array();
    foreach ($file_atomizers as $file => $atomizer) {
      $atomizers[$atomizer][] = $file;
    }

    $root = dirname(phutil_get_library_root('phorge'));

    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($file_atomizers));

    $futures = array();
    foreach ($atomizers as $class => $files) {
      foreach (array_chunk($files, 32) as $chunk) {
        $future = new ExecFuture(
          '%s atomize --ugly --book %s --atomizer %s -- %Ls',
          $root.'/bin/diviner',
          $this->getBookConfigPath(),
          $class,
          $chunk);

        $futures[] = $future;

        $bar->update(count($chunk));
      }
    }

    $bar->done();

    return $futures;
  }

  /**
   * Resolve the atomizer futures, at most four at a time, and record the
   * atoms they emit in the atom cache.
   *
   * A future which fails is logged and skipped, so one failing chunk does
   * not abort the whole run.
   *
   * @param array<Future> $futures
   * @param array<string,string> $file_hashes
   */
  private function resolveAtomizerFutures(array $futures, array $file_hashes) {
    assert_instances_of($futures, Future::class);

    $atom_cache = $this->getAtomCache();
    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($futures));
    $futures = id(new FutureIterator($futures))
      ->limit(4);

    foreach ($futures as $future) {
      try {
        $atoms = $future->resolveJSON();

        foreach ($atoms as $atom) {
          // File-type atoms also tie the file hash to the atom's node hash.
          if ($atom['type'] == DivinerAtom::TYPE_FILE) {
            $file_hash = $file_hashes[$atom['file']];
            $atom_cache->addFileHash($file_hash, $atom['hash']);
          }
          $atom_cache->addAtom($atom);
        }
      } catch (Exception $e) {
        phlog($e);
      }

      $bar->update(1);
    }
    $bar->done();
  }

  /**
   * Get a global version number, which changes whenever any atom or atomizer
   * implementation changes in a way which is not backward-compatible.
   *
   * The version is derived from the atom serialization version, the active
   * rules, and the version reported by each atomizer class.
   *
   * @return string MD5 hash.
   */
  private function getDivinerAtomWorldVersion() {
    $version = array();
    $version['atom'] = DivinerAtom::getAtomSerializationVersion();
    $version['rules'] = $this->getRules();

    $atomizers = id(new PhutilClassMapQuery())
      ->setAncestorClass(DivinerAtomizer::class)
      ->execute();

    $atomizer_versions = array();
    foreach ($atomizers as $atomizer) {
      $name = get_class($atomizer);
      $atomizer_versions[$name] = call_user_func(
        array(
          $name,
          'getAtomizerVersion',
        ));
    }

    // Sort by class name so the serialized form does not depend on the order
    // in which the classes were discovered.
    ksort($atomizer_versions);
    $version['atomizers'] = $atomizer_versions;

    return md5(serialize($version));
  }


/* -(  Graph Cache  )-------------------------------------------------------- */


  /**
   * Update the symbol, edge and graph maps to reflect the current atom map,
   * recomputing graph hashes for every affected atom, and save them.
   */
  private function buildGraphCache() {
    $this->log(pht('BUILDING GRAPH CACHE'));

    $atom_cache = $this->getAtomCache();
    $symbol_map = $atom_cache->getSymbolMap();
    $atoms = $atom_cache->getAtomMap();

    $dirty_symbols = array();
    $dirty_nhashes = array();

    // Atoms which still have a symbol but no longer exist in the atom map.
    $del_atoms = array_diff_key($symbol_map, $atoms);
    $this->log(
      pht(
        'Found %s obsolete atom(s) in graph.',
        phutil_count($del_atoms)));

    foreach ($del_atoms as $nhash => $shash) {
      $atom_cache->deleteSymbol($nhash);
      $dirty_symbols[$shash] = true;

      $atom_cache->deleteEdges($nhash);
      $atom_cache->deleteGraph($nhash);
    }

    // Atoms which exist in the atom map but do not have a symbol yet.
    $new_atoms = array_diff_key($atoms, $symbol_map);
    $this->log(
      pht(
        'Found %s new atom(s) in graph.',
        phutil_count($new_atoms)));

    foreach ($new_atoms as $nhash => $ignored) {
      $shash = $this->computeSymbolHash($nhash);
      $atom_cache->addSymbol($nhash, $shash);
      $dirty_symbols[$shash] = true;

      $atom_cache->addEdges($nhash, $this->getEdges($nhash));

      $dirty_nhashes[$nhash] = true;
    }

    $this->log(pht('Propagating changes through the graph.'));

    // Find all the nodes which point at a dirty node, and dirty them. Then
    // find all the nodes which point at those nodes and dirty them, and so
    // on. (This is slightly overkill since we probably don't need to propagate
    // dirtiness across documentation "links" between symbols, but we do want
    // to propagate it across "extends", and we suffer only a little bit of
    // collateral damage by over-dirtying as long as the documentation isn't
    // too well-connected.)

    $symbol_stack = array_keys($dirty_symbols);
    while ($symbol_stack) {
      $symbol_hash = array_pop($symbol_stack);

      foreach ($atom_cache->getEdgesWithDestination($symbol_hash) as $edge) {
        $dirty_nhashes[$edge] = true;
        $src_hash = $this->computeSymbolHash($edge);
        // Only push symbols we have not seen, so the walk terminates even if
        // the graph has cycles.
        if (empty($dirty_symbols[$src_hash])) {
          $dirty_symbols[$src_hash] = true;
          $symbol_stack[] = $src_hash;
        }
      }
    }

    $this->log(
      pht(
        'Found %s affected atoms.',
        phutil_count($dirty_nhashes)));

    foreach ($dirty_nhashes as $nhash => $ignored) {
      $atom_cache->addGraph($nhash, $this->computeGraphHash($nhash));
    }

    $this->log(pht('Writing graph cache.'));

    $atom_cache->saveGraph();
    $atom_cache->saveEdges();
    $atom_cache->saveSymbols();

    $this->log(pht('Done.')."\n");
  }

  /**
   * Compute the symbol hash of the atom with the given node hash.
   *
   * @param string $node_hash
   * @return string Symbol hash.
   * @throws Exception If the atom cache has no atom with this node hash.
   */
  private function computeSymbolHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    if (!$atom) {
      throw new Exception(
        pht("No such atom with node hash '%s'!", $node_hash));
    }

    $ref = DivinerAtomRef::newFromDictionary($atom['ref']);
    return $ref->toHash();
  }

  /**
   * List the symbol hashes which the given atom depends on: its own symbol,
   * plus every "extends" and "links" reference within the same book.
   *
   * @param string $node_hash
   * @return list<string> Unique symbol hashes.
   */
  private function getEdges($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $refs = array();

    // Make the atom depend on its own symbol, so that all atoms with the same
    // symbol are dirtied (e.g., if a codebase defines the function `f()`
    // several times, all of them should be dirtied when one is dirtied).
    //
    // NOTE(review): computeSymbolHash() builds its ref from `$atom['ref']`,
    // while this builds it from the atom dictionary itself. Confirm that both
    // produce the same symbol hash before changing either one.
    $refs[DivinerAtomRef::newFromDictionary($atom)->toHash()] = true;

    foreach (array_merge($atom['extends'], $atom['links']) as $ref_dict) {
      $ref = DivinerAtomRef::newFromDictionary($ref_dict);
      if ($ref->getBook() == $atom['book']) {
        $refs[$ref->toHash()] = true;
      }
    }

    return array_keys($refs);
  }

  /**
   * Compute the graph hash of an atom from its node hash and its sorted list
   * of edges.
   *
   * @param string $node_hash
   * @return string Graph hash, ending with "G".
   */
  private function computeGraphHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $edges = $this->getEdges($node_hash);
    // Sort so the hash does not depend on the order edges were discovered in.
    sort($edges);

    $inputs = array(
      'atomHash' => $atom['hash'],
      'edges' => $edges,
    );

    return md5(serialize($inputs)).'G';
  }

  /**
   * Configure the publisher and publish every graph hash in the graph map.
   *
   * @param mixed $clean Value of the `clean` argument; passed to the
   *   publisher's setDropCaches().
   * @param DivinerPublisher $publisher
   */
  private function publishDocumentation($clean, DivinerPublisher $publisher) {
    $atom_cache = $this->getAtomCache();
    $graph_map = $atom_cache->getGraphMap();

    $this->log(pht('PUBLISHING DOCUMENTATION'));

    $publisher
      ->setDropCaches($clean)
      ->setConfig($this->getAllConfig())
      ->setAtomCache($atom_cache)
      ->setRenderer(new DivinerDefaultRenderer())
      ->publishAtoms(array_values($graph_map));

    $this->log(pht('Done.'));
  }

}