@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
3final class DivinerGenerateWorkflow extends DivinerWorkflow {
4
5 private $atomCache;
6
/**
 * Declare the workflow name, synopsis and command-line arguments.
 *
 * Supported arguments are `--clean`, `--book <path>`,
 * `--publisher <class>` (defaults to `DivinerLivePublisher`) and
 * `--repository <identifier>`.
 */
protected function didConstruct() {
  $argument_specs = array(
    array(
      'name' => 'clean',
      'help' => pht('Clear the caches before generating documentation.'),
    ),
    array(
      'name' => 'book',
      'param' => 'path',
      'help' => pht('Path to a Diviner book configuration.'),
    ),
    array(
      'name' => 'publisher',
      'param' => 'class',
      'help' => pht('Specify a subclass of %s.', 'DivinerPublisher'),
      'default' => 'DivinerLivePublisher',
    ),
    array(
      'name' => 'repository',
      'param' => 'identifier',
      'help' => pht('Repository that the documentation belongs to.'),
    ),
  );

  $this
    ->setName('generate')
    ->setSynopsis(pht('Generate documentation.'))
    ->setArguments($argument_specs);
}
35
/**
 * Get the atom cache for the book currently being generated, creating it
 * on first use.
 *
 * The cache lives in `<root>/.divinercache/<name>`, where `root` and
 * `name` come from the loaded book configuration.
 *
 * @return DivinerAtomCache
 */
protected function getAtomCache() {
  if ($this->atomCache) {
    return $this->atomCache;
  }

  $cache_path =
    $this->getConfig('root').'/.divinercache/'.$this->getConfig('name');
  $this->atomCache = new DivinerAtomCache($cache_path);

  return $this->atomCache;
}
45
/**
 * Write a progress message, followed by a newline, to the console's error
 * stream.
 *
 * @param string $message Message to print.
 */
protected function log($message) {
  PhutilConsole::getConsole()->writeErr($message."\n");
}
50
/**
 * Generate documentation for one book (`--book`) or, when no book is
 * given, for every file with the suffix `book` found beneath the current
 * working directory.
 *
 * @param PhutilArgumentParser $args Parsed command-line arguments.
 * @throws PhutilArgumentUsageException If no book was specified and none
 *   could be discovered.
 */
public function execute(PhutilArgumentParser $args) {
  $book_paths = array();

  $explicit_book = $args->getArg('book');
  if ($explicit_book) {
    $book_paths[] = $explicit_book;
  } else {
    $cwd = getcwd();
    $this->log(pht('FINDING DOCUMENTATION BOOKS'));

    // getcwd() can fail; in that case there is nothing to search.
    if ($cwd) {
      $finder = id(new FileFinder($cwd))
        ->withType('f')
        ->withSuffix('book');
      $book_paths = $finder->find();
    }

    if (!$book_paths) {
      throw new PhutilArgumentUsageException(
        pht(
          "There are no Diviner '%s' files anywhere beneath the current ".
          "directory. Use '%s' to specify a documentation book to generate.",
          '.book',
          '--book <book>'));
    }

    $this->log(pht('Found %s book(s).', phutil_count($book_paths)));
  }

  foreach ($book_paths as $book_path) {
    $display_name = basename($book_path);

    $this->log(pht('Generating book "%s"...', $display_name));
    $this->generateBook($book_path, $args);
    $this->log(pht('Completed generation of "%s".', $display_name)."\n");
  }
}
87
/**
 * Generate documentation for a single book.
 *
 * Loads the book configuration, optionally clears the caches (`--clean`),
 * rebuilds the atom and graph caches, instantiates the requested publisher
 * and publishes the result.
 *
 * @param string $book Path to the book configuration file.
 * @param PhutilArgumentParser $args Parsed command-line arguments.
 * @throws PhutilArgumentUsageException If the publisher class is not a
 *   concrete DivinerPublisher subclass, or the repository does not exist.
 */
private function generateBook($book, PhutilArgumentParser $args) {
  // Each book has its own cache directory, so drop any cache object left
  // over from a previously generated book.
  $this->atomCache = null;

  $this->readBookConfiguration($book);

  $is_clean = $args->getArg('clean');
  if ($is_clean) {
    $this->log(pht('CLEARING CACHES'));
    $this->getAtomCache()->delete();
    $this->log(pht('Done.')."\n");
  }

  // The major challenge of documentation generation is one of dependency
  // management. When regenerating documentation, we want to do the smallest
  // amount of work we can, so that regenerating documentation after minor
  // changes is quick.
  //
  // = Atom Cache =
  //
  // In the first stage, we find all the direct changes to source code since
  // the last run. This stage relies on two data structures:
  //
  //  - File Hash Map: `map<file_hash, node_hash>`
  //  - Atom Map: `map<node_hash, true>`
  //
  // First, we hash all the source files in the project to detect any which
  // have changed since the previous run (i.e., their hash is not present in
  // the File Hash Map). If a file's content hash appears in the map, it has
  // not changed, so we don't need to reparse it.
  //
  // We break the contents of each file into "atoms", which represent a unit
  // of source code (like a function, method, class or file). Each atom has a
  // "node hash" based on the content of the atom: if a function definition
  // changes, the node hash of the atom changes too. The primary output of
  // the atom cache is a list of node hashes which exist in the project. This
  // is the Atom Map. The node hash depends only on the definition of the atom
  // and the atomizer implementation. It ends with an "N", for "node".
  //
  // (We need the Atom Map in addition to the File Hash Map because each file
  // may have several atoms in it (e.g., multiple functions, or a class and
  // its methods). The File Hash Map contains an exhaustive list of all atoms
  // with type "file", but not child atoms of those top-level atoms.)
  //
  // = Graph Cache =
  //
  // We now know which atoms exist, and can compare the Atom Map to some
  // existing cache to figure out what has changed. However, this isn't
  // sufficient to figure out which documentation actually needs to be
  // regenerated, because atoms depend on other atoms. For example, if `B
  // extends A` and the definition for `A` changes, we need to regenerate the
  // documentation in `B`. Similarly, if `X` links to `Y` and `Y` changes, we
  // should regenerate `X`. (In both these cases, the documentation for the
  // connected atom may not actually change, but in some cases it will, and
  // the extra work we need to do is generally very small compared to the
  // size of the project.)
  //
  // To figure out which other nodes have changed, we compute a "graph hash"
  // for each node. This hash combines the "node hash" with the node hashes
  // of connected nodes. Our primary output is a list of graph hashes, which
  // a documentation generator can use to easily determine what work needs
  // to be done by comparing the list with a list of cached graph hashes,
  // then generating documentation for new hashes and deleting documentation
  // for missing hashes. The graph hash ends with a "G", for "graph".
  //
  // In this stage, we rely on three data structures:
  //
  //  - Symbol Map: `map<node_hash, symbol_hash>`
  //  - Edge Map: `map<node_hash, list<symbol_hash>>`
  //  - Graph Map: `map<node_hash, graph_hash>`
  //
  // Calculating the graph hash requires several steps, because we need to
  // figure out which nodes an atom is attached to. The atom contains symbolic
  // references to other nodes by name (e.g., `extends SomeClass`) in the form
  // of @{class:DivinerAtomRef} objects. We can also build a symbolic
  // reference for any atom from the atom itself. Each
  // @{class:DivinerAtomRef} generates a symbol hash, which ends with an "S",
  // for "symbol".
  //
  // First, we update the symbol map. We remove (and mark dirty) any symbols
  // associated with node hashes which no longer exist (e.g., old/dead nodes).
  // Second, we add (and mark dirty) any symbols associated with new nodes.
  // We also add edges defined by new nodes to the graph.
  //
  // We initialize a list of dirty nodes to the list of new nodes, then find
  // all nodes connected to dirty symbols and add them to the dirty node list.
  // This list now contains every node with a new or changed graph hash.
  //
  // We walk the dirty list and compute the new graph hashes, adding them
  // to the graph hash map. This Graph Map can then be passed to an actual
  // documentation generator, which can compare the graph hashes to a list
  // of already-generated graph hashes and easily assess which documents need
  // to be regenerated and which can be deleted.

  $this->buildAtomCache();
  $this->buildGraphCache();

  // Make sure the requested publisher is a loadable, concrete subclass of
  // DivinerPublisher before instantiating it.
  $publisher_class = $args->getArg('publisher');
  $loader = id(new PhutilSymbolLoader())
    ->setName($publisher_class)
    ->setConcreteOnly(true)
    ->setAncestorClass(DivinerPublisher::class);

  if (!$loader->selectAndLoadSymbols()) {
    throw new PhutilArgumentUsageException(
      pht(
        "Publisher class '%s' must be a concrete subclass of %s.",
        $publisher_class,
        'DivinerPublisher'));
  }

  $publisher = newv($publisher_class, array());

  // Optionally associate the generated documentation with a repository.
  $identifier = $args->getArg('repository');
  if (phutil_nonempty_string($identifier)) {
    $repository = id(new PhabricatorRepositoryQuery())
      ->setViewer(PhabricatorUser::getOmnipotentUser())
      ->withIdentifiers(array($identifier))
      ->executeOne();

    if (!$repository) {
      throw new PhutilArgumentUsageException(
        pht(
          'Repository "%s" does not exist.',
          $identifier));
    }

    $publisher->setRepositoryPHID($repository->getPHID());
  }

  $this->publishDocumentation($is_clean, $publisher);
}
218
219
220/* -( Atom Cache )--------------------------------------------------------- */
221
222
/**
 * Bring the atom cache up to date with the files currently in the book.
 *
 * Hashes the project files, drops cache entries for files which are gone
 * or changed, atomizes files which are not yet cached, and saves the atom
 * cache.
 */
private function buildAtomCache() {
  $this->log(pht('BUILDING ATOM CACHE'));

  $file_hashes = $this->findFilesInProject();
  $this->log(
    pht(
      'Found %s file(s) in project.',
      phutil_count($file_hashes)));

  $this->deleteDeadAtoms($file_hashes);

  $uncached_files = $this->getFilesToAtomize($file_hashes);
  $this->log(
    pht(
      'Found %s unatomized, uncached file(s).',
      phutil_count($uncached_files)));

  // Only files which match a rule (and no exclude pattern) get atomized.
  $atomizer_map = $this->getAtomizersForFiles($uncached_files);
  $this->log(
    pht(
      'Found %s file(s) to atomize.',
      phutil_count($atomizer_map)));

  $futures = $this->buildAtomizerFutures($atomizer_map);
  $this->log(
    pht(
      'Atomizing %s file(s).',
      phutil_count($atomizer_map)));

  if (!$futures) {
    $this->log(pht('Atom cache is up to date, no files to atomize.'));
  } else {
    $this->resolveAtomizerFutures($futures, $file_hashes);
    $this->log(pht('Atomization complete.'));
  }

  $this->log(pht('Writing atom cache.'));
  $this->getAtomCache()->saveAtoms();
  $this->log(pht('Done.')."\n");
}
262
/**
 * Decide which atomizer class should process each file.
 *
 * A file matching any `exclude` pattern is skipped. Otherwise the file is
 * tested against every rule; if several rules match, the rule listed last
 * wins. Files matching no rule are omitted from the result.
 *
 * @param list<string> $files File paths to examine.
 * @return map<string, string> Map from file path to atomizer class name.
 * @throws Exception If a rule or an exclude pattern is not a valid regular
 *   expression.
 */
private function getAtomizersForFiles(array $files) {
  $rules = $this->getRules();
  $exclude = $this->getExclude();
  $atomizers = array();

  foreach ($files as $file) {
    foreach ($exclude as $pattern) {
      $ok = preg_match($pattern, $file);
      // preg_match() returns false (not 0) when the pattern is broken.
      // Fail loudly, as for rules, instead of silently including files
      // the book author meant to exclude.
      if ($ok === false) {
        throw new Exception(
          pht(
            "Exclude pattern '%s' is not a valid regular expression.",
            $pattern));
      }
      if ($ok) {
        continue 2;
      }
    }

    foreach ($rules as $rule => $atomizer) {
      $ok = preg_match($rule, $file);
      if ($ok === false) {
        throw new Exception(
          pht("Rule '%s' is not a valid regular expression.", $rule));
      }
      if ($ok) {
        // No early exit: a later matching rule overrides this one.
        $atomizers[$file] = $atomizer;
      }
    }
  }

  return $atomizers;
}
290
/**
 * Get the map from filename regular expression to atomizer class name.
 *
 * Uses the book's `rules` configuration, falling back to built-in rules
 * for `.diviner` and `.php` files.
 *
 * @return map<string, string>
 */
private function getRules() {
  $default_rules = array(
    '/\\.diviner$/' => 'DivinerArticleAtomizer',
    '/\\.php$/' => 'DivinerPHPAtomizer',
  );

  return $this->getConfig('rules', $default_rules);
}
297
/**
 * Get the book's `exclude` configuration as an array, so that a single
 * scalar value is wrapped in a one-element list.
 *
 * @return array
 */
private function getExclude() {
  return (array)$this->getConfig('exclude', array());
}
302
/**
 * Find every file beneath the book root and compute its file hash.
 *
 * Paths matching the `*` + `/.*` exclusion (hidden files and directories)
 * are skipped.
 *
 * @return map<string, string> Map from path relative to the book root to a
 *   file hash ending in "F".
 */
private function findFilesInProject() {
  $root = $this->getConfig('root');

  $checksums = id(new FileFinder($root))
    ->excludePath('*/.*')
    ->withType('f')
    ->setGenerateChecksums(true)
    ->find();

  $world_version = $this->getDivinerAtomWorldVersion();

  $file_hashes = array();
  foreach ($checksums as $path => $content_hash) {
    $relative_path = Filesystem::readablePath($path, $root);

    // The hash must change when the file moves or Diviner itself changes,
    // not only when the content changes, so mix all three into it.
    $hash_input = implode(
      "\0",
      array($relative_path, $content_hash, $world_version));
    $file_hashes[$relative_path] = md5($hash_input).'F';
  }

  return $file_hashes;
}
323
/**
 * Remove cached file hashes which no longer correspond to any file in the
 * project (the file was deleted, moved or modified).
 *
 * @param map<string, string> $file_hashes Map from relative file path to
 *   current file hash.
 */
private function deleteDeadAtoms(array $file_hashes) {
  $atom_cache = $this->getAtomCache();

  // Build a set keyed by hash. Membership must be tested with isset():
  // flipping the map and testing with empty() would report a live file
  // whose relative path is "0" as dead and evict it on every run.
  $live_hashes = array_fill_keys(array_values($file_hashes), true);

  foreach ($atom_cache->getFileHashMap() as $hash => $ignored) {
    if (!isset($live_hashes[$hash])) {
      $atom_cache->deleteFileHash($hash);
    }
  }
}
334
/**
 * Select the files whose current hash is not present in the atom cache.
 *
 * @param map<string, string> $file_hashes Map from relative file path to
 *   current file hash.
 * @return list<string> Paths of files which are not cached.
 */
private function getFilesToAtomize(array $file_hashes) {
  $cache = $this->getAtomCache();

  $uncached = array();
  foreach ($file_hashes as $path => $file_hash) {
    if ($cache->fileHashExists($file_hash)) {
      continue;
    }
    $uncached[] = $path;
  }

  return $uncached;
}
347
/**
 * Build the subprocess futures which atomize the given files.
 *
 * Files are grouped by atomizer class and split into chunks of at most 32
 * files; each chunk becomes one `bin/diviner atomize` invocation. The
 * futures are only constructed here, not resolved.
 *
 * @param map<string, string> $file_atomizers Map from file path to
 *   atomizer class name.
 * @return list<ExecFuture>
 */
private function buildAtomizerFutures(array $file_atomizers) {
  // Invert the map: atomizer class => list of files it should process.
  $atomizers = array();
  foreach ($file_atomizers as $file => $atomizer) {
    $atomizers[$atomizer][] = $file;
  }

  // These are the same for every chunk, so look them up once.
  $root = dirname(phutil_get_library_root('phorge'));
  $bin = $root.'/bin/diviner';
  $book_path = $this->getBookConfigPath();

  $bar = id(new PhutilConsoleProgressBar())
    ->setTotal(count($file_atomizers));

  $futures = array();
  foreach ($atomizers as $class => $files) {
    foreach (array_chunk($files, 32) as $chunk) {
      $futures[] = new ExecFuture(
        '%s atomize --ugly --book %s --atomizer %s -- %Ls',
        $bin,
        $book_path,
        $class,
        $chunk);

      $bar->update(count($chunk));
    }
  }

  $bar->done();

  return $futures;
}
380
/**
 * Resolve atomizer futures (at most 4 at a time) and record the atoms they
 * emit in the atom cache.
 *
 * A future which fails, or whose output cannot be processed, is logged and
 * skipped so the remaining futures still run.
 *
 * @param array<Future> $futures
 * @param array<string,string> $file_hashes
 */
private function resolveAtomizerFutures(array $futures, array $file_hashes) {
  assert_instances_of($futures, Future::class);

  $cache = $this->getAtomCache();

  $progress = id(new PhutilConsoleProgressBar())
    ->setTotal(count($futures));

  $iterator = id(new FutureIterator($futures))
    ->limit(4);

  foreach ($iterator as $future) {
    try {
      foreach ($future->resolveJSON() as $atom) {
        // File-level atoms also tie the file's hash to their node hash.
        $is_file_atom = ($atom['type'] == DivinerAtom::TYPE_FILE);
        if ($is_file_atom) {
          $cache->addFileHash($file_hashes[$atom['file']], $atom['hash']);
        }

        $cache->addAtom($atom);
      }
    } catch (Exception $ex) {
      phlog($ex);
    }

    $progress->update(1);
  }

  $progress->done();
}
413
/**
 * Get a global version hash, which changes whenever any atom or atomizer
 * implementation changes in a way which is not backward-compatible.
 *
 * The hash covers the atom serialization version, the active rules and the
 * version reported by every DivinerAtomizer subclass.
 *
 * @return string MD5 hash of the combined version information.
 */
private function getDivinerAtomWorldVersion() {
  $atom_version = DivinerAtom::getAtomSerializationVersion();
  $rules = $this->getRules();

  $atomizers = id(new PhutilClassMapQuery())
    ->setAncestorClass(DivinerAtomizer::class)
    ->execute();

  $atomizer_versions = array();
  foreach ($atomizers as $atomizer) {
    $class = get_class($atomizer);
    $atomizer_versions[$class] = call_user_func(
      array(
        $class,
        'getAtomizerVersion',
      ));
  }

  // Sort by class name so the hash does not depend on class load order.
  ksort($atomizer_versions);

  // Key order is significant: it is part of the serialized form.
  $version = array(
    'atom' => $atom_version,
    'rules' => $rules,
    'atomizers' => $atomizer_versions,
  );

  return md5(serialize($version));
}
442
443
444/* -( Graph Cache )-------------------------------------------------------- */
445
446
/**
 * Bring the symbol, edge and graph maps up to date with the atom map.
 *
 * Removes graph data for atoms which no longer exist, adds symbols and
 * edges for new atoms, propagates dirtiness along edges, recomputes the
 * graph hash of every affected atom and saves the result.
 */
private function buildGraphCache() {
  $this->log(pht('BUILDING GRAPH CACHE'));

  $cache = $this->getAtomCache();
  $symbol_map = $cache->getSymbolMap();
  $atom_map = $cache->getAtomMap();

  $dirty_symbols = array();
  $dirty_nodes = array();

  // Atoms which still have a symbol but are gone from the atom map.
  $obsolete = array_diff_key($symbol_map, $atom_map);
  $this->log(
    pht(
      'Found %s obsolete atom(s) in graph.',
      phutil_count($obsolete)));

  foreach ($obsolete as $node_hash => $symbol_hash) {
    $cache->deleteSymbol($node_hash);
    $dirty_symbols[$symbol_hash] = true;

    $cache->deleteEdges($node_hash);
    $cache->deleteGraph($node_hash);
  }

  // Atoms which are in the atom map but have no symbol yet.
  $added = array_diff_key($atom_map, $symbol_map);
  $this->log(
    pht(
      'Found %s new atom(s) in graph.',
      phutil_count($added)));

  foreach ($added as $node_hash => $ignored) {
    $symbol_hash = $this->computeSymbolHash($node_hash);
    $cache->addSymbol($node_hash, $symbol_hash);
    $dirty_symbols[$symbol_hash] = true;

    $cache->addEdges($node_hash, $this->getEdges($node_hash));

    $dirty_nodes[$node_hash] = true;
  }

  $this->log(pht('Propagating changes through the graph.'));

  // Find all the nodes which point at a dirty node, and dirty them. Then
  // find all the nodes which point at those nodes and dirty them, and so
  // on. (This is slightly overkill since we probably don't need to propagate
  // dirtiness across documentation "links" between symbols, but we do want
  // to propagate it across "extends", and we suffer only a little bit of
  // collateral damage by over-dirtying as long as the documentation isn't
  // too well-connected.)

  $pending = array_keys($dirty_symbols);
  while ($pending) {
    $target_symbol = array_pop($pending);

    $sources = $cache->getEdgesWithDestination($target_symbol);
    foreach ($sources as $source_node) {
      $dirty_nodes[$source_node] = true;

      $source_symbol = $this->computeSymbolHash($source_node);
      if (isset($dirty_symbols[$source_symbol])) {
        // Already queued or processed; don't walk it again.
        continue;
      }

      $dirty_symbols[$source_symbol] = true;
      $pending[] = $source_symbol;
    }
  }

  $this->log(
    pht(
      'Found %s affected atoms.',
      phutil_count($dirty_nodes)));

  foreach ($dirty_nodes as $node_hash => $ignored) {
    $cache->addGraph($node_hash, $this->computeGraphHash($node_hash));
  }

  $this->log(pht('Writing graph cache.'));

  $cache->saveGraph();
  $cache->saveEdges();
  $cache->saveSymbols();

  $this->log(pht('Done.')."\n");
}
528
/**
 * Compute the symbol hash of the atom with the given node hash, using the
 * atom's stored `ref` dictionary.
 *
 * @param string $node_hash Node hash of a cached atom.
 * @return string Symbol hash.
 * @throws Exception If no atom with that node hash is in the cache.
 */
private function computeSymbolHash($node_hash) {
  $atom = $this->getAtomCache()->getAtom($node_hash);

  if (!$atom) {
    throw new Exception(
      pht("No such atom with node hash '%s'!", $node_hash));
  }

  return DivinerAtomRef::newFromDictionary($atom['ref'])->toHash();
}
541
/**
 * List the symbol hashes an atom depends on: its own symbol, plus every
 * `extends` and `links` reference which points into the atom's own book.
 *
 * @param string $node_hash Node hash of a cached atom.
 * @return list<string> Unique symbol hashes.
 */
private function getEdges($node_hash) {
  $atom = $this->getAtomCache()->getAtom($node_hash);

  // Make the atom depend on its own symbol, so that all atoms with the same
  // symbol are dirtied (e.g., if a codebase defines the function `f()`
  // several times, all of them should be dirtied when one is dirtied).
  //
  // NOTE(review): this builds the ref from the whole atom dictionary, while
  // computeSymbolHash() uses `$atom['ref']`. Presumably both produce the
  // same hash; confirm against DivinerAtomRef.
  $own_ref = DivinerAtomRef::newFromDictionary($atom);
  $edge_set = array(
    $own_ref->toHash() => true,
  );

  $connected = array_merge($atom['extends'], $atom['links']);
  foreach ($connected as $ref_dict) {
    $ref = DivinerAtomRef::newFromDictionary($ref_dict);

    // References into other books are not tracked as edges.
    if ($ref->getBook() != $atom['book']) {
      continue;
    }

    $edge_set[$ref->toHash()] = true;
  }

  return array_keys($edge_set);
}
562
/**
 * Compute the graph hash of an atom from its own hash and the sorted list
 * of its edges.
 *
 * @param string $node_hash Node hash of a cached atom.
 * @return string Graph hash, ending in "G".
 */
private function computeGraphHash($node_hash) {
  $atom = $this->getAtomCache()->getAtom($node_hash);

  // Sort so the hash does not depend on the order edges were discovered.
  $sorted_edges = $this->getEdges($node_hash);
  sort($sorted_edges);

  $digest = md5(
    serialize(
      array(
        'atomHash' => $atom['hash'],
        'edges' => $sorted_edges,
      )));

  return $digest.'G';
}
577
/**
 * Hand the graph hashes of the current book to the publisher.
 *
 * @param bool $clean Value passed to the publisher's `setDropCaches()`
 *   (the `--clean` flag).
 * @param DivinerPublisher $publisher Publisher to configure and run.
 */
private function publishDocumentation($clean, DivinerPublisher $publisher) {
  $cache = $this->getAtomCache();
  $graph_hashes = array_values($cache->getGraphMap());

  $this->log(pht('PUBLISHING DOCUMENTATION'));

  $publisher
    ->setDropCaches($clean)
    ->setConfig($this->getAllConfig())
    ->setAtomCache($cache)
    ->setRenderer(new DivinerDefaultRenderer())
    ->publishAtoms($graph_hashes);

  $this->log(pht('Done.'));
}
593
594}