@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
3/**
4 * Manages markup engine selection, configuration, application, caching and
5 * pipelining.
6 *
7 * @{class:PhabricatorMarkupEngine} can be used to render objects which
8 * implement @{interface:PhabricatorMarkupInterface} in a batched, cache-aware
9 * way. For example, if you have a list of comments written in remarkup (and
10 * the objects implement the correct interface) you can render them by first
11 * building an engine and adding the fields with @{method:addObject}.
12 *
13 * $field = 'field:body'; // Field you want to render. Each object exposes
14 * // one or more fields of markup.
15 *
16 * $engine = new PhabricatorMarkupEngine();
17 * foreach ($comments as $comment) {
18 * $engine->addObject($comment, $field);
19 * }
20 *
21 * Now, call @{method:process} to perform the actual cache/rendering
22 * step. This is a heavyweight call which does batched data access and
23 * transforms the markup into output.
24 *
25 * $engine->process();
26 *
27 * Finally, do something with the results:
28 *
29 * $results = array();
30 * foreach ($comments as $comment) {
31 * $results[] = $engine->getOutput($comment, $field);
32 * }
33 *
34 * If you have a single object to render, you can use the convenience method
35 * @{method:renderOneObject}.
36 *
37 * @task markup Markup Pipeline
38 * @task engine Engine Construction
39 */
40final class PhabricatorMarkupEngine extends Phobject {
41
// Queued fields, keyed by markup field key. Each entry holds 'object' and
// 'field', and gains an 'output' entry once the field has been rendered.
private $objects = array();

// Viewer and context object, handed to every remarkup engine as the
// 'viewer' and 'contextObject' config values.
private $viewer;
private $contextObject;

// Embedded in every markup field key (see getMarkupFieldKey()).
private $version = 21;

// Preprocessor cache data (getCacheData() results), keyed by markup field
// key. Read by getEngineMetadata().
private $engineCaches = array();

// Extra config applied to every remarkup engine; see setAuxiliaryConfig().
private $auxiliaryConfig = array();

// Engines which are currently inside process(), innermost last.
private static $engineStack = array();
50
51
52/* -( Markup Pipeline )---------------------------------------------------- */
53
54
/**
 * Render one field of one object in a single call.
 *
 * Builds a throwaway engine, queues the field, runs @{method:process} and
 * hands back the rendered output.
 *
 * @param PhabricatorMarkupInterface $object The object to render.
 * @param string $field The field to render.
 * @param PhabricatorUser $viewer User viewing the markup.
 * @param object $context_object (optional) A context object for policy
 *   checks.
 * @return string Marked up output.
 * @task markup
 */
public static function renderOneObject(
  PhabricatorMarkupInterface $object,
  $field,
  PhabricatorUser $viewer,
  $context_object = null) {

  $engine = new PhabricatorMarkupEngine();

  $engine->setViewer($viewer);
  $engine->setContextObject($context_object);
  $engine->addObject($object, $field);
  $engine->process();

  return $engine->getOutput($object, $field);
}
79
80
/**
 * Queue one field of an object for rendering. Nothing is rendered until
 * @{method:process} runs; fetch the result afterwards with
 * @{method:getOutput}.
 *
 * Queueing the same object and field again replaces the earlier entry.
 *
 * @param PhabricatorMarkupInterface $object The object to render.
 * @param string $field The field to render.
 * @return $this
 * @task markup
 */
public function addObject(PhabricatorMarkupInterface $object, $field) {
  $entry = array(
    'object' => $object,
    'field' => $field,
  );

  $this->objects[$this->getMarkupFieldKey($object, $field)] = $entry;

  return $this;
}
99
100
/**
 * Render everything queued with @{method:addObject}. Afterwards, results
 * are available through @{method:getOutput}.
 *
 * While rendering is in progress this engine sits on a static stack of
 * active engines; it is removed again even if rendering throws.
 *
 * @return $this
 * @task markup
 */
public function process() {
  self::$engineStack[] = $this;

  try {
    return $this->execute();
  } finally {
    // Runs on both the normal return and the exception path.
    array_pop(self::$engineStack);
  }
}
119
/**
 * Test whether markup is currently being rendered from inside another
 * markup render, i.e. more than one engine is on the active engine stack.
 *
 * See T13678. This prevents cycles when rendering embedded content that
 * itself has remarkup fields.
 *
 * @return bool True if at least two engines are currently processing.
 * @task markup
 */
public static function isRenderingEmbeddedContent() {
  $active_engines = count(self::$engineStack);
  return ($active_engines > 1);
}
125
/**
 * Render every queued field which does not have output yet.
 *
 * Builds one remarkup engine per pending field, loads or rebuilds the
 * preprocessor caches for the whole batch, then postprocesses each field
 * and stores the result under the 'output' key of its queue entry.
 *
 * Fields rendered by an earlier call are skipped, so calling
 * @{method:process} again after queueing more objects only renders the new
 * ones. Queueing a field again with @{method:addObject} replaces its entry
 * and therefore causes it to be rendered again.
 *
 * @return $this
 * @task markup
 */
private function execute() {
  // Results are stored under 'output' (see the end of this method), so
  // that is the key which marks a field as already rendered.
  $keys = array();
  foreach ($this->objects as $key => $info) {
    if (!isset($info['output'])) {
      $keys[] = $key;
    }
  }

  if (!$keys) {
    return $this;
  }

  $objects = array_select_keys($this->objects, $keys);

  // Build all the markup engines. We need an engine for each field whether
  // we have a cache or not, since we still need to postprocess the cache.
  $engines = array();
  foreach ($objects as $key => $info) {
    $engine = $info['object']->newMarkupEngine($info['field']);
    $engine->setConfig('viewer', $this->viewer);
    $engine->setConfig('contextObject', $this->contextObject);

    // Auxiliary config is applied last, so it can override the above.
    foreach ($this->auxiliaryConfig as $aux_key => $aux_value) {
      $engine->setConfig($aux_key, $aux_value);
    }

    $engines[$key] = $engine;
  }

  // Load or build the preprocessor caches.
  $blocks = $this->loadPreprocessorCaches($engines, $objects);
  $blocks = mpull($blocks, 'getCacheData');

  // Merge rather than overwrite: fields rendered by an earlier call are
  // skipped above, but getEngineMetadata() still needs their cache data.
  $this->engineCaches = $blocks + $this->engineCaches;

  // Finalize the output.
  foreach ($objects as $key => $info) {
    $engine = $engines[$key];
    $field = $info['field'];
    $object = $info['object'];

    $output = $engine->postprocessText($blocks[$key]);
    $output = $object->didMarkupText($field, $output, $engine);
    $this->objects[$key]['output'] = $output;
  }

  return $this;
}
172
173
/**
 * Fetch the rendered output for a field. The field must have been queued
 * with @{method:addObject} and rendered with @{method:process} first;
 * otherwise an exception is thrown.
 *
 * @param PhabricatorMarkupInterface $object The object to retrieve.
 * @param string $field The field to retrieve.
 * @return string Processed output.
 * @task markup
 */
public function getOutput(PhabricatorMarkupInterface $object, $field) {
  $field_key = $this->getMarkupFieldKey($object, $field);
  $this->requireKeyProcessed($field_key);

  $entry = $this->objects[$field_key];
  return $entry['output'];
}
190
191
/**
 * Look up one value from the engine metadata recorded for a rendered
 * field. The field must already have been queued and processed.
 *
 * @param PhabricatorMarkupInterface $object The object to retrieve.
 * @param string $field The field to retrieve.
 * @param string $metadata_key The engine metadata field to retrieve.
 * @param mixed $default (optional) Value returned when the metadata key
 *   is not present.
 * @return mixed The metadata value, or `$default`.
 * @task markup
 */
public function getEngineMetadata(
  PhabricatorMarkupInterface $object,
  $field,
  $metadata_key,
  $default = null) {

  $field_key = $this->getMarkupFieldKey($object, $field);
  $this->requireKeyProcessed($field_key);

  $metadata = $this->engineCaches[$field_key]['metadata'];

  return idx($metadata, $metadata_key, $default);
}
213
214
/**
 * Guard used before reading results: verify that a field key has been
 * queued and that it has rendered output.
 *
 * @param string $key Markup field key, as built by getMarkupFieldKey().
 * @return void
 * @throws Exception If the key was never queued.
 * @throws PhutilInvalidStateException If the key has no output yet.
 * @task markup
 */
private function requireKeyProcessed($key) {
  $is_queued = !empty($this->objects[$key]);
  if (!$is_queued) {
    $message = pht(
      "Call %s before using results (key = '%s').",
      'addObject()',
      $key);
    throw new Exception($message);
  }

  $has_output = isset($this->objects[$key]['output']);
  if (!$has_output) {
    throw new PhutilInvalidStateException('process');
  }
}
231
232
/**
 * Build the key which identifies one field of one object inside this
 * engine and in the markup cache.
 *
 * The key is the object's own field key, this engine's version number and
 * a digest of the versions of all custom inline and block rules, joined
 * with "@". Changing any of the three therefore produces a different key.
 *
 * @param PhabricatorMarkupInterface $object Object owning the field.
 * @param string $field Field name.
 * @return string Markup field key.
 * @task markup
 */
private function getMarkupFieldKey(
  PhabricatorMarkupInterface $object,
  $field) {

  // The custom rule digest is computed once per process. It is built in
  // local variables and only stored in the static at the very end, so an
  // exception part-way through cannot leave a half-built value memoized.
  static $custom;
  if ($custom === null) {
    $rules = array_merge(
      self::loadCustomInlineRules(),
      self::loadCustomBlockRules());

    $versions = mpull($rules, 'getRuleVersion', null);
    // Sort so the digest does not depend on rule load order.
    ksort($versions);

    $custom = PhabricatorHash::digestForIndex(serialize($versions));
  }

  return $object->getMarkupFieldKey($field).'@'.$this->version.'@'.$custom;
}
253
254
/**
 * Produce a preprocessor cache block for every given field, reading from
 * the markup cache table where allowed and preprocessing the text where no
 * usable cache row exists.
 *
 * Freshly built blocks are written back to the cache when the object opts
 * in to caching and the install is not in read-only mode.
 *
 * @param array $engines Remarkup engines, keyed by markup field key.
 * @param array $objects Queue entries ('object' and 'field'), keyed by
 *   markup field key.
 * @return array PhabricatorMarkupCache objects, keyed by markup field key.
 * @task markup
 */
private function loadPreprocessorCaches(array $engines, array $objects) {
  // Work out which fields are allowed to touch the persistent cache.
  $cacheable = array();
  foreach ($objects as $key => $info) {
    $object = $info['object'];
    if ($object->shouldUseMarkupCache($info['field'])) {
      $cacheable[$key] = true;
    }
  }

  $blocks = array();
  if ($cacheable) {
    // A failed cache read is logged and otherwise ignored; the affected
    // fields are simply rebuilt below.
    try {
      $blocks = id(new PhabricatorMarkupCache())->loadAllWhere(
        'cacheKey IN (%Ls)',
        array_keys($cacheable));
      $blocks = mpull($blocks, null, 'getCacheKey');
    } catch (Exception $ex) {
      phlog($ex);
    }
  }

  $can_write = !PhabricatorEnv::isReadOnly();

  foreach ($objects as $key => $info) {
    // False check in case MySQL doesn't support unicode characters
    // in the string (T1191), resulting in unserialize returning false.
    $has_usable_cache =
      isset($blocks[$key]) &&
      ($blocks[$key]->getCacheData() !== false);
    if ($has_usable_cache) {
      // An existing preprocessing cache does not need to be rebuilt.
      continue;
    }

    $text = $info['object']->getMarkupText($info['field']);
    $data = $engines[$key]->preprocessText($text);

    // NOTE: This is just debugging information to help sort out cache
    // issues. If one machine is misconfigured and poisoning caches you can
    // use this field to hunt it down.
    $metadata = array(
      'host' => php_uname('n'),
    );

    $block = id(new PhabricatorMarkupCache())
      ->setCacheKey($key)
      ->setCacheData($data)
      ->setMetadata($metadata);
    $blocks[$key] = $block;

    if ($can_write && isset($cacheable[$key])) {
      // This is just filling a cache and always safe, even on a read
      // pathway.
      $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites();
      $block->replace();
      unset($unguarded);
    }
  }

  return $blocks;
}
316
317
/**
 * Set the user on whose behalf markup is rendered. The viewer is passed to
 * each remarkup engine as its 'viewer' config value and is used to
 * implement object permissions.
 *
 * @param PhabricatorUser $viewer The viewing user.
 * @return $this
 * @task markup
 */
public function setViewer(PhabricatorUser $viewer) {
  $this->viewer = $viewer;

  return $this;
}
329
/**
 * Set the context object. It is passed to each remarkup engine as its
 * 'contextObject' config value and is used to implement object
 * permissions.
 *
 * @param mixed $object The object in whose context this remarkup is used.
 *   May be null; @{method:renderOneObject} passes its optional argument
 *   straight through.
 * @return $this
 * @task markup
 */
public function setContextObject($object) {
  $this->contextObject = $object;

  return $this;
}
341
/**
 * Record an extra config value which is applied to every remarkup engine
 * this object builds, after the 'viewer' and 'contextObject' values.
 *
 * TODO: This is gross and should be removed. Avoid use.
 *
 * @param string $key Engine config key.
 * @param mixed $value Engine config value.
 * @return $this
 * @task markup
 */
public function setAuxiliaryConfig($key, $value) {
  $this->auxiliaryConfig[$key] = $value;

  return $this;
}
347
348
349/* -( Engine Construction )------------------------------------------------ */
350
351
352
/**
 * Build a remarkup engine for Maniphest. No options are overridden, so the
 * engine uses the default configuration of @{method:newMarkupEngine}.
 *
 * @return PhutilRemarkupEngine
 * @task engine
 */
public static function newManiphestMarkupEngine() {
  $options = array();

  return self::newMarkupEngine($options);
}
360
361
/**
 * Build a remarkup engine for Phriction: the default configuration with
 * table of contents generation switched on.
 *
 * @return PhutilRemarkupEngine
 * @task engine
 */
public static function newPhrictionMarkupEngine() {
  $options = array(
    'header.generate-toc' => true,
  );

  return self::newMarkupEngine($options);
}
370
371
/**
 * Build a remarkup engine for Phame. Image macros are disabled, and URIs
 * are configured as full, same-window links relative to the install's base
 * URI.
 *
 * @return PhutilRemarkupEngine
 * @task engine
 */
public static function newPhameMarkupEngine() {
  $options = array(
    'macros' => false,
    'uri.full' => true,
    'uri.same-window' => true,
    'uri.base' => PhabricatorEnv::getURI('/'),
  );

  return self::newMarkupEngine($options);
}
384
385
/**
 * Build a remarkup engine for feed stories, with both the image macro
 * rules and the YouTube rule left out.
 *
 * @return PhutilRemarkupEngine
 * @task engine
 */
public static function newFeedMarkupEngine() {
  $options = array(
    'macros' => false,
    'youtube' => false,
  );

  return self::newMarkupEngine($options);
}
396
/**
 * Build a remarkup engine for Calendar. No options are overridden, so the
 * engine uses the default configuration of @{method:newMarkupEngine}.
 *
 * @return PhutilRemarkupEngine
 * @task engine
 */
public static function newCalendarMarkupEngine() {
  $options = array();

  return self::newMarkupEngine($options);
}
404
405
/**
 * Build a remarkup engine for Differential.
 *
 * Only the 'differential.diff' entry of `$options` is read; it is passed
 * on to the engine, or null when the entry is absent.
 *
 * @param array $options (optional) Map which may contain
 *   'differential.diff'.
 * @return PhutilRemarkupEngine
 * @task engine
 */
public static function newDifferentialMarkupEngine(array $options = array()) {
  $diff = idx($options, 'differential.diff');

  $engine_options = array(
    'differential.diff' => $diff,
  );

  return self::newMarkupEngine($engine_options);
}
414
415
/**
 * Build a remarkup engine for Diffusion: the default configuration with
 * table of contents generation switched on.
 *
 * @param array $options (optional) Accepted but not currently read.
 * @return PhutilRemarkupEngine
 * @task engine
 */
public static function newDiffusionMarkupEngine(array $options = array()) {
  $engine_options = array(
    'header.generate-toc' => true,
  );

  return self::newMarkupEngine($engine_options);
}
424
/**
 * Get the remarkup engine for a named ruleset.
 *
 * Engines are built on first use and memoized per ruleset, so repeated
 * calls with the same ruleset return the same engine instance.
 *
 * Known rulesets: 'default', 'feed', 'nolinebreaks', 'diffusion-readme',
 * 'diviner' and 'extract'.
 *
 * @param string $ruleset (optional) Ruleset name.
 * @return PhutilRemarkupEngine
 * @throws Exception If the ruleset name is not recognized.
 * @task engine
 */
public static function getEngine($ruleset = 'default') {
  static $engines = array();

  if (!isset($engines[$ruleset])) {
    // Each ruleset is a default engine plus a few config overrides, which
    // are applied in the order listed.
    switch ($ruleset) {
      case 'default':
        $overrides = array();
        break;
      case 'feed':
        $overrides = array(
          'autoplay.disable' => true,
        );
        break;
      case 'nolinebreaks':
        $overrides = array(
          'preserve-linebreaks' => false,
        );
        break;
      case 'diffusion-readme':
      case 'diviner':
        // A 'diviner.renderer' setting for the 'diviner' ruleset was
        // previously commented out here and remains disabled.
        $overrides = array(
          'preserve-linebreaks' => false,
          'header.generate-toc' => true,
        );
        break;
      case 'extract':
        // Engine used for reference/edge extraction. Turn off anything
        // which is slow and doesn't change reference extraction.
        $overrides = array(
          'pygments.enabled' => false,
        );
        break;
      default:
        throw new Exception(pht('Unknown engine ruleset: %s!', $ruleset));
    }

    $engine = self::newMarkupEngine(array());
    foreach ($overrides as $config_key => $config_value) {
      $engine->setConfig($config_key, $config_value);
    }

    $engines[$ruleset] = $engine;
  }

  return $engines[$ruleset];
}
471
/**
 * Get the option defaults used by @{method:newMarkupEngine}. Four values
 * come from the install's configuration; the rest are fixed.
 *
 * @return map<string, wild> Default engine options.
 * @task engine
 */
private static function getMarkupEngineDefaultConfiguration() {
  $pygments = PhabricatorEnv::getEnvConfig('pygments.enabled');
  $youtube = PhabricatorEnv::getEnvConfig(
    'remarkup.enable-embedded-youtube');
  $protocols = PhabricatorEnv::getEnvConfig('uri.allowed-protocols');
  $highlighter = PhabricatorEnv::getEnvConfig('syntax-highlighter.engine');

  return array(
    'pygments' => $pygments,
    'youtube' => $youtube,
    'differential.diff' => null,
    'header.generate-toc' => false,
    'macros' => true,
    'uri.allowed-protocols' => $protocols,
    'uri.full' => false,
    'syntax-highlighter.engine' => $highlighter,
    'preserve-linebreaks' => true,
  );
}
491
492
/**
 * Build a fully configured remarkup engine.
 *
 * Options missing from `$options` are filled in from
 * getMarkupEngineDefaultConfiguration(); options which are present win.
 * Besides the keys which have defaults, 'uri.base' and 'uri.same-window'
 * are honored when set. The 'youtube' and 'macros' options are not engine
 * config: they decide whether the corresponding rules are installed.
 *
 * Inline rules are installed in a fixed order: built-in rules, rules
 * contributed by installed applications, then custom inline rules. Every
 * block rule receives the complete inline rule list.
 *
 * @param array $options Option overrides.
 * @return PhutilRemarkupEngine
 * @task engine
 */
public static function newMarkupEngine(array $options) {
  $options += self::getMarkupEngineDefaultConfiguration();

  $engine = new PhutilRemarkupEngine();

  $engine->setConfig('preserve-linebreaks', $options['preserve-linebreaks']);

  $engine->setConfig('pygments.enabled', $options['pygments']);
  $engine->setConfig(
    'uri.allowed-protocols',
    $options['uri.allowed-protocols']);
  $engine->setConfig('differential.diff', $options['differential.diff']);
  $engine->setConfig('header.generate-toc', $options['header.generate-toc']);
  $engine->setConfig(
    'syntax-highlighter.engine',
    $options['syntax-highlighter.engine']);

  $style_map = id(new PhabricatorDefaultSyntaxStyle())
    ->getRemarkupStyleMap();
  $engine->setConfig('phutil.codeblock.style-map', $style_map);

  $engine->setConfig('uri.full', $options['uri.full']);

  // These two options have no default and are only forwarded when the
  // caller supplied a non-null value.
  if (isset($options['uri.base'])) {
    $engine->setConfig('uri.base', $options['uri.base']);
  }

  if (isset($options['uri.same-window'])) {
    $engine->setConfig('uri.same-window', $options['uri.same-window']);
  }

  $rules = array();
  $rules[] = new PhutilRemarkupEscapeRemarkupRule();
  $rules[] = new PhutilRemarkupEvalRule();
  $rules[] = new PhutilRemarkupMonospaceRule();
  $rules[] = new PhutilRemarkupHexColorCodeRule();

  $rules[] = new PhutilRemarkupDocumentLinkRule();
  $rules[] = new PhabricatorNavigationRemarkupRule();
  $rules[] = new PhabricatorKeyboardRemarkupRule();
  $rules[] = new PhabricatorConfigRemarkupRule();

  if ($options['youtube']) {
    $rules[] = new PhabricatorYoutubeRemarkupRule();
  }

  $rules[] = new PhabricatorIconRemarkupRule();
  $rules[] = new PhabricatorEmojiRemarkupRule();
  $rules[] = new PhabricatorHandleRemarkupRule();

  $applications = PhabricatorApplication::getAllInstalledApplications();
  foreach ($applications as $application) {
    foreach ($application->getRemarkupRules() as $rule) {
      $rules[] = $rule;
    }
  }

  $rules[] = new PhutilRemarkupHyperlinkRule();

  if ($options['macros']) {
    $rules[] = new PhabricatorImageMacroRemarkupRule();
    $rules[] = new PhabricatorMemeRemarkupRule();
  }

  $rules[] = new PhutilRemarkupBoldRule();
  $rules[] = new PhutilRemarkupItalicRule();
  $rules[] = new PhutilRemarkupDelRule();
  $rules[] = new PhutilRemarkupUnderlineRule();
  $rules[] = new PhutilRemarkupHighlightRule();
  $rules[] = new PhutilRemarkupAnchorRule();

  // Custom rules are cloned so each engine gets its own instances.
  foreach (self::loadCustomInlineRules() as $rule) {
    $rules[] = clone $rule;
  }

  $blocks = array();
  $blocks[] = new PhutilRemarkupQuotesBlockRule();
  $blocks[] = new PhutilRemarkupReplyBlockRule();
  $blocks[] = new PhutilRemarkupLiteralBlockRule();
  $blocks[] = new PhutilRemarkupHeaderBlockRule();
  $blocks[] = new PhutilRemarkupHorizontalRuleBlockRule();
  $blocks[] = new PhutilRemarkupListBlockRule();
  $blocks[] = new PhutilRemarkupCodeBlockRule();
  $blocks[] = new PhutilRemarkupNoteBlockRule();
  $blocks[] = new PhutilRemarkupTableBlockRule();
  $blocks[] = new PhutilRemarkupSimpleTableBlockRule();
  $blocks[] = new PhutilRemarkupInterpreterBlockRule();
  $blocks[] = new PhutilRemarkupDefaultBlockRule();

  // Clone custom block rules too, matching the inline rules above. Block
  // rules are mutated by setMarkupRules() below, so an instance must not
  // be shared between engines.
  // NOTE(review): this assumes the class map query can return the same
  // rule objects on repeated calls - confirm against PhutilClassMapQuery.
  foreach (self::loadCustomBlockRules() as $rule) {
    $blocks[] = clone $rule;
  }

  foreach ($blocks as $block) {
    $block->setMarkupRules($rules);
  }

  $engine->setBlockRules($blocks);

  return $engine;
}
597
/**
 * Collect the PHIDs mentioned in a list of remarkup content blocks.
 *
 * Each non-empty block is rendered with a Differential markup engine and
 * the metadata recorded under
 * PhabricatorMentionRemarkupRule::KEY_MENTIONED is merged into the result.
 * Null and empty blocks are skipped.
 *
 * @param PhabricatorUser $viewer Viewer used for rendering.
 * @param array $content_blocks List of remarkup strings (or nulls).
 * @return array Mention metadata merged across all blocks; when the same
 *   key appears in several blocks the first occurrence wins.
 */
public static function extractPHIDsFromMentions(
  PhabricatorUser $viewer,
  array $content_blocks) {

  $engine = self::newDifferentialMarkupEngine();
  $engine->setConfig('viewer', $viewer);

  $mentions = array();
  foreach ($content_blocks as $content_block) {
    // The null test comes first so strlen() never receives null.
    $is_blank = ($content_block === null) || !strlen($content_block);
    if ($is_blank) {
      continue;
    }

    $engine->markupText($content_block);

    $mentions += $engine->getTextMetadata(
      PhabricatorMentionRemarkupRule::KEY_MENTIONED,
      array());
  }

  return $mentions;
}
625
/**
 * Collect the PHIDs of files embedded in a list of remarkup content
 * blocks.
 *
 * Each non-empty block is rendered with a Differential markup engine and
 * the PHIDs recorded under
 * PhabricatorEmbedFileRemarkupRule::KEY_ATTACH_INTENT_FILE_PHIDS are
 * gathered. Null and empty blocks are skipped, matching
 * @{method:extractPHIDsFromMentions}; they cannot embed anything.
 *
 * @param PhabricatorUser $viewer Viewer used for rendering.
 * @param array $content_blocks List of remarkup strings (or nulls).
 * @return list<string> Unique file PHIDs, in order of first appearance.
 */
public static function extractFilePHIDsFromEmbeddedFiles(
  PhabricatorUser $viewer,
  array $content_blocks) {
  $files = array();

  $engine = self::newDifferentialMarkupEngine();
  $engine->setConfig('viewer', $viewer);

  foreach ($content_blocks as $content_block) {
    // Do not hand null or empty text to the engine. The null test comes
    // first so strlen() never receives null.
    if ($content_block === null) {
      continue;
    }

    if (!strlen($content_block)) {
      continue;
    }

    $engine->markupText($content_block);
    $phids = $engine->getTextMetadata(
      PhabricatorEmbedFileRemarkupRule::KEY_ATTACH_INTENT_FILE_PHIDS,
      array());

    // Key by PHID so a file embedded more than once is reported once.
    foreach ($phids as $phid) {
      $files[$phid] = $phid;
    }
  }

  return array_values($files);
}
646
/**
 * Summarize a corpus as its first sentence.
 *
 * Takes the text up to the first line break of the trimmed corpus, cuts it
 * after the first run of ".", "?" or "!" which follows a word character
 * and is not directly followed by one, and truncates the result to at most
 * 128 glyphs.
 *
 * @param string $corpus Text to summarize.
 * @return string The first sentence, possibly truncated.
 */
public static function summarizeSentence($corpus) {
  $corpus = trim($corpus);
  $blocks = preg_split('/\n+/', $corpus, 2);
  $block = head($blocks);

  $sentences = preg_split(
    '/\b([.?!]+)\B/u',
    $block,
    2,
    PREG_SPLIT_DELIM_CAPTURE);

  if ($sentences === false) {
    // preg_split() returns false when matching fails, which the "u"
    // modifier causes for text that is not valid UTF-8. Fall back to the
    // unsplit block instead of passing false to count() and head().
    $sentences = array($block);
  }

  if (count($sentences) > 1) {
    // With PREG_SPLIT_DELIM_CAPTURE, element 0 is the sentence text and
    // element 1 is the punctuation which ended it.
    $result = $sentences[0].$sentences[1];
  } else {
    $result = head($sentences);
  }

  return id(new PhutilUTF8StringTruncator())
    ->setMaximumGlyphs(128)
    ->truncateString($result);
}
668
/**
 * Pick one paragraph of a remarkup corpus to serve as its summary, so the
 * text is shortened without being cut somewhere awkward.
 *
 * The corpus is split on blank lines. The first paragraph which looks like
 * ordinary prose and does not start with "{" or "=" is returned. If there
 * is no such paragraph, the first non-empty paragraph seen is returned
 * instead.
 *
 * TODO: We could do a better job of this.
 *
 * @param string $corpus Remarkup corpus to summarize.
 * @return string Summarized corpus.
 */
public static function summarize($corpus) {
  // Major goals here are:
  //  - Don't split in the middle of a character (utf-8).
  //  - Don't split in the middle of, e.g., **bold** text, since
  //    we end up with hanging '**' in the summary.
  //  - Try not to pick an image macro, header, embedded file, etc.
  //  - Hopefully don't return too much text. We don't explicitly limit
  //    this right now.

  $paragraphs = preg_split("/\n *\n\s*/", $corpus);

  $fallback = null;
  foreach ($paragraphs as $paragraph) {
    // Heuristic for "normal words separated by spaces", to tell standard
    // paragraphs apart from things like image macros. It may not work well
    // for non-latin text.
    $looks_like_prose = preg_match('/\w\s\w/', $paragraph);

    // Paragraphs opening with "{" or "=" are embedded objects or headers.
    $starts_with_markup = preg_match('/^[{=]/', $paragraph);

    if ($looks_like_prose && !$starts_with_markup) {
      // Good enough; use this paragraph as the summary.
      return $paragraph;
    }

    if (!$fallback) {
      // Nothing usable has been seen so far; remember this paragraph in
      // case every paragraph turns out to be garbage.
      $fallback = $paragraph;
    }
  }

  return $fallback;
}
717
/**
 * Load the custom inline remarkup rules: the result of a class map query
 * for descendants of PhabricatorRemarkupCustomInlineRule.
 *
 * @return array Custom inline rule objects.
 */
private static function loadCustomInlineRules() {
  $query = id(new PhutilClassMapQuery())
    ->setAncestorClass(PhabricatorRemarkupCustomInlineRule::class);

  return $query->execute();
}
723
/**
 * Load the custom block remarkup rules: the result of a class map query
 * for descendants of PhabricatorRemarkupCustomBlockRule.
 *
 * @return array Custom block rule objects.
 */
private static function loadCustomBlockRules() {
  $query = id(new PhutilClassMapQuery())
    ->setAncestorClass(PhabricatorRemarkupCustomBlockRule::class);

  return $query->execute();
}
729
/**
 * Compute a keyed digest of a piece of remarkup content as it belongs to a
 * particular object.
 *
 * The digested message is the object's class name, the object's ID when
 * the object is a PhabricatorLiskDAO, and the content, joined with
 * newlines. It is digested with the named key 'remarkup'.
 *
 * @param object $object Object the content belongs to.
 * @param string $content Remarkup content.
 * @return string Digest of the content.
 */
public static function digestRemarkupContent($object, $content) {
  $parts = array(get_class($object));

  // Only Lisk objects contribute an ID to the message.
  if ($object instanceof PhabricatorLiskDAO) {
    $parts[] = $object->getID();
  }

  $parts[] = $content;

  return PhabricatorHash::digestWithNamedKey(
    implode("\n", $parts),
    'remarkup');
}
744
745}