@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
/**
 * Remarkup engine.
 *
 * Splits input text into blocks using an ordered list of block rules, asks
 * each rule to render its block (and any child blocks), and then restores
 * stored tokens into the output during a postprocessing pass.
 *
 * Rendering is a two step process: @{method:preprocessText} produces a
 * dictionary with `output`, `storage` and `metadata` keys, and
 * @{method:postprocessText} consumes such a dictionary and returns the final
 * markup. @{method:markupText} runs both steps back to back.
 */
final class PhutilRemarkupEngine extends PhutilMarkupEngine {

  const MODE_DEFAULT = 0;
  const MODE_TEXT = 1;
  const MODE_HTML_MAIL = 2;

  /**
   * Maximum nesting depth at which child blocks are still split apart. See
   * @{method:splitTextIntoBlocks}.
   */
  const MAX_CHILD_DEPTH = 32;

  private $blockRules = array();
  private $config = array();
  private $mode;
  private $metadata = array();
  private $states = array();
  private $postprocessRules = array();
  private $storage;

  /**
   * Set a configuration value.
   *
   * @param string $key Configuration key.
   * @param wild $value Value to store under the key.
   * @return this
   */
  public function setConfig($key, $value) {
    $this->config[$key] = $value;
    return $this;
  }

  /**
   * Read a configuration value.
   *
   * @param string $key Configuration key.
   * @param wild $default Value to use if the key is not configured.
   * @return wild Result of looking the key up with `idx()`.
   */
  public function getConfig($key, $default = null) {
    return idx($this->config, $key, $default);
  }

  /**
   * Set the rendering mode.
   *
   * @param int $mode Mode value; it is tested bitwise against the `MODE_*`
   *   constants by @{method:isTextMode} and @{method:isHTMLMailMode}.
   * @return this
   */
  public function setMode($mode) {
    $this->mode = $mode;
    return $this;
  }

  /**
   * Test whether the `MODE_TEXT` bit is set in the current mode.
   *
   * @return int Raw result of the bitwise test (nonzero when set). This is
   *   not a strict boolean.
   */
  public function isTextMode() {
    return $this->mode & self::MODE_TEXT;
  }

  /**
   * Test whether the engine is currently in the `toc` state.
   *
   * @return bool
   */
  public function isAnchorMode() {
    return $this->getState('toc');
  }

  /**
   * Test whether the `MODE_HTML_MAIL` bit is set in the current mode.
   *
   * @return int Raw result of the bitwise test (nonzero when set). This is
   *   not a strict boolean.
   */
  public function isHTMLMailMode() {
    return $this->mode & self::MODE_HTML_MAIL;
  }

  /**
   * Get the current quote depth, stored under the `runtime.quote.depth`
   * configuration key.
   *
   * @return wild Stored depth, or `0` if none has been set.
   */
  public function getQuoteDepth() {
    return $this->getConfig('runtime.quote.depth', 0);
  }

  /**
   * Set the current quote depth, stored under the `runtime.quote.depth`
   * configuration key.
   *
   * @param int $depth New depth.
   * @return this
   */
  public function setQuoteDepth($depth) {
    return $this->setConfig('runtime.quote.depth', $depth);
  }

  /**
   * Install the block rules used by this engine.
   *
   * The rules are sorted by their priority vector, attached to this engine,
   * and then scanned for markup rules which declare a postprocess key. Those
   * markup rules are remembered so @{method:postprocessText} can notify them.
   *
   * @param array<PhutilRemarkupBlockRule> $rules
   * @return this
   */
  public function setBlockRules(array $rules) {
    assert_instances_of($rules, PhutilRemarkupBlockRule::class);

    $rules = msortv($rules, 'getPriorityVector');

    $this->blockRules = $rules;
    foreach ($this->blockRules as $rule) {
      $rule->setEngine($this);
    }

    // Index markup rules by postprocess key. If several rules report the
    // same key, the one encountered last replaces the earlier ones.
    $post_rules = array();
    foreach ($this->blockRules as $block_rule) {
      foreach ($block_rule->getMarkupRules() as $rule) {
        $key = $rule->getPostprocessKey();
        if ($key !== null) {
          $post_rules[$key] = $rule;
        }
      }
    }

    $this->postprocessRules = $post_rules;

    return $this;
  }

  /**
   * Read a metadata value for the text currently being processed.
   *
   * @param string $key Metadata key.
   * @param wild $default Value to use if the key is not present.
   * @return wild
   */
  public function getTextMetadata($key, $default = null) {
    // NOTE(review): this direct lookup looks like a deliberate fast path
    // which avoids the "idx()" call for keys that are set; confirm before
    // removing it.
    if (isset($this->metadata[$key])) {
      return $this->metadata[$key];
    }
    return idx($this->metadata, $key, $default);
  }

  /**
   * Write a metadata value for the text currently being processed.
   *
   * @param string $key Metadata key.
   * @param wild $value Value to store.
   * @return this
   */
  public function setTextMetadata($key, $value) {
    $this->metadata[$key] = $value;
    return $this;
  }

  /**
   * Place text into block storage and return whatever the storage returns
   * for it.
   *
   * This requires active storage, which is created by
   * @{method:preprocessText} and @{method:postprocessText}.
   *
   * @param wild $text Text to store.
   * @return wild Result of the storage `store()` call (presumably a token
   *   which stands in for the text; see @{class:PhutilRemarkupBlockStorage}).
   */
  public function storeText($text) {
    // In text mode, the value is wrapped with "phutil_safe_html()" before it
    // is stored.
    if ($this->isTextMode()) {
      $text = phutil_safe_html($text);
    }
    return $this->storage->store($text);
  }

  /**
   * Replace the text previously stored under a token.
   *
   * This requires active storage, which is created by
   * @{method:preprocessText} and @{method:postprocessText}.
   *
   * @param wild $token Token identifying the stored text.
   * @param wild $new_text Replacement text.
   * @return this
   */
  public function overwriteStoredText($token, $new_text) {
    // Mirror "storeText()": wrap the value in text mode.
    if ($this->isTextMode()) {
      $new_text = phutil_safe_html($new_text);
    }
    $this->storage->overwrite($token, $new_text);
    return $this;
  }

  /**
   * Render text by running the preprocess and postprocess steps in sequence.
   *
   * @param string $text Remarkup source text.
   * @return wild Rendered output.
   */
  public function markupText($text) {
    return $this->postprocessText($this->preprocessText($text));
  }

  /**
   * Enter a named state. States are counted, so nested pushes of the same
   * state must be balanced by the same number of pops.
   *
   * @param string $state State name.
   * @return this
   */
  public function pushState($state) {
    if (empty($this->states[$state])) {
      $this->states[$state] = 0;
    }
    $this->states[$state]++;
    return $this;
  }

  /**
   * Leave a named state.
   *
   * @param string $state State name.
   * @return this
   * @throws Exception If the state is not currently active, i.e. it has been
   *   popped more often than it was pushed.
   */
  public function popState($state) {
    if (empty($this->states[$state])) {
      throw new Exception(pht("State '%s' popped more than pushed!", $state));
    }
    $this->states[$state]--;
    if (!$this->states[$state]) {
      unset($this->states[$state]);
    }
    return $this;
  }

  /**
   * Test whether a named state is currently active.
   *
   * @param string $state State name.
   * @return bool True if the state has unbalanced pushes.
   */
  public function getState($state) {
    return !empty($this->states[$state]);
  }

  /**
   * Run the first rendering step: split the text into blocks and mark up
   * each block.
   *
   * Metadata and block storage are reset at the start of this call.
   *
   * @param string $text Remarkup source text.
   * @return map<string, wild> Dictionary with keys `output` (flattened block
   *   output), `storage` (the storage map) and `metadata`.
   */
  public function preprocessText($text) {
    $this->metadata = array();
    $this->storage = new PhutilRemarkupBlockStorage();

    $blocks = $this->splitTextIntoBlocks($text);

    $output = array();
    foreach ($blocks as $block) {
      $output[] = $this->markupBlock($block);
    }
    $output = $this->flattenOutput($output);

    // Capture the storage map and release the storage object; it is rebuilt
    // from the map by "postprocessText()".
    $map = $this->storage->getMap();
    $this->storage = null;

    return array(
      'output' => $output,
      'storage' => $map,
      'metadata' => $this->metadata,
    );
  }

  /**
   * Split text into a list of blocks, each matched by one block rule.
   *
   * Adjacent blocks matched by a @{class:PhutilRemarkupDefaultBlockRule} are
   * merged when @{method:shouldMergeParagraphBlocks} says so. Blocks whose
   * rule supports child blocks are split recursively, up to
   * `MAX_CHILD_DEPTH` levels.
   *
   * @param string $text Text to split.
   * @param int $depth Current recursion depth.
   * @return list<map<string, wild>> Blocks with keys `start`, `num_lines`,
   *   `rule`, `empty`, `children`, `merge` and `text`.
   * @throws Exception If no block rule matches at some line.
   */
  private function splitTextIntoBlocks($text, $depth = 0) {
    // Apply basic block and paragraph normalization to the text. NOTE: We
    // don't strip trailing whitespace because it is semantic in some
    // contexts, notably inlined diffs that the author intends to show as a
    // code block.
    $text = phutil_split_lines($text, true);
    $block_rules = $this->blockRules;
    $blocks = array();
    $cursor = 0;

    // Only blocks produced by default block rules are candidates for merging.
    $can_merge = array();
    foreach ($block_rules as $key => $block_rule) {
      if ($block_rule instanceof PhutilRemarkupDefaultBlockRule) {
        $can_merge[$key] = true;
      }
    }

    $last_block = null;
    $last_block_key = -1;

    // See T13487. For very large inputs, block separation can dominate
    // runtime. This is written somewhat clumsily to attempt to handle
    // very large inputs as gracefully as is practical.

    while (isset($text[$cursor])) {
      $starting_cursor = $cursor;
      foreach ($block_rules as $block_key => $block_rule) {
        $num_lines = $block_rule->getMatchingLineCount($text, $cursor);

        if ($num_lines) {
          $current_block = array(
            'start' => $cursor,
            'num_lines' => $num_lines,
            'rule' => $block_rule,
            'empty' => self::isEmptyBlock($text, $cursor, $num_lines),
            'children' => array(),
            'merge' => isset($can_merge[$block_key]),
          );

          $should_merge = self::shouldMergeParagraphBlocks(
            $text,
            $last_block,
            $current_block);

          if ($should_merge) {
            // Extend the previous block in place instead of appending.
            $last_block['num_lines'] =
              ($last_block['num_lines'] + $current_block['num_lines']);

            $last_block['empty'] =
              ($last_block['empty'] && $current_block['empty']);

            $blocks[$last_block_key] = $last_block;
          } else {
            $blocks[] = $current_block;

            $last_block = $current_block;
            $last_block_key++;
          }

          $cursor += $num_lines;

          // The first matching rule wins for this position.
          break;
        }
      }

      // If no rule consumed any lines we would loop forever, so bail out.
      if ($starting_cursor === $cursor) {
        throw new Exception(pht('Block in text did not match any block rule.'));
      }
    }

    // See T13487. It's common for blocks to be small, and this loop seems to
    // measure as faster if we manually concatenate blocks than if we
    // "array_slice()" and "implode()" blocks. This is a bit muddy.

    foreach ($blocks as $key => $block) {
      $min = $block['start'];
      $max = $min + $block['num_lines'];

      $lines = '';
      for ($ii = $min; $ii < $max; $ii++) {
        $lines .= $text[$ii];
      }

      $blocks[$key]['text'] = $lines;
    }

    // Stop splitting child blocks apart if we get too deep. This arrests
    // any blocks which have looping child rules, and stops the stack from
    // exploding if someone writes a hilarious comment with 5,000 levels of
    // quoted text.

    if ($depth < self::MAX_CHILD_DEPTH) {
      foreach ($blocks as $key => $block) {
        $rule = $block['rule'];
        if (!$rule->supportsChildBlocks()) {
          continue;
        }

        list($parent_text, $child_text) = $rule->extractChildText(
          $block['text']);
        $blocks[$key]['text'] = $parent_text;
        $blocks[$key]['children'] = $this->splitTextIntoBlocks(
          $child_text,
          $depth + 1);
      }
    }

    return $blocks;
  }

  /**
   * Mark up one block, rendering its child blocks first.
   *
   * @param map<string, wild> $block Block produced by
   *   @{method:splitTextIntoBlocks}.
   * @return wild Result of the block rule's `markupText()` call.
   */
  private function markupBlock(array $block) {
    $rule = $block['rule'];

    $rule->willMarkupChildBlocks();

    // Always pair the "will" hook with the "did" hook, even if rendering a
    // child throws. Otherwise whatever the rule set up in the "will" hook
    // would stay in effect if this engine is reused after the failure.
    try {
      $children = array();
      foreach ($block['children'] as $child) {
        $children[] = $this->markupBlock($child);
      }
    } finally {
      $rule->didMarkupChildBlocks();
    }

    if ($children) {
      $children = $this->flattenOutput($children);
    } else {
      // Blocks without children pass null rather than an empty value.
      $children = null;
    }

    return $rule->markupText($block['text'], $children);
  }

  /**
   * Join a list of rendered blocks into a single output value.
   *
   * @param list<wild> $output Rendered blocks.
   * @return wild In text mode, a string with the blocks separated by blank
   *   lines and a trailing newline; otherwise, the result of
   *   `phutil_implode_html()` with the same separator.
   */
  private function flattenOutput(array $output) {
    if ($this->isTextMode()) {
      $output = implode("\n\n", $output)."\n";
    } else {
      $output = phutil_implode_html("\n\n", $output);
    }

    return $output;
  }

  /**
   * Decide whether the current block should be merged into the previous one.
   *
   * @param list<string> $text All lines of the text being split.
   * @param map<string, wild>|null $last_block Previous block, or null at the
   *   beginning of the input.
   * @param map<string, wild> $current_block Block which was just matched.
   * @return bool True if the blocks should be merged.
   */
  private static function shouldMergeParagraphBlocks(
    $text,
    $last_block,
    $current_block) {

    // If we're at the beginning of the input, we can't merge.
    if ($last_block === null) {
      return false;
    }

    // If the previous block wasn't a default block, we can't merge.
    if (!$last_block['merge']) {
      return false;
    }

    // If the current block isn't a default block, we can't merge.
    if (!$current_block['merge']) {
      return false;
    }

    // If the last block was empty, we definitely want to merge.
    if ($last_block['empty']) {
      return true;
    }

    // If this block is empty, we definitely want to merge.
    if ($current_block['empty']) {
      return true;
    }

    // Check if the last line of the previous block or the first line of this
    // block have any non-whitespace text. If they both do, we're going to
    // merge.

    // If either of them are a blank line or a line with only whitespace, we
    // do not merge: this means we've found a paragraph break.

    $tail = $text[$current_block['start'] - 1];
    $head = $text[$current_block['start']];
    if (strlen(trim($tail)) && strlen(trim($head))) {
      return true;
    }

    return false;
  }

  /**
   * Test whether a range of lines contains only whitespace.
   *
   * @param list<string> $text All lines of the text being split.
   * @param int $start Index of the first line in the range.
   * @param int $num_lines Number of lines in the range.
   * @return bool True if every line in the range trims to the empty string.
   */
  private static function isEmptyBlock($text, $start, $num_lines) {
    for ($cursor = $start; $cursor < $start + $num_lines; $cursor++) {
      if (strlen(trim($text[$cursor]))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Run the second rendering step on a dictionary produced by
   * @{method:preprocessText}.
   *
   * Metadata and storage are rebuilt from the dictionary, every block rule
   * and every registered postprocess rule is notified, and stored text is
   * then restored into the output.
   *
   * @param map<string, wild> $dict Dictionary with `output`, `storage` and
   *   `metadata` keys. A missing `metadata` or `storage` key is treated as
   *   an empty array.
   * @return wild Final rendered output.
   */
  public function postprocessText(array $dict) {
    $this->metadata = idx($dict, 'metadata', array());

    $this->storage = new PhutilRemarkupBlockStorage();
    $this->storage->setMap(idx($dict, 'storage', array()));

    foreach ($this->blockRules as $block_rule) {
      $block_rule->postprocess();
    }

    foreach ($this->postprocessRules as $rule) {
      $rule->didMarkupText();
    }

    return $this->restoreText(idx($dict, 'output'));
  }

  /**
   * Restore stored text into a value using the active block storage.
   *
   * @param wild $text Value to restore stored text into.
   * @return wild Result of the storage `restore()` call.
   */
  public function restoreText($text) {
    return $this->storage->restore($text, $this->isTextMode());
  }
}