@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
at recaptime-dev/main 374 lines 9.8 kB view raw
<?php

/**
 * Markup engine for Remarkup text.
 *
 * Processing happens in two passes: @{method:preprocessText} splits the input
 * into blocks using the configured block rules and marks each block up, and
 * @{method:postprocessText} runs the rules' postprocessing hooks and restores
 * stored text into the output. @{method:markupText} runs both passes.
 */
final class PhutilRemarkupEngine extends PhutilMarkupEngine {

  // Rendering modes. These are tested with bitwise AND in isTextMode() and
  // isHTMLMailMode().
  const MODE_DEFAULT = 0;
  const MODE_TEXT = 1;
  const MODE_HTML_MAIL = 2;

  // Maximum nesting depth to which child blocks are split apart.
  const MAX_CHILD_DEPTH = 32;

  private $blockRules = array();
  private $config = array();
  private $mode;
  private $metadata = array();
  private $states = array();
  private $postprocessRules = array();
  private $storage;

  /**
   * Set a configuration value.
   *
   * @param string $key Configuration key.
   * @param mixed $value Value to store under the key.
   * @return $this
   */
  public function setConfig($key, $value) {
    $this->config[$key] = $value;
    return $this;
  }

  /**
   * Read a configuration value.
   *
   * @param string $key Configuration key.
   * @param mixed $default Value returned when the key is not set.
   * @return mixed
   */
  public function getConfig($key, $default = null) {
    return idx($this->config, $key, $default);
  }

  /**
   * Set the rendering mode.
   *
   * @param int $mode One of the MODE_* constants (or a bitwise combination).
   * @return $this
   */
  public function setMode($mode) {
    $this->mode = $mode;
    return $this;
  }

  /**
   * Test whether the MODE_TEXT bit is set in the current mode.
   *
   * @return int Nonzero if the engine is in text mode, 0 otherwise.
   */
  public function isTextMode() {
    return $this->mode & self::MODE_TEXT;
  }

  /**
   * Test whether the "toc" state is currently active.
   *
   * @return bool
   */
  public function isAnchorMode() {
    return $this->getState('toc');
  }

  /**
   * Test whether the MODE_HTML_MAIL bit is set in the current mode.
   *
   * @return int Nonzero if the engine is in HTML mail mode, 0 otherwise.
   */
  public function isHTMLMailMode() {
    return $this->mode & self::MODE_HTML_MAIL;
  }

  /**
   * Get the current quote depth, stored under the "runtime.quote.depth"
   * configuration key.
   *
   * @return mixed The stored depth, or 0 if none has been set.
   */
  public function getQuoteDepth() {
    return $this->getConfig('runtime.quote.depth', 0);
  }

  /**
   * Set the current quote depth, stored under the "runtime.quote.depth"
   * configuration key.
   *
   * @param int $depth New quote depth.
   * @return $this
   */
  public function setQuoteDepth($depth) {
    return $this->setConfig('runtime.quote.depth', $depth);
  }

  /**
   * Install the block rules used to split and mark up text.
   *
   * Rules are sorted by their priority vector and attached to this engine.
   * Markup rules which provide a non-null postprocess key are collected for
   * use by @{method:postprocessText}.
   *
   * @param array<PhutilRemarkupBlockRule> $rules
   * @return $this
   */
  public function setBlockRules(array $rules) {
    assert_instances_of($rules, PhutilRemarkupBlockRule::class);

    $rules = msortv($rules, 'getPriorityVector');

    $this->blockRules = $rules;
    foreach ($this->blockRules as $rule) {
      $rule->setEngine($this);
    }

    // Index by postprocess key: if several markup rules share a key, the
    // last one encountered is the one that is kept.
    $post_rules = array();
    foreach ($this->blockRules as $block_rule) {
      foreach ($block_rule->getMarkupRules() as $rule) {
        $key = $rule->getPostprocessKey();
        if ($key !== null) {
          $post_rules[$key] = $rule;
        }
      }
    }

    $this->postprocessRules = $post_rules;

    return $this;
  }

  /**
   * Read a metadata value recorded for the text being processed.
   *
   * @param string $key Metadata key.
   * @param mixed $default Value returned when the key is not set.
   * @return mixed
   */
  public function getTextMetadata($key, $default = null) {
    // NOTE(review): This isset() check returns the same result idx() would;
    // presumably it is a fast path that skips the function call.
    if (isset($this->metadata[$key])) {
      return $this->metadata[$key];
    }
    return idx($this->metadata, $key, $default);
  }

  /**
   * Record a metadata value for the text being processed.
   *
   * @param string $key Metadata key.
   * @param mixed $value Value to store under the key.
   * @return $this
   */
  public function setTextMetadata($key, $value) {
    $this->metadata[$key] = $value;
    return $this;
  }

  /**
   * Store text in the block storage.
   *
   * In text mode, the text is wrapped with phutil_safe_html() before it is
   * stored.
   *
   * @param mixed $text Text to store.
   * @return mixed Result of the storage's store() call (presumably a token
   *   identifying the stored text; see PhutilRemarkupBlockStorage).
   */
  public function storeText($text) {
    if ($this->isTextMode()) {
      $text = phutil_safe_html($text);
    }
    return $this->storage->store($text);
  }

  /**
   * Replace text previously stored in the block storage.
   *
   * In text mode, the new text is wrapped with phutil_safe_html() before it
   * is stored.
   *
   * @param mixed $token Token identifying the stored text.
   * @param mixed $new_text Replacement text.
   * @return $this
   */
  public function overwriteStoredText($token, $new_text) {
    if ($this->isTextMode()) {
      $new_text = phutil_safe_html($new_text);
    }
    $this->storage->overwrite($token, $new_text);
    return $this;
  }

  /**
   * Mark up text by running the preprocessing and postprocessing passes.
   *
   * @param string $text Remarkup source text.
   * @return mixed Result of @{method:postprocessText}.
   */
  public function markupText($text) {
    return $this->postprocessText($this->preprocessText($text));
  }

  /**
   * Enter a named state. States are counted, so a state pushed several times
   * must be popped the same number of times before it becomes inactive.
   *
   * @param string $state State name.
   * @return $this
   */
  public function pushState($state) {
    if (empty($this->states[$state])) {
      $this->states[$state] = 0;
    }
    $this->states[$state]++;
    return $this;
  }

  /**
   * Leave a named state previously entered with @{method:pushState}.
   *
   * @param string $state State name.
   * @return $this
   * @throws Exception If the state is not currently active, i.e. it has been
   *   popped more times than it was pushed.
   */
  public function popState($state) {
    if (empty($this->states[$state])) {
      throw new Exception(
        pht(
          "State '%s' popped more than pushed!",
          $state));
    }
    $this->states[$state]--;
    // Drop the counter entirely once it reaches zero.
    if (!$this->states[$state]) {
      unset($this->states[$state]);
    }
    return $this;
  }

  /**
   * Test whether a named state is currently active.
   *
   * @param string $state State name.
   * @return bool
   */
  public function getState($state) {
    return !empty($this->states[$state]);
  }

  /**
   * Run the first markup pass: split text into blocks and mark each one up.
   *
   * Metadata is reset and a fresh block storage is created for the duration
   * of this call; the storage is discarded before returning.
   *
   * @param string $text Remarkup source text.
   * @return array Dictionary with keys "output" (the flattened block output),
   *   "storage" (the storage map), and "metadata".
   */
  public function preprocessText($text) {
    $this->metadata = array();
    $this->storage = new PhutilRemarkupBlockStorage();

    $blocks = $this->splitTextIntoBlocks($text);

    $output = array();
    foreach ($blocks as $block) {
      $output[] = $this->markupBlock($block);
    }
    $output = $this->flattenOutput($output);

    $map = $this->storage->getMap();
    $this->storage = null;
    $metadata = $this->metadata;


    return array(
      'output' => $output,
      'storage' => $map,
      'metadata' => $metadata,
    );
  }

  /**
   * Split text into a list of blocks, recursing into child blocks for rules
   * which support them.
   *
   * Each block is a dictionary with keys "start", "num_lines", "rule",
   * "empty", "children", "merge" and "text".
   *
   * @param string $text Text to split.
   * @param int $depth Current child nesting depth.
   * @return list<array> List of block dictionaries.
   * @throws Exception If no block rule matches at some position in the text.
   */
  private function splitTextIntoBlocks($text, $depth = 0) {
    // Apply basic block and paragraph normalization to the text. NOTE: We don't
    // strip trailing whitespace because it is semantic in some contexts,
    // notably inlined diffs that the author intends to show as a code block.
    $text = phutil_split_lines($text, true);
    $block_rules = $this->blockRules;
    $blocks = array();
    $cursor = 0;

    // Only blocks matched by a default block rule are eligible for merging.
    $can_merge = array();
    foreach ($block_rules as $key => $block_rule) {
      if ($block_rule instanceof PhutilRemarkupDefaultBlockRule) {
        $can_merge[$key] = true;
      }
    }

    // Track the most recently appended block and its index in $blocks so a
    // merge can update it in place.
    $last_block = null;
    $last_block_key = -1;

    // See T13487. For very large inputs, block separation can dominate
    // runtime. This is written somewhat clumsily to attempt to handle
    // very large inputs as gracefully as is practical.

    while (isset($text[$cursor])) {
      $starting_cursor = $cursor;
      foreach ($block_rules as $block_key => $block_rule) {
        $num_lines = $block_rule->getMatchingLineCount($text, $cursor);

        if ($num_lines) {
          $current_block = array(
            'start' => $cursor,
            'num_lines' => $num_lines,
            'rule' => $block_rule,
            'empty' => self::isEmptyBlock($text, $cursor, $num_lines),
            'children' => array(),
            'merge' => isset($can_merge[$block_key]),
          );

          $should_merge = self::shouldMergeParagraphBlocks(
            $text,
            $last_block,
            $current_block);

          if ($should_merge) {
            $last_block['num_lines'] =
              ($last_block['num_lines'] + $current_block['num_lines']);

            $last_block['empty'] =
              ($last_block['empty'] && $current_block['empty']);

            $blocks[$last_block_key] = $last_block;
          } else {
            $blocks[] = $current_block;

            $last_block = $current_block;
            $last_block_key++;
          }

          $cursor += $num_lines;

          // The first rule which matches wins; move on to the next position.
          break;
        }
      }

      // If the cursor did not advance, no rule matched and the loop would
      // never terminate.
      if ($starting_cursor === $cursor) {
        throw new Exception(pht('Block in text did not match any block rule.'));
      }
    }

    // See T13487. It's common for blocks to be small, and this loop seems to
    // measure as faster if we manually concatenate blocks than if we
    // "array_slice()" and "implode()" blocks. This is a bit muddy.

    foreach ($blocks as $key => $block) {
      $min = $block['start'];
      $max = $min + $block['num_lines'];

      $lines = '';
      for ($ii = $min; $ii < $max; $ii++) {
        $lines .= $text[$ii];
      }

      $blocks[$key]['text'] = $lines;
    }

    // Stop splitting child blocks apart if we get too deep. This arrests
    // any blocks which have looping child rules, and stops the stack from
    // exploding if someone writes a hilarious comment with 5,000 levels of
    // quoted text.

    if ($depth < self::MAX_CHILD_DEPTH) {
      foreach ($blocks as $key => $block) {
        $rule = $block['rule'];
        if (!$rule->supportsChildBlocks()) {
          continue;
        }

        list($parent_text, $child_text) = $rule->extractChildText(
          $block['text']);
        $blocks[$key]['text'] = $parent_text;
        $blocks[$key]['children'] = $this->splitTextIntoBlocks(
          $child_text,
          $depth + 1);
      }
    }

    return $blocks;
  }

  /**
   * Mark up a single block, marking up its child blocks first.
   *
   * The rule's willMarkupChildBlocks() and didMarkupChildBlocks() hooks are
   * invoked around child markup, whether or not the block has children.
   *
   * @param array $block Block dictionary from splitTextIntoBlocks().
   * @return mixed Result of the block rule's markupText() call.
   */
  private function markupBlock(array $block) {
    $rule = $block['rule'];

    $rule->willMarkupChildBlocks();

    $children = array();
    foreach ($block['children'] as $child) {
      $children[] = $this->markupBlock($child);
    }

    $rule->didMarkupChildBlocks();

    // Rules receive null, not an empty value, when there are no children.
    if ($children) {
      $children = $this->flattenOutput($children);
    } else {
      $children = null;
    }

    return $rule->markupText($block['text'], $children);
  }

  /**
   * Join a list of marked-up blocks into a single output value.
   *
   * In text mode, blocks are joined with blank lines and a trailing newline
   * is appended. Otherwise, blocks are joined with phutil_implode_html().
   *
   * @param array $output List of marked-up blocks.
   * @return mixed Joined output.
   */
  private function flattenOutput(array $output) {
    if ($this->isTextMode()) {
      $output = implode("\n\n", $output)."\n";
    } else {
      $output = phutil_implode_html("\n\n", $output);
    }

    return $output;
  }

  /**
   * Decide whether the current block should be merged into the previous one.
   *
   * @param list<string> $text Lines of the text being split.
   * @param array|null $last_block Previous block, or null at start of input.
   * @param array $current_block Block which was just matched.
   * @return bool True if the blocks should be merged.
   */
  private static function shouldMergeParagraphBlocks(
    $text,
    $last_block,
    $current_block) {

    // If we're at the beginning of the input, we can't merge.
    if ($last_block === null) {
      return false;
    }

    // If the previous block wasn't a default block, we can't merge.
    if (!$last_block['merge']) {
      return false;
    }

    // If the current block isn't a default block, we can't merge.
    if (!$current_block['merge']) {
      return false;
    }

    // If the last block was empty, we definitely want to merge.
    if ($last_block['empty']) {
      return true;
    }

    // If this block is empty, we definitely want to merge.
    if ($current_block['empty']) {
      return true;
    }

    // Check if the last line of the previous block or the first line of this
    // block have any non-whitespace text. If they both do, we're going to
    // merge.

    // If either of them are a blank line or a line with only whitespace, we
    // do not merge: this means we've found a paragraph break.

    $tail = $text[$current_block['start'] - 1];
    $head = $text[$current_block['start']];
    if (strlen(trim($tail)) && strlen(trim($head))) {
      return true;
    }

    return false;
  }

  /**
   * Test whether a range of lines contains only whitespace.
   *
   * @param list<string> $text Lines of the text being split.
   * @param int $start Index of the first line in the range.
   * @param int $num_lines Number of lines in the range.
   * @return bool True if every line in the range is empty after trimming.
   */
  private static function isEmptyBlock($text, $start, $num_lines) {
    for ($cursor = $start; $cursor < $start + $num_lines; $cursor++) {
      if (strlen(trim($text[$cursor]))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Run the second markup pass over the result of @{method:preprocessText}.
   *
   * Restores metadata and storage from the dictionary, invokes postprocess()
   * on each block rule and didMarkupText() on each collected postprocess
   * rule, then restores stored text into the output. The storage is not
   * cleared afterwards.
   *
   * @param array $dict Dictionary with optional keys "metadata", "storage"
   *   and "output".
   * @return mixed Output with stored text restored.
   */
  public function postprocessText(array $dict) {
    $this->metadata = idx($dict, 'metadata', array());

    $this->storage = new PhutilRemarkupBlockStorage();
    $this->storage->setMap(idx($dict, 'storage', array()));

    foreach ($this->blockRules as $block_rule) {
      $block_rule->postprocess();
    }

    foreach ($this->postprocessRules as $rule) {
      $rule->didMarkupText();
    }

    return $this->restoreText(idx($dict, 'output'));
  }

  /**
   * Restore stored text into the given text using the current storage.
   *
   * @param mixed $text Text containing storage tokens.
   * @return mixed Result of the storage's restore() call.
   */
  public function restoreText($text) {
    return $this->storage->restore($text, $this->isTextMode());
  }
}