@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
at recaptime-dev/main 179 lines 5.4 kB view raw
<?php

/**
 * Generates placeholder tokens for remarkup text and keeps the map from each
 * token to the text it stands in for.
 *
 * Remarkup avoids several classes of text-processing problems by swapping
 * pieces of the text for tokens as they are marked up. For example, given:
 *
 *   //D12//
 *
 * processing happens in stages. First "D12" matches and is replaced with a
 * token of the form "<0x01><ID number><literal "Z">", where "<0x01>" is a
 * single byte with value 1 that marks a token. If this is token ID "444", the
 * text now looks like this:
 *
 *   //<0x01>444Z//
 *
 * Next the italics match and are replaced, using the next token ID:
 *
 *   <0x01>445Z
 *
 * Once processing completes, every token is replaced with its final
 * equivalent. Token 444 evaluates to:
 *
 *   <a href="http://...">...</a>
 *
 * Token 445 evaluates to:
 *
 *   <em><0x01>444Z</em>
 *
 * ...and the tokens it contains are replaced in turn:
 *
 *   <em><a href="http://...">...</a></em>
 *
 * Without this, the italics rule could match the "//" in "http://", or any
 * number of other processing mistakes could occur, some of which create
 * security risks.
 */
final class PhutilRemarkupBlockStorage extends Phobject {

  // Single byte with value 1; every token begins with it.
  const MAGIC_BYTE = "\1";

  // Token => replacement text, in the order the tokens were stored.
  private $map = array();

  // ID of the most recently issued token.
  private $index = 0;

  /**
   * Remember a piece of text and issue the token which stands in for it.
   *
   * @param wild $text Replacement text for the new token.
   * @return string Token in the format <0x01>1234Z
   */
  public function store($text) {
    $this->index++;

    $token = self::MAGIC_BYTE.$this->index.'Z';
    $this->map[$token] = $text;

    return $token;
  }

  /**
   * Replace every token in a corpus with the text stored for it, including
   * tokens nested inside the replacement text of other tokens.
   *
   * @param wild $corpus Text containing tokens issued by @{method:store}.
   * @param bool $text_mode When false (the default), the corpus and every
   *   stored replacement are passed through phutil_escape_html() before
   *   expansion and the result is passed through phutil_safe_html(). When
   *   true, neither call is made.
   * @return wild Expanded corpus: the value returned by phutil_safe_html()
   *   unless `$text_mode` is set, in which case the expanded text itself.
   * @throws Exception If the text contains a token which is not in the map,
   *   or which is stored later in the map than the entry that uses it.
   */
  public function restore($corpus, $text_mode = false) {
    $is_html = !$text_mode;

    $map = $this->map;
    if ($is_html) {
      // With a single input array, array_map() keeps the token keys intact.
      $map = array_map('phutil_escape_html', $map);
      $corpus = phutil_escape_html($corpus);
    }

    // NOTE: A token's replacement text may itself contain tokens (a table
    // with links inside it, for example), so one flat find/replace over the
    // corpus is not enough: child tokens inside parent content have to be
    // expanded too.

    // A rule can only hand "store(text)" a block of text which already has
    // its child tokens in it, so child tokens are always stored ahead of the
    // parent which contains them. Walking the map from the front and
    // expanding each entry against the entries before it therefore resolves
    // every level of nesting.

    // The corpus goes at the very end so that it is expanded against the
    // complete set of tokens.
    $map[] = $corpus;

    $seen = array();
    foreach ($map as $key => $content) {
      $seen[$key] = true;

      // Content with no magic byte in it cannot contain a token, so it is
      // left exactly as it is.
      if (strpos($content, self::MAGIC_BYTE) !== false) {
        $map[$key] = $this->expandTokens($key, $content, $map, $seen);
      }
    }

    $result = last($map);
    if ($is_html) {
      $result = phutil_safe_html($result);
    }

    return $result;
  }

  /**
   * Expand every token found in one piece of content.
   *
   * @param wild $key Map key of the entry being expanded; only used in error
   *   messages.
   * @param string $content Text to expand.
   * @param map<wild, wild> $map Replacement text by token.
   * @param map<wild, bool> $seen Keys of the entries visited so far.
   * @return string Content with each token replaced by its mapped text.
   * @throws Exception If a matched token has not been visited yet.
   */
  private function expandTokens($key, $content, array $map, array $seen) {
    $matches = null;
    preg_match_all(
      '/'.self::MAGIC_BYTE.'\d+Z/',
      $content,
      $matches,
      PREG_OFFSET_CAPTURE);

    // See PHI1114. Everything is spliced in one pass because that is
    // significantly faster than calling "substr_replace()" in a loop when
    // the corpus is large and has a large number of matches.

    // "$pieces" alternates between the plain text found between tokens and
    // the replacement text of each token; it is joined once at the end.
    $pieces = array();
    $cursor = 0;
    foreach ($matches[0] as $match) {
      list($token, $offset) = $match;

      // Every token must belong to an entry we have already visited.
      // Anything else is reported as an error rather than skipped.
      if (!isset($seen[$token])) {
        if (isset($map[$token])) {
          throw new Exception(
            pht(
              'Matched token key "%s" while processing remarkup block, but '.
              'this token appears later in the list than the key being '.
              'processed ("%s").',
              $token,
              $key));
        }

        throw new Exception(
          pht(
            'Matched token key "%s" while processing remarkup block, but '.
            'this token does not exist in the token map.',
            $token));
      }

      // Keep any plain bytes sitting between the previous token and this
      // one.
      if ($offset > $cursor) {
        $pieces[] = substr($content, $cursor, $offset - $cursor);
      }

      // Put the replacement text where the token was.
      $pieces[] = $map[$token];

      // Step the cursor past the token itself.
      $cursor = $offset + strlen($token);
    }

    // Keep whatever plain bytes follow the final token.
    $pieces[] = substr($content, $cursor);

    return implode('', $pieces);
  }

  /**
   * Set the replacement text stored under a token.
   *
   * @param string $key Token to write to.
   * @param wild $new_text Replacement text to store under the token.
   * @return this
   */
  public function overwrite($key, $new_text) {
    $this->map[$key] = $new_text;
    return $this;
  }

  /**
   * Get the map of tokens to replacement text.
   *
   * @return map<string, wild> Replacement text by token.
   */
  public function getMap() {
    return $this->map;
  }

  /**
   * Replace the whole map of tokens to replacement text.
   *
   * @param map<string, wild> $map Replacement text by token.
   * @return this
   */
  public function setMap(array $map) {
    $this->map = $map;
    return $this;
  }

}