@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
/**
 * Remarkup prevents several classes of text-processing problems by replacing
 * tokens in the text as they are marked up. For example, if you write something
 * like this:
 *
 *   //D12//
 *
 * It is processed in several stages. First the "D12" matches and is replaced
 * with a token, in the form of "<0x01><ID number><literal "Z">". The first
 * byte, "<0x01>" is a single byte with value 1 that marks a token. If this is
 * token ID "444", the text may now look like this:
 *
 *   //<0x01>444Z//
 *
 * Now the italics match and are replaced, using the next token ID:
 *
 *   <0x01>445Z
 *
 * When processing completes, all the tokens are replaced with their final
 * equivalents. For example, token 444 is evaluated to:
 *
 *   <a href="http://...">...</a>
 *
 * Then token 445 is evaluated:
 *
 *   <em><0x01>444Z</em>
 *
 * ...and all tokens it contains are replaced:
 *
 *   <em><a href="http://...">...</a></em>
 *
 * If we didn't do this, the italics rule could match the "//" in "http://",
 * or any other number of processing mistakes could occur, some of which create
 * security risks.
 *
 * This class generates keys, and stores the map of keys to replacement text.
 */
final class PhutilRemarkupBlockStorage extends Phobject {

  const MAGIC_BYTE = "\1";

  private $map = array();
  private $index = 0;

  /**
   * Store a piece of replacement text and return the token that stands in
   * for it.
   *
   * The text may itself contain tokens returned by earlier calls; they are
   * expanded when @{method:restore} runs.
   *
   * @param string $text Replacement text for the new token.
   * @return string Token in the format <0x01>1234Z
   */
  public function store($text) {
    $key = self::MAGIC_BYTE.(++$this->index).'Z';
    $this->map[$key] = $text;
    return $key;
  }

  /**
   * Expand every token in a corpus (and, recursively, in the stored content
   * of those tokens) into its replacement text.
   *
   * @param string $corpus Text which may contain tokens from this storage.
   * @param bool $text_mode When false (the default), the corpus and every
   *   stored content are passed through phutil_escape_html() before
   *   expansion and the result is passed through phutil_safe_html(). When
   *   true, both steps are skipped and the expanded text is returned as is.
   * @return mixed The expanded corpus: the raw expanded string in text mode,
   *   otherwise whatever phutil_safe_html() returns for it.
   * @throws Exception If the corpus or a stored content references a token
   *   which is not in the map, or which appears later in the map than the
   *   content referencing it.
   */
  public function restore($corpus, $text_mode = false) {
    $map = $this->map;

    if (!$text_mode) {
      foreach ($map as $key => $content) {
        $map[$key] = phutil_escape_html($content);
      }
      $corpus = phutil_escape_html($corpus);
    }

    // NOTE: Tokens may contain other tokens: for example, a table may have
    // links inside it. So we can't do a single simple find/replace, because
    // we need to find and replace child tokens inside the content of parent
    // tokens.

    // However, we know that rules which have child tokens must always store
    // all their child tokens first, before they store their parent token: you
    // have to pass the "store(text)" API a block of text with tokens already
    // in it, so you must have created child tokens already.

    // Thus, all child tokens will appear in the list before parent tokens, so
    // if we start at the beginning of the list and replace all the tokens we
    // find in each piece of content, we'll end up expanding all subtokens
    // correctly.

    // Append the corpus as the final entry so it is expanded by the same loop
    // after every token it may reference has been expanded.
    $map[] = $corpus;
    $seen = array();
    foreach ($map as $key => $content) {
      $seen[$key] = true;

      // If the content contains no token magic, we don't need to replace
      // anything.
      if (strpos($content, self::MAGIC_BYTE) === false) {
        continue;
      }

      $matches = null;
      preg_match_all(
        '/'.self::MAGIC_BYTE.'\d+Z/',
        $content,
        $matches,
        PREG_OFFSET_CAPTURE);

      // Each entry is a pair: array(<matched token>, <byte offset>).
      $matches = $matches[0];

      // See PHI1114. We're replacing all the matches in one pass because this
      // is significantly faster than doing "substr_replace()" in a loop if the
      // corpus is large and we have a large number of matches.

      // Build a list of string pieces in "$parts" by interleaving the
      // plain strings between each token and the replacement token text, then
      // implode the whole thing when we're done.

      $parts = array();
      $pos = 0;
      foreach ($matches as $next) {
        $subkey = $next[0];

        // If we've matched a token pattern but have not already processed a
        // token with that key, fail loudly rather than emitting a raw token:
        // either the token does not exist at all, or it was stored after the
        // content which references it, which violates the child-before-parent
        // ordering described above.
        if (!isset($seen[$subkey])) {
          if (!isset($map[$subkey])) {
            throw new Exception(
              pht(
                'Matched token key "%s" while processing remarkup block, but '.
                'this token does not exist in the token map.',
                $subkey));
          } else {
            throw new Exception(
              pht(
                'Matched token key "%s" while processing remarkup block, but '.
                'this token appears later in the list than the key being '.
                'processed ("%s").',
                $subkey,
                $key));
          }
        }

        $subpos = $next[1];

        // If there were any non-token bytes since the last token, add them.
        if ($subpos > $pos) {
          $parts[] = substr($content, $pos, $subpos - $pos);
        }

        // Add the token replacement text. Because the token was already seen,
        // its own subtokens have already been expanded.
        $parts[] = $map[$subkey];

        // Move the non-token cursor forward over the token.
        $pos = $subpos + strlen($subkey);
      }

      // Add any leftover non-token bytes after the last token.
      $parts[] = substr($content, $pos);

      $content = implode('', $parts);

      $map[$key] = $content;
    }

    // The corpus was appended as the final entry of the map above.
    $corpus = last($map);

    if (!$text_mode) {
      $corpus = phutil_safe_html($corpus);
    }

    return $corpus;
  }

  /**
   * Replace the text stored under a token key.
   *
   * The key is not validated: if it is not already in the map, a new entry
   * is added under it.
   *
   * @param string $key Token key, as returned by @{method:store}.
   * @param string $new_text Replacement text to store under the key.
   * @return $this
   */
  public function overwrite($key, $new_text) {
    $this->map[$key] = $new_text;
    return $this;
  }

  /**
   * Get the map of token keys to replacement text.
   *
   * @return map<string, string> Stored text, keyed by token, in the order
   *   the tokens were stored.
   */
  public function getMap() {
    return $this->map;
  }

  /**
   * Replace the whole map of token keys to replacement text.
   *
   * The internal key counter is advanced past the highest token ID present
   * in the new map (it is never lowered), so that a later call to
   * @{method:store} generates a fresh key instead of reusing one from the
   * map and silently overwriting that token's content.
   *
   * @param map<string, string> $map Map in the shape returned by
   *   @{method:getMap}.
   * @return $this
   */
  public function setMap(array $map) {
    $this->map = $map;

    // Only keys in the exact "<0x01><digits>Z" token format contribute to the
    // counter; any other keys are stored but otherwise ignored here.
    $pattern = '/^'.self::MAGIC_BYTE.'(\d+)Z\z/';
    foreach ($map as $key => $content) {
      $matches = null;
      if (preg_match($pattern, (string)$key, $matches)) {
        $this->index = max($this->index, (int)$matches[1]);
      }
    }

    return $this;
  }

}