Serenity Operating System
1/*
2 * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/ByteBuffer.h>
8#include <LibCrypto/Hash/MD5.h>
9#include <LibPDF/CommonNames.h>
10#include <LibPDF/Document.h>
11#include <LibPDF/Encryption.h>
12
13namespace PDF {
14
15static constexpr Array<u8, 32> standard_encryption_key_padding_bytes = {
16 0x28,
17 0xBF,
18 0x4E,
19 0x5E,
20 0x4E,
21 0x75,
22 0x8A,
23 0x41,
24 0x64,
25 0x00,
26 0x4E,
27 0x56,
28 0xFF,
29 0xFA,
30 0x01,
31 0x08,
32 0x2E,
33 0x2E,
34 0x00,
35 0xB6,
36 0xD0,
37 0x68,
38 0x3E,
39 0x80,
40 0x2F,
41 0x0C,
42 0xA9,
43 0xFE,
44 0x64,
45 0x53,
46 0x69,
47 0x7A,
48};
49
50PDFErrorOr<NonnullRefPtr<SecurityHandler>> SecurityHandler::create(Document* document, NonnullRefPtr<DictObject> encryption_dict)
51{
52 auto filter = TRY(encryption_dict->get_name(document, CommonNames::Filter))->name();
53 if (filter == "Standard")
54 return TRY(StandardSecurityHandler::create(document, encryption_dict));
55
56 dbgln("Unrecognized security handler filter: {}", filter);
57 TODO();
58}
59
60PDFErrorOr<NonnullRefPtr<StandardSecurityHandler>> StandardSecurityHandler::create(Document* document, NonnullRefPtr<DictObject> encryption_dict)
61{
62 auto revision = encryption_dict->get_value(CommonNames::R).get<int>();
63 auto o = TRY(encryption_dict->get_string(document, CommonNames::O))->string();
64 auto u = TRY(encryption_dict->get_string(document, CommonNames::U))->string();
65 auto p = encryption_dict->get_value(CommonNames::P).get<int>();
66
67 // V, number: [...] 1 "Algorithm 1 Encryption of data using the RC4 or AES algorithms" in 7.6.2,
68 // "General Encryption Algorithm," with an encryption key length of 40 bits, see below [...]
69 // Lenght, integer: (Optional; PDF 1.4; only if V is 2 or 3) The length of the encryption key, in bits.
70 // The value shall be a multiple of 8, in the range 40 to 128. Default value: 40.
71 int length_in_bits;
72 auto v = encryption_dict->get_value(CommonNames::V).get<int>();
73 if (encryption_dict->contains(CommonNames::Length))
74 length_in_bits = encryption_dict->get_value(CommonNames::Length).get<int>();
75 else if (v == 1)
76 length_in_bits = 40;
77 else
78 return Error(Error::Type::Parse, "Can't determine length of encryption key");
79 auto length = length_in_bits / 8;
80
81 bool encrypt_metadata = true;
82 if (encryption_dict->contains(CommonNames::EncryptMetadata))
83 encryption_dict->get_value(CommonNames::EncryptMetadata).get<bool>();
84 return adopt_ref(*new StandardSecurityHandler(document, revision, o, u, p, encrypt_metadata, length));
85}
86
// Stores the parameters read from the encryption dictionary by create():
//  - revision:         the dictionary's R entry
//  - o_entry, u_entry: the O and U strings
//  - flags:            the P entry
//  - encrypt_metadata: the EncryptMetadata flag
//  - length:           the encryption key length in bytes (create() passes Length / 8)
// No key is derived here; that happens in compute_encryption_key().
StandardSecurityHandler::StandardSecurityHandler(Document* document, size_t revision, DeprecatedString const& o_entry, DeprecatedString const& u_entry, u32 flags, bool encrypt_metadata, size_t length)
    : m_document(document)
    , m_revision(revision)
    , m_o_entry(o_entry)
    , m_u_entry(u_entry)
    , m_flags(flags)
    , m_encrypt_metadata(encrypt_metadata)
    , m_length(length)
{
}
97
98template<>
99ByteBuffer StandardSecurityHandler::compute_user_password_value<true>(ByteBuffer password_string)
100{
101 // Algorithm 4: Computing the encryption dictionary's U (user password)
102 // value (Security handlers of revision 2)
103
104 // a) Create an encryption key based on the user password string, as
105 // described in [Algorithm 2]
106 auto encryption_key = compute_encryption_key(password_string);
107
108 // b) Encrypt the 32-byte padding string shown in step (a) of [Algorithm 2],
109 // using an RC4 encryption function with the encryption key from the
110 // preceding step.
111 RC4 rc4(encryption_key);
112 auto output = rc4.encrypt(standard_encryption_key_padding_bytes);
113
114 // c) Store the result of step (b) as the value of the U entry in the
115 // encryption dictionary.
116 return output;
117}
118
119template<>
120ByteBuffer StandardSecurityHandler::compute_user_password_value<false>(ByteBuffer password_string)
121{
122 // Algorithm 5: Computing the encryption dictionary's U (user password)
123 // value (Security handlers of revision 3 or greater)
124
125 // a) Create an encryption key based on the user password string, as
126 // described in [Algorithm 2]
127 auto encryption_key = compute_encryption_key(password_string);
128
129 // b) Initialize the MD5 hash function and pass the 32-byte padding string
130 // shown in step (a) of [Algorithm 2] as input to this function
131 Crypto::Hash::MD5 md5;
132 md5.update(standard_encryption_key_padding_bytes);
133
134 // e) Pass the first element of the file's file identifier array to the MD5
135 // hash function.
136 auto id_array = MUST(m_document->trailer()->get_array(m_document, CommonNames::ID));
137 auto first_element_string = MUST(id_array->get_string_at(m_document, 0))->string();
138 md5.update(first_element_string);
139
140 // d) Encrypt the 16-byte result of the hash, using an RC4 encryption function
141 // with the encryption key from step (a).
142 RC4 rc4(encryption_key);
143 auto out = md5.peek();
144 auto buffer = rc4.encrypt(out.bytes());
145
146 // e) Do the following 19 times:
147 //
148 // Take the output from the previous invocation of the RC4 function and pass
149 // it as input to a new invocation of the function; use an encryption key generated
150 // by taking each byte of the original encryption key obtained in step (a) and
151 // performing an XOR operation between the that byte and the single-byte value of
152 // the iteration counter (from 1 to 19).
153 auto new_encryption_key = MUST(ByteBuffer::create_uninitialized(encryption_key.size()));
154 for (size_t i = 1; i <= 19; i++) {
155 for (size_t j = 0; j < encryption_key.size(); j++)
156 new_encryption_key[j] = encryption_key[j] ^ i;
157
158 RC4 new_rc4(new_encryption_key);
159 buffer = new_rc4.encrypt(buffer);
160 }
161
162 // f) Append 16 bytes of the arbitrary padding to the output from the final invocation
163 // of the RC4 function and store the 32-byte result as the value of the U entry in
164 // the encryption dictionary.
165 VERIFY(buffer.size() == 16);
166 for (size_t i = 0; i < 16; i++)
167 buffer.append(0xab);
168
169 return buffer;
170}
171
172bool StandardSecurityHandler::try_provide_user_password(StringView password_string)
173{
174 // Algorithm 6: Authenticating the user password
175
176 // a) Perform all but the last step of [Algorithm 4] or [Algorithm 5] using the
177 // supplied password string.
178 ByteBuffer password_buffer = MUST(ByteBuffer::copy(password_string.bytes()));
179 if (m_revision == 2) {
180 password_buffer = compute_user_password_value<true>(password_buffer);
181 } else {
182 password_buffer = compute_user_password_value<false>(password_buffer);
183 }
184
185 // b) If the result of step (a) is equal to the value of the encryption
186 // dictionary's "U" entry (comparing the first 16 bytes in the case of security
187 // handlers of revision 3 or greater), the password supplied is the correct user
188 // password.
189 auto u_bytes = m_u_entry.bytes();
190 bool has_user_password;
191 if (m_revision >= 3)
192 has_user_password = u_bytes.slice(0, 16) == password_buffer.bytes().slice(0, 16);
193 else
194 has_user_password = u_bytes == password_buffer.bytes();
195 if (!has_user_password)
196 m_encryption_key = {};
197 return has_user_password;
198}
199
200ByteBuffer StandardSecurityHandler::compute_encryption_key(ByteBuffer password_string)
201{
202 // This function should never be called after we have a valid encryption key.
203 VERIFY(!m_encryption_key.has_value());
204
205 // 7.6.3.3 Encryption Key Algorithm
206
207 // Algorithm 2: Computing an encryption key
208
209 // a) Pad or truncate the password string to exactly 32 bytes. If the password string
210 // is more than 32 bytes long, use only its first 32 bytes; if it is less than 32
211 // bytes long, pad it by appending the required number of additional bytes from the
212 // beginning of the following padding string: [omitted]
213
214 if (password_string.size() > 32) {
215 password_string.resize(32);
216 } else {
217 password_string.append(standard_encryption_key_padding_bytes.data(), 32 - password_string.size());
218 }
219
220 // b) Initialize the MD5 hash function and pass the result of step (a) as input to
221 // this function.
222 Crypto::Hash::MD5 md5;
223 md5.update(password_string);
224
225 // c) Pass the value of the encryption dictionary's "O" entry to the MD5 hash function.
226 md5.update(m_o_entry);
227
228 // d) Convert the integer value of the P entry to a 32-bit unsigned binary number and pass
229 // these bytes to the MD5 hash function, low-order byte first.
230 md5.update(reinterpret_cast<u8 const*>(&m_flags), sizeof(m_flags));
231
232 // e) Pass the first element of the file's file identifier array to the MD5 hash function.
233 auto id_array = MUST(m_document->trailer()->get_array(m_document, CommonNames::ID));
234 auto first_element_string = MUST(id_array->get_string_at(m_document, 0))->string();
235 md5.update(first_element_string);
236
237 // f) (Security handlers of revision 4 or greater) if the document metadata is not being
238 // encrypted, pass 4 bytes with the value 0xffffffff to the MD5 hash function.
239 if (m_revision >= 4 && !m_encrypt_metadata) {
240 u32 value = 0xffffffff;
241 md5.update(reinterpret_cast<u8 const*>(&value), 4);
242 }
243
244 // g) Finish the hash.
245 // h) (Security handlers of revision 3 or greater) Do the following 50 times:
246 //
247 // Take the output from the previous MD5 hash and pass the first n bytes
248 // of the output as input into a new MD5 hash, where n is the number of
249 // bytes of the encryption key as defined by the value of the encryption
250 // dictionary's Length entry.
251 if (m_revision >= 3) {
252 ByteBuffer n_bytes;
253
254 for (u32 i = 0; i < 50; i++) {
255 Crypto::Hash::MD5 new_md5;
256 n_bytes.ensure_capacity(m_length);
257
258 while (n_bytes.size() < m_length) {
259 auto out = md5.peek();
260 for (size_t j = 0; j < out.data_length() && n_bytes.size() < m_length; j++)
261 n_bytes.append(out.data[j]);
262 }
263
264 VERIFY(n_bytes.size() == m_length);
265 new_md5.update(n_bytes);
266 md5 = move(new_md5);
267 n_bytes.clear();
268 }
269 }
270
271 // i) Set the encryption key to the first n bytes of the output from the final MD5
272 // hash, where n shall always be 5 for security handlers of revision 2 but, for
273 // security handlers of revision 3 or greater, shall depend on the value of the
274 // encryption dictionary's Length entry.
275 size_t n;
276 if (m_revision == 2) {
277 n = 5;
278 } else if (m_revision >= 3) {
279 n = m_length;
280 } else {
281 VERIFY_NOT_REACHED();
282 }
283
284 ByteBuffer encryption_key;
285 encryption_key.ensure_capacity(n);
286 while (encryption_key.size() < n) {
287 auto out = md5.peek();
288 for (size_t i = 0; encryption_key.size() < n && i < out.data_length(); i++)
289 encryption_key.append(out.bytes()[i]);
290 }
291
292 m_encryption_key = encryption_key;
293
294 return encryption_key;
295}
296
297void StandardSecurityHandler::encrypt(NonnullRefPtr<Object> object, Reference reference) const
298{
299 // 7.6.2 General Encryption Algorithm
300 // Algorithm 1: Encryption of data using the RC3 or AES algorithms
301
302 // FIXME: Support AES
303
304 VERIFY(m_encryption_key.has_value());
305
306 // a) Obtain the object number and generation number from the object identifier of
307 // the string or stream to be encrypted. If the string is a direct object, use
308 // the identifier of the indirect object containing it.
309 //
310 // Note: This is always passed in at parse time because objects don't know their own
311 // object number.
312
313 // b) For all strings and streams with crypt filter specifier; treating the object
314 // number as binary integers, extends the origin n-byte encryption key to n + 5
315 // bytes by appending the low-order 3 bytes of the object number and the low-order
316 // 2 bytes of the generation number in that order, low-order byte first. ...
317
318 auto encryption_key = m_encryption_key.value();
319 ReadonlyBytes bytes;
320 Function<void(ByteBuffer const&)> assign;
321
322 if (object->is<StreamObject>()) {
323 auto stream = object->cast<StreamObject>();
324 bytes = stream->bytes();
325
326 assign = [&stream](ByteBuffer const& buffer) {
327 stream->buffer() = buffer;
328 };
329
330 if (stream->dict()->contains(CommonNames::Filter)) {
331 auto filter = MUST(stream->dict()->get_name(m_document, CommonNames::Filter))->name();
332 if (filter == "Crypt")
333 TODO();
334 }
335 } else if (object->is<StringObject>()) {
336 auto string = object->cast<StringObject>();
337 bytes = string->string().bytes();
338 assign = [&string](ByteBuffer const& buffer) {
339 string->set_string(DeprecatedString(buffer.bytes()));
340 };
341 } else {
342 VERIFY_NOT_REACHED();
343 }
344
345 auto index = reference.as_ref_index();
346 auto generation = reference.as_ref_generation_index();
347
348 encryption_key.append(index & 0xff);
349 encryption_key.append((index >> 8) & 0xff);
350 encryption_key.append((index >> 16) & 0xff);
351 encryption_key.append(generation & 0xff);
352 encryption_key.append((generation >> 8) & 0xff);
353
354 // c) Initialize the MD5 hash function and pass the result of step (b) as input to this
355 // function.
356 Crypto::Hash::MD5 md5;
357 md5.update(encryption_key);
358
359 // d) Use the first (n + 5) bytes, up to a maximum of 16, of the output from the MD5
360 // hash as the key for the RC4 or AES symmetric key algorithms, along with the string
361 // or stream data to be encrypted.
362 auto key = MUST(ByteBuffer::copy(md5.peek().bytes()));
363
364 if (key.size() > min(encryption_key.size(), 16))
365 key.resize(encryption_key.size());
366
367 RC4 rc4(key);
368 auto output = rc4.encrypt(bytes);
369
370 assign(output);
371}
372
// Decrypts `object` in place by delegating to encrypt() with the same reference.
void StandardSecurityHandler::decrypt(NonnullRefPtr<Object> object, Reference reference) const
{
    // The RC4 path in encrypt() XORs the data with a keystream, so applying it a
    // second time with the same key restores the original bytes.
    // NOTE(review): this equivalence is specific to RC4; it presumably will not hold
    // once AES support (see the FIXME in encrypt()) is added — confirm at that point.
    encrypt(object, reference);
}
378
379static constexpr auto identity_permutation = iota_array<size_t, 256>(0);
380
381RC4::RC4(ReadonlyBytes key)
382 : m_bytes(identity_permutation)
383{
384 size_t j = 0;
385 for (size_t i = 0; i < 256; i++) {
386 j = (j + m_bytes[i] + key[i % key.size()]) & 0xff;
387 swap(m_bytes[i], m_bytes[j]);
388 }
389}
390
391void RC4::generate_bytes(ByteBuffer& bytes)
392{
393 size_t i = 0;
394 size_t j = 0;
395
396 for (size_t count = 0; count < bytes.size(); count++) {
397 i = (i + 1) % 256;
398 j = (j + m_bytes[i]) % 256;
399 swap(m_bytes[i], m_bytes[j]);
400 bytes[count] = m_bytes[(m_bytes[i] + m_bytes[j]) % 256];
401 }
402}
403
404ByteBuffer RC4::encrypt(ReadonlyBytes bytes)
405{
406 auto output = MUST(ByteBuffer::create_uninitialized(bytes.size()));
407 generate_bytes(output);
408 for (size_t i = 0; i < bytes.size(); i++)
409 output[i] ^= bytes[i];
410 return output;
411}
412
413}