Serenity Operating System
1/*
2 * Copyright (c) 2023, Rodrigo Tobar <rtobarc@gmail.com>.
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/Endian.h>
8#include <AK/String.h>
9#include <LibGfx/Forward.h>
10#include <LibPDF/Encoding.h>
11#include <LibPDF/Error.h>
12#include <LibPDF/Fonts/CFF.h>
13#include <LibPDF/Reader.h>
14
15namespace PDF {
16
17PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr<Encoding> encoding)
18{
19 Reader reader(cff_bytes);
20
21 // Header
22 // skip major, minor version
23 reader.consume(2);
24 auto header_size = TRY(reader.try_read<Card8>());
25 // skip offset size
26 reader.consume(1);
27 reader.move_to(header_size);
28
29 // Name INDEX
30 Vector<String> font_names;
31 TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr<void> {
32 auto string = TRY(String::from_utf8(data));
33 return TRY(font_names.try_append(string));
34 }));
35
36 auto cff = adopt_ref(*new CFF());
37 cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f });
38
39 // Top DICT INDEX
40 int charset_offset = 0;
41 Vector<u8> encoding_codes;
42 auto charstrings_offset = 0;
43 Vector<ByteBuffer> subroutines;
44 int defaultWidthX = 0;
45 int nominalWidthX = 0;
46 TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) {
47 Reader element_reader { element_data };
48 return parse_dict<TopDictOperator>(element_reader, [&](TopDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
49 switch (op) {
50 case TopDictOperator::Encoding: {
51 auto encoding_offset = 0;
52 if (!operands.is_empty())
53 encoding_offset = operands[0].get<int>();
54 encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset))));
55 break;
56 }
57 case TopDictOperator::Charset: {
58 if (!operands.is_empty())
59 charset_offset = operands[0].get<int>();
60 break;
61 }
62 case TopDictOperator::CharStrings: {
63 if (!operands.is_empty())
64 charstrings_offset = operands[0].get<int>();
65 break;
66 }
67 case TopDictOperator::Private: {
68 auto private_dict_size = operands[0].get<int>();
69 auto private_dict_offset = operands[1].get<int>();
70 Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) };
71 TRY(parse_dict<PrivDictOperator>(priv_dict_reader, [&](PrivDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> {
72 switch (op) {
73 case PrivDictOperator::Subrs: {
74 auto subrs_offset = operands[0].get<int>();
75 Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) };
76 dbgln("Parsing Subrs INDEX");
77 TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr<void> {
78 return TRY(subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes))));
79 }));
80 break;
81 }
82 case PrivDictOperator::DefaultWidthX:
83 defaultWidthX = operands[0].get<int>();
84 break;
85 case PrivDictOperator::NominalWidthX:
86 nominalWidthX = operands[0].get<int>();
87 break;
88 }
89 return {};
90 }));
91 break;
92 }
93 default:;
94 }
95 return {};
96 });
97 }));
98
99 // Create glpyhs (now that we have the subroutines) and associate missing information to store them and their encoding
100 auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines));
101 auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size()));
102
103 // Adjust glyphs' widths as they are deltas from nominalWidthX
104 for (auto& glyph : glyphs) {
105 if (!glyph.has_width())
106 glyph.set_width(float(defaultWidthX));
107 else
108 glyph.set_width(glyph.width() + float(nominalWidthX));
109 }
110
111 for (size_t i = 0; i < glyphs.size(); i++) {
112 if (i == 0) {
113 TRY(cff->add_glyph(0, move(glyphs[0])));
114 continue;
115 }
116 auto const& name = charset[i - 1];
117 TRY(cff->add_glyph(name, move(glyphs[i])));
118 }
119 cff->consolidate_glyphs();
120
121 // Encoding given or read
122 if (encoding) {
123 cff->set_encoding(move(encoding));
124 } else {
125 auto encoding = Encoding::create();
126 for (size_t i = 0; i < glyphs.size(); i++) {
127 if (i == 0) {
128 encoding->set(0, ".notdef");
129 continue;
130 }
131 auto code = encoding_codes[i - 1];
132 auto char_name = charset[i - 1];
133 encoding->set(code, char_name);
134 }
135 cff->set_encoding(move(encoding));
136 }
137
138 return cff;
139}
140
141/// Appendix C: Predefined Charsets
142static constexpr Array s_cff_builtin_names {
143 ".notdef"sv,
144 "space"sv,
145 "exclam"sv,
146 "quotedbl"sv,
147 "numbersign"sv,
148 "dollar"sv,
149 "percent"sv,
150 "ampersand"sv,
151 "quoteright"sv,
152 "parenleft"sv,
153 "parenright"sv,
154 "asterisk"sv,
155 "plus"sv,
156 "comma"sv,
157 "hyphen"sv,
158 "period"sv,
159 "slash"sv,
160 "zero"sv,
161 "one"sv,
162 "two"sv,
163 "three"sv,
164 "four"sv,
165 "five"sv,
166 "six"sv,
167 "seven"sv,
168 "eight"sv,
169 "nine"sv,
170 "colon"sv,
171 "semicolon"sv,
172 "less"sv,
173 "equal"sv,
174 "greater"sv,
175 "question"sv,
176 "at"sv,
177 "A"sv,
178 "B"sv,
179 "C"sv,
180 "D"sv,
181 "E"sv,
182 "F"sv,
183 "G"sv,
184 "H"sv,
185 "I"sv,
186 "J"sv,
187 "K"sv,
188 "L"sv,
189 "M"sv,
190 "N"sv,
191 "O"sv,
192 "P"sv,
193 "Q"sv,
194 "R"sv,
195 "S"sv,
196 "T"sv,
197 "U"sv,
198 "V"sv,
199 "W"sv,
200 "X"sv,
201 "Y"sv,
202 "Z"sv,
203 "bracketleft"sv,
204 "backslash"sv,
205 "bracketright"sv,
206 "asciicircum"sv,
207 "underscore"sv,
208 "quoteleft"sv,
209 "a"sv,
210 "b"sv,
211 "c"sv,
212 "d"sv,
213 "e"sv,
214 "f"sv,
215 "g"sv,
216 "h"sv,
217 "i"sv,
218 "j"sv,
219 "k"sv,
220 "l"sv,
221 "m"sv,
222 "n"sv,
223 "o"sv,
224 "p"sv,
225 "q"sv,
226 "r"sv,
227 "s"sv,
228 "t"sv,
229 "u"sv,
230 "v"sv,
231 "w"sv,
232 "x"sv,
233 "y"sv,
234 "z"sv,
235 "braceleft"sv,
236 "bar"sv,
237 "braceright"sv,
238 "asciitilde"sv,
239 "exclamdown"sv,
240 "cent"sv,
241 "sterling"sv,
242 "fraction"sv,
243 "yen"sv,
244 "florin"sv,
245 "section"sv,
246 "currency"sv,
247 "quotesingle"sv,
248 "quotedblleft"sv,
249 "guillemotleft"sv,
250 "guilsinglleft"sv,
251 "guilsinglright"sv,
252 "fi"sv,
253 "fl"sv,
254 "endash"sv,
255 "dagger"sv,
256 "daggerdbl"sv,
257 "periodcentered"sv,
258 "paragraph"sv,
259 "bullet"sv,
260 "quotesinglbase"sv,
261 "quotedblbase"sv,
262 "quotedblright"sv,
263 "guillemotright"sv,
264 "ellipsis"sv,
265 "perthousand"sv,
266 "questiondown"sv,
267 "grave"sv,
268 "acute"sv,
269 "circumflex"sv,
270 "tilde"sv,
271 "macron"sv,
272 "breve"sv,
273 "dotaccent"sv,
274 "dieresis"sv,
275 "ring"sv,
276 "cedilla"sv,
277 "hungarumlaut"sv,
278 "ogonek"sv,
279 "caron"sv,
280 "emdash"sv,
281 "AE"sv,
282 "ordfeminine"sv,
283 "Lslash"sv,
284 "Oslash"sv,
285 "OE"sv,
286 "ordmasculine"sv,
287 "ae"sv,
288 "dotlessi"sv,
289 "lslash"sv,
290 "oslash"sv,
291 "oe"sv,
292 "germandbls"sv,
293 "onesuperior"sv,
294 "logicalnot"sv,
295 "mu"sv,
296 "trademark"sv,
297 "Eth"sv,
298 "onehalf"sv,
299 "plusminus"sv,
300 "Thorn"sv,
301 "onequarter"sv,
302 "divide"sv,
303 "brokenbar"sv,
304 "degree"sv,
305 "thorn"sv,
306 "threequarters"sv,
307 "twosuperior"sv,
308 "registered"sv,
309 "minus"sv,
310 "eth"sv,
311 "multiply"sv,
312 "threesuperior"sv,
313 "copyright"sv,
314 "Aacute"sv,
315 "Acircumflex"sv,
316 "Adieresis"sv,
317 "Agrave"sv,
318 "Aring"sv,
319 "Atilde"sv,
320 "Ccedilla"sv,
321 "Eacute"sv,
322 "Ecircumflex"sv,
323 "Edieresis"sv,
324 "Egrave"sv,
325 "Iacute"sv,
326 "Icircumflex"sv,
327 "Idieresis"sv,
328 "Igrave"sv,
329 "Ntilde"sv,
330 "Oacute"sv,
331 "Ocircumflex"sv,
332 "Odieresis"sv,
333 "Ograve"sv,
334 "Otilde"sv,
335 "Scaron"sv,
336 "Uacute"sv,
337 "Ucircumflex"sv,
338 "Udieresis"sv,
339 "Ugrave"sv,
340 "Yacute"sv,
341 "Ydieresis"sv,
342 "Zcaron"sv,
343 "aacute"sv,
344 "acircumflex"sv,
345 "adieresis"sv,
346 "agrave"sv,
347 "aring"sv,
348 "atilde"sv,
349 "ccedilla"sv,
350 "eacute"sv,
351 "ecircumflex"sv,
352 "edieresis"sv,
353 "egrave"sv,
354 "iacute"sv,
355 "icircumflex"sv,
356 "idieresis"sv,
357 "igrave"sv,
358 "ntilde"sv,
359 "oacute"sv,
360 "ocircumflex"sv,
361 "odieresis"sv,
362 "ograve"sv,
363 "otilde"sv,
364 "scaron"sv,
365 "uacute"sv,
366 "ucircumflex"sv,
367 "udieresis"sv,
368 "ugrave"sv,
369 "yacute"sv,
370 "ydieresis"sv,
371 "zcaron"sv,
372};
373
374PDFErrorOr<Vector<DeprecatedFlyString>> CFF::parse_charset(Reader&& reader, size_t glyph_count)
375{
376 Vector<DeprecatedFlyString> names;
377 auto resolve = [](SID sid) {
378 if (sid < s_cff_builtin_names.size())
379 return DeprecatedFlyString(s_cff_builtin_names[sid]);
380 dbgln("Cound't find string for SID {}, going with space", sid);
381 return DeprecatedFlyString("space");
382 };
383
384 auto format = TRY(reader.try_read<Card8>());
385 if (format == 0) {
386 for (u8 i = 0; i < glyph_count - 1; i++) {
387 SID sid = TRY(reader.try_read<BigEndian<SID>>());
388 TRY(names.try_append(resolve(sid)));
389 }
390 } else if (format == 1) {
391 while (names.size() < glyph_count - 1) {
392 auto first_sid = TRY(reader.try_read<BigEndian<SID>>());
393 int left = TRY(reader.try_read<Card8>());
394 for (u8 sid = first_sid; left >= 0; left--, sid++)
395 TRY(names.try_append(resolve(sid)));
396 }
397 }
398 return names;
399}
400
401PDFErrorOr<Vector<CFF::Glyph>> CFF::parse_charstrings(Reader&& reader, Vector<ByteBuffer> const& subroutines)
402{
403 Vector<Glyph> glyphs;
404 TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr<void> {
405 GlyphParserState state;
406 auto glyph = TRY(parse_glyph(charstring_data, subroutines, state, true));
407 return TRY(glyphs.try_append(glyph));
408 }));
409 return glyphs;
410}
411
412PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader)
413{
414 Vector<u8> encoding_codes;
415 auto format_raw = TRY(reader.try_read<Card8>());
416 // TODO: support encoding supplements when highest bit is set
417 auto format = format_raw & 0x7f;
418 if (format == 0) {
419 auto n_codes = TRY(reader.try_read<Card8>());
420 for (u8 i = 0; i < n_codes; i++) {
421 TRY(encoding_codes.try_append(TRY(reader.try_read<Card8>())));
422 }
423 } else if (format == 1) {
424 auto n_ranges = TRY(reader.try_read<Card8>());
425 for (u8 i = 0; i < n_ranges; i++) {
426 auto first_code = TRY(reader.try_read<Card8>());
427 int left = TRY(reader.try_read<Card8>());
428 for (u8 code = first_code; left >= 0; left--, code++)
429 TRY(encoding_codes.try_append(code));
430 }
431 } else
432 return error(DeprecatedString::formatted("Invalid encoding format: {}", format));
433 return encoding_codes;
434}
435
436template<typename OperatorT>
437PDFErrorOr<void> CFF::parse_dict(Reader& reader, DictEntryHandler<OperatorT>&& handler)
438{
439 Vector<DictOperand> operands;
440 while (reader.remaining() > 0) {
441 auto b0 = reader.read<u8>();
442 // A command
443 if (b0 <= 21) {
444 auto op = TRY(parse_dict_operator<OperatorT>(b0, reader));
445 TRY(handler(op, operands));
446 operands.clear();
447 continue;
448 }
449 // An operand
450 TRY(operands.try_append(TRY(load_dict_operand(b0, reader))));
451 }
452 return {};
453}
454
455template PDFErrorOr<void> CFF::parse_dict<CFF::TopDictOperator>(Reader&, DictEntryHandler<TopDictOperator>&&);
456template PDFErrorOr<void> CFF::parse_dict<CFF::PrivDictOperator>(Reader&, DictEntryHandler<PrivDictOperator>&&);
457
458template<typename OperatorT>
459PDFErrorOr<OperatorT> CFF::parse_dict_operator(u8 b0, Reader& reader)
460{
461 VERIFY(b0 <= 21);
462 if (b0 != 12)
463 return OperatorT { (int)b0 };
464 auto b1 = TRY(reader.try_read<u8>());
465 return OperatorT { b0 << 8 | b1 };
466}
467
468template PDFErrorOr<CFF::TopDictOperator> CFF::parse_dict_operator(u8, Reader&);
469
470PDFErrorOr<void> CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler)
471{
472 Card16 count = TRY(reader.try_read<BigEndian<Card16>>());
473 if (count == 0)
474 return {};
475 auto offset_size = TRY(reader.try_read<OffSize>());
476 if (offset_size == 1)
477 return parse_index_data<u8>(count, reader, data_handler);
478 if (offset_size == 2)
479 return parse_index_data<u16>(count, reader, data_handler);
480 if (offset_size == 4)
481 return parse_index_data<u32>(count, reader, data_handler);
482 VERIFY_NOT_REACHED();
483}
484
485template<typename OffsetType>
486PDFErrorOr<void> CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler)
487{
488 OffsetType last_data_end = 1;
489 auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1;
490 for (u16 i = 0; i < count; i++) {
491 reader.save();
492 reader.move_by(sizeof(OffsetType) * i);
493 OffsetType data_start = reader.read<BigEndian<OffsetType>>();
494 last_data_end = reader.read<BigEndian<OffsetType>>();
495 auto data_size = last_data_end - data_start;
496 reader.move_to(offset_refpoint + data_start);
497 TRY(handler(reader.bytes().slice(reader.offset(), data_size)));
498 reader.load();
499 }
500 reader.move_to(offset_refpoint + last_data_end);
501 return {};
502}
503
504template PDFErrorOr<void> CFF::parse_index_data<u8>(Card16, Reader&, IndexDataHandler&);
505template PDFErrorOr<void> CFF::parse_index_data<u16>(Card16, Reader&, IndexDataHandler&);
506template PDFErrorOr<void> CFF::parse_index_data<u32>(Card16, Reader&, IndexDataHandler&);
507
508// 4 DICT DATA, Table 3 Operand Encoding
509int CFF::load_int_dict_operand(u8 b0, Reader& reader)
510{
511 if (b0 >= 32 && b0 <= 246) {
512 return b0 - 139;
513 }
514 if (b0 >= 247 && b0 <= 250) {
515 auto b1 = reader.read<u8>();
516 return (b0 - 247) * 256 + b1 + 108;
517 }
518 if (b0 >= 251 && b0 <= 254) {
519 auto b1 = reader.read<u8>();
520 return -(b0 - 251) * 256 - b1 - 108;
521 }
522 if (b0 == 28) {
523 auto b1 = reader.read<u8>();
524 auto b2 = reader.read<u8>();
525 return b1 << 8 | b2;
526 }
527 if (b0 == 29) {
528 auto b1 = reader.read<u8>();
529 auto b2 = reader.read<u8>();
530 auto b3 = reader.read<u8>();
531 auto b4 = reader.read<u8>();
532 return b1 << 24 | b2 << 16 | b3 << 8 | b4;
533 }
534 VERIFY_NOT_REACHED();
535}
536
537float CFF::load_float_dict_operand(Reader& reader)
538{
539 StringBuilder sb;
540 auto add_nibble = [&](char nibble) {
541 if (nibble < 0xa)
542 sb.append('0' + nibble);
543 else if (nibble == 0xa)
544 sb.append('.');
545 else if (nibble == 0xb)
546 sb.append('E');
547 else if (nibble == 0xc)
548 sb.append("E-"sv);
549 else if (nibble == 0xe)
550 sb.append('-');
551 };
552 while (true) {
553 auto byte = reader.read<u8>();
554 char nibble1 = (byte & 0xf0) >> 4;
555 char nibble2 = byte & 0x0f;
556 if (nibble1 == 0xf)
557 break;
558 add_nibble(nibble1);
559 if (nibble2 == 0xf)
560 break;
561 add_nibble(nibble2);
562 }
563 auto result = AK::StringUtils::convert_to_floating_point<float>(sb.string_view());
564 return result.release_value();
565}
566
567PDFErrorOr<CFF::DictOperand> CFF::load_dict_operand(u8 b0, Reader& reader)
568{
569 if (b0 == 30)
570 return load_float_dict_operand(reader);
571 if (b0 >= 28)
572 return load_int_dict_operand(b0, reader);
573 return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unknown CFF dict element prefix: {}", b0) };
574}
575}