Serenity Operating System
at master 575 lines 16 kB view raw
1/* 2 * Copyright (c) 2023, Rodrigo Tobar <rtobarc@gmail.com>. 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/Endian.h> 8#include <AK/String.h> 9#include <LibGfx/Forward.h> 10#include <LibPDF/Encoding.h> 11#include <LibPDF/Error.h> 12#include <LibPDF/Fonts/CFF.h> 13#include <LibPDF/Reader.h> 14 15namespace PDF { 16 17PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPtr<Encoding> encoding) 18{ 19 Reader reader(cff_bytes); 20 21 // Header 22 // skip major, minor version 23 reader.consume(2); 24 auto header_size = TRY(reader.try_read<Card8>()); 25 // skip offset size 26 reader.consume(1); 27 reader.move_to(header_size); 28 29 // Name INDEX 30 Vector<String> font_names; 31 TRY(parse_index(reader, [&](ReadonlyBytes const& data) -> PDFErrorOr<void> { 32 auto string = TRY(String::from_utf8(data)); 33 return TRY(font_names.try_append(string)); 34 })); 35 36 auto cff = adopt_ref(*new CFF()); 37 cff->set_font_matrix({ 0.001f, 0.0f, 0.0f, 0.001f, 0.0f, 0.0f }); 38 39 // Top DICT INDEX 40 int charset_offset = 0; 41 Vector<u8> encoding_codes; 42 auto charstrings_offset = 0; 43 Vector<ByteBuffer> subroutines; 44 int defaultWidthX = 0; 45 int nominalWidthX = 0; 46 TRY(parse_index(reader, [&](ReadonlyBytes const& element_data) { 47 Reader element_reader { element_data }; 48 return parse_dict<TopDictOperator>(element_reader, [&](TopDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> { 49 switch (op) { 50 case TopDictOperator::Encoding: { 51 auto encoding_offset = 0; 52 if (!operands.is_empty()) 53 encoding_offset = operands[0].get<int>(); 54 encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset)))); 55 break; 56 } 57 case TopDictOperator::Charset: { 58 if (!operands.is_empty()) 59 charset_offset = operands[0].get<int>(); 60 break; 61 } 62 case TopDictOperator::CharStrings: { 63 if (!operands.is_empty()) 64 charstrings_offset = operands[0].get<int>(); 65 break; 66 } 67 case TopDictOperator::Private: { 68 auto private_dict_size = operands[0].get<int>(); 69 auto private_dict_offset = operands[1].get<int>(); 70 Reader priv_dict_reader { cff_bytes.slice(private_dict_offset, private_dict_size) }; 71 TRY(parse_dict<PrivDictOperator>(priv_dict_reader, [&](PrivDictOperator op, Vector<DictOperand> const& operands) -> PDFErrorOr<void> { 72 switch (op) { 73 case PrivDictOperator::Subrs: { 74 auto subrs_offset = operands[0].get<int>(); 75 Reader subrs_reader { cff_bytes.slice(private_dict_offset + subrs_offset) }; 76 dbgln("Parsing Subrs INDEX"); 77 TRY(parse_index(subrs_reader, [&](ReadonlyBytes const& subroutine_bytes) -> PDFErrorOr<void> { 78 return TRY(subroutines.try_append(TRY(ByteBuffer::copy(subroutine_bytes)))); 79 })); 80 break; 81 } 82 case PrivDictOperator::DefaultWidthX: 83 defaultWidthX = operands[0].get<int>(); 84 break; 85 case PrivDictOperator::NominalWidthX: 86 nominalWidthX = operands[0].get<int>(); 87 break; 88 } 89 return {}; 90 })); 91 break; 92 } 93 default:; 94 } 95 return {}; 96 }); 97 })); 98 99 // Create glpyhs (now that we have the subroutines) and associate missing information to store them and their encoding 100 auto glyphs = TRY(parse_charstrings(Reader(cff_bytes.slice(charstrings_offset)), subroutines)); 101 auto charset = TRY(parse_charset(Reader { cff_bytes.slice(charset_offset) }, glyphs.size())); 102 103 // Adjust glyphs' widths as they are deltas from nominalWidthX 104 for (auto& glyph : glyphs) { 105 if (!glyph.has_width()) 106 glyph.set_width(float(defaultWidthX)); 107 else 108 glyph.set_width(glyph.width() + float(nominalWidthX)); 109 } 110 111 for (size_t i = 0; i < glyphs.size(); i++) { 112 if (i == 0) { 113 TRY(cff->add_glyph(0, move(glyphs[0]))); 114 continue; 115 } 116 auto const& name = charset[i - 1]; 117 TRY(cff->add_glyph(name, move(glyphs[i]))); 118 } 119 cff->consolidate_glyphs(); 120 121 // Encoding given or read 122 if (encoding) { 123 cff->set_encoding(move(encoding)); 124 } else { 125 auto encoding = Encoding::create(); 126 for (size_t i = 0; i < glyphs.size(); i++) { 127 if (i == 0) { 128 encoding->set(0, ".notdef"); 129 continue; 130 } 131 auto code = encoding_codes[i - 1]; 132 auto char_name = charset[i - 1]; 133 encoding->set(code, char_name); 134 } 135 cff->set_encoding(move(encoding)); 136 } 137 138 return cff; 139} 140 141/// Appendix C: Predefined Charsets 142static constexpr Array s_cff_builtin_names { 143 ".notdef"sv, 144 "space"sv, 145 "exclam"sv, 146 "quotedbl"sv, 147 "numbersign"sv, 148 "dollar"sv, 149 "percent"sv, 150 "ampersand"sv, 151 "quoteright"sv, 152 "parenleft"sv, 153 "parenright"sv, 154 "asterisk"sv, 155 "plus"sv, 156 "comma"sv, 157 "hyphen"sv, 158 "period"sv, 159 "slash"sv, 160 "zero"sv, 161 "one"sv, 162 "two"sv, 163 "three"sv, 164 "four"sv, 165 "five"sv, 166 "six"sv, 167 "seven"sv, 168 "eight"sv, 169 "nine"sv, 170 "colon"sv, 171 "semicolon"sv, 172 "less"sv, 173 "equal"sv, 174 "greater"sv, 175 "question"sv, 176 "at"sv, 177 "A"sv, 178 "B"sv, 179 "C"sv, 180 "D"sv, 181 "E"sv, 182 "F"sv, 183 "G"sv, 184 "H"sv, 185 "I"sv, 186 "J"sv, 187 "K"sv, 188 "L"sv, 189 "M"sv, 190 "N"sv, 191 "O"sv, 192 "P"sv, 193 "Q"sv, 194 "R"sv, 195 "S"sv, 196 "T"sv, 197 "U"sv, 198 "V"sv, 199 "W"sv, 200 "X"sv, 201 "Y"sv, 202 "Z"sv, 203 "bracketleft"sv, 204 "backslash"sv, 205 "bracketright"sv, 206 "asciicircum"sv, 207 "underscore"sv, 208 "quoteleft"sv, 209 "a"sv, 210 "b"sv, 211 "c"sv, 212 "d"sv, 213 "e"sv, 214 "f"sv, 215 "g"sv, 216 "h"sv, 217 "i"sv, 218 "j"sv, 219 "k"sv, 220 "l"sv, 221 "m"sv, 222 "n"sv, 223 "o"sv, 224 "p"sv, 225 "q"sv, 226 "r"sv, 227 "s"sv, 228 "t"sv, 229 "u"sv, 230 "v"sv, 231 "w"sv, 232 "x"sv, 233 "y"sv, 234 "z"sv, 235 "braceleft"sv, 236 "bar"sv, 237 "braceright"sv, 238 "asciitilde"sv, 239 "exclamdown"sv, 240 "cent"sv, 241 "sterling"sv, 242 "fraction"sv, 243 "yen"sv, 244 "florin"sv, 245 "section"sv, 246 "currency"sv, 247 "quotesingle"sv, 248 "quotedblleft"sv, 249 "guillemotleft"sv, 250 "guilsinglleft"sv, 251 "guilsinglright"sv, 252 "fi"sv, 253 "fl"sv, 254 "endash"sv, 255 "dagger"sv, 256 "daggerdbl"sv, 257 "periodcentered"sv, 258 "paragraph"sv, 259 "bullet"sv, 260 "quotesinglbase"sv, 261 "quotedblbase"sv, 262 "quotedblright"sv, 263 "guillemotright"sv, 264 "ellipsis"sv, 265 "perthousand"sv, 266 "questiondown"sv, 267 "grave"sv, 268 "acute"sv, 269 "circumflex"sv, 270 "tilde"sv, 271 "macron"sv, 272 "breve"sv, 273 "dotaccent"sv, 274 "dieresis"sv, 275 "ring"sv, 276 "cedilla"sv, 277 "hungarumlaut"sv, 278 "ogonek"sv, 279 "caron"sv, 280 "emdash"sv, 281 "AE"sv, 282 "ordfeminine"sv, 283 "Lslash"sv, 284 "Oslash"sv, 285 "OE"sv, 286 "ordmasculine"sv, 287 "ae"sv, 288 "dotlessi"sv, 289 "lslash"sv, 290 "oslash"sv, 291 "oe"sv, 292 "germandbls"sv, 293 "onesuperior"sv, 294 "logicalnot"sv, 295 "mu"sv, 296 "trademark"sv, 297 "Eth"sv, 298 "onehalf"sv, 299 "plusminus"sv, 300 "Thorn"sv, 301 "onequarter"sv, 302 "divide"sv, 303 "brokenbar"sv, 304 "degree"sv, 305 "thorn"sv, 306 "threequarters"sv, 307 "twosuperior"sv, 308 "registered"sv, 309 "minus"sv, 310 "eth"sv, 311 "multiply"sv, 312 "threesuperior"sv, 313 "copyright"sv, 314 "Aacute"sv, 315 "Acircumflex"sv, 316 "Adieresis"sv, 317 "Agrave"sv, 318 "Aring"sv, 319 "Atilde"sv, 320 "Ccedilla"sv, 321 "Eacute"sv, 322 "Ecircumflex"sv, 323 "Edieresis"sv, 324 "Egrave"sv, 325 "Iacute"sv, 326 "Icircumflex"sv, 327 "Idieresis"sv, 328 "Igrave"sv, 329 "Ntilde"sv, 330 "Oacute"sv, 331 "Ocircumflex"sv, 332 "Odieresis"sv, 333 "Ograve"sv, 334 "Otilde"sv, 335 "Scaron"sv, 336 "Uacute"sv, 337 "Ucircumflex"sv, 338 "Udieresis"sv, 339 "Ugrave"sv, 340 "Yacute"sv, 341 "Ydieresis"sv, 342 "Zcaron"sv, 343 "aacute"sv, 344 "acircumflex"sv, 345 "adieresis"sv, 346 "agrave"sv, 347 "aring"sv, 348 "atilde"sv, 349 "ccedilla"sv, 350 "eacute"sv, 351 "ecircumflex"sv, 352 "edieresis"sv, 353 "egrave"sv, 354 "iacute"sv, 355 "icircumflex"sv, 356 "idieresis"sv, 357 "igrave"sv, 358 "ntilde"sv, 359 "oacute"sv, 360 "ocircumflex"sv, 361 "odieresis"sv, 362 "ograve"sv, 363 "otilde"sv, 364 "scaron"sv, 365 "uacute"sv, 366 "ucircumflex"sv, 367 "udieresis"sv, 368 "ugrave"sv, 369 "yacute"sv, 370 "ydieresis"sv, 371 "zcaron"sv, 372}; 373 374PDFErrorOr<Vector<DeprecatedFlyString>> CFF::parse_charset(Reader&& reader, size_t glyph_count) 375{ 376 Vector<DeprecatedFlyString> names; 377 auto resolve = [](SID sid) { 378 if (sid < s_cff_builtin_names.size()) 379 return DeprecatedFlyString(s_cff_builtin_names[sid]); 380 dbgln("Cound't find string for SID {}, going with space", sid); 381 return DeprecatedFlyString("space"); 382 }; 383 384 auto format = TRY(reader.try_read<Card8>()); 385 if (format == 0) { 386 for (u8 i = 0; i < glyph_count - 1; i++) { 387 SID sid = TRY(reader.try_read<BigEndian<SID>>()); 388 TRY(names.try_append(resolve(sid))); 389 } 390 } else if (format == 1) { 391 while (names.size() < glyph_count - 1) { 392 auto first_sid = TRY(reader.try_read<BigEndian<SID>>()); 393 int left = TRY(reader.try_read<Card8>()); 394 for (u8 sid = first_sid; left >= 0; left--, sid++) 395 TRY(names.try_append(resolve(sid))); 396 } 397 } 398 return names; 399} 400 401PDFErrorOr<Vector<CFF::Glyph>> CFF::parse_charstrings(Reader&& reader, Vector<ByteBuffer> const& subroutines) 402{ 403 Vector<Glyph> glyphs; 404 TRY(parse_index(reader, [&](ReadonlyBytes const& charstring_data) -> PDFErrorOr<void> { 405 GlyphParserState state; 406 auto glyph = TRY(parse_glyph(charstring_data, subroutines, state, true)); 407 return TRY(glyphs.try_append(glyph)); 408 })); 409 return glyphs; 410} 411 412PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader) 413{ 414 Vector<u8> encoding_codes; 415 auto format_raw = TRY(reader.try_read<Card8>()); 416 // TODO: support encoding supplements when highest bit is set 417 auto format = format_raw & 0x7f; 418 if (format == 0) { 419 auto n_codes = TRY(reader.try_read<Card8>()); 420 for (u8 i = 0; i < n_codes; i++) { 421 TRY(encoding_codes.try_append(TRY(reader.try_read<Card8>()))); 422 } 423 } else if (format == 1) { 424 auto n_ranges = TRY(reader.try_read<Card8>()); 425 for (u8 i = 0; i < n_ranges; i++) { 426 auto first_code = TRY(reader.try_read<Card8>()); 427 int left = TRY(reader.try_read<Card8>()); 428 for (u8 code = first_code; left >= 0; left--, code++) 429 TRY(encoding_codes.try_append(code)); 430 } 431 } else 432 return error(DeprecatedString::formatted("Invalid encoding format: {}", format)); 433 return encoding_codes; 434} 435 436template<typename OperatorT> 437PDFErrorOr<void> CFF::parse_dict(Reader& reader, DictEntryHandler<OperatorT>&& handler) 438{ 439 Vector<DictOperand> operands; 440 while (reader.remaining() > 0) { 441 auto b0 = reader.read<u8>(); 442 // A command 443 if (b0 <= 21) { 444 auto op = TRY(parse_dict_operator<OperatorT>(b0, reader)); 445 TRY(handler(op, operands)); 446 operands.clear(); 447 continue; 448 } 449 // An operand 450 TRY(operands.try_append(TRY(load_dict_operand(b0, reader)))); 451 } 452 return {}; 453} 454 455template PDFErrorOr<void> CFF::parse_dict<CFF::TopDictOperator>(Reader&, DictEntryHandler<TopDictOperator>&&); 456template PDFErrorOr<void> CFF::parse_dict<CFF::PrivDictOperator>(Reader&, DictEntryHandler<PrivDictOperator>&&); 457 458template<typename OperatorT> 459PDFErrorOr<OperatorT> CFF::parse_dict_operator(u8 b0, Reader& reader) 460{ 461 VERIFY(b0 <= 21); 462 if (b0 != 12) 463 return OperatorT { (int)b0 }; 464 auto b1 = TRY(reader.try_read<u8>()); 465 return OperatorT { b0 << 8 | b1 }; 466} 467 468template PDFErrorOr<CFF::TopDictOperator> CFF::parse_dict_operator(u8, Reader&); 469 470PDFErrorOr<void> CFF::parse_index(Reader& reader, IndexDataHandler&& data_handler) 471{ 472 Card16 count = TRY(reader.try_read<BigEndian<Card16>>()); 473 if (count == 0) 474 return {}; 475 auto offset_size = TRY(reader.try_read<OffSize>()); 476 if (offset_size == 1) 477 return parse_index_data<u8>(count, reader, data_handler); 478 if (offset_size == 2) 479 return parse_index_data<u16>(count, reader, data_handler); 480 if (offset_size == 4) 481 return parse_index_data<u32>(count, reader, data_handler); 482 VERIFY_NOT_REACHED(); 483} 484 485template<typename OffsetType> 486PDFErrorOr<void> CFF::parse_index_data(Card16 count, Reader& reader, IndexDataHandler& handler) 487{ 488 OffsetType last_data_end = 1; 489 auto offset_refpoint = reader.offset() + sizeof(OffsetType) * (count + 1) - 1; 490 for (u16 i = 0; i < count; i++) { 491 reader.save(); 492 reader.move_by(sizeof(OffsetType) * i); 493 OffsetType data_start = reader.read<BigEndian<OffsetType>>(); 494 last_data_end = reader.read<BigEndian<OffsetType>>(); 495 auto data_size = last_data_end - data_start; 496 reader.move_to(offset_refpoint + data_start); 497 TRY(handler(reader.bytes().slice(reader.offset(), data_size))); 498 reader.load(); 499 } 500 reader.move_to(offset_refpoint + last_data_end); 501 return {}; 502} 503 504template PDFErrorOr<void> CFF::parse_index_data<u8>(Card16, Reader&, IndexDataHandler&); 505template PDFErrorOr<void> CFF::parse_index_data<u16>(Card16, Reader&, IndexDataHandler&); 506template PDFErrorOr<void> CFF::parse_index_data<u32>(Card16, Reader&, IndexDataHandler&); 507 508// 4 DICT DATA, Table 3 Operand Encoding 509int CFF::load_int_dict_operand(u8 b0, Reader& reader) 510{ 511 if (b0 >= 32 && b0 <= 246) { 512 return b0 - 139; 513 } 514 if (b0 >= 247 && b0 <= 250) { 515 auto b1 = reader.read<u8>(); 516 return (b0 - 247) * 256 + b1 + 108; 517 } 518 if (b0 >= 251 && b0 <= 254) { 519 auto b1 = reader.read<u8>(); 520 return -(b0 - 251) * 256 - b1 - 108; 521 } 522 if (b0 == 28) { 523 auto b1 = reader.read<u8>(); 524 auto b2 = reader.read<u8>(); 525 return b1 << 8 | b2; 526 } 527 if (b0 == 29) { 528 auto b1 = reader.read<u8>(); 529 auto b2 = reader.read<u8>(); 530 auto b3 = reader.read<u8>(); 531 auto b4 = reader.read<u8>(); 532 return b1 << 24 | b2 << 16 | b3 << 8 | b4; 533 } 534 VERIFY_NOT_REACHED(); 535} 536 537float CFF::load_float_dict_operand(Reader& reader) 538{ 539 StringBuilder sb; 540 auto add_nibble = [&](char nibble) { 541 if (nibble < 0xa) 542 sb.append('0' + nibble); 543 else if (nibble == 0xa) 544 sb.append('.'); 545 else if (nibble == 0xb) 546 sb.append('E'); 547 else if (nibble == 0xc) 548 sb.append("E-"sv); 549 else if (nibble == 0xe) 550 sb.append('-'); 551 }; 552 while (true) { 553 auto byte = reader.read<u8>(); 554 char nibble1 = (byte & 0xf0) >> 4; 555 char nibble2 = byte & 0x0f; 556 if (nibble1 == 0xf) 557 break; 558 add_nibble(nibble1); 559 if (nibble2 == 0xf) 560 break; 561 add_nibble(nibble2); 562 } 563 auto result = AK::StringUtils::convert_to_floating_point<float>(sb.string_view()); 564 return result.release_value(); 565} 566 567PDFErrorOr<CFF::DictOperand> CFF::load_dict_operand(u8 b0, Reader& reader) 568{ 569 if (b0 == 30) 570 return load_float_dict_operand(reader); 571 if (b0 >= 28) 572 return load_int_dict_operand(b0, reader); 573 return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Unknown CFF dict element prefix: {}", b0) }; 574} 575}