Serenity Operating System
at master 179 lines 5.5 kB view raw
1/* 2 * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org> 3 * Copyright (c) 2022, Julian Offenhäuser <offenhaeuser@protonmail.com> 4 * 5 * SPDX-License-Identifier: BSD-2-Clause 6 */ 7 8#include <AK/Utf8View.h> 9#include <LibPDF/CommonNames.h> 10#include <LibPDF/Encoding.h> 11 12namespace PDF { 13 14NonnullRefPtr<Encoding> Encoding::create() 15{ 16 return adopt_ref(*new Encoding()); 17} 18 19PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj) 20{ 21 if (obj->is<NameObject>()) { 22 auto name = obj->cast<NameObject>()->name(); 23 if (name == "StandardEncoding") 24 return standard_encoding(); 25 if (name == "MacRomanEncoding") 26 return mac_encoding(); 27 if (name == "WinAnsiEncoding") 28 return windows_encoding(); 29 30 VERIFY_NOT_REACHED(); 31 } 32 33 // Make a custom encoding 34 auto dict = obj->cast<DictObject>(); 35 36 RefPtr<Encoding> base_encoding; 37 if (dict->contains(CommonNames::BaseEncoding)) { 38 auto base_encoding_obj = MUST(dict->get_object(document, CommonNames::BaseEncoding)); 39 base_encoding = TRY(Encoding::from_object(document, base_encoding_obj)); 40 } else { 41 base_encoding = Encoding::standard_encoding(); 42 } 43 44 auto encoding = adopt_ref(*new Encoding()); 45 46 encoding->m_descriptors = base_encoding->m_descriptors; 47 encoding->m_name_mapping = base_encoding->m_name_mapping; 48 49 auto differences_array = TRY(dict->get_array(document, CommonNames::Differences)); 50 51 u16 current_code_point = 0; 52 bool first = true; 53 54 for (auto& item : *differences_array) { 55 if (item.has_u32()) { 56 current_code_point = item.to_int(); 57 first = false; 58 } else { 59 VERIFY(item.has<NonnullRefPtr<Object>>()); 60 VERIFY(!first); 61 auto& object = item.get<NonnullRefPtr<Object>>(); 62 auto name = object->cast<NameObject>()->name(); 63 encoding->set(current_code_point, name); 64 current_code_point++; 65 } 66 } 67 68 return encoding; 69} 70 71void Encoding::set(CharCodeType char_code, DeprecatedFlyString const& glyph_name) 72{ 73 m_descriptors.set(char_code, glyph_name); 74 m_name_mapping.set(glyph_name, char_code); 75} 76 77NonnullRefPtr<Encoding> Encoding::standard_encoding() 78{ 79 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding()); 80 if (encoding->m_descriptors.is_empty()) { 81#define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \ 82 encoding->set(standard_code, #name); 83 ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE) 84#undef ENUMERATE 85 } 86 87 return encoding; 88} 89 90NonnullRefPtr<Encoding> Encoding::mac_encoding() 91{ 92 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding()); 93 if (encoding->m_descriptors.is_empty()) { 94#define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \ 95 encoding->set(mac_code, #name); 96 ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE) 97#undef ENUMERATE 98 } 99 100 return encoding; 101} 102 103NonnullRefPtr<Encoding> Encoding::windows_encoding() 104{ 105 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding()); 106 if (encoding->m_descriptors.is_empty()) { 107#define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \ 108 encoding->set(win_code, #name); 109 ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE) 110#undef ENUMERATE 111 112 // PDF Annex D table D.2, note 3: 113 // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only 114 // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment. 115 // 116 // Since CharCodeType is u8 *and* we need to include 255, we iterate in reverse order to have more readable code. 117 for (CharCodeType char_code = 255; char_code > 040; char_code--) { 118 if (!encoding->m_descriptors.contains(char_code)) 119 encoding->set(char_code, "bullet"); 120 } 121 } 122 return encoding; 123} 124 125NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding() 126{ 127 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding()); 128 if (encoding->m_descriptors.is_empty()) { 129#define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \ 130 encoding->set(pdf_code, #name); 131 ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE) 132#undef ENUMERATE 133 } 134 135 return encoding; 136} 137 138NonnullRefPtr<Encoding> Encoding::symbol_encoding() 139{ 140 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding()); 141 if (encoding->m_descriptors.is_empty()) { 142#define ENUMERATE(name, code) \ 143 encoding->set(code, #name); 144 ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE) 145#undef ENUMERATE 146 } 147 148 return encoding; 149} 150 151NonnullRefPtr<Encoding> Encoding::zapf_encoding() 152{ 153 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding()); 154 if (encoding->m_descriptors.is_empty()) { 155#define ENUMERATE(name, code) \ 156 encoding->set(code, #name); 157 ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE) 158#undef ENUMERATE 159 } 160 return encoding; 161} 162 163u16 Encoding::get_char_code(DeprecatedString const& name) const 164{ 165 auto code_iterator = m_name_mapping.find(name); 166 if (code_iterator != m_name_mapping.end()) 167 return code_iterator->value; 168 return 0; 169} 170 171DeprecatedFlyString Encoding::get_name(u8 char_code) const 172{ 173 auto name_iterator = m_descriptors.find(char_code); 174 if (name_iterator != m_descriptors.end()) 175 return name_iterator->value; 176 return 0; 177} 178 179}