// Serenity Operating System
1/*
2 * Copyright (c) 2022, Matthew Olsson <mattco@serenityos.org>
3 * Copyright (c) 2022, Julian Offenhäuser <offenhaeuser@protonmail.com>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#include <AK/Utf8View.h>
9#include <LibPDF/CommonNames.h>
10#include <LibPDF/Encoding.h>
11
12namespace PDF {
13
14NonnullRefPtr<Encoding> Encoding::create()
15{
16 return adopt_ref(*new Encoding());
17}
18
19PDFErrorOr<NonnullRefPtr<Encoding>> Encoding::from_object(Document* document, NonnullRefPtr<Object> const& obj)
20{
21 if (obj->is<NameObject>()) {
22 auto name = obj->cast<NameObject>()->name();
23 if (name == "StandardEncoding")
24 return standard_encoding();
25 if (name == "MacRomanEncoding")
26 return mac_encoding();
27 if (name == "WinAnsiEncoding")
28 return windows_encoding();
29
30 VERIFY_NOT_REACHED();
31 }
32
33 // Make a custom encoding
34 auto dict = obj->cast<DictObject>();
35
36 RefPtr<Encoding> base_encoding;
37 if (dict->contains(CommonNames::BaseEncoding)) {
38 auto base_encoding_obj = MUST(dict->get_object(document, CommonNames::BaseEncoding));
39 base_encoding = TRY(Encoding::from_object(document, base_encoding_obj));
40 } else {
41 base_encoding = Encoding::standard_encoding();
42 }
43
44 auto encoding = adopt_ref(*new Encoding());
45
46 encoding->m_descriptors = base_encoding->m_descriptors;
47 encoding->m_name_mapping = base_encoding->m_name_mapping;
48
49 auto differences_array = TRY(dict->get_array(document, CommonNames::Differences));
50
51 u16 current_code_point = 0;
52 bool first = true;
53
54 for (auto& item : *differences_array) {
55 if (item.has_u32()) {
56 current_code_point = item.to_int();
57 first = false;
58 } else {
59 VERIFY(item.has<NonnullRefPtr<Object>>());
60 VERIFY(!first);
61 auto& object = item.get<NonnullRefPtr<Object>>();
62 auto name = object->cast<NameObject>()->name();
63 encoding->set(current_code_point, name);
64 current_code_point++;
65 }
66 }
67
68 return encoding;
69}
70
71void Encoding::set(CharCodeType char_code, DeprecatedFlyString const& glyph_name)
72{
73 m_descriptors.set(char_code, glyph_name);
74 m_name_mapping.set(glyph_name, char_code);
75}
76
77NonnullRefPtr<Encoding> Encoding::standard_encoding()
78{
79 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
80 if (encoding->m_descriptors.is_empty()) {
81#define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
82 encoding->set(standard_code, #name);
83 ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
84#undef ENUMERATE
85 }
86
87 return encoding;
88}
89
90NonnullRefPtr<Encoding> Encoding::mac_encoding()
91{
92 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
93 if (encoding->m_descriptors.is_empty()) {
94#define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
95 encoding->set(mac_code, #name);
96 ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
97#undef ENUMERATE
98 }
99
100 return encoding;
101}
102
103NonnullRefPtr<Encoding> Encoding::windows_encoding()
104{
105 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
106 if (encoding->m_descriptors.is_empty()) {
107#define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
108 encoding->set(win_code, #name);
109 ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
110#undef ENUMERATE
111
112 // PDF Annex D table D.2, note 3:
113 // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only
114 // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment.
115 //
116 // Since CharCodeType is u8 *and* we need to include 255, we iterate in reverse order to have more readable code.
117 for (CharCodeType char_code = 255; char_code > 040; char_code--) {
118 if (!encoding->m_descriptors.contains(char_code))
119 encoding->set(char_code, "bullet");
120 }
121 }
122 return encoding;
123}
124
125NonnullRefPtr<Encoding> Encoding::pdf_doc_encoding()
126{
127 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
128 if (encoding->m_descriptors.is_empty()) {
129#define ENUMERATE(name, standard_code, mac_code, win_code, pdf_code) \
130 encoding->set(pdf_code, #name);
131 ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE)
132#undef ENUMERATE
133 }
134
135 return encoding;
136}
137
138NonnullRefPtr<Encoding> Encoding::symbol_encoding()
139{
140 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
141 if (encoding->m_descriptors.is_empty()) {
142#define ENUMERATE(name, code) \
143 encoding->set(code, #name);
144 ENUMERATE_SYMBOL_CHARACTER_SET(ENUMERATE)
145#undef ENUMERATE
146 }
147
148 return encoding;
149}
150
151NonnullRefPtr<Encoding> Encoding::zapf_encoding()
152{
153 static NonnullRefPtr<Encoding> encoding = adopt_ref(*new Encoding());
154 if (encoding->m_descriptors.is_empty()) {
155#define ENUMERATE(name, code) \
156 encoding->set(code, #name);
157 ENUMERATE_ZAPF_DINGBATS_CHARACTER_SET(ENUMERATE)
158#undef ENUMERATE
159 }
160 return encoding;
161}
162
163u16 Encoding::get_char_code(DeprecatedString const& name) const
164{
165 auto code_iterator = m_name_mapping.find(name);
166 if (code_iterator != m_name_mapping.end())
167 return code_iterator->value;
168 return 0;
169}
170
171DeprecatedFlyString Encoding::get_name(u8 char_code) const
172{
173 auto name_iterator = m_descriptors.find(char_code);
174 if (name_iterator != m_descriptors.end())
175 return name_iterator->value;
176 return 0;
177}
178
179}