Serenity Operating System
at master 426 lines 17 kB view raw
1/* 2 * Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <LibPDF/CommonNames.h> 8#include <LibPDF/Document.h> 9#include <LibPDF/Parser.h> 10 11namespace PDF { 12 13DeprecatedString OutlineItem::to_deprecated_string(int indent) const 14{ 15 auto indent_str = DeprecatedString::repeated(" "sv, indent + 1); 16 17 StringBuilder child_builder; 18 child_builder.append('['); 19 for (auto& child : children) 20 child_builder.appendff("{}\n", child->to_deprecated_string(indent + 1)); 21 child_builder.appendff("{}]", indent_str); 22 23 StringBuilder builder; 24 builder.append("OutlineItem {{\n"sv); 25 builder.appendff("{}title={}\n", indent_str, title); 26 builder.appendff("{}count={}\n", indent_str, count); 27 builder.appendff("{}dest={}\n", indent_str, dest); 28 builder.appendff("{}color={}\n", indent_str, color); 29 builder.appendff("{}italic={}\n", indent_str, italic); 30 builder.appendff("{}bold={}\n", indent_str, bold); 31 builder.appendff("{}children={}\n", indent_str, child_builder.to_deprecated_string()); 32 builder.appendff("{}}}", DeprecatedString::repeated(" "sv, indent)); 33 34 return builder.to_deprecated_string(); 35} 36 37PDFErrorOr<NonnullRefPtr<Document>> Document::create(ReadonlyBytes bytes) 38{ 39 auto parser = adopt_ref(*new DocumentParser({}, bytes)); 40 auto document = adopt_ref(*new Document(parser)); 41 42 TRY(parser->initialize()); 43 44 document->m_trailer = parser->trailer(); 45 document->m_catalog = TRY(parser->trailer()->get_dict(document, CommonNames::Root)); 46 47 if (document->m_trailer->contains(CommonNames::Encrypt)) { 48 auto encryption_dict = TRY(document->m_trailer->get_dict(document, CommonNames::Encrypt)); 49 document->m_security_handler = TRY(SecurityHandler::create(document, encryption_dict)); 50 51 // Automatically attempt to unencrypt the document with the empty string. The 52 // result is not important; it is the caller's responsibility to ensure the 53 // document is unencrypted before calling initialize(). 54 document->m_security_handler->try_provide_user_password(""sv); 55 } 56 57 return document; 58} 59 60Document::Document(NonnullRefPtr<DocumentParser> const& parser) 61 : m_parser(parser) 62{ 63 m_parser->set_document(this); 64} 65 66PDFErrorOr<void> Document::initialize() 67{ 68 if (m_security_handler) 69 VERIFY(m_security_handler->has_user_password()); 70 71 TRY(build_page_tree()); 72 TRY(build_outline()); 73 74 return {}; 75} 76 77PDFErrorOr<Value> Document::get_or_load_value(u32 index) 78{ 79 auto value = get_value(index); 80 if (!value.has<Empty>()) // FIXME: Use Optional instead? 81 return value; 82 83 auto object = TRY(m_parser->parse_object_with_index(index)); 84 m_values.set(index, object); 85 return object; 86} 87 88u32 Document::get_first_page_index() const 89{ 90 // FIXME: A PDF can have a different default first page, which 91 // should be fetched and returned here 92 return 0; 93} 94 95u32 Document::get_page_count() const 96{ 97 return m_page_object_indices.size(); 98} 99 100PDFErrorOr<Page> Document::get_page(u32 index) 101{ 102 VERIFY(index < m_page_object_indices.size()); 103 104 auto cached_page = m_pages.get(index); 105 if (cached_page.has_value()) 106 return cached_page.value(); 107 108 auto page_object_index = m_page_object_indices[index]; 109 auto page_object = TRY(get_or_load_value(page_object_index)); 110 auto raw_page_object = TRY(resolve_to<DictObject>(page_object)); 111 112 auto resources = TRY(get_inheritable_object(CommonNames::Resources, raw_page_object))->cast<DictObject>(); 113 auto contents = TRY(raw_page_object->get_object(this, CommonNames::Contents)); 114 115 auto media_box_array = TRY(get_inheritable_object(CommonNames::MediaBox, raw_page_object))->cast<ArrayObject>(); 116 auto media_box = Rectangle { 117 media_box_array->at(0).to_float(), 118 media_box_array->at(1).to_float(), 119 media_box_array->at(2).to_float(), 120 media_box_array->at(3).to_float(), 121 }; 122 123 auto crop_box = media_box; 124 if (raw_page_object->contains(CommonNames::CropBox)) { 125 auto crop_box_array = TRY(raw_page_object->get_array(this, CommonNames::CropBox)); 126 crop_box = Rectangle { 127 crop_box_array->at(0).to_float(), 128 crop_box_array->at(1).to_float(), 129 crop_box_array->at(2).to_float(), 130 crop_box_array->at(3).to_float(), 131 }; 132 } 133 134 float user_unit = 1.0f; 135 if (raw_page_object->contains(CommonNames::UserUnit)) 136 user_unit = raw_page_object->get_value(CommonNames::UserUnit).to_float(); 137 138 int rotate = 0; 139 if (raw_page_object->contains(CommonNames::Rotate)) { 140 rotate = raw_page_object->get_value(CommonNames::Rotate).get<int>(); 141 VERIFY(rotate % 90 == 0); 142 } 143 144 Page page { move(resources), move(contents), media_box, crop_box, user_unit, rotate }; 145 m_pages.set(index, page); 146 return page; 147} 148 149PDFErrorOr<Value> Document::resolve(Value const& value) 150{ 151 if (value.has<Reference>()) { 152 // FIXME: Surely indirect PDF objects can't contain another indirect PDF object, 153 // right? Unsure from the spec, but if they can, these return values would have 154 // to be wrapped with another resolve() call. 155 return get_or_load_value(value.as_ref_index()); 156 } 157 158 if (!value.has<NonnullRefPtr<Object>>()) 159 return value; 160 161 auto& obj = value.get<NonnullRefPtr<Object>>(); 162 163 if (obj->is<IndirectValue>()) 164 return static_ptr_cast<IndirectValue>(obj)->value(); 165 166 return value; 167} 168 169PDFErrorOr<void> Document::build_page_tree() 170{ 171 auto page_tree = TRY(m_catalog->get_dict(this, CommonNames::Pages)); 172 return add_page_tree_node_to_page_tree(page_tree); 173} 174 175PDFErrorOr<void> Document::add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> const& page_tree) 176{ 177 auto kids_array = TRY(page_tree->get_array(this, CommonNames::Kids)); 178 auto page_count = page_tree->get(CommonNames::Count).value().get<int>(); 179 180 if (static_cast<size_t>(page_count) != kids_array->elements().size()) { 181 // This page tree contains child page trees, so we recursively add 182 // these pages to the overall page tree 183 184 for (auto& value : *kids_array) { 185 auto reference_index = value.as_ref_index(); 186 auto maybe_page_tree_node = TRY(m_parser->conditionally_parse_page_tree_node(reference_index)); 187 if (maybe_page_tree_node) { 188 TRY(add_page_tree_node_to_page_tree(maybe_page_tree_node.release_nonnull())); 189 } else { 190 m_page_object_indices.append(reference_index); 191 } 192 } 193 } else { 194 // We know all of the kids are leaf nodes 195 for (auto& value : *kids_array) 196 m_page_object_indices.append(value.as_ref_index()); 197 } 198 199 return {}; 200} 201 202PDFErrorOr<NonnullRefPtr<Object>> Document::find_in_name_tree(NonnullRefPtr<DictObject> tree, DeprecatedFlyString name) 203{ 204 if (tree->contains(CommonNames::Kids)) { 205 return find_in_name_tree_nodes(tree->get_array(CommonNames::Kids), name); 206 } 207 if (!tree->contains(CommonNames::Names)) 208 return Error { Error::Type::MalformedPDF, "name tree has neither Kids nor Names" }; 209 auto key_value_names_array = TRY(tree->get_array(this, CommonNames::Names)); 210 return find_in_key_value_array(key_value_names_array, name); 211} 212 213PDFErrorOr<NonnullRefPtr<Object>> Document::find_in_name_tree_nodes(NonnullRefPtr<ArrayObject> siblings, DeprecatedFlyString name) 214{ 215 for (size_t i = 0; i < siblings->size(); i++) { 216 auto sibling = TRY(resolve_to<DictObject>(siblings->at(i))); 217 auto limits = sibling->get_array(CommonNames::Limits); 218 if (limits->size() != 2) 219 return Error { Error::Type::MalformedPDF, "Expected 2-element Limits array" }; 220 auto start = limits->get_string_at(0); 221 auto end = limits->get_string_at(1); 222 if (start->string() <= name && end->string() >= name) { 223 return find_in_name_tree(sibling, name); 224 } 225 } 226 return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Didn't find node in name tree containing name {}", name) }; 227} 228 229PDFErrorOr<NonnullRefPtr<Object>> Document::find_in_key_value_array(NonnullRefPtr<ArrayObject> key_value_array, DeprecatedFlyString name) 230{ 231 if (key_value_array->size() % 2 == 1) 232 return Error { Error::Type::MalformedPDF, "key/value array has dangling key" }; 233 for (size_t i = 0; i < key_value_array->size() / 2; i++) { 234 auto key = key_value_array->get_string_at(2 * i); 235 if (key->string() == name) { 236 return key_value_array->get_object_at(this, 2 * i + 1); 237 } 238 } 239 return Error { Error::Type::MalformedPDF, DeprecatedString::formatted("Didn't find expected name {} in key/value array", name) }; 240} 241 242PDFErrorOr<void> Document::build_outline() 243{ 244 if (!m_catalog->contains(CommonNames::Outlines)) 245 return {}; 246 247 auto outline_dict = TRY(m_catalog->get_dict(this, CommonNames::Outlines)); 248 if (!outline_dict->contains(CommonNames::First)) 249 return {}; 250 if (!outline_dict->contains(CommonNames::Last)) 251 return {}; 252 253 HashMap<u32, u32> page_number_by_index_ref; 254 for (u32 page_number = 0; page_number < m_page_object_indices.size(); ++page_number) { 255 page_number_by_index_ref.set(m_page_object_indices[page_number], page_number); 256 } 257 258 auto first_ref = outline_dict->get_value(CommonNames::First); 259 260 auto children = TRY(build_outline_item_chain(first_ref, page_number_by_index_ref)); 261 262 m_outline = adopt_ref(*new OutlineDict()); 263 m_outline->children = move(children); 264 if (outline_dict->contains(CommonNames::Count)) 265 m_outline->count = outline_dict->get_value(CommonNames::Count).get<int>(); 266 267 return {}; 268} 269 270PDFErrorOr<Destination> Document::create_destination_from_parameters(NonnullRefPtr<ArrayObject> array, HashMap<u32, u32> const& page_number_by_index_ref) 271{ 272 auto page_ref = array->at(0); 273 auto type_name = TRY(array->get_name_at(this, 1))->name(); 274 275 Vector<Optional<float>> parameters; 276 TRY(parameters.try_ensure_capacity(array->size() - 2)); 277 for (size_t i = 2; i < array->size(); i++) { 278 auto& param = array->at(i); 279 if (param.has<nullptr_t>()) 280 parameters.unchecked_append({}); 281 else 282 parameters.append(param.to_float()); 283 } 284 285 Destination::Type type; 286 if (type_name == CommonNames::XYZ) { 287 type = Destination::Type::XYZ; 288 } else if (type_name == CommonNames::Fit) { 289 type = Destination::Type::Fit; 290 } else if (type_name == CommonNames::FitH) { 291 type = Destination::Type::FitH; 292 } else if (type_name == CommonNames::FitV) { 293 type = Destination::Type::FitV; 294 } else if (type_name == CommonNames::FitR) { 295 type = Destination::Type::FitR; 296 } else if (type_name == CommonNames::FitB) { 297 type = Destination::Type::FitB; 298 } else if (type_name == CommonNames::FitBH) { 299 type = Destination::Type::FitBH; 300 } else if (type_name == CommonNames::FitBV) { 301 type = Destination::Type::FitBV; 302 } else { 303 VERIFY_NOT_REACHED(); 304 } 305 306 return Destination { type, page_number_by_index_ref.get(page_ref.as_ref_index()), parameters }; 307} 308 309PDFErrorOr<NonnullRefPtr<Object>> Document::get_inheritable_object(DeprecatedFlyString const& name, NonnullRefPtr<DictObject> object) 310{ 311 if (!object->contains(name)) { 312 auto parent = TRY(object->get_dict(this, CommonNames::Parent)); 313 return get_inheritable_object(name, parent); 314 } 315 return object->get_object(this, name); 316} 317 318PDFErrorOr<Destination> Document::create_destination_from_dictionary_entry(NonnullRefPtr<Object> const& entry, HashMap<u32, u32> const& page_number_by_index_ref) 319{ 320 if (entry->is<ArrayObject>()) { 321 auto entry_array = entry->cast<ArrayObject>(); 322 return create_destination_from_parameters(entry_array, page_number_by_index_ref); 323 } 324 auto entry_dictionary = entry->cast<DictObject>(); 325 auto d_array = MUST(entry_dictionary->get_array(this, CommonNames::D)); 326 return create_destination_from_parameters(d_array, page_number_by_index_ref); 327} 328 329PDFErrorOr<NonnullRefPtr<OutlineItem>> Document::build_outline_item(NonnullRefPtr<DictObject> const& outline_item_dict, HashMap<u32, u32> const& page_number_by_index_ref) 330{ 331 auto outline_item = adopt_ref(*new OutlineItem {}); 332 333 if (outline_item_dict->contains(CommonNames::First)) { 334 VERIFY(outline_item_dict->contains(CommonNames::Last)); 335 auto first_ref = outline_item_dict->get_value(CommonNames::First); 336 auto children = TRY(build_outline_item_chain(first_ref, page_number_by_index_ref)); 337 for (auto& child : children) { 338 child->parent = outline_item; 339 } 340 outline_item->children = move(children); 341 } 342 343 outline_item->title = TRY(outline_item_dict->get_string(this, CommonNames::Title))->string(); 344 345 if (outline_item_dict->contains(CommonNames::Count)) 346 outline_item->count = outline_item_dict->get_value(CommonNames::Count).get<int>(); 347 348 if (outline_item_dict->contains(CommonNames::Dest)) { 349 auto dest_obj = TRY(outline_item_dict->get_object(this, CommonNames::Dest)); 350 351 if (dest_obj->is<ArrayObject>()) { 352 auto dest_arr = dest_obj->cast<ArrayObject>(); 353 outline_item->dest = TRY(create_destination_from_parameters(dest_arr, page_number_by_index_ref)); 354 } else if (dest_obj->is<NameObject>() || dest_obj->is<StringObject>()) { 355 DeprecatedFlyString dest_name; 356 if (dest_obj->is<NameObject>()) 357 dest_name = dest_obj->cast<NameObject>()->name(); 358 else 359 dest_name = dest_obj->cast<StringObject>()->string(); 360 if (auto dests_value = m_catalog->get(CommonNames::Dests); dests_value.has_value()) { 361 auto dests = dests_value.value().get<NonnullRefPtr<Object>>()->cast<DictObject>(); 362 auto entry = MUST(dests->get_object(this, dest_name)); 363 outline_item->dest = TRY(create_destination_from_dictionary_entry(entry, page_number_by_index_ref)); 364 } else if (auto names_value = m_catalog->get(CommonNames::Names); names_value.has_value()) { 365 auto names = TRY(resolve(names_value.release_value())).get<NonnullRefPtr<Object>>()->cast<DictObject>(); 366 if (!names->contains(CommonNames::Dests)) 367 return Error { Error::Type::MalformedPDF, "Missing Dests key in document catalogue's Names dictionary" }; 368 auto dest_obj = TRY(find_in_name_tree(TRY(names->get_dict(this, CommonNames::Dests)), dest_name)); 369 outline_item->dest = TRY(create_destination_from_dictionary_entry(dest_obj, page_number_by_index_ref)); 370 } else { 371 return Error { Error::Type::MalformedPDF, "Malformed outline destination" }; 372 } 373 } 374 } 375 376 if (outline_item_dict->contains(CommonNames::C)) { 377 auto color_array = TRY(outline_item_dict->get_array(this, CommonNames::C)); 378 auto r = static_cast<int>(255.0f * color_array->at(0).get<float>()); 379 auto g = static_cast<int>(255.0f * color_array->at(1).get<float>()); 380 auto b = static_cast<int>(255.0f * color_array->at(2).get<float>()); 381 outline_item->color = Color(r, g, b); 382 } 383 384 if (outline_item_dict->contains(CommonNames::F)) { 385 auto bitfield = outline_item_dict->get_value(CommonNames::F).get<int>(); 386 outline_item->italic = bitfield & 0x1; 387 outline_item->bold = bitfield & 0x2; 388 } 389 390 return outline_item; 391} 392 393PDFErrorOr<Vector<NonnullRefPtr<OutlineItem>>> Document::build_outline_item_chain(Value const& first_ref, HashMap<u32, u32> const& page_number_by_index_ref) 394{ 395 // We used to receive a last_ref parameter, which was what the parent of this chain 396 // thought was this chain's last child. There are documents out there in the wild 397 // where this cross-references don't match though, and it seems like simply following 398 // the /First and /Next links is the way to go to construct the whole Outline 399 // (we already ignore the /Parent attribute too, which can also be out of sync). 400 VERIFY(first_ref.has<Reference>()); 401 402 Vector<NonnullRefPtr<OutlineItem>> children; 403 404 auto first_value = TRY(get_or_load_value(first_ref.as_ref_index())).get<NonnullRefPtr<Object>>(); 405 auto first_dict = first_value->cast<DictObject>(); 406 auto first = TRY(build_outline_item(first_dict, page_number_by_index_ref)); 407 children.append(first); 408 409 auto current_child_dict = first_dict; 410 u32 current_child_index = first_ref.as_ref_index(); 411 412 while (current_child_dict->contains(CommonNames::Next)) { 413 auto next_child_dict_ref = current_child_dict->get_value(CommonNames::Next); 414 current_child_index = next_child_dict_ref.as_ref_index(); 415 auto next_child_value = TRY(get_or_load_value(current_child_index)).get<NonnullRefPtr<Object>>(); 416 auto next_child_dict = next_child_value->cast<DictObject>(); 417 auto next_child = TRY(build_outline_item(next_child_dict, page_number_by_index_ref)); 418 children.append(next_child); 419 420 current_child_dict = move(next_child_dict); 421 } 422 423 return children; 424} 425 426}