Serenity Operating System
at master 176 lines 7.6 kB view raw
1/* 2 * Copyright (c) 2023, the SerenityOS developers. 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#pragma once 8 9#include <AK/Math.h> 10#include <AK/Random.h> 11#include <AK/StdLibExtras.h> 12 13namespace AK { 14 15static constexpr int MEDIAN_OF_MEDIAN_CUTOFF = 4500; 16 17// FIXME: Stole and adapted these two functions from `Userland/Demos/Tubes/Tubes.cpp` we really need something like this in `AK/Random.h` 18static inline double random_double() 19{ 20 return get_random<u32>() / static_cast<double>(NumericLimits<u32>::max()); 21} 22 23static inline size_t random_int(size_t min, size_t max) 24{ 25 return min + round_to<size_t>(random_double() * (max - min)); 26} 27 28// Implementations of common pivot functions 29namespace PivotFunctions { 30 31// Just use the first element of the range as the pivot 32// Mainly used to debug the quick select algorithm 33// Good with random data since it has nearly no overhead 34// Attention: Turns the algorithm quadratic if used with already (partially) sorted data 35template<typename Collection, typename LessThan> 36size_t first_element([[maybe_unused]] Collection& collection, size_t left, [[maybe_unused]] size_t right, [[maybe_unused]] LessThan less_than) 37{ 38 return left; 39} 40 41// Just use the middle element of the range as the pivot 42// This is what is used in AK::single_pivot_quick_sort in quicksort.h 43// Works fairly well with random Data 44// Works incredibly well with sorted data since the pivot is always a perfect split 45template<typename Collection, typename LessThan> 46size_t middle_element([[maybe_unused]] Collection& collection, size_t left, size_t right, [[maybe_unused]] LessThan less_than) 47{ 48 return (left + right) / 2; 49} 50 51// Pick a random Pivot 52// This is the "Traditional" implementation of both quicksort and quick select 53// Performs fairly well both with random and sorted data 54template<typename Collection, typename LessThan> 55size_t random_element([[maybe_unused]] Collection& collection, size_t left, size_t right, [[maybe_unused]] LessThan less_than) 56{ 57 return random_int(left, right); 58} 59 60// Implementation detail of median_of_medians 61// Whilst this looks quadratic in runtime, it always gets called with 5 or fewer elements so can be considered constant runtime 62template<typename Collection, typename LessThan> 63size_t partition5(Collection& collection, size_t left, size_t right, LessThan less_than) 64{ 65 VERIFY((right - left) <= 5); 66 for (size_t i = left + 1; i <= right; i++) { 67 for (size_t j = i; j > left && less_than(collection.at(j), collection.at(j - 1)); j--) { 68 swap(collection.at(j), collection.at(j - 1)); 69 } 70 } 71 return (left + right) / 2; 72} 73 74// https://en.wikipedia.org/wiki/Median_of_medians 75// Use the median of medians algorithm to pick a really good pivot 76// This makes quick select run in linear time but comes with a lot of overhead that only pays off with very large inputs 77template<typename Collection, typename LessThan> 78size_t median_of_medians(Collection& collection, size_t left, size_t right, LessThan less_than) 79{ 80 if ((right - left) < 5) 81 return partition5(collection, left, right, less_than); 82 83 for (size_t i = left; i <= right; i += 5) { 84 size_t sub_right = i + 4; 85 if (sub_right > right) 86 sub_right = right; 87 88 size_t median5 = partition5(collection, i, sub_right, less_than); 89 swap(collection.at(median5), collection.at(left + (i - left) / 5)); 90 } 91 size_t mid = (right - left) / 10 + left + 1; 92 93 // We're using mutual recursion here, using quickselect_inplace to find the pivot for quickselect_inplace. 94 // Whilst this achieves True linear Runtime, it is a lot of overhead, so use only this variant with very large inputs 95 return quickselect_inplace( 96 collection, left, left + ((right - left) / 5), mid, [](auto collection, size_t left, size_t right, auto less_than) { return AK::PivotFunctions::median_of_medians(collection, left, right, less_than); }, less_than); 97} 98 99} 100 101// This is the Lomuto Partition scheme which is simpler but less efficient than Hoare's partitioning scheme that is traditionally used with quicksort 102// https://en.wikipedia.org/wiki/Quicksort#Lomuto_partition_scheme 103template<typename Collection, typename PivotFn, typename LessThan> 104static size_t partition(Collection& collection, size_t left, size_t right, PivotFn pivot_fn, LessThan less_than) 105{ 106 auto pivot_index = pivot_fn(collection, left, right, less_than); 107 auto pivot_value = collection.at(pivot_index); 108 swap(collection.at(pivot_index), collection.at(right)); 109 auto store_index = left; 110 111 for (size_t i = left; i < right; i++) { 112 if (less_than(collection.at(i), pivot_value)) { 113 swap(collection.at(store_index), collection.at(i)); 114 store_index++; 115 } 116 } 117 118 swap(collection.at(right), collection.at(store_index)); 119 return store_index; 120} 121 122template<typename Collection, typename PivotFn, typename LessThan> 123size_t quickselect_inplace(Collection& collection, size_t left, size_t right, size_t k, PivotFn pivot_fn, LessThan less_than) 124{ 125 // Bail if left is somehow bigger than right and return default constructed result 126 // FIXME: This can also occur when the collection is empty maybe propagate this error somehow? 127 // returning 0 would be a really bad thing since this returns and index and that might lead to memory errors 128 // returning in ErrorOr<size_t> here might be a good option but this is a very specific error that in nearly all circumstances should be considered a bug on the callers site 129 VERIFY(left <= right); 130 131 // If there's only one element, return that element 132 if (left == right) 133 return left; 134 135 auto pivot_index = partition(collection, left, right, pivot_fn, less_than); 136 137 // we found the thing we were searching for 138 if (k == pivot_index) 139 return k; 140 141 // Recurse on the left side 142 if (k < pivot_index) 143 return quickselect_inplace(collection, left, pivot_index - 1, k, pivot_fn, less_than); 144 145 // recurse on the right side 146 return quickselect_inplace(collection, pivot_index + 1, right, k, pivot_fn, less_than); 147} 148 149// 150template<typename Collection, typename PivotFn, typename LessThan> 151size_t quickselect_inplace(Collection& collection, size_t k, PivotFn pivot_fn, LessThan less_than) 152{ 153 return quickselect_inplace(collection, 0, collection.size() - 1, k, pivot_fn, less_than); 154} 155 156template<typename Collection, typename PivotFn> 157size_t quickselect_inplace(Collection& collection, size_t k, PivotFn pivot_fn) 158{ 159 return quickselect_inplace(collection, 0, collection.size() - 1, k, pivot_fn, [](auto& a, auto& b) { return a < b; }); 160} 161 162// All of these quick select implementation versions return the `index` of the resulting element, after the algorithm has run, not the element itself! 163// As Part of the Algorithm, they all modify the collection in place, partially sorting it in the process. 164template<typename Collection> 165size_t quickselect_inplace(Collection& collection, size_t k) 166{ 167 if (collection.size() >= MEDIAN_OF_MEDIAN_CUTOFF) 168 return quickselect_inplace( 169 collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::median_of_medians(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; }); 170 171 else 172 return quickselect_inplace( 173 collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::random_element(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; }); 174} 175 176}