···11+/*22+ * tools/testing/selftests/kvm/lib/kvm_util.c33+ *44+ * Copyright (C) 2018, Google LLC.55+ *66+ * This work is licensed under the terms of the GNU GPL, version 2.77+ */88+99+#ifndef KVM_UTIL_INTERNAL_H1010+#define KVM_UTIL_INTERNAL_H 11111+1212+#include "sparsebit.h"1313+1414+#ifndef BITS_PER_BYTE1515+#define BITS_PER_BYTE 81616+#endif1717+1818+#ifndef BITS_PER_LONG1919+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))2020+#endif2121+2222+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))2323+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)2424+2525+/* Concrete definition of struct kvm_vm. */2626+struct userspace_mem_region {2727+ struct userspace_mem_region *next, *prev;2828+ struct kvm_userspace_memory_region region;2929+ struct sparsebit *unused_phy_pages;3030+ int fd;3131+ off_t offset;3232+ void *host_mem;3333+ void *mmap_start;3434+ size_t mmap_size;3535+};3636+3737+struct vcpu {3838+ struct vcpu *next, *prev;3939+ uint32_t id;4040+ int fd;4141+ struct kvm_run *state;4242+};4343+4444+struct kvm_vm {4545+ int mode;4646+ int fd;4747+ unsigned int page_size;4848+ unsigned int page_shift;4949+ uint64_t max_gfn;5050+ struct vcpu *vcpu_head;5151+ struct userspace_mem_region *userspace_mem_region_head;5252+ struct sparsebit *vpages_valid;5353+ struct sparsebit *vpages_mapped;5454+ bool pgd_created;5555+ vm_paddr_t pgd;5656+};5757+5858+struct vcpu *vcpu_find(struct kvm_vm *vm,5959+ uint32_t vcpuid);6060+void vcpu_setup(struct kvm_vm *vm, int vcpuid);6161+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);6262+void regs_dump(FILE *stream, struct kvm_regs *regs,6363+ uint8_t indent);6464+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,6565+ uint8_t indent);6666+6767+#endif
+2087
tools/testing/selftests/kvm/lib/sparsebit.c
···11+/*22+ * Sparse bit array33+ *44+ * Copyright (C) 2018, Google LLC.55+ * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)66+ *77+ * This work is licensed under the terms of the GNU GPL, version 2.88+ *99+ * This library provides functions to support a memory efficient bit array,1010+ * with an index size of 2^64. A sparsebit array is allocated through1111+ * the use sparsebit_alloc() and free'd via sparsebit_free(),1212+ * such as in the following:1313+ *1414+ * struct sparsebit *s;1515+ * s = sparsebit_alloc();1616+ * sparsebit_free(&s);1717+ *1818+ * The struct sparsebit type resolves down to a struct sparsebit.1919+ * Note that, sparsebit_free() takes a pointer to the sparsebit2020+ * structure. This is so that sparsebit_free() is able to poison2121+ * the pointer (e.g. set it to NULL) to the struct sparsebit before2222+ * returning to the caller.2323+ *2424+ * Between the return of sparsebit_alloc() and the call of2525+ * sparsebit_free(), there are multiple query and modifying operations2626+ * that can be performed on the allocated sparsebit array. All of2727+ * these operations take as a parameter the value returned from2828+ * sparsebit_alloc() and most also take a bit index. Frequently2929+ * used routines include:3030+ *3131+ * ---- Query Operations3232+ * sparsebit_is_set(s, idx)3333+ * sparsebit_is_clear(s, idx)3434+ * sparsebit_any_set(s)3535+ * sparsebit_first_set(s)3636+ * sparsebit_next_set(s, prev_idx)3737+ *3838+ * ---- Modifying Operations3939+ * sparsebit_set(s, idx)4040+ * sparsebit_clear(s, idx)4141+ * sparsebit_set_num(s, idx, num);4242+ * sparsebit_clear_num(s, idx, num);4343+ *4444+ * A common operation, is to itterate over all the bits set in a test4545+ * sparsebit array. 
This can be done via code with the following structure:4646+ *4747+ * sparsebit_idx_t idx;4848+ * if (sparsebit_any_set(s)) {4949+ * idx = sparsebit_first_set(s);5050+ * do {5151+ * ...5252+ * idx = sparsebit_next_set(s, idx);5353+ * } while (idx != 0);5454+ * }5555+ *5656+ * The index of the first bit set needs to be obtained via5757+ * sparsebit_first_set(), because sparsebit_next_set(), needs5858+ * the index of the previously set. The sparsebit_idx_t type is5959+ * unsigned, so there is no previous index before 0 that is available.6060+ * Also, the call to sparsebit_first_set() is not made unless there6161+ * is at least 1 bit in the array set. This is because sparsebit_first_set()6262+ * aborts if sparsebit_first_set() is called with no bits set.6363+ * It is the callers responsibility to assure that the6464+ * sparsebit array has at least a single bit set before calling6565+ * sparsebit_first_set().6666+ *6767+ * ==== Implementation Overview ====6868+ * For the most part the internal implementation of sparsebit is6969+ * opaque to the caller. One important implementation detail that the7070+ * caller may need to be aware of is the spatial complexity of the7171+ * implementation. This implementation of a sparsebit array is not7272+ * only sparse, in that it uses memory proportional to the number of bits7373+ * set. 
It is also efficient in memory usage when most of the bits are7474+ * set.7575+ *7676+ * At a high-level the state of the bit settings are maintained through7777+ * the use of a binary-search tree, where each node contains at least7878+ * the following members:7979+ *8080+ * typedef uint64_t sparsebit_idx_t;8181+ * typedef uint64_t sparsebit_num_t;8282+ *8383+ * sparsebit_idx_t idx;8484+ * uint32_t mask;8585+ * sparsebit_num_t num_after;8686+ *8787+ * The idx member contains the bit index of the first bit described by this8888+ * node, while the mask member stores the setting of the first 32-bits.8989+ * The setting of the bit at idx + n, where 0 <= n < 32, is located in the9090+ * mask member at 1 << n.9191+ *9292+ * Nodes are sorted by idx and the bits described by two nodes will never9393+ * overlap. The idx member is always aligned to the mask size, i.e. a9494+ * multiple of 32.9595+ *9696+ * Beyond a typical implementation, the nodes in this implementation also9797+ * contains a member named num_after. The num_after member holds the9898+ * number of bits immediately after the mask bits that are contiguously set.9999+ * The use of the num_after member allows this implementation to efficiently100100+ * represent cases where most bits are set. 
For example, the case of all101101+ * but the last two bits set, is represented by the following two nodes:102102+ *103103+ * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0104104+ * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0105105+ *106106+ * ==== Invariants ====107107+ * This implementation usses the following invariants:108108+ *109109+ * + Node are only used to represent bits that are set.110110+ * Nodes with a mask of 0 and num_after of 0 are not allowed.111111+ *112112+ * + Sum of bits set in all the nodes is equal to the value of113113+ * the struct sparsebit_pvt num_set member.114114+ *115115+ * + The setting of at least one bit is always described in a nodes116116+ * mask (mask >= 1).117117+ *118118+ * + A node with all mask bits set only occurs when the last bit119119+ * described by the previous node is not equal to this nodes120120+ * starting index - 1. All such occurences of this condition are121121+ * avoided by moving the setting of the nodes mask bits into122122+ * the previous nodes num_after setting.123123+ *124124+ * + Node starting index is evenly divisable by the number of bits125125+ * within a nodes mask member.126126+ *127127+ * + Nodes never represent a range of bits that wrap around the128128+ * highest supported index.129129+ *130130+ * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1)131131+ *132132+ * As a consequence of the above, the num_after member of a node133133+ * will always be <=:134134+ *135135+ * maximum_index - nodes_starting_index - number_of_mask_bits136136+ *137137+ * + Nodes within the binary search tree are sorted based on each138138+ * nodes starting index.139139+ *140140+ * + The range of bits described by any two nodes do not overlap. 
The141141+ * range of bits described by a single node is:142142+ *143143+ * start: node->idx144144+ * end (inclusive): node->idx + MASK_BITS + node->num_after - 1;145145+ *146146+ * Note, at times these invariants are temporarily violated for a147147+ * specific portion of the code. For example, when setting a mask148148+ * bit, there is a small delay between when the mask bit is set and the149149+ * value in the struct sparsebit_pvt num_set member is updated. Other150150+ * temporary violations occur when node_split() is called with a specified151151+ * index and assures that a node where its mask represents the bit152152+ * at the specified index exists. At times to do this node_split()153153+ * must split an existing node into two nodes or create a node that154154+ * has no bits set. Such temporary violations must be corrected before155155+ * returning to the caller. These corrections are typically performed156156+ * by the local function node_reduce().157157+ */158158+159159+#include "test_util.h"160160+#include "sparsebit.h"161161+#include <limits.h>162162+#include <assert.h>163163+164164+#define DUMP_LINE_MAX 100 /* Does not include indent amount */165165+166166+typedef uint32_t mask_t;167167+#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)168168+169169+struct node {170170+ struct node *parent;171171+ struct node *left;172172+ struct node *right;173173+ sparsebit_idx_t idx; /* index of least-significant bit in mask */174174+ sparsebit_num_t num_after; /* num contiguously set after mask */175175+ mask_t mask;176176+};177177+178178+struct sparsebit {179179+ /*180180+ * Points to root node of the binary search181181+ * tree. Equal to NULL when no bits are set in182182+ * the entire sparsebit array.183183+ */184184+ struct node *root;185185+186186+ /*187187+ * A redundant count of the total number of bits set. 
Used for188188+ * diagnostic purposes and to change the time complexity of189189+ * sparsebit_num_set() from O(n) to O(1).190190+ * Note: Due to overflow, a value of 0 means none or all set.191191+ */192192+ sparsebit_num_t num_set;193193+};194194+195195+/* Returns the number of set bits described by the settings196196+ * of the node pointed to by nodep.197197+ */198198+static sparsebit_num_t node_num_set(struct node *nodep)199199+{200200+ return nodep->num_after + __builtin_popcount(nodep->mask);201201+}202202+203203+/* Returns a pointer to the node that describes the204204+ * lowest bit index.205205+ */206206+static struct node *node_first(struct sparsebit *s)207207+{208208+ struct node *nodep;209209+210210+ for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)211211+ ;212212+213213+ return nodep;214214+}215215+216216+/* Returns a pointer to the node that describes the217217+ * lowest bit index > the index of the node pointed to by np.218218+ * Returns NULL if no node with a higher index exists.219219+ */220220+static struct node *node_next(struct sparsebit *s, struct node *np)221221+{222222+ struct node *nodep = np;223223+224224+ /*225225+ * If current node has a right child, next node is the left-most226226+ * of the right child.227227+ */228228+ if (nodep->right) {229229+ for (nodep = nodep->right; nodep->left; nodep = nodep->left)230230+ ;231231+ return nodep;232232+ }233233+234234+ /*235235+ * No right child. 
Go up until node is left child of a parent.236236+ * That parent is then the next node.237237+ */238238+ while (nodep->parent && nodep == nodep->parent->right)239239+ nodep = nodep->parent;240240+241241+ return nodep->parent;242242+}243243+244244+/* Searches for and returns a pointer to the node that describes the245245+ * highest index < the index of the node pointed to by np.246246+ * Returns NULL if no node with a lower index exists.247247+ */248248+static struct node *node_prev(struct sparsebit *s, struct node *np)249249+{250250+ struct node *nodep = np;251251+252252+ /*253253+ * If current node has a left child, next node is the right-most254254+ * of the left child.255255+ */256256+ if (nodep->left) {257257+ for (nodep = nodep->left; nodep->right; nodep = nodep->right)258258+ ;259259+ return (struct node *) nodep;260260+ }261261+262262+ /*263263+ * No left child. Go up until node is right child of a parent.264264+ * That parent is then the next node.265265+ */266266+ while (nodep->parent && nodep == nodep->parent->left)267267+ nodep = nodep->parent;268268+269269+ return (struct node *) nodep->parent;270270+}271271+272272+273273+/* Allocates space to hold a copy of the node sub-tree pointed to by274274+ * subtree and duplicates the bit settings to the newly allocated nodes.275275+ * Returns the newly allocated copy of subtree.276276+ */277277+static struct node *node_copy_subtree(struct node *subtree)278278+{279279+ struct node *root;280280+281281+ /* Duplicate the node at the root of the subtree */282282+ root = calloc(1, sizeof(*root));283283+ if (!root) {284284+ perror("calloc");285285+ abort();286286+ }287287+288288+ root->idx = subtree->idx;289289+ root->mask = subtree->mask;290290+ root->num_after = subtree->num_after;291291+292292+ /* As needed, recursively duplicate the left and right subtrees */293293+ if (subtree->left) {294294+ root->left = node_copy_subtree(subtree->left);295295+ root->left->parent = root;296296+ }297297+298298+ if (subtree->right) 
{299299+ root->right = node_copy_subtree(subtree->right);300300+ root->right->parent = root;301301+ }302302+303303+ return root;304304+}305305+306306+/* Searches for and returns a pointer to the node that describes the setting307307+ * of the bit given by idx. A node describes the setting of a bit if its308308+ * index is within the bits described by the mask bits or the number of309309+ * contiguous bits set after the mask. Returns NULL if there is no such node.310310+ */311311+static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)312312+{313313+ struct node *nodep;314314+315315+ /* Find the node that describes the setting of the bit at idx */316316+ for (nodep = s->root; nodep;317317+ nodep = nodep->idx > idx ? nodep->left : nodep->right) {318318+ if (idx >= nodep->idx &&319319+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)320320+ break;321321+ }322322+323323+ return nodep;324324+}325325+326326+/* Entry Requirements:327327+ * + A node that describes the setting of idx is not already present.328328+ *329329+ * Adds a new node to describe the setting of the bit at the index given330330+ * by idx. Returns a pointer to the newly added node.331331+ *332332+ * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced.333333+ */334334+static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)335335+{336336+ struct node *nodep, *parentp, *prev;337337+338338+ /* Allocate and initialize the new node. */339339+ nodep = calloc(1, sizeof(*nodep));340340+ if (!nodep) {341341+ perror("calloc");342342+ abort();343343+ }344344+345345+ nodep->idx = idx & -MASK_BITS;346346+347347+ /* If no nodes, set it up as the root node. 
*/348348+ if (!s->root) {349349+ s->root = nodep;350350+ return nodep;351351+ }352352+353353+ /*354354+ * Find the parent where the new node should be attached355355+ * and add the node there.356356+ */357357+ parentp = s->root;358358+ while (true) {359359+ if (idx < parentp->idx) {360360+ if (!parentp->left) {361361+ parentp->left = nodep;362362+ nodep->parent = parentp;363363+ break;364364+ }365365+ parentp = parentp->left;366366+ } else {367367+ assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);368368+ if (!parentp->right) {369369+ parentp->right = nodep;370370+ nodep->parent = parentp;371371+ break;372372+ }373373+ parentp = parentp->right;374374+ }375375+ }376376+377377+ /*378378+ * Does num_after bits of previous node overlap with the mask379379+ * of the new node? If so set the bits in the new nodes mask380380+ * and reduce the previous nodes num_after.381381+ */382382+ prev = node_prev(s, nodep);383383+ while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {384384+ unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)385385+ - nodep->idx;386386+ assert(prev->num_after > 0);387387+ assert(n1 < MASK_BITS);388388+ assert(!(nodep->mask & (1 << n1)));389389+ nodep->mask |= (1 << n1);390390+ prev->num_after--;391391+ }392392+393393+ return nodep;394394+}395395+396396+/* Returns whether all the bits in the sparsebit array are set. */397397+bool sparsebit_all_set(struct sparsebit *s)398398+{399399+ /*400400+ * If any nodes there must be at least one bit set. 
Only case401401+ * where a bit is set and total num set is 0, is when all bits402402+ * are set.403403+ */404404+ return s->root && s->num_set == 0;405405+}406406+407407+/* Clears all bits described by the node pointed to by nodep, then408408+ * removes the node.409409+ */410410+static void node_rm(struct sparsebit *s, struct node *nodep)411411+{412412+ struct node *tmp;413413+ sparsebit_num_t num_set;414414+415415+ num_set = node_num_set(nodep);416416+ assert(s->num_set >= num_set || sparsebit_all_set(s));417417+ s->num_set -= node_num_set(nodep);418418+419419+ /* Have both left and right child */420420+ if (nodep->left && nodep->right) {421421+ /*422422+ * Move left children to the leftmost leaf node423423+ * of the right child.424424+ */425425+ for (tmp = nodep->right; tmp->left; tmp = tmp->left)426426+ ;427427+ tmp->left = nodep->left;428428+ nodep->left = NULL;429429+ tmp->left->parent = tmp;430430+ }431431+432432+ /* Left only child */433433+ if (nodep->left) {434434+ if (!nodep->parent) {435435+ s->root = nodep->left;436436+ nodep->left->parent = NULL;437437+ } else {438438+ nodep->left->parent = nodep->parent;439439+ if (nodep == nodep->parent->left)440440+ nodep->parent->left = nodep->left;441441+ else {442442+ assert(nodep == nodep->parent->right);443443+ nodep->parent->right = nodep->left;444444+ }445445+ }446446+447447+ nodep->parent = nodep->left = nodep->right = NULL;448448+ free(nodep);449449+450450+ return;451451+ }452452+453453+454454+ /* Right only child */455455+ if (nodep->right) {456456+ if (!nodep->parent) {457457+ s->root = nodep->right;458458+ nodep->right->parent = NULL;459459+ } else {460460+ nodep->right->parent = nodep->parent;461461+ if (nodep == nodep->parent->left)462462+ nodep->parent->left = nodep->right;463463+ else {464464+ assert(nodep == nodep->parent->right);465465+ nodep->parent->right = nodep->right;466466+ }467467+ }468468+469469+ nodep->parent = nodep->left = nodep->right = NULL;470470+ free(nodep);471471+472472+ 
return;473473+ }474474+475475+ /* Leaf Node */476476+ if (!nodep->parent) {477477+ s->root = NULL;478478+ } else {479479+ if (nodep->parent->left == nodep)480480+ nodep->parent->left = NULL;481481+ else {482482+ assert(nodep == nodep->parent->right);483483+ nodep->parent->right = NULL;484484+ }485485+ }486486+487487+ nodep->parent = nodep->left = nodep->right = NULL;488488+ free(nodep);489489+490490+ return;491491+}492492+493493+/* Splits the node containing the bit at idx so that there is a node494494+ * that starts at the specified index. If no such node exists, a new495495+ * node at the specified index is created. Returns the new node.496496+ *497497+ * idx must start of a mask boundary.498498+ */499499+static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)500500+{501501+ struct node *nodep1, *nodep2;502502+ sparsebit_idx_t offset;503503+ sparsebit_num_t orig_num_after;504504+505505+ assert(!(idx % MASK_BITS));506506+507507+ /*508508+ * Is there a node that describes the setting of idx?509509+ * If not, add it.510510+ */511511+ nodep1 = node_find(s, idx);512512+ if (!nodep1)513513+ return node_add(s, idx);514514+515515+ /*516516+ * All done if the starting index of the node is where the517517+ * split should occur.518518+ */519519+ if (nodep1->idx == idx)520520+ return nodep1;521521+522522+ /*523523+ * Split point not at start of mask, so it must be part of524524+ * bits described by num_after.525525+ */526526+527527+ /*528528+ * Calculate offset within num_after for where the split is529529+ * to occur.530530+ */531531+ offset = idx - (nodep1->idx + MASK_BITS);532532+ orig_num_after = nodep1->num_after;533533+534534+ /*535535+ * Add a new node to describe the bits starting at536536+ * the split point.537537+ */538538+ nodep1->num_after = offset;539539+ nodep2 = node_add(s, idx);540540+541541+ /* Move bits after the split point into the new node */542542+ nodep2->num_after = orig_num_after - offset;543543+ if (nodep2->num_after >= MASK_BITS) 
{544544+ nodep2->mask = ~(mask_t) 0;545545+ nodep2->num_after -= MASK_BITS;546546+ } else {547547+ nodep2->mask = (1 << nodep2->num_after) - 1;548548+ nodep2->num_after = 0;549549+ }550550+551551+ return nodep2;552552+}553553+554554+/* Iteratively reduces the node pointed to by nodep and its adjacent555555+ * nodes into a more compact form. For example, a node with a mask with556556+ * all bits set adjacent to a previous node, will get combined into a557557+ * single node with an increased num_after setting.558558+ *559559+ * After each reduction, a further check is made to see if additional560560+ * reductions are possible with the new previous and next nodes. Note,561561+ * a search for a reduction is only done across the nodes nearest nodep562562+ * and those that became part of a reduction. Reductions beyond nodep563563+ * and the adjacent nodes that are reduced are not discovered. It is the564564+ * responsibility of the caller to pass a nodep that is within one node565565+ * of each possible reduction.566566+ *567567+ * This function does not fix the temporary violation of all invariants.568568+ * For example it does not fix the case where the bit settings described569569+ * by two or more nodes overlap. Such a violation introduces the potential570570+ * complication of a bit setting for a specific index having different settings571571+ * in different nodes. 
This would then introduce the further complication572572+ * of which node has the correct setting of the bit and thus such conditions573573+ * are not allowed.574574+ *575575+ * This function is designed to fix invariant violations that are introduced576576+ * by node_split() and by changes to the nodes mask or num_after members.577577+ * For example, when setting a bit within a nodes mask, the function that578578+ * sets the bit doesn't have to worry about whether the setting of that579579+ * bit caused the mask to have leading only or trailing only bits set.580580+ * Instead, the function can call node_reduce(), with nodep equal to the581581+ * node address that it set a mask bit in, and node_reduce() will notice582582+ * the cases of leading or trailing only bits and that there is an583583+ * adjacent node that the bit settings could be merged into.584584+ *585585+ * This implementation specifically detects and corrects violation of the586586+ * following invariants:587587+ *588588+ * + Node are only used to represent bits that are set.589589+ * Nodes with a mask of 0 and num_after of 0 are not allowed.590590+ *591591+ * + The setting of at least one bit is always described in a nodes592592+ * mask (mask >= 1).593593+ *594594+ * + A node with all mask bits set only occurs when the last bit595595+ * described by the previous node is not equal to this nodes596596+ * starting index - 1. All such occurences of this condition are597597+ * avoided by moving the setting of the nodes mask bits into598598+ * the previous nodes num_after setting.599599+ */600600+static void node_reduce(struct sparsebit *s, struct node *nodep)601601+{602602+ bool reduction_performed;603603+604604+ do {605605+ reduction_performed = false;606606+ struct node *prev, *next, *tmp;607607+608608+ /* 1) Potential reductions within the current node. */609609+610610+ /* Nodes with all bits cleared may be removed. 
*/611611+ if (nodep->mask == 0 && nodep->num_after == 0) {612612+ /*613613+ * About to remove the node pointed to by614614+ * nodep, which normally would cause a problem615615+ * for the next pass through the reduction loop,616616+ * because the node at the starting point no longer617617+ * exists. This potential problem is handled618618+ * by first remembering the location of the next619619+ * or previous nodes. Doesn't matter which, because620620+ * once the node at nodep is removed, there will be621621+ * no other nodes between prev and next.622622+ *623623+ * Note, the checks performed on nodep against both624624+ * both prev and next both check for an adjacent625625+ * node that can be reduced into a single node. As626626+ * such, after removing the node at nodep, doesn't627627+ * matter whether the nodep for the next pass628628+ * through the loop is equal to the previous pass629629+ * prev or next node. Either way, on the next pass630630+ * the one not selected will become either the631631+ * prev or next node.632632+ */633633+ tmp = node_next(s, nodep);634634+ if (!tmp)635635+ tmp = node_prev(s, nodep);636636+637637+ node_rm(s, nodep);638638+ nodep = NULL;639639+640640+ nodep = tmp;641641+ reduction_performed = true;642642+ continue;643643+ }644644+645645+ /*646646+ * When the mask is 0, can reduce the amount of num_after647647+ * bits by moving the initial num_after bits into the mask.648648+ */649649+ if (nodep->mask == 0) {650650+ assert(nodep->num_after != 0);651651+ assert(nodep->idx + MASK_BITS > nodep->idx);652652+653653+ nodep->idx += MASK_BITS;654654+655655+ if (nodep->num_after >= MASK_BITS) {656656+ nodep->mask = ~0;657657+ nodep->num_after -= MASK_BITS;658658+ } else {659659+ nodep->mask = (1u << nodep->num_after) - 1;660660+ nodep->num_after = 0;661661+ }662662+663663+ reduction_performed = true;664664+ continue;665665+ }666666+667667+ /*668668+ * 2) Potential reductions between the current and669669+ * previous nodes.670670+ */671671+ prev = 
node_prev(s, nodep);672672+ if (prev) {673673+ sparsebit_idx_t prev_highest_bit;674674+675675+ /* Nodes with no bits set can be removed. */676676+ if (prev->mask == 0 && prev->num_after == 0) {677677+ node_rm(s, prev);678678+679679+ reduction_performed = true;680680+ continue;681681+ }682682+683683+ /*684684+ * All mask bits set and previous node has685685+ * adjacent index.686686+ */687687+ if (nodep->mask + 1 == 0 &&688688+ prev->idx + MASK_BITS == nodep->idx) {689689+ prev->num_after += MASK_BITS + nodep->num_after;690690+ nodep->mask = 0;691691+ nodep->num_after = 0;692692+693693+ reduction_performed = true;694694+ continue;695695+ }696696+697697+ /*698698+ * Is node adjacent to previous node and the node699699+ * contains a single contiguous range of bits700700+ * starting from the beginning of the mask?701701+ */702702+ prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;703703+ if (prev_highest_bit + 1 == nodep->idx &&704704+ (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {705705+ /*706706+ * How many contiguous bits are there?707707+ * Is equal to the total number of set708708+ * bits, due to an earlier check that709709+ * there is a single contiguous range of710710+ * set bits.711711+ */712712+ unsigned int num_contiguous713713+ = __builtin_popcount(nodep->mask);714714+ assert((num_contiguous > 0) &&715715+ ((1ULL << num_contiguous) - 1) == nodep->mask);716716+717717+ prev->num_after += num_contiguous;718718+ nodep->mask = 0;719719+720720+ /*721721+ * For predictable performance, handle special722722+ * case where all mask bits are set and there723723+ * is a non-zero num_after setting. This code724724+ * is functionally correct without the following725725+ * conditionalized statements, but without them726726+ * the value of num_after is only reduced by727727+ * the number of mask bits per pass. 
There are728728+ * cases where num_after can be close to 2^64.729729+ * Without this code it could take nearly730730+ * (2^64) / 32 passes to perform the full731731+ * reduction.732732+ */733733+ if (num_contiguous == MASK_BITS) {734734+ prev->num_after += nodep->num_after;735735+ nodep->num_after = 0;736736+ }737737+738738+ reduction_performed = true;739739+ continue;740740+ }741741+ }742742+743743+ /*744744+ * 3) Potential reductions between the current and745745+ * next nodes.746746+ */747747+ next = node_next(s, nodep);748748+ if (next) {749749+ /* Nodes with no bits set can be removed. */750750+ if (next->mask == 0 && next->num_after == 0) {751751+ node_rm(s, next);752752+ reduction_performed = true;753753+ continue;754754+ }755755+756756+ /*757757+ * Is next node index adjacent to current node758758+ * and has a mask with all bits set?759759+ */760760+ if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&761761+ next->mask == ~(mask_t) 0) {762762+ nodep->num_after += MASK_BITS;763763+ next->mask = 0;764764+ nodep->num_after += next->num_after;765765+ next->num_after = 0;766766+767767+ node_rm(s, next);768768+ next = NULL;769769+770770+ reduction_performed = true;771771+ continue;772772+ }773773+ }774774+ } while (nodep && reduction_performed);775775+}776776+777777+/* Returns whether the bit at the index given by idx, within the778778+ * sparsebit array is set or not.779779+ */780780+bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)781781+{782782+ struct node *nodep;783783+784784+ /* Find the node that describes the setting of the bit at idx */785785+ for (nodep = s->root; nodep;786786+ nodep = nodep->idx > idx ? 
nodep->left : nodep->right)787787+ if (idx >= nodep->idx &&788788+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)789789+ goto have_node;790790+791791+ return false;792792+793793+have_node:794794+ /* Bit is set if it is any of the bits described by num_after */795795+ if (nodep->num_after && idx >= nodep->idx + MASK_BITS)796796+ return true;797797+798798+ /* Is the corresponding mask bit set */799799+ assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);800800+ return !!(nodep->mask & (1 << (idx - nodep->idx)));801801+}802802+803803+/* Within the sparsebit array pointed to by s, sets the bit804804+ * at the index given by idx.805805+ */806806+static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)807807+{808808+ struct node *nodep;809809+810810+ /* Skip bits that are already set */811811+ if (sparsebit_is_set(s, idx))812812+ return;813813+814814+ /*815815+ * Get a node where the bit at idx is described by the mask.816816+ * The node_split will also create a node, if there isn't817817+ * already a node that describes the setting of bit.818818+ */819819+ nodep = node_split(s, idx & -MASK_BITS);820820+821821+ /* Set the bit within the nodes mask */822822+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);823823+ assert(!(nodep->mask & (1 << (idx - nodep->idx))));824824+ nodep->mask |= 1 << (idx - nodep->idx);825825+ s->num_set++;826826+827827+ node_reduce(s, nodep);828828+}829829+830830+/* Within the sparsebit array pointed to by s, clears the bit831831+ * at the index given by idx.832832+ */833833+static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)834834+{835835+ struct node *nodep;836836+837837+ /* Skip bits that are already cleared */838838+ if (!sparsebit_is_set(s, idx))839839+ return;840840+841841+ /* Is there a node that describes the setting of this bit? 
*/842842+ nodep = node_find(s, idx);843843+ if (!nodep)844844+ return;845845+846846+ /*847847+ * If a num_after bit, split the node, so that the bit is848848+ * part of a node mask.849849+ */850850+ if (idx >= nodep->idx + MASK_BITS)851851+ nodep = node_split(s, idx & -MASK_BITS);852852+853853+ /*854854+ * After node_split above, bit at idx should be within the mask.855855+ * Clear that bit.856856+ */857857+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);858858+ assert(nodep->mask & (1 << (idx - nodep->idx)));859859+ nodep->mask &= ~(1 << (idx - nodep->idx));860860+ assert(s->num_set > 0 || sparsebit_all_set(s));861861+ s->num_set--;862862+863863+ node_reduce(s, nodep);864864+}865865+866866+/* Recursively dumps to the FILE stream given by stream the contents867867+ * of the sub-tree of nodes pointed to by nodep. Each line of output868868+ * is prefixed by the number of spaces given by indent. On each869869+ * recursion, the indent amount is increased by 2. This causes nodes870870+ * at each level deeper into the binary search tree to be displayed871871+ * with a greater indent.872872+ */873873+static void dump_nodes(FILE *stream, struct node *nodep,874874+ unsigned int indent)875875+{876876+ char *node_type;877877+878878+ /* Dump contents of node */879879+ if (!nodep->parent)880880+ node_type = "root";881881+ else if (nodep == nodep->parent->left)882882+ node_type = "left";883883+ else {884884+ assert(nodep == nodep->parent->right);885885+ node_type = "right";886886+ }887887+ fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);888888+ fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",889889+ nodep->parent, nodep->left, nodep->right);890890+ fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",891891+ indent, "", nodep->idx, nodep->mask, nodep->num_after);892892+893893+ /* If present, dump contents of left child nodes */894894+ if (nodep->left)895895+ dump_nodes(stream, nodep->left, indent + 
2);

	/* If present, dump contents of right child nodes */
	if (nodep->right)
		dump_nodes(stream, nodep->right, indent + 2);
}

/* Returns the index of the lowest set mask bit in nodep at or above
 * bit position start.  The caller must guarantee such a bit exists:
 * __builtin_ctz(0) is undefined.
 * NOTE(review): __builtin_ctz() operates on unsigned int, so this
 * assumes mask_t is no wider than int -- confirm against the mask_t
 * typedef.
 */
static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
{
	/* -leading is a mask with all bit positions >= start set */
	mask_t leading = (mask_t)1 << start;
	int n1 = __builtin_ctz(nodep->mask & -leading);

	return nodep->idx + n1;
}

/* Returns the index of the lowest cleared mask bit in nodep at or
 * above bit position start.  Same caller obligation as
 * node_first_set(): a cleared bit at or above start must exist.
 */
static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
{
	mask_t leading = (mask_t)1 << start;
	int n1 = __builtin_ctz(~nodep->mask & -leading);

	return nodep->idx + n1;
}

/* Dumps to the FILE stream specified by stream, the implementation dependent
 * internal state of s.  Each line of output is prefixed with the number
 * of spaces given by indent.  The output is completely implementation
 * dependent and subject to change.  Output from this function should only
 * be used for diagnostic purposes.  For example, this function can be
 * used by test cases after they detect an unexpected condition, as a means
 * to capture diagnostic information.
 */
static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
	unsigned int indent)
{
	/* Dump the contents of s */
	fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
	fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);

	/* Then the whole node tree, if any */
	if (s->root)
		dump_nodes(stream, s->root, indent);
}

/* Allocates and returns a new sparsebit array. The initial state
 * of the newly allocated sparsebit array has all bits cleared.
 * Aborts on allocation failure; never returns NULL.
 */
struct sparsebit *sparsebit_alloc(void)
{
	struct sparsebit *s;

	/* Allocate top level structure. */
	s = calloc(1, sizeof(*s));
	if (!s) {
		perror("calloc");
		abort();
	}

	return s;
}

/* Frees the implementation dependent data for the sparsebit array
 * pointed to by s and poisons the pointer to that data.
 */
void sparsebit_free(struct sparsebit **sbitp)
{
	struct sparsebit *s = *sbitp;

	if (!s)
		return;

	/* Clearing all bits releases every tree node */
	sparsebit_clear_all(s);
	free(s);
	*sbitp = NULL;
}

/* Makes a copy of the sparsebit array given by s, to the sparsebit
 * array given by d.  Note, d must have already been allocated via
 * sparsebit_alloc().  It can though already have bits set, which
 * if different from src will be cleared.
 */
void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
{
	/* First clear any bits already set in the destination */
	sparsebit_clear_all(d);

	if (s->root) {
		d->root = node_copy_subtree(s->root);
		d->num_set = s->num_set;
	}
}

/* Returns whether num consecutive bits starting at idx are all set. */
bool sparsebit_is_set_num(struct sparsebit *s,
	sparsebit_idx_t idx, sparsebit_num_t num)
{
	sparsebit_idx_t next_cleared;

	assert(num > 0);
	assert(idx + num - 1 >= idx);

	/* With num > 0, the first bit must be set. */
	if (!sparsebit_is_set(s, idx))
		return false;

	/* Find the next cleared bit */
	next_cleared = sparsebit_next_clear(s, idx);

	/*
	 * If no cleared bits beyond idx, then there are at least num
	 * set bits. idx + num doesn't wrap. Otherwise check if
	 * there are enough set bits between idx and the next cleared bit.
	 */
	return next_cleared == 0 || next_cleared - idx >= num;
}

/* Returns whether the bit at the index given by idx is cleared. 
*/10101010+bool sparsebit_is_clear(struct sparsebit *s,10111011+ sparsebit_idx_t idx)10121012+{10131013+ return !sparsebit_is_set(s, idx);10141014+}10151015+10161016+/* Returns whether num consecutive bits starting at idx are all cleared. */10171017+bool sparsebit_is_clear_num(struct sparsebit *s,10181018+ sparsebit_idx_t idx, sparsebit_num_t num)10191019+{10201020+ sparsebit_idx_t next_set;10211021+10221022+ assert(num > 0);10231023+ assert(idx + num - 1 >= idx);10241024+10251025+ /* With num > 0, the first bit must be cleared. */10261026+ if (!sparsebit_is_clear(s, idx))10271027+ return false;10281028+10291029+ /* Find the next set bit */10301030+ next_set = sparsebit_next_set(s, idx);10311031+10321032+ /*10331033+ * If no set bits beyond idx, then there are at least num10341034+ * cleared bits. idx + num doesn't wrap. Otherwise check if10351035+ * there are enough cleared bits between idx and the next set bit.10361036+ */10371037+ return next_set == 0 || next_set - idx >= num;10381038+}10391039+10401040+/* Returns the total number of bits set. Note: 0 is also returned for10411041+ * the case of all bits set. This is because with all bits set, there10421042+ * is 1 additional bit set beyond what can be represented in the return10431043+ * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,10441044+ * to determine if the sparsebit array has any bits set.10451045+ */10461046+sparsebit_num_t sparsebit_num_set(struct sparsebit *s)10471047+{10481048+ return s->num_set;10491049+}10501050+10511051+/* Returns whether any bit is set in the sparsebit array. */10521052+bool sparsebit_any_set(struct sparsebit *s)10531053+{10541054+ /*10551055+ * Nodes only describe set bits. If any nodes then there10561056+ * is at least 1 bit set.10571057+ */10581058+ if (!s->root)10591059+ return false;10601060+10611061+ /*10621062+ * Every node should have a non-zero mask. 
For now will10631063+ * just assure that the root node has a non-zero mask,10641064+ * which is a quick check that at least 1 bit is set.10651065+ */10661066+ assert(s->root->mask != 0);10671067+ assert(s->num_set > 0 ||10681068+ (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&10691069+ s->root->mask == ~(mask_t) 0));10701070+10711071+ return true;10721072+}10731073+10741074+/* Returns whether all the bits in the sparsebit array are cleared. */10751075+bool sparsebit_all_clear(struct sparsebit *s)10761076+{10771077+ return !sparsebit_any_set(s);10781078+}10791079+10801080+/* Returns whether all the bits in the sparsebit array are set. */10811081+bool sparsebit_any_clear(struct sparsebit *s)10821082+{10831083+ return !sparsebit_all_set(s);10841084+}10851085+10861086+/* Returns the index of the first set bit. Abort if no bits are set.10871087+ */10881088+sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)10891089+{10901090+ struct node *nodep;10911091+10921092+ /* Validate at least 1 bit is set */10931093+ assert(sparsebit_any_set(s));10941094+10951095+ nodep = node_first(s);10961096+ return node_first_set(nodep, 0);10971097+}10981098+10991099+/* Returns the index of the first cleared bit. Abort if11001100+ * no bits are cleared.11011101+ */11021102+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)11031103+{11041104+ struct node *nodep1, *nodep2;11051105+11061106+ /* Validate at least 1 bit is cleared. */11071107+ assert(sparsebit_any_clear(s));11081108+11091109+ /* If no nodes or first node index > 0 then lowest cleared is 0 */11101110+ nodep1 = node_first(s);11111111+ if (!nodep1 || nodep1->idx > 0)11121112+ return 0;11131113+11141114+ /* Does the mask in the first node contain any cleared bits. */11151115+ if (nodep1->mask != ~(mask_t) 0)11161116+ return node_first_clear(nodep1, 0);11171117+11181118+ /*11191119+ * All mask bits set in first node. 
If there isn't a second node11201120+ * then the first cleared bit is the first bit after the bits11211121+ * described by the first node.11221122+ */11231123+ nodep2 = node_next(s, nodep1);11241124+ if (!nodep2) {11251125+ /*11261126+ * No second node. First cleared bit is first bit beyond11271127+ * bits described by first node.11281128+ */11291129+ assert(nodep1->mask == ~(mask_t) 0);11301130+ assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);11311131+ return nodep1->idx + MASK_BITS + nodep1->num_after;11321132+ }11331133+11341134+ /*11351135+ * There is a second node.11361136+ * If it is not adjacent to the first node, then there is a gap11371137+ * of cleared bits between the nodes, and the first cleared bit11381138+ * is the first bit within the gap.11391139+ */11401140+ if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)11411141+ return nodep1->idx + MASK_BITS + nodep1->num_after;11421142+11431143+ /*11441144+ * Second node is adjacent to the first node.11451145+ * Because it is adjacent, its mask should be non-zero. If all11461146+ * its mask bits are set, then with it being adjacent, it should11471147+ * have had the mask bits moved into the num_after setting of the11481148+ * previous node.11491149+ */11501150+ return node_first_clear(nodep2, 0);11511151+}11521152+11531153+/* Returns index of next bit set within s after the index given by prev.11541154+ * Returns 0 if there are no bits after prev that are set.11551155+ */11561156+sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,11571157+ sparsebit_idx_t prev)11581158+{11591159+ sparsebit_idx_t lowest_possible = prev + 1;11601160+ sparsebit_idx_t start;11611161+ struct node *nodep;11621162+11631163+ /* A bit after the highest index can't be set. 
*/11641164+ if (lowest_possible == 0)11651165+ return 0;11661166+11671167+ /*11681168+ * Find the leftmost 'candidate' overlapping or to the right11691169+ * of lowest_possible.11701170+ */11711171+ struct node *candidate = NULL;11721172+11731173+ /* True iff lowest_possible is within candidate */11741174+ bool contains = false;11751175+11761176+ /*11771177+ * Find node that describes setting of bit at lowest_possible.11781178+ * If such a node doesn't exist, find the node with the lowest11791179+ * starting index that is > lowest_possible.11801180+ */11811181+ for (nodep = s->root; nodep;) {11821182+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1)11831183+ >= lowest_possible) {11841184+ candidate = nodep;11851185+ if (candidate->idx <= lowest_possible) {11861186+ contains = true;11871187+ break;11881188+ }11891189+ nodep = nodep->left;11901190+ } else {11911191+ nodep = nodep->right;11921192+ }11931193+ }11941194+ if (!candidate)11951195+ return 0;11961196+11971197+ assert(candidate->mask != 0);11981198+11991199+ /* Does the candidate node describe the setting of lowest_possible? 
*/12001200+ if (!contains) {12011201+ /*12021202+ * Candidate doesn't describe setting of bit at lowest_possible.12031203+ * Candidate points to the first node with a starting index12041204+ * > lowest_possible.12051205+ */12061206+ assert(candidate->idx > lowest_possible);12071207+12081208+ return node_first_set(candidate, 0);12091209+ }12101210+12111211+ /*12121212+ * Candidate describes setting of bit at lowest_possible.12131213+ * Note: although the node describes the setting of the bit12141214+ * at lowest_possible, its possible that its setting and the12151215+ * setting of all latter bits described by this node are 0.12161216+ * For now, just handle the cases where this node describes12171217+ * a bit at or after an index of lowest_possible that is set.12181218+ */12191219+ start = lowest_possible - candidate->idx;12201220+12211221+ if (start < MASK_BITS && candidate->mask >= (1 << start))12221222+ return node_first_set(candidate, start);12231223+12241224+ if (candidate->num_after) {12251225+ sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;12261226+12271227+ return lowest_possible < first_num_after_idx12281228+ ? first_num_after_idx : lowest_possible;12291229+ }12301230+12311231+ /*12321232+ * Although candidate node describes setting of bit at12331233+ * the index of lowest_possible, all bits at that index and12341234+ * latter that are described by candidate are cleared. With12351235+ * this, the next bit is the first bit in the next node, if12361236+ * such a node exists. 
If a next node doesn't exist, then12371237+ * there is no next set bit.12381238+ */12391239+ candidate = node_next(s, candidate);12401240+ if (!candidate)12411241+ return 0;12421242+12431243+ return node_first_set(candidate, 0);12441244+}12451245+12461246+/* Returns index of next bit cleared within s after the index given by prev.12471247+ * Returns 0 if there are no bits after prev that are cleared.12481248+ */12491249+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,12501250+ sparsebit_idx_t prev)12511251+{12521252+ sparsebit_idx_t lowest_possible = prev + 1;12531253+ sparsebit_idx_t idx;12541254+ struct node *nodep1, *nodep2;12551255+12561256+ /* A bit after the highest index can't be set. */12571257+ if (lowest_possible == 0)12581258+ return 0;12591259+12601260+ /*12611261+ * Does a node describing the setting of lowest_possible exist?12621262+ * If not, the bit at lowest_possible is cleared.12631263+ */12641264+ nodep1 = node_find(s, lowest_possible);12651265+ if (!nodep1)12661266+ return lowest_possible;12671267+12681268+ /* Does a mask bit in node 1 describe the next cleared bit. */12691269+ for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)12701270+ if (!(nodep1->mask & (1 << idx)))12711271+ return nodep1->idx + idx;12721272+12731273+ /*12741274+ * Next cleared bit is not described by node 1. 
If there
	 * isn't a next node, then next cleared bit is described
	 * by bit after the bits described by the first node.
	 * (That sum wraps to 0 exactly when node 1 runs to the highest
	 * index, i.e. when no cleared bit exists after prev, which is
	 * the documented "none" return value.)
	 */
	nodep2 = node_next(s, nodep1);
	if (!nodep2)
		return nodep1->idx + MASK_BITS + nodep1->num_after;

	/*
	 * There is a second node.
	 * If it is not adjacent to the first node, then there is a gap
	 * of cleared bits between the nodes, and the next cleared bit
	 * is the first bit within the gap.
	 */
	if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
		return nodep1->idx + MASK_BITS + nodep1->num_after;

	/*
	 * Second node is adjacent to the first node.
	 * Because it is adjacent, its mask should be non-zero.  If all
	 * its mask bits are set, then with it being adjacent, it should
	 * have had the mask bits moved into the num_after setting of the
	 * previous node.
	 */
	return node_first_clear(nodep2, 0);
}

/* Starting with the index 1 greater than the index given by start, finds
 * and returns the index of the first sequence of num consecutively set
 * bits.  Returns a value of 0 if no such sequence exists.
 */
sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
	sparsebit_idx_t start, sparsebit_num_t num)
{
	sparsebit_idx_t idx;

	assert(num >= 1);

	/*
	 * Walk successive runs of set bits.  The idx + num - 1 >= idx
	 * guard stops the search once a num-sized run could no longer
	 * fit without wrapping past the highest index.
	 */
	for (idx = sparsebit_next_set(s, start);
		idx != 0 && idx + num - 1 >= idx;
		idx = sparsebit_next_set(s, idx)) {
		assert(sparsebit_is_set(s, idx));

		/*
		 * Does the sequence of bits starting at idx consist of
		 * num set bits?
		 */
		if (sparsebit_is_set_num(s, idx, num))
			return idx;

		/*
		 * Sequence of set bits at idx isn't large enough.
		 * Skip this entire sequence of set bits.
		 */
		idx = sparsebit_next_clear(s, idx);
		if (idx == 0)
			return 0;
	}

	return 0;
}

/* Starting with the index 1 greater than the index given by start, finds
 * and returns the index of the first sequence of num consecutively cleared
 * bits. 
Returns a value of 0 of no such sequence exists.13391339+ */13401340+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,13411341+ sparsebit_idx_t start, sparsebit_num_t num)13421342+{13431343+ sparsebit_idx_t idx;13441344+13451345+ assert(num >= 1);13461346+13471347+ for (idx = sparsebit_next_clear(s, start);13481348+ idx != 0 && idx + num - 1 >= idx;13491349+ idx = sparsebit_next_clear(s, idx)) {13501350+ assert(sparsebit_is_clear(s, idx));13511351+13521352+ /*13531353+ * Does the sequence of bits starting at idx consist of13541354+ * num cleared bits?13551355+ */13561356+ if (sparsebit_is_clear_num(s, idx, num))13571357+ return idx;13581358+13591359+ /*13601360+ * Sequence of cleared bits at idx isn't large enough.13611361+ * Skip this entire sequence of cleared bits.13621362+ */13631363+ idx = sparsebit_next_set(s, idx);13641364+ if (idx == 0)13651365+ return 0;13661366+ }13671367+13681368+ return 0;13691369+}13701370+13711371+/* Sets the bits * in the inclusive range idx through idx + num - 1. */13721372+void sparsebit_set_num(struct sparsebit *s,13731373+ sparsebit_idx_t start, sparsebit_num_t num)13741374+{13751375+ struct node *nodep, *next;13761376+ unsigned int n1;13771377+ sparsebit_idx_t idx;13781378+ sparsebit_num_t n;13791379+ sparsebit_idx_t middle_start, middle_end;13801380+13811381+ assert(num > 0);13821382+ assert(start + num - 1 >= start);13831383+13841384+ /*13851385+ * Leading - bits before first mask boundary.13861386+ *13871387+ * TODO(lhuemill): With some effort it may be possible to13881388+ * replace the following loop with a sequential sequence13891389+ * of statements. High level sequence would be:13901390+ *13911391+ * 1. Use node_split() to force node that describes setting13921392+ * of idx to be within the mask portion of a node.13931393+ * 2. Form mask of bits to be set.13941394+ * 3. Determine number of mask bits already set in the node13951395+ * and store in a local variable named num_already_set.13961396+ * 4. 
Set the appropriate mask bits within the node.13971397+ * 5. Increment struct sparsebit_pvt num_set member13981398+ * by the number of bits that were actually set.13991399+ * Exclude from the counts bits that were already set.14001400+ * 6. Before returning to the caller, use node_reduce() to14011401+ * handle the multiple corner cases that this method14021402+ * introduces.14031403+ */14041404+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)14051405+ bit_set(s, idx);14061406+14071407+ /* Middle - bits spanning one or more entire mask */14081408+ middle_start = idx;14091409+ middle_end = middle_start + (n & -MASK_BITS) - 1;14101410+ if (n >= MASK_BITS) {14111411+ nodep = node_split(s, middle_start);14121412+14131413+ /*14141414+ * As needed, split just after end of middle bits.14151415+ * No split needed if end of middle bits is at highest14161416+ * supported bit index.14171417+ */14181418+ if (middle_end + 1 > middle_end)14191419+ (void) node_split(s, middle_end + 1);14201420+14211421+ /* Delete nodes that only describe bits within the middle. 
*/14221422+ for (next = node_next(s, nodep);14231423+ next && (next->idx < middle_end);14241424+ next = node_next(s, nodep)) {14251425+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);14261426+ node_rm(s, next);14271427+ next = NULL;14281428+ }14291429+14301430+ /* As needed set each of the mask bits */14311431+ for (n1 = 0; n1 < MASK_BITS; n1++) {14321432+ if (!(nodep->mask & (1 << n1))) {14331433+ nodep->mask |= 1 << n1;14341434+ s->num_set++;14351435+ }14361436+ }14371437+14381438+ s->num_set -= nodep->num_after;14391439+ nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;14401440+ s->num_set += nodep->num_after;14411441+14421442+ node_reduce(s, nodep);14431443+ }14441444+ idx = middle_end + 1;14451445+ n -= middle_end - middle_start + 1;14461446+14471447+ /* Trailing - bits at and beyond last mask boundary */14481448+ assert(n < MASK_BITS);14491449+ for (; n > 0; idx++, n--)14501450+ bit_set(s, idx);14511451+}14521452+14531453+/* Clears the bits * in the inclusive range idx through idx + num - 1. 
*/14541454+void sparsebit_clear_num(struct sparsebit *s,14551455+ sparsebit_idx_t start, sparsebit_num_t num)14561456+{14571457+ struct node *nodep, *next;14581458+ unsigned int n1;14591459+ sparsebit_idx_t idx;14601460+ sparsebit_num_t n;14611461+ sparsebit_idx_t middle_start, middle_end;14621462+14631463+ assert(num > 0);14641464+ assert(start + num - 1 >= start);14651465+14661466+ /* Leading - bits before first mask boundary */14671467+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)14681468+ bit_clear(s, idx);14691469+14701470+ /* Middle - bits spanning one or more entire mask */14711471+ middle_start = idx;14721472+ middle_end = middle_start + (n & -MASK_BITS) - 1;14731473+ if (n >= MASK_BITS) {14741474+ nodep = node_split(s, middle_start);14751475+14761476+ /*14771477+ * As needed, split just after end of middle bits.14781478+ * No split needed if end of middle bits is at highest14791479+ * supported bit index.14801480+ */14811481+ if (middle_end + 1 > middle_end)14821482+ (void) node_split(s, middle_end + 1);14831483+14841484+ /* Delete nodes that only describe bits within the middle. 
*/14851485+ for (next = node_next(s, nodep);14861486+ next && (next->idx < middle_end);14871487+ next = node_next(s, nodep)) {14881488+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);14891489+ node_rm(s, next);14901490+ next = NULL;14911491+ }14921492+14931493+ /* As needed clear each of the mask bits */14941494+ for (n1 = 0; n1 < MASK_BITS; n1++) {14951495+ if (nodep->mask & (1 << n1)) {14961496+ nodep->mask &= ~(1 << n1);14971497+ s->num_set--;14981498+ }14991499+ }15001500+15011501+ /* Clear any bits described by num_after */15021502+ s->num_set -= nodep->num_after;15031503+ nodep->num_after = 0;15041504+15051505+ /*15061506+ * Delete the node that describes the beginning of15071507+ * the middle bits and perform any allowed reductions15081508+ * with the nodes prev or next of nodep.15091509+ */15101510+ node_reduce(s, nodep);15111511+ nodep = NULL;15121512+ }15131513+ idx = middle_end + 1;15141514+ n -= middle_end - middle_start + 1;15151515+15161516+ /* Trailing - bits at and beyond last mask boundary */15171517+ assert(n < MASK_BITS);15181518+ for (; n > 0; idx++, n--)15191519+ bit_clear(s, idx);15201520+}15211521+15221522+/* Sets the bit at the index given by idx. */15231523+void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)15241524+{15251525+ sparsebit_set_num(s, idx, 1);15261526+}15271527+15281528+/* Clears the bit at the index given by idx. */15291529+void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)15301530+{15311531+ sparsebit_clear_num(s, idx, 1);15321532+}15331533+15341534+/* Sets the bits in the entire addressable range of the sparsebit array. */15351535+void sparsebit_set_all(struct sparsebit *s)15361536+{15371537+ sparsebit_set(s, 0);15381538+ sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);15391539+ assert(sparsebit_all_set(s));15401540+}15411541+15421542+/* Clears the bits in the entire addressable range of the sparsebit array. 
*/15431543+void sparsebit_clear_all(struct sparsebit *s)15441544+{15451545+ sparsebit_clear(s, 0);15461546+ sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);15471547+ assert(!sparsebit_any_set(s));15481548+}15491549+15501550+static size_t display_range(FILE *stream, sparsebit_idx_t low,15511551+ sparsebit_idx_t high, bool prepend_comma_space)15521552+{15531553+ char *fmt_str;15541554+ size_t sz;15551555+15561556+ /* Determine the printf format string */15571557+ if (low == high)15581558+ fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";15591559+ else15601560+ fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";15611561+15621562+ /*15631563+ * When stream is NULL, just determine the size of what would15641564+ * have been printed, else print the range.15651565+ */15661566+ if (!stream)15671567+ sz = snprintf(NULL, 0, fmt_str, low, high);15681568+ else15691569+ sz = fprintf(stream, fmt_str, low, high);15701570+15711571+ return sz;15721572+}15731573+15741574+15751575+/* Dumps to the FILE stream given by stream, the bit settings15761576+ * of s. Each line of output is prefixed with the number of15771577+ * spaces given by indent. The length of each line is implementation15781578+ * dependent and does not depend on the indent amount. The following15791579+ * is an example output of a sparsebit array that has bits:15801580+ *15811581+ * 0x5, 0x8, 0xa:0xe, 0x1215821582+ *15831583+ * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 1815841584+ * are set. Note that a ':', instead of a '-' is used to specify a range of15851585+ * contiguous bits. 
This is done because '-' is used to specify command-line15861586+ * options, and sometimes ranges are specified as command-line arguments.15871587+ */15881588+void sparsebit_dump(FILE *stream, struct sparsebit *s,15891589+ unsigned int indent)15901590+{15911591+ size_t current_line_len = 0;15921592+ size_t sz;15931593+ struct node *nodep;15941594+15951595+ if (!sparsebit_any_set(s))15961596+ return;15971597+15981598+ /* Display initial indent */15991599+ fprintf(stream, "%*s", indent, "");16001600+16011601+ /* For each node */16021602+ for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {16031603+ unsigned int n1;16041604+ sparsebit_idx_t low, high;16051605+16061606+ /* For each group of bits in the mask */16071607+ for (n1 = 0; n1 < MASK_BITS; n1++) {16081608+ if (nodep->mask & (1 << n1)) {16091609+ low = high = nodep->idx + n1;16101610+16111611+ for (; n1 < MASK_BITS; n1++) {16121612+ if (nodep->mask & (1 << n1))16131613+ high = nodep->idx + n1;16141614+ else16151615+ break;16161616+ }16171617+16181618+ if ((n1 == MASK_BITS) && nodep->num_after)16191619+ high += nodep->num_after;16201620+16211621+ /*16221622+ * How much room will it take to display16231623+ * this range.16241624+ */16251625+ sz = display_range(NULL, low, high,16261626+ current_line_len != 0);16271627+16281628+ /*16291629+ * If there is not enough room, display16301630+ * a newline plus the indent of the next16311631+ * line.16321632+ */16331633+ if (current_line_len + sz > DUMP_LINE_MAX) {16341634+ fputs("\n", stream);16351635+ fprintf(stream, "%*s", indent, "");16361636+ current_line_len = 0;16371637+ }16381638+16391639+ /* Display the range */16401640+ sz = display_range(stream, low, high,16411641+ current_line_len != 0);16421642+ current_line_len += sz;16431643+ }16441644+ }16451645+16461646+ /*16471647+ * If num_after and most significant-bit of mask is not16481648+ * set, then still need to display a range for the bits16491649+ * described by num_after.16501650+ */16511651+ if 
(!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {16521652+ low = nodep->idx + MASK_BITS;16531653+ high = nodep->idx + MASK_BITS + nodep->num_after - 1;16541654+16551655+ /*16561656+ * How much room will it take to display16571657+ * this range.16581658+ */16591659+ sz = display_range(NULL, low, high,16601660+ current_line_len != 0);16611661+16621662+ /*16631663+ * If there is not enough room, display16641664+ * a newline plus the indent of the next16651665+ * line.16661666+ */16671667+ if (current_line_len + sz > DUMP_LINE_MAX) {16681668+ fputs("\n", stream);16691669+ fprintf(stream, "%*s", indent, "");16701670+ current_line_len = 0;16711671+ }16721672+16731673+ /* Display the range */16741674+ sz = display_range(stream, low, high,16751675+ current_line_len != 0);16761676+ current_line_len += sz;16771677+ }16781678+ }16791679+ fputs("\n", stream);16801680+}16811681+16821682+/* Validates the internal state of the sparsebit array given by16831683+ * s. On error, diagnostic information is printed to stderr and16841684+ * abort is called.16851685+ */16861686+void sparsebit_validate_internal(struct sparsebit *s)16871687+{16881688+ bool error_detected = false;16891689+ struct node *nodep, *prev = NULL;16901690+ sparsebit_num_t total_bits_set = 0;16911691+ unsigned int n1;16921692+16931693+ /* For each node */16941694+ for (nodep = node_first(s); nodep;16951695+ prev = nodep, nodep = node_next(s, nodep)) {16961696+16971697+ /*16981698+ * Increase total bits set by the number of bits set16991699+ * in this node.17001700+ */17011701+ for (n1 = 0; n1 < MASK_BITS; n1++)17021702+ if (nodep->mask & (1 << n1))17031703+ total_bits_set++;17041704+17051705+ total_bits_set += nodep->num_after;17061706+17071707+ /*17081708+ * Arbitrary choice as to whether a mask of 0 is allowed17091709+ * or not. 
For diagnostic purposes it is beneficial to
	 * have only one valid means to represent a set of bits.
	 * To support this an arbitrary choice has been made
	 * to not allow a mask of zero.
	 */
	if (nodep->mask == 0) {
		fprintf(stderr, "Node mask of zero, "
			"nodep: %p nodep->mask: 0x%x",
			nodep, nodep->mask);
		error_detected = true;
		break;
	}

	/*
	 * Validate num_after is not greater than the max index
	 * - the number of mask bits.  The num_after member
	 * uses 0-based indexing and thus has no value that
	 * represents all bits set.  This limitation is handled
	 * by requiring a non-zero mask.  With a non-zero mask,
	 * MASK_BITS worth of bits are described by the mask,
	 * which makes the largest needed num_after equal to:
	 *
	 *   (~(sparsebit_num_t) 0) - MASK_BITS + 1
	 */
	if (nodep->num_after
		> (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
		fprintf(stderr, "num_after too large, "
			"nodep: %p nodep->num_after: 0x%lx",
			nodep, nodep->num_after);
		error_detected = true;
		break;
	}

	/* Validate node index is divisible by the mask size */
	if (nodep->idx % MASK_BITS) {
		fprintf(stderr, "Node index not divisable by "
			"mask size,\n"
			" nodep: %p nodep->idx: 0x%lx "
			"MASK_BITS: %lu\n",
			nodep, nodep->idx, MASK_BITS);
		error_detected = true;
		break;
	}

	/*
	 * Validate bits described by node don't wrap beyond the
	 * highest supported index.
	 */
	if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
		fprintf(stderr, "Bits described by node wrap "
			"beyond highest supported index,\n"
			" nodep: %p nodep->idx: 0x%lx\n"
			" MASK_BITS: %lu nodep->num_after: 0x%lx",
			nodep, nodep->idx, MASK_BITS, nodep->num_after);
		error_detected = true;
		break;
	}

	/* Check parent pointers. */
	if (nodep->left) {
		if (nodep->left->parent != nodep) {
			fprintf(stderr, "Left child parent pointer "
				"doesn't point to this node,\n"
				" nodep: %p nodep->left: %p "
				"nodep->left->parent: %p",
				nodep, nodep->left,
				nodep->left->parent);
			error_detected = true;
			break;
		}
	}

	if (nodep->right) {
		if (nodep->right->parent != nodep) {
			fprintf(stderr, "Right child parent pointer "
				"doesn't point to this node,\n"
				" nodep: %p nodep->right: %p "
				"nodep->right->parent: %p",
				nodep, nodep->right,
				nodep->right->parent);
			error_detected = true;
			break;
		}
	}

	/* Only the root node may have a NULL parent */
	if (!nodep->parent) {
		if (s->root != nodep) {
			fprintf(stderr, "Unexpected root node, "
				"s->root: %p nodep: %p",
				s->root, nodep);
			error_detected = true;
			break;
		}
	}

	if (prev) {
		/*
		 * Is index of previous node before index of
		 * current node?
		 */
		if (prev->idx >= nodep->idx) {
			fprintf(stderr, "Previous node index "
				">= current node index,\n"
				" prev: %p prev->idx: 0x%lx\n"
				" nodep: %p nodep->idx: 0x%lx",
				prev, prev->idx, nodep, nodep->idx);
			error_detected = true;
			break;
		}

		/*
		 * Nodes occur in ascending order, based on each
		 * nodes starting index, and their bit ranges must
		 * not overlap.
		 */
		if ((prev->idx + MASK_BITS + prev->num_after - 1)
			>= nodep->idx) {
			fprintf(stderr, "Previous node bit range "
				"overlap with current node bit range,\n"
				" prev: %p prev->idx: 0x%lx "
				"prev->num_after: 0x%lx\n"
				" nodep: %p nodep->idx: 0x%lx "
				"nodep->num_after: 0x%lx\n"
				" MASK_BITS: %lu",
				prev, prev->idx, prev->num_after,
				nodep, nodep->idx, nodep->num_after,
				MASK_BITS);
			error_detected = true;
			break;
		}

		/*
		 * When the node has all mask bits set, it shouldn't
		 * be adjacent to the last bit described by the
		 * previous node.
		 */
		if (nodep->mask == ~(mask_t) 0 &&
			prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
			fprintf(stderr, "Current node has mask with "
				"all bits set and is adjacent to the "
				"previous node,\n"
				" prev: %p prev->idx: 0x%lx "
				"prev->num_after: 0x%lx\n"
				" nodep: %p nodep->idx: 0x%lx "
				"nodep->num_after: 0x%lx\n"
				" MASK_BITS: %lu",
				prev, prev->idx, prev->num_after,
				nodep, nodep->idx, nodep->num_after,
				MASK_BITS);

			error_detected = true;
			break;
		}
	}
	}

	if (!error_detected) {
		/*
		 * Is sum of bits set in each node equal to the count
		 * of total bits set.
		 */
		if (s->num_set != total_bits_set) {
			fprintf(stderr, "Number of bits set missmatch,\n"
				" s->num_set: 0x%lx total_bits_set: 0x%lx",
				s->num_set, total_bits_set);

			error_detected = true;
		}
	}

	/* Any failed check: dump the full internal state, then abort */
	if (error_detected) {
		fputs(" dump_internal:\n", stderr);
		sparsebit_dump_internal(stderr, s, 4);
		abort();
	}
}


#ifdef FUZZ
/* A simple but effective fuzzing driver.  Look for bugs with the help
 * of some invariants and of a trivial representation of sparsebit.
 * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
 * afl-fuzz do the magic. 
:)18911891+ */18921892+18931893+#include <stdlib.h>18941894+#include <assert.h>18951895+18961896+struct range {18971897+ sparsebit_idx_t first, last;18981898+ bool set;18991899+};19001900+19011901+struct sparsebit *s;19021902+struct range ranges[1000];19031903+int num_ranges;19041904+19051905+static bool get_value(sparsebit_idx_t idx)19061906+{19071907+ int i;19081908+19091909+ for (i = num_ranges; --i >= 0; )19101910+ if (ranges[i].first <= idx && idx <= ranges[i].last)19111911+ return ranges[i].set;19121912+19131913+ return false;19141914+}19151915+19161916+static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)19171917+{19181918+ sparsebit_num_t num;19191919+ sparsebit_idx_t next;19201920+19211921+ if (first < last) {19221922+ num = last - first + 1;19231923+ } else {19241924+ num = first - last + 1;19251925+ first = last;19261926+ last = first + num - 1;19271927+ }19281928+19291929+ switch (code) {19301930+ case 0:19311931+ sparsebit_set(s, first);19321932+ assert(sparsebit_is_set(s, first));19331933+ assert(!sparsebit_is_clear(s, first));19341934+ assert(sparsebit_any_set(s));19351935+ assert(!sparsebit_all_clear(s));19361936+ if (get_value(first))19371937+ return;19381938+ if (num_ranges == 1000)19391939+ exit(0);19401940+ ranges[num_ranges++] = (struct range)19411941+ { .first = first, .last = first, .set = true };19421942+ break;19431943+ case 1:19441944+ sparsebit_clear(s, first);19451945+ assert(!sparsebit_is_set(s, first));19461946+ assert(sparsebit_is_clear(s, first));19471947+ assert(sparsebit_any_clear(s));19481948+ assert(!sparsebit_all_set(s));19491949+ if (!get_value(first))19501950+ return;19511951+ if (num_ranges == 1000)19521952+ exit(0);19531953+ ranges[num_ranges++] = (struct range)19541954+ { .first = first, .last = first, .set = false };19551955+ break;19561956+ case 2:19571957+ assert(sparsebit_is_set(s, first) == get_value(first));19581958+ assert(sparsebit_is_clear(s, first) == !get_value(first));19591959+ 
break;19601960+ case 3:19611961+ if (sparsebit_any_set(s))19621962+ assert(get_value(sparsebit_first_set(s)));19631963+ if (sparsebit_any_clear(s))19641964+ assert(!get_value(sparsebit_first_clear(s)));19651965+ sparsebit_set_all(s);19661966+ assert(!sparsebit_any_clear(s));19671967+ assert(sparsebit_all_set(s));19681968+ num_ranges = 0;19691969+ ranges[num_ranges++] = (struct range)19701970+ { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };19711971+ break;19721972+ case 4:19731973+ if (sparsebit_any_set(s))19741974+ assert(get_value(sparsebit_first_set(s)));19751975+ if (sparsebit_any_clear(s))19761976+ assert(!get_value(sparsebit_first_clear(s)));19771977+ sparsebit_clear_all(s);19781978+ assert(!sparsebit_any_set(s));19791979+ assert(sparsebit_all_clear(s));19801980+ num_ranges = 0;19811981+ break;19821982+ case 5:19831983+ next = sparsebit_next_set(s, first);19841984+ assert(next == 0 || next > first);19851985+ assert(next == 0 || get_value(next));19861986+ break;19871987+ case 6:19881988+ next = sparsebit_next_clear(s, first);19891989+ assert(next == 0 || next > first);19901990+ assert(next == 0 || !get_value(next));19911991+ break;19921992+ case 7:19931993+ next = sparsebit_next_clear(s, first);19941994+ if (sparsebit_is_set_num(s, first, num)) {19951995+ assert(next == 0 || next > last);19961996+ if (first)19971997+ next = sparsebit_next_set(s, first - 1);19981998+ else if (sparsebit_any_set(s))19991999+ next = sparsebit_first_set(s);20002000+ else20012001+ return;20022002+ assert(next == first);20032003+ } else {20042004+ assert(sparsebit_is_clear(s, first) || next <= last);20052005+ }20062006+ break;20072007+ case 8:20082008+ next = sparsebit_next_set(s, first);20092009+ if (sparsebit_is_clear_num(s, first, num)) {20102010+ assert(next == 0 || next > last);20112011+ if (first)20122012+ next = sparsebit_next_clear(s, first - 1);20132013+ else if (sparsebit_any_clear(s))20142014+ next = sparsebit_first_clear(s);20152015+ else20162016+ 
return;20172017+ assert(next == first);20182018+ } else {20192019+ assert(sparsebit_is_set(s, first) || next <= last);20202020+ }20212021+ break;20222022+ case 9:20232023+ sparsebit_set_num(s, first, num);20242024+ assert(sparsebit_is_set_num(s, first, num));20252025+ assert(!sparsebit_is_clear_num(s, first, num));20262026+ assert(sparsebit_any_set(s));20272027+ assert(!sparsebit_all_clear(s));20282028+ if (num_ranges == 1000)20292029+ exit(0);20302030+ ranges[num_ranges++] = (struct range)20312031+ { .first = first, .last = last, .set = true };20322032+ break;20332033+ case 10:20342034+ sparsebit_clear_num(s, first, num);20352035+ assert(!sparsebit_is_set_num(s, first, num));20362036+ assert(sparsebit_is_clear_num(s, first, num));20372037+ assert(sparsebit_any_clear(s));20382038+ assert(!sparsebit_all_set(s));20392039+ if (num_ranges == 1000)20402040+ exit(0);20412041+ ranges[num_ranges++] = (struct range)20422042+ { .first = first, .last = last, .set = false };20432043+ break;20442044+ case 11:20452045+ sparsebit_validate_internal(s);20462046+ break;20472047+ default:20482048+ break;20492049+ }20502050+}20512051+20522052+unsigned char get8(void)20532053+{20542054+ int ch;20552055+20562056+ ch = getchar();20572057+ if (ch == EOF)20582058+ exit(0);20592059+ return ch;20602060+}20612061+20622062+uint64_t get64(void)20632063+{20642064+ uint64_t x;20652065+20662066+ x = get8();20672067+ x = (x << 8) | get8();20682068+ x = (x << 8) | get8();20692069+ x = (x << 8) | get8();20702070+ x = (x << 8) | get8();20712071+ x = (x << 8) | get8();20722072+ x = (x << 8) | get8();20732073+ return (x << 8) | get8();20742074+}20752075+20762076+int main(void)20772077+{20782078+ s = sparsebit_alloc();20792079+ for (;;) {20802080+ uint8_t op = get8() & 0xf;20812081+ uint64_t first = get64();20822082+ uint64_t last = get64();20832083+20842084+ operate(op, first, last);20852085+ }20862086+}20872087+#endif
+697
tools/testing/selftests/kvm/lib/x86.c
···11+/*22+ * tools/testing/selftests/kvm/lib/x86.c33+ *44+ * Copyright (C) 2018, Google LLC.55+ *66+ * This work is licensed under the terms of the GNU GPL, version 2.77+ */88+99+#define _GNU_SOURCE /* for program_invocation_name */1010+1111+#include "test_util.h"1212+#include "kvm_util.h"1313+#include "kvm_util_internal.h"1414+#include "x86.h"1515+1616+/* Minimum physical address used for virtual translation tables. */1717+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x1800001818+1919+/* Virtual translation table structure declarations */2020+struct pageMapL4Entry {2121+ uint64_t present:1;2222+ uint64_t writable:1;2323+ uint64_t user:1;2424+ uint64_t write_through:1;2525+ uint64_t cache_disable:1;2626+ uint64_t accessed:1;2727+ uint64_t ignored_06:1;2828+ uint64_t page_size:1;2929+ uint64_t ignored_11_08:4;3030+ uint64_t address:40;3131+ uint64_t ignored_62_52:11;3232+ uint64_t execute_disable:1;3333+};3434+3535+struct pageDirectoryPointerEntry {3636+ uint64_t present:1;3737+ uint64_t writable:1;3838+ uint64_t user:1;3939+ uint64_t write_through:1;4040+ uint64_t cache_disable:1;4141+ uint64_t accessed:1;4242+ uint64_t ignored_06:1;4343+ uint64_t page_size:1;4444+ uint64_t ignored_11_08:4;4545+ uint64_t address:40;4646+ uint64_t ignored_62_52:11;4747+ uint64_t execute_disable:1;4848+};4949+5050+struct pageDirectoryEntry {5151+ uint64_t present:1;5252+ uint64_t writable:1;5353+ uint64_t user:1;5454+ uint64_t write_through:1;5555+ uint64_t cache_disable:1;5656+ uint64_t accessed:1;5757+ uint64_t ignored_06:1;5858+ uint64_t page_size:1;5959+ uint64_t ignored_11_08:4;6060+ uint64_t address:40;6161+ uint64_t ignored_62_52:11;6262+ uint64_t execute_disable:1;6363+};6464+6565+struct pageTableEntry {6666+ uint64_t present:1;6767+ uint64_t writable:1;6868+ uint64_t user:1;6969+ uint64_t write_through:1;7070+ uint64_t cache_disable:1;7171+ uint64_t accessed:1;7272+ uint64_t dirty:1;7373+ uint64_t reserved_07:1;7474+ uint64_t global:1;7575+ uint64_t ignored_11_09:3;7676+ uint64_t 
address:40;7777+ uint64_t ignored_62_52:11;7878+ uint64_t execute_disable:1;7979+};8080+8181+/* Register Dump8282+ *8383+ * Input Args:8484+ * indent - Left margin indent amount8585+ * regs - register8686+ *8787+ * Output Args:8888+ * stream - Output FILE stream8989+ *9090+ * Return: None9191+ *9292+ * Dumps the state of the registers given by regs, to the FILE stream9393+ * given by steam.9494+ */9595+void regs_dump(FILE *stream, struct kvm_regs *regs,9696+ uint8_t indent)9797+{9898+ fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "9999+ "rcx: 0x%.16llx rdx: 0x%.16llx\n",100100+ indent, "",101101+ regs->rax, regs->rbx, regs->rcx, regs->rdx);102102+ fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "103103+ "rsp: 0x%.16llx rbp: 0x%.16llx\n",104104+ indent, "",105105+ regs->rsi, regs->rdi, regs->rsp, regs->rbp);106106+ fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "107107+ "r10: 0x%.16llx r11: 0x%.16llx\n",108108+ indent, "",109109+ regs->r8, regs->r9, regs->r10, regs->r11);110110+ fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "111111+ "r14: 0x%.16llx r15: 0x%.16llx\n",112112+ indent, "",113113+ regs->r12, regs->r13, regs->r14, regs->r15);114114+ fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",115115+ indent, "",116116+ regs->rip, regs->rflags);117117+}118118+119119+/* Segment Dump120120+ *121121+ * Input Args:122122+ * indent - Left margin indent amount123123+ * segment - KVM segment124124+ *125125+ * Output Args:126126+ * stream - Output FILE stream127127+ *128128+ * Return: None129129+ *130130+ * Dumps the state of the KVM segment given by segment, to the FILE stream131131+ * given by steam.132132+ */133133+static void segment_dump(FILE *stream, struct kvm_segment *segment,134134+ uint8_t indent)135135+{136136+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "137137+ "selector: 0x%.4x type: 0x%.2x\n",138138+ indent, "", segment->base, segment->limit,139139+ segment->selector, segment->type);140140+ fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x 
"141141+ "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",142142+ indent, "", segment->present, segment->dpl,143143+ segment->db, segment->s, segment->l);144144+ fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "145145+ "unusable: 0x%.2x padding: 0x%.2x\n",146146+ indent, "", segment->g, segment->avl,147147+ segment->unusable, segment->padding);148148+}149149+150150+/* dtable Dump151151+ *152152+ * Input Args:153153+ * indent - Left margin indent amount154154+ * dtable - KVM dtable155155+ *156156+ * Output Args:157157+ * stream - Output FILE stream158158+ *159159+ * Return: None160160+ *161161+ * Dumps the state of the KVM dtable given by dtable, to the FILE stream162162+ * given by steam.163163+ */164164+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,165165+ uint8_t indent)166166+{167167+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "168168+ "padding: 0x%.4x 0x%.4x 0x%.4x\n",169169+ indent, "", dtable->base, dtable->limit,170170+ dtable->padding[0], dtable->padding[1], dtable->padding[2]);171171+}172172+173173+/* System Register Dump174174+ *175175+ * Input Args:176176+ * indent - Left margin indent amount177177+ * sregs - System registers178178+ *179179+ * Output Args:180180+ * stream - Output FILE stream181181+ *182182+ * Return: None183183+ *184184+ * Dumps the state of the system registers given by sregs, to the FILE stream185185+ * given by steam.186186+ */187187+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,188188+ uint8_t indent)189189+{190190+ unsigned int i;191191+192192+ fprintf(stream, "%*scs:\n", indent, "");193193+ segment_dump(stream, &sregs->cs, indent + 2);194194+ fprintf(stream, "%*sds:\n", indent, "");195195+ segment_dump(stream, &sregs->ds, indent + 2);196196+ fprintf(stream, "%*ses:\n", indent, "");197197+ segment_dump(stream, &sregs->es, indent + 2);198198+ fprintf(stream, "%*sfs:\n", indent, "");199199+ segment_dump(stream, &sregs->fs, indent + 2);200200+ fprintf(stream, "%*sgs:\n", indent, "");201201+ segment_dump(stream, 
&sregs->gs, indent + 2);202202+ fprintf(stream, "%*sss:\n", indent, "");203203+ segment_dump(stream, &sregs->ss, indent + 2);204204+ fprintf(stream, "%*str:\n", indent, "");205205+ segment_dump(stream, &sregs->tr, indent + 2);206206+ fprintf(stream, "%*sldt:\n", indent, "");207207+ segment_dump(stream, &sregs->ldt, indent + 2);208208+209209+ fprintf(stream, "%*sgdt:\n", indent, "");210210+ dtable_dump(stream, &sregs->gdt, indent + 2);211211+ fprintf(stream, "%*sidt:\n", indent, "");212212+ dtable_dump(stream, &sregs->idt, indent + 2);213213+214214+ fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "215215+ "cr3: 0x%.16llx cr4: 0x%.16llx\n",216216+ indent, "",217217+ sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);218218+ fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "219219+ "apic_base: 0x%.16llx\n",220220+ indent, "",221221+ sregs->cr8, sregs->efer, sregs->apic_base);222222+223223+ fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");224224+ for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {225225+ fprintf(stream, "%*s%.16llx\n", indent + 2, "",226226+ sregs->interrupt_bitmap[i]);227227+ }228228+}229229+230230+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)231231+{232232+ int rc;233233+234234+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "235235+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);236236+237237+ /* If needed, create page map l4 table. */238238+ if (!vm->pgd_created) {239239+ vm_paddr_t paddr = vm_phy_page_alloc(vm,240240+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);241241+ vm->pgd = paddr;242242+243243+ /* Set pointer to pgd tables in all the VCPUs that244244+ * have already been created. 
Future VCPUs will have245245+ * the value set as each one is created.246246+ */247247+ for (struct vcpu *vcpu = vm->vcpu_head; vcpu;248248+ vcpu = vcpu->next) {249249+ struct kvm_sregs sregs;250250+251251+ /* Obtain the current system register settings */252252+ vcpu_sregs_get(vm, vcpu->id, &sregs);253253+254254+ /* Set and store the pointer to the start of the255255+ * pgd tables.256256+ */257257+ sregs.cr3 = vm->pgd;258258+ vcpu_sregs_set(vm, vcpu->id, &sregs);259259+ }260260+261261+ vm->pgd_created = true;262262+ }263263+}264264+265265+/* VM Virtual Page Map266266+ *267267+ * Input Args:268268+ * vm - Virtual Machine269269+ * vaddr - VM Virtual Address270270+ * paddr - VM Physical Address271271+ * pgd_memslot - Memory region slot for new virtual translation tables272272+ *273273+ * Output Args: None274274+ *275275+ * Return: None276276+ *277277+ * Within the VM given by vm, creates a virtual translation for the page278278+ * starting at vaddr to the page starting at paddr.279279+ */280280+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,281281+ uint32_t pgd_memslot)282282+{283283+ uint16_t index[4];284284+ struct pageMapL4Entry *pml4e;285285+286286+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "287287+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);288288+289289+ TEST_ASSERT((vaddr % vm->page_size) == 0,290290+ "Virtual address not on page boundary,\n"291291+ " vaddr: 0x%lx vm->page_size: 0x%x",292292+ vaddr, vm->page_size);293293+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,294294+ (vaddr >> vm->page_shift)),295295+ "Invalid virtual address, vaddr: 0x%lx",296296+ vaddr);297297+ TEST_ASSERT((paddr % vm->page_size) == 0,298298+ "Physical address not on page boundary,\n"299299+ " paddr: 0x%lx vm->page_size: 0x%x",300300+ paddr, vm->page_size);301301+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,302302+ "Physical address beyond beyond maximum supported,\n"303303+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 
0x%x",304304+ paddr, vm->max_gfn, vm->page_size);305305+306306+ index[0] = (vaddr >> 12) & 0x1ffu;307307+ index[1] = (vaddr >> 21) & 0x1ffu;308308+ index[2] = (vaddr >> 30) & 0x1ffu;309309+ index[3] = (vaddr >> 39) & 0x1ffu;310310+311311+ /* Allocate page directory pointer table if not present. */312312+ pml4e = addr_gpa2hva(vm, vm->pgd);313313+ if (!pml4e[index[3]].present) {314314+ pml4e[index[3]].address = vm_phy_page_alloc(vm,315315+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)316316+ >> vm->page_shift;317317+ pml4e[index[3]].writable = true;318318+ pml4e[index[3]].present = true;319319+ }320320+321321+ /* Allocate page directory table if not present. */322322+ struct pageDirectoryPointerEntry *pdpe;323323+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);324324+ if (!pdpe[index[2]].present) {325325+ pdpe[index[2]].address = vm_phy_page_alloc(vm,326326+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)327327+ >> vm->page_shift;328328+ pdpe[index[2]].writable = true;329329+ pdpe[index[2]].present = true;330330+ }331331+332332+ /* Allocate page table if not present. */333333+ struct pageDirectoryEntry *pde;334334+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);335335+ if (!pde[index[1]].present) {336336+ pde[index[1]].address = vm_phy_page_alloc(vm,337337+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)338338+ >> vm->page_shift;339339+ pde[index[1]].writable = true;340340+ pde[index[1]].present = true;341341+ }342342+343343+ /* Fill in page table entry. 
*/344344+ struct pageTableEntry *pte;345345+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);346346+ pte[index[0]].address = paddr >> vm->page_shift;347347+ pte[index[0]].writable = true;348348+ pte[index[0]].present = 1;349349+}350350+351351+/* Virtual Translation Tables Dump352352+ *353353+ * Input Args:354354+ * vm - Virtual Machine355355+ * indent - Left margin indent amount356356+ *357357+ * Output Args:358358+ * stream - Output FILE stream359359+ *360360+ * Return: None361361+ *362362+ * Dumps to the FILE stream given by stream, the contents of all the363363+ * virtual translation tables for the VM given by vm.364364+ */365365+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)366366+{367367+ struct pageMapL4Entry *pml4e, *pml4e_start;368368+ struct pageDirectoryPointerEntry *pdpe, *pdpe_start;369369+ struct pageDirectoryEntry *pde, *pde_start;370370+ struct pageTableEntry *pte, *pte_start;371371+372372+ if (!vm->pgd_created)373373+ return;374374+375375+ fprintf(stream, "%*s "376376+ " no\n", indent, "");377377+ fprintf(stream, "%*s index hvaddr gpaddr "378378+ "addr w exec dirty\n",379379+ indent, "");380380+ pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,381381+ vm->pgd);382382+ for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {383383+ pml4e = &pml4e_start[n1];384384+ if (!pml4e->present)385385+ continue;386386+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "387387+ " %u\n",388388+ indent, "",389389+ pml4e - pml4e_start, pml4e,390390+ addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,391391+ pml4e->writable, pml4e->execute_disable);392392+393393+ pdpe_start = addr_gpa2hva(vm, pml4e->address394394+ * vm->page_size);395395+ for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {396396+ pdpe = &pdpe_start[n2];397397+ if (!pdpe->present)398398+ continue;399399+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "400400+ "%u %u\n",401401+ indent, "",402402+ pdpe - pdpe_start, pdpe,403403+ addr_hva2gpa(vm, pdpe),404404+ (uint64_t) 
pdpe->address, pdpe->writable,405405+ pdpe->execute_disable);406406+407407+ pde_start = addr_gpa2hva(vm,408408+ pdpe->address * vm->page_size);409409+ for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {410410+ pde = &pde_start[n3];411411+ if (!pde->present)412412+ continue;413413+ fprintf(stream, "%*spde 0x%-3zx %p "414414+ "0x%-12lx 0x%-10lx %u %u\n",415415+ indent, "", pde - pde_start, pde,416416+ addr_hva2gpa(vm, pde),417417+ (uint64_t) pde->address, pde->writable,418418+ pde->execute_disable);419419+420420+ pte_start = addr_gpa2hva(vm,421421+ pde->address * vm->page_size);422422+ for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {423423+ pte = &pte_start[n4];424424+ if (!pte->present)425425+ continue;426426+ fprintf(stream, "%*spte 0x%-3zx %p "427427+ "0x%-12lx 0x%-10lx %u %u "428428+ " %u 0x%-10lx\n",429429+ indent, "",430430+ pte - pte_start, pte,431431+ addr_hva2gpa(vm, pte),432432+ (uint64_t) pte->address,433433+ pte->writable,434434+ pte->execute_disable,435435+ pte->dirty,436436+ ((uint64_t) n1 << 27)437437+ | ((uint64_t) n2 << 18)438438+ | ((uint64_t) n3 << 9)439439+ | ((uint64_t) n4));440440+ }441441+ }442442+ }443443+ }444444+}445445+446446+/* Set Unusable Segment447447+ *448448+ * Input Args: None449449+ *450450+ * Output Args:451451+ * segp - Pointer to segment register452452+ *453453+ * Return: None454454+ *455455+ * Sets the segment register pointed to by segp to an unusable state.456456+ */457457+static void kvm_seg_set_unusable(struct kvm_segment *segp)458458+{459459+ memset(segp, 0, sizeof(*segp));460460+ segp->unusable = true;461461+}462462+463463+/* Set Long Mode Flat Kernel Code Segment464464+ *465465+ * Input Args:466466+ * selector - selector value467467+ *468468+ * Output Args:469469+ * segp - Pointer to KVM segment470470+ *471471+ * Return: None472472+ *473473+ * Sets up the KVM segment pointed to by segp, to be a code segment474474+ * with the selector value given by selector.475475+ */476476+static void kvm_seg_set_kernel_code_64bit(uint16_t 
selector,477477+ struct kvm_segment *segp)478478+{479479+ memset(segp, 0, sizeof(*segp));480480+ segp->selector = selector;481481+ segp->limit = 0xFFFFFFFFu;482482+ segp->s = 0x1; /* kTypeCodeData */483483+ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed484484+ * | kFlagCodeReadable485485+ */486486+ segp->g = true;487487+ segp->l = true;488488+ segp->present = 1;489489+}490490+491491+/* Set Long Mode Flat Kernel Data Segment492492+ *493493+ * Input Args:494494+ * selector - selector value495495+ *496496+ * Output Args:497497+ * segp - Pointer to KVM segment498498+ *499499+ * Return: None500500+ *501501+ * Sets up the KVM segment pointed to by segp, to be a data segment502502+ * with the selector value given by selector.503503+ */504504+static void kvm_seg_set_kernel_data_64bit(uint16_t selector,505505+ struct kvm_segment *segp)506506+{507507+ memset(segp, 0, sizeof(*segp));508508+ segp->selector = selector;509509+ segp->limit = 0xFFFFFFFFu;510510+ segp->s = 0x1; /* kTypeCodeData */511511+ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed512512+ * | kFlagDataWritable513513+ */514514+ segp->g = true;515515+ segp->present = true;516516+}517517+518518+/* Address Guest Virtual to Guest Physical519519+ *520520+ * Input Args:521521+ * vm - Virtual Machine522522+ * gpa - VM virtual address523523+ *524524+ * Output Args: None525525+ *526526+ * Return:527527+ * Equivalent VM physical address528528+ *529529+ * Translates the VM virtual address given by gva to a VM physical530530+ * address and then locates the memory region containing the VM531531+ * physical address, within the VM given by vm. 
When found, the host532532+ * virtual address providing the memory to the vm physical address is returned.533533+ * A TEST_ASSERT failure occurs if no region containing translated534534+ * VM virtual address exists.535535+ */536536+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)537537+{538538+ uint16_t index[4];539539+ struct pageMapL4Entry *pml4e;540540+ struct pageDirectoryPointerEntry *pdpe;541541+ struct pageDirectoryEntry *pde;542542+ struct pageTableEntry *pte;543543+ void *hva;544544+545545+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "546546+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);547547+548548+ index[0] = (gva >> 12) & 0x1ffu;549549+ index[1] = (gva >> 21) & 0x1ffu;550550+ index[2] = (gva >> 30) & 0x1ffu;551551+ index[3] = (gva >> 39) & 0x1ffu;552552+553553+ if (!vm->pgd_created)554554+ goto unmapped_gva;555555+ pml4e = addr_gpa2hva(vm, vm->pgd);556556+ if (!pml4e[index[3]].present)557557+ goto unmapped_gva;558558+559559+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);560560+ if (!pdpe[index[2]].present)561561+ goto unmapped_gva;562562+563563+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);564564+ if (!pde[index[1]].present)565565+ goto unmapped_gva;566566+567567+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);568568+ if (!pte[index[0]].present)569569+ goto unmapped_gva;570570+571571+ return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);572572+573573+unmapped_gva:574574+ TEST_ASSERT(false, "No mapping for vm virtual address, "575575+ "gva: 0x%lx", gva);576576+}577577+578578+void vcpu_setup(struct kvm_vm *vm, int vcpuid)579579+{580580+ struct kvm_sregs sregs;581581+582582+ /* Set mode specific system register values. 
*/583583+ vcpu_sregs_get(vm, vcpuid, &sregs);584584+585585+ switch (vm->mode) {586586+ case VM_MODE_FLAT48PG:587587+ sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;588588+ sregs.cr4 |= X86_CR4_PAE;589589+ sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);590590+591591+ kvm_seg_set_unusable(&sregs.ldt);592592+ kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs);593593+ kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds);594594+ kvm_seg_set_kernel_data_64bit(0x10, &sregs.es);595595+ break;596596+597597+ default:598598+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);599599+ }600600+ vcpu_sregs_set(vm, vcpuid, &sregs);601601+602602+ /* If virtual translation table have been setup, set system register603603+ * to point to the tables. It's okay if they haven't been setup yet,604604+ * in that the code that sets up the virtual translation tables, will605605+ * go back through any VCPUs that have already been created and set606606+ * their values.607607+ */608608+ if (vm->pgd_created) {609609+ struct kvm_sregs sregs;610610+611611+ vcpu_sregs_get(vm, vcpuid, &sregs);612612+613613+ sregs.cr3 = vm->pgd;614614+ vcpu_sregs_set(vm, vcpuid, &sregs);615615+ }616616+}617617+/* Adds a vCPU with reasonable defaults (i.e., a stack)618618+ *619619+ * Input Args:620620+ * vcpuid - The id of the VCPU to add to the VM.621621+ * guest_code - The vCPU's entry point622622+ */623623+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)624624+{625625+ struct kvm_mp_state mp_state;626626+ struct kvm_regs regs;627627+ vm_vaddr_t stack_vaddr;628628+ stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),629629+ DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);630630+631631+ /* Create VCPU */632632+ vm_vcpu_add(vm, vcpuid);633633+634634+ /* Setup guest general purpose registers */635635+ vcpu_regs_get(vm, vcpuid, ®s);636636+ regs.rflags = regs.rflags | 0x2;637637+ regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());638638+ regs.rip = (unsigned long) 
guest_code;639639+ vcpu_regs_set(vm, vcpuid, ®s);640640+641641+ /* Setup the MP state */642642+ mp_state.mp_state = 0;643643+ vcpu_set_mp_state(vm, vcpuid, &mp_state);644644+}645645+646646+/* VM VCPU CPUID Set647647+ *648648+ * Input Args:649649+ * vm - Virtual Machine650650+ * vcpuid - VCPU id651651+ * cpuid - The CPUID values to set.652652+ *653653+ * Output Args: None654654+ *655655+ * Return: void656656+ *657657+ * Set the VCPU's CPUID.658658+ */659659+void vcpu_set_cpuid(struct kvm_vm *vm,660660+ uint32_t vcpuid, struct kvm_cpuid2 *cpuid)661661+{662662+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);663663+ int rc;664664+665665+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);666666+667667+ rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);668668+ TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",669669+ rc, errno);670670+671671+}672672+/* Create a VM with reasonable defaults673673+ *674674+ * Input Args:675675+ * vcpuid - The id of the single VCPU to add to the VM.676676+ * guest_code - The vCPU's entry point677677+ *678678+ * Output Args: None679679+ *680680+ * Return:681681+ * Pointer to opaque structure that describes the created VM.682682+ */683683+struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)684684+{685685+ struct kvm_vm *vm;686686+687687+ /* Create VM */688688+ vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);689689+690690+ /* Setup IRQ Chip */691691+ vm_create_irqchip(vm);692692+693693+ /* Add the first vCPU. */694694+ vm_vcpu_add_default(vm, vcpuid, guest_code);695695+696696+ return vm;697697+}
+54
tools/testing/selftests/kvm/set_sregs_test.c
···11+/*22+ * KVM_SET_SREGS tests33+ *44+ * Copyright (C) 2018, Google LLC.55+ *66+ * This work is licensed under the terms of the GNU GPL, version 2.77+ *88+ * This is a regression test for the bug fixed by the following commit:99+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")1010+ *1111+ * That bug allowed a user-mode program that called the KVM_SET_SREGS1212+ * ioctl to put a VCPU's local APIC into an invalid state.1313+ *1414+ */1515+#define _GNU_SOURCE /* for program_invocation_short_name */1616+#include <fcntl.h>1717+#include <stdio.h>1818+#include <stdlib.h>1919+#include <string.h>2020+#include <sys/ioctl.h>2121+2222+#include "test_util.h"2323+2424+#include "kvm_util.h"2525+#include "x86.h"2626+2727+#define VCPU_ID 52828+2929+int main(int argc, char *argv[])3030+{3131+ struct kvm_sregs sregs;3232+ struct kvm_vm *vm;3333+ int rc;3434+3535+ /* Tell stdout not to buffer its content */3636+ setbuf(stdout, NULL);3737+3838+ /* Create VM */3939+ vm = vm_create_default(VCPU_ID, NULL);4040+4141+ vcpu_sregs_get(vm, VCPU_ID, &sregs);4242+ sregs.apic_base = 1 << 10;4343+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);4444+ TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",4545+ sregs.apic_base);4646+ sregs.apic_base = 1 << 11;4747+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);4848+ TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",4949+ sregs.apic_base);5050+5151+ kvm_vm_free(vm);5252+5353+ return 0;5454+}