this repo has no description
at trunk 350 lines 8.9 kB view raw
1#define _GNU_SOURCE 2#include <assert.h> // for assert 3#include <stdbool.h> // for bool 4#include <stddef.h> // for NULL 5#include <stdint.h> // for int32_t, etc 6#include <string.h> // for memcpy 7#include <sys/mman.h> // for mmap 8#undef _GNU_SOURCE 9 10#include "greatest.h" 11 12// Objects 13 14typedef int64_t word; 15typedef uint64_t uword; 16 17// These constants are defined in a enum because the right hand side of a 18// statement like 19// static const int kFoo = ...; 20// must be a so-called "Integer Constant Expression". Compilers are required to 21// support a certain set of these expressions, but are not required to support 22// arbitrary arithmetic with other integer constants. Compilers such as gcc 23// before gcc-8 just decided not to play this game, while gcc-8+ and Clang play 24// just fine. 25// Since this arithmetic with constant values works just fine for enums, make 26// all these constants enum values instead. 27// See https://twitter.com/tekknolagi/status/1328449329472835586 for more info. 28enum { 29 kBitsPerByte = 8, // bits 30 kWordSize = sizeof(word), // bytes 31 kBitsPerWord = kWordSize * kBitsPerByte, // bits 32 33 kIntegerTag = 0x0, // 0b00 34 kIntegerTagMask = 0x3, // 0b11 35 kIntegerShift = 2, 36 kIntegerBits = kBitsPerWord - kIntegerShift, 37}; 38 39// These are defined as macros because they will not work as static const int 40// constants (per above explanation), and enum constants are only required to 41// be an int wide (per ISO C). 42#define INTEGER_MAX ((1LL << (kIntegerBits - 1)) - 1) 43#define INTEGER_MIN (-(1LL << (kIntegerBits - 1))) 44 45word Object_encode_integer(word value) { 46 assert(value < INTEGER_MAX && "too big"); 47 assert(value > INTEGER_MIN && "too small"); 48 return value << kIntegerShift; 49} 50 51// End Objects 52 53// Buffer 54 55typedef unsigned char byte; 56 57typedef enum { 58 kWritable, 59 kExecutable, 60} BufferState; 61 62typedef struct { 63 byte *address; 64 BufferState state; 65 size_t len; 66 size_t capacity; 67} Buffer; 68 69byte *Buffer_alloc_writable(size_t capacity) { 70 byte *result = mmap(/*addr=*/NULL, capacity, PROT_READ | PROT_WRITE, 71 MAP_ANONYMOUS | MAP_PRIVATE, 72 /*filedes=*/-1, /*off=*/0); 73 assert(result != MAP_FAILED); 74 return result; 75} 76 77void Buffer_init(Buffer *result, size_t capacity) { 78 result->address = Buffer_alloc_writable(capacity); 79 assert(result->address != MAP_FAILED); 80 result->state = kWritable; 81 result->len = 0; 82 result->capacity = capacity; 83} 84 85void Buffer_deinit(Buffer *buf) { 86 munmap(buf->address, buf->capacity); 87 buf->address = NULL; 88 buf->len = 0; 89 buf->capacity = 0; 90} 91 92int Buffer_make_executable(Buffer *buf) { 93 int result = mprotect(buf->address, buf->len, PROT_EXEC); 94 buf->state = kExecutable; 95 return result; 96} 97 98byte Buffer_at8(Buffer *buf, size_t pos) { return buf->address[pos]; } 99 100void Buffer_at_put8(Buffer *buf, size_t pos, byte b) { buf->address[pos] = b; } 101 102word max(word left, word right) { return left > right ? left : right; } 103 104void Buffer_ensure_capacity(Buffer *buf, word additional_capacity) { 105 if (buf->len + additional_capacity <= buf->capacity) { 106 return; 107 } 108 word new_capacity = 109 max(buf->capacity * 2, buf->capacity + additional_capacity); 110 byte *address = Buffer_alloc_writable(new_capacity); 111 memcpy(address, buf->address, buf->len); 112 int result = munmap(buf->address, buf->capacity); 113 assert(result == 0 && "munmap failed"); 114 buf->address = address; 115 buf->capacity = new_capacity; 116} 117 118void Buffer_write8(Buffer *buf, byte b) { 119 Buffer_ensure_capacity(buf, sizeof b); 120 Buffer_at_put8(buf, buf->len++, b); 121} 122 123void Buffer_write32(Buffer *buf, int32_t value) { 124 for (size_t i = 0; i < 4; i++) { 125 Buffer_write8(buf, (value >> (i * kBitsPerByte)) & 0xff); 126 } 127} 128 129// End Buffer 130 131// Emit 132 133typedef enum { 134 kRax = 0, 135 kRcx, 136 kRdx, 137 kRbx, 138 kRsp, 139 kRbp, 140 kRsi, 141 kRdi, 142} Register; 143 144enum { 145 kRexPrefix = 0x48, 146}; 147 148void Emit_mov_reg_imm32(Buffer *buf, Register dst, int32_t src) { 149 Buffer_write8(buf, kRexPrefix); 150 Buffer_write8(buf, 0xc7); 151 Buffer_write8(buf, 0xc0 + dst); 152 Buffer_write32(buf, src); 153} 154 155void Emit_ret(Buffer *buf) { Buffer_write8(buf, 0xc3); } 156 157// End Emit 158 159// AST 160 161typedef enum { 162 kInteger, 163} ASTNodeType; 164 165struct ASTNode; 166typedef struct ASTNode ASTNode; 167 168ASTNodeType AST_type_of(ASTNode *node) { 169 uint64_t address = (uint64_t)node; 170 if ((address & kIntegerTagMask) == kIntegerTag) { 171 return kInteger; 172 } 173 assert(0 && "unexpected node type"); 174} 175 176bool AST_is_integer(ASTNode *node) { return AST_type_of(node) == kInteger; } 177 178word AST_get_integer(ASTNode *node) { return (word)node >> kIntegerShift; } 179 180ASTNode *AST_new_integer(word value) { 181 return (ASTNode *)Object_encode_integer(value); 182} 183 184// End AST 185 186// Compile 187 188int Compile_expr(Buffer *buf, ASTNode *node) { 189 if (AST_is_integer(node)) { 190 word value = AST_get_integer(node); 191 Emit_mov_reg_imm32(buf, kRax, Object_encode_integer(value)); 192 return 0; 193 } 194 assert(0 && "unexpected node type"); 195} 196 197int Compile_function(Buffer *buf, ASTNode *node) { 198 int result = Compile_expr(buf, node); 199 if (result != 0) 200 return result; 201 Emit_ret(buf); 202 return 0; 203} 204 205// End Compile 206 207typedef int (*JitFunction)(); 208 209// Testing 210 211word Testing_execute_expr(Buffer *buf) { 212 assert(buf != NULL); 213 assert(buf->address != NULL); 214 assert(buf->state == kExecutable); 215 // The pointer-pointer cast is allowed but the underlying 216 // data-to-function-pointer back-and-forth is only guaranteed to work on 217 // POSIX systems (because of eg dlsym). 218 JitFunction function = *(JitFunction *)(&buf->address); 219 return function(); 220} 221 222#define EXPECT_EQUALS_BYTES(buf, arr) \ 223 ASSERT_MEM_EQ((arr), (buf)->address, sizeof arr) 224 225// End Testing 226 227// Tests 228 229TEST encode_positive_integer(void) { 230 ASSERT_EQ(Object_encode_integer(0), 0x0); 231 ASSERT_EQ(Object_encode_integer(1), 0x4); 232 ASSERT_EQ(Object_encode_integer(10), 0x28); 233 PASS(); 234} 235 236TEST encode_negative_integer(void) { 237 ASSERT_EQ(0x0, Object_encode_integer(0)); 238 ASSERT_EQ(Object_encode_integer(-1), (word)0xfffffffffffffffc); 239 ASSERT_EQ(Object_encode_integer(-10), (word)0xffffffffffffffd8); 240 PASS(); 241} 242 243TEST buffer_write8_increases_length(void) { 244 Buffer buf; 245 Buffer_init(&buf, 5); 246 ASSERT_EQ(buf.len, 0); 247 Buffer_write8(&buf, 0xdb); 248 ASSERT_EQ(Buffer_at8(&buf, 0), 0xdb); 249 ASSERT_EQ(buf.len, 1); 250 Buffer_deinit(&buf); 251 PASS(); 252} 253 254TEST buffer_write8_expands_buffer(void) { 255 Buffer buf; 256 Buffer_init(&buf, 1); 257 ASSERT_EQ(buf.capacity, 1); 258 ASSERT_EQ(buf.len, 0); 259 Buffer_write8(&buf, 0xdb); 260 Buffer_write8(&buf, 0xef); 261 ASSERT(buf.capacity > 1); 262 ASSERT_EQ(buf.len, 2); 263 Buffer_deinit(&buf); 264 PASS(); 265} 266 267TEST buffer_write32_expands_buffer(void) { 268 Buffer buf; 269 Buffer_init(&buf, 1); 270 ASSERT_EQ(buf.capacity, 1); 271 ASSERT_EQ(buf.len, 0); 272 Buffer_write32(&buf, 0xdeadbeef); 273 ASSERT(buf.capacity > 1); 274 ASSERT_EQ(buf.len, 4); 275 Buffer_deinit(&buf); 276 PASS(); 277} 278 279TEST buffer_write32_writes_little_endian(void) { 280 Buffer buf; 281 Buffer_init(&buf, 4); 282 Buffer_write32(&buf, 0xdeadbeef); 283 ASSERT_EQ(Buffer_at8(&buf, 0), 0xef); 284 ASSERT_EQ(Buffer_at8(&buf, 1), 0xbe); 285 ASSERT_EQ(Buffer_at8(&buf, 2), 0xad); 286 ASSERT_EQ(Buffer_at8(&buf, 3), 0xde); 287 Buffer_deinit(&buf); 288 PASS(); 289} 290 291TEST compile_positive_integer(void) { 292 word value = 123; 293 ASTNode *node = AST_new_integer(value); 294 Buffer buf; 295 Buffer_init(&buf, 1); 296 int compile_result = Compile_function(&buf, node); 297 ASSERT_EQ(compile_result, 0); 298 // mov eax, imm(123); ret 299 byte expected[] = {0x48, 0xc7, 0xc0, 0xec, 0x01, 0x00, 0x00, 0xc3}; 300 EXPECT_EQUALS_BYTES(&buf, expected); 301 Buffer_make_executable(&buf); 302 word result = Testing_execute_expr(&buf); 303 ASSERT_EQ(result, Object_encode_integer(value)); 304 PASS(); 305} 306 307TEST compile_negative_integer(void) { 308 word value = -123; 309 ASTNode *node = AST_new_integer(value); 310 Buffer buf; 311 Buffer_init(&buf, 1); 312 int compile_result = Compile_function(&buf, node); 313 ASSERT_EQ(compile_result, 0); 314 // mov eax, imm(-123); ret 315 byte expected[] = {0x48, 0xc7, 0xc0, 0x14, 0xfe, 0xff, 0xff, 0xc3}; 316 EXPECT_EQUALS_BYTES(&buf, expected); 317 Buffer_make_executable(&buf); 318 word result = Testing_execute_expr(&buf); 319 ASSERT_EQ(result, Object_encode_integer(value)); 320 PASS(); 321} 322 323SUITE(object_tests) { 324 RUN_TEST(encode_positive_integer); 325 RUN_TEST(encode_negative_integer); 326} 327 328SUITE(buffer_tests) { 329 RUN_TEST(buffer_write8_increases_length); 330 RUN_TEST(buffer_write8_expands_buffer); 331 RUN_TEST(buffer_write32_expands_buffer); 332 RUN_TEST(buffer_write32_writes_little_endian); 333} 334 335SUITE(compiler_tests) { 336 RUN_TEST(compile_positive_integer); 337 RUN_TEST(compile_negative_integer); 338} 339 340// End Tests 341 342GREATEST_MAIN_DEFS(); 343 344int main(int argc, char **argv) { 345 GREATEST_MAIN_BEGIN(); 346 RUN_SUITE(object_tests); 347 RUN_SUITE(buffer_tests); 348 RUN_SUITE(compiler_tests); 349 GREATEST_MAIN_END(); 350}