this repo has no description

Write a compiler from Scrapscript to C (#120)

Emit straight C, no IR. Includes a GC and small runtime.

Supports immediate small integers (no bignum), both immediate small strings
and heap-allocated strings, and an immediate empty list.

authored by bernsteinbear.com and committed by

GitHub d049541f 55f63183

+1324 -2
+12
.clang-format
··· 1 + BasedOnStyle: Google 2 + AlignEscapedNewlinesLeft: false 3 + DerivePointerAlignment: false 4 + PointerAlignment: Left 5 + IncludeBlocks: Regroup 6 + IncludeCategories: 7 + # C system headers 8 + - Regex: '^<.*\.h?>' 9 + Priority: 1 10 + # Scrapscript headers 11 + - Regex: '.*' 12 + Priority: 2
+23
.clang-tidy
··· 1 + Checks: > 2 + -*, 3 + bugprone-argument-comment, 4 + google-readability-casting, 5 + google-readability-todo, 6 + modernize-use-nullptr, 7 + readability-braces-around-statements, 8 + readability-else-after-return, 9 + readability-identifier-naming, 10 + readability-inconsistent-declaration-parameter-name, 11 + readability-static-accessed-through-instance, 12 + FormatStyle: file 13 + CheckOptions: 14 + - { key: readability-identifier-naming.ClassCase, value: CamelCase } 15 + - { key: readability-identifier-naming.ClassMemberCase, value: lower_case } 16 + - { key: readability-identifier-naming.ClassMemberSuffix, value: '_' } 17 + - { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase } 18 + - { key: readability-identifier-naming.GlobalConstantPrefix, value: 'k' } 19 + - { key: readability-identifier-naming.LocalVariableCase, value: lower_case } 20 + - { key: readability-identifier-naming.MethodCase, value: camelBack } 21 + - { key: readability-identifier-naming.NamespaceCase, value: lower_case } 22 + - { key: readability-identifier-naming.ParameterCase, value: lower_case } 23 + - { key: readability-braces-around-statements.ShortStatementLines, value: 1 }
+3 -1
.github/workflows/ci.yml
··· 42 42 run: sudo apt update 43 43 - name: Install Python 44 44 run: sudo apt install --yes ${{matrix.PYTHON}} 45 - - name: Run tests 45 + - name: Run interpreter tests 46 46 run: ${{matrix.PYTHON}} scrapscript.py test 47 + - name: Run compiler tests 48 + run: ${{matrix.PYTHON}} compiler_tests.py 47 49 build_docker_image: 48 50 runs-on: ubuntu-latest 49 51 permissions:
+24
README.md
··· 53 53 docker run -i -t ghcr.io/tekknolagi/scrapscript:trunk repl 54 54 ``` 55 55 56 + ### The experimental compiler: 57 + 58 + #### Normal ELF 59 + 60 + ```bash 61 + ./compiler.py some.scrap # produces output.c 62 + ./compiler.py some.scrap --compile # produces a.out 63 + ``` 64 + 65 + #### Cosmopolitan 66 + 67 + ```bash 68 + CC=~/Downloads/cosmos/bin/cosmocc ./compiler.py some.scrap --compile # produces a.out 69 + ``` 70 + 71 + #### Wasm 72 + 73 + ```bash 74 + CC=/opt/wasi-sdk/bin/clang \ 75 + CFLAGS=-D_WASI_EMULATED_MMAN \ 76 + LDFLAGS=-lwasi-emulated-mman \ 77 + ./compiler.py some.scrap --compile # produces a.out 78 + ``` 79 + 56 80 ## Running Tests 57 81 58 82 ```bash
+475
compiler.py
#!/usr/bin/env python3
"""Compile Scrapscript programs to C.

Emits C directly -- no intermediate representation -- against the GC and
small runtime in runtime.c. Every compiled value is a ``struct object*``
registered with the GC via handles so it survives collections.
"""
import dataclasses
import io
import itertools
import json
import os
import typing

from typing import Dict, Optional

from scrapscript import (
    Access,
    Apply,
    Assign,
    Binop,
    BinopKind,
    Function,
    Hole,
    Int,
    List,
    MatchFunction,
    Object,
    Record,
    Spread,
    String,
    Var,
    Variant,
    Where,
    free_in,
    parse,
    tokenize,
)

# Maps Scrapscript variable names to the C expressions that hold them.
Env = Dict[str, str]


fn_counter = itertools.count()


@dataclasses.dataclass
class CompiledFunction:
    """One C function under construction: name, params, captured fields, body lines."""

    id: int = dataclasses.field(default=0, init=False, compare=False, hash=False)
    name: str
    params: typing.List[str]
    # Names of the free variables captured in this closure's environment.
    fields: typing.List[str] = dataclasses.field(default_factory=list)
    code: typing.List[str] = dataclasses.field(default_factory=list)

    def __post_init__(self) -> None:
        self.id = next(fn_counter)
        self.code.append("HANDLES();")
        for param in self.params:
            # The parameters are raw pointers and must be updated on GC
            self.code.append(f"GC_PROTECT({param});")

    def decl(self) -> str:
        """Return the C declaration for this function (no trailing semicolon)."""
        args = ", ".join(f"struct object* {arg}" for arg in self.params)
        return f"struct object* {self.name}({args})"


class Compiler:
    """Walks the Scrapscript AST and appends C statements to CompiledFunctions."""

    def __init__(self, main_fn: CompiledFunction) -> None:
        self.gensym_counter: int = 0
        self.functions: typing.List[CompiledFunction] = [main_fn]
        # The function whose body we are currently emitting into.
        self.function: CompiledFunction = main_fn
        # Interned record keys / variant tags; values are their C-side indices.
        self.record_keys: Dict[str, int] = {}
        self.variant_tags: Dict[str, int] = {}
        self.debug: bool = False
        self.used_runtime_functions: typing.Set[str] = set()

    def record_key(self, key: str) -> int:
        """Intern a record key, returning its stable integer index."""
        result = self.record_keys.get(key)
        if result is not None:
            return result
        result = self.record_keys[key] = len(self.record_keys)
        return result

    def variant_tag(self, key: str) -> int:
        """Intern a variant tag, returning its stable integer index."""
        result = self.variant_tags.get(key)
        if result is not None:
            return result
        result = self.variant_tags[key] = len(self.variant_tags)
        return result

    def gensym(self, stem: str = "tmp") -> str:
        """Generate a fresh C identifier; counter is global to the compiler."""
        result = f"{stem}_{self.gensym_counter}"
        self.gensym_counter += 1
        return result

    def _emit(self, line: str) -> None:
        self.function.code.append(line)

    def _debug(self, line: str) -> None:
        """Emit a line only in debug builds (stress-tests the GC with extra collects)."""
        if not self.debug:
            return
        self._emit("#ifndef NDEBUG")
        self._emit(line)
        self._emit("#endif")

    def _handle(self, name: str, exp: str) -> str:
        # TODO(max): Liveness analysis to avoid unnecessary handles
        self._emit(f"OBJECT_HANDLE({name}, {exp});")
        return name

    def _guard(self, cond: str, msg: Optional[str] = None) -> None:
        """Emit a runtime check that aborts the program with msg if cond is false."""
        if msg is None:
            msg = f"assertion {cond!s} failed"
        self._emit(f"if (!({cond})) {{")
        self._emit(f'fprintf(stderr, "{msg}\\n");')
        self._emit("abort();")
        self._emit("}")

    def _mktemp(self, exp: str) -> str:
        """Bind exp to a fresh GC handle and return the handle's name."""
        temp = self.gensym()
        return self._handle(temp, exp)

    def compile_assign(self, env: Env, exp: Assign) -> Env:
        """Compile a binding; returns env extended with the bound name."""
        assert isinstance(exp.name, Var)
        name = exp.name.name
        if isinstance(exp.value, Function):
            # Named function: pass the name along so recursion resolves to `this`.
            value = self.compile_function(env, exp.value, name)
            return {**env, name: value}
        if isinstance(exp.value, MatchFunction):
            # Named match function
            value = self.compile_match_function(env, exp.value, name)
            return {**env, name: value}
        value = self.compile(env, exp.value)
        return {**env, name: value}

    def make_compiled_function(self, arg: str, exp: Object, name: Optional[str]) -> CompiledFunction:
        """Create the C function skeleton for a (match) function expression."""
        assert isinstance(exp, (Function, MatchFunction))
        free = free_in(exp)
        if name is not None and name in free:
            # A self-reference is reachable through `this`; don't capture it.
            free.remove(name)
        fields = sorted(free)
        fn_name = self.gensym(name if name else "fn")  # must be globally unique
        return CompiledFunction(fn_name, params=["this", arg], fields=fields)

    def compile_function_env(self, fn: CompiledFunction, name: Optional[str]) -> Env:
        """Build the env for a function body: params, self-name, unpacked closure fields."""
        result = {param: param for param in fn.params}
        if name is not None:
            result[name] = "this"
        for i, field in enumerate(fn.fields):
            result[field] = self._mktemp(f"closure_get(this, {i})")
        return result

    def compile_function(self, env: Env, exp: Function, name: Optional[str]) -> str:
        assert isinstance(exp.arg, Var)
        fn = self.make_compiled_function(exp.arg.name, exp, name)
        self.functions.append(fn)
        # Temporarily redirect emission into the new function's body.
        cur = self.function
        self.function = fn
        funcenv = self.compile_function_env(fn, name)
        val = self.compile(funcenv, exp.body)
        fn.code.append(f"return {val};")
        self.function = cur
        return self.make_closure(env, fn)

    def try_match(self, env: Env, arg: str, pattern: Object, fallthrough: str) -> Env:
        """Emit checks that jump to `fallthrough` on mismatch; return new bindings."""
        if isinstance(pattern, Int):
            self._emit(f"if (!is_num({arg})) {{ goto {fallthrough}; }}")
            self._emit(f"if (num_value({arg}) != {pattern.value}) {{ goto {fallthrough}; }}")
            return {}
        if isinstance(pattern, Variant):
            self.variant_tag(pattern.tag)  # register it for the big enum
            self._emit(f"if (!is_variant({arg})) {{ goto {fallthrough}; }}")
            self._emit(f"if (variant_tag({arg}) != Tag_{pattern.tag}) {{ goto {fallthrough}; }}")
            return self.try_match(env, self._mktemp(f"variant_value({arg})"), pattern.value, fallthrough)
        if isinstance(pattern, String):
            self._emit(f"if (!is_string({arg})) {{ goto {fallthrough}; }}")
            value = pattern.value
            self._emit(
                f"if (!string_equal_cstr_len({arg}, {json.dumps(value)}, {len(value)})) {{ goto {fallthrough}; }}"
            )
            return {}
        if isinstance(pattern, Var):
            return {pattern.name: arg}
        if isinstance(pattern, List):
            self._emit(f"if (!is_list({arg})) {{ goto {fallthrough}; }}")
            updates = {}
            the_list = arg
            use_spread = False
            for i, pattern_item in enumerate(pattern.items):
                if isinstance(pattern_item, Spread):
                    use_spread = True
                    if pattern_item.name:
                        updates[pattern_item.name] = the_list
                    break
                # Not enough elements
                self._emit(f"if (is_empty_list({the_list})) {{ goto {fallthrough}; }}")
                list_item = self._mktemp(f"list_first({the_list})")
                updates.update(self.try_match(env, list_item, pattern_item, fallthrough))
                the_list = self._mktemp(f"list_rest({the_list})")
            if not use_spread:
                # Too many elements
                self._emit(f"if (!is_empty_list({the_list})) {{ goto {fallthrough}; }}")
            return updates
        raise NotImplementedError("try_match", pattern)

    def compile_match_function(self, env: Env, exp: MatchFunction, name: Optional[str]) -> str:
        arg = self.gensym()
        fn = self.make_compiled_function(arg, exp, name)
        self.functions.append(fn)
        cur = self.function
        self.function = fn
        funcenv = self.compile_function_env(fn, name)
        for i, case in enumerate(exp.cases):
            # Each case falls through to the next case's label; the last to no_match.
            fallthrough = f"case_{i+1}" if i < len(exp.cases) - 1 else "no_match"
            env_updates = self.try_match(funcenv, arg, case.pattern, fallthrough)
            case_result = self.compile({**funcenv, **env_updates}, case.body)
            self._emit(f"return {case_result};")
            self._emit(f"{fallthrough}:;")
        # TODO(max): (non-fatal?) exceptions
        self._emit(r'fprintf(stderr, "no matching cases\n");')
        self._emit("abort();")
        self.function = cur
        return self.make_closure(env, fn)

    def make_closure(self, env: Env, fn: CompiledFunction) -> str:
        """Allocate a closure for fn and populate its captured fields from env."""
        name = self._mktemp(f"mkclosure(heap, {fn.name}, {len(fn.fields)})")
        for i, field in enumerate(fn.fields):
            self._emit(f"closure_set({name}, {i}, {env[field]});")
        self._debug("collect(heap);")
        return name

    def compile(self, env: Env, exp: Object) -> str:
        """Compile one expression; returns the C name holding its result."""
        if isinstance(exp, Int):
            # TODO(max): Bignum
            self._debug("collect(heap);")
            return self._mktemp(f"mknum(heap, {exp.value})")
        if isinstance(exp, Hole):
            return self._mktemp("hole()")
        if isinstance(exp, Variant):
            self._debug("collect(heap);")
            self.variant_tag(exp.tag)
            value = self.compile(env, exp.value)
            result = self._mktemp(f"mkvariant(heap, Tag_{exp.tag})")
            self._emit(f"variant_set({result}, {value});")
            return result
        if isinstance(exp, String):
            self._debug("collect(heap);")
            string_repr = json.dumps(exp.value)
            # NOTE(review): len() counts Python characters, not UTF-8 bytes --
            # presumably fine for ASCII; verify for non-ASCII string literals.
            # (Fixed: dropped a stray trailing ';' that leaked into the
            # OBJECT_HANDLE macro argument in the generated C.)
            return self._mktemp(f"mkstring(heap, {string_repr}, {len(exp.value)})")
        if isinstance(exp, Binop):
            left = self.compile(env, exp.left)
            right = self.compile(env, exp.right)
            if exp.op == BinopKind.ADD:
                self._debug("collect(heap);")
                self._guard(f"is_num({left})")
                self._guard(f"is_num({right})")
                return self._mktemp(f"num_add({left}, {right})")
            if exp.op == BinopKind.MUL:
                self._debug("collect(heap);")
                self._guard(f"is_num({left})")
                self._guard(f"is_num({right})")
                return self._mktemp(f"num_mul({left}, {right})")
            if exp.op == BinopKind.SUB:
                self._debug("collect(heap);")
                self._guard(f"is_num({left})")
                self._guard(f"is_num({right})")
                return self._mktemp(f"num_sub({left}, {right})")
            if exp.op == BinopKind.LIST_CONS:
                self._debug("collect(heap);")
                return self._mktemp(f"list_cons({left}, {right})")
            if exp.op == BinopKind.STRING_CONCAT:
                self._debug("collect(heap);")
                self._guard(f"is_string({left})")
                self._guard(f"is_string({right})")
                return self._mktemp(f"string_concat({left}, {right})")
            raise NotImplementedError(f"binop {exp.op}")
        if isinstance(exp, Where):
            assert isinstance(exp.binding, Assign)
            res_env = self.compile_assign(env, exp.binding)
            new_env = {**env, **res_env}
            return self.compile(new_env, exp.body)
        if isinstance(exp, Var):
            var_value = env.get(exp.name)
            if var_value is None:
                raise NameError(f"name '{exp.name}' is not defined")
            return var_value
        if isinstance(exp, Apply):
            if isinstance(exp.func, Var):
                if exp.func.name == "runtime":
                    # `runtime "name"` resolves to a pre-made builtin closure.
                    assert isinstance(exp.arg, String)
                    self.used_runtime_functions.add(exp.arg.value)
                    return f"builtin_{exp.arg.value}"
            callee = self.compile(env, exp.func)
            arg = self.compile(env, exp.arg)
            return self._mktemp(f"closure_call({callee}, {arg})")
        if isinstance(exp, List):
            items = [self.compile(env, item) for item in exp.items]
            # Build the list back-to-front so each cons cell is a handle.
            result = self._mktemp("empty_list()")
            for item in reversed(items):
                result = self._mktemp(f"list_cons({item}, {result})")
                self._debug("collect(heap);")
            return result
        if isinstance(exp, Record):
            values: Dict[str, str] = {}
            for key, value_exp in exp.data.items():
                values[key] = self.compile(env, value_exp)
            result = self._mktemp(f"mkrecord(heap, {len(values)})")
            for i, (key, value) in enumerate(values.items()):
                key_idx = self.record_key(key)
                self._emit(
                    f"record_set({result}, /*index=*/{i}, (struct record_field){{.key={key_idx}, .value={value}}});"
                )
                self._debug("collect(heap);")
            return result
        if isinstance(exp, Access):
            assert isinstance(exp.at, Var), f"only Var access is supported, got {type(exp.at)}"
            record = self.compile(env, exp.obj)
            key_idx = self.record_key(exp.at.name)
            # Check if the record is a record
            self._guard(f"is_record({record})", "not a record")
            value = self._mktemp(f"record_get({record}, {key_idx})")
            self._guard(f"{value} != NULL", f"missing key {exp.at.name!s}")
            return value
        if isinstance(exp, Function):
            # Anonymous function
            return self.compile_function(env, exp, name=None)
        if isinstance(exp, MatchFunction):
            # Anonymous match function
            return self.compile_match_function(env, exp, name=None)
        raise NotImplementedError(f"exp {type(exp)} {exp}")


# TODO(max): Emit constants into the const heap
# The const heap must only point within the const heap
# The const heap will never be scanned
# The const heap can be serialized to disk and mmap'd

BUILTINS = [
    "print",
    "println",
]


def env_get_split(key: str, default: Optional[typing.List[str]] = None) -> typing.List[str]:
    """Read an environment variable and shlex-split it, falling back to default."""
    import shlex

    value = os.environ.get(key)
    if value:
        return shlex.split(value)
    if default:
        return default
    return []


def compile_to_string(source: str, memory: int, debug: bool) -> str:
    """Compile Scrapscript source into a complete C program as a string."""
    program = parse(tokenize(source))

    main_fn = CompiledFunction("scrap_main", params=[])
    compiler = Compiler(main_fn)
    compiler.debug = debug
    result = compiler.compile({}, program)
    main_fn.code.append(f"return {result};")

    # Only wrap the builtins the program actually referenced.
    builtins = [builtin for builtin in BUILTINS if builtin in compiler.used_runtime_functions]
    for builtin in builtins:
        fn = CompiledFunction(f"builtin_{builtin}_wrapper", params=["this", "arg"])
        fn.code.append(f"return {builtin}(arg);")
        compiler.functions.append(fn)

    f = io.StringIO()
    print('#include "runtime.c"', file=f)
    print("#define OBJECT_HANDLE(name, exp) GC_HANDLE(struct object*, name, exp)", file=f)
    # Emit the interned key/tag tables the runtime uses for printing.
    print("const char* record_keys[] = {", file=f)
    for key in compiler.record_keys:
        print(f'"{key}",', file=f)
    print("};", file=f)
    if compiler.variant_tags:
        print("const char* variant_names[] = {", file=f)
        for key in compiler.variant_tags:
            print(f'"{key}",', file=f)
        print("};", file=f)
        print("enum {", file=f)
        for key, idx in compiler.variant_tags.items():
            print(f"Tag_{key} = {idx},", file=f)
        print("};", file=f)
    # Declare all functions before any definitions so order doesn't matter.
    for function in compiler.functions:
        print(function.decl() + ";", file=f)
    for builtin in builtins:
        print(f"struct object* builtin_{builtin} = NULL;", file=f)
    for function in compiler.functions:
        print(f"{function.decl()} {{", file=f)
        for line in function.code:
            print(line, file=f)
        print("}", file=f)
    print("int main() {", file=f)
    print(f"heap = make_heap({memory});", file=f)
    print("HANDLES();", file=f)
    for builtin in builtins:
        print(f"builtin_{builtin} = mkclosure(heap, builtin_{builtin}_wrapper, 0);", file=f)
        print(f"GC_PROTECT(builtin_{builtin});", file=f)
    print(f"struct object* result = {main_fn.name}();", file=f)
    print("println(result);", file=f)
    print("destroy_heap(heap);", file=f)
    print("}", file=f)
    return f.getvalue()


def discover_cflags(cc: typing.List[str], debug: bool = True) -> typing.List[str]:
    """Pick C compiler flags, honoring a CFLAGS environment override."""
    default_cflags = ["-Wall", "-Wextra", "-fno-strict-aliasing"]
    if debug:
        default_cflags += ["-O0", "-ggdb"]
    else:
        default_cflags += ["-O2", "-DNDEBUG"]
    if "cosmo" not in cc[0]:
        # cosmocc does not support LTO
        default_cflags.append("-flto")
    return env_get_split("CFLAGS", default_cflags)


def compile_to_binary(source: str, memory: int, debug: bool) -> str:
    """Compile source to C, then to a native binary; return the binary's path."""
    import shlex
    import shutil
    import subprocess
    import sysconfig
    import tempfile

    cc = env_get_split("CC", shlex.split(sysconfig.get_config_var("CC")))
    cflags = discover_cflags(cc, debug)
    c_code = compile_to_string(source, memory, debug)
    with tempfile.NamedTemporaryFile(mode="w", suffix=".c", delete=False) as c_file:
        outdir = os.path.dirname(c_file.name)
        # The generated C does `#include "runtime.c"`, so runtime.c must sit
        # next to it. NOTE(review): assumes runtime.c is in the CWD -- verify.
        shutil.copy("runtime.c", outdir)
        c_file.write(c_code)
    # The first `with` has closed (flushed) the C file before cc reads it.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".out", delete=False) as out_file:
        subprocess.run([*cc, *cflags, "-o", out_file.name, c_file.name], check=True)
    return out_file.name


def main() -> None:
    """CLI entry point: compile a file to C and optionally build/format/run it."""
    import argparse

    parser = argparse.ArgumentParser(prog="scrapscript")
    parser.add_argument("file")
    parser.add_argument("-o", "--output", default="output.c")
    parser.add_argument("--format", action="store_true")
    parser.add_argument("--compile", action="store_true")
    parser.add_argument("--memory", type=int, default=1024)
    parser.add_argument("--run", action="store_true")
    parser.add_argument("--debug", action="store_true", default=False)
    args = parser.parse_args()

    with open(args.file, "r") as f:
        source = f.read()

    c_program = compile_to_string(source, args.memory, args.debug)

    with open(args.output, "w") as f:
        f.write(c_program)

    if args.format:
        import subprocess

        subprocess.run(["clang-format-15", "-i", args.output], check=True)

    if args.compile:
        import subprocess

        cc = env_get_split("CC", ["clang"])
        cflags = discover_cflags(cc, args.debug)
        ldflags = env_get_split("LDFLAGS")
        subprocess.run([*cc, "-o", "a.out", *cflags, args.output, *ldflags], check=True)

    if args.run:
        import subprocess

        subprocess.run(["sh", "-c", "./a.out"], check=True)


if __name__ == "__main__":
    main()
+58
compiler_tests.py
··· 1 + import unittest 2 + import subprocess 3 + 4 + from compiler import compile_to_binary 5 + 6 + 7 + class CompilerEndToEndTests(unittest.TestCase): 8 + def _run(self, code: str) -> str: 9 + binary = compile_to_binary(code, memory=1024, debug=False) 10 + result = subprocess.run(binary, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True) 11 + return result.stdout 12 + 13 + def test_int(self) -> None: 14 + self.assertEqual(self._run("1"), "1\n") 15 + 16 + def test_add(self) -> None: 17 + self.assertEqual(self._run("1 + 2"), "3\n") 18 + 19 + def test_sub(self) -> None: 20 + self.assertEqual(self._run("1 - 2"), "-1\n") 21 + 22 + def test_mul(self) -> None: 23 + self.assertEqual(self._run("2 * 3"), "6\n") 24 + 25 + def test_list(self) -> None: 26 + self.assertEqual(self._run("[1, 2, 3]"), "[1, 2, 3]\n") 27 + 28 + def test_var(self) -> None: 29 + self.assertEqual(self._run("a . a = 1"), "1\n") 30 + 31 + def test_record(self) -> None: 32 + self.assertEqual(self._run("{a = 1, b = 2}"), "{a = 1, b = 2}\n") 33 + 34 + def test_record_access(self) -> None: 35 + self.assertEqual(self._run("rec@a . rec = {a = 1, b = 2}"), "1\n") 36 + 37 + def test_hole(self) -> None: 38 + self.assertEqual(self._run("()"), "()\n") 39 + 40 + def test_variant(self) -> None: 41 + self.assertEqual(self._run("# foo 123"), "#foo 123\n") 42 + 43 + def test_function(self) -> None: 44 + self.assertEqual(self._run("f 1 . f = x -> x + 1"), "2\n") 45 + 46 + def test_match_int(self) -> None: 47 + self.assertEqual(self._run("f 3 . f = | 1 -> 2 | 3 -> 4"), "4\n") 48 + 49 + def test_match_list(self) -> None: 50 + self.assertEqual(self._run("f [4, 5] . f = | [1, 2] -> 3 | [4, 5] -> 6"), "6\n") 51 + 52 + @unittest.skip("TODO") 53 + def test_match_record(self) -> None: 54 + self.assertEqual(self._run("f {a = 4, b = 5} . f = | {a = 1, b = 2} -> 3 | {a = 4, b = 5} -> 6"), "6\n") 55 + 56 + 57 + if __name__ == "__main__": 58 + unittest.main()
+8 -1
hooks/pre-commit
··· 13 13 # cat always has error code 0 14 14 # ignore deleted files (can't be formatted) 15 15 filenames=$(git diff --cached --name-only --diff-filter=d $against | grep '\.py$' | cat) 16 + # If changed files include scrapscript.py, run tests 17 + if echo $filenames | grep scrapscript.py 18 + then 19 + ./scrapscript.py test 20 + fi 21 + 22 + # If there are any other changed Python files, make sure they lint 16 23 if [ -n "$filenames" ] 17 24 then 18 - ./scrapscript.py test 19 25 ruff format --check $filenames 26 + ruff check $filenames 20 27 mypy --strict $filenames 21 28 fi
+721
runtime.c
··· 1 + #include <assert.h> 2 + #include <stdbool.h> 3 + #include <stddef.h> 4 + #include <stdint.h> 5 + #include <stdio.h> 6 + #include <stdlib.h> 7 + #include <string.h> 8 + #include <sys/mman.h> 9 + #include <unistd.h> 10 + 11 + #define ALWAYS_INLINE inline __attribute__((always_inline)) 12 + #define NEVER_INLINE __attribute__((noinline)) 13 + 14 + const int kPointerSize = sizeof(void*); 15 + typedef intptr_t word; 16 + typedef uintptr_t uword; 17 + 18 + // Garbage collector core by Andy Wingo <wingo@pobox.com>. 19 + 20 + struct gc_obj { 21 + uintptr_t tag; // low bit is 0 if forwarding ptr 22 + uintptr_t payload[0]; 23 + }; 24 + 25 + // The low bit of the pointer is 1 if it's a heap object and 0 if it's an 26 + // immediate integer 27 + struct object {}; 28 + 29 + // Up to the five least significant bits are used to tag the object's layout. 30 + // The three low bits make up a primary tag, used to differentiate gc_obj 31 + // from immediate objects. All even tags map to SmallInt, which is 32 + // optimized by checking only the lowest bit for parity. 
33 + static const uword kSmallIntTagBits = 1; 34 + static const uword kPrimaryTagBits = 3; 35 + static const uword kImmediateTagBits = 5; 36 + static const uword kSmallIntTagMask = (1 << kSmallIntTagBits) - 1; 37 + static const uword kPrimaryTagMask = (1 << kPrimaryTagBits) - 1; 38 + static const uword kImmediateTagMask = (1 << kImmediateTagBits) - 1; 39 + 40 + const int kWordSize = sizeof(word); 41 + static const word kMaxSmallStringLength = kWordSize - 1; 42 + const int kBitsPerByte = 8; 43 + 44 + static const uword kSmallIntTag = 0; // 0b****0 45 + static const uword kHeapObjectTag = 1; // 0b**001 46 + static const uword kEmptyListTag = 5; // 0b00101 47 + static const uword kHoleTag = 7; // 0b00111 48 + static const uword kSmallStringTag = 13; // 0b01101 49 + 50 + bool is_small_int(struct object* obj) { 51 + return (((uword)obj) & kSmallIntTagMask) == kSmallIntTag; 52 + } 53 + bool is_immediate_not_small_int(struct object* obj) { 54 + return (((uword)obj) & (kPrimaryTagMask & ~kSmallIntTagMask)) != 0; 55 + } 56 + bool is_heap_object(struct object* obj) { 57 + return (((uword)obj) & kPrimaryTagMask) == kHeapObjectTag; 58 + } 59 + struct object* empty_list() { return (struct object*)kEmptyListTag; } 60 + bool is_empty_list(struct object* obj) { return obj == empty_list(); } 61 + struct object* hole() { return (struct object*)kHoleTag; } 62 + bool is_hole(struct object* obj) { 63 + return (((uword)obj) & kImmediateTagMask) == kHoleTag; 64 + } 65 + static ALWAYS_INLINE bool is_small_string(struct object* obj) { 66 + return (((uword)obj) & kImmediateTagMask) == kSmallStringTag; 67 + } 68 + static ALWAYS_INLINE uword small_string_length(struct object* obj) { 69 + assert(is_small_string(obj)); 70 + return (((uword)obj) >> kImmediateTagBits) & kMaxSmallStringLength; 71 + } 72 + static ALWAYS_INLINE struct object* mksmallstring(const char* data, 73 + uword length) { 74 + assert(length >= 0); 75 + assert(length <= kMaxSmallStringLength); 76 + uword result = 0; 77 + for 
(word i = length - 1; i >= 0; i--) { 78 + result = (result << kBitsPerByte) | data[i]; 79 + } 80 + struct object* result_obj = 81 + (struct object*)((result << kBitsPerByte) | 82 + (length << kImmediateTagBits) | kSmallStringTag); 83 + assert(!is_heap_object(result_obj)); 84 + assert(is_small_string(result_obj)); 85 + assert(small_string_length(result_obj) == length); 86 + return result_obj; 87 + } 88 + static ALWAYS_INLINE char small_string_at(struct object* obj, uword index) { 89 + assert(is_small_string(obj)); 90 + assert(index >= 0); 91 + assert(index < small_string_length(obj)); 92 + // +1 for (length | tag) byte 93 + return ((uword)obj >> ((index + 1) * kBitsPerByte)) & 0xFF; 94 + } 95 + struct gc_obj* as_heap_object(struct object* obj) { 96 + assert(is_heap_object(obj)); 97 + assert(kHeapObjectTag == 1); 98 + return (struct gc_obj*)((uword)obj - 1); 99 + } 100 + 101 + static const uintptr_t kNotForwardedBit = 1ULL; 102 + int is_forwarded(struct gc_obj* obj) { 103 + return (obj->tag & kNotForwardedBit) == 0; 104 + } 105 + struct gc_obj* forwarded(struct gc_obj* obj) { 106 + assert(is_forwarded(obj)); 107 + return (struct gc_obj*)obj->tag; 108 + } 109 + void forward(struct gc_obj* from, struct gc_obj* to) { 110 + assert(!is_forwarded(from)); 111 + assert((((uintptr_t)to) & kNotForwardedBit) == 0); 112 + from->tag = (uintptr_t)to; 113 + } 114 + 115 + struct gc_heap; 116 + 117 + typedef void (*VisitFn)(struct object**, struct gc_heap*); 118 + 119 + // To implement by the user: 120 + size_t heap_object_size(struct gc_obj* obj); 121 + size_t trace_heap_object(struct gc_obj* obj, struct gc_heap* heap, 122 + VisitFn visit); 123 + void trace_roots(struct gc_heap* heap, VisitFn visit); 124 + 125 + struct gc_heap { 126 + uintptr_t hp; 127 + uintptr_t limit; 128 + uintptr_t from_space; 129 + uintptr_t to_space; 130 + size_t size; 131 + }; 132 + 133 + static uintptr_t align(uintptr_t val, uintptr_t alignment) { 134 + return (val + alignment - 1) & ~(alignment - 1); 135 + 
} 136 + static uintptr_t align_size(uintptr_t size) { 137 + return align(size, sizeof(uintptr_t)); 138 + } 139 + 140 + static struct gc_heap* make_heap(size_t size) { 141 + size = align(size, getpagesize()); 142 + struct gc_heap* heap = malloc(sizeof(struct gc_heap)); 143 + void* mem = mmap(NULL, size, PROT_READ | PROT_WRITE, 144 + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 145 + heap->to_space = heap->hp = (uintptr_t)mem; 146 + heap->from_space = heap->limit = heap->hp + size / 2; 147 + heap->size = size; 148 + return heap; 149 + } 150 + 151 + void destroy_heap(struct gc_heap* heap) { 152 + munmap((void*)heap->to_space, heap->size); 153 + free(heap); 154 + } 155 + 156 + struct gc_obj* copy(struct gc_heap* heap, struct gc_obj* obj) { 157 + size_t size = heap_object_size(obj); 158 + struct gc_obj* new_obj = (struct gc_obj*)heap->hp; 159 + memcpy(new_obj, obj, size); 160 + forward(obj, new_obj); 161 + heap->hp += align_size(size); 162 + return new_obj; 163 + } 164 + 165 + void flip(struct gc_heap* heap) { 166 + heap->hp = heap->from_space; 167 + heap->from_space = heap->to_space; 168 + heap->to_space = heap->hp; 169 + heap->limit = heap->hp + heap->size / 2; 170 + } 171 + 172 + struct object* heap_tag(uintptr_t addr) { 173 + return (struct object*)(addr | (uword)1ULL); 174 + } 175 + 176 + void visit_field(struct object** pointer, struct gc_heap* heap) { 177 + if (!is_heap_object(*pointer)) { 178 + return; 179 + } 180 + struct gc_obj* from = as_heap_object(*pointer); 181 + struct gc_obj* to = is_forwarded(from) ? 
forwarded(from) : copy(heap, from); 182 + *pointer = heap_tag((uintptr_t)to); 183 + } 184 + 185 + void collect(struct gc_heap* heap) { 186 + flip(heap); 187 + uintptr_t scan = heap->hp; 188 + trace_roots(heap, visit_field); 189 + while (scan < heap->hp) { 190 + struct gc_obj* obj = (struct gc_obj*)scan; 191 + scan += align_size(trace_heap_object(obj, heap, visit_field)); 192 + } 193 + #ifndef NDEBUG 194 + // Zero out the rest of the heap for debugging 195 + memset((void*)scan, 0, heap->limit - scan); 196 + #endif 197 + } 198 + 199 + #define LIKELY(x) __builtin_expect(!!(x), 1) 200 + #define UNLIKELY(x) __builtin_expect(!!(x), 0) 201 + #define ALLOCATOR __attribute__((__malloc__)) 202 + 203 + static NEVER_INLINE ALLOCATOR struct object* allocate_slow_path( 204 + struct gc_heap* heap, size_t size) { 205 + // size is already aligned 206 + collect(heap); 207 + if (UNLIKELY(heap->limit - heap->hp < size)) { 208 + fprintf(stderr, "out of memory\n"); 209 + abort(); 210 + } 211 + uintptr_t addr = heap->hp; 212 + uintptr_t new_hp = align_size(addr + size); 213 + heap->hp = new_hp; 214 + return heap_tag(addr); 215 + } 216 + 217 + static ALWAYS_INLINE ALLOCATOR struct object* allocate(struct gc_heap* heap, 218 + size_t size) { 219 + uintptr_t addr = heap->hp; 220 + uintptr_t new_hp = align_size(addr + size); 221 + if (UNLIKELY(heap->limit < new_hp)) { 222 + return allocate_slow_path(heap, size); 223 + } 224 + heap->hp = new_hp; 225 + return heap_tag(addr); 226 + } 227 + 228 + // Application 229 + 230 + #define FOREACH_TAG(TAG) \ 231 + TAG(TAG_LIST) \ 232 + TAG(TAG_CLOSURE) \ 233 + TAG(TAG_RECORD) \ 234 + TAG(TAG_STRING) \ 235 + TAG(TAG_VARIANT) 236 + 237 + enum { 238 + // All odd becase of the kNotForwardedBit 239 + #define ENUM_TAG(TAG) TAG = __COUNTER__ * 2 + 1, 240 + FOREACH_TAG(ENUM_TAG) 241 + #undef ENUM_TAG 242 + }; 243 + 244 + struct list { 245 + struct gc_obj HEAD; 246 + struct object* first; 247 + struct object* rest; 248 + }; 249 + 250 + typedef struct object* 
(*ClosureFn)(struct object*, struct object*); 251 + 252 + // TODO(max): Figure out if there is a way to do a PyObject_HEAD version of 253 + // this where each closure actually has its own struct with named members 254 + struct closure { 255 + struct gc_obj HEAD; 256 + ClosureFn fn; 257 + size_t size; 258 + struct object* env[]; 259 + }; 260 + 261 + struct record_field { 262 + size_t key; 263 + struct object* value; 264 + }; 265 + 266 + struct record { 267 + struct gc_obj HEAD; 268 + size_t size; 269 + struct record_field fields[]; 270 + }; 271 + 272 + struct heap_string { 273 + struct gc_obj HEAD; 274 + size_t size; 275 + char data[]; 276 + }; 277 + 278 + struct variant { 279 + struct gc_obj HEAD; 280 + size_t tag; 281 + struct object* value; 282 + }; 283 + 284 + size_t variable_size(size_t base, size_t count) { 285 + return base + count * kPointerSize; 286 + } 287 + 288 + size_t record_size(size_t count) { 289 + return sizeof(struct record) + count * sizeof(struct record_field); 290 + } 291 + 292 + size_t heap_string_size(size_t count) { 293 + return sizeof(struct heap_string) + count; 294 + } 295 + 296 + size_t heap_object_size(struct gc_obj* obj) { 297 + switch (obj->tag) { 298 + case TAG_LIST: 299 + return sizeof(struct list); 300 + case TAG_CLOSURE: 301 + return variable_size(sizeof(struct closure), 302 + ((struct closure*)obj)->size); 303 + case TAG_RECORD: 304 + return record_size(((struct record*)obj)->size); 305 + case TAG_STRING: 306 + return heap_string_size(((struct heap_string*)obj)->size); 307 + case TAG_VARIANT: 308 + return sizeof(struct variant); 309 + default: 310 + fprintf(stderr, "unknown tag: %lu\n", obj->tag); 311 + abort(); 312 + } 313 + } 314 + 315 + size_t trace_heap_object(struct gc_obj* obj, struct gc_heap* heap, 316 + VisitFn visit) { 317 + switch (obj->tag) { 318 + case TAG_LIST: 319 + visit(&((struct list*)obj)->first, heap); 320 + visit(&((struct list*)obj)->rest, heap); 321 + break; 322 + case TAG_CLOSURE: 323 + for (size_t i = 0; i 
< ((struct closure*)obj)->size; i++) { 324 + visit(&((struct closure*)obj)->env[i], heap); 325 + } 326 + break; 327 + case TAG_RECORD: 328 + for (size_t i = 0; i < ((struct record*)obj)->size; i++) { 329 + visit(&((struct record*)obj)->fields[i].value, heap); 330 + } 331 + break; 332 + case TAG_STRING: 333 + break; 334 + case TAG_VARIANT: 335 + visit(&((struct variant*)obj)->value, heap); 336 + break; 337 + default: 338 + fprintf(stderr, "unknown tag: %lu\n", obj->tag); 339 + abort(); 340 + } 341 + return heap_object_size(obj); 342 + } 343 + 344 + const int kBitsPerPointer = kBitsPerByte * kWordSize; 345 + static const word kSmallIntBits = kBitsPerPointer - kSmallIntTagBits; 346 + static const word kSmallIntMinValue = -(((word)1) << (kSmallIntBits - 1)); 347 + static const word kSmallIntMaxValue = (((word)1) << (kSmallIntBits - 1)) - 1; 348 + 349 + bool smallint_is_valid(word value) { 350 + return (value >= kSmallIntMinValue) && (value <= kSmallIntMaxValue); 351 + } 352 + 353 + struct object* mknum(struct gc_heap* heap, word value) { 354 + (void)heap; 355 + assert(smallint_is_valid(value)); 356 + return (struct object*)(((uword)value << kSmallIntTagBits)); 357 + } 358 + 359 + bool is_num(struct object* obj) { return is_small_int(obj); } 360 + 361 + word num_value(struct object* obj) { 362 + assert(is_num(obj)); 363 + return ((word)obj) >> 1; // sign extend 364 + } 365 + 366 + bool is_list(struct object* obj) { 367 + if (is_empty_list(obj)) { 368 + return true; 369 + } 370 + return is_heap_object(obj) && as_heap_object(obj)->tag == TAG_LIST; 371 + } 372 + 373 + struct list* as_list(struct object* obj) { 374 + assert(is_list(obj)); 375 + return (struct list*)as_heap_object(obj); 376 + } 377 + 378 + struct object* list_first(struct object* obj) { 379 + assert(!is_empty_list(obj)); 380 + return as_list(obj)->first; 381 + } 382 + 383 + struct object* list_rest(struct object* list) { 384 + assert(!is_empty_list(list)); 385 + return as_list(list)->rest; 386 + } 387 + 388 
// Allocate a fresh list cell. Both fields start as the empty list so the
// object is always safe to trace, even before the caller fills it in.
struct object* mklist(struct gc_heap* heap) {
  struct object* result = allocate(heap, sizeof(struct list));
  as_heap_object(result)->tag = TAG_LIST;
  as_list(result)->first = empty_list();
  as_list(result)->rest = empty_list();
  return result;
}

bool is_closure(struct object* obj) {
  return is_heap_object(obj) && as_heap_object(obj)->tag == TAG_CLOSURE;
}

struct closure* as_closure(struct object* obj) {
  assert(is_closure(obj));
  return (struct closure*)as_heap_object(obj);
}

// Allocate a closure with `size` (possibly zero) environment slots.
struct object* mkclosure(struct gc_heap* heap, ClosureFn fn, size_t size) {
  struct object* result =
      allocate(heap, variable_size(sizeof(struct closure), size));
  as_heap_object(result)->tag = TAG_CLOSURE;
  as_closure(result)->fn = fn;
  as_closure(result)->size = size;
  // Assumes the items will be filled in immediately after calling mkclosure so
  // they are not initialized
  return result;
}

ClosureFn closure_fn(struct object* obj) { return as_closure(obj)->fn; }

void closure_set(struct object* closure, size_t i, struct object* item) {
  struct closure* c = as_closure(closure);
  assert(i < c->size);
  c->env[i] = item;
}

struct object* closure_get(struct object* closure, size_t i) {
  struct closure* c = as_closure(closure);
  assert(i < c->size);
  return c->env[i];
}

// Invoke a closure; the closure itself is passed as the first argument so
// the compiled function body can read its environment via closure_get.
struct object* closure_call(struct object* closure, struct object* arg) {
  ClosureFn fn = closure_fn(closure);
  return fn(closure, arg);
}

bool is_record(struct object* obj) {
  return is_heap_object(obj) && as_heap_object(obj)->tag == TAG_RECORD;
}

struct record* as_record(struct object* obj) {
  assert(is_record(obj));
  return (struct record*)as_heap_object(obj);
}

struct object* mkrecord(struct gc_heap* heap, size_t size) {
  // size is the number of fields, each of which has an index and a value
  // (object)
  struct object* result = allocate(heap, record_size(size));
  as_heap_object(result)->tag = TAG_RECORD;
  as_record(result)->size = size;
  // Assumes the items will be filled in immediately after calling mkrecord so
  // they are not initialized
  return result;
}

void record_set(struct object* record, size_t index,
                struct record_field field) {
  struct record* r = as_record(record);
  assert(index < r->size);
  r->fields[index] = field;
}

// Linear-scan lookup by interned key index; returns NULL when the key is
// not present.
struct object* record_get(struct object* record, size_t key) {
  struct record* r = as_record(record);
  struct record_field* fields = r->fields;
  for (size_t i = 0; i < r->size; i++) {
    struct record_field field = fields[i];
    if (field.key == key) {
      return field.value;
    }
  }
  return NULL;
}

bool is_string(struct object* obj) {
  if (is_small_string(obj)) {
    return true;
  }
  return is_heap_object(obj) && as_heap_object(obj)->tag == TAG_STRING;
}

struct heap_string* as_heap_string(struct object* obj) {
  assert(is_string(obj));
  return (struct heap_string*)as_heap_object(obj);
}

// Allocate a heap string of the given size with uninitialized contents;
// "private" because callers must write all `size` bytes before the next
// allocation.
struct object* mkstring_uninit_private(struct gc_heap* heap, size_t size) {
  assert(size > kMaxSmallStringLength);  // can't fill in small string later
  struct object* result = allocate(heap, heap_string_size(size));
  as_heap_object(result)->tag = TAG_STRING;
  as_heap_string(result)->size = size;
  return result;
}

// Make a string from a byte buffer, choosing the immediate small-string
// representation when the length permits.
struct object* mkstring(struct gc_heap* heap, const char* data, uword length) {
  if (length <= kMaxSmallStringLength) {
    return mksmallstring(data, length);
  }
  struct object* result = mkstring_uninit_private(heap, length);
  memcpy(as_heap_string(result)->data, data, length);
  return result;
}

// Length of either string representation, in bytes.
static ALWAYS_INLINE uword string_length(struct object* obj) {
  if (is_small_string(obj)) {
    return small_string_length(obj);
  }
  return as_heap_string(obj)->size;
}

// Byte at `index` for either string representation; index is not
// bounds-checked here.
char string_at(struct object* obj, uword index) {
  if (is_small_string(obj)) {
    return small_string_at(obj, index);
  }
  return as_heap_string(obj)->data[index];
}

bool is_variant(struct object* obj) {
  return is_heap_object(obj) && as_heap_object(obj)->tag == TAG_VARIANT;
}

struct variant* as_variant(struct object* obj) {
  assert(is_variant(obj));
  return (struct variant*)as_heap_object(obj);
}

// NOTE(review): like mkclosure/mkrecord, the payload (value) is left
// uninitialized; callers presumably must variant_set before anything can
// trigger a GC that would trace it — confirm at the call sites.
struct object* mkvariant(struct gc_heap* heap, size_t tag) {
  struct object* result = allocate(heap, sizeof(struct variant));
  as_heap_object(result)->tag = TAG_VARIANT;
  as_variant(result)->tag = tag;
  return result;
}

size_t variant_tag(struct object* obj) { return as_variant(obj)->tag; }

struct object* variant_value(struct object* obj) {
  return as_variant(obj)->value;
}

void variant_set(struct object* variant, struct object* value) {
  as_variant(variant)->value = value;
}

#define MAX_HANDLES 20

// One stack frame's worth of GC roots: a fixed-size array of pointers to
// local `struct object*` variables, linked to the caller's frame via next.
struct handles {
  // TODO(max): Figure out how to make this a flat linked list with whole
  // chunks popped off at function return
  struct object** stack[MAX_HANDLES];
  size_t stack_pointer;
  struct handles* next;
};

// Head of the handle-frame chain; trace_roots walks it during GC.
static struct handles* handles = NULL;

// __cleanup__ callback: unlink the current frame when it goes out of scope.
void pop_handles(void* local_handles) {
  (void)local_handles;
  handles = handles->next;
}

// Open a handle frame for the current C scope; popped automatically on
// scope exit via the cleanup attribute.
#define HANDLES()                                                     \
  struct handles local_handles                                        \
      __attribute__((__cleanup__(pop_handles))) = {.next = handles}; \
  handles = &local_handles
// Register a local variable as a GC root for the rest of this scope.
#define GC_PROTECT(x)                                   \
  assert(local_handles.stack_pointer < MAX_HANDLES);    \
  local_handles.stack[local_handles.stack_pointer++] = (struct object**)(&x)
#define END_HANDLES() handles = local_handles.next
// Declare, initialize, and protect a local in one step.
#define GC_HANDLE(type, name, val) \
  type name = val;                 \
  GC_PROTECT(name)

// Visit every registered root in every live handle frame.
void trace_roots(struct gc_heap* heap, VisitFn visit) {
  for (struct handles* h = handles; h; h = h->next) {
    for (size_t i = 0; i < h->stack_pointer; i++) {
      visit(h->stack[i], heap);
    }
  }
}

// The single global heap used by the compiled program's runtime helpers.
static struct gc_heap* heap = NULL;

struct object* num_add(struct object* a, struct object* b) {
  // NB: doesn't use pointers after allocating
  return mknum(heap, num_value(a) + num_value(b));
}

struct object* num_sub(struct object* a, struct object* b) {
  // NB: doesn't use pointers after allocating
  return mknum(heap, num_value(a) - num_value(b));
}

struct object* num_mul(struct object* a, struct object* b) {
  // NB: doesn't use pointers after allocating
  return mknum(heap, num_value(a) * num_value(b));
}

// Cons `item` onto `list`; both are protected because mklist may GC.
struct object* list_cons(struct object* item, struct object* list) {
  HANDLES();
  GC_PROTECT(item);
  GC_PROTECT(list);
  struct object* result = mklist(heap);
  as_list(result)->first = item;
  as_list(result)->rest = list;
  return result;
}

// Concatenate into a heap string; only valid when the result is too long
// for a small string. Inputs are protected across the allocation.
struct object* heap_string_concat(struct object* a, struct object* b) {
  uword a_size = string_length(a);
  uword b_size = string_length(b);
  assert(a_size + b_size > kMaxSmallStringLength);
  HANDLES();
  GC_PROTECT(a);
  GC_PROTECT(b);
  struct object* result = mkstring_uninit_private(heap, a_size + b_size);
  for (uword i = 0; i < a_size; i++) {
    as_heap_string(result)->data[i] = string_at(a, i);
  }
  for (uword i = 0; i < b_size; i++) {
    as_heap_string(result)->data[a_size + i] = string_at(b, i);
  }
  return result;
}
+ static ALWAYS_INLINE struct object* small_string_concat(struct object* a, 625 + struct object* b) { 626 + uword a_size = string_length(a); 627 + uword b_size = string_length(b); 628 + assert(a_size + b_size <= kMaxSmallStringLength); 629 + char data[kMaxSmallStringLength]; 630 + for (uword i = 0; i < a_size; i++) { 631 + data[i] = small_string_at(a, i); 632 + } 633 + for (uword i = 0; i < b_size; i++) { 634 + data[a_size + i] = small_string_at(b, i); 635 + } 636 + return mksmallstring(data, a_size + b_size); 637 + } 638 + 639 + struct object* string_concat(struct object* a, struct object* b) { 640 + uword a_size = string_length(a); 641 + if (a_size == 0) { 642 + return b; 643 + } 644 + uword b_size = string_length(b); 645 + if (b_size == 0) { 646 + return a; 647 + } 648 + if (a_size + b_size <= kMaxSmallStringLength) { 649 + return small_string_concat(a, b); 650 + } 651 + return heap_string_concat(a, b); 652 + } 653 + 654 + bool string_equal_cstr_len(struct object* string, const char* cstr, uword len) { 655 + assert(is_string(string)); 656 + if (string_length(string) != len) { 657 + return false; 658 + } 659 + for (uword i = 0; i < len; i++) { 660 + if (string_at(string, i) != cstr[i]) { 661 + return false; 662 + } 663 + } 664 + return true; 665 + } 666 + 667 + const char* record_keys[]; 668 + const char* variant_names[]; 669 + 670 + struct object* print(struct object* obj) { 671 + if (is_num(obj)) { 672 + printf("%ld", num_value(obj)); 673 + } else if (is_list(obj)) { 674 + putchar('['); 675 + while (!is_empty_list(obj)) { 676 + print(list_first(obj)); 677 + obj = list_rest(obj); 678 + if (!is_empty_list(obj)) { 679 + putchar(','); 680 + putchar(' '); 681 + } 682 + } 683 + putchar(']'); 684 + } else if (is_record(obj)) { 685 + struct record* record = as_record(obj); 686 + putchar('{'); 687 + for (size_t i = 0; i < record->size; i++) { 688 + printf("%s = ", record_keys[record->fields[i].key]); 689 + print(record->fields[i].value); 690 + if (i + 1 < record->size) 
{ 691 + fputs(", ", stdout); 692 + } 693 + } 694 + putchar('}'); 695 + } else if (is_closure(obj)) { 696 + fputs("<closure>", stdout); 697 + } else if (is_string(obj)) { 698 + putchar('"'); 699 + for (uword i = 0; i < string_length(obj); i++) { 700 + putchar(string_at(obj, i)); 701 + } 702 + putchar('"'); 703 + } else if (is_variant(obj)) { 704 + putchar('#'); 705 + printf("%s ", variant_names[variant_tag(obj)]); 706 + print(variant_value(obj)); 707 + } else if (is_hole(obj)) { 708 + fputs("()", stdout); 709 + } else { 710 + assert(is_heap_object(obj)); 711 + fprintf(stderr, "unknown tag: %lu\n", as_heap_object(obj)->tag); 712 + abort(); 713 + } 714 + return obj; 715 + } 716 + 717 + struct object* println(struct object* obj) { 718 + print(obj); 719 + putchar('\n'); 720 + return obj; 721 + }