Fast and robust atproto CAR file processing in rust

heed why not

It's fast, but it's currently being allowed to use a lot of memory.

Changed files
+633 -25
examples
disk-read-file
src
+459
Cargo.lock
··· 159 159 version = "2.9.4" 160 160 source = "registry+https://github.com/rust-lang/crates.io-index" 161 161 checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" 162 + dependencies = [ 163 + "serde", 164 + ] 162 165 163 166 [[package]] 164 167 name = "bumpalo" 165 168 version = "3.19.0" 166 169 source = "registry+https://github.com/rust-lang/crates.io-index" 167 170 checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" 171 + 172 + [[package]] 173 + name = "byteorder" 174 + version = "1.5.0" 175 + source = "registry+https://github.com/rust-lang/crates.io-index" 176 + checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 168 177 169 178 [[package]] 170 179 name = "bytes" ··· 185 194 checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4" 186 195 dependencies = [ 187 196 "serde", 197 + ] 198 + 199 + [[package]] 200 + name = "cc" 201 + version = "1.2.44" 202 + source = "registry+https://github.com/rust-lang/crates.io-index" 203 + checksum = "37521ac7aabe3d13122dc382493e20c9416f299d2ccd5b3a5340a2570cdeb0f3" 204 + dependencies = [ 205 + "find-msvc-tools", 206 + "shlex", 188 207 ] 189 208 190 209 [[package]] ··· 349 368 ] 350 369 351 370 [[package]] 371 + name = "crossbeam-queue" 372 + version = "0.3.12" 373 + source = "registry+https://github.com/rust-lang/crates.io-index" 374 + checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" 375 + dependencies = [ 376 + "crossbeam-utils", 377 + ] 378 + 379 + [[package]] 352 380 name = "crossbeam-utils" 353 381 version = "0.8.21" 354 382 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 387 415 ] 388 416 389 417 [[package]] 418 + name = "displaydoc" 419 + version = "0.2.5" 420 + source = "registry+https://github.com/rust-lang/crates.io-index" 421 + checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" 422 + dependencies = [ 423 + "proc-macro2", 424 + "quote", 425 + "syn 
2.0.106", 426 + ] 427 + 428 + [[package]] 429 + name = "doxygen-rs" 430 + version = "0.4.2" 431 + source = "registry+https://github.com/rust-lang/crates.io-index" 432 + checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" 433 + dependencies = [ 434 + "phf", 435 + ] 436 + 437 + [[package]] 390 438 name = "either" 391 439 version = "1.15.0" 392 440 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 444 492 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 445 493 446 494 [[package]] 495 + name = "find-msvc-tools" 496 + version = "0.1.4" 497 + source = "registry+https://github.com/rust-lang/crates.io-index" 498 + checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" 499 + 500 + [[package]] 447 501 name = "foldhash" 448 502 version = "0.1.5" 449 503 source = "registry+https://github.com/rust-lang/crates.io-index" 450 504 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" 451 505 452 506 [[package]] 507 + name = "form_urlencoded" 508 + version = "1.2.2" 509 + source = "registry+https://github.com/rust-lang/crates.io-index" 510 + checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" 511 + dependencies = [ 512 + "percent-encoding", 513 + ] 514 + 515 + [[package]] 453 516 name = "futures" 454 517 version = "0.3.31" 455 518 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 603 666 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 604 667 605 668 [[package]] 669 + name = "heed" 670 + version = "0.22.0" 671 + source = "registry+https://github.com/rust-lang/crates.io-index" 672 + checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393" 673 + dependencies = [ 674 + "bitflags", 675 + "byteorder", 676 + "heed-traits", 677 + "heed-types", 678 + "libc", 679 + "lmdb-master-sys", 680 + "once_cell", 681 + "page_size", 682 + "serde", 683 + "synchronoise", 684 + "url", 685 + ] 
686 + 687 + [[package]] 688 + name = "heed-traits" 689 + version = "0.20.0" 690 + source = "registry+https://github.com/rust-lang/crates.io-index" 691 + checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff" 692 + 693 + [[package]] 694 + name = "heed-types" 695 + version = "0.21.0" 696 + source = "registry+https://github.com/rust-lang/crates.io-index" 697 + checksum = "13c255bdf46e07fb840d120a36dcc81f385140d7191c76a7391672675c01a55d" 698 + dependencies = [ 699 + "bincode 1.3.3", 700 + "byteorder", 701 + "heed-traits", 702 + "serde", 703 + "serde_json", 704 + ] 705 + 706 + [[package]] 707 + name = "icu_collections" 708 + version = "2.1.1" 709 + source = "registry+https://github.com/rust-lang/crates.io-index" 710 + checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" 711 + dependencies = [ 712 + "displaydoc", 713 + "potential_utf", 714 + "yoke", 715 + "zerofrom", 716 + "zerovec", 717 + ] 718 + 719 + [[package]] 720 + name = "icu_locale_core" 721 + version = "2.1.1" 722 + source = "registry+https://github.com/rust-lang/crates.io-index" 723 + checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" 724 + dependencies = [ 725 + "displaydoc", 726 + "litemap", 727 + "tinystr", 728 + "writeable", 729 + "zerovec", 730 + ] 731 + 732 + [[package]] 733 + name = "icu_normalizer" 734 + version = "2.1.1" 735 + source = "registry+https://github.com/rust-lang/crates.io-index" 736 + checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" 737 + dependencies = [ 738 + "icu_collections", 739 + "icu_normalizer_data", 740 + "icu_properties", 741 + "icu_provider", 742 + "smallvec", 743 + "zerovec", 744 + ] 745 + 746 + [[package]] 747 + name = "icu_normalizer_data" 748 + version = "2.1.1" 749 + source = "registry+https://github.com/rust-lang/crates.io-index" 750 + checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" 751 + 752 + [[package]] 753 + name = "icu_properties" 754 + 
version = "2.1.1" 755 + source = "registry+https://github.com/rust-lang/crates.io-index" 756 + checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" 757 + dependencies = [ 758 + "icu_collections", 759 + "icu_locale_core", 760 + "icu_properties_data", 761 + "icu_provider", 762 + "zerotrie", 763 + "zerovec", 764 + ] 765 + 766 + [[package]] 767 + name = "icu_properties_data" 768 + version = "2.1.1" 769 + source = "registry+https://github.com/rust-lang/crates.io-index" 770 + checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" 771 + 772 + [[package]] 773 + name = "icu_provider" 774 + version = "2.1.1" 775 + source = "registry+https://github.com/rust-lang/crates.io-index" 776 + checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" 777 + dependencies = [ 778 + "displaydoc", 779 + "icu_locale_core", 780 + "writeable", 781 + "yoke", 782 + "zerofrom", 783 + "zerotrie", 784 + "zerovec", 785 + ] 786 + 787 + [[package]] 788 + name = "idna" 789 + version = "1.1.0" 790 + source = "registry+https://github.com/rust-lang/crates.io-index" 791 + checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" 792 + dependencies = [ 793 + "idna_adapter", 794 + "smallvec", 795 + "utf8_iter", 796 + ] 797 + 798 + [[package]] 799 + name = "idna_adapter" 800 + version = "1.2.1" 801 + source = "registry+https://github.com/rust-lang/crates.io-index" 802 + checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" 803 + dependencies = [ 804 + "icu_normalizer", 805 + "icu_properties", 806 + ] 807 + 808 + [[package]] 606 809 name = "io-uring" 607 810 version = "0.7.10" 608 811 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 718 921 checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" 719 922 720 923 [[package]] 924 + name = "litemap" 925 + version = "0.8.1" 926 + source = "registry+https://github.com/rust-lang/crates.io-index" 927 + checksum = 
"6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" 928 + 929 + [[package]] 930 + name = "lmdb-master-sys" 931 + version = "0.2.5" 932 + source = "registry+https://github.com/rust-lang/crates.io-index" 933 + checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df" 934 + dependencies = [ 935 + "cc", 936 + "doxygen-rs", 937 + "libc", 938 + ] 939 + 940 + [[package]] 721 941 name = "lock_api" 722 942 version = "0.4.14" 723 943 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 827 1047 version = "11.1.5" 828 1048 source = "registry+https://github.com/rust-lang/crates.io-index" 829 1049 checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" 1050 + 1051 + [[package]] 1052 + name = "page_size" 1053 + version = "0.6.0" 1054 + source = "registry+https://github.com/rust-lang/crates.io-index" 1055 + checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" 1056 + dependencies = [ 1057 + "libc", 1058 + "winapi", 1059 + ] 830 1060 831 1061 [[package]] 832 1062 name = "parking_lot" ··· 852 1082 ] 853 1083 854 1084 [[package]] 1085 + name = "percent-encoding" 1086 + version = "2.3.2" 1087 + source = "registry+https://github.com/rust-lang/crates.io-index" 1088 + checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" 1089 + 1090 + [[package]] 1091 + name = "phf" 1092 + version = "0.11.3" 1093 + source = "registry+https://github.com/rust-lang/crates.io-index" 1094 + checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" 1095 + dependencies = [ 1096 + "phf_macros", 1097 + "phf_shared", 1098 + ] 1099 + 1100 + [[package]] 1101 + name = "phf_generator" 1102 + version = "0.11.3" 1103 + source = "registry+https://github.com/rust-lang/crates.io-index" 1104 + checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" 1105 + dependencies = [ 1106 + "phf_shared", 1107 + "rand", 1108 + ] 1109 + 1110 + [[package]] 1111 + name = 
"phf_macros" 1112 + version = "0.11.3" 1113 + source = "registry+https://github.com/rust-lang/crates.io-index" 1114 + checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" 1115 + dependencies = [ 1116 + "phf_generator", 1117 + "phf_shared", 1118 + "proc-macro2", 1119 + "quote", 1120 + "syn 2.0.106", 1121 + ] 1122 + 1123 + [[package]] 1124 + name = "phf_shared" 1125 + version = "0.11.3" 1126 + source = "registry+https://github.com/rust-lang/crates.io-index" 1127 + checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" 1128 + dependencies = [ 1129 + "siphasher", 1130 + ] 1131 + 1132 + [[package]] 855 1133 name = "pin-project-lite" 856 1134 version = "0.2.16" 857 1135 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 910 1188 checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 911 1189 dependencies = [ 912 1190 "portable-atomic", 1191 + ] 1192 + 1193 + [[package]] 1194 + name = "potential_utf" 1195 + version = "0.1.4" 1196 + source = "registry+https://github.com/rust-lang/crates.io-index" 1197 + checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" 1198 + dependencies = [ 1199 + "zerovec", 913 1200 ] 914 1201 915 1202 [[package]] ··· 1052 1339 "env_logger", 1053 1340 "futures", 1054 1341 "futures-core", 1342 + "heed", 1055 1343 "ipld-core", 1056 1344 "iroh-car", 1057 1345 "log", ··· 1209 1497 ] 1210 1498 1211 1499 [[package]] 1500 + name = "shlex" 1501 + version = "1.3.0" 1502 + source = "registry+https://github.com/rust-lang/crates.io-index" 1503 + checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 1504 + 1505 + [[package]] 1212 1506 name = "signal-hook-registry" 1213 1507 version = "1.4.6" 1214 1508 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1216 1510 dependencies = [ 1217 1511 "libc", 1218 1512 ] 1513 + 1514 + [[package]] 1515 + name = "siphasher" 1516 + version = "1.0.1" 1517 + source = 
"registry+https://github.com/rust-lang/crates.io-index" 1518 + checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" 1219 1519 1220 1520 [[package]] 1221 1521 name = "slab" ··· 1240 1540 ] 1241 1541 1242 1542 [[package]] 1543 + name = "stable_deref_trait" 1544 + version = "1.2.1" 1545 + source = "registry+https://github.com/rust-lang/crates.io-index" 1546 + checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" 1547 + 1548 + [[package]] 1243 1549 name = "strsim" 1244 1550 version = "0.11.1" 1245 1551 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1268 1574 ] 1269 1575 1270 1576 [[package]] 1577 + name = "synchronoise" 1578 + version = "1.0.1" 1579 + source = "registry+https://github.com/rust-lang/crates.io-index" 1580 + checksum = "3dbc01390fc626ce8d1cffe3376ded2b72a11bb70e1c75f404a210e4daa4def2" 1581 + dependencies = [ 1582 + "crossbeam-queue", 1583 + ] 1584 + 1585 + [[package]] 1586 + name = "synstructure" 1587 + version = "0.13.2" 1588 + source = "registry+https://github.com/rust-lang/crates.io-index" 1589 + checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" 1590 + dependencies = [ 1591 + "proc-macro2", 1592 + "quote", 1593 + "syn 2.0.106", 1594 + ] 1595 + 1596 + [[package]] 1271 1597 name = "tempfile" 1272 1598 version = "3.23.0" 1273 1599 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1321 1647 ] 1322 1648 1323 1649 [[package]] 1650 + name = "tinystr" 1651 + version = "0.8.2" 1652 + source = "registry+https://github.com/rust-lang/crates.io-index" 1653 + checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" 1654 + dependencies = [ 1655 + "displaydoc", 1656 + "zerovec", 1657 + ] 1658 + 1659 + [[package]] 1324 1660 name = "tinytemplate" 1325 1661 version = "1.2.1" 1326 1662 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1386 1722 checksum = 
"6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" 1387 1723 1388 1724 [[package]] 1725 + name = "url" 1726 + version = "2.5.7" 1727 + source = "registry+https://github.com/rust-lang/crates.io-index" 1728 + checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" 1729 + dependencies = [ 1730 + "form_urlencoded", 1731 + "idna", 1732 + "percent-encoding", 1733 + "serde", 1734 + ] 1735 + 1736 + [[package]] 1737 + name = "utf8_iter" 1738 + version = "1.0.4" 1739 + source = "registry+https://github.com/rust-lang/crates.io-index" 1740 + checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" 1741 + 1742 + [[package]] 1389 1743 name = "utf8parse" 1390 1744 version = "0.2.2" 1391 1745 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1507 1861 ] 1508 1862 1509 1863 [[package]] 1864 + name = "winapi" 1865 + version = "0.3.9" 1866 + source = "registry+https://github.com/rust-lang/crates.io-index" 1867 + checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1868 + dependencies = [ 1869 + "winapi-i686-pc-windows-gnu", 1870 + "winapi-x86_64-pc-windows-gnu", 1871 + ] 1872 + 1873 + [[package]] 1874 + name = "winapi-i686-pc-windows-gnu" 1875 + version = "0.4.0" 1876 + source = "registry+https://github.com/rust-lang/crates.io-index" 1877 + checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1878 + 1879 + [[package]] 1510 1880 name = "winapi-util" 1511 1881 version = "0.1.11" 1512 1882 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1516 1886 ] 1517 1887 1518 1888 [[package]] 1889 + name = "winapi-x86_64-pc-windows-gnu" 1890 + version = "0.4.0" 1891 + source = "registry+https://github.com/rust-lang/crates.io-index" 1892 + checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1893 + 1894 + [[package]] 1519 1895 name = "windows-link" 1520 1896 version = "0.2.1" 1521 1897 source = 
"registry+https://github.com/rust-lang/crates.io-index" ··· 1675 2051 checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" 1676 2052 1677 2053 [[package]] 2054 + name = "writeable" 2055 + version = "0.6.2" 2056 + source = "registry+https://github.com/rust-lang/crates.io-index" 2057 + checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" 2058 + 2059 + [[package]] 2060 + name = "yoke" 2061 + version = "0.8.1" 2062 + source = "registry+https://github.com/rust-lang/crates.io-index" 2063 + checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" 2064 + dependencies = [ 2065 + "stable_deref_trait", 2066 + "yoke-derive", 2067 + "zerofrom", 2068 + ] 2069 + 2070 + [[package]] 2071 + name = "yoke-derive" 2072 + version = "0.8.1" 2073 + source = "registry+https://github.com/rust-lang/crates.io-index" 2074 + checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" 2075 + dependencies = [ 2076 + "proc-macro2", 2077 + "quote", 2078 + "syn 2.0.106", 2079 + "synstructure", 2080 + ] 2081 + 2082 + [[package]] 1678 2083 name = "zerocopy" 1679 2084 version = "0.8.27" 1680 2085 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1693 2098 "quote", 1694 2099 "syn 2.0.106", 1695 2100 ] 2101 + 2102 + [[package]] 2103 + name = "zerofrom" 2104 + version = "0.1.6" 2105 + source = "registry+https://github.com/rust-lang/crates.io-index" 2106 + checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" 2107 + dependencies = [ 2108 + "zerofrom-derive", 2109 + ] 2110 + 2111 + [[package]] 2112 + name = "zerofrom-derive" 2113 + version = "0.1.6" 2114 + source = "registry+https://github.com/rust-lang/crates.io-index" 2115 + checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" 2116 + dependencies = [ 2117 + "proc-macro2", 2118 + "quote", 2119 + "syn 2.0.106", 2120 + "synstructure", 2121 + ] 2122 + 2123 + [[package]] 2124 + name = "zerotrie" 2125 + 
version = "0.2.3" 2126 + source = "registry+https://github.com/rust-lang/crates.io-index" 2127 + checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" 2128 + dependencies = [ 2129 + "displaydoc", 2130 + "yoke", 2131 + "zerofrom", 2132 + ] 2133 + 2134 + [[package]] 2135 + name = "zerovec" 2136 + version = "0.11.5" 2137 + source = "registry+https://github.com/rust-lang/crates.io-index" 2138 + checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" 2139 + dependencies = [ 2140 + "yoke", 2141 + "zerofrom", 2142 + "zerovec-derive", 2143 + ] 2144 + 2145 + [[package]] 2146 + name = "zerovec-derive" 2147 + version = "0.11.2" 2148 + source = "registry+https://github.com/rust-lang/crates.io-index" 2149 + checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" 2150 + dependencies = [ 2151 + "proc-macro2", 2152 + "quote", 2153 + "syn 2.0.106", 2154 + ]
+1
Cargo.toml
··· 10 10 bincode = { version = "2.0.1", features = ["serde"] } 11 11 futures = "0.3.31" 12 12 futures-core = "0.3.31" 13 + heed = "0.22.0" 13 14 ipld-core = { version = "0.4.2", features = ["serde"] } 14 15 iroh-car = "0.5.1" 15 16 log = "0.4.28"
+14 -6
examples/disk-read-file/main.rs
··· 35 35 let mb = 2_usize.pow(20); 36 36 37 37 let mut driver = 38 - match repo_stream::drive::load_car(reader, |block| S(block.len()), 1 * mb).await? { 38 + match repo_stream::drive::load_car(reader, |block| S(block.len()), 5 * mb).await? { 39 39 repo_stream::drive::Vehicle::Lil(_, _) => panic!("try this on a bigger car"), 40 40 repo_stream::drive::Vehicle::Big(big_stuff) => { 41 - // let disk_store = repo_stream::disk::SqliteStore::new(tmpfile); 42 - // let disk_store = repo_stream::disk::RedbStore::new(tmpfile); 43 - let disk_store = repo_stream::disk::RustcaskStore::new(tmpfile); 41 + let disk_store = repo_stream::disk::SqliteStore::new(tmpfile.clone()); 42 + // let disk_store = repo_stream::disk::RedbStore::new(tmpfile.clone()); 43 + // let disk_store = repo_stream::disk::RustcaskStore::new(tmpfile.clone()); 44 + // let disk_store = repo_stream::disk::HeedStore::new(tmpfile.clone()); 44 45 let (commit, driver) = big_stuff.finish_loading(disk_store).await?; 45 46 log::warn!("big: {:?}", commit); 46 47 driver ··· 51 52 52 53 let mut n = 0; 53 54 loop { 54 - let (d, Some(pairs)) = driver.next_chunk(256).await? else { 55 + let (d, p) = driver.next_chunk(1024).await?; 56 + driver = d; 57 + let Some(pairs) = p else { 55 58 break; 56 59 }; 57 - driver = d; 58 60 n += pairs.len(); 59 61 // log::info!("got {rkey:?}"); 60 62 } 63 + // log::info!("now is the time to check mem..."); 64 + // tokio::time::sleep(std::time::Duration::from_secs(22)).await; 65 + drop(driver); 61 66 log::info!("bye! {n}"); 67 + 68 + std::fs::remove_file(tmpfile).unwrap(); 69 + // std::fs::remove_dir_all(tmpfile).unwrap(); 62 70 63 71 Ok(()) 64 72 }
+144 -9
src/disk.rs
··· 30 30 31 31 pub trait DiskWriter<E: StorageErrorBase> { 32 32 fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> Result<(), E>; 33 + fn put_many(&mut self, _kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> Result<(), E>; 33 34 } 34 35 35 36 pub trait DiskReader { ··· 59 60 let conn = tokio::task::spawn_blocking(move || { 60 61 let conn = rusqlite::Connection::open(path)?; 61 62 63 + let sq_mb = -(2_i64.pow(10)); // negative is kibibytes for sqlite cache_size 64 + 65 + // conn.pragma_update(None, "journal_mode", "OFF")?; 66 + // conn.pragma_update(None, "journal_mode", "MEMORY")?; 62 67 conn.pragma_update(None, "journal_mode", "WAL")?; 63 68 conn.pragma_update(None, "synchronous", "OFF")?; 64 - conn.pragma_update(None, "cache_size", (-4 * 2_i64.pow(10)).to_string())?; 69 + conn.pragma_update(None, "cache_size", (5 * sq_mb).to_string())?; 65 70 conn.execute( 66 71 "CREATE TABLE blocks ( 67 72 key BLOB PRIMARY KEY NOT NULL, ··· 86 91 impl DiskAccess for SqliteAccess { 87 92 type StorageError = rusqlite::Error; 88 93 fn get_writer(&mut self) -> Result<impl DiskWriter<rusqlite::Error>, rusqlite::Error> { 89 - let insert_stmt = self 90 - .conn 91 - .prepare("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?; 92 - Ok(SqliteWriter { insert_stmt }) 94 + let tx = self.conn.transaction()?; 95 + // let insert_stmt = tx.prepare("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?; 96 + Ok(SqliteWriter { tx: Some(tx) }) 93 97 } 94 98 fn get_reader( 95 99 &self, ··· 100 104 } 101 105 102 106 pub struct SqliteWriter<'conn> { 103 - insert_stmt: rusqlite::Statement<'conn>, 107 + tx: Option<rusqlite::Transaction<'conn>>, 108 + } 109 + 110 + /// oops careful in async 111 + impl Drop for SqliteWriter<'_> { 112 + fn drop(&mut self) { 113 + let tx = self.tx.take(); 114 + tx.unwrap().commit().unwrap(); 115 + } 104 116 } 105 117 106 118 impl DiskWriter<rusqlite::Error> for SqliteWriter<'_> { 107 119 fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> rusqlite::Result<()> { 108 - 
self.insert_stmt.execute((key, val))?; 120 + let tx = self.tx.as_ref().unwrap(); 121 + let mut insert_stmt = tx.prepare_cached("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?; 122 + insert_stmt.execute((key, val))?; 123 + Ok(()) 124 + } 125 + fn put_many(&mut self, kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> rusqlite::Result<()> { 126 + let tx = self.tx.as_ref().unwrap(); 127 + let mut insert_stmt = tx.prepare_cached("INSERT INTO blocks (key, val) VALUES (?1, ?2)")?; 128 + for (k, v) in kv { 129 + insert_stmt.execute((k, v))?; 130 + } 109 131 Ok(()) 110 132 } 111 133 } ··· 144 166 type Access = RedbAccess; 145 167 async fn get_access(&mut self) -> Result<RedbAccess, redb::Error> { 146 168 let path = self.path.clone(); 147 - let kb = 2_usize.pow(10); 169 + let mb = 2_usize.pow(20); 148 170 let db = tokio::task::spawn_blocking(move || { 149 171 let db = redb::Database::builder() 150 - .set_cache_size(16 * kb) 172 + .set_cache_size(5 * mb) 151 173 .create(path)?; 152 174 Ok::<_, Self::StorageError>(db) 153 175 }) ··· 183 205 fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> Result<(), redb::Error> { 184 206 let mut table = self.tx.as_ref().unwrap().open_table(REDB_TABLE)?; 185 207 table.insert(&*key, &*val)?; 208 + Ok(()) 209 + } 210 + fn put_many(&mut self, kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> Result<(), redb::Error> { 211 + let mut table = self.tx.as_ref().unwrap().open_table(REDB_TABLE)?; 212 + for (k, v) in kv { 213 + table.insert(&*k, &*v)?; 214 + } 186 215 Ok(()) 187 216 } 188 217 } ··· 274 303 self.db.set(key, val)?; 275 304 Ok(()) 276 305 } 306 + fn put_many(&mut self, kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> Result<(), CaskError> { 307 + for (k, v) in kv { 308 + self.db.set(k, v)?; 309 + } 310 + Ok(()) 311 + } 277 312 } 278 313 279 314 pub struct RustcaskReader { ··· 288 323 .map_err(|e| CaskError::GetError(e.to_string())) 289 324 } 290 325 } 326 + 327 + 328 + ///////// heeeeeeeeeeeeed 329 + 330 + type HeedBytes = 
heed::types::SerdeBincode<Vec<u8>>; 331 + type HeedDb = heed::Database<HeedBytes, HeedBytes>; 332 + // type HeedDb = heed::Database<Vec<u8>, Vec<u8>>; 333 + 334 + pub struct HeedStore { 335 + path: PathBuf, 336 + } 337 + 338 + impl HeedStore { 339 + pub fn new(path: PathBuf) -> Self { 340 + Self { path } 341 + } 342 + } 343 + 344 + impl StorageErrorBase for heed::Error {} 345 + 346 + impl DiskStore for HeedStore { 347 + type StorageError = heed::Error; 348 + type Access = HeedAccess; 349 + async fn get_access(&mut self) -> Result<HeedAccess, heed::Error> { 350 + let path = self.path.clone(); 351 + let env = tokio::task::spawn_blocking(move || { 352 + std::fs::create_dir_all(&path).unwrap(); 353 + let env = unsafe { 354 + heed::EnvOpenOptions::new() 355 + .map_size(1 * 2_usize.pow(30)) 356 + .open(path)? 357 + }; 358 + Ok::<_, Self::StorageError>(env) 359 + }) 360 + .await 361 + .expect("join error")?; 362 + 363 + Ok(HeedAccess { env, db: None }) 364 + } 365 + } 366 + 367 + pub struct HeedAccess { 368 + env: heed::Env, 369 + db: Option<HeedDb>, 370 + } 371 + 372 + impl DiskAccess for HeedAccess { 373 + type StorageError = heed::Error; 374 + fn get_writer(&mut self) -> Result<impl DiskWriter<heed::Error>, heed::Error> { 375 + let mut tx = self.env.write_txn()?; 376 + let db = self.env.create_database(&mut tx, None)?; 377 + self.db = Some(db.clone()); 378 + Ok(HeedWriter { tx: Some(tx), db }) 379 + } 380 + fn get_reader(&self) -> Result<impl DiskReader<StorageError = heed::Error>, heed::Error> { 381 + let tx = self.env.read_txn()?; 382 + let db = self.db.expect("should have called get_writer first"); 383 + Ok(HeedReader { tx, db }) 384 + } 385 + } 386 + 387 + pub struct HeedWriter<'tx> { 388 + tx: Option<heed::RwTxn<'tx>>, 389 + db: HeedDb, 390 + } 391 + 392 + impl DiskWriter<heed::Error> for HeedWriter<'_> { 393 + fn put(&mut self, key: Vec<u8>, val: Vec<u8>) -> Result<(), heed::Error> { 394 + let mut tx = self.tx.as_mut().unwrap(); 395 + self.db.put(&mut tx, &key, 
&val)?; 396 + Ok(()) 397 + } 398 + fn put_many(&mut self, kv: impl Iterator<Item = (Vec<u8>, Vec<u8>)>) -> Result<(), heed::Error> { 399 + let mut tx = self.tx.as_mut().unwrap(); 400 + for (k, v) in kv { 401 + self.db.put(&mut tx, &k, &v)?; 402 + } 403 + Ok(()) 404 + } 405 + } 406 + 407 + /// oops careful in async 408 + impl Drop for HeedWriter<'_> { 409 + fn drop(&mut self) { 410 + let tx = self.tx.take(); 411 + tx.unwrap().commit().unwrap(); 412 + } 413 + } 414 + 415 + pub struct HeedReader<'tx> { 416 + tx: heed::RoTxn<'tx, heed::WithTls>, 417 + db: HeedDb, 418 + } 419 + 420 + impl DiskReader for HeedReader<'_> { 421 + type StorageError = heed::Error; 422 + fn get(&mut self, key: Vec<u8>) -> Result<Option<Vec<u8>>, heed::Error> { 423 + self.db.get(&self.tx, &key) 424 + } 425 + }
+15 -10
src/drive.rs
··· 205 205 // dump mem blocks into the store 206 206 access = tokio::task::spawn(async move { 207 207 let mut writer = access.get_writer()?; 208 - for (k, v) in self.mem_blocks { 209 - let key_bytes = k.to_bytes(); 210 - let val_bytes = encode(v)?; // TODO 211 - writer.put(key_bytes, val_bytes)?; 212 - } 208 + 209 + let kvs = self 210 + .mem_blocks 211 + .into_iter() 212 + .map(|(k, v)| (k.to_bytes(), encode(v).unwrap())); 213 + 214 + writer.put_many(kvs)?; 215 + 213 216 drop(writer); // cannot outlive access 214 217 Ok::<_, DiskDriveError<S::StorageError>>(access) 215 218 }) ··· 251 254 // dump mem blocks into the store 252 255 access = tokio::task::spawn_blocking(move || { 253 256 let mut writer = access.get_writer()?; 254 - for (k, v) in chunk { 255 - let key_bytes = k.to_bytes(); 256 - let val_bytes = encode(v)?; // TODO 257 - writer.put(key_bytes, val_bytes)?; 258 - } 257 + 258 + let kvs = chunk 259 + .into_iter() 260 + .map(|(k, v)| (k.to_bytes(), encode(v).unwrap())); 261 + 262 + writer.put_many(kvs)?; 263 + 259 264 drop(writer); // cannot outlive access 260 265 Ok::<_, DiskDriveError<S::StorageError>>(access) 261 266 })