this repo has no description

wip

+4
.opencode/agent/pair.md
··· 26 26 - We've talked it through and we've confirmed I'd like you to implement something 27 27 - A snippet clarifies better than words 28 28 29 + ## Subagents 30 + 31 + Prefer spawning subagents for large tasks, to preserve our shared context for conversation. 32 + 29 33 ## Codebase access 30 34 31 35 You have full read/write. Explore freely to understand context—just tell me what you're thinking, don't silently absorb and produce.
+21
.opencode/opencode.json
··· 1 + { 2 + "$schema": "https://opencode.ai/config.json", 3 + "mcp": { 4 + "playwright": { 5 + "type": "local", 6 + "command": [ 7 + "pnpx", 8 + "@playwright/mcp@latest", 9 + "--browser", 10 + "firefox", 11 + "--user-data-dir", 12 + "/home/jonathan/.cache/playwright-mcp" 13 + ], 14 + "environment": { 15 + "PLAYWRIGHT_BROWSERS_PATH": "/home/jonathan/.cache/playwright-browsers", 16 + "PLAYWRIGHT_SKIP_VALIDATE_HOST_REQUIREMENTS": "true" 17 + }, 18 + "enabled": true 19 + } 20 + } 21 + }
+3
AGENTS.md
··· 1 + # Tips 2 + 3 + Always check whether the dev server is already running before starting a new one.
+114
debug-demo.ts
··· 1 + import {chromium} from 'playwright' 2 + 3 + async function debugDemo() { 4 + const browsersPath = process.env.PLAYWRIGHT_BROWSERS_PATH 5 + const executablePath = browsersPath ? `${browsersPath}/chromium-1194/chrome-linux/chrome` : undefined 6 + 7 + const browser = await chromium.launch({ 8 + headless: true, 9 + executablePath, 10 + }) 11 + const page = await browser.newPage() 12 + 13 + // Capture console logs 14 + page.on('console', (msg) => { 15 + const type = msg.type() 16 + const text = msg.text() 17 + if (type === 'error') { 18 + console.error('[BROWSER ERROR]', text) 19 + } else if (type === 'warning') { 20 + console.warn('[BROWSER WARN]', text) 21 + } else { 22 + console.log('[BROWSER]', text) 23 + } 24 + }) 25 + 26 + // Capture page errors 27 + page.on('pageerror', (err) => { 28 + console.error('[PAGE ERROR]', err.message) 29 + }) 30 + 31 + console.log('Navigating to http://localhost:5173...') 32 + await page.goto('http://localhost:5173', {waitUntil: 'networkidle'}) 33 + 34 + // Wait for the demo to initialize 35 + await page.waitForSelector('.demo', {timeout: 10000}) 36 + 37 + console.log('\n=== Demo Page Loaded ===\n') 38 + 39 + // Get entry count 40 + const entryCount = await page.textContent('#entry-count') 41 + console.log('Entry count:', entryCount) 42 + 43 + // Get all query sections 44 + const queries = await page.$$('.query') 45 + console.log(`\nFound ${queries.length} queries\n`) 46 + 47 + // Run each query and capture results 48 + for (let i = 0; i < queries.length; i++) { 49 + const query = queries[i] 50 + const title = await query.$eval('h3', (el) => el.textContent) 51 + console.log(`\n--- Query ${i + 1}: ${title} ---`) 52 + 53 + // Get the query spec 54 + const spec = await query.$eval('.query-spec', (el) => el.textContent) 55 + console.log('Spec:', spec?.replace(/\n/g, ' ')) 56 + 57 + // Get the plan details 58 + const plan = await query.$('.plan') 59 + if (plan) { 60 + const planDetails = await plan.evaluate((el) => { 61 + const divs 
= el.querySelectorAll(':scope > div') 62 + return Array.from(divs).map((d) => d.textContent?.trim()) 63 + }) 64 + console.log('Plan:', planDetails.join(' | ')) 65 + } 66 + 67 + // Click the Run Query button 68 + const runBtn = await query.$('.run-query-btn') 69 + if (runBtn) { 70 + await runBtn.click() 71 + // Wait for results to appear 72 + await page.waitForSelector(`#results-${i} pre, #results-${i} .loading`, {timeout: 5000}).catch(() => {}) 73 + // Give time for async query to complete 74 + await page.waitForTimeout(500) 75 + 76 + const results = await page.$eval(`#results-${i}`, (el) => el.textContent).catch(() => 'No results') 77 + console.log('Results:', results?.slice(0, 500)) 78 + } 79 + } 80 + 81 + // Check cardinality estimates 82 + console.log('\n=== Cardinality Estimates ===') 83 + const cardTable = await page.$('#cardinality-table') 84 + if (cardTable) { 85 + const rows = await cardTable.$$eval('tr', (trs) => 86 + trs.slice(1).map((tr) => { 87 + const cells = tr.querySelectorAll('td') 88 + return `${cells[0]?.textContent}: ${cells[1]?.textContent}` 89 + }), 90 + ) 91 + rows.forEach((r) => console.log(r)) 92 + } 93 + 94 + // Check index store sizes 95 + console.log('\n=== Index Store Sizes ===') 96 + const sizesTable = await page.$('#index-sizes-table') 97 + if (sizesTable) { 98 + const rows = await sizesTable.$$eval('tr', (trs) => 99 + trs.slice(1).map((tr) => { 100 + const cells = tr.querySelectorAll('td') 101 + return `${cells[0]?.textContent}: ${cells[1]?.textContent}` 102 + }), 103 + ) 104 + rows.forEach((r) => console.log(r)) 105 + } 106 + 107 + await browser.close() 108 + console.log('\n=== Done ===') 109 + } 110 + 111 + debugDemo().catch((err) => { 112 + console.error('Error:', err) 113 + process.exit(1) 114 + })
+6
flake.nix
··· 14 14 buildInputs = [ 15 15 pkgs.pnpm 16 16 pkgs.nodejs 17 + pkgs.playwright-driver.browsers 17 18 ]; 19 + 20 + shellHook = '' 21 + export PLAYWRIGHT_BROWSERS_PATH=${pkgs.playwright-driver.browsers} 22 + export PLAYWRIGHT_SKIP_VALIDATE_HOST_REQUIREMENTS=true 23 + ''; 18 24 }; 19 25 } 20 26 );
+5
package.json
··· 16 16 "@eslint/js": "^9.39.2", 17 17 "@eslint/json": "^0.14.0", 18 18 "@eslint/markdown": "^7.5.1", 19 + "@playwright/test": "^1.57.0", 20 + "@types/node": "^25.0.3", 19 21 "confusing-browser-globals": "^1.0.11", 20 22 "eslint": "^9.39.2", 21 23 "eslint-config-prettier": "^10.1.8", ··· 23 25 "eslint-plugin-prettier": "^5.5.4", 24 26 "fake-indexeddb": "^6.2.5", 25 27 "globals": "^16.5.0", 28 + "playwright": "^1.57.0", 26 29 "prettier": "^3.7.4", 30 + "tsx": "^4.21.0", 27 31 "typescript": "~5.9.3", 28 32 "typescript-eslint": "^8.51.0", 29 33 "typescript-eslint-language-service": "^5.0.5", ··· 33 37 }, 34 38 "dependencies": { 35 39 "core-js": "^3.47.0", 40 + "feedsmith": "^2.8.0", 36 41 "idb": "^8.0.3", 37 42 "zod": "^4.3.4" 38 43 }
+113 -8
pnpm-lock.yaml
··· 11 11 core-js: 12 12 specifier: ^3.47.0 13 13 version: 3.47.0 14 + feedsmith: 15 + specifier: ^2.8.0 16 + version: 2.8.0 14 17 idb: 15 18 specifier: ^8.0.3 16 19 version: 8.0.3 ··· 33 36 '@eslint/markdown': 34 37 specifier: ^7.5.1 35 38 version: 7.5.1 39 + '@playwright/test': 40 + specifier: ^1.57.0 41 + version: 1.57.0 42 + '@types/node': 43 + specifier: ^25.0.3 44 + version: 25.0.3 36 45 confusing-browser-globals: 37 46 specifier: ^1.0.11 38 47 version: 1.0.11 ··· 54 63 globals: 55 64 specifier: ^16.5.0 56 65 version: 16.5.0 66 + playwright: 67 + specifier: ^1.57.0 68 + version: 1.57.0 57 69 prettier: 58 70 specifier: ^3.7.4 59 71 version: 3.7.4 72 + tsx: 73 + specifier: ^4.21.0 74 + version: 4.21.0 60 75 typescript: 61 76 specifier: ~5.9.3 62 77 version: 5.9.3 ··· 71 86 version: 5.1.3 72 87 vite: 73 88 specifier: ^7.2.4 74 - version: 7.3.0 89 + version: 7.3.0(@types/node@25.0.3)(tsx@4.21.0) 75 90 vitest: 76 91 specifier: ^4.0.16 77 - version: 4.0.16 92 + version: 4.0.16(@types/node@25.0.3)(tsx@4.21.0) 78 93 79 94 packages: 80 95 ··· 347 362 '@pkgr/core@0.2.9': 348 363 resolution: {integrity: sha512-QNqXyfVS2wm9hweSYD2O7F0G06uurj9kZ96TRQE5Y9hU7+tgdZwIkbAKc5Ocy1HxEY2kuDQa6cQ1WRs/O5LFKA==} 349 364 engines: {node: ^12.20.0 || ^14.18.0 || >=16.0.0} 365 + 366 + '@playwright/test@1.57.0': 367 + resolution: {integrity: sha512-6TyEnHgd6SArQO8UO2OMTxshln3QMWBtPGrOCgs3wVEmQmwyuNtB10IZMfmYDE0riwNR1cu4q+pPcxMVtaG3TA==} 368 + engines: {node: '>=18'} 369 + hasBin: true 350 370 351 371 '@rollup/rollup-android-arm-eabi@4.54.0': 352 372 resolution: {integrity: sha512-OywsdRHrFvCdvsewAInDKCNyR3laPA2mc9bRYJ6LBp5IyvF3fvXbbNR0bSzHlZVFtn6E0xw2oZlyjg4rKCVcng==} ··· 485 505 '@types/ms@2.1.0': 486 506 resolution: {integrity: sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==} 487 507 508 + '@types/node@25.0.3': 509 + resolution: {integrity: sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA==} 510 
+ 488 511 '@types/unist@3.0.3': 489 512 resolution: {integrity: sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==} 490 513 ··· 767 790 768 791 devlop@1.1.0: 769 792 resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==} 793 + 794 + entities@7.0.0: 795 + resolution: {integrity: sha512-FDWG5cmEYf2Z00IkYRhbFrwIwvdFKH07uV8dvNy0omp/Qb1xcyCWp2UDtcwJF4QZZvk0sLudP6/hAu42TaqVhQ==} 796 + engines: {node: '>=0.12'} 770 797 771 798 es-module-lexer@1.7.0: 772 799 resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==} ··· 891 918 fast-levenshtein@2.0.6: 892 919 resolution: {integrity: sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==} 893 920 921 + fast-xml-parser@5.3.3: 922 + resolution: {integrity: sha512-2O3dkPAAC6JavuMm8+4+pgTk+5hoAs+CjZ+sWcQLkX9+/tHRuTkQh/Oaifr8qDmZ8iEHb771Ea6G8CdwkrgvYA==} 923 + hasBin: true 924 + 894 925 fault@2.0.1: 895 926 resolution: {integrity: sha512-WtySTkS4OKev5JtpHXnib4Gxiurzh5NCGvWrFaZ34m6JehfTUhKZvn9njTfw48t6JumVQOmrKqpmGcdwxnhqBQ==} 896 927 ··· 903 934 picomatch: 904 935 optional: true 905 936 937 + feedsmith@2.8.0: 938 + resolution: {integrity: sha512-LloQgc/Kc82PGwAsNV0AjkDGS91NRdbP1jTwJMfRHkhLu7NgGPWyi/TWz06r4PXh0ixhf+5NAgt8bL/Wg/u7/A==} 939 + 906 940 file-entry-cache@8.0.0: 907 941 resolution: {integrity: sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ==} 908 942 engines: {node: '>=16.0.0'} ··· 921 955 format@0.2.2: 922 956 resolution: {integrity: sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww==} 923 957 engines: {node: '>=0.4.x'} 958 + 959 + fsevents@2.3.2: 960 + resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==} 961 + engines: {node: ^8.16.0 || ^10.6.0 || 
>=11.0.0} 962 + os: [darwin] 924 963 925 964 fsevents@2.3.3: 926 965 resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} ··· 1205 1244 resolution: {integrity: sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==} 1206 1245 engines: {node: '>=12'} 1207 1246 1247 + playwright-core@1.57.0: 1248 + resolution: {integrity: sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==} 1249 + engines: {node: '>=18'} 1250 + hasBin: true 1251 + 1252 + playwright@1.57.0: 1253 + resolution: {integrity: sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==} 1254 + engines: {node: '>=18'} 1255 + hasBin: true 1256 + 1208 1257 postcss@8.5.6: 1209 1258 resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==} 1210 1259 engines: {node: ^10 || ^12 || >=14} ··· 1272 1321 resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==} 1273 1322 engines: {node: '>=8'} 1274 1323 1324 + strnum@2.1.2: 1325 + resolution: {integrity: sha512-l63NF9y/cLROq/yqKXSLtcMeeyOfnSQlfMSlzFt/K73oIaD8DGaQWd7Z34X9GPiKqP5rbSh84Hl4bOlLcjiSrQ==} 1326 + 1275 1327 supports-color@7.2.0: 1276 1328 resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==} 1277 1329 engines: {node: '>=8'} ··· 1303 1355 1304 1356 tslib@2.8.1: 1305 1357 resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} 1358 + 1359 + tsx@4.21.0: 1360 + resolution: {integrity: sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==} 1361 + engines: {node: '>=18.0.0'} 1362 + hasBin: true 1306 1363 1307 1364 type-check@0.4.0: 1308 1365 resolution: {integrity: 
sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==} ··· 1332 1389 engines: {node: '>=14.17'} 1333 1390 hasBin: true 1334 1391 1392 + undici-types@7.16.0: 1393 + resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} 1394 + 1335 1395 unist-util-is@6.0.1: 1336 1396 resolution: {integrity: sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g==} 1337 1397 ··· 1676 1736 1677 1737 '@pkgr/core@0.2.9': {} 1678 1738 1739 + '@playwright/test@1.57.0': 1740 + dependencies: 1741 + playwright: 1.57.0 1742 + 1679 1743 '@rollup/rollup-android-arm-eabi@4.54.0': 1680 1744 optional: true 1681 1745 ··· 1769 1833 '@types/unist': 3.0.3 1770 1834 1771 1835 '@types/ms@2.1.0': {} 1836 + 1837 + '@types/node@25.0.3': 1838 + dependencies: 1839 + undici-types: 7.16.0 1772 1840 1773 1841 '@types/unist@3.0.3': {} 1774 1842 ··· 1931 1999 chai: 6.2.2 1932 2000 tinyrainbow: 3.0.3 1933 2001 1934 - '@vitest/mocker@4.0.16(vite@7.3.0)': 2002 + '@vitest/mocker@4.0.16(vite@7.3.0(@types/node@25.0.3)(tsx@4.21.0))': 1935 2003 dependencies: 1936 2004 '@vitest/spy': 4.0.16 1937 2005 estree-walker: 3.0.3 1938 2006 magic-string: 0.30.21 1939 2007 optionalDependencies: 1940 - vite: 7.3.0 2008 + vite: 7.3.0(@types/node@25.0.3)(tsx@4.21.0) 1941 2009 1942 2010 '@vitest/pretty-format@4.0.16': 1943 2011 dependencies: ··· 2042 2110 dependencies: 2043 2111 dequal: 2.0.3 2044 2112 2113 + entities@7.0.0: {} 2114 + 2045 2115 es-module-lexer@1.7.0: {} 2046 2116 2047 2117 esbuild@0.27.2: ··· 2196 2266 2197 2267 fast-levenshtein@2.0.6: {} 2198 2268 2269 + fast-xml-parser@5.3.3: 2270 + dependencies: 2271 + strnum: 2.1.2 2272 + 2199 2273 fault@2.0.1: 2200 2274 dependencies: 2201 2275 format: 0.2.2 ··· 2204 2278 optionalDependencies: 2205 2279 picomatch: 4.0.3 2206 2280 2281 + feedsmith@2.8.0: 2282 + dependencies: 2283 + entities: 7.0.0 2284 + fast-xml-parser: 5.3.3 2285 + 2207 
2286 file-entry-cache@8.0.0: 2208 2287 dependencies: 2209 2288 flat-cache: 4.0.1 ··· 2221 2300 flatted@3.3.3: {} 2222 2301 2223 2302 format@0.2.2: {} 2303 + 2304 + fsevents@2.3.2: 2305 + optional: true 2224 2306 2225 2307 fsevents@2.3.3: 2226 2308 optional: true ··· 2661 2743 2662 2744 picomatch@4.0.3: {} 2663 2745 2746 + playwright-core@1.57.0: {} 2747 + 2748 + playwright@1.57.0: 2749 + dependencies: 2750 + playwright-core: 1.57.0 2751 + optionalDependencies: 2752 + fsevents: 2.3.2 2753 + 2664 2754 postcss@8.5.6: 2665 2755 dependencies: 2666 2756 nanoid: 3.3.11 ··· 2729 2819 2730 2820 strip-json-comments@3.1.1: {} 2731 2821 2822 + strnum@2.1.2: {} 2823 + 2732 2824 supports-color@7.2.0: 2733 2825 dependencies: 2734 2826 has-flag: 4.0.0 ··· 2755 2847 tslib@2.8.1: 2756 2848 optional: true 2757 2849 2850 + tsx@4.21.0: 2851 + dependencies: 2852 + esbuild: 0.27.2 2853 + get-tsconfig: 4.13.0 2854 + optionalDependencies: 2855 + fsevents: 2.3.3 2856 + 2758 2857 type-check@0.4.0: 2759 2858 dependencies: 2760 2859 prelude-ls: 1.2.1 ··· 2782 2881 vscode-languageserver-protocol: 3.17.5 2783 2882 2784 2883 typescript@5.9.3: {} 2884 + 2885 + undici-types@7.16.0: {} 2785 2886 2786 2887 unist-util-is@6.0.1: 2787 2888 dependencies: ··· 2830 2931 dependencies: 2831 2932 punycode: 2.3.1 2832 2933 2833 - vite@7.3.0: 2934 + vite@7.3.0(@types/node@25.0.3)(tsx@4.21.0): 2834 2935 dependencies: 2835 2936 esbuild: 0.27.2 2836 2937 fdir: 6.5.0(picomatch@4.0.3) ··· 2839 2940 rollup: 4.54.0 2840 2941 tinyglobby: 0.2.15 2841 2942 optionalDependencies: 2943 + '@types/node': 25.0.3 2842 2944 fsevents: 2.3.3 2945 + tsx: 4.21.0 2843 2946 2844 - vitest@4.0.16: 2947 + vitest@4.0.16(@types/node@25.0.3)(tsx@4.21.0): 2845 2948 dependencies: 2846 2949 '@vitest/expect': 4.0.16 2847 - '@vitest/mocker': 4.0.16(vite@7.3.0) 2950 + '@vitest/mocker': 4.0.16(vite@7.3.0(@types/node@25.0.3)(tsx@4.21.0)) 2848 2951 '@vitest/pretty-format': 4.0.16 2849 2952 '@vitest/runner': 4.0.16 2850 2953 '@vitest/snapshot': 
4.0.16 ··· 2861 2964 tinyexec: 1.0.2 2862 2965 tinyglobby: 0.2.15 2863 2966 tinyrainbow: 3.0.3 2864 - vite: 7.3.0 2967 + vite: 7.3.0(@types/node@25.0.3)(tsx@4.21.0) 2865 2968 why-is-node-running: 2.3.0 2969 + optionalDependencies: 2970 + '@types/node': 25.0.3 2866 2971 transitivePeerDependencies: 2867 2972 - jiti 2868 2973 - less
+1870 -261
src/lib/docstore.ts
··· 741 741 } 742 742 743 743 /** 744 + * Record multiple writes in a batch (more efficient for bulk inserts). 745 + * Updates HLLs for all records, persists once at the end. 746 + */ 747 + async recordWriteBatch( 748 + storeId: string, 749 + records: Record<string, unknown>[], 750 + tx?: IDBPTransaction<unknown, string[], 'readwrite'>, 751 + ): Promise<void> { 752 + const store = tx 753 + ? tx.objectStore('_stats') 754 + : this.#db.transaction('_stats', 'readwrite').objectStore('_stats') 755 + 756 + let stats = (await store.get(storeId)) as IndexStatsRecord | undefined 757 + if (!stats) { 758 + stats = this.#createEmpty(storeId) 759 + } 760 + 761 + for (const keys of records) { 762 + stats.rowCount++ 763 + 764 + for (const [key, value] of Object.entries(keys)) { 765 + if (value === undefined || value === null) continue 766 + 767 + // Update HLL 768 + if (!stats.cardinalitySketch[key]) { 769 + stats.cardinalitySketch[key] = new Uint8Array(HLL_REGISTER_COUNT) 770 + } 771 + const hll = new HyperLogLog(stats.cardinalitySketch[key]) 772 + hll.add(String(value)) 773 + 774 + // Update bounds (only for comparable types) 775 + if (this.#isComparable(value)) { 776 + if (!stats.bounds[key]) { 777 + stats.bounds[key] = {min: value, max: value} 778 + } else { 779 + this.#updateBounds(stats.bounds[key], value) 780 + } 781 + } 782 + } 783 + } 784 + 785 + await store.put(stats) 786 + } 787 + 788 + /** 744 789 * Update stats after a delete operation. 745 790 */ 746 791 async recordDelete(storeId: string, tx?: IDBPTransaction<unknown, string[], 'readwrite'>): Promise<void> { ··· 896 941 // Query planner 897 942 // ============================================================================= 898 943 899 - export type CompareOp = '=' | '<' | '>' | '<=' | '>=' | '!=' 944 + export type CompareOp = '=' | '<' | '>' | '<=' | '>=' | '!=' | 'in' 900 945 946 + /** 947 + * A field-level comparison clause. 948 + * For spread fields, use the full path like '...tags.tag'. 
949 + */ 901 950 export interface WhereClause { 902 951 key: string 903 952 op: CompareOp 953 + /** For 'in' operator, this should be an array of values */ 904 954 value: unknown 905 955 } 906 956 957 + /** 958 + * Match a single row within a spread array. 959 + * All conditions in `match` must be satisfied by the SAME row. 960 + * Multiple SpreadMatch clauses on the same spread create an intersection 961 + * (document must have rows matching EACH SpreadMatch). 962 + * 963 + * Example: Find documents with tag=(media-type, audio) AND tag=(explicit, true) 964 + * ```typescript 965 + * matches: [ 966 + * {spread: 'tags', where: {tag: 'media-type', value: 'audio'}}, 967 + * {spread: 'tags', where: {tag: 'explicit', value: 'true'}}, 968 + * ] 969 + * ``` 970 + */ 971 + export interface SpreadMatch { 972 + /** The spread field name (without '...' prefix), e.g., 'tags' */ 973 + spread: string 974 + /** Field values that must all match on ONE row (equality only) */ 975 + where: Record<string, unknown> 976 + } 977 + 907 978 export interface QuerySpec { 908 979 store: string 980 + /** Field-level WHERE clauses (legacy, for simple queries) */ 909 981 where: WhereClause[] 982 + /** Spread row matches - each match must be satisfied by one row in the spread */ 983 + matches?: SpreadMatch[] 910 984 orderBy?: {key: string; direction: 'asc' | 'desc'} 911 985 limit?: number 912 986 } ··· 1004 1078 * and this describes the "build" side (used for lookups). 1005 1079 */ 1006 1080 intersection?: IntersectionPlan 1081 + 1082 + /** 1083 + * For 'in' queries: sorted list of values to scan via cursor hopping. 1084 + * The executor will scan each value in sequence using cursor.continue(nextValue). 1085 + */ 1086 + inValues?: unknown[] 1087 + 1088 + /** 1089 + * Prefix values for compound index 'in' queries. 1090 + * If set, each inValue is combined with this prefix to form the full key. 
1091 + */ 1092 + inPrefix?: unknown[] 1007 1093 } 1008 1094 1009 1095 /** ··· 1042 1128 * For primary store this is 'id', for index stores it's '_docId'. 1043 1129 */ 1044 1130 docIdField: string 1131 + 1132 + /** 1133 + * For self-joins with 3+ values, chain additional intersections here. 1134 + * Each additional scan further filters the result set. 1135 + */ 1136 + next?: IntersectionPlan 1045 1137 } 1046 1138 1047 1139 /** ··· 1059 1151 handledClauses: Set<number> 1060 1152 // Estimated selectivity based on cardinality 1061 1153 selectivity: number 1154 + // If an 'in' clause is handled, the clause index and values 1155 + inClause?: {clauseIdx: number; key: string; values: unknown[]} 1062 1156 } 1063 1157 1064 1158 export class QueryPlanner<S extends ShapeWithStorageRef> { ··· 1074 1168 * Generate a query plan for the given query specification. 1075 1169 */ 1076 1170 async plan(query: QuerySpec): Promise<QueryPlan> { 1171 + // Handle SpreadMatch queries (new explicit syntax for spread row matching) 1172 + if (query.matches && query.matches.length > 0) { 1173 + const spreadMatchPlan = await this.#evaluateSpreadMatches(query) 1174 + if (spreadMatchPlan) { 1175 + return spreadMatchPlan 1176 + } 1177 + } 1178 + 1179 + // Check for self-join requirement: multiple equality conditions on the same spread key 1180 + // e.g., ...tags.tag = 'javascript' AND ...tags.tag = 'ai' 1181 + // This requires scanning the index multiple times and intersecting by _docId 1182 + const selfJoinPlan = await this.#evaluateSelfJoin(query) 1183 + if (selfJoinPlan) { 1184 + return selfJoinPlan 1185 + } 1186 + 1077 1187 const candidates = await this.#generateCandidates(query) 1078 1188 1079 1189 if (candidates.length === 0) { ··· 1091 1201 const bestSingle = scored[0] 1092 1202 1093 1203 // Evaluate index intersection opportunity 1094 - const intersectionPlan = this.#evaluateIntersection(candidates, query) 1204 + const intersectionPlan = await this.#evaluateIntersection(candidates, query) 
1095 1205 if (intersectionPlan) { 1096 1206 // Compare based on estimated efficiency: 1097 1207 // - Single index: scans selectivity% of rows, then post-filters unhandled clauses ··· 1177 1287 let eqPrefixLength = 0 1178 1288 const handledClauses = new Set<number>() 1179 1289 1180 - // Count leading equality matches 1290 + // Count leading equality matches (including 'in' which we handle via cursor hopping) 1291 + let inClauseIdx: number | null = null 1292 + let inClauseKey: string | null = null 1293 + 1181 1294 for (const key of indexKeys) { 1182 - const clauseIdx = query.where.findIndex((w) => w.key === key && w.op === '=') 1183 - if (clauseIdx >= 0) { 1295 + const eqClauseIdx = query.where.findIndex((w) => w.key === key && w.op === '=') 1296 + if (eqClauseIdx >= 0) { 1184 1297 eqPrefixLength++ 1185 - handledClauses.add(clauseIdx) 1298 + handledClauses.add(eqClauseIdx) 1186 1299 } else { 1187 - break // Equality prefix broken 1300 + // Check for 'in' clause on this key - can handle via cursor hopping 1301 + const inIdx = query.where.findIndex((w) => w.key === key && w.op === 'in') 1302 + if (inIdx >= 0) { 1303 + inClauseIdx = inIdx 1304 + inClauseKey = key 1305 + handledClauses.add(inIdx) 1306 + } 1307 + break // Equality prefix broken (but 'in' is still handled) 1188 1308 } 1189 1309 } 1190 1310 1191 1311 // Check for range condition on next key after eq prefix 1192 - if (eqPrefixLength < indexKeys.length) { 1312 + if (eqPrefixLength < indexKeys.length && inClauseIdx === null) { 1193 1313 const nextKey = indexKeys[eqPrefixLength] 1194 - const rangeClauseIdx = query.where.findIndex((w) => w.key === nextKey && w.op !== '=' && w.op !== '!=') 1314 + const rangeClauseIdx = query.where.findIndex( 1315 + (w) => w.key === nextKey && w.op !== '=' && w.op !== '!=' && w.op !== 'in', 1316 + ) 1195 1317 if (rangeClauseIdx >= 0) { 1196 1318 handledClauses.add(rangeClauseIdx) 1197 1319 } ··· 1210 1332 let selectivity = 1.0 1211 1333 for (const clauseIdx of handledClauses) { 
1212 1334 const clause = query.where[clauseIdx] 1213 - const cardinality = await this.#statsManager.getCardinality( 1214 - source === 'primary' ? this.#storeDef.name : source, 1215 - clause.key, 1216 - ) 1335 + // Use the appropriate store for cardinality lookup: 1336 + // - Spread fields (starting with ...) use the index store 1337 + // - Regular fields use the primary store 1338 + const storeForCardinality = 1339 + clause.key.startsWith('...') && source !== 'primary' ? source : this.#storeDef.name 1340 + const cardinality = await this.#statsManager.getCardinality(storeForCardinality, clause.key) 1217 1341 if (cardinality > 0 && clause.op === '=') { 1218 1342 selectivity *= 1 / cardinality 1343 + } else if (cardinality > 0 && clause.op === 'in') { 1344 + // 'in' with N values: selectivity is N / cardinality 1345 + const values = clause.value as unknown[] 1346 + selectivity *= values.length / cardinality 1219 1347 } else if (cardinality > 0) { 1220 1348 // Range query - rough estimate of 1/3 selectivity 1221 1349 selectivity *= 0.33 1222 1350 } 1223 1351 } 1224 1352 1353 + // Build inClause info if present 1354 + const inClause = 1355 + inClauseIdx !== null && inClauseKey !== null 1356 + ? { 1357 + clauseIdx: inClauseIdx, 1358 + key: inClauseKey, 1359 + values: query.where[inClauseIdx].value as unknown[], 1360 + } 1361 + : undefined 1362 + 1225 1363 return { 1226 1364 source, 1227 1365 indexName, ··· 1230 1368 coversOrderBy, 1231 1369 handledClauses, 1232 1370 selectivity, 1371 + inClause, 1233 1372 } 1234 1373 } 1235 1374 ··· 1273 1412 1274 1413 /** 1275 1414 * Build final query plan from chosen candidate. 1415 + * Fetches stats for the candidate's source to get accurate row counts. 
1276 1416 */ 1277 - #buildPlan(candidate: IndexCandidate, query: QuerySpec): QueryPlan { 1278 - // Build key range from handled clauses 1279 - const keyRange = this.#buildKeyRange(candidate, query) 1417 + async #buildPlan(candidate: IndexCandidate, query: QuerySpec): Promise<QueryPlan> { 1418 + // Build key range from handled clauses (null if using 'in' with cursor hopping) 1419 + const keyRange = candidate.inClause ? null : this.#buildKeyRange(candidate, query) 1280 1420 1281 1421 // Filter = clauses not handled by index 1282 1422 const filter = query.where.filter((_, i) => !candidate.handledClauses.has(i)) ··· 1292 1432 // Determine available fields 1293 1433 const availableFields = this.#getAvailableFields(candidate.source) 1294 1434 1435 + // Build 'in' values if present 1436 + let inValues: unknown[] | undefined 1437 + let inPrefix: unknown[] | undefined 1438 + 1439 + if (candidate.inClause) { 1440 + // Sort values for efficient cursor hopping 1441 + inValues = [...candidate.inClause.values].sort() 1442 + 1443 + // Build prefix from equality conditions before the 'in' key 1444 + if (candidate.eqPrefixLength > 0) { 1445 + inPrefix = [] 1446 + for (let i = 0; i < candidate.eqPrefixLength; i++) { 1447 + const key = candidate.indexKeys[i] 1448 + const clause = query.where.find((w) => w.key === key && w.op === '=') 1449 + if (clause) inPrefix.push(clause.value) 1450 + } 1451 + } 1452 + } 1453 + 1454 + // Fetch stats for the candidate's source (primary or index store) 1455 + const storeId = candidate.source === 'primary' ? this.#storeDef.name : candidate.source 1456 + const stats = await this.#statsManager.getStats(storeId) 1457 + const totalRows = stats?.rowCount ?? 
1000 1458 + 1295 1459 return { 1296 1460 source: candidate.source, 1297 1461 indexName: candidate.indexName, ··· 1300 1464 filter, 1301 1465 needsSort: query.orderBy != null && !candidate.coversOrderBy, 1302 1466 availableFields, 1303 - estimatedRows: Math.ceil(candidate.selectivity * 1000), // TODO: use actual row count from stats 1467 + estimatedRows: Math.min(Math.ceil(candidate.selectivity * totalRows), totalRows), 1304 1468 selectivity: candidate.selectivity, 1469 + inValues, 1470 + inPrefix, 1305 1471 } 1306 1472 } 1307 1473 ··· 1387 1553 * The [] serves as a max sentinel since arrays sort after all other IDB types. 1388 1554 */ 1389 1555 #buildCompoundKeyRange(eqValues: unknown[], rangeClauses: WhereClause[]): KeyRangeSpec { 1390 - let lower: unknown[] | undefined 1391 - let upper: unknown[] | undefined 1556 + // For single-key indexes with no equality prefix (just a range on the key), 1557 + // we need to return scalar values, not arrays. 1558 + const isSimpleRange = eqValues.length === 0 1559 + 1560 + let lower: unknown | undefined 1561 + let upper: unknown | undefined 1392 1562 let lowerOpen = false 1393 1563 let upperOpen = false 1394 1564 1395 1565 for (const clause of rangeClauses) { 1396 1566 switch (clause.op) { 1397 1567 case '>': 1398 - lower = [...eqValues, clause.value] 1568 + lower = isSimpleRange ? clause.value : [...eqValues, clause.value] 1399 1569 lowerOpen = true 1400 1570 break 1401 1571 case '>=': 1402 - lower = [...eqValues, clause.value] 1572 + lower = isSimpleRange ? clause.value : [...eqValues, clause.value] 1403 1573 lowerOpen = false 1404 1574 break 1405 1575 case '<': 1406 - upper = [...eqValues, clause.value] 1576 + upper = isSimpleRange ? clause.value : [...eqValues, clause.value] 1407 1577 upperOpen = true 1408 1578 break 1409 1579 case '<=': 1410 - upper = [...eqValues, clause.value] 1580 + upper = isSimpleRange ? 
clause.value : [...eqValues, clause.value] 1411 1581 upperOpen = false 1412 1582 break 1413 1583 } 1414 1584 } 1415 1585 1586 + // For simple range queries (no equality prefix), return simple bounds 1587 + if (isSimpleRange) { 1588 + if (lower !== undefined && upper !== undefined) { 1589 + return {type: 'bound', lower, upper, lowerOpen, upperOpen} 1590 + } else if (lower !== undefined) { 1591 + return {type: 'lowerBound', lower, lowerOpen} 1592 + } else if (upper !== undefined) { 1593 + return {type: 'upperBound', upper, upperOpen} 1594 + } 1595 + // Shouldn't happen 1596 + return {type: 'only', lower: null} 1597 + } 1598 + 1416 1599 // For compound indexes, we must bound both ends to stay within the equality prefix. 1417 1600 // If only lower bound specified, add upper bound with max sentinel to stay in prefix. 1418 1601 // If only upper bound specified, add lower bound with min value to stay in prefix. ··· 1439 1622 1440 1623 /** 1441 1624 * Fallback plan when no index is useful. 1625 + * Fetches stats for the primary store to get accurate row counts. 1442 1626 */ 1443 - #fullScanPlan(query: QuerySpec): QueryPlan { 1627 + async #fullScanPlan(query: QuerySpec): Promise<QueryPlan> { 1628 + const stats = await this.#statsManager.getStats(this.#storeDef.name) 1629 + const totalRows = stats?.rowCount ?? 1000 1444 1630 return { 1445 1631 source: 'primary', 1446 1632 indexName: null, ··· 1449 1635 filter: query.where, 1450 1636 needsSort: query.orderBy != null, 1451 1637 availableFields: Object.keys(this.#storeDef.schema), 1452 - estimatedRows: 10000, // TODO: use actual row count from stats 1638 + estimatedRows: totalRows, 1453 1639 selectivity: 1.0, 1454 1640 } 1455 1641 } 1456 1642 1457 1643 /** 1458 - * Evaluate whether index intersection would be beneficial. 1459 - * Returns an intersection plan if two indexes can be combined effectively, 1460 - * or null if intersection isn't worthwhile. 
1644 + * Evaluate self-join requirement: multiple equality conditions on the same spread key. 1645 + * e.g., ...tags.tag = 'javascript' AND ...tags.tag = 'ai' 1461 1646 * 1462 - * Intersection is beneficial when: 1463 - * 1. We have at least 2 candidates covering different clauses 1464 - * 2. Each candidate has good selectivity (< 50%) 1465 - * 3. Combined selectivity is significantly better than single best 1466 - * 4. Both sides can identify documents by ID 1647 + * This requires scanning the index multiple times with different values and 1648 + * intersecting by _docId to find documents that have ALL the specified values. 1649 + * Fetches stats for the index store to get accurate row counts. 1467 1650 */ 1468 - #evaluateIntersection(candidates: IndexCandidate[], query: QuerySpec): QueryPlan | null { 1469 - // Need at least 2 candidates with handled clauses 1470 - const usableCandidates = candidates.filter((c) => c.handledClauses.size > 0 && c.selectivity < 0.5) 1471 - if (usableCandidates.length < 2) { 1651 + async #evaluateSelfJoin(query: QuerySpec): Promise<QueryPlan | null> { 1652 + // Group equality clauses by key to find duplicates 1653 + const eqClausesByKey = new Map<string, {clauseIdx: number; clause: WhereClause}[]>() 1654 + 1655 + for (let i = 0; i < query.where.length; i++) { 1656 + const clause = query.where[i] 1657 + if (clause.op === '=' && clause.key.startsWith('...')) { 1658 + // This is an equality on a spread key 1659 + const existing = eqClausesByKey.get(clause.key) ?? 
[] 1660 + existing.push({clauseIdx: i, clause}) 1661 + eqClausesByKey.set(clause.key, existing) 1662 + } 1663 + } 1664 + 1665 + // Find keys with multiple equality conditions 1666 + let selfJoinKey: string | null = null 1667 + let selfJoinClauses: {clauseIdx: number; clause: WhereClause}[] = [] 1668 + 1669 + for (const [key, clauses] of eqClausesByKey) { 1670 + if (clauses.length >= 2) { 1671 + selfJoinKey = key 1672 + selfJoinClauses = clauses 1673 + break // Handle one self-join at a time 1674 + } 1675 + } 1676 + 1677 + if (!selfJoinKey || selfJoinClauses.length < 2) { 1472 1678 return null 1473 1679 } 1474 1680 1475 - // Find pairs of candidates that handle different clauses 1476 - let bestPair: {probe: IndexCandidate; build: IndexCandidate; combinedSelectivity: number} | null = null 1681 + // Find the best index that can handle this spread key 1682 + // Prefer indexes that can also handle other equality conditions (e.g., feedId = 'tech') 1683 + let bestIndex: { 1684 + store: string 1685 + name: string 1686 + keys: string[] 1687 + eqPrefixLength: number 1688 + eqValues: unknown[] 1689 + handledClauseIndices: Set<number> 1690 + } | null = null 1691 + 1692 + for (const group of this.#storeDef.indexStoreGroups) { 1693 + for (const idx of group.indexes) { 1694 + const idxKeys = idx.keys as string[] 1695 + if (!idxKeys.includes(selfJoinKey)) continue 1696 + 1697 + // Calculate how many prefix equality clauses this index can handle 1698 + let eqPrefixLength = 0 1699 + const eqValues: unknown[] = [] 1700 + const handledIndices = new Set<number>(selfJoinClauses.map((c) => c.clauseIdx)) 1477 1701 1478 - for (let i = 0; i < usableCandidates.length; i++) { 1479 - for (let j = i + 1; j < usableCandidates.length; j++) { 1480 - const a = usableCandidates[i] 1481 - const b = usableCandidates[j] 1702 + for (const key of idxKeys) { 1703 + if (key === selfJoinKey) break 1704 + const clauseIdx = query.where.findIndex((w) => w.key === key && w.op === '=') 1705 + if (clauseIdx >= 0) 
{ 1706 + eqPrefixLength++ 1707 + eqValues.push(query.where[clauseIdx].value) 1708 + handledIndices.add(clauseIdx) 1709 + } else { 1710 + break 1711 + } 1712 + } 1482 1713 1483 - // Check if they handle different clauses (no overlap = more benefit) 1484 - const overlap = this.#clauseOverlap(a.handledClauses, b.handledClauses) 1485 - if (overlap === a.handledClauses.size || overlap === b.handledClauses.size) { 1486 - // One fully contains the other, no benefit from intersection 1487 - continue 1714 + // Prefer index with longer equality prefix 1715 + if (!bestIndex || eqPrefixLength > bestIndex.eqPrefixLength) { 1716 + bestIndex = { 1717 + store: group.storeName, 1718 + name: idx.name, 1719 + keys: idxKeys, 1720 + eqPrefixLength, 1721 + eqValues, 1722 + handledClauseIndices: handledIndices, 1723 + } 1488 1724 } 1725 + } 1726 + } 1489 1727 1490 - // Combined selectivity is multiplicative for independent conditions 1491 - const combinedSelectivity = a.selectivity * b.selectivity 1728 + if (!bestIndex) { 1729 + return null // No index covers this spread key 1730 + } 1731 + 1732 + const indexStore = bestIndex.store 1733 + const indexName = bestIndex.name 1734 + const eqValues = bestIndex.eqValues 1735 + const handledClauseIndices = bestIndex.handledClauseIndices 1736 + 1737 + // Build key ranges for each self-join value 1738 + const buildKeyRangeForValue = (value: unknown): KeyRangeSpec => { 1739 + if (eqValues.length === 0) { 1740 + return {type: 'only', lower: value} 1741 + } else { 1742 + return {type: 'only', lower: [...eqValues, value]} 1743 + } 1744 + } 1492 1745 1493 - // The probe side should be the smaller result set 1494 - const [probe, build] = a.selectivity <= b.selectivity ? 
[a, b] : [b, a] 1746 + // Build the probe side (first value) 1747 + const probeClause = selfJoinClauses[0] 1748 + const probeKeyRange = buildKeyRangeForValue(probeClause.clause.value) 1495 1749 1496 - // Check if this pair is better than previous best 1497 - if (!bestPair || combinedSelectivity < bestPair.combinedSelectivity) { 1498 - bestPair = {probe, build, combinedSelectivity} 1750 + // Get selectivity estimate 1751 + const cardinality = await this.#statsManager.getCardinality(indexStore, selfJoinKey) 1752 + const perValueSelectivity = cardinality > 0 ? 1 / cardinality : 0.1 1753 + 1754 + // Build the chain of intersections for remaining values 1755 + let intersection: IntersectionPlan | undefined 1756 + let prevIntersection: IntersectionPlan | undefined 1757 + 1758 + for (let i = 1; i < selfJoinClauses.length; i++) { 1759 + const clause = selfJoinClauses[i] 1760 + const keyRange = buildKeyRangeForValue(clause.clause.value) 1761 + 1762 + const newIntersection: IntersectionPlan = { 1763 + source: indexStore, 1764 + indexName, 1765 + keyRange, 1766 + handledClauses: [clause.clause], 1767 + selectivity: perValueSelectivity, 1768 + docIdField: '_docId', 1769 + } 1770 + 1771 + if (i === 1) { 1772 + intersection = newIntersection 1773 + } else if (prevIntersection) { 1774 + prevIntersection.next = newIntersection 1775 + } 1776 + prevIntersection = newIntersection 1777 + } 1778 + 1779 + // Remaining clauses that aren't handled 1780 + const filter = query.where.filter((_, i) => !handledClauseIndices.has(i)) 1781 + 1782 + // Combined selectivity (assuming independence) 1783 + const combinedSelectivity = Math.pow(perValueSelectivity, selfJoinClauses.length) 1784 + 1785 + // Available fields from the index store 1786 + const group = this.#storeDef.indexStoreGroups.find((g) => g.storeName === indexStore) 1787 + const availableFields = group ? 
[...group.allKeys] : [] 1788 + 1789 + // Fetch stats for the index store 1790 + const stats = await this.#statsManager.getStats(indexStore) 1791 + const totalRows = stats?.rowCount ?? 1000 1792 + 1793 + return { 1794 + source: indexStore, 1795 + indexName, 1796 + keyRange: probeKeyRange, 1797 + direction: 'next', 1798 + filter, 1799 + needsSort: query.orderBy != null, 1800 + availableFields, 1801 + estimatedRows: Math.min(Math.ceil(combinedSelectivity * totalRows), totalRows), 1802 + selectivity: combinedSelectivity, 1803 + docIdField: '_docId', 1804 + intersection, 1805 + } 1806 + } 1807 + 1808 + /** 1809 + * Evaluate SpreadMatch conditions and build an intersection plan. 1810 + * 1811 + * Each SpreadMatch represents a compound equality condition that must be satisfied 1812 + * by ONE row in the spread array. Multiple SpreadMatch on the same spread create 1813 + * a self-join (intersection by document ID). 1814 + * 1815 + * Example: Find docs with BOTH (tag='media-type', value='audio') AND (tag='explicit', value='true') 1816 + * ```typescript 1817 + * matches: [ 1818 + * {spread: 'tags', where: {tag: 'media-type', value: 'audio'}}, 1819 + * {spread: 'tags', where: {tag: 'explicit', value: 'true'}}, 1820 + * ] 1821 + * ``` 1822 + */ 1823 + async #evaluateSpreadMatches(query: QuerySpec): Promise<QueryPlan | null> { 1824 + const matches = query.matches! 1825 + 1826 + // Group matches by spread field 1827 + const matchesBySpread = new Map<string, SpreadMatch[]>() 1828 + for (const match of matches) { 1829 + const existing = matchesBySpread.get(match.spread) ?? 
[] 1830 + existing.push(match) 1831 + matchesBySpread.set(match.spread, existing) 1832 + } 1833 + 1834 + // For now, handle the simple case: all matches on the same spread 1835 + // TODO: Handle cross-spread intersections 1836 + if (matchesBySpread.size !== 1) { 1837 + console.warn('Cross-spread intersections not yet supported, falling back to regular planning') 1838 + return null 1839 + } 1840 + 1841 + const [spreadName, spreadMatches] = [...matchesBySpread.entries()][0] 1842 + 1843 + // Find the best index that covers the spread fields 1844 + // We need an index on the index store (e.g., entries_idx:tags) that has 1845 + // keys matching the fields in the SpreadMatch.where object 1846 + let bestIndex: { 1847 + group: IndexStoreGroup<S> 1848 + index: {name: string; keys: readonly string[]} 1849 + matchedKeys: string[] 1850 + } | null = null 1851 + 1852 + for (const group of this.#storeDef.indexStoreGroups) { 1853 + // Check if this group handles the spread field 1854 + // The group's allKeys should include spread keys like '...tags.tag', '...tags.value' 1855 + const spreadPrefix = `...${spreadName}.` 1856 + const groupHandlesSpread = [...group.allKeys].some((k: string) => k.startsWith(spreadPrefix)) 1857 + if (!groupHandlesSpread) continue 1858 + 1859 + for (const idx of group.indexes) { 1860 + const idxKeys = idx.keys as string[] 1861 + 1862 + // Check how many of the SpreadMatch fields this index covers as a prefix 1863 + // Convert SpreadMatch.where keys to spread key format: tag -> ...tags.tag 1864 + const matchKeys = Object.keys(spreadMatches[0].where) 1865 + const spreadKeys = matchKeys.map((k) => `${spreadPrefix.slice(0, -1)}.${k}`) 1866 + 1867 + // Check if index keys start with these spread keys (in some order) 1868 + let matchedCount = 0 1869 + const matchedKeys: string[] = [] 1870 + for (const spreadKey of spreadKeys) { 1871 + if (idxKeys.includes(spreadKey)) { 1872 + matchedCount++ 1873 + matchedKeys.push(spreadKey) 1874 + } 1875 + } 1876 + 1877 + // 
Prefer index that matches more keys 1878 + if (matchedCount > 0 && (!bestIndex || matchedCount > bestIndex.matchedKeys.length)) { 1879 + bestIndex = {group, index: idx, matchedKeys} 1499 1880 } 1500 1881 } 1501 1882 } 1502 1883 1503 - if (!bestPair) { 1884 + if (!bestIndex) { 1885 + console.warn(`No index found for spread '${spreadName}' with required fields`) 1504 1886 return null 1505 1887 } 1506 1888 1507 - // Build the intersection plan 1508 - return this.#buildIntersectionPlan(bestPair.probe, bestPair.build, query) 1889 + const indexStore = bestIndex.group.storeName 1890 + const indexName = bestIndex.index.name 1891 + const indexKeys = bestIndex.index.keys as string[] 1892 + 1893 + // Build key ranges for each SpreadMatch 1894 + // For each match, we need to build a compound key from the where clause 1895 + const buildKeyRangeForMatch = (match: SpreadMatch): KeyRangeSpec => { 1896 + // Build compound key value in index key order 1897 + const keyValues: unknown[] = [] 1898 + for (const indexKey of indexKeys) { 1899 + // Convert index key (e.g., '...tags.tag') to match key (e.g., 'tag') 1900 + const spreadPrefix = `...${match.spread}.` 1901 + if (indexKey.startsWith(spreadPrefix)) { 1902 + const matchKey = indexKey.slice(spreadPrefix.length) 1903 + if (matchKey in match.where) { 1904 + keyValues.push(match.where[matchKey]) 1905 + } else { 1906 + break // Can't continue prefix if key is missing 1907 + } 1908 + } else { 1909 + break // Non-spread key in index, stop 1910 + } 1911 + } 1912 + 1913 + if (keyValues.length === 0) { 1914 + throw new Error(`SpreadMatch has no matchable keys for index ${indexName}`) 1915 + } 1916 + 1917 + return { 1918 + type: 'only', 1919 + lower: keyValues.length === 1 ? 
keyValues[0] : keyValues, 1920 + } 1921 + } 1922 + 1923 + // Build the probe side (first match) 1924 + const probeKeyRange = buildKeyRangeForMatch(spreadMatches[0]) 1925 + 1926 + // Get selectivity estimate 1927 + // For compound matches, selectivity is product of per-field selectivities 1928 + let perMatchSelectivity = 1.0 1929 + for (const key of bestIndex.matchedKeys) { 1930 + const cardinality = await this.#statsManager.getCardinality(indexStore, key) 1931 + if (cardinality > 0) { 1932 + perMatchSelectivity *= 1 / cardinality 1933 + } 1934 + } 1935 + 1936 + // Build intersection chain for remaining matches 1937 + let intersection: IntersectionPlan | undefined 1938 + let prevIntersection: IntersectionPlan | undefined 1939 + 1940 + for (let i = 1; i < spreadMatches.length; i++) { 1941 + const match = spreadMatches[i] 1942 + const keyRange = buildKeyRangeForMatch(match) 1943 + 1944 + // Convert SpreadMatch.where to WhereClause array for handledClauses 1945 + const handledClauses: WhereClause[] = Object.entries(match.where).map(([key, value]) => ({ 1946 + key: `...${match.spread}.${key}`, 1947 + op: '=' as const, 1948 + value, 1949 + })) 1950 + 1951 + const newIntersection: IntersectionPlan = { 1952 + source: indexStore, 1953 + indexName, 1954 + keyRange, 1955 + handledClauses, 1956 + selectivity: perMatchSelectivity, 1957 + docIdField: '_docId', 1958 + } 1959 + 1960 + if (i === 1) { 1961 + intersection = newIntersection 1962 + } else if (prevIntersection) { 1963 + prevIntersection.next = newIntersection 1964 + } 1965 + prevIntersection = newIntersection 1966 + } 1967 + 1968 + // Regular where clauses become post-filters 1969 + const filter = [...query.where] 1970 + 1971 + // Combined selectivity (assuming independence between matches) 1972 + const combinedSelectivity = Math.pow(perMatchSelectivity, spreadMatches.length) 1973 + 1974 + // Available fields from the index store 1975 + const availableFields = [...bestIndex.group.allKeys] 1976 + 1977 + // Fetch stats 
for row count estimate 1978 + const stats = await this.#statsManager.getStats(indexStore) 1979 + const totalRows = stats?.rowCount ?? 1000 1980 + 1981 + return { 1982 + source: indexStore, 1983 + indexName, 1984 + keyRange: probeKeyRange, 1985 + direction: 'next' as const, 1986 + filter, 1987 + needsSort: query.orderBy != null, 1988 + availableFields, 1989 + estimatedRows: Math.min(Math.ceil(combinedSelectivity * totalRows), totalRows), 1990 + selectivity: combinedSelectivity, 1991 + docIdField: '_docId', 1992 + intersection, 1993 + } 1509 1994 } 1510 1995 1511 1996 /** 1512 - * Count how many clause indices overlap between two sets. 1997 + * Evaluate whether index intersection would be beneficial. 1998 + * Returns an intersection plan if multiple indexes can be combined effectively, 1999 + * or null if intersection isn't worthwhile. 2000 + * 2001 + * Supports N-way intersections by chaining via intersection.next. 2002 + * 2003 + * Intersection is beneficial when: 2004 + * 1. We have at least 2 candidates covering different clauses 2005 + * 2. Each candidate has good selectivity (< 50%) 2006 + * 3. Combined selectivity is significantly better than single best 2007 + * 4. 
All sides can identify documents by ID 1513 2008 */ 1514 - #clauseOverlap(a: Set<number>, b: Set<number>): number { 1515 - let count = 0 1516 - for (const idx of a) { 1517 - if (b.has(idx)) count++ 2009 + async #evaluateIntersection(candidates: IndexCandidate[], query: QuerySpec): Promise<QueryPlan | null> { 2010 + // Need at least 2 candidates with handled clauses 2011 + const usableCandidates = candidates.filter((c) => c.handledClauses.size > 0 && c.selectivity < 0.5) 2012 + if (usableCandidates.length < 2) { 2013 + return null 2014 + } 2015 + 2016 + // Sort by selectivity (best/lowest first) for greedy selection 2017 + const sorted = [...usableCandidates].sort((a, b) => a.selectivity - b.selectivity) 2018 + 2019 + // Greedily select candidates that handle new clauses 2020 + const chosen: IndexCandidate[] = [] 2021 + const handledClauses = new Set<number>() 2022 + 2023 + for (const candidate of sorted) { 2024 + // Check if this candidate handles any new clauses 2025 + const newClauses = [...candidate.handledClauses].filter((i) => !handledClauses.has(i)) 2026 + if (newClauses.length > 0) { 2027 + chosen.push(candidate) 2028 + for (const i of candidate.handledClauses) { 2029 + handledClauses.add(i) 2030 + } 2031 + } 2032 + } 2033 + 2034 + if (chosen.length < 2) { 2035 + return null 1518 2036 } 1519 - return count 2037 + 2038 + // Build the N-way intersection plan 2039 + return this.#buildNWayIntersectionPlan(chosen, query) 1520 2040 } 1521 2041 1522 2042 /** 1523 - * Build a query plan that uses index intersection. 2043 + * Build a query plan that uses N-way index intersection. 2044 + * chosen[0] is the probe, chosen[1..N] are chained via intersection.next 2045 + * Fetches stats for the probe source to get accurate row counts. 
1524 2046 */ 1525 - #buildIntersectionPlan(probe: IndexCandidate, build: IndexCandidate, query: QuerySpec): QueryPlan { 1526 - // Build key ranges for both sides 2047 + async #buildNWayIntersectionPlan(chosen: IndexCandidate[], query: QuerySpec): Promise<QueryPlan> { 2048 + const probe = chosen[0] 1527 2049 const probeKeyRange = this.#buildKeyRange(probe, query) 1528 - const buildKeyRange = this.#buildKeyRange(build, query) 2050 + const probeDocIdField = probe.source === 'primary' ? 'id' : '_docId' 1529 2051 1530 - // Collect all handled clauses from both sides 1531 - const allHandled = new Set<number>([...probe.handledClauses, ...build.handledClauses]) 2052 + // Collect all handled clauses from all chosen candidates 2053 + const allHandled = new Set<number>() 2054 + for (const c of chosen) { 2055 + for (const i of c.handledClauses) { 2056 + allHandled.add(i) 2057 + } 2058 + } 1532 2059 const filter = query.where.filter((_, i) => !allHandled.has(i)) 1533 2060 1534 - // Determine doc ID field for each side 1535 - const probeDocIdField = probe.source === 'primary' ? 'id' : '_docId' 1536 - const buildDocIdField = build.source === 'primary' ? 'id' : '_docId' 1537 - 1538 2061 // Direction based on orderBy (probe side determines this) 1539 2062 let direction: IDBCursorDirection = 'next' 1540 2063 if (query.orderBy) { ··· 1543 2066 } 1544 2067 } 1545 2068 1546 - // Combined selectivity 1547 - const combinedSelectivity = probe.selectivity * build.selectivity 2069 + // Combined selectivity is multiplicative 2070 + let combinedSelectivity = 1 2071 + for (const c of chosen) { 2072 + combinedSelectivity *= c.selectivity 2073 + } 1548 2074 1549 - // Available fields come from final fetch (will need primary lookup if either is index store) 1550 - const availableFields = 1551 - probe.source === 'primary' && build.source === 'primary' 1552 - ? 
Object.keys(this.#storeDef.schema) 1553 - : Object.keys(this.#storeDef.schema) // Always fetch from primary after intersection 2075 + // Available fields - always fetch from primary after intersection 2076 + const availableFields = Object.keys(this.#storeDef.schema) 2077 + 2078 + // Build the intersection chain for chosen[1..N] 2079 + let intersection: IntersectionPlan | undefined 2080 + let prevIntersection: IntersectionPlan | undefined 2081 + 2082 + for (let i = 1; i < chosen.length; i++) { 2083 + const candidate = chosen[i] 2084 + const keyRange = this.#buildKeyRange(candidate, query) 2085 + const docIdField = candidate.source === 'primary' ? 'id' : '_docId' 2086 + const handledClauses = query.where.filter((_, idx) => candidate.handledClauses.has(idx)) 2087 + 2088 + const newIntersection: IntersectionPlan = { 2089 + source: candidate.source, 2090 + indexName: candidate.indexName, 2091 + keyRange, 2092 + handledClauses, 2093 + selectivity: candidate.selectivity, 2094 + docIdField, 2095 + } 2096 + 2097 + if (i === 1) { 2098 + intersection = newIntersection 2099 + } else if (prevIntersection) { 2100 + prevIntersection.next = newIntersection 2101 + } 2102 + prevIntersection = newIntersection 2103 + } 1554 2104 1555 - // Get the clauses handled by build side for the intersection plan 1556 - const buildHandledClauses = query.where.filter((_, i) => build.handledClauses.has(i)) 2105 + // Fetch stats for the probe's source (primary or index store) 2106 + const storeId = probe.source === 'primary' ? this.#storeDef.name : probe.source 2107 + const stats = await this.#statsManager.getStats(storeId) 2108 + const totalRows = stats?.rowCount ?? 
1000 1557 2109 1558 2110 return { 1559 2111 source: probe.source, ··· 1563 2115 filter, 1564 2116 needsSort: query.orderBy != null && !probe.coversOrderBy, 1565 2117 availableFields, 1566 - estimatedRows: Math.ceil(combinedSelectivity * 1000), 2118 + estimatedRows: Math.min(Math.ceil(combinedSelectivity * totalRows), totalRows), 1567 2119 selectivity: combinedSelectivity, 1568 2120 docIdField: probeDocIdField, 1569 - intersection: { 1570 - source: build.source, 1571 - indexName: build.indexName, 1572 - keyRange: buildKeyRange, 1573 - handledClauses: buildHandledClauses, 1574 - selectivity: build.selectivity, 1575 - docIdField: buildDocIdField, 1576 - }, 2121 + intersection, 1577 2122 } 1578 2123 } 1579 2124 } 1580 2125 1581 2126 // ============================================================================= 2127 + // Batched cursor iteration (reduces IDB round-trips by ~1000x) 2128 + // ============================================================================= 2129 + 2130 + /** 2131 + * Default page size for batched iteration. 2132 + * Can be overridden per-query via ExecutionOptions. 2133 + */ 2134 + const DEFAULT_PAGE_SIZE = 1000 2135 + 2136 + /** 2137 + * Batched row iteration using getAll(). 2138 + * 2139 + * NOTE: For IDBIndex sources, we fetch all rows in one call because pagination 2140 + * doesn't work correctly - getAll() uses index key ranges but we only have the 2141 + * primary key to continue from, creating a key space mismatch. 2142 + * 2143 + * Single getAll() is still much faster than cursor.continue() iteration 2144 + * (~5-6x based on benchmarks). 
2145 + * 2146 + * @param source - IDBObjectStore or IDBIndex to iterate 2147 + * @param keyRange - Optional key range filter 2148 + * @param direction - 'next' or 'prev' 2149 + */ 2150 + async function* iterateRows<T = Record<string, unknown>>( 2151 + source: {getAll: (query?: IDBKeyRange | null, count?: number) => Promise<T[]>}, 2152 + keyRange: IDBKeyRange | null, 2153 + direction: 'next' | 'prev' = 'next', 2154 + _pageSize: number = DEFAULT_PAGE_SIZE, 2155 + _getKey: (row: T) => IDBValidKey = (row) => (row as Record<string, unknown>)['id'] as IDBValidKey, 2156 + ): AsyncGenerator<T> { 2157 + // Fetch all rows in one call - pagination doesn't work for indexes (key space mismatch) 2158 + const allRows = await source.getAll(keyRange ?? undefined) 2159 + 2160 + if (direction === 'prev') { 2161 + // Reverse iteration 2162 + for (let i = allRows.length - 1; i >= 0; i--) { 2163 + yield allRows[i] 2164 + } 2165 + } else { 2166 + // Forward iteration 2167 + for (const row of allRows) { 2168 + yield row 2169 + } 2170 + } 2171 + } 2172 + 2173 + /** 2174 + * Collect all keys from a source into a Set, extracting docId from compound keys. 2175 + * 2176 + * NOTE: For IDBIndex sources, we fetch all keys in one call because pagination 2177 + * doesn't work correctly - getAllKeys() returns primary keys but keyRange filters 2178 + * on index keys, so we can't build pagination ranges from returned keys. 2179 + * 2180 + * For IDBObjectStore sources, we could paginate (primary keys match), but for 2181 + * simplicity we use single fetch here too since counts are usually reasonable. 
2182 + */ 2183 + async function collectDocIds( 2184 + source: {getAllKeys: (query?: IDBKeyRange | null, count?: number) => Promise<IDBValidKey[]>}, 2185 + keyRange: IDBKeyRange | null, 2186 + isCompoundKey: boolean, 2187 + _pageSize: number = DEFAULT_PAGE_SIZE, 2188 + ): Promise<Set<IDBValidKey>> { 2189 + const docIds = new Set<IDBValidKey>() 2190 + 2191 + // Single fetch - pagination doesn't work for indexes (key space mismatch) 2192 + const allKeys = await source.getAllKeys(keyRange ?? undefined) 2193 + 2194 + for (const pk of allKeys) { 2195 + const docId = isCompoundKey ? (pk as IDBValidKey[])[0] : pk 2196 + docIds.add(docId) 2197 + } 2198 + 2199 + return docIds 2200 + } 2201 + 2202 + // ============================================================================= 1582 2203 // Query execution operators 1583 2204 // ============================================================================= 1584 2205 ··· 1595 2216 * When true, logs timing and row count information to console.debug. 1596 2217 */ 1597 2218 analyze?: boolean 2219 + 2220 + /** 2221 + * Page size for batched iteration. Larger values = fewer IDB round-trips 2222 + * but more memory usage per batch. Default: 1000. 2223 + * 2224 + * For stores with large row sizes, consider reducing this. 2225 + */ 2226 + pageSize?: number 1598 2227 } 1599 2228 1600 2229 const ANALYZE_PREFIX = '[docstore:analyze]' ··· 1602 2231 /** 1603 2232 * Execute a query plan and yield index rows. 1604 2233 * Rows contain only the fields available in the index (keys + replicated). 2234 + * 2235 + * Supports cursor hopping for 'in' queries: efficiently scans multiple 2236 + * discrete values by jumping between them rather than scanning the full range. 2237 + * 2238 + * Uses batched iteration (getAll with pagination) to reduce IDB round-trips 2239 + * by ~1000x compared to cursor.continue() loops. 2240 + * 2241 + * This is the low-level implementation. 
Use rows() for the smart dispatcher 2242 + * that handles intersections and sorting automatically. 1605 2243 */ 1606 - export async function* rows( 2244 + async function* rowsDirect( 1607 2245 ctx: ExecutionContext, 1608 2246 plan: QueryPlan, 1609 2247 options: ExecutionOptions = {}, 1610 2248 ): AsyncGenerator<Record<string, unknown>> { 1611 - const {analyze = false} = options 2249 + const {analyze = false, pageSize = DEFAULT_PAGE_SIZE} = options 1612 2250 const startTime = analyze ? performance.now() : 0 1613 2251 let scanned = 0 1614 2252 let matched = 0 1615 2253 1616 2254 if (analyze) { 2255 + const inInfo = plan.inValues ? ` (IN ${plan.inValues.length} values)` : '' 1617 2256 console.debug( 1618 - `${ANALYZE_PREFIX} rows() started - index: ${plan.indexName ?? '<full scan>'}`, 1619 - plan 2257 + `${ANALYZE_PREFIX} rows() started - index: ${plan.indexName ?? '<full scan>'}${inInfo}, pageSize: ${pageSize}`, 2258 + plan, 1620 2259 ) 1621 2260 } 1622 2261 ··· 1624 2263 const tx = ctx.db.transaction(storeName, 'readonly') 1625 2264 const store = tx.objectStore(storeName) 1626 2265 1627 - // Open cursor with key range and direction 1628 - const keyRange = plan.keyRange ? 
toIDBKeyRange(plan.keyRange) : null 2266 + // Fast path: direct get() for primary key lookup (no index, 'only' key range) 2267 + if (!plan.indexName && plan.keyRange?.type === 'only' && !plan.inValues) { 2268 + const row = (await store.get(plan.keyRange.lower as IDBValidKey)) as Record<string, unknown> | undefined 2269 + if (row && (plan.filter.length === 0 || matchesFilters(row, plan.filter, analyze))) { 2270 + if (analyze) { 2271 + const totalTime = performance.now() - startTime 2272 + console.debug(`${ANALYZE_PREFIX} rows() complete (direct get) - time: ${totalTime.toFixed(2)}ms`) 2273 + } 2274 + yield row 2275 + } 2276 + return 2277 + } 2278 + 2279 + // Handle 'in' queries with batched iteration per value 2280 + if (plan.inValues && plan.inValues.length > 0) { 2281 + const indexOrStore = plan.indexName ? store.index(plan.indexName) : store 2282 + 2283 + for (const value of plan.inValues) { 2284 + const fullKey = plan.inPrefix ? [...plan.inPrefix, value] : value 2285 + const keyRange = IDBKeyRange.only(fullKey) 1629 2286 1630 - const cursorOpenStart = analyze ? performance.now() : 0 2287 + // Use batched row iteration instead of cursor 2288 + for await (const row of iterateRows( 2289 + indexOrStore, 2290 + keyRange, 2291 + plan.direction, 2292 + pageSize, 2293 + (r) => (r as Record<string, unknown>)[ctx.storeDef.primaryKey] as IDBValidKey, 2294 + )) { 2295 + scanned++ 2296 + const rowObj = row as Record<string, unknown> 1631 2297 1632 - // Get cursor - use index if specified, otherwise scan store 1633 - let cursor = plan.indexName 1634 - ? 
await store.index(plan.indexName).openCursor(keyRange, plan.direction) 1635 - : await store.openCursor(keyRange, plan.direction) 2298 + if (plan.filter.length === 0 || matchesFilters(rowObj, plan.filter, analyze)) { 2299 + matched++ 2300 + yield rowObj 2301 + } 2302 + } 2303 + } 1636 2304 1637 - if (analyze) { 1638 - const cursorOpenTime = performance.now() - cursorOpenStart 1639 - console.debug(`${ANALYZE_PREFIX} cursor opened in ${cursorOpenTime.toFixed(2)}ms`) 2305 + if (analyze) { 2306 + const totalTime = performance.now() - startTime 2307 + console.debug( 2308 + `${ANALYZE_PREFIX} rows() complete (batched in) - values: ${plan.inValues.length}, scanned: ${scanned}, matched: ${matched}, time: ${totalTime.toFixed(2)}ms`, 2309 + ) 2310 + } 2311 + return 1640 2312 } 1641 2313 1642 - // Iterate cursor using idb's promise-based API 1643 - while (cursor) { 2314 + // Standard batched iteration 2315 + const keyRange = plan.keyRange ? toIDBKeyRange(plan.keyRange) : null 2316 + const indexOrStore = plan.indexName ? 
store.index(plan.indexName) : store 2317 + 2318 + // Use batched row iteration instead of cursor 2319 + for await (const row of iterateRows( 2320 + indexOrStore, 2321 + keyRange, 2322 + plan.direction, 2323 + pageSize, 2324 + (r) => (r as Record<string, unknown>)[ctx.storeDef.primaryKey] as IDBValidKey, 2325 + )) { 1644 2326 scanned++ 1645 - const row = cursor.value as Record<string, unknown> 2327 + const rowObj = row as Record<string, unknown> 1646 2328 1647 2329 // Apply post-fetch filters 1648 - if (plan.filter.length === 0 || matchesFilters(row, plan.filter, analyze)) { 2330 + if (plan.filter.length === 0 || matchesFilters(rowObj, plan.filter, analyze)) { 1649 2331 matched++ 1650 - yield row 2332 + yield rowObj 1651 2333 } 2334 + } 1652 2335 1653 - cursor = await cursor.continue() 2336 + if (analyze) { 2337 + const totalTime = performance.now() - startTime 2338 + console.debug( 2339 + `${ANALYZE_PREFIX} rows() complete (batched) - scanned: ${scanned}, matched: ${matched}, time: ${totalTime.toFixed(2)}ms`, 2340 + ) 2341 + } 2342 + } 2343 + 2344 + /** 2345 + * Count rows matching a query plan. 2346 + * 2347 + * Fast path: Uses IDB's native count() when possible (no post-filters, no intersection). 2348 + * This is a pure key scan with no value deserialization. 2349 + * 2350 + * Slow path: Falls back to cursor iteration when post-filters or intersection required. 2351 + */ 2352 + export async function count( 2353 + ctx: ExecutionContext, 2354 + plan: QueryPlan, 2355 + options: ExecutionOptions = {}, 2356 + ): Promise<number> { 2357 + const {analyze = false} = options 2358 + const startTime = analyze ? performance.now() : 0 2359 + 2360 + // Fast path: use native count() when no post-filters and no intersection 2361 + if (plan.filter.length === 0 && !plan.intersection && !plan.inValues) { 2362 + const storeName = plan.source === 'primary' ? 
ctx.storeDef.name : plan.source 2363 + const tx = ctx.db.transaction(storeName, 'readonly') 2364 + const store = tx.objectStore(storeName) 2365 + const keyRange = plan.keyRange ? toIDBKeyRange(plan.keyRange) : undefined 2366 + 2367 + const result = plan.indexName 2368 + ? await store.index(plan.indexName).count(keyRange) 2369 + : await store.count(keyRange) 2370 + 2371 + if (analyze) { 2372 + const totalTime = performance.now() - startTime 2373 + console.debug( 2374 + `${ANALYZE_PREFIX} count() complete (native) - count: ${result}, time: ${totalTime.toFixed(2)}ms`, 2375 + ) 2376 + } 2377 + 2378 + return result 2379 + } 2380 + 2381 + // Slow path: iterate and count 2382 + // For intersection queries with no post-filters, use optimized key-only counting 2383 + if (plan.intersection && plan.filter.length === 0) { 2384 + if (analyze) { 2385 + console.debug(`${ANALYZE_PREFIX} count() using key-only intersection`) 2386 + } 2387 + const result = await countIntersectDirect(ctx, plan, options) 2388 + if (analyze) { 2389 + const totalTime = performance.now() - startTime 2390 + console.debug( 2391 + `${ANALYZE_PREFIX} count() complete (key intersection) - count: ${result}, time: ${totalTime.toFixed(2)}ms`, 2392 + ) 2393 + } 2394 + return result 2395 + } 2396 + 2397 + // Fallback: iterate rows and count (needed when post-filters exist) 2398 + if (analyze) { 2399 + console.debug(`${ANALYZE_PREFIX} count() using cursor iteration (has filters)`) 2400 + } 2401 + 2402 + let result = 0 2403 + const rowGenerator = plan.intersection 2404 + ? 
rowsIntersectDirect(ctx, plan, options) 2405 + : rowsDirect(ctx, plan, options) 2406 + 2407 + for await (const _ of rowGenerator) { 2408 + result++ 1654 2409 } 1655 2410 1656 2411 if (analyze) { 1657 2412 const totalTime = performance.now() - startTime 1658 2413 console.debug( 1659 - `${ANALYZE_PREFIX} rows() complete - scanned: ${scanned}, matched: ${matched}, time: ${totalTime.toFixed(2)}ms`, 2414 + `${ANALYZE_PREFIX} count() complete (cursor) - count: ${result}, time: ${totalTime.toFixed(2)}ms`, 2415 + ) 2416 + } 2417 + 2418 + return result 2419 + } 2420 + 2421 + /** 2422 + * Count documents matching an intersection query using key-only iteration. 2423 + * This is faster than rowsIntersectDirect when we only need a count. 2424 + * 2425 + * Uses batched key iteration (getAllKeys with pagination) for efficiency. 2426 + */ 2427 + async function countIntersectDirect( 2428 + ctx: ExecutionContext, 2429 + plan: QueryPlan, 2430 + options: ExecutionOptions = {}, 2431 + ): Promise<number> { 2432 + const {analyze = false, pageSize = DEFAULT_PAGE_SIZE} = options 2433 + 2434 + if (!plan.intersection) { 2435 + throw new Error('countIntersectDirect requires an intersection plan') 2436 + } 2437 + 2438 + const intersection = plan.intersection 2439 + const probeStoreName = plan.source === 'primary' ? ctx.storeDef.name : plan.source 2440 + const buildStoreName = intersection.source === 'primary' ? ctx.storeDef.name : intersection.source 2441 + 2442 + // Collect all store names needed 2443 + const storeNamesSet = new Set([probeStoreName, buildStoreName]) 2444 + let nextIntersection = intersection.next 2445 + while (nextIntersection) { 2446 + const storeName = nextIntersection.source === 'primary' ? 
ctx.storeDef.name : nextIntersection.source 2447 + storeNamesSet.add(storeName) 2448 + nextIntersection = nextIntersection.next 2449 + } 2450 + 2451 + const tx = ctx.db.transaction(Array.from(storeNamesSet), 'readonly') 2452 + 2453 + // Phase 1: Collect doc IDs from probe side using batched key iteration 2454 + const probeScanStart = analyze ? performance.now() : 0 2455 + const probeStore = tx.objectStore(probeStoreName) 2456 + const probeKeyRange = plan.keyRange ? toIDBKeyRange(plan.keyRange) : null 2457 + const isProbeIndexStore = plan.source !== 'primary' 2458 + 2459 + const probeSource = plan.indexName ? probeStore.index(plan.indexName) : probeStore 2460 + const probeDocIds = await collectDocIds(probeSource, probeKeyRange, isProbeIndexStore, pageSize) 2461 + 2462 + if (analyze) { 2463 + const probeScanTime = performance.now() - probeScanStart 2464 + console.debug( 2465 + `${ANALYZE_PREFIX} count intersection probe (batched keys) - count: ${probeDocIds.size}, time: ${probeScanTime.toFixed(2)}ms`, 2466 + ) 2467 + } 2468 + 2469 + // Phase 2: Filter through each intersection side 2470 + let currentIntersection: IntersectionPlan | undefined = intersection 2471 + let intersectionIndex = 1 2472 + let candidateDocIds = probeDocIds 2473 + 2474 + const filterCtx: IntersectionFilterContext = { 2475 + tx, 2476 + primaryStoreName: ctx.storeDef.name, 2477 + analyze, 2478 + pageSize, 2479 + } 2480 + 2481 + while (currentIntersection) { 2482 + candidateDocIds = await filterByIntersection( 2483 + filterCtx, 2484 + currentIntersection, 2485 + candidateDocIds, 2486 + intersectionIndex, 2487 + ) 2488 + currentIntersection = currentIntersection.next 2489 + intersectionIndex++ 2490 + } 2491 + 2492 + // Result is just the size of the final candidate set 2493 + return candidateDocIds.size 2494 + } 2495 + 2496 + // Threshold for switching from scan to point lookups in intersections 2497 + // Point lookup cost varies but is typically 0.5-2ms, cursor step is ~0.04ms 2498 + // Switch to 
point lookups when candidates < buildRows/10 2499 + const POINT_LOOKUP_THRESHOLD = 10 2500 + 2501 + interface IntersectionFilterContext { 2502 + tx: IDBPTransaction<unknown, string[], 'readonly'> 2503 + primaryStoreName: string 2504 + analyze: boolean 2505 + pageSize: number 2506 + } 2507 + 2508 + /** 2509 + * Filter candidate doc IDs through an intersection plan. 2510 + * Chooses between scan and point-lookup strategies based on estimated costs. 2511 + * 2512 + * Uses batched key iteration (getAllKeys with pagination) for scan strategy. 2513 + * 2514 + * @returns Set of doc IDs that exist in the intersection 2515 + */ 2516 + async function filterByIntersection( 2517 + ctx: IntersectionFilterContext, 2518 + intersection: IntersectionPlan, 2519 + candidateDocIds: Set<IDBValidKey>, 2520 + intersectionIndex: number, 2521 + ): Promise<Set<IDBValidKey>> { 2522 + const {tx, primaryStoreName, analyze} = ctx 2523 + const buildScanStart = analyze ? performance.now() : 0 2524 + 2525 + const curBuildStoreName = intersection.source === 'primary' ? primaryStoreName : intersection.source 2526 + const buildStore = tx.objectStore(curBuildStoreName) 2527 + const buildKeyRange = intersection.keyRange ? toIDBKeyRange(intersection.keyRange) : null 2528 + const isIndexStore = intersection.source !== 'primary' 2529 + 2530 + // Estimate build side size to decide strategy 2531 + const totalRows = await buildStore.count() 2532 + const estimatedBuildRows = Math.max(1, Math.floor(totalRows * intersection.selectivity)) 2533 + 2534 + // Choose strategy: point lookups vs full scan 2535 + // Point lookups only work for primary store (we can fetch doc by ID and check fields) 2536 + const usePointLookups = 2537 + intersection.source === 'primary' && candidateDocIds.size < estimatedBuildRows / POINT_LOOKUP_THRESHOLD 2538 + 2539 + if (analyze) { 2540 + console.debug( 2541 + `${ANALYZE_PREFIX} intersection ${intersectionIndex} strategy: ${usePointLookups ? 
'point-lookups' : 'scan'} (candidates: ${candidateDocIds.size}, est. build rows: ${estimatedBuildRows})`, 1660 2542 ) 1661 2543 } 1662 2544 1663 - // Handle sorting if needed (requires buffering all results) 1664 - // Note: for needsSort=true, caller should use rowsSorted() instead 2545 + const buildDocIds = new Set<IDBValidKey>() 2546 + 2547 + if (usePointLookups) { 2548 + // Point lookup strategy: fetch each candidate doc and check the filter 2549 + // This is faster when candidates << build side rows 2550 + const primaryStore = tx.objectStore(primaryStoreName) 2551 + const handledClauses = intersection.handledClauses 2552 + 2553 + for (const docId of candidateDocIds) { 2554 + const doc = (await primaryStore.get(docId)) as Record<string, unknown> | undefined 2555 + if (doc) { 2556 + // Check if doc matches all the clauses this intersection handles 2557 + const matches = handledClauses.every((clause) => matchesOp(doc[clause.key], clause.op, clause.value)) 2558 + if (matches) { 2559 + buildDocIds.add(docId) 2560 + } 2561 + } 2562 + } 2563 + } else { 2564 + // Scan strategy: fetch all keys in one call 2565 + // NOTE: We can't use pagination with indexes because getAllKeys() returns 2566 + // primary keys but keyRange filters on index keys - key space mismatch. 2567 + // Single fetch is still much faster than cursor.continue() loop. 2568 + const buildSource = intersection.indexName ? buildStore.index(intersection.indexName) : buildStore 2569 + const allPrimaryKeys = await buildSource.getAllKeys(buildKeyRange ?? undefined) 2570 + 2571 + for (const pk of allPrimaryKeys) { 2572 + // Extract docId from primaryKey 2573 + // - Index stores: primaryKey is [_docId, _rowId], so primaryKey[0] = docId 2574 + // - Primary store: primaryKey IS the docId directly 2575 + const docId = isIndexStore ? 
(pk as IDBValidKey[])[0] : pk 2576 + // Only add if it's still a candidate (optimization for chained intersections) 2577 + if (candidateDocIds.has(docId)) { 2578 + buildDocIds.add(docId) 2579 + } 2580 + } 2581 + } 2582 + 2583 + if (analyze) { 2584 + const buildScanTime = performance.now() - buildScanStart 2585 + console.debug( 2586 + `${ANALYZE_PREFIX} intersection ${intersectionIndex} complete - source: ${intersection.source}/${intersection.indexName}, strategy: ${usePointLookups ? 'point-lookups' : 'scan'}, count: ${buildDocIds.size}, time: ${buildScanTime.toFixed(2)}ms`, 2587 + ) 2588 + } 2589 + 2590 + return buildDocIds 1665 2591 } 1666 2592 1667 2593 /** ··· 1672 2598 * This is efficient when: 1673 2599 * - Both indexes have good selectivity 1674 2600 * - The intersection of results is much smaller than either side alone 2601 + * 2602 + * This is the low-level implementation. Use rows() for the smart dispatcher 2603 + * that handles intersections and sorting automatically. 1675 2604 */ 1676 - export async function* rowsIntersect( 2605 + async function* rowsIntersectDirect( 1677 2606 ctx: ExecutionContext, 1678 2607 plan: QueryPlan, 1679 2608 options: ExecutionOptions = {}, 1680 2609 ): AsyncGenerator<Record<string, unknown>> { 1681 - const {analyze = false} = options 2610 + const {analyze = false, pageSize = DEFAULT_PAGE_SIZE} = options 1682 2611 1683 2612 if (!plan.intersection) { 1684 2613 // No intersection, fall back to regular rows 1685 - yield* rows(ctx, plan, options) 2614 + yield* rowsDirect(ctx, plan, options) 1686 2615 return 1687 2616 } 1688 2617 ··· 1696 2625 1697 2626 const intersection = plan.intersection 1698 2627 const probeDocIdField = plan.docIdField ?? (plan.source === 'primary' ? 
'id' : '_docId') 1699 - const buildDocIdField = intersection.docIdField 1700 - 1701 - // Strategy: scan probe side, collect doc IDs, look up in build side 1702 - // For efficiency, we batch the lookups 1703 2628 1704 2629 const probeStoreName = plan.source === 'primary' ? ctx.storeDef.name : plan.source 1705 2630 const buildStoreName = intersection.source === 'primary' ? ctx.storeDef.name : intersection.source 1706 2631 1707 - // If both are on the same store, we can use a single transaction 1708 - const storeNames = probeStoreName === buildStoreName ? [probeStoreName] : [probeStoreName, buildStoreName] 2632 + // Collect all store names needed for the transaction (probe + all intersection sides) 2633 + const storeNamesSet = new Set([probeStoreName, buildStoreName]) 2634 + let nextIntersection = intersection.next 2635 + while (nextIntersection) { 2636 + const storeName = nextIntersection.source === 'primary' ? ctx.storeDef.name : nextIntersection.source 2637 + storeNamesSet.add(storeName) 2638 + nextIntersection = nextIntersection.next 2639 + } 2640 + const storeNames = Array.from(storeNamesSet) 1709 2641 1710 2642 const tx = ctx.db.transaction(storeNames, 'readonly') 1711 2643 1712 - // Phase 1: Collect doc IDs from probe side (smaller set) 2644 + // Phase 1: Collect doc IDs from probe side using batched key iteration 1713 2645 const probeScanStart = analyze ? performance.now() : 0 1714 2646 const probeStore = tx.objectStore(probeStoreName) 1715 2647 const probeKeyRange = plan.keyRange ? toIDBKeyRange(plan.keyRange) : null 2648 + const isProbeIndexStore = plan.source !== 'primary' 1716 2649 1717 - let probeCursor = plan.indexName 1718 - ? 
await probeStore.index(plan.indexName).openCursor(probeKeyRange, plan.direction) 1719 - : await probeStore.openCursor(probeKeyRange, plan.direction) 1720 - 1721 - // Collect probe doc IDs and their rows 1722 - const probeResults: Map<IDBValidKey, Record<string, unknown>> = new Map() 1723 - 1724 - while (probeCursor) { 1725 - const row = probeCursor.value as Record<string, unknown> 1726 - const docId = row[probeDocIdField] as IDBValidKey 1727 - 1728 - // Only include if it passes probe-side filters (not covered by index) 1729 - // Note: plan.filter contains clauses not handled by either side, 1730 - // but intersection.handledClauses are checked via the build side lookup 1731 - probeResults.set(docId, row) 1732 - 1733 - probeCursor = await probeCursor.continue() 1734 - } 1735 - 1736 - const probeScanTime = analyze ? performance.now() - probeScanStart : 0 2650 + const probeSource = plan.indexName ? probeStore.index(plan.indexName) : probeStore 2651 + const probeDocIds = await collectDocIds(probeSource, probeKeyRange, isProbeIndexStore, pageSize) 1737 2652 1738 2653 if (analyze) { 2654 + const probeScanTime = performance.now() - probeScanStart 1739 2655 console.debug( 1740 - `${ANALYZE_PREFIX} probe side scanned - count: ${probeResults.size}, time: ${probeScanTime.toFixed(2)}ms`, 2656 + `${ANALYZE_PREFIX} probe side scanned (batched keys) - count: ${probeDocIds.size}, time: ${probeScanTime.toFixed(2)}ms`, 1741 2657 ) 1742 2658 } 1743 2659 1744 - // Phase 2: Build a set of doc IDs from build side for intersection 1745 - const buildScanStart = analyze ? performance.now() : 0 1746 - const buildStore = tx.objectStore(buildStoreName) 1747 - const buildKeyRange = intersection.keyRange ? 
toIDBKeyRange(intersection.keyRange) : null 2660 + // Phase 2: Filter candidates through each intersection side 2661 + let currentIntersection: IntersectionPlan | undefined = intersection 2662 + let intersectionIndex = 1 2663 + let candidateDocIds = probeDocIds 1748 2664 1749 - let buildCursor = intersection.indexName 1750 - ? await buildStore.index(intersection.indexName).openCursor(buildKeyRange, 'next') 1751 - : await buildStore.openCursor(buildKeyRange, 'next') 2665 + const filterCtx: IntersectionFilterContext = { 2666 + tx, 2667 + primaryStoreName: ctx.storeDef.name, 2668 + analyze, 2669 + pageSize, 2670 + } 1752 2671 1753 - const buildDocIds = new Set<IDBValidKey>() 2672 + while (currentIntersection) { 2673 + candidateDocIds = await filterByIntersection( 2674 + filterCtx, 2675 + currentIntersection, 2676 + candidateDocIds, 2677 + intersectionIndex, 2678 + ) 2679 + currentIntersection = currentIntersection.next 2680 + intersectionIndex++ 2681 + } 1754 2682 1755 - while (buildCursor) { 1756 - const row = buildCursor.value as Record<string, unknown> 1757 - const docId = row[buildDocIdField] as IDBValidKey 1758 - buildDocIds.add(docId) 1759 - buildCursor = await buildCursor.continue() 1760 - } 2683 + // Phase 3: Fetch and yield rows for the final intersection result 2684 + // Strategy depends on result size vs probe size: 2685 + // - Small result set: point lookups from primary store 2686 + // - Large result set: re-scan probe with value cursor 2687 + const fetchStart = analyze ? performance.now() : 0 2688 + let intersectionCount = 0 1761 2689 1762 - const buildScanTime = analyze ? 
performance.now() - buildScanStart : 0 2690 + const resultSize = candidateDocIds.size 2691 + const probeSize = probeDocIds.size 2692 + const usePointLookups = resultSize < probeSize / 4 // Point lookups when < 25% match 1763 2693 1764 2694 if (analyze) { 1765 2695 console.debug( 1766 - `${ANALYZE_PREFIX} build side scanned - count: ${buildDocIds.size}, time: ${buildScanTime.toFixed(2)}ms`, 2696 + `${ANALYZE_PREFIX} fetch strategy: ${usePointLookups ? 'point-lookups' : 're-scan'} (result: ${resultSize}, probe: ${probeSize})`, 1767 2697 ) 1768 2698 } 1769 2699 1770 - // Phase 3: Yield rows that exist in both sets 1771 - let intersectionCount = 0 1772 - for (const [docId, row] of probeResults) { 1773 - if (buildDocIds.has(docId)) { 1774 - // Apply remaining filters (those not handled by either index) 1775 - if (plan.filter.length === 0 || matchesFilters(row, plan.filter, analyze)) { 1776 - intersectionCount++ 1777 - yield row 2700 + if (usePointLookups && plan.source !== 'primary') { 2701 + // Fetch matching docs from primary store by docId 2702 + // This is efficient when intersection filters out most probe rows 2703 + const primaryStore = tx.objectStore(ctx.storeDef.name) 2704 + 2705 + for (const docId of candidateDocIds) { 2706 + const doc = (await primaryStore.get(docId)) as Record<string, unknown> | undefined 2707 + if (doc) { 2708 + if (plan.filter.length === 0 || matchesFilters(doc, plan.filter, analyze)) { 2709 + intersectionCount++ 2710 + yield doc 2711 + } 2712 + } 2713 + } 2714 + } else { 2715 + // Re-scan probe side with batched row iteration 2716 + // Much faster than cursor.continue() loop - ~1000x fewer IDB round-trips 2717 + const probeSource = plan.indexName ? 
probeStore.index(plan.indexName) : probeStore 2718 + 2719 + for await (const row of iterateRows( 2720 + probeSource, 2721 + probeKeyRange, 2722 + plan.direction, 2723 + pageSize, 2724 + (r) => (r as Record<string, unknown>)[ctx.storeDef.primaryKey] as IDBValidKey, 2725 + )) { 2726 + const rowObj = row as Record<string, unknown> 2727 + const docId = rowObj[probeDocIdField] as IDBValidKey 2728 + 2729 + if (candidateDocIds.has(docId)) { 2730 + if (plan.filter.length === 0 || matchesFilters(rowObj, plan.filter, analyze)) { 2731 + intersectionCount++ 2732 + yield rowObj 2733 + } 2734 + // Remove from set to avoid yielding duplicates (same docId can have multiple index rows) 2735 + candidateDocIds.delete(docId) 1778 2736 } 1779 2737 } 1780 2738 } 1781 2739 1782 2740 if (analyze) { 2741 + const fetchTime = performance.now() - fetchStart 1783 2742 const totalTime = performance.now() - startTime 1784 2743 console.debug( 1785 - `${ANALYZE_PREFIX} rowsIntersect() complete - intersection: ${intersectionCount}, total time: ${totalTime.toFixed(2)}ms`, 2744 + `${ANALYZE_PREFIX} rowsIntersect() complete - intersection: ${intersectionCount}, fetch time: ${fetchTime.toFixed(2)}ms, total time: ${totalTime.toFixed(2)}ms`, 1786 2745 ) 1787 2746 } 1788 2747 } 1789 2748 1790 2749 /** 1791 2750 * Execute a query plan and yield index rows, sorted in memory. 1792 - * Use when plan.needsSort is true. 1793 - * Automatically uses intersection if the plan has one. 2751 + * 2752 + * This is the low-level implementation. Use rows() for the smart dispatcher 2753 + * that handles intersections and sorting automatically. 1794 2754 */ 1795 - export async function* rowsSorted( 2755 + async function* rowsSortedDirect( 1796 2756 ctx: ExecutionContext, 1797 2757 plan: QueryPlan, 1798 2758 orderBy: {key: string; direction: 'asc' | 'desc'}, ··· 1808 2768 // Collect all matching rows - use intersection if available 1809 2769 const collectStart = analyze ? 
performance.now() : 0 1810 2770 const allRows: Record<string, unknown>[] = [] 1811 - const rowGenerator = plan.intersection ? rowsIntersect(ctx, plan, options) : rows(ctx, plan, options) 2771 + const rowGenerator = plan.intersection 2772 + ? rowsIntersectDirect(ctx, plan, options) 2773 + : rowsDirect(ctx, plan, options) 1812 2774 for await (const row of rowGenerator) { 1813 2775 allRows.push(row) 1814 2776 } ··· 1846 2808 } 1847 2809 1848 2810 /** 2811 + * Execute a query plan and yield index rows. 2812 + * 2813 + * This is the main entry point for query execution. It automatically: 2814 + * - Uses intersection when the plan has one 2815 + * - Sorts results in memory when plan.needsSort is true 2816 + * - Falls back to direct cursor iteration for simple queries 2817 + */ 2818 + export async function* rows( 2819 + ctx: ExecutionContext, 2820 + plan: QueryPlan, 2821 + options: RowsOptions = {}, 2822 + ): AsyncGenerator<Record<string, unknown>> { 2823 + const {orderBy} = options 2824 + 2825 + // If sorting is needed, use the sorted implementation 2826 + if (plan.needsSort && orderBy) { 2827 + yield* rowsSortedDirect(ctx, plan, orderBy, options) 2828 + return 2829 + } 2830 + 2831 + // If intersection is needed, use the intersection implementation 2832 + if (plan.intersection) { 2833 + yield* rowsIntersectDirect(ctx, plan, options) 2834 + return 2835 + } 2836 + 2837 + // Simple case: direct cursor iteration 2838 + yield* rowsDirect(ctx, plan, options) 2839 + } 2840 + 2841 + /** 2842 + * Options for the rows() function. 2843 + */ 2844 + export interface RowsOptions extends ExecutionOptions { 2845 + /** Required when plan.needsSort is true */ 2846 + orderBy?: {key: string; direction: 'asc' | 'desc'} 2847 + } 2848 + 2849 + /** 1849 2850 * Options for the docs() function. 
1850 2851 */ 1851 2852 export interface DocsOptions extends ExecutionOptions { ··· 1872 2873 console.debug(`${ANALYZE_PREFIX} docs() started - source: ${plan.source}, batchSize: ${batchSize}`) 1873 2874 } 1874 2875 1875 - // Choose the appropriate row generator 1876 - const rowGenerator = plan.intersection ? rowsIntersect(ctx, plan, options) : rows(ctx, plan, options) 2876 + // Use the smart rows() dispatcher which handles intersections and sorting 2877 + const rowGenerator = rows(ctx, plan, options) 1877 2878 1878 2879 // If already reading from primary and no intersection, just yield rows as docs 1879 2880 if (plan.source === 'primary' && !plan.intersection) { ··· 2004 3005 } 2005 3006 } 2006 3007 3008 + // eslint-disable-next-line @typescript-eslint/no-explicit-any 3009 + type AnyStoreDef = StoreDef<any> 3010 + 2007 3011 /** 2008 - * Install a docstore schema into IndexedDB. 2009 - * Creates the primary object store and all denormalized index stores. 3012 + * Install docstore schemas into IndexedDB. 3013 + * Creates the primary object stores and all denormalized index stores. 
2010 3014 * 2011 3015 * @param dbName - Name for the IDB database 2012 - * @param storeDef - Store definition from docstore.define() 2013 - * @param version - Optional version number (defaults to 1) 3016 + * @param storeDefs - Store definition(s) from docstore.define() - can be single or array 3017 + * @param options - version number (defaults to 1) 2014 3018 * @returns Promise resolving to the opened database 2015 3019 */ 2016 - export async function install<S extends ShapeWithStorageRef>( 3020 + export async function install( 2017 3021 dbName: string, 2018 - storeDef: StoreDef<S>, 2019 - options: {version?: number; primaryKey?: string} = {}, 3022 + storeDefs: AnyStoreDef | AnyStoreDef[], 3023 + options: {version?: number} = {}, 2020 3024 ): Promise<IDBPDatabase> { 2021 - const {version = 1, primaryKey = 'id'} = options 3025 + const {version = 1} = options 3026 + 3027 + // Normalize to array 3028 + const stores = Array.isArray(storeDefs) ? storeDefs : [storeDefs] 3029 + 3030 + // Collect all store names needed by all storeDefs 3031 + const allStoreNames = new Set<string>() 3032 + for (const storeDef of stores) { 3033 + allStoreNames.add(storeDef.name) 3034 + for (const group of storeDef.indexStoreGroups) { 3035 + allStoreNames.add(group.storeName) 3036 + } 3037 + } 2022 3038 2023 3039 const db = await openDB(dbName, version, { 2024 3040 upgrade(db, oldVersion, _newVersion, _tx) { 2025 - // On version upgrade, delete and recreate all stores to handle schema changes 2026 - // In production, you'd want proper migrations, but for dev this is simpler 3041 + // On version upgrade, delete stores that we manage 2027 3042 if (oldVersion > 0) { 2028 - // Delete all existing stores 2029 3043 for (const name of db.objectStoreNames) { 2030 - db.deleteObjectStore(name) 3044 + if (allStoreNames.has(name)) { 3045 + db.deleteObjectStore(name) 3046 + } 2031 3047 } 2032 3048 } 2033 3049 2034 - // Create primary store 2035 - const primaryStore = db.createObjectStore(storeDef.name, 
{keyPath: primaryKey}) 3050 + // Create all stores for all storeDefs 3051 + for (const storeDef of stores) { 3052 + const primaryKey = storeDef.primaryKey 3053 + 3054 + // Create primary store if it doesn't exist 3055 + if (!db.objectStoreNames.contains(storeDef.name)) { 3056 + const primaryStore = db.createObjectStore(storeDef.name, {keyPath: primaryKey}) 2036 3057 2037 - // Create native indexes on primary store 2038 - for (const idx of storeDef.nativeIndexes) { 2039 - const keyPath = idx.keys.length === 1 ? idx.keys[0] : idx.keys 2040 - primaryStore.createIndex(idx.name, keyPath as string | string[], { 2041 - unique: false, 2042 - multiEntry: false, 2043 - }) 2044 - } 3058 + // Create native indexes on primary store 3059 + for (const idx of storeDef.nativeIndexes) { 3060 + const keyPath = idx.keys.length === 1 ? idx.keys[0] : idx.keys 3061 + primaryStore.createIndex(idx.name, keyPath as string | string[], { 3062 + unique: false, 3063 + multiEntry: false, 3064 + }) 3065 + } 3066 + } 2045 3067 2046 - // Create denormalized index stores 2047 - for (const group of storeDef.indexStoreGroups) { 2048 - // Index stores use auto-increment key since rows are denormalized 2049 - // We also store _docId to join back to primary 2050 - const indexStore = db.createObjectStore(group.storeName, { 2051 - keyPath: null, 2052 - autoIncrement: true, 2053 - }) 3068 + // Create denormalized index stores 3069 + for (const group of storeDef.indexStoreGroups) { 3070 + if (!db.objectStoreNames.contains(group.storeName)) { 3071 + // Index stores use compound key [_docId, _rowId] for: 3072 + // 1. Efficient key-only cursor scans (cursor.primaryKey[0] = docId) 3073 + // 2. 
Unique row identification within denormalized data 3074 + const indexStore = db.createObjectStore(group.storeName, { 3075 + keyPath: ['_docId', '_rowId'], 3076 + }) 2054 3077 2055 - // Create an index on _docId for efficient deletion on update 2056 - indexStore.createIndex('_docId', '_docId', {unique: false}) 3078 + // Create an index on _docId for efficient deletion on update 3079 + indexStore.createIndex('_docId', '_docId', {unique: false}) 2057 3080 2058 - // Create indexes for each index definition in this group 2059 - for (const idx of group.indexes) { 2060 - // Convert logical keys to storage-safe keys for IDB keyPath 2061 - const storageKeys = idx.keys.map((k) => toStorageKey(k as string)) 2062 - const keyPath = storageKeys.length === 1 ? storageKeys[0] : storageKeys 2063 - indexStore.createIndex(idx.name, keyPath, { 2064 - unique: false, 2065 - multiEntry: false, // We handle multi-entry via denormalization 2066 - }) 3081 + // Create indexes for each index definition in this group 3082 + for (const idx of group.indexes) { 3083 + // Convert logical keys to storage-safe keys for IDB keyPath 3084 + const storageKeys = idx.keys.map((k) => toStorageKey(k as string)) 3085 + const keyPath = storageKeys.length === 1 ? 
storageKeys[0] : storageKeys 3086 + indexStore.createIndex(idx.name, keyPath, { 3087 + unique: false, 3088 + multiEntry: false, // We handle multi-entry via denormalization 3089 + }) 3090 + } 3091 + } 2067 3092 } 2068 3093 } 2069 3094 2070 - // Create _stats store for query planning statistics 2071 - db.createObjectStore('_stats', {keyPath: 'id'}) 3095 + // Create _stats store for query planning statistics (shared across all stores) 3096 + if (!db.objectStoreNames.contains('_stats')) { 3097 + db.createObjectStore('_stats', {keyPath: 'id'}) 3098 + } 2072 3099 }, 2073 3100 }) 2074 3101 ··· 2088 3115 db: IDBPDatabase, 2089 3116 storeDef: StoreDef<S>, 2090 3117 doc: InferShapeWithStorageRef<S>, 3118 + statsManager?: StatsManager, 2091 3119 options: {primaryKey?: string} = {}, 2092 3120 ): Promise<void> { 2093 3121 const {primaryKey = 'id'} = options 2094 3122 const docId = (doc as Record<string, unknown>)[primaryKey] as IDBValidKey 2095 3123 2096 3124 // Get all store names we need to write to 2097 - const storeNames = [storeDef.name, ...storeDef.indexStoreGroups.map((g) => g.storeName)] 3125 + const storeNames = [ 3126 + storeDef.name, 3127 + ...storeDef.indexStoreGroups.map((g) => g.storeName), 3128 + ...(statsManager ? 
['_stats'] : []), 3129 + ] 2098 3130 2099 3131 const tx = db.transaction(storeNames, 'readwrite') 2100 3132 ··· 2102 3134 const primaryStore = tx.objectStore(storeDef.name) 2103 3135 await primaryStore.put(doc as Record<string, unknown>) 2104 3136 3137 + // Collect stats data per store 3138 + const statsByStore = new Map<string, Record<string, unknown>[]>() 3139 + 3140 + if (statsManager) { 3141 + // Collect primary store stats - extract non-array keys from schema 3142 + const docRecord = doc as Record<string, unknown> 3143 + const primaryStats: Record<string, unknown> = {} 3144 + for (const key of Object.keys(storeDef.schema)) { 3145 + if (key in docRecord && !Array.isArray(docRecord[key])) { 3146 + primaryStats[key] = docRecord[key] 3147 + } 3148 + } 3149 + statsByStore.set(storeDef.name, [primaryStats]) 3150 + } 3151 + 2105 3152 // Delete old index rows and write new ones for each index store group 2106 3153 for (const group of storeDef.indexStoreGroups) { 2107 3154 const indexStore = tx.objectStore(group.storeName) 2108 3155 const docIdIndex = indexStore.index('_docId') 2109 3156 2110 - // Delete existing rows for this document 2111 - let cursor = await docIdIndex.openCursor(IDBKeyRange.only(docId)) 3157 + // Delete existing rows for this document using key cursor (no value deserialization) 3158 + let cursor = await docIdIndex.openKeyCursor(IDBKeyRange.only(docId)) 2112 3159 while (cursor) { 2113 - await cursor.delete() 3160 + await indexStore.delete(cursor.primaryKey) 2114 3161 cursor = await cursor.continue() 2115 3162 } 2116 3163 ··· 2118 3165 const result = ingest(storeDef, doc) 2119 3166 const rows = result.byStore.get(group.storeName) ?? 
[] 2120 3167 2121 - // Write new index rows 3168 + // Collect index store stats 3169 + const indexStatsRecords: Record<string, unknown>[] = [] 3170 + 3171 + // Write new index rows with unique _rowId for compound primary key 2122 3172 for (const row of rows) { 2123 - await indexStore.add({...row, _docId: docId}) 3173 + await indexStore.add({...row, _docId: docId, _rowId: crypto.randomUUID()}) 3174 + if (statsManager) { 3175 + // Convert storage keys back to logical keys for stats 3176 + indexStatsRecords.push(rowFromStorage(row, group.allKeys)) 3177 + } 3178 + } 3179 + 3180 + if (statsManager && indexStatsRecords.length > 0) { 3181 + statsByStore.set(group.storeName, indexStatsRecords) 3182 + } 3183 + } 3184 + 3185 + // Update stats per store in the same transaction 3186 + if (statsManager && statsByStore.size > 0) { 3187 + for (const [storeId, records] of statsByStore) { 3188 + await statsManager.recordWriteBatch( 3189 + storeId, 3190 + records, 3191 + tx as IDBPTransaction<unknown, string[], 'readwrite'>, 3192 + ) 3193 + } 3194 + } 3195 + 3196 + await tx.done 3197 + } 3198 + 3199 + /** 3200 + * Put multiple documents into the store in a single transaction. 3201 + * More efficient than multiple put() calls - one transaction, one stats update. 
3202 + * 3203 + * @param db - Database instance from install() 3204 + * @param storeDef - Store definition 3205 + * @param docs - Documents to store 3206 + * @param statsManager - Stats manager to update (optional) 3207 + * @param options - Optional settings 3208 + */ 3209 + export async function putMany<S extends ShapeWithStorageRef>( 3210 + db: IDBPDatabase, 3211 + storeDef: StoreDef<S>, 3212 + docs: InferShapeWithStorageRef<S>[], 3213 + statsManager?: StatsManager, 3214 + options: {primaryKey?: string} = {}, 3215 + ): Promise<void> { 3216 + if (docs.length === 0) return 3217 + 3218 + const {primaryKey = 'id'} = options 3219 + 3220 + // Get all store names we need to write to 3221 + const storeNames = [ 3222 + storeDef.name, 3223 + ...storeDef.indexStoreGroups.map((g) => g.storeName), 3224 + ...(statsManager ? ['_stats'] : []), 3225 + ] 3226 + 3227 + const tx = db.transaction(storeNames, 'readwrite') 3228 + const primaryStore = tx.objectStore(storeDef.name) 3229 + 3230 + // Collect stats data per store 3231 + const statsByStore = new Map<string, Record<string, unknown>[]>() 3232 + 3233 + for (const doc of docs) { 3234 + const docId = (doc as Record<string, unknown>)[primaryKey] as IDBValidKey 3235 + 3236 + // Write to primary store 3237 + await primaryStore.put(doc as Record<string, unknown>) 3238 + 3239 + // Collect primary store stats - extract keys that exist in schema 3240 + if (statsManager) { 3241 + const docRecord = doc as Record<string, unknown> 3242 + const primaryStats: Record<string, unknown> = {} 3243 + for (const key of Object.keys(storeDef.schema)) { 3244 + if (key in docRecord && !Array.isArray(docRecord[key])) { 3245 + primaryStats[key] = docRecord[key] 3246 + } 3247 + } 3248 + const existing = statsByStore.get(storeDef.name) ?? 
[] 3249 + existing.push(primaryStats) 3250 + statsByStore.set(storeDef.name, existing) 3251 + } 3252 + 3253 + // Delete old index rows and write new ones for each index store group 3254 + for (const group of storeDef.indexStoreGroups) { 3255 + const indexStore = tx.objectStore(group.storeName) 3256 + const docIdIndex = indexStore.index('_docId') 3257 + 3258 + // Delete existing rows for this document using key cursor (no value deserialization) 3259 + let cursor = await docIdIndex.openKeyCursor(IDBKeyRange.only(docId)) 3260 + while (cursor) { 3261 + await indexStore.delete(cursor.primaryKey) 3262 + cursor = await cursor.continue() 3263 + } 3264 + 3265 + // Ingest document to get new index rows 3266 + const result = ingest(storeDef, doc) 3267 + const rows = result.byStore.get(group.storeName) ?? [] 3268 + 3269 + // Write new index rows with unique _rowId for compound primary key 3270 + for (const row of rows) { 3271 + await indexStore.add({...row, _docId: docId, _rowId: crypto.randomUUID()}) 3272 + if (statsManager) { 3273 + // Convert storage keys back to logical keys for stats, collect per index store 3274 + const existing = statsByStore.get(group.storeName) ?? 
[] 3275 + existing.push(rowFromStorage(row, group.allKeys)) 3276 + statsByStore.set(group.storeName, existing) 3277 + } 3278 + } 3279 + } 3280 + } 3281 + 3282 + // Update stats per store once at the end 3283 + if (statsManager && statsByStore.size > 0) { 3284 + for (const [storeId, records] of statsByStore) { 3285 + await statsManager.recordWriteBatch( 3286 + storeId, 3287 + records, 3288 + tx as IDBPTransaction<unknown, string[], 'readwrite'>, 3289 + ) 2124 3290 } 2125 3291 } 2126 3292 ··· 2153 3319 // Delete from primary store 2154 3320 await tx.objectStore(storeDef.name).delete(key) 2155 3321 2156 - // Delete from index stores 3322 + // Delete from index stores using key cursor (no value deserialization) 2157 3323 for (const group of storeDef.indexStoreGroups) { 2158 3324 const indexStore = tx.objectStore(group.storeName) 2159 3325 const docIdIndex = indexStore.index('_docId') 2160 3326 2161 - let cursor = await docIdIndex.openCursor(IDBKeyRange.only(key)) 3327 + let cursor = await docIdIndex.openKeyCursor(IDBKeyRange.only(key)) 2162 3328 while (cursor) { 2163 - await cursor.delete() 3329 + await indexStore.delete(cursor.primaryKey) 2164 3330 cursor = await cursor.continue() 2165 3331 } 2166 3332 } ··· 2168 3334 await tx.done 2169 3335 } 2170 3336 3337 + // ============================================================================= 3338 + // Index value aggregation (tag clouds, facets) 3339 + // ============================================================================= 3340 + 2171 3341 /** 2172 - * Check if a row matches all filter clauses. 2173 - * Handles both logical keys and storage keys, including spread keys for primary store rows. 3342 + * Result from collecting index values - a value and its count. 3343 + */ 3344 + export interface ValueCount<T = unknown> { 3345 + value: T 3346 + count: number 3347 + } 3348 + 3349 + /** 3350 + * Options for collectIndexValues. 
3351 + */ 3352 + export interface CollectValuesOptions extends ExecutionOptions { 3353 + /** 3354 + * If true, count unique documents rather than index rows. 3355 + * For spread indexes, the same document may appear multiple times with different values. 3356 + * Default: false (count all rows) 3357 + */ 3358 + countDocuments?: boolean 3359 + 3360 + /** 3361 + * Maximum number of distinct values to return. 3362 + * Results are sorted by count descending, so this returns the top N. 3363 + * Default: no limit 3364 + */ 3365 + limit?: number 3366 + 3367 + /** 3368 + * Minimum count to include in results. 3369 + * Useful for filtering out rare values. 3370 + * Default: 1 (include all) 3371 + */ 3372 + minCount?: number 3373 + 3374 + /** 3375 + * Key range to filter the scan (optional). 3376 + * Applied to the first key of the index. 3377 + */ 3378 + keyRange?: KeyRangeSpec 3379 + } 3380 + 3381 + /** 3382 + * Collect distinct values from an index with their counts. 3383 + * Useful for building tag clouds, faceted search, or analyzing value distributions. 3384 + * 3385 + * Uses key-only cursors when possible (countDocuments=false) for ~10x faster iteration. 3386 + * The value is extracted directly from the index key, avoiding row deserialization. 3387 + * 3388 + * @param ctx - Execution context with db and storeDef 3389 + * @param indexName - Name of the index to scan (must be defined on the store) 3390 + * @param keyToCollect - The logical key to collect values for (e.g., '...tags.tag') 3391 + * @param options - Collection options 3392 + * @returns Array of {value, count} sorted by count descending 3393 + * 3394 + * @example 3395 + * // Get all tag names with counts (tag cloud) 3396 + * const tagCounts = await collectIndexValues(ctx, 'by_tag_only', '...tags.tag') 3397 + * // => [{value: 'category', count: 150}, {value: 'author', count: 45}, ...] 
3398 + * 3399 + * @example 3400 + * // Get top 10 categories with at least 5 occurrences 3401 + * const topCategories = await collectIndexValues(ctx, 'by_tag', '...tags.value', { 3402 + * keyRange: {type: 'only', lower: 'category'}, // filter to tag='category' 3403 + * limit: 10, 3404 + * minCount: 5 3405 + * }) 2174 3406 */ 2175 - function matchesFilters(row: Record<string, unknown>, filters: WhereClause[], analyze: boolean): boolean { 2176 - const filterOpenTime = performance.now() 2177 - try { 2178 - for (const filter of filters) { 2179 - // Try storage key first (for index store rows), then logical key (for primary store) 2180 - const storageKey = toStorageKey(filter.key) 2181 - let value = row[storageKey] ?? row[filter.key] 3407 + export async function collectIndexValues<S extends ShapeWithStorageRef>( 3408 + ctx: ExecutionContext & {storeDef: StoreDef<S>}, 3409 + indexName: string, 3410 + keyToCollect: string, 3411 + options: CollectValuesOptions = {}, 3412 + ): Promise<ValueCount[]> { 3413 + const {countDocuments = false, limit, minCount = 1, keyRange, analyze = false} = options 3414 + const startTime = analyze ? 
performance.now() : 0 2182 3415 2183 - // If value is still undefined and this is a spread key, we need to check inside arrays 2184 - // e.g., for '...tags.tag' on a primary store row, check if any tag.tag matches 2185 - if (value === undefined && filter.key.startsWith('...')) { 2186 - const path = filter.key.slice(3).split('.') 2187 - // path[0] is the array field, rest is the path within each element 2188 - const arrayField = path[0] 2189 - const remainderPath = path.slice(1) 2190 - const array = row[arrayField] 3416 + // Find the index definition 3417 + const indexDef = ctx.storeDef.indexes.find((idx) => idx.name === indexName) 3418 + if (!indexDef) { 3419 + throw new Error(`Index '${indexName}' not found on store '${ctx.storeDef.name}'`) 3420 + } 2191 3421 2192 - if (Array.isArray(array)) { 2193 - // Check if ANY element in the array matches the filter 2194 - const anyMatch = array.some((element) => { 2195 - const elementValue = remainderPath.length > 0 ? getPath(element, remainderPath) : element 2196 - return matchesOp(elementValue, filter.op, filter.value) 2197 - }) 2198 - if (!anyMatch) { 2199 - return false 3422 + // Find which position in the index key corresponds to our target key 3423 + const keyIndex = indexDef.keys.findIndex((k) => k === keyToCollect) 3424 + if (keyIndex === -1) { 3425 + throw new Error( 3426 + `Key '${keyToCollect}' not found in index '${indexName}' keys: [${indexDef.keys.join(', ')}]`, 3427 + ) 3428 + } 3429 + 3430 + // Determine which store to scan 3431 + let storeName: string 3432 + if (indexDef.spreadRoots.size === 0) { 3433 + // Native index on primary store 3434 + storeName = ctx.storeDef.name 3435 + } else { 3436 + // Find the index store group 3437 + const group = ctx.storeDef.indexStoreGroups.find((g) => g.indexes.some((idx) => idx.name === indexName)) 3438 + if (!group) { 3439 + throw new Error(`Index store group not found for index '${indexName}'`) 3440 + } 3441 + storeName = group.storeName 3442 + } 3443 + 3444 + const 
isCompoundKey = indexDef.keys.length > 1 3445 + 3446 + if (analyze) { 3447 + console.debug( 3448 + `${ANALYZE_PREFIX} collectIndexValues() started - index: ${indexName}, key: ${keyToCollect}[${keyIndex}], store: ${storeName}, compound: ${isCompoundKey}, countDocs: ${countDocuments}`, 3449 + ) 3450 + } 3451 + 3452 + const tx = ctx.db.transaction(storeName, 'readonly') 3453 + const store = tx.objectStore(storeName) 3454 + const index = store.index(indexName) 3455 + 3456 + // Build IDB key range if specified 3457 + const idbKeyRange = keyRange ? toIDBKeyRange(keyRange) : null 3458 + 3459 + // Aggregate counts 3460 + const valueCounts = new Map<unknown, number>() 3461 + const valueDocIds = countDocuments ? new Map<unknown, Set<IDBValidKey>>() : null 3462 + 3463 + let scanned = 0 3464 + 3465 + // Use getAll() for batch retrieval - single IDB call is much faster than cursor iteration. 3466 + // Benchmarks show getAll() is ~5-6x faster than openKeyCursor iteration (120ms vs 670ms for 15k rows). 3467 + // NOTE: IDBIndex.getAllKeys() returns PRIMARY keys, not index keys! 3468 + // So we use getAll() to get the row values, then extract the indexed field. 3469 + // The indexed field is stored in the row with a storage-safe key name. 3470 + const storageKey = toStorageKey(keyToCollect) 3471 + 3472 + if (!countDocuments) { 3473 + const fetchStart = analyze ? performance.now() : 0 3474 + const allRows = await index.getAll(idbKeyRange ?? undefined) 3475 + if (analyze) { 3476 + console.debug( 3477 + `${ANALYZE_PREFIX} getAll() fetched ${allRows.length} rows in ${(performance.now() - fetchStart).toFixed(2)}ms`, 3478 + ) 3479 + } 3480 + 3481 + scanned = allRows.length 3482 + const aggStart = analyze ? 
performance.now() : 0 3483 + 3484 + // Aggregate in JS - extract the indexed field value from each row 3485 + for (const row of allRows) { 3486 + const value = (row as Record<string, unknown>)[storageKey] 3487 + if (value !== undefined && value !== null) { 3488 + valueCounts.set(value, (valueCounts.get(value) ?? 0) + 1) 3489 + } 3490 + } 3491 + 3492 + if (analyze) { 3493 + console.debug( 3494 + `${ANALYZE_PREFIX} JS aggregation of ${scanned} rows took ${(performance.now() - aggStart).toFixed(2)}ms`, 3495 + ) 3496 + } 3497 + } else { 3498 + // For countDocuments, we need both value and docId from each row 3499 + // Use getAll() which is much faster than cursor iteration 3500 + const fetchStart = analyze ? performance.now() : 0 3501 + const allRows = await index.getAll(idbKeyRange ?? undefined) 3502 + if (analyze) { 3503 + console.debug( 3504 + `${ANALYZE_PREFIX} getAll() fetched ${allRows.length} rows in ${(performance.now() - fetchStart).toFixed(2)}ms`, 3505 + ) 3506 + } 3507 + 3508 + scanned = allRows.length 3509 + const aggStart = analyze ? performance.now() : 0 3510 + 3511 + // Aggregate by value, tracking unique docIds 3512 + for (const row of allRows) { 3513 + const rowObj = row as Record<string, unknown> 3514 + const value = rowObj[storageKey] 3515 + const docId = rowObj['_docId'] as IDBValidKey 3516 + 3517 + if (value !== undefined && value !== null && valueDocIds) { 3518 + if (!valueDocIds.has(value)) { 3519 + valueDocIds.set(value, new Set()) 2200 3520 } 2201 - continue // This filter passed, check next 3521 + valueDocIds.get(value)!.add(docId) 2202 3522 } 2203 3523 } 2204 3524 2205 - if (!matchesOp(value, filter.op, filter.value)) { 2206 - return false 3525 + if (analyze) { 3526 + console.debug( 3527 + `${ANALYZE_PREFIX} JS aggregation of ${scanned} rows took ${(performance.now() - aggStart).toFixed(2)}ms`, 3528 + ) 3529 + } 3530 + 3531 + // Convert docId sets to counts 3532 + for (const [value, docIds] of valueDocIds!) 
{ 3533 + valueCounts.set(value, docIds.size) 3534 + } 3535 + } 3536 + 3537 + // Convert to array and filter by minCount 3538 + let results: ValueCount[] = [] 3539 + for (const [value, count] of valueCounts) { 3540 + if (count >= minCount) { 3541 + results.push({value, count}) 2207 3542 } 2208 3543 } 2209 - return true 3544 + 3545 + // Sort by count descending 3546 + results.sort((a, b) => b.count - a.count) 3547 + 3548 + // Apply limit 3549 + if (limit !== undefined && results.length > limit) { 3550 + results = results.slice(0, limit) 3551 + } 3552 + 3553 + if (analyze) { 3554 + const totalTime = performance.now() - startTime 3555 + console.debug( 3556 + `${ANALYZE_PREFIX} collectIndexValues() complete - scanned: ${scanned}, distinct: ${results.length}, time: ${totalTime.toFixed(2)}ms`, 3557 + ) 3558 + } 3559 + 3560 + return results 2210 3561 } 2211 - finally { 3562 + 3563 + /** 3564 + * Result from collecting key pairs (for compound indexes). 3565 + */ 3566 + export interface KeyPairCount<K1 = unknown, K2 = unknown> { 3567 + key1: K1 3568 + key2: K2 3569 + count: number 3570 + } 3571 + 3572 + /** 3573 + * Options for collectKeyPairs. 3574 + */ 3575 + export interface CollectKeyPairsOptions extends ExecutionOptions { 3576 + /** 3577 + * If true, count unique documents rather than index rows. 3578 + * Default: false 3579 + */ 3580 + countDocuments?: boolean 3581 + 3582 + /** 3583 + * Maximum number of pairs to return. 3584 + * Results are sorted by count descending. 3585 + */ 3586 + limit?: number 3587 + 3588 + /** 3589 + * Minimum count to include. 3590 + */ 3591 + minCount?: number 3592 + 3593 + /** 3594 + * Filter to a specific value for key1. 3595 + */ 3596 + key1Value?: unknown 3597 + } 3598 + 3599 + /** 3600 + * Collect key pairs from a compound index with counts. 3601 + * Useful for tag:value pairs, or any two-dimensional aggregation. 3602 + * 3603 + * Uses key-only cursors when possible (countDocuments=false) for ~10x faster iteration. 
3604 + * Values are extracted directly from the compound index key. 3605 + * 3606 + * @param ctx - Execution context 3607 + * @param indexName - Name of a compound index with at least 2 keys 3608 + * @param options - Collection options 3609 + * @returns Array of {key1, key2, count} sorted by count descending 3610 + * 3611 + * @example 3612 + * // Get all tag:value pairs with counts 3613 + * const pairs = await collectKeyPairs(ctx, 'by_tag') 3614 + * // => [{key1: 'category', key2: 'Technology', count: 50}, ...] 3615 + * 3616 + * @example 3617 + * // Get values for a specific tag 3618 + * const categoryValues = await collectKeyPairs(ctx, 'by_tag', { 3619 + * key1Value: 'category', 3620 + * limit: 20 3621 + * }) 3622 + */ 3623 + export async function collectKeyPairs<S extends ShapeWithStorageRef>( 3624 + ctx: ExecutionContext & {storeDef: StoreDef<S>}, 3625 + indexName: string, 3626 + options: CollectKeyPairsOptions = {}, 3627 + ): Promise<KeyPairCount[]> { 3628 + const {countDocuments = false, limit, minCount = 1, key1Value, analyze = false} = options 3629 + const startTime = analyze ? 
performance.now() : 0 3630 + 3631 + // Find the index definition 3632 + const indexDef = ctx.storeDef.indexes.find((idx) => idx.name === indexName) 3633 + if (!indexDef) { 3634 + throw new Error(`Index '${indexName}' not found on store '${ctx.storeDef.name}'`) 3635 + } 3636 + if (indexDef.keys.length < 2) { 3637 + throw new Error(`Index '${indexName}' must have at least 2 keys for collectKeyPairs`) 3638 + } 3639 + 3640 + // Determine store 3641 + let storeName: string 3642 + if (indexDef.spreadRoots.size === 0) { 3643 + storeName = ctx.storeDef.name 3644 + } else { 3645 + const group = ctx.storeDef.indexStoreGroups.find((g) => g.indexes.some((idx) => idx.name === indexName)) 3646 + if (!group) { 3647 + throw new Error(`Index store group not found for index '${indexName}'`) 3648 + } 3649 + storeName = group.storeName 3650 + } 3651 + 3652 + const key1Logical = indexDef.keys[0] as string 3653 + const key2Logical = indexDef.keys[1] as string 3654 + 3655 + if (analyze) { 3656 + console.debug( 3657 + `${ANALYZE_PREFIX} collectKeyPairs() started - index: ${indexName}, keys: [${key1Logical}, ${key2Logical}], countDocs: ${countDocuments}`, 3658 + ) 3659 + } 3660 + 3661 + const tx = ctx.db.transaction(storeName, 'readonly') 3662 + const store = tx.objectStore(storeName) 3663 + const index = store.index(indexName) 3664 + 3665 + // Build key range if filtering by key1 3666 + const idbKeyRange = key1Value !== undefined ? IDBKeyRange.bound([key1Value], [key1Value, []]) : null 3667 + 3668 + // Aggregate counts - use composite key for the map 3669 + const pairCounts = new Map<string, {key1: unknown; key2: unknown; count: number}>() 3670 + const pairDocIds = countDocuments 3671 + ? new Map<string, {key1: unknown; key2: unknown; docIds: Set<IDBValidKey>}>() 3672 + : null 3673 + 3674 + let scanned = 0 3675 + 3676 + // Use getAll() for batch retrieval - single IDB call is much faster than cursor iteration. 3677 + // NOTE: IDBIndex.getAllKeys() returns PRIMARY keys, not index keys! 
3678 + // So we use getAll() to get the row values, then extract the indexed fields. 3679 + const storageKey1 = toStorageKey(key1Logical) 3680 + const storageKey2 = toStorageKey(key2Logical) 3681 + 3682 + if (!countDocuments) { 3683 + const fetchStart = analyze ? performance.now() : 0 3684 + const allRows = await index.getAll(idbKeyRange ?? undefined) 2212 3685 if (analyze) { 2213 - const filterMatchTime = performance.now() - filterOpenTime 2214 - console.debug(`${ANALYZE_PREFIX} filter matched in ${filterMatchTime.toFixed(2)}ms`) 3686 + console.debug( 3687 + `${ANALYZE_PREFIX} getAll() fetched ${allRows.length} rows in ${(performance.now() - fetchStart).toFixed(2)}ms`, 3688 + ) 3689 + } 3690 + 3691 + scanned = allRows.length 3692 + const aggStart = analyze ? performance.now() : 0 3693 + 3694 + // Aggregate in JS - extract the indexed field values from each row 3695 + for (const row of allRows) { 3696 + const rowObj = row as Record<string, unknown> 3697 + const k1 = rowObj[storageKey1] 3698 + const k2 = rowObj[storageKey2] 3699 + 3700 + if (k1 !== undefined && k1 !== null && k2 !== undefined && k2 !== null) { 3701 + const pairKey = JSON.stringify([k1, k2]) 3702 + if (!pairCounts.has(pairKey)) { 3703 + pairCounts.set(pairKey, {key1: k1, key2: k2, count: 0}) 3704 + } 3705 + pairCounts.get(pairKey)!.count++ 3706 + } 3707 + } 3708 + 3709 + if (analyze) { 3710 + console.debug( 3711 + `${ANALYZE_PREFIX} JS aggregation of ${scanned} rows took ${(performance.now() - aggStart).toFixed(2)}ms`, 3712 + ) 3713 + } 3714 + } else { 3715 + // For countDocuments, we need both values and docId from each row 3716 + // Use getAll() which is much faster than cursor iteration 3717 + const fetchStart = analyze ? performance.now() : 0 3718 + const allRows = await index.getAll(idbKeyRange ?? 
undefined) 3719 + if (analyze) { 3720 + console.debug( 3721 + `${ANALYZE_PREFIX} getAll() fetched ${allRows.length} rows in ${(performance.now() - fetchStart).toFixed(2)}ms`, 3722 + ) 3723 + } 3724 + 3725 + scanned = allRows.length 3726 + const aggStart = analyze ? performance.now() : 0 3727 + 3728 + // Aggregate by key pair, tracking unique docIds 3729 + for (const row of allRows) { 3730 + const rowObj = row as Record<string, unknown> 3731 + const k1 = rowObj[storageKey1] 3732 + const k2 = rowObj[storageKey2] 3733 + const docId = rowObj['_docId'] as IDBValidKey 3734 + 3735 + if (k1 !== undefined && k1 !== null && k2 !== undefined && k2 !== null && pairDocIds) { 3736 + const pairKey = JSON.stringify([k1, k2]) 3737 + 3738 + if (!pairDocIds.has(pairKey)) { 3739 + pairDocIds.set(pairKey, {key1: k1, key2: k2, docIds: new Set()}) 3740 + } 3741 + pairDocIds.get(pairKey)!.docIds.add(docId) 3742 + } 3743 + } 3744 + 3745 + if (analyze) { 3746 + console.debug( 3747 + `${ANALYZE_PREFIX} JS aggregation of ${scanned} rows took ${(performance.now() - aggStart).toFixed(2)}ms`, 3748 + ) 3749 + } 3750 + 3751 + // Convert docId sets to counts 3752 + for (const [pairKey, {key1, key2, docIds}] of pairDocIds!) 
{ 3753 + pairCounts.set(pairKey, {key1, key2, count: docIds.size}) 2215 3754 } 2216 3755 } 3756 + 3757 + // Convert to array and filter 3758 + let results: KeyPairCount[] = [] 3759 + for (const {key1, key2, count} of pairCounts.values()) { 3760 + if (count >= minCount) { 3761 + results.push({key1, key2, count}) 3762 + } 3763 + } 3764 + 3765 + // Sort by count descending 3766 + results.sort((a, b) => b.count - a.count) 3767 + 3768 + // Apply limit 3769 + if (limit !== undefined && results.length > limit) { 3770 + results = results.slice(0, limit) 3771 + } 3772 + 3773 + if (analyze) { 3774 + const totalTime = performance.now() - startTime 3775 + console.debug( 3776 + `${ANALYZE_PREFIX} collectKeyPairs() complete - scanned: ${scanned}, pairs: ${results.length}, time: ${totalTime.toFixed(2)}ms`, 3777 + ) 3778 + } 3779 + 3780 + return results 3781 + } 3782 + 3783 + /** 3784 + * Check if a row matches all filter clauses. 3785 + * Handles both logical keys and storage keys, including spread keys for primary store rows. 3786 + */ 3787 + function matchesFilters(row: Record<string, unknown>, filters: WhereClause[], analyze: boolean): boolean { 3788 + for (const filter of filters) { 3789 + // Try storage key first (for index store rows), then logical key (for primary store) 3790 + const storageKey = toStorageKey(filter.key) 3791 + let value = row[storageKey] ?? 
row[filter.key] 3792 + 3793 + // If value is still undefined and this is a spread key, we need to check inside arrays 3794 + // e.g., for '...tags.tag' on a primary store row, check if any tag.tag matches 3795 + if (value === undefined && filter.key.startsWith('...')) { 3796 + const path = filter.key.slice(3).split('.') 3797 + // path[0] is the array field, rest is the path within each element 3798 + const arrayField = path[0] 3799 + const remainderPath = path.slice(1) 3800 + const array = row[arrayField] 3801 + 3802 + if (Array.isArray(array)) { 3803 + // Check if ANY element in the array matches the filter 3804 + const anyMatch = array.some((element) => { 3805 + const elementValue = remainderPath.length > 0 ? getPath(element, remainderPath) : element 3806 + return matchesOp(elementValue, filter.op, filter.value) 3807 + }) 3808 + if (!anyMatch) { 3809 + return false 3810 + } 3811 + continue // This filter passed, check next 3812 + } 3813 + } 3814 + 3815 + if (!matchesOp(value, filter.op, filter.value)) { 3816 + return false 3817 + } 3818 + } 3819 + return true 2217 3820 } 2218 3821 2219 3822 /** ··· 2233 3836 return (value as number) > (target as number) 2234 3837 case '>=': 2235 3838 return (value as number) >= (target as number) 3839 + case 'in': 3840 + return Array.isArray(target) && target.includes(value) 2236 3841 default: 2237 3842 return false 2238 3843 } ··· 2258 3863 // IDB operations 2259 3864 install, 2260 3865 put, 3866 + putMany, 2261 3867 get, 2262 3868 del, 3869 + // Aggregation 3870 + collectIndexValues, 3871 + collectKeyPairs, 2263 3872 } 2264 3873 2265 3874 export type {
+1039
src/main-old.ts
··· 1 + import './style.css' 2 + import {z} from 'zod/mini' 3 + import { 4 + docstore, 5 + ingest, 6 + QueryPlanner, 7 + StatsManager, 8 + rows, 9 + count, 10 + docs, 11 + collect, 12 + putMany, 13 + type IDBPDatabase, 14 + type Infer, 15 + type QuerySpec, 16 + type QueryPlan, 17 + } from './lib/docstore' 18 + 19 + // ============================================================================= 20 + // Define our store schema 21 + // ============================================================================= 22 + 23 + const entriesStore = docstore.define('entries', { 24 + schema: { 25 + id: z.string(), 26 + feedId: z.string(), 27 + title: z.string(), 28 + publishedAt: z.date(), 29 + status: z.enum(['draft', 'published', 'archived']), 30 + tags: z.array(z.object({tag: z.string(), score: z.number()})), 31 + // Second spread array for stress testing cross-spread queries 32 + categories: z.array(z.object({name: z.string(), priority: z.number()})), 33 + authorId: z.string(), 34 + }, 35 + indexes: (idx) => [ 36 + // Native indexes on primary store 37 + idx('by_feed_date').on('feedId', 'publishedAt'), 38 + idx('by_status').on('status'), 39 + idx('by_author_date').on('authorId', 'publishedAt'), 40 + // Denormalized indexes (spread over tags) 41 + idx('by_tag').on('...tags.tag').replicate('title', 'publishedAt'), 42 + idx('by_tag_score').on('...tags.tag', '...tags.score'), 43 + idx('by_feed_tag').on('feedId', '...tags.tag').replicate('title'), 44 + // Denormalized indexes (spread over categories) - second spread root 45 + idx('by_category').on('...categories.name').replicate('title', 'publishedAt'), 46 + idx('by_category_priority').on('...categories.name', '...categories.priority'), 47 + // Combined spread index (cartesian product of tags × categories) 48 + idx('by_tag_category').on('...tags.tag', '...categories.name'), 49 + ], 50 + }) 51 + 52 + type Entry = Infer<typeof entriesStore.schema> 53 + 54 + // 
============================================================================= 55 + // Global state 56 + // ============================================================================= 57 + 58 + let db: IDBPDatabase 59 + let statsManager: StatsManager 60 + let planner: QueryPlanner<typeof entriesStore.schema> 61 + let nextEntryId = 0 62 + let entryCount = 0 63 + 64 + // ============================================================================= 65 + // Generate sample data 66 + // ============================================================================= 67 + 68 + const feeds = ['tech', 'science', 'politics', 'sports', 'entertainment'] 69 + const authors = ['alice', 'bob', 'carol', 'dave', 'eve', 'frank'] 70 + const tagPool = [ 71 + 'javascript', 72 + 'typescript', 73 + 'rust', 74 + 'go', 75 + 'python', 76 + 'react', 77 + 'vue', 78 + 'svelte', 79 + 'node', 80 + 'deno', 81 + 'ai', 82 + 'ml', 83 + 'web', 84 + 'mobile', 85 + 'cloud', 86 + ] 87 + const categoryPool = [ 88 + 'programming', 89 + 'tutorial', 90 + 'news', 91 + 'opinion', 92 + 'review', 93 + 'announcement', 94 + 'deep-dive', 95 + 'beginner', 96 + ] 97 + const statuses: ('draft' | 'published' | 'archived')[] = ['draft', 'published', 'archived'] 98 + 99 + function randomTags(): {tag: string; score: number}[] { 100 + const count = Math.floor(Math.random() * 4) + 1 101 + const shuffled = [...tagPool].sort(() => Math.random() - 0.5) 102 + return shuffled.slice(0, count).map((tag) => ({ 103 + tag, 104 + score: Math.floor(Math.random() * 100), 105 + })) 106 + } 107 + 108 + function randomCategories(): {name: string; priority: number}[] { 109 + // Sometimes return empty array to test edge case 110 + if (Math.random() < 0.1) return [] 111 + const count = Math.floor(Math.random() * 3) + 1 112 + const shuffled = [...categoryPool].sort(() => Math.random() - 0.5) 113 + return shuffled.slice(0, count).map((name) => ({ 114 + name, 115 + priority: Math.floor(Math.random() * 10) + 1, 116 + })) 117 + } 118 + 119 + 
function generateEntry(): Entry { 120 + const now = Date.now() 121 + const id = nextEntryId++ 122 + return { 123 + id: `entry-${id}`, 124 + feedId: feeds[Math.floor(Math.random() * feeds.length)], 125 + title: `Article ${id}: ${tagPool[Math.floor(Math.random() * tagPool.length)]} news`, 126 + publishedAt: new Date(now - Math.random() * 30 * 24 * 60 * 60 * 1000), 127 + status: statuses[Math.floor(Math.random() * statuses.length)], 128 + tags: randomTags(), 129 + categories: randomCategories(), 130 + authorId: authors[Math.floor(Math.random() * authors.length)], 131 + } 132 + } 133 + 134 + async function addEntries(count: number) { 135 + const entries: Entry[] = [] 136 + for (let i = 0; i < count; i++) { 137 + entries.push(generateEntry()) 138 + } 139 + 140 + // Batch insert with stats update 141 + await putMany(db, entriesStore, entries, statsManager) 142 + entryCount += count 143 + } 144 + 145 + // ============================================================================= 146 + // Query execution using real IDB 147 + // ============================================================================= 148 + 149 + async function executeQuery(plan: QueryPlan, spec: QuerySpec): Promise<Record<string, unknown>[]> { 150 + const ctx = { 151 + db, 152 + storeDef: entriesStore as any, // ExecutionContext expects generic StoreDef 153 + } 154 + 155 + // rows() now handles intersections and sorting automatically 156 + const options = { 157 + analyze: true, 158 + orderBy: spec.orderBy, 159 + } 160 + const rowGenerator = rows(ctx, plan, options) 161 + const results = await collect(rowGenerator, 5) 162 + return results as Record<string, unknown>[] 163 + } 164 + 165 + async function executeDocsQuery(plan: QueryPlan): Promise<Entry[]> { 166 + const ctx = { 167 + db, 168 + storeDef: entriesStore as any, 169 + } 170 + 171 + const options = {analyze: true} 172 + const docGenerator = docs(ctx, plan, options) 173 + const results = await collect(docGenerator, 5) 174 + return results as 
Entry[] 175 + } 176 + 177 + async function executeCountQuery(plan: QueryPlan): Promise<number> { 178 + const ctx = { 179 + db, 180 + storeDef: entriesStore as any, 181 + } 182 + 183 + const options = {analyze: true} 184 + return count(ctx, plan, options) 185 + } 186 + 187 + // ============================================================================= 188 + // Get counts from IDB 189 + // ============================================================================= 190 + 191 + async function getStoreCounts(): Promise<Map<string, number>> { 192 + const counts = new Map<string, number>() 193 + 194 + // Count primary store 195 + const tx = db.transaction(entriesStore.name, 'readonly') 196 + const primaryCount = await tx.objectStore(entriesStore.name).count() 197 + counts.set(entriesStore.name, primaryCount) 198 + await tx.done 199 + 200 + // Count index stores 201 + for (const group of entriesStore.indexStoreGroups) { 202 + const indexTx = db.transaction(group.storeName, 'readonly') 203 + const indexCount = await indexTx.objectStore(group.storeName).count() 204 + counts.set(group.storeName, indexCount) 205 + await indexTx.done 206 + } 207 + 208 + return counts 209 + } 210 + 211 + // ============================================================================= 212 + // Sample queries 213 + // ============================================================================= 214 + 215 + const sampleQueries: {name: string; spec: QuerySpec}[] = [ 216 + { 217 + name: 'All entries from tech feed, newest first', 218 + spec: { 219 + store: 'entries', 220 + where: [{key: 'feedId', op: '=', value: 'tech'}], 221 + orderBy: {key: 'publishedAt', direction: 'desc'}, 222 + }, 223 + }, 224 + { 225 + name: 'Published entries only', 226 + spec: { 227 + store: 'entries', 228 + where: [{key: 'status', op: '=', value: 'published'}], 229 + }, 230 + }, 231 + { 232 + name: 'Entries tagged with "javascript"', 233 + spec: { 234 + store: 'entries', 235 + where: [{key: '...tags.tag', op: '=', 
value: 'javascript'}], 236 + }, 237 + }, 238 + { 239 + name: 'Tech feed + javascript tag', 240 + spec: { 241 + store: 'entries', 242 + where: [ 243 + {key: 'feedId', op: '=', value: 'tech'}, 244 + {key: '...tags.tag', op: '=', value: 'javascript'}, 245 + ], 246 + }, 247 + }, 248 + { 249 + name: 'javascript tag AND ai tag (self-join)', 250 + spec: { 251 + store: 'entries', 252 + where: [ 253 + {key: 'feedId', op: '=', value: 'tech'}, 254 + {key: '...tags.tag', op: '=', value: 'javascript'}, 255 + {key: '...tags.tag', op: '=', value: 'ai'}, 256 + ], 257 + }, 258 + }, 259 + { 260 + name: 'javascript tag OR ai tag (IN query)', 261 + spec: { 262 + store: 'entries', 263 + where: [{key: '...tags.tag', op: 'in', value: ['javascript', 'ai']}], 264 + }, 265 + }, 266 + { 267 + name: 'Entries by alice, recent', 268 + spec: { 269 + store: 'entries', 270 + where: [ 271 + {key: 'authorId', op: '=', value: 'alice'}, 272 + {key: 'publishedAt', op: '>', value: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000)}, 273 + ], 274 + orderBy: {key: 'publishedAt', direction: 'desc'}, 275 + }, 276 + }, 277 + { 278 + name: 'Primary key lookup (by id)', 279 + spec: { 280 + store: 'entries', 281 + where: [{key: 'id', op: '=', value: 'entry-42'}], 282 + }, 283 + }, 284 + { 285 + name: 'Full table scan (filter by title - no index)', 286 + spec: { 287 + store: 'entries', 288 + where: [{key: 'title', op: '=', value: 'Article 1: ai news'}], 289 + }, 290 + }, 291 + // --- Queries that would benefit from index joins --- 292 + { 293 + name: 'Published + javascript tag (status index + tag index)', 294 + spec: { 295 + store: 'entries', 296 + where: [ 297 + {key: 'status', op: '=', value: 'published'}, 298 + {key: '...tags.tag', op: '=', value: 'javascript'}, 299 + ], 300 + }, 301 + }, 302 + { 303 + name: 'Alice + typescript tag (author index + tag index)', 304 + spec: { 305 + store: 'entries', 306 + where: [ 307 + {key: 'authorId', op: '=', value: 'alice'}, 308 + {key: '...tags.tag', op: '=', value: 
'typescript'}, 309 + ], 310 + }, 311 + }, 312 + { 313 + name: 'Science feed + published status (feed index + status index)', 314 + spec: { 315 + store: 'entries', 316 + where: [ 317 + {key: 'feedId', op: '=', value: 'science'}, 318 + {key: 'status', op: '=', value: 'published'}, 319 + ], 320 + }, 321 + }, 322 + { 323 + name: 'Bob + draft + react tag (3-way join potential)', 324 + spec: { 325 + store: 'entries', 326 + where: [ 327 + {key: 'authorId', op: '=', value: 'bob'}, 328 + {key: 'status', op: '=', value: 'draft'}, 329 + {key: '...tags.tag', op: '=', value: 'react'}, 330 + ], 331 + }, 332 + }, 333 + { 334 + name: 'High score tags in tech feed (feed+tag index + score filter)', 335 + spec: { 336 + store: 'entries', 337 + where: [ 338 + {key: 'feedId', op: '=', value: 'tech'}, 339 + {key: '...tags.tag', op: '=', value: 'ai'}, 340 + {key: '...tags.score', op: '>=', value: 50}, 341 + ], 342 + }, 343 + }, 344 + // ========================================================================== 345 + // STRESS TEST: Multiple spread arrays (cross-spread queries) 346 + // ========================================================================== 347 + { 348 + name: 'Category lookup (second spread array)', 349 + spec: { 350 + store: 'entries', 351 + where: [{key: '...categories.name', op: '=', value: 'tutorial'}], 352 + }, 353 + }, 354 + { 355 + name: 'Tag + Category (cross-spread intersection)', 356 + spec: { 357 + store: 'entries', 358 + where: [ 359 + {key: '...tags.tag', op: '=', value: 'javascript'}, 360 + {key: '...categories.name', op: '=', value: 'tutorial'}, 361 + ], 362 + }, 363 + }, 364 + { 365 + name: 'Tag + Category via combined index (cartesian)', 366 + spec: { 367 + store: 'entries', 368 + where: [ 369 + {key: '...tags.tag', op: '=', value: 'rust'}, 370 + {key: '...categories.name', op: '=', value: 'deep-dive'}, 371 + ], 372 + }, 373 + }, 374 + // ========================================================================== 375 + // STRESS TEST: Same-row vs 
cross-row semantics on spread fields 376 + // ========================================================================== 377 + { 378 + name: 'Same-row: javascript tag with score > 50 (compound index)', 379 + spec: { 380 + store: 'entries', 381 + where: [ 382 + {key: '...tags.tag', op: '=', value: 'javascript'}, 383 + {key: '...tags.score', op: '>', value: 50}, 384 + ], 385 + }, 386 + }, 387 + { 388 + name: 'Same-row: high priority tutorial (compound index)', 389 + spec: { 390 + store: 'entries', 391 + where: [ 392 + {key: '...categories.name', op: '=', value: 'tutorial'}, 393 + {key: '...categories.priority', op: '>=', value: 8}, 394 + ], 395 + }, 396 + }, 397 + // ========================================================================== 398 + // STRESS TEST: ORDER BY with intersection (needs in-memory sort) 399 + // ========================================================================== 400 + { 401 + name: 'Published + javascript ORDER BY publishedAt (needs sort)', 402 + spec: { 403 + store: 'entries', 404 + where: [ 405 + {key: 'status', op: '=', value: 'published'}, 406 + {key: '...tags.tag', op: '=', value: 'javascript'}, 407 + ], 408 + orderBy: {key: 'publishedAt', direction: 'desc'}, 409 + }, 410 + }, 411 + { 412 + name: 'Tutorial category ORDER BY publishedAt (index covers order)', 413 + spec: { 414 + store: 'entries', 415 + where: [{key: '...categories.name', op: '=', value: 'tutorial'}], 416 + orderBy: {key: 'publishedAt', direction: 'desc'}, 417 + }, 418 + }, 419 + // ========================================================================== 420 + // STRESS TEST: Range queries on spread fields 421 + // ========================================================================== 422 + { 423 + name: 'Tags with score between 25-75 (range on spread)', 424 + spec: { 425 + store: 'entries', 426 + where: [ 427 + {key: '...tags.tag', op: '=', value: 'ai'}, 428 + {key: '...tags.score', op: '>=', value: 25}, 429 + {key: '...tags.score', op: '<=', value: 75}, 430 
+ ], 431 + }, 432 + }, 433 + { 434 + name: 'High priority categories (range without equality prefix)', 435 + spec: { 436 + store: 'entries', 437 + where: [{key: '...categories.priority', op: '>=', value: 8}], 438 + }, 439 + }, 440 + ] 441 + 442 + // ============================================================================= 443 + // UI rendering 444 + // ============================================================================= 445 + 446 + function formatIntersectionChain(intersection: QueryPlan['intersection']): string { 447 + if (!intersection) return '' 448 + 449 + let html = '' 450 + let current: QueryPlan['intersection'] = intersection 451 + let index = 1 452 + 453 + while (current) { 454 + html += ` 455 + <div class="intersection-step"> 456 + <strong>${index === 1 ? 'INTERSECT' : 'AND'} with:</strong> 457 + <span>Source: ${current.source}</span> 458 + <span>Index: ${current.indexName}</span> 459 + <span>KeyRange: ${current.keyRange ? JSON.stringify(current.keyRange.lower ?? current.keyRange) : 'none'}</span> 460 + </div> 461 + ` 462 + current = current.next 463 + index++ 464 + } 465 + 466 + return html 467 + } 468 + 469 + function formatPlan(plan: QueryPlan, queryId: number): string { 470 + const intersectionInfo = plan.intersection 471 + ? ` 472 + <div class="intersection-info"> 473 + ${formatIntersectionChain(plan.intersection)} 474 + </div> 475 + ` 476 + : '' 477 + 478 + return ` 479 + <div class="plan ${plan.intersection ? 'plan-intersection' : ''}" data-query-id="${queryId}"> 480 + <div><strong>Source:</strong> ${plan.source}</div> 481 + <div><strong>Index:</strong> ${plan.indexName ?? '<full scan>'}</div> 482 + <div><strong>Key Range:</strong> ${plan.keyRange ? JSON.stringify(plan.keyRange) : 'none'}</div> 483 + <div><strong>Direction:</strong> ${plan.direction}</div> 484 + <div><strong>Post-filters:</strong> ${plan.filter.length > 0 ? 
plan.filter.map((f) => `${f.key} ${f.op} ${f.value}`).join(', ') : 'none'}</div> 485 + <div><strong>Needs Sort:</strong> ${plan.needsSort}</div> 486 + <div><strong>Available Fields:</strong> ${plan.availableFields.join(', ')}</div> 487 + <div><strong>Selectivity:</strong> ${(plan.selectivity * 100).toFixed(2)}%</div> 488 + <div><strong>Est. Rows:</strong> ${plan.estimatedRows}</div> 489 + ${intersectionInfo} 490 + </div> 491 + <button class="run-query-btn" data-query-id="${queryId}">Run rows()</button> 492 + <button class="run-docs-btn" data-query-id="${queryId}">Run docs()</button> 493 + <button class="run-count-btn" data-query-id="${queryId}">Run count()</button> 494 + <div class="query-results" id="results-${queryId}"></div> 495 + ` 496 + } 497 + 498 + async function formatIngestResult(entry: Entry) { 499 + const result = ingest(entriesStore, entry) 500 + let html = '<div class="ingest-result">' 501 + 502 + for (const [storeName, rows] of result.byStore) { 503 + html += `<div class="store-group"><strong>${storeName}</strong> (${rows.length} rows)</div>` 504 + html += '<pre>' + JSON.stringify(rows, null, 2) + '</pre>' 505 + } 506 + 507 + html += '</div>' 508 + return html 509 + } 510 + 511 + async function renderDemo() { 512 + // Get a sample entry for display 513 + const sampleEntry = await docstore.get(db, entriesStore, 'entry-0') 514 + 515 + const storeCounts = await getStoreCounts() 516 + 517 + let html = ` 518 + <div class="demo"> 519 + <h1>DocStore Query Planner Demo</h1> 520 + <p class="idb-badge">Using real IndexedDB</p> 521 + 522 + <section class="controls"> 523 + <button id="add-entries-btn">Add 1000 Entries</button> 524 + <button id="clear-db-btn">Clear Database</button> 525 + <span id="entry-count">Entries: ${entryCount}</span> 526 + </section> 527 + 528 + <section> 529 + <h2>Store Definition</h2> 530 + <pre>${JSON.stringify( 531 + { 532 + name: entriesStore.name, 533 + nativeIndexes: entriesStore.nativeIndexes.map((i) => ({name: i.name, keys: 
i.keys})), 534 + indexStoreGroups: entriesStore.indexStoreGroups.map((g) => ({ 535 + storeName: g.storeName, 536 + spreadRoots: [...g.spreadRoots], 537 + indexes: g.indexes.map((i) => ({name: i.name, keys: i.keys})), 538 + allKeys: [...g.allKeys], 539 + })), 540 + }, 541 + null, 542 + 2, 543 + )}</pre> 544 + </section> 545 + 546 + <section> 547 + <h2>Sample Entry Ingestion</h2> 548 + <p>Here's how a sample entry gets denormalized into index rows:</p> 549 + <pre>${JSON.stringify( 550 + sampleEntry, 551 + (key, value) => { 552 + if (value instanceof Date) return value.toISOString() 553 + return value 554 + }, 555 + 2, 556 + )}</pre> 557 + ${sampleEntry ? await formatIngestResult(sampleEntry) : '<p>No entries yet</p>'} 558 + </section> 559 + 560 + <section> 561 + <h2>Query Plans</h2> 562 + <p>Given <strong id="query-entry-count">${entryCount}</strong> entries, here's how queries get planned. Click "Run Query" to see results:</p> 563 + <div id="queries-container"> 564 + ` 565 + 566 + for (let i = 0; i < sampleQueries.length; i++) { 567 + const {name, spec} = sampleQueries[i] 568 + const plan = await planner.plan(spec) 569 + html += ` 570 + <div class="query"> 571 + <h3>${name}</h3> 572 + <pre class="query-spec">${JSON.stringify(spec.where, null, 2)}${spec.orderBy ? 
'\nORDER BY ' + spec.orderBy.key + ' ' + spec.orderBy.direction.toUpperCase() : ''}</pre> 573 + ${formatPlan(plan, i)} 574 + </div> 575 + ` 576 + } 577 + 578 + html += ` 579 + </div> 580 + </section> 581 + 582 + <section> 583 + <h2>Cardinality Estimates (via HyperLogLog)</h2> 584 + <table id="cardinality-table"> 585 + <tr><th>Field</th><th>Estimated Cardinality</th></tr> 586 + ` 587 + 588 + // Map fields to their store names (spread fields live in index stores) 589 + const fieldToStore: Record<string, string> = { 590 + feedId: entriesStore.name, 591 + status: entriesStore.name, 592 + authorId: entriesStore.name, 593 + '...tags.tag': `${entriesStore.name}_idx:tags`, 594 + '...tags.score': `${entriesStore.name}_idx:tags`, 595 + '...categories.name': `${entriesStore.name}_idx:categories`, 596 + '...categories.priority': `${entriesStore.name}_idx:categories`, 597 + } 598 + for (const [field, storeName] of Object.entries(fieldToStore)) { 599 + const card = await statsManager.getCardinality(storeName, field) 600 + html += `<tr><td>${field}</td><td>${card}</td></tr>` 601 + } 602 + 603 + html += ` 604 + </table> 605 + </section> 606 + 607 + <section> 608 + <h2>Index Store Sizes (from IDB)</h2> 609 + <table id="index-sizes-table"> 610 + <tr><th>Store</th><th>Row Count</th></tr> 611 + ` 612 + 613 + for (const [storeName, count] of storeCounts) { 614 + const label = storeName === entriesStore.name ? `primary (${storeName})` : storeName 615 + html += `<tr><td>${label}</td><td>${count}</td></tr>` 616 + } 617 + 618 + html += ` 619 + </table> 620 + </section> 621 + </div> 622 + ` 623 + 624 + document.querySelector<HTMLDivElement>('#app')!.innerHTML = html 625 + 626 + // Attach event handlers 627 + document.getElementById('add-entries-btn')!.onclick = async () => { 628 + const btn = document.getElementById('add-entries-btn') as HTMLButtonElement 629 + btn.disabled = true 630 + btn.textContent = 'Adding...' 
631 + 632 + await addEntries(1000) 633 + document.getElementById('entry-count')!.textContent = `Entries: ${entryCount}` 634 + 635 + btn.disabled = false 636 + btn.textContent = 'Add 1000 Entries' 637 + 638 + await refreshPlans() 639 + } 640 + 641 + document.getElementById('clear-db-btn')!.onclick = async () => { 642 + // Delete the database and reload 643 + db.close() 644 + indexedDB.deleteDatabase('docstore-demo') 645 + window.location.reload() 646 + } 647 + 648 + // Attach run query buttons 649 + attachQueryButtons() 650 + } 651 + 652 + function attachQueryButtons() { 653 + document.querySelectorAll('.run-query-btn').forEach((btn) => { 654 + btn.addEventListener('click', async (e) => { 655 + const queryId = parseInt((e.target as HTMLElement).dataset.queryId!) 656 + const spec = sampleQueries[queryId].spec 657 + const plan = await planner.plan(spec) 658 + 659 + const resultsEl = document.getElementById(`results-${queryId}`)! 660 + resultsEl.innerHTML = '<div class="loading">Querying IDB (rows)...</div>' 661 + 662 + const results = await executeQuery(plan, spec) 663 + 664 + resultsEl.innerHTML = ` 665 + <div class="results-header">First 5 rows from IDB (rows):</div> 666 + <pre>${JSON.stringify( 667 + results, 668 + (key, value) => { 669 + if (value instanceof Date) return value.toISOString() 670 + return value 671 + }, 672 + 2, 673 + )}</pre> 674 + ` 675 + }) 676 + }) 677 + 678 + document.querySelectorAll('.run-docs-btn').forEach((btn) => { 679 + btn.addEventListener('click', async (e) => { 680 + const queryId = parseInt((e.target as HTMLElement).dataset.queryId!) 681 + const spec = sampleQueries[queryId].spec 682 + const plan = await planner.plan(spec) 683 + 684 + const resultsEl = document.getElementById(`results-${queryId}`)! 
685 + resultsEl.innerHTML = '<div class="loading">Querying IDB (docs)...</div>' 686 + 687 + const results = await executeDocsQuery(plan) 688 + 689 + resultsEl.innerHTML = ` 690 + <div class="results-header">First 5 docs from IDB (full documents):</div> 691 + <pre>${JSON.stringify( 692 + results, 693 + (key, value) => { 694 + if (value instanceof Date) return value.toISOString() 695 + return value 696 + }, 697 + 2, 698 + )}</pre> 699 + ` 700 + }) 701 + }) 702 + 703 + document.querySelectorAll('.run-count-btn').forEach((btn) => { 704 + btn.addEventListener('click', async (e) => { 705 + const queryId = parseInt((e.target as HTMLElement).dataset.queryId!) 706 + const spec = sampleQueries[queryId].spec 707 + const plan = await planner.plan(spec) 708 + 709 + const resultsEl = document.getElementById(`results-${queryId}`)! 710 + resultsEl.innerHTML = '<div class="loading">Counting (IDB)...</div>' 711 + 712 + const startTime = performance.now() 713 + const result = await executeCountQuery(plan) 714 + const elapsed = performance.now() - startTime 715 + 716 + resultsEl.innerHTML = ` 717 + <div class="results-header">Count result:</div> 718 + <pre>Count: ${result}\nTime: ${elapsed.toFixed(2)}ms</pre> 719 + ` 720 + }) 721 + }) 722 + } 723 + 724 + async function refreshPlans() { 725 + document.getElementById('query-entry-count')!.textContent = String(entryCount) 726 + 727 + const container = document.getElementById('queries-container')! 728 + let html = '' 729 + 730 + for (let i = 0; i < sampleQueries.length; i++) { 731 + const {name, spec} = sampleQueries[i] 732 + const plan = await planner.plan(spec) 733 + html += ` 734 + <div class="query"> 735 + <h3>${name}</h3> 736 + <pre class="query-spec">${JSON.stringify(spec.where, null, 2)}${spec.orderBy ? 
'\nORDER BY ' + spec.orderBy.key + ' ' + spec.orderBy.direction.toUpperCase() : ''}</pre> 737 + ${formatPlan(plan, i)} 738 + </div> 739 + ` 740 + } 741 + 742 + container.innerHTML = html 743 + attachQueryButtons() 744 + 745 + // Update cardinality table 746 + const cardTable = document.getElementById('cardinality-table')! 747 + let cardHtml = '<tr><th>Field</th><th>Estimated Cardinality</th></tr>' 748 + const fields = [ 749 + 'feedId', 750 + 'status', 751 + 'authorId', 752 + '...tags.tag', 753 + '...tags.score', 754 + '...categories.name', 755 + '...categories.priority', 756 + ] 757 + for (const field of fields) { 758 + const card = await statsManager.getCardinality(entriesStore.name, field) 759 + cardHtml += `<tr><td>${field}</td><td>${card}</td></tr>` 760 + } 761 + cardTable.innerHTML = cardHtml 762 + 763 + // Update index sizes table from IDB 764 + const storeCounts = await getStoreCounts() 765 + const sizesTable = document.getElementById('index-sizes-table')! 766 + let sizesHtml = '<tr><th>Store</th><th>Row Count</th></tr>' 767 + for (const [storeName, count] of storeCounts) { 768 + const label = storeName === entriesStore.name ? 
`primary (${storeName})` : storeName 769 + sizesHtml += `<tr><td>${label}</td><td>${count}</td></tr>` 770 + } 771 + sizesTable.innerHTML = sizesHtml 772 + } 773 + 774 + // ============================================================================= 775 + // Styles 776 + // ============================================================================= 777 + 778 + const style = document.createElement('style') 779 + style.textContent = ` 780 + :root { 781 + color-scheme: dark; 782 + } 783 + 784 + body { 785 + background: #1a1a1a; 786 + color: #e0e0e0; 787 + margin: 0; 788 + } 789 + 790 + .demo { 791 + max-width: 1000px; 792 + padding: 20px; 793 + font-family: system-ui, -apple-system, sans-serif; 794 + } 795 + 796 + h1 { 797 + border-bottom: 2px solid #555; 798 + padding-bottom: 10px; 799 + } 800 + 801 + h2 { 802 + margin-top: 30px; 803 + color: #aaa; 804 + } 805 + 806 + h3 { 807 + margin-top: 20px; 808 + color: #999; 809 + } 810 + 811 + pre { 812 + background: #2a2a2a; 813 + padding: 15px; 814 + border-radius: 5px; 815 + overflow-x: auto; 816 + font-size: 13px; 817 + color: #d0d0d0; 818 + border: 1px solid #3a3a3a; 819 + } 820 + 821 + .idb-badge { 822 + display: inline-block; 823 + background: #2a5a2a; 824 + color: #8f8; 825 + padding: 5px 15px; 826 + border-radius: 15px; 827 + font-size: 14px; 828 + margin-bottom: 15px; 829 + } 830 + 831 + .controls { 832 + display: flex; 833 + gap: 15px; 834 + align-items: center; 835 + padding: 15px; 836 + background: #252525; 837 + border-radius: 8px; 838 + margin-bottom: 20px; 839 + } 840 + 841 + button { 842 + background: #3a5a8a; 843 + color: #fff; 844 + border: none; 845 + padding: 10px 20px; 846 + border-radius: 5px; 847 + cursor: pointer; 848 + font-size: 14px; 849 + } 850 + 851 + button:hover { 852 + background: #4a6a9a; 853 + } 854 + 855 + button:disabled { 856 + background: #444; 857 + cursor: not-allowed; 858 + } 859 + 860 + .run-query-btn, .run-docs-btn, .run-count-btn { 861 + margin-top: 10px; 862 + } 863 + 864 + 
.run-query-btn { 865 + background: #4a7a4a; 866 + } 867 + 868 + .run-query-btn:hover { 869 + background: #5a8a5a; 870 + } 871 + 872 + .run-count-btn { 873 + background: #7a4a7a; 874 + } 875 + 876 + .run-count-btn:hover { 877 + background: #8a5a8a; 878 + } 879 + 880 + #clear-db-btn { 881 + background: #8a4a4a; 882 + } 883 + 884 + #clear-db-btn:hover { 885 + background: #9a5a5a; 886 + } 887 + 888 + #entry-count { 889 + color: #8a8; 890 + font-weight: bold; 891 + } 892 + 893 + .query { 894 + border: 1px solid #444; 895 + border-radius: 8px; 896 + padding: 15px; 897 + margin: 15px 0; 898 + background: #222; 899 + } 900 + 901 + .query-spec { 902 + background: #1e3a4a; 903 + border-color: #2a5a6a; 904 + } 905 + 906 + .plan { 907 + background: #1e2e1e; 908 + padding: 15px; 909 + border-radius: 5px; 910 + display: grid; 911 + grid-template-columns: 1fr 1fr; 912 + gap: 8px; 913 + border: 1px solid #2a4a2a; 914 + } 915 + 916 + .plan-intersection { 917 + background: #2a2e3e; 918 + border-color: #3a4a6a; 919 + } 920 + 921 + .plan div { 922 + font-size: 14px; 923 + } 924 + 925 + .intersection-info { 926 + grid-column: 1 / -1; 927 + background: #3a4a3a; 928 + padding: 10px; 929 + border-radius: 4px; 930 + margin-top: 8px; 931 + border: 1px dashed #5a7a5a; 932 + } 933 + 934 + .intersection-step { 935 + display: flex; 936 + gap: 15px; 937 + padding: 5px 0; 938 + font-size: 13px; 939 + color: #8fa; 940 + border-bottom: 1px dotted #4a5a4a; 941 + } 942 + 943 + .intersection-step:last-child { 944 + border-bottom: none; 945 + } 946 + 947 + .intersection-step strong { 948 + color: #afa; 949 + min-width: 100px; 950 + } 951 + 952 + .intersection-details { 953 + display: flex; 954 + gap: 20px; 955 + margin-top: 5px; 956 + font-size: 13px; 957 + color: #8fa; 958 + } 959 + 960 + .query-results { 961 + margin-top: 10px; 962 + } 963 + 964 + .results-header { 965 + color: #8a8; 966 + margin-bottom: 5px; 967 + font-weight: bold; 968 + } 969 + 970 + .loading { 971 + color: #aa8; 972 + font-style: 
italic; 973 + } 974 + 975 + .ingest-result { 976 + margin-top: 10px; 977 + } 978 + 979 + .store-group { 980 + margin-top: 10px; 981 + padding: 5px; 982 + background: #333; 983 + border-radius: 3px; 984 + } 985 + 986 + table { 987 + border-collapse: collapse; 988 + width: 100%; 989 + } 990 + 991 + th, td { 992 + border: 1px solid #444; 993 + padding: 10px; 994 + text-align: left; 995 + } 996 + 997 + th { 998 + background: #2a2a2a; 999 + } 1000 + 1001 + strong { 1002 + color: #b0b0b0; 1003 + } 1004 + ` 1005 + document.head.appendChild(style) 1006 + 1007 + // ============================================================================= 1008 + // Initialize 1009 + // ============================================================================= 1010 + 1011 + async function init() { 1012 + // Install the schema into IndexedDB 1013 + // Version 5: added categories array and cross-spread indexes 1014 + db = await docstore.install('docstore-demo', entriesStore, {version: 5}) 1015 + 1016 + // Create stats manager with idb database 1017 + statsManager = new StatsManager({db}) 1018 + 1019 + // Create query planner 1020 + planner = new QueryPlanner(entriesStore, statsManager) 1021 + 1022 + // Check if we already have data 1023 + const tx = db.transaction(entriesStore.name, 'readonly') 1024 + entryCount = await tx.objectStore(entriesStore.name).count() 1025 + await tx.done 1026 + 1027 + // If empty, seed with initial entries 1028 + if (entryCount === 0) { 1029 + await addEntries(100) 1030 + } else { 1031 + // Update nextEntryId to avoid collisions 1032 + nextEntryId = entryCount 1033 + } 1034 + 1035 + // Render the demo 1036 + await renderDemo() 1037 + } 1038 + 1039 + init().catch(console.error)
+1886 -431
src/main.ts
··· 1 1 import './style.css' 2 2 import {z} from 'zod/mini' 3 + import {parseFeed, parseOpml} from 'feedsmith' 3 4 import { 4 5 docstore, 5 - ingest, 6 6 QueryPlanner, 7 7 StatsManager, 8 8 rows, 9 - rowsIntersect, 9 + count, 10 + docs, 10 11 collect, 12 + putMany, 13 + collectIndexValues, 14 + collectKeyPairs, 11 15 type IDBPDatabase, 12 16 type Infer, 13 17 type QuerySpec, 14 18 type QueryPlan, 19 + type KeyPairCount, 15 20 } from './lib/docstore' 16 21 17 22 // ============================================================================= 18 - // Define our store schema 23 + // CORS Proxy configuration 24 + // ============================================================================= 25 + 26 + const CORS_PROXY = 'https://corsproxy.io/?' 27 + 28 + async function fetchWithCors(url: string): Promise<string> { 29 + const proxiedUrl = CORS_PROXY + encodeURIComponent(url) 30 + const response = await fetch(proxiedUrl) 31 + if (!response.ok) { 32 + throw new Error(`Fetch failed for ${url}: ${response.status} ${response.statusText}`) 33 + } 34 + return response.text() 35 + } 36 + 37 + // ============================================================================= 38 + // Store definitions 19 39 // ============================================================================= 20 40 41 + const feedsStore = docstore.define('feeds', { 42 + schema: { 43 + id: z.string(), 44 + url: z.string(), 45 + title: z.string(), 46 + description: z.string(), 47 + siteUrl: z.optional(z.string()), 48 + imageUrl: z.optional(z.string()), 49 + language: z.optional(z.string()), 50 + author: z.optional(z.string()), 51 + lastFetched: z.date(), 52 + lastBuildDate: z.optional(z.date()), 53 + ttl: z.optional(z.number()), 54 + feedType: z.optional(z.string()), 55 + explicit: z.optional(z.boolean()), 56 + podcastType: z.optional(z.string()), 57 + tags: z.array( 58 + z.object({ 59 + tag: z.string(), 60 + value: z.string(), 61 + }), 62 + ), 63 + }, 64 + indexes: (idx) => [ 65 + 
idx('by_url').on('url'), 66 + idx('by_last_fetched').on('lastFetched'), 67 + idx('by_tag').on('...tags.tag', '...tags.value'), 68 + idx('by_tag_only').on('...tags.tag').replicate('title', 'url'), 69 + ], 70 + }) 71 + 21 72 const entriesStore = docstore.define('entries', { 22 73 schema: { 23 74 id: z.string(), 24 75 feedId: z.string(), 76 + guid: z.optional(z.string()), 25 77 title: z.string(), 78 + link: z.optional(z.string()), 79 + content: z.string(), 80 + summary: z.optional(z.string()), 26 81 publishedAt: z.date(), 27 - status: z.enum(['draft', 'published', 'archived']), 28 - tags: z.array(z.object({tag: z.string(), score: z.number()})), 29 - authorId: z.string(), 82 + updatedAt: z.optional(z.date()), 83 + author: z.optional(z.string()), 84 + duration: z.optional(z.number()), 85 + episodeNumber: z.optional(z.number()), 86 + seasonNumber: z.optional(z.number()), 87 + episodeType: z.optional(z.string()), 88 + explicit: z.optional(z.boolean()), 89 + enclosureUrl: z.optional(z.string()), 90 + enclosureType: z.optional(z.string()), 91 + enclosureLength: z.optional(z.number()), 92 + tags: z.array( 93 + z.object({ 94 + tag: z.string(), 95 + value: z.string(), 96 + }), 97 + ), 30 98 }, 31 99 indexes: (idx) => [ 32 - // Native indexes on primary store 33 100 idx('by_feed_date').on('feedId', 'publishedAt'), 34 - idx('by_status').on('status'), 35 - idx('by_author_date').on('authorId', 'publishedAt'), 36 - // Denormalized indexes (spread over tags) 37 - idx('by_tag').on('...tags.tag').replicate('title', 'publishedAt'), 38 - idx('by_tag_score').on('...tags.tag', '...tags.score'), 39 - idx('by_feed_tag').on('feedId', '...tags.tag').replicate('title'), 101 + idx('by_published').on('publishedAt'), 102 + idx('by_feed').on('feedId').replicate('title', 'publishedAt'), 103 + idx('by_tag').on('...tags.tag', '...tags.value').replicate('title', 'publishedAt', 'feedId'), 104 + idx('by_feed_tag').on('feedId', '...tags.tag').replicate('title', 'publishedAt'), 105 + 
idx('by_tag_value').on('...tags.tag', '...tags.value'), 106 + idx('by_episode').on('feedId', 'seasonNumber', 'episodeNumber'), 107 + idx('by_duration').on('duration'), 108 + idx('by_enclosure_type').on('enclosureType').replicate('title', 'feedId'), 40 109 ], 41 110 }) 42 111 112 + type Feed = Infer<typeof feedsStore.schema> 43 113 type Entry = Infer<typeof entriesStore.schema> 44 114 45 115 // ============================================================================= ··· 47 117 // ============================================================================= 48 118 49 119 let db: IDBPDatabase 50 - let statsManager: StatsManager 51 - let planner: QueryPlanner<typeof entriesStore.schema> 52 - let nextEntryId = 0 53 - let entryCount = 0 120 + let feedsStatsManager: StatsManager 121 + let entriesStatsManager: StatsManager 122 + let feedsPlanner: QueryPlanner<typeof feedsStore.schema> 123 + let entriesPlanner: QueryPlanner<typeof entriesStore.schema> 54 124 55 125 // ============================================================================= 56 - // Generate sample data 126 + // Feed parsing helpers 57 127 // ============================================================================= 58 128 59 - const feeds = ['tech', 'science', 'politics', 'sports', 'entertainment'] 60 - const authors = ['alice', 'bob', 'carol', 'dave', 'eve', 'frank'] 61 - const tagPool = [ 62 - 'javascript', 63 - 'typescript', 64 - 'rust', 65 - 'go', 66 - 'python', 67 - 'react', 68 - 'vue', 69 - 'svelte', 70 - 'node', 71 - 'deno', 72 - 'ai', 73 - 'ml', 74 - 'web', 75 - 'mobile', 76 - 'cloud', 77 - ] 78 - const statuses: ('draft' | 'published' | 'archived')[] = ['draft', 'published', 'archived'] 129 + function generateId(input: string): string { 130 + // Simple hash function for generating IDs 131 + let hash = 0 132 + for (let i = 0; i < input.length; i++) { 133 + const char = input.charCodeAt(i) 134 + hash = (hash << 5) - hash + char 135 + hash = hash & hash 136 + } 137 + return 
Math.abs(hash).toString(36) 138 + } 79 139 80 - function randomTags(): {tag: string; score: number}[] { 81 - const count = Math.floor(Math.random() * 4) + 1 82 - const shuffled = [...tagPool].sort(() => Math.random() - 0.5) 83 - return shuffled.slice(0, count).map((tag) => ({ 84 - tag, 85 - score: Math.floor(Math.random() * 100), 86 - })) 140 + /** 141 + * Parse iTunes duration to seconds. 142 + * Handles formats: "3600", "23:45", "01:23:45", or already a number 143 + */ 144 + function parseDuration(duration: unknown): number | undefined { 145 + if (duration === undefined || duration === null) return undefined 146 + 147 + // If already a number, return it 148 + if (typeof duration === 'number') return duration 149 + 150 + // If not a string, can't parse 151 + if (typeof duration !== 'string') return undefined 152 + 153 + // Try parsing as plain number first (seconds as string) 154 + const asNumber = parseInt(duration, 10) 155 + if (!isNaN(asNumber) && String(asNumber) === duration) { 156 + return asNumber 157 + } 158 + 159 + // Parse HH:MM:SS or MM:SS format 160 + const parts = duration.split(':').map((p) => parseInt(p, 10)) 161 + if (parts.some(isNaN)) return undefined 162 + 163 + if (parts.length === 3) { 164 + // HH:MM:SS 165 + return parts[0] * 3600 + parts[1] * 60 + parts[2] 166 + } else if (parts.length === 2) { 167 + // MM:SS 168 + return parts[0] * 60 + parts[1] 169 + } 170 + 171 + return undefined 87 172 } 88 173 89 - function generateEntry(): Entry { 90 - const now = Date.now() 91 - const id = nextEntryId++ 92 - return { 93 - id: `entry-${id}`, 94 - feedId: feeds[Math.floor(Math.random() * feeds.length)], 95 - title: `Article ${id}: ${tagPool[Math.floor(Math.random() * tagPool.length)]} news`, 96 - publishedAt: new Date(now - Math.random() * 30 * 24 * 60 * 60 * 1000), 97 - status: statuses[Math.floor(Math.random() * statuses.length)], 98 - tags: randomTags(), 99 - authorId: authors[Math.floor(Math.random() * authors.length)], 174 + function 
parseDate(dateStr: string | undefined): Date { 175 + if (!dateStr) return new Date() 176 + const parsed = new Date(dateStr) 177 + return isNaN(parsed.getTime()) ? new Date() : parsed 178 + } 179 + 180 + function extractFeedTags(parsed: ReturnType<typeof parseFeed>['feed'], opmlFolder?: string): Feed['tags'] { 181 + const tags: Feed['tags'] = [] 182 + 183 + // Handle based on feed format 184 + if ('itunes' in parsed && parsed.itunes) { 185 + const itunes = parsed.itunes as { 186 + categories?: Array<{text: string; categories?: Array<{text: string}>}> 187 + explicit?: boolean 188 + type?: string 189 + } 190 + 191 + // iTunes categories (flatten nested) 192 + if (itunes.categories) { 193 + for (const cat of itunes.categories) { 194 + tags.push({tag: 'category', value: cat.text}) 195 + if (cat.categories) { 196 + for (const subcat of cat.categories) { 197 + tags.push({tag: 'category', value: subcat.text}) 198 + tags.push({tag: 'subcategory', value: `${cat.text}/${subcat.text}`}) 199 + } 200 + } 201 + } 202 + } 203 + 204 + // Explicit 205 + if (itunes.explicit !== undefined) { 206 + tags.push({tag: 'explicit', value: String(itunes.explicit)}) 207 + } 208 + 209 + // Podcast type 210 + if (itunes.type) { 211 + tags.push({tag: 'podcast-type', value: itunes.type}) 212 + } 213 + } 214 + 215 + // RSS categories 216 + if ('categories' in parsed && Array.isArray(parsed.categories)) { 217 + for (const cat of parsed.categories as Array<{name: string; domain?: string}>) { 218 + tags.push({tag: 'category', value: cat.name}) 219 + if (cat.domain) { 220 + tags.push({tag: 'category-domain', value: cat.domain}) 221 + } 222 + } 223 + } 224 + 225 + // Language 226 + if ('language' in parsed && parsed.language) { 227 + tags.push({tag: 'language', value: parsed.language as string}) 228 + } 229 + 230 + // OPML folder 231 + if (opmlFolder) { 232 + tags.push({tag: 'opml-folder', value: opmlFolder}) 233 + } 234 + 235 + // Generator (interesting for analysis) 236 + if ('generator' in parsed && 
parsed.generator) { 237 + const gen = parsed.generator 238 + const genValue = typeof gen === 'string' ? gen : (gen as {name?: string})?.name || String(gen) 239 + if (genValue && genValue !== '[object Object]') { 240 + tags.push({tag: 'generator', value: genValue}) 241 + } 100 242 } 243 + 244 + return tags 101 245 } 102 246 103 - async function addEntries(count: number) { 104 - for (let i = 0; i < count; i++) { 105 - const entry = generateEntry() 106 - await docstore.put(db, entriesStore, entry) 247 + function extractEntryTags(item: Record<string, unknown>, feedId: string): Entry['tags'] { 248 + const tags: Entry['tags'] = [] 107 249 108 - // Record stats for the primary store fields (using logical key names) 109 - await statsManager.recordWrite(entriesStore.name, { 110 - id: entry.id, 111 - feedId: entry.feedId, 112 - status: entry.status, 113 - authorId: entry.authorId, 114 - publishedAt: entry.publishedAt.toISOString(), 115 - }) 250 + // Categories 251 + if ('categories' in item && Array.isArray(item.categories)) { 252 + for (const cat of item.categories as Array<{name: string}>) { 253 + tags.push({tag: 'category', value: cat.name}) 254 + } 255 + } 116 256 117 - // Record stats for spread fields (tags) - use logical key names 118 - // The query planner looks up stats using logical keys like '...tags.tag' 119 - for (const tag of entry.tags) { 120 - await statsManager.recordWrite(entriesStore.name, { 121 - '...tags.tag': tag.tag, 122 - '...tags.score': tag.score, 123 - }) 257 + // iTunes metadata 258 + if ('itunes' in item && item.itunes) { 259 + const itunes = item.itunes as { 260 + explicit?: boolean 261 + episodeType?: string 262 + keywords?: string[] 124 263 } 125 264 126 - entryCount++ 265 + if (itunes.explicit !== undefined) { 266 + tags.push({tag: 'explicit', value: String(itunes.explicit)}) 267 + } 268 + if (itunes.episodeType) { 269 + tags.push({tag: 'episode-type', value: itunes.episodeType}) 270 + } 271 + if (itunes.keywords) { 272 + for (const kw of 
itunes.keywords) { 273 + tags.push({tag: 'keyword', value: kw}) 274 + } 275 + } 127 276 } 277 + 278 + // Enclosure detection 279 + const enclosures = ('enclosures' in item ? item.enclosures : null) as Array<{type?: string}> | null 280 + if (enclosures && enclosures.length > 0) { 281 + tags.push({tag: 'has-enclosure', value: 'true'}) 282 + const encType = enclosures[0]?.type || '' 283 + if (encType.startsWith('audio/')) { 284 + tags.push({tag: 'media-type', value: 'audio'}) 285 + } else if (encType.startsWith('video/')) { 286 + tags.push({tag: 'media-type', value: 'video'}) 287 + } 288 + } else { 289 + tags.push({tag: 'has-enclosure', value: 'false'}) 290 + } 291 + 292 + // Author 293 + if ('authors' in item && Array.isArray(item.authors) && item.authors.length > 0) { 294 + const author = item.authors[0] 295 + const authorName = typeof author === 'string' ? author : (author as {name?: string})?.name 296 + if (authorName) { 297 + tags.push({tag: 'author', value: authorName}) 298 + } 299 + } 300 + 301 + return tags 302 + } 303 + 304 + interface ParsedFeedResult { 305 + feed: Feed 306 + entries: Entry[] 307 + } 308 + 309 + function parseFeedContent(url: string, content: string, opmlFolder?: string): ParsedFeedResult { 310 + const result = parseFeed(content) 311 + const parsed = result.feed as Record<string, unknown> 312 + const feedId = generateId(url) 313 + 314 + // Extract feed metadata 315 + const feed: Feed = { 316 + id: feedId, 317 + url, 318 + title: (parsed.title as string) || url, 319 + description: (parsed.description as string) || '', 320 + siteUrl: (parsed.link as string) || undefined, 321 + imageUrl: undefined, 322 + language: (parsed.language as string) || undefined, 323 + author: undefined, 324 + lastFetched: new Date(), 325 + lastBuildDate: parsed.lastBuildDate ? 
parseDate(parsed.lastBuildDate as string) : undefined, 326 + ttl: (parsed.ttl as number) || undefined, 327 + feedType: result.format, 328 + explicit: undefined, 329 + podcastType: undefined, 330 + tags: extractFeedTags(result.feed, opmlFolder), 331 + } 332 + 333 + // Extract image 334 + if ('image' in parsed && parsed.image) { 335 + const img = parsed.image as {url?: string} 336 + feed.imageUrl = img.url 337 + } 338 + if ('itunes' in parsed && parsed.itunes) { 339 + const itunes = parsed.itunes as {image?: string; author?: string; explicit?: boolean; type?: string} 340 + if (itunes.image) feed.imageUrl = itunes.image 341 + if (itunes.author) feed.author = itunes.author 342 + if (itunes.explicit !== undefined) feed.explicit = itunes.explicit 343 + if (itunes.type) feed.podcastType = itunes.type 344 + } 345 + 346 + // Extract entries (RSS uses 'items', Atom uses 'entries') 347 + const items = 348 + (parsed.items as Array<Record<string, unknown>>) || 349 + (parsed.entries as Array<Record<string, unknown>>) || 350 + [] 351 + const entries: Entry[] = items.map((item) => { 352 + const guid = item.guid as {value?: string} | string | undefined 353 + const guidValue = typeof guid === 'string' ? guid : guid?.value 354 + const itemAtomId = item.id as string | undefined 355 + const entryId = guidValue 356 + ? generateId(guidValue) 357 + : itemAtomId 358 + ? 
generateId(itemAtomId) 359 + : generateId(`${feedId}-${item.title || Date.now()}`) 360 + 361 + // Get enclosure info 362 + const enclosures = item.enclosures as Array<{url: string; type: string; length: number}> | undefined 363 + const enclosure = enclosures?.[0] 364 + 365 + // Get iTunes episode info 366 + const itunes = item.itunes as 367 + | { 368 + duration?: number 369 + episode?: number 370 + season?: number 371 + episodeType?: string 372 + explicit?: boolean 373 + author?: string 374 + } 375 + | undefined 376 + 377 + // Get content - prefer content:encoded, then description, then Atom content 378 + const contentNs = item.content as {encoded?: string; value?: string} | string | undefined 379 + let fullContent = '' 380 + if (typeof contentNs === 'string') { 381 + fullContent = contentNs 382 + } else if (contentNs?.encoded) { 383 + fullContent = contentNs.encoded 384 + } else if (contentNs?.value) { 385 + fullContent = contentNs.value 386 + } else if (item.description) { 387 + fullContent = item.description as string 388 + } 389 + 390 + // Get summary (RSS description or Atom summary) 391 + const summaryField = item.summary as {value?: string} | string | undefined 392 + const summaryText = 393 + typeof summaryField === 'string' 394 + ? summaryField 395 + : summaryField?.value || (item.description as string) || undefined 396 + 397 + // Get author 398 + let author: string | undefined 399 + if (itunes?.author) { 400 + author = itunes.author 401 + } else if (item.authors && Array.isArray(item.authors) && item.authors.length > 0) { 402 + const firstAuthor = item.authors[0] 403 + author = typeof firstAuthor === 'string' ? 
firstAuthor : (firstAuthor as {name?: string})?.name 404 + } 405 + 406 + // Get link (RSS uses 'link' string, Atom uses 'links' array) 407 + let link: string | undefined = item.link as string | undefined 408 + if (!link && item.links && Array.isArray(item.links)) { 409 + const altLink = (item.links as Array<{href?: string; rel?: string}>).find( 410 + (l) => l.rel === 'alternate' || !l.rel, 411 + ) 412 + link = altLink?.href 413 + } 414 + 415 + // Get guid/id (RSS uses 'guid', Atom uses 'id') 416 + const finalGuid = guidValue || itemAtomId 417 + 418 + // Get published date (RSS uses 'pubDate', Atom uses 'published') 419 + const pubDateStr = (item.pubDate as string) || (item.published as string) 420 + const updatedStr = item.updated as string | undefined 421 + 422 + return { 423 + id: entryId, 424 + feedId, 425 + guid: finalGuid, 426 + title: (item.title as string) || 'Untitled', 427 + link, 428 + content: fullContent || summaryText || '', 429 + summary: summaryText !== fullContent ? summaryText : undefined, 430 + publishedAt: parseDate(pubDateStr), 431 + updatedAt: updatedStr ? 
parseDate(updatedStr) : undefined, 432 + author, 433 + duration: parseDuration(itunes?.duration), 434 + episodeNumber: itunes?.episode, 435 + seasonNumber: itunes?.season, 436 + episodeType: itunes?.episodeType, 437 + explicit: itunes?.explicit, 438 + enclosureUrl: enclosure?.url, 439 + enclosureType: enclosure?.type, 440 + enclosureLength: enclosure?.length, 441 + tags: extractEntryTags(item, feedId), 442 + } 443 + }) 444 + 445 + return {feed, entries} 128 446 } 129 447 130 448 // ============================================================================= 131 - // Query execution using real IDB 449 + // Database operations 132 450 // ============================================================================= 133 451 134 - async function executeQuery(plan: QueryPlan): Promise<Record<string, unknown>[]> { 135 - const ctx = { 136 - db, 137 - storeDef: entriesStore as any, // ExecutionContext expects generic StoreDef 452 + async function saveFeed(feed: Feed): Promise<void> { 453 + await putMany(db, feedsStore, [feed], feedsStatsManager) 454 + } 455 + 456 + async function saveEntries(entries: Entry[]): Promise<void> { 457 + if (entries.length === 0) return 458 + await putMany(db, entriesStore, entries, entriesStatsManager) 459 + } 460 + 461 + async function fetchAndParseFeed(url: string, opmlFolder?: string): Promise<ParsedFeedResult> { 462 + const content = await fetchWithCors(url) 463 + return parseFeedContent(url, content, opmlFolder) 464 + } 465 + 466 + async function addFeed(url: string, opmlFolder?: string): Promise<{feed: Feed; entriesCount: number}> { 467 + const {feed, entries} = await fetchAndParseFeed(url, opmlFolder) 468 + await saveFeed(feed) 469 + await saveEntries(entries) 470 + return {feed, entriesCount: entries.length} 471 + } 472 + 473 + async function refreshFeed(feedId: string): Promise<{entriesCount: number}> { 474 + // Get existing feed 475 + const existingFeed = await docstore.get(db, feedsStore, feedId) 476 + if (!existingFeed) throw new 
Error(`Feed not found: ${feedId}`) 477 + 478 + // Preserve OPML folder tag if present 479 + const opmlFolder = existingFeed.tags.find((t) => t.tag === 'opml-folder')?.value 480 + 481 + const {feed, entries} = await fetchAndParseFeed(existingFeed.url, opmlFolder) 482 + await saveFeed(feed) 483 + await saveEntries(entries) 484 + return {entriesCount: entries.length} 485 + } 486 + 487 + async function refreshAllFeeds(): Promise<{feedsRefreshed: number; totalEntries: number}> { 488 + const allFeeds = await collectAll( 489 + docs( 490 + {db, storeDef: feedsStore as any}, 491 + { 492 + source: 'primary', 493 + indexName: null, 494 + keyRange: null, 495 + direction: 'next', 496 + filter: [], 497 + needsSort: false, 498 + availableFields: Object.keys(feedsStore.schema), 499 + estimatedRows: 1000, 500 + selectivity: 1, 501 + }, 502 + ), 503 + ) 504 + 505 + let feedsRefreshed = 0 506 + let totalEntries = 0 507 + 508 + for (const feed of allFeeds as Feed[]) { 509 + try { 510 + const result = await refreshFeed(feed.id) 511 + feedsRefreshed++ 512 + totalEntries += result.entriesCount 513 + } catch (err) { 514 + console.error(`Failed to refresh feed ${feed.url}:`, err) 515 + } 138 516 } 139 517 140 - // Use rowsIntersect when the plan has an intersection, otherwise use regular rows 141 - // Enable analyze mode to show timing info in the console 142 - const options = {analyze: true} 143 - const rowGenerator = plan.intersection ? 
rowsIntersect(ctx, plan, options) : rows(ctx, plan, options) 144 - const results = await collect(rowGenerator, 5) 518 + return {feedsRefreshed, totalEntries} 519 + } 520 + 521 + async function collectAll<T>(gen: AsyncGenerator<T>): Promise<T[]> { 522 + const results: T[] = [] 523 + for await (const item of gen) { 524 + results.push(item) 525 + } 145 526 return results 146 527 } 147 528 148 529 // ============================================================================= 149 - // Get counts from IDB 530 + // OPML Import 150 531 // ============================================================================= 151 532 152 - async function getStoreCounts(): Promise<Map<string, number>> { 153 - const counts = new Map<string, number>() 533 + interface OpmlOutline { 534 + text?: string 535 + title?: string 536 + xmlUrl?: string 537 + outlines?: OpmlOutline[] 538 + } 539 + 540 + function flattenOpmlOutlines( 541 + outlines: OpmlOutline[], 542 + folder?: string, 543 + ): Array<{url: string; folder?: string}> { 544 + const results: Array<{url: string; folder?: string}> = [] 545 + 546 + for (const outline of outlines) { 547 + if (outline.xmlUrl) { 548 + results.push({url: outline.xmlUrl, folder}) 549 + } 550 + if (outline.outlines && outline.outlines.length > 0) { 551 + const subFolder = outline.text || outline.title || folder 552 + results.push(...flattenOpmlOutlines(outline.outlines, subFolder)) 553 + } 554 + } 555 + 556 + return results 557 + } 154 558 155 - // Count primary store 156 - const tx = db.transaction(entriesStore.name, 'readonly') 157 - const primaryCount = await tx.objectStore(entriesStore.name).count() 158 - counts.set(entriesStore.name, primaryCount) 159 - await tx.done 559 + async function importOpml( 560 + content: string, 561 + ): Promise<{feedsAdded: number; totalEntries: number; errors: string[]}> { 562 + const opml = parseOpml(content) 563 + const outlines = opml.body?.outlines || [] 564 + const feedUrls = flattenOpmlOutlines(outlines as 
OpmlOutline[]) 565 + 566 + let feedsAdded = 0 567 + let totalEntries = 0 568 + const errors: string[] = [] 160 569 161 - // Count index stores 162 - for (const group of entriesStore.indexStoreGroups) { 163 - const indexTx = db.transaction(group.storeName, 'readonly') 164 - const indexCount = await indexTx.objectStore(group.storeName).count() 165 - counts.set(group.storeName, indexCount) 166 - await indexTx.done 570 + for (const {url, folder} of feedUrls) { 571 + try { 572 + const result = await addFeed(url, folder) 573 + feedsAdded++ 574 + totalEntries += result.entriesCount 575 + updateStatus(`Imported ${feedsAdded}/${feedUrls.length} feeds...`) 576 + } catch (err) { 577 + errors.push(`${url}: ${err instanceof Error ? err.message : String(err)}`) 578 + } 167 579 } 168 580 169 - return counts 581 + return {feedsAdded, totalEntries, errors} 582 + } 583 + 584 + // ============================================================================= 585 + // Query execution 586 + // ============================================================================= 587 + 588 + async function executeQuery( 589 + planner: QueryPlanner<any>, 590 + storeDef: any, 591 + spec: QuerySpec, 592 + ): Promise<{rows: Record<string, unknown>[]; plan: QueryPlan}> { 593 + const plan = await planner.plan(spec) 594 + const ctx = {db, storeDef} 595 + const options = {analyze: true, orderBy: spec.orderBy} 596 + const rowGenerator = rows(ctx, plan, options) 597 + const results = await collect(rowGenerator, 20) 598 + return {rows: results as Record<string, unknown>[], plan} 599 + } 600 + 601 + async function executeDocsQuery( 602 + planner: QueryPlanner<any>, 603 + storeDef: any, 604 + spec: QuerySpec, 605 + ): Promise<{docs: Record<string, unknown>[]; plan: QueryPlan}> { 606 + const plan = await planner.plan(spec) 607 + const ctx = {db, storeDef} 608 + const options = {analyze: true} 609 + const docGenerator = docs(ctx, plan, options) 610 + const results = await collect(docGenerator, 10) 611 + return 
{docs: results as Record<string, unknown>[], plan} 612 + } 613 + 614 + async function executeCountQuery( 615 + planner: QueryPlanner<any>, 616 + storeDef: any, 617 + spec: QuerySpec, 618 + ): Promise<{count: number; plan: QueryPlan}> { 619 + const plan = await planner.plan(spec) 620 + const ctx = {db, storeDef} 621 + const options = {analyze: true} 622 + const result = await count(ctx, plan, options) 623 + return {count: result, plan} 170 624 } 171 625 172 626 // ============================================================================= 173 - // Sample queries 627 + // Sample queries for testing 174 628 // ============================================================================= 175 629 176 - const sampleQueries: {name: string; spec: QuerySpec}[] = [ 630 + const sampleQueries: Array<{name: string; store: 'feeds' | 'entries'; spec: QuerySpec}> = [ 177 631 { 178 - name: 'All entries from tech feed, newest first', 632 + name: 'All entries (newest first)', 633 + store: 'entries', 179 634 spec: { 180 635 store: 'entries', 181 - where: [{key: 'feedId', op: '=', value: 'tech'}], 636 + where: [], 182 637 orderBy: {key: 'publishedAt', direction: 'desc'}, 183 638 }, 184 639 }, 185 640 { 186 - name: 'Published entries only', 641 + name: 'Entries from specific feed', 642 + store: 'entries', 187 643 spec: { 188 644 store: 'entries', 189 - where: [{key: 'status', op: '=', value: 'published'}], 645 + where: [{key: 'feedId', op: '=', value: '__FEED_ID__'}], 646 + orderBy: {key: 'publishedAt', direction: 'desc'}, 190 647 }, 191 648 }, 192 649 { 193 - name: 'Entries tagged with "javascript"', 650 + name: 'Audio episodes only', 651 + store: 'entries', 194 652 spec: { 195 653 store: 'entries', 196 - where: [{key: '...tags.tag', op: '=', value: 'javascript'}], 654 + where: [ 655 + {key: '...tags.tag', op: '=', value: 'media-type'}, 656 + {key: '...tags.value', op: '=', value: 'audio'}, 657 + ], 197 658 }, 198 659 }, 199 660 { 200 - name: 'Tech feed + javascript tag', 661 + name: 
'Entries with category tag', 662 + store: 'entries', 663 + spec: { 664 + store: 'entries', 665 + where: [{key: '...tags.tag', op: '=', value: 'category'}], 666 + }, 667 + }, 668 + { 669 + name: 'Long episodes (>30 min)', 670 + store: 'entries', 671 + spec: { 672 + store: 'entries', 673 + where: [{key: 'duration', op: '>', value: 1800}], 674 + }, 675 + }, 676 + { 677 + name: 'Short episodes (<10 min)', 678 + store: 'entries', 679 + spec: { 680 + store: 'entries', 681 + where: [{key: 'duration', op: '<', value: 600}], 682 + }, 683 + }, 684 + { 685 + name: 'Explicit content', 686 + store: 'entries', 201 687 spec: { 202 688 store: 'entries', 203 689 where: [ 204 - {key: 'feedId', op: '=', value: 'tech'}, 205 - {key: '...tags.tag', op: '=', value: 'javascript'}, 690 + {key: '...tags.tag', op: '=', value: 'explicit'}, 691 + {key: '...tags.value', op: '=', value: 'true'}, 206 692 ], 207 693 }, 208 694 }, 209 695 { 210 - name: 'Entries by alice, recent', 696 + name: 'Video content', 697 + store: 'entries', 211 698 spec: { 212 699 store: 'entries', 213 700 where: [ 214 - {key: 'authorId', op: '=', value: 'alice'}, 215 - {key: 'publishedAt', op: '>', value: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000)}, 701 + {key: '...tags.tag', op: '=', value: 'media-type'}, 702 + {key: '...tags.value', op: '=', value: 'video'}, 216 703 ], 217 - orderBy: {key: 'publishedAt', direction: 'desc'}, 218 704 }, 219 705 }, 220 706 { 221 - name: 'Primary key lookup (by id)', 707 + name: 'Feeds by category', 708 + store: 'feeds', 709 + spec: { 710 + store: 'feeds', 711 + where: [{key: '...tags.tag', op: '=', value: 'category'}], 712 + }, 713 + }, 714 + { 715 + name: 'Feeds with language tag', 716 + store: 'feeds', 717 + spec: { 718 + store: 'feeds', 719 + where: [{key: '...tags.tag', op: '=', value: 'language'}], 720 + }, 721 + }, 722 + { 723 + name: 'Recently fetched feeds', 724 + store: 'feeds', 725 + spec: { 726 + store: 'feeds', 727 + where: [], 728 + orderBy: {key: 'lastFetched', direction: 
'desc'}, 729 + }, 730 + }, 731 + { 732 + name: 'Entries by enclosure type', 733 + store: 'entries', 222 734 spec: { 223 735 store: 'entries', 224 - where: [{key: 'id', op: '=', value: 'entry-42'}], 736 + where: [{key: 'enclosureType', op: '=', value: 'audio/mpeg'}], 737 + }, 738 + }, 739 + // === INTERSECTION QUERIES using new SpreadMatch syntax === 740 + // Each {spread, where} block must be satisfied by ONE row in the spread array. 741 + // Multiple blocks create an intersection (doc must have rows matching ALL blocks). 742 + { 743 + name: '🔀 Audio AND Explicit (intersection)', 744 + store: 'entries', 745 + spec: { 746 + store: 'entries', 747 + where: [], 748 + matches: [ 749 + {spread: 'tags', where: {tag: 'media-type', value: 'audio'}}, 750 + {spread: 'tags', where: {tag: 'explicit', value: 'true'}}, 751 + ], 225 752 }, 226 753 }, 227 754 { 228 - name: 'Full table scan (filter by title - no index)', 755 + name: '🔀 Full episodes AND Explicit (intersection)', 756 + store: 'entries', 229 757 spec: { 230 758 store: 'entries', 231 - where: [{key: 'title', op: '=', value: 'Article 1: ai news'}], 759 + where: [], 760 + matches: [ 761 + {spread: 'tags', where: {tag: 'episode-type', value: 'full'}}, 762 + {spread: 'tags', where: {tag: 'explicit', value: 'true'}}, 763 + ], 232 764 }, 233 765 }, 234 - // --- Queries that would benefit from index joins --- 235 766 { 236 - name: 'Published + javascript tag (status index + tag index)', 767 + name: '🔀 Audio + Full + Explicit (3-way intersection)', 768 + store: 'entries', 237 769 spec: { 238 770 store: 'entries', 239 - where: [ 240 - {key: 'status', op: '=', value: 'published'}, 241 - {key: '...tags.tag', op: '=', value: 'javascript'}, 771 + where: [], 772 + matches: [ 773 + {spread: 'tags', where: {tag: 'media-type', value: 'audio'}}, 774 + {spread: 'tags', where: {tag: 'episode-type', value: 'full'}}, 775 + {spread: 'tags', where: {tag: 'explicit', value: 'true'}}, 242 776 ], 243 777 }, 244 778 }, 245 779 { 246 - name: 
'Alice + typescript tag (author index + tag index)', 780 + name: '🔀 Specific feed + Audio (cross-index)', 781 + store: 'entries', 782 + spec: { 783 + store: 'entries', 784 + where: [{key: 'feedId', op: '=', value: '__FEED_ID__'}], 785 + matches: [{spread: 'tags', where: {tag: 'media-type', value: 'audio'}}], 786 + }, 787 + }, 788 + { 789 + name: '🔀 Specific feed + Explicit content', 790 + store: 'entries', 791 + spec: { 792 + store: 'entries', 793 + where: [{key: 'feedId', op: '=', value: '__FEED_ID__'}], 794 + matches: [{spread: 'tags', where: {tag: 'explicit', value: 'true'}}], 795 + }, 796 + }, 797 + { 798 + name: '🔀 Category AND has-enclosure (self-join on tag)', 799 + store: 'entries', 247 800 spec: { 248 801 store: 'entries', 249 - where: [ 250 - {key: 'authorId', op: '=', value: 'alice'}, 251 - {key: '...tags.tag', op: '=', value: 'typescript'}, 802 + where: [], 803 + matches: [ 804 + {spread: 'tags', where: {tag: 'category'}}, // Just tag, no value 805 + {spread: 'tags', where: {tag: 'has-enclosure'}}, 252 806 ], 253 807 }, 254 808 }, 255 809 { 256 - name: 'Science feed + published status (feed index + status index)', 810 + name: '🔀 Video + Has enclosure', 811 + store: 'entries', 257 812 spec: { 258 813 store: 'entries', 259 - where: [ 260 - {key: 'feedId', op: '=', value: 'science'}, 261 - {key: 'status', op: '=', value: 'published'}, 814 + where: [], 815 + matches: [ 816 + {spread: 'tags', where: {tag: 'media-type', value: 'video'}}, 817 + {spread: 'tags', where: {tag: 'has-enclosure', value: 'true'}}, 262 818 ], 263 819 }, 264 820 }, 265 821 { 266 - name: 'Bob + draft + react tag (3-way join potential)', 822 + name: '🔀 Bonus episodes + Explicit', 823 + store: 'entries', 267 824 spec: { 268 825 store: 'entries', 269 - where: [ 270 - {key: 'authorId', op: '=', value: 'bob'}, 271 - {key: 'status', op: '=', value: 'draft'}, 272 - {key: '...tags.tag', op: '=', value: 'react'}, 826 + where: [], 827 + matches: [ 828 + {spread: 'tags', where: {tag: 
'episode-type', value: 'bonus'}}, 829 + {spread: 'tags', where: {tag: 'explicit', value: 'true'}}, 273 830 ], 274 831 }, 275 832 }, 276 833 { 277 - name: 'High score tags in tech feed (feed+tag index + score filter)', 834 + name: '🔀 Feed tags: Fiction + Serial (intersection)', 835 + store: 'feeds', 836 + spec: { 837 + store: 'feeds', 838 + where: [], 839 + matches: [ 840 + {spread: 'tags', where: {tag: 'category', value: 'Fiction'}}, 841 + {spread: 'tags', where: {tag: 'podcast-type', value: 'serial'}}, 842 + ], 843 + }, 844 + }, 845 + { 846 + name: '🔀 Feed tags: Comedy + Explicit', 847 + store: 'feeds', 848 + spec: { 849 + store: 'feeds', 850 + where: [], 851 + matches: [ 852 + {spread: 'tags', where: {tag: 'category', value: 'Comedy'}}, 853 + {spread: 'tags', where: {tag: 'explicit', value: 'true'}}, 854 + ], 855 + }, 856 + }, 857 + // === Single SpreadMatch queries (should work like regular tag queries) === 858 + { 859 + name: '🏷️ Audio episodes (single match)', 860 + store: 'entries', 861 + spec: { 862 + store: 'entries', 863 + where: [], 864 + matches: [{spread: 'tags', where: {tag: 'media-type', value: 'audio'}}], 865 + }, 866 + }, 867 + { 868 + name: '🏷️ Explicit content (single match)', 869 + store: 'entries', 870 + spec: { 871 + store: 'entries', 872 + where: [], 873 + matches: [{spread: 'tags', where: {tag: 'explicit', value: 'true'}}], 874 + }, 875 + }, 876 + // === SORTING QUERIES (Needs Sort: true) === 877 + // These queries use an index for filtering but need post-query sorting 878 + // because the ORDER BY field is not covered by the index. 
879 + { 880 + name: '🔃 Audio entries by duration (sort required)', 881 + store: 'entries', 278 882 spec: { 279 883 store: 'entries', 280 884 where: [ 281 - {key: 'feedId', op: '=', value: 'tech'}, 282 - {key: '...tags.tag', op: '=', value: 'ai'}, 283 - {key: '...tags.score', op: '>=', value: 50}, 885 + {key: '...tags.tag', op: '=', value: 'media-type'}, 886 + {key: '...tags.value', op: '=', value: 'audio'}, 284 887 ], 888 + orderBy: {key: 'duration', direction: 'desc'}, 889 + }, 890 + }, 891 + { 892 + name: '🔃 Feed entries by title (sort required)', 893 + store: 'entries', 894 + spec: { 895 + store: 'entries', 896 + where: [{key: 'feedId', op: '=', value: '__FEED_ID__'}], 897 + orderBy: {key: 'title', direction: 'asc'}, 898 + }, 899 + }, 900 + { 901 + name: '🔃 MP3 entries by date (sort required)', 902 + store: 'entries', 903 + spec: { 904 + store: 'entries', 905 + where: [{key: 'enclosureType', op: '=', value: 'audio/mpeg'}], 906 + orderBy: {key: 'publishedAt', direction: 'desc'}, 285 907 }, 286 908 }, 287 909 ] 288 910 289 911 // ============================================================================= 290 - // UI rendering 912 + // UI State 291 913 // ============================================================================= 292 914 293 - function formatPlan(plan: QueryPlan, queryId: number): string { 294 - const intersectionInfo = plan.intersection 295 - ? 
` 296 - <div class="intersection-info"> 297 - <strong>INTERSECTION with:</strong> 298 - <div class="intersection-details"> 299 - <span>Source: ${plan.intersection.source}</span> 300 - <span>Index: ${plan.intersection.indexName}</span> 301 - <span>Selectivity: ${(plan.intersection.selectivity * 100).toFixed(2)}%</span> 302 - </div> 303 - </div> 304 - ` 305 - : '' 915 + let currentView: 'feeds' | 'entries' | 'queries' | 'tagcloud' = 'feeds' 916 + 917 + function updateStatus(message: string) { 918 + const statusEl = document.getElementById('status-message') 919 + if (statusEl) statusEl.textContent = message 920 + } 921 + 922 + // ============================================================================= 923 + // UI Rendering 924 + // ============================================================================= 925 + 926 + async function getStoreCounts(): Promise<{feeds: number; entries: number}> { 927 + const feedsTx = db.transaction(feedsStore.name, 'readonly') 928 + const feedsCount = await feedsTx.objectStore(feedsStore.name).count() 929 + await feedsTx.done 930 + 931 + const entriesTx = db.transaction(entriesStore.name, 'readonly') 932 + const entriesCount = await entriesTx.objectStore(entriesStore.name).count() 933 + await entriesTx.done 934 + 935 + return {feeds: feedsCount, entries: entriesCount} 936 + } 306 937 307 - return ` 308 - <div class="plan ${plan.intersection ? 'plan-intersection' : ''}" data-query-id="${queryId}"> 938 + function formatPlan(plan: QueryPlan): string { 939 + let html = ` 940 + <div class="plan ${plan.intersection ? 'plan-intersection' : ''}"> 309 941 <div><strong>Source:</strong> ${plan.source}</div> 310 942 <div><strong>Index:</strong> ${plan.indexName ?? '<full scan>'}</div> 311 943 <div><strong>Key Range:</strong> ${plan.keyRange ? JSON.stringify(plan.keyRange) : 'none'}</div> 312 944 <div><strong>Direction:</strong> ${plan.direction}</div> 313 945 <div><strong>Post-filters:</strong> ${plan.filter.length > 0 ? 
plan.filter.map((f) => `${f.key} ${f.op} ${f.value}`).join(', ') : 'none'}</div> 314 946 <div><strong>Needs Sort:</strong> ${plan.needsSort}</div> 315 - <div><strong>Available Fields:</strong> ${plan.availableFields.join(', ')}</div> 316 947 <div><strong>Selectivity:</strong> ${(plan.selectivity * 100).toFixed(2)}%</div> 317 948 <div><strong>Est. Rows:</strong> ${plan.estimatedRows}</div> 318 - ${intersectionInfo} 319 949 </div> 320 - <button class="run-query-btn" data-query-id="${queryId}">Run Query (show 5 rows)</button> 321 - <div class="query-results" id="results-${queryId}"></div> 322 950 ` 323 - } 324 951 325 - async function formatIngestResult(entry: Entry) { 326 - const result = ingest(entriesStore, entry) 327 - let html = '<div class="ingest-result">' 328 - 329 - for (const [storeName, rows] of result.byStore) { 330 - html += `<div class="store-group"><strong>${storeName}</strong> (${rows.length} rows)</div>` 331 - html += '<pre>' + JSON.stringify(rows, null, 2) + '</pre>' 952 + if (plan.intersection) { 953 + html += ` 954 + <div class="intersection-info"> 955 + <strong>INTERSECTION:</strong> 956 + <div>Source: ${plan.intersection.source}</div> 957 + <div>Index: ${plan.intersection.indexName}</div> 958 + <div>Selectivity: ${(plan.intersection.selectivity * 100).toFixed(2)}%</div> 959 + </div> 960 + ` 332 961 } 333 962 334 - html += '</div>' 335 963 return html 336 964 } 337 965 338 - async function renderDemo() { 339 - // Get a sample entry for display 340 - const sampleEntry = await docstore.get(db, entriesStore, 'entry-0') 966 + async function renderControls(): Promise<string> { 967 + const counts = await getStoreCounts() 341 968 342 - const storeCounts = await getStoreCounts() 969 + return ` 970 + <section class="controls"> 971 + <div class="control-row"> 972 + <input type="file" id="opml-file" accept=".opml,.xml" style="display:none"> 973 + <button id="import-opml-btn">Import OPML</button> 974 + <input type="text" id="feed-url-input" placeholder="Feed 
URL..." style="flex:1"> 975 + <button id="add-feed-btn">Add Feed</button> 976 + <button id="refresh-all-btn">Refresh All</button> 977 + <button id="clear-db-btn" class="danger">Clear Database</button> 978 + </div> 979 + <div class="stats-row"> 980 + <span id="feed-count">Feeds: ${counts.feeds}</span> 981 + <span id="entry-count">Entries: ${counts.entries}</span> 982 + <span id="status-message"></span> 983 + </div> 984 + <div class="nav-row"> 985 + <button class="nav-btn ${currentView === 'feeds' ? 'active' : ''}" data-view="feeds">Feeds</button> 986 + <button class="nav-btn ${currentView === 'entries' ? 'active' : ''}" data-view="entries">Entries</button> 987 + <button class="nav-btn ${currentView === 'tagcloud' ? 'active' : ''}" data-view="tagcloud">Tag Cloud</button> 988 + <button class="nav-btn ${currentView === 'queries' ? 'active' : ''}" data-view="queries">Query Playground</button> 989 + </div> 990 + </section> 991 + ` 992 + } 343 993 344 - let html = ` 345 - <div class="demo"> 346 - <h1>DocStore Query Planner Demo</h1> 347 - <p class="idb-badge">Using real IndexedDB</p> 994 + async function renderFeedsList(): Promise<string> { 995 + const allFeeds = (await collectAll( 996 + docs( 997 + {db, storeDef: feedsStore as any}, 998 + { 999 + source: 'primary', 1000 + indexName: null, 1001 + keyRange: null, 1002 + direction: 'next', 1003 + filter: [], 1004 + needsSort: false, 1005 + availableFields: Object.keys(feedsStore.schema), 1006 + estimatedRows: 1000, 1007 + selectivity: 1, 1008 + }, 1009 + ), 1010 + )) as Feed[] 348 1011 349 - <section class="controls"> 350 - <button id="add-entries-btn">Add 1000 Entries</button> 351 - <button id="clear-db-btn">Clear Database</button> 352 - <span id="entry-count">Entries: ${entryCount}</span> 1012 + if (allFeeds.length === 0) { 1013 + return ` 1014 + <section class="content-section"> 1015 + <h2>Feeds</h2> 1016 + <p class="empty-state">No feeds yet. 
Import an OPML file or add a feed URL above.</p> 353 1017 </section> 1018 + ` 1019 + } 354 1020 355 - <section> 356 - <h2>Store Definition</h2> 357 - <pre>${JSON.stringify( 358 - { 359 - name: entriesStore.name, 360 - nativeIndexes: entriesStore.nativeIndexes.map((i) => ({name: i.name, keys: i.keys})), 361 - indexStoreGroups: entriesStore.indexStoreGroups.map((g) => ({ 362 - storeName: g.storeName, 363 - spreadRoots: [...g.spreadRoots], 364 - indexes: g.indexes.map((i) => ({name: i.name, keys: i.keys})), 365 - allKeys: [...g.allKeys], 366 - })), 367 - }, 368 - null, 369 - 2, 370 - )}</pre> 371 - </section> 1021 + const feedItems = allFeeds 1022 + .map( 1023 + (feed) => ` 1024 + <div class="feed-item" data-feed-id="${feed.id}"> 1025 + <div class="feed-header"> 1026 + ${feed.imageUrl ? `<img src="${feed.imageUrl}" class="feed-image" alt="">` : '<div class="feed-image-placeholder"></div>'} 1027 + <div class="feed-info"> 1028 + <h3 class="feed-title">${escapeHtml(feed.title)}</h3> 1029 + <a href="${feed.url}" target="_blank" class="feed-url">${escapeHtml(feed.url)}</a> 1030 + <div class="feed-meta"> 1031 + <span>Type: ${feed.feedType || 'unknown'}</span> 1032 + <span>Fetched: ${feed.lastFetched.toLocaleString()}</span> 1033 + ${feed.language ? `<span>Lang: ${feed.language}</span>` : ''} 1034 + </div> 1035 + <div class="feed-tags"> 1036 + ${feed.tags 1037 + .slice(0, 5) 1038 + .map((t) => `<span class="tag">${escapeHtml(t.tag)}:${escapeHtml(t.value)}</span>`) 1039 + .join('')} 1040 + ${feed.tags.length > 5 ? 
`<span class="tag-more">+${feed.tags.length - 5} more</span>` : ''} 1041 + </div> 1042 + </div> 1043 + <button class="refresh-feed-btn" data-feed-id="${feed.id}">Refresh</button> 1044 + </div> 1045 + </div> 1046 + `, 1047 + ) 1048 + .join('') 372 1049 373 - <section> 374 - <h2>Sample Entry Ingestion</h2> 375 - <p>Here's how a sample entry gets denormalized into index rows:</p> 376 - <pre>${JSON.stringify( 377 - sampleEntry, 378 - (key, value) => { 379 - if (value instanceof Date) return value.toISOString() 380 - return value 381 - }, 382 - 2, 383 - )}</pre> 384 - ${sampleEntry ? await formatIngestResult(sampleEntry) : '<p>No entries yet</p>'} 1050 + return ` 1051 + <section class="content-section"> 1052 + <h2>Feeds (${allFeeds.length})</h2> 1053 + <div class="feeds-list"> 1054 + ${feedItems} 1055 + </div> 1056 + </section> 1057 + ` 1058 + } 1059 + 1060 + async function renderEntriesList(): Promise<string> { 1061 + const plan: QueryPlan = { 1062 + source: 'primary', 1063 + indexName: 'by_published', 1064 + keyRange: null, 1065 + direction: 'prev', 1066 + filter: [], 1067 + needsSort: false, 1068 + availableFields: Object.keys(entriesStore.schema), 1069 + estimatedRows: 1000, 1070 + selectivity: 1, 1071 + } 1072 + 1073 + const allEntries = (await collect(docs({db, storeDef: entriesStore as any}, plan), 100)) as Entry[] 1074 + 1075 + if (allEntries.length === 0) { 1076 + return ` 1077 + <section class="content-section"> 1078 + <h2>Entries</h2> 1079 + <p class="empty-state">No entries yet. Add some feeds first!</p> 385 1080 </section> 1081 + ` 1082 + } 386 1083 387 - <section> 388 - <h2>Query Plans</h2> 389 - <p>Given <strong id="query-entry-count">${entryCount}</strong> entries, here's how queries get planned. 
Click "Run Query" to see results:</p> 390 - <div id="queries-container"> 1084 + const entryItems = allEntries 1085 + .map( 1086 + (entry) => ` 1087 + <div class="entry-item"> 1088 + <div class="entry-header"> 1089 + <h3 class="entry-title">${escapeHtml(entry.title)}</h3> 1090 + ${entry.link ? `<a href="${entry.link}" target="_blank" class="entry-link">Open</a>` : ''} 1091 + </div> 1092 + <div class="entry-meta"> 1093 + <span class="entry-date">${entry.publishedAt.toLocaleString()}</span> 1094 + ${entry.author ? `<span class="entry-author">by ${escapeHtml(entry.author)}</span>` : ''} 1095 + ${entry.duration ? `<span class="entry-duration">${formatDuration(entry.duration)}</span>` : ''} 1096 + ${entry.episodeNumber ? `<span class="entry-episode">Ep. ${entry.episodeNumber}</span>` : ''} 1097 + </div> 1098 + <div class="entry-tags"> 1099 + ${entry.tags 1100 + .slice(0, 4) 1101 + .map((t) => `<span class="tag">${escapeHtml(t.tag)}:${escapeHtml(t.value)}</span>`) 1102 + .join('')} 1103 + </div> 1104 + <div class="entry-content-preview">${escapeHtml(stripHtml(entry.content).slice(0, 200))}...</div> 1105 + </div> 1106 + `, 1107 + ) 1108 + .join('') 1109 + 1110 + return ` 1111 + <section class="content-section"> 1112 + <h2>Recent Entries (${allEntries.length})</h2> 1113 + <div class="entries-list"> 1114 + ${entryItems} 1115 + </div> 1116 + </section> 391 1117 ` 1118 + } 392 1119 393 - for (let i = 0; i < sampleQueries.length; i++) { 394 - const {name, spec} = sampleQueries[i] 395 - const plan = await planner.plan(spec) 396 - html += ` 397 - <div class="query"> 398 - <h3>${name}</h3> 1120 + async function renderQueryPlayground(): Promise<string> { 1121 + // Get a feed ID for the "specific feed" query 1122 + const allFeeds = (await collectAll( 1123 + docs( 1124 + {db, storeDef: feedsStore as any}, 1125 + { 1126 + source: 'primary', 1127 + indexName: null, 1128 + keyRange: null, 1129 + direction: 'next', 1130 + filter: [], 1131 + needsSort: false, 1132 + availableFields: 
Object.keys(feedsStore.schema), 1133 + estimatedRows: 10, 1134 + selectivity: 1, 1135 + }, 1136 + ), 1137 + )) as Feed[] 1138 + const firstFeedId = allFeeds[0]?.id || 'no-feeds' 1139 + 1140 + const queryItems = sampleQueries 1141 + .map((q, i) => { 1142 + // Replace placeholder with actual feed ID 1143 + const spec = JSON.parse(JSON.stringify(q.spec)) 1144 + for (const clause of spec.where) { 1145 + if (clause.value === '__FEED_ID__') { 1146 + clause.value = firstFeedId 1147 + } 1148 + } 1149 + 1150 + return ` 1151 + <div class="query-item" data-query-idx="${i}"> 1152 + <h3>${escapeHtml(q.name)}</h3> 399 1153 <pre class="query-spec">${JSON.stringify(spec.where, null, 2)}${spec.orderBy ? '\nORDER BY ' + spec.orderBy.key + ' ' + spec.orderBy.direction.toUpperCase() : ''}</pre> 400 - ${formatPlan(plan, i)} 1154 + <div class="query-buttons"> 1155 + <button class="run-rows-btn" data-query-idx="${i}" data-store="${q.store}">Run rows()</button> 1156 + <button class="run-docs-btn" data-query-idx="${i}" data-store="${q.store}">Run docs()</button> 1157 + <button class="run-count-btn" data-query-idx="${i}" data-store="${q.store}">Run count()</button> 1158 + </div> 1159 + <div class="query-results" id="results-${i}"></div> 401 1160 </div> 402 1161 ` 403 - } 1162 + }) 1163 + .join('') 1164 + 1165 + return ` 1166 + <section class="content-section"> 1167 + <h2>Query Playground</h2> 1168 + <p>Test the query planner against your podcast data. Click buttons to see query plans and results.</p> 1169 + <div class="queries-list"> 1170 + ${queryItems} 1171 + </div> 1172 + </section> 1173 + ` 1174 + } 1175 + 1176 + async function renderTagCloud(): Promise<string> { 1177 + const ctx = {db, storeDef: entriesStore as any} 1178 + const feedCtx = {db, storeDef: feedsStore as any} 1179 + 1180 + // Run all 4 collection queries in parallel for better performance 1181 + // Note: Using row counts (countDocuments: false) is ~10x faster than document counts. 
1182 + // For tag clouds, row counts are generally fine since we're showing relative frequencies. 1183 + const startTime = performance.now() 1184 + const [tagTypes, tagPairs, feedTagTypes, feedTagPairs] = await Promise.all([ 1185 + // Get tag types (category, author, media-type, etc.) from entries 1186 + collectIndexValues(ctx, 'by_tag', '...tags.tag', { 1187 + analyze: true, 1188 + countDocuments: false, // Row counts - much faster 1189 + }), 1190 + // Get tag:value pairs for entries 1191 + collectKeyPairs(ctx, 'by_tag', { 1192 + analyze: true, 1193 + countDocuments: false, // Row counts - much faster 1194 + limit: 100, 1195 + }), 1196 + // Get feed tag types - feeds have fewer rows, doc counts are fine 1197 + collectIndexValues(feedCtx, 'by_tag', '...tags.tag', { 1198 + countDocuments: true, 1199 + }), 1200 + // Get feed tag pairs 1201 + collectKeyPairs(feedCtx, 'by_tag', { 1202 + countDocuments: true, 1203 + limit: 50, 1204 + }), 1205 + ]) 1206 + console.debug(`[tagcloud] All queries completed in ${(performance.now() - startTime).toFixed(2)}ms`) 404 1207 405 - html += ` 406 - </div> 1208 + if (tagTypes.length === 0 && feedTagTypes.length === 0) { 1209 + return ` 1210 + <section class="content-section"> 1211 + <h2>Tag Cloud</h2> 1212 + <p class="empty-state">No tags yet. 
Add some feeds first!</p> 407 1213 </section> 1214 + ` 1215 + } 408 1216 409 - <section> 410 - <h2>Cardinality Estimates (via HyperLogLog)</h2> 411 - <table id="cardinality-table"> 412 - <tr><th>Field</th><th>Estimated Cardinality</th></tr> 413 - ` 1217 + // Calculate font sizes based on count (logarithmic scale) 1218 + const maxCount = Math.max(...tagPairs.map((p) => p.count), 1) 1219 + const minCount = Math.min(...tagPairs.map((p) => p.count), 1) 1220 + const calcFontSize = (count: number) => { 1221 + if (maxCount === minCount) return 1 1222 + const normalized = (Math.log(count) - Math.log(minCount)) / (Math.log(maxCount) - Math.log(minCount)) 1223 + return 0.75 + normalized * 1.5 // 0.75rem to 2.25rem 1224 + } 414 1225 415 - const fields = ['feedId', 'status', 'authorId', '...tags.tag', '...tags.score'] 416 - for (const field of fields) { 417 - const card = await statsManager.getCardinality(entriesStore.name, field) 418 - html += `<tr><td>${field}</td><td>${card}</td></tr>` 1226 + // Group tag pairs by tag type for organized display 1227 + const pairsByType = new Map<string, KeyPairCount[]>() 1228 + for (const pair of tagPairs) { 1229 + const type = pair.key1 as string 1230 + if (!pairsByType.has(type)) { 1231 + pairsByType.set(type, []) 1232 + } 1233 + pairsByType.get(type)!.push(pair) 419 1234 } 420 1235 421 - html += ` 422 - </table> 423 - </section> 1236 + // Render tag type summary 1237 + const tagTypeSummary = tagTypes 1238 + .map( 1239 + (t) => ` 1240 + <div class="tag-type-chip" data-tag-type="${escapeHtml(String(t.value))}"> 1241 + <span class="tag-type-name">${escapeHtml(String(t.value))}</span> 1242 + <span class="tag-type-count">${t.count}</span> 1243 + </div> 1244 + `, 1245 + ) 1246 + .join('') 424 1247 425 - <section> 426 - <h2>Index Store Sizes (from IDB)</h2> 427 - <table id="index-sizes-table"> 428 - <tr><th>Store</th><th>Row Count</th></tr> 1248 + // Render the main tag cloud (all values, sized by count) 1249 + const cloudTags = tagPairs 1250 + 
.slice(0, 80) // Limit for performance 1251 + .map((pair) => { 1252 + const fontSize = calcFontSize(pair.count) 1253 + const tagType = pair.key1 as string 1254 + const tagValue = pair.key2 as string 1255 + return ` 1256 + <span class="cloud-tag" 1257 + style="font-size: ${fontSize}rem" 1258 + data-tag-type="${escapeHtml(tagType)}" 1259 + data-tag-value="${escapeHtml(tagValue)}" 1260 + title="${escapeHtml(tagType)}: ${escapeHtml(tagValue)} (${pair.count} entries)"> 1261 + ${escapeHtml(tagValue)} 1262 + </span> 1263 + ` 1264 + }) 1265 + .join('') 1266 + 1267 + // Render category breakdown (grouped by tag type) 1268 + const categoryBreakdown = [...pairsByType.entries()] 1269 + .sort((a, b) => b[1].length - a[1].length) 1270 + .slice(0, 5) // Top 5 tag types 1271 + .map( 1272 + ([type, pairs]) => ` 1273 + <div class="tag-category"> 1274 + <h4 class="tag-category-title">${escapeHtml(type)} <span class="tag-category-count">(${pairs.length} values)</span></h4> 1275 + <div class="tag-category-values"> 1276 + ${pairs 1277 + .slice(0, 15) 1278 + .map( 1279 + (p) => ` 1280 + <span class="category-value" data-tag-type="${escapeHtml(type)}" data-tag-value="${escapeHtml(String(p.key2))}"> 1281 + ${escapeHtml(String(p.key2))} <span class="value-count">${p.count}</span> 1282 + </span> 1283 + `, 1284 + ) 1285 + .join('')} 1286 + ${pairs.length > 15 ? `<span class="more-values">+${pairs.length - 15} more</span>` : ''} 1287 + </div> 1288 + </div> 1289 + `, 1290 + ) 1291 + .join('') 1292 + 1293 + // Feed tags section 1294 + const feedTagsHtml = 1295 + feedTagPairs.length > 0 1296 + ? 
` 1297 + <div class="feed-tags-section"> 1298 + <h3>Feed Tags</h3> 1299 + <div class="feed-tag-cloud"> 1300 + ${feedTagPairs 1301 + .slice(0, 30) 1302 + .map( 1303 + (pair) => ` 1304 + <span class="feed-cloud-tag" title="${escapeHtml(String(pair.key1))}: ${escapeHtml(String(pair.key2))} (${pair.count} feeds)"> 1305 + ${escapeHtml(String(pair.key2))} 1306 + <span class="feed-tag-count">${pair.count}</span> 1307 + </span> 1308 + `, 1309 + ) 1310 + .join('')} 1311 + </div> 1312 + </div> 429 1313 ` 1314 + : '' 430 1315 431 - for (const [storeName, count] of storeCounts) { 432 - const label = storeName === entriesStore.name ? `primary (${storeName})` : storeName 433 - html += `<tr><td>${label}</td><td>${count}</td></tr>` 1316 + return ` 1317 + <section class="content-section tag-cloud-section"> 1318 + <h2>Tag Cloud</h2> 1319 + <p class="tag-cloud-intro">Aggregated tags from your podcast entries using <code>collectIndexValues</code> and <code>collectKeyPairs</code>.</p> 1320 + 1321 + <div class="tag-type-summary"> 1322 + <h3>Tag Types (${tagTypes.length})</h3> 1323 + <div class="tag-type-chips"> 1324 + ${tagTypeSummary} 1325 + </div> 1326 + </div> 1327 + 1328 + <div class="main-cloud"> 1329 + <h3>All Tags</h3> 1330 + <div class="cloud-container"> 1331 + ${cloudTags} 1332 + </div> 1333 + </div> 1334 + 1335 + <div class="category-breakdown"> 1336 + <h3>By Category</h3> 1337 + ${categoryBreakdown} 1338 + </div> 1339 + 1340 + ${feedTagsHtml} 1341 + </section> 1342 + ` 1343 + } 1344 + 1345 + function escapeHtml(text: string): string { 1346 + const div = document.createElement('div') 1347 + div.textContent = text 1348 + return div.innerHTML 1349 + } 1350 + 1351 + function stripHtml(html: string): string { 1352 + const div = document.createElement('div') 1353 + div.innerHTML = html 1354 + return div.textContent || '' 1355 + } 1356 + 1357 + function formatDuration(seconds: number): string { 1358 + const h = Math.floor(seconds / 3600) 1359 + const m = Math.floor((seconds % 3600) 
/ 60) 1360 + const s = seconds % 60 1361 + if (h > 0) { 1362 + return `${h}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')}` 434 1363 } 1364 + return `${m}:${s.toString().padStart(2, '0')}` 1365 + } 435 1366 436 - html += ` 437 - </table> 438 - </section> 1367 + async function render() { 1368 + let contentHtml = '' 1369 + 1370 + switch (currentView) { 1371 + case 'feeds': 1372 + contentHtml = await renderFeedsList() 1373 + break 1374 + case 'entries': 1375 + contentHtml = await renderEntriesList() 1376 + break 1377 + case 'tagcloud': 1378 + contentHtml = await renderTagCloud() 1379 + break 1380 + case 'queries': 1381 + contentHtml = await renderQueryPlayground() 1382 + break 1383 + } 1384 + 1385 + const controlsHtml = await renderControls() 1386 + 1387 + document.querySelector<HTMLDivElement>('#app')!.innerHTML = ` 1388 + <div class="app-container"> 1389 + <h1>Podcast Feed Reader</h1> 1390 + <p class="subtitle">DocStore Query Planner Test Harness</p> 1391 + ${controlsHtml} 1392 + ${contentHtml} 439 1393 </div> 440 1394 ` 441 1395 442 - document.querySelector<HTMLDivElement>('#app')!.innerHTML = html 1396 + attachEventHandlers() 1397 + } 1398 + 1399 + // ============================================================================= 1400 + // Event handlers 1401 + // ============================================================================= 1402 + 1403 + function attachEventHandlers() { 1404 + // Import OPML 1405 + document.getElementById('import-opml-btn')?.addEventListener('click', () => { 1406 + document.getElementById('opml-file')?.click() 1407 + }) 1408 + 1409 + document.getElementById('opml-file')?.addEventListener('change', async (e) => { 1410 + const file = (e.target as HTMLInputElement).files?.[0] 1411 + if (!file) return 1412 + 1413 + updateStatus('Reading OPML file...') 1414 + const content = await file.text() 443 1415 444 - // Attach event handlers 445 - document.getElementById('add-entries-btn')!.onclick = async () => { 446 - const 
btn = document.getElementById('add-entries-btn') as HTMLButtonElement 447 - btn.disabled = true 448 - btn.textContent = 'Adding...' 1416 + updateStatus('Importing feeds...') 1417 + try { 1418 + const result = await importOpml(content) 1419 + updateStatus( 1420 + `Imported ${result.feedsAdded} feeds with ${result.totalEntries} entries. ${result.errors.length} errors.`, 1421 + ) 1422 + if (result.errors.length > 0) { 1423 + console.error('Import errors:', result.errors) 1424 + } 1425 + await render() 1426 + } catch (err) { 1427 + updateStatus(`Import failed: ${err instanceof Error ? err.message : String(err)}`) 1428 + } 1429 + }) 449 1430 450 - await addEntries(1000) 451 - document.getElementById('entry-count')!.textContent = `Entries: ${entryCount}` 1431 + // Add single feed 1432 + document.getElementById('add-feed-btn')?.addEventListener('click', async () => { 1433 + const input = document.getElementById('feed-url-input') as HTMLInputElement 1434 + const url = input.value.trim() 1435 + if (!url) return 452 1436 453 - btn.disabled = false 454 - btn.textContent = 'Add 1000 Entries' 1437 + updateStatus(`Fetching ${url}...`) 1438 + try { 1439 + const result = await addFeed(url) 1440 + updateStatus(`Added "${result.feed.title}" with ${result.entriesCount} entries`) 1441 + input.value = '' 1442 + await render() 1443 + } catch (err) { 1444 + updateStatus(`Failed: ${err instanceof Error ? err.message : String(err)}`) 1445 + } 1446 + }) 455 1447 456 - await refreshPlans() 457 - } 1448 + // Refresh all 1449 + document.getElementById('refresh-all-btn')?.addEventListener('click', async () => { 1450 + updateStatus('Refreshing all feeds...') 1451 + try { 1452 + const result = await refreshAllFeeds() 1453 + updateStatus(`Refreshed ${result.feedsRefreshed} feeds, ${result.totalEntries} entries updated`) 1454 + await render() 1455 + } catch (err) { 1456 + updateStatus(`Refresh failed: ${err instanceof Error ? 
err.message : String(err)}`) 1457 + } 1458 + }) 458 1459 459 - document.getElementById('clear-db-btn')!.onclick = async () => { 460 - // Delete the database and reload 1460 + // Clear database 1461 + document.getElementById('clear-db-btn')?.addEventListener('click', async () => { 1462 + if (!confirm('Clear all feeds and entries?')) return 461 1463 db.close() 462 - indexedDB.deleteDatabase('docstore-demo') 1464 + indexedDB.deleteDatabase('podcast-reader') 463 1465 window.location.reload() 464 - } 1466 + }) 465 1467 466 - // Attach run query buttons 467 - attachQueryButtons() 1468 + // Navigation 1469 + document.querySelectorAll('.nav-btn').forEach((btn) => { 1470 + btn.addEventListener('click', async (e) => { 1471 + currentView = (e.target as HTMLElement).dataset.view as typeof currentView 1472 + await render() 1473 + }) 1474 + }) 1475 + 1476 + // Refresh individual feed 1477 + document.querySelectorAll('.refresh-feed-btn').forEach((btn) => { 1478 + btn.addEventListener('click', async (e) => { 1479 + const feedId = (e.target as HTMLElement).dataset.feedId! 1480 + updateStatus('Refreshing feed...') 1481 + try { 1482 + const result = await refreshFeed(feedId) 1483 + updateStatus(`Refreshed with ${result.entriesCount} entries`) 1484 + await render() 1485 + } catch (err) { 1486 + updateStatus(`Refresh failed: ${err instanceof Error ? 
err.message : String(err)}`) 1487 + } 1488 + }) 1489 + }) 1490 + 1491 + // Query buttons 1492 + attachQueryHandlers() 468 1493 } 469 1494 470 - function attachQueryButtons() { 471 - document.querySelectorAll('.run-query-btn').forEach((btn) => { 1495 + async function attachQueryHandlers() { 1496 + // Get first feed ID for queries 1497 + const allFeeds = (await collectAll( 1498 + docs( 1499 + {db, storeDef: feedsStore as any}, 1500 + { 1501 + source: 'primary', 1502 + indexName: null, 1503 + keyRange: null, 1504 + direction: 'next', 1505 + filter: [], 1506 + needsSort: false, 1507 + availableFields: Object.keys(feedsStore.schema), 1508 + estimatedRows: 10, 1509 + selectivity: 1, 1510 + }, 1511 + ), 1512 + )) as Feed[] 1513 + const firstFeedId = allFeeds[0]?.id || 'no-feeds' 1514 + 1515 + document.querySelectorAll('.run-rows-btn').forEach((btn) => { 1516 + btn.addEventListener('click', async (e) => { 1517 + const idx = parseInt((e.target as HTMLElement).dataset.queryIdx!) 1518 + const storeName = (e.target as HTMLElement).dataset.store as 'feeds' | 'entries' 1519 + const query = sampleQueries[idx] 1520 + const resultsEl = document.getElementById(`results-${idx}`)! 1521 + 1522 + // Replace placeholder 1523 + const spec = JSON.parse(JSON.stringify(query.spec)) 1524 + for (const clause of spec.where) { 1525 + if (clause.value === '__FEED_ID__') clause.value = firstFeedId 1526 + } 1527 + 1528 + resultsEl.innerHTML = '<div class="loading">Running...</div>' 1529 + 1530 + const planner = storeName === 'feeds' ? feedsPlanner : entriesPlanner 1531 + const storeDef = storeName === 'feeds' ? 
feedsStore : entriesStore 1532 + 1533 + const result = await executeQuery(planner, storeDef, spec) 1534 + 1535 + resultsEl.innerHTML = ` 1536 + ${formatPlan(result.plan)} 1537 + <div class="results-header">First 20 rows:</div> 1538 + <pre>${JSON.stringify(result.rows, dateReplacer, 2)}</pre> 1539 + ` 1540 + }) 1541 + }) 1542 + 1543 + document.querySelectorAll('.run-docs-btn').forEach((btn) => { 472 1544 btn.addEventListener('click', async (e) => { 473 - const queryId = parseInt((e.target as HTMLElement).dataset.queryId!) 474 - const spec = sampleQueries[queryId].spec 475 - const plan = await planner.plan(spec) 1545 + const idx = parseInt((e.target as HTMLElement).dataset.queryIdx!) 1546 + const storeName = (e.target as HTMLElement).dataset.store as 'feeds' | 'entries' 1547 + const query = sampleQueries[idx] 1548 + const resultsEl = document.getElementById(`results-${idx}`)! 476 1549 477 - const resultsEl = document.getElementById(`results-${queryId}`)! 478 - resultsEl.innerHTML = '<div class="loading">Querying IDB...</div>' 1550 + const spec = JSON.parse(JSON.stringify(query.spec)) 1551 + for (const clause of spec.where) { 1552 + if (clause.value === '__FEED_ID__') clause.value = firstFeedId 1553 + } 479 1554 480 - const results = await executeQuery(plan) 1555 + resultsEl.innerHTML = '<div class="loading">Running...</div>' 1556 + 1557 + const planner = storeName === 'feeds' ? feedsPlanner : entriesPlanner 1558 + const storeDef = storeName === 'feeds' ? feedsStore : entriesStore 1559 + 1560 + const result = await executeDocsQuery(planner, storeDef, spec) 1561 + 1562 + // Truncate content field for display 1563 + const truncatedDocs = result.docs.map((d) => ({ 1564 + ...d, 1565 + content: d.content ? (d.content as string).slice(0, 100) + '...' 
: undefined, 1566 + })) 481 1567 482 1568 resultsEl.innerHTML = ` 483 - <div class="results-header">First 5 rows from IDB:</div> 484 - <pre>${JSON.stringify( 485 - results, 486 - (key, value) => { 487 - if (value instanceof Date) return value.toISOString() 488 - return value 489 - }, 490 - 2, 491 - )}</pre> 1569 + ${formatPlan(result.plan)} 1570 + <div class="results-header">First 10 docs (content truncated):</div> 1571 + <pre>${JSON.stringify(truncatedDocs, dateReplacer, 2)}</pre> 492 1572 ` 493 1573 }) 494 1574 }) 495 - } 496 1575 497 - async function refreshPlans() { 498 - document.getElementById('query-entry-count')!.textContent = String(entryCount) 1576 + document.querySelectorAll('.run-count-btn').forEach((btn) => { 1577 + btn.addEventListener('click', async (e) => { 1578 + const idx = parseInt((e.target as HTMLElement).dataset.queryIdx!) 1579 + const storeName = (e.target as HTMLElement).dataset.store as 'feeds' | 'entries' 1580 + const query = sampleQueries[idx] 1581 + const resultsEl = document.getElementById(`results-${idx}`)! 499 1582 500 - const container = document.getElementById('queries-container')! 501 - let html = '' 1583 + const spec = JSON.parse(JSON.stringify(query.spec)) 1584 + for (const clause of spec.where) { 1585 + if (clause.value === '__FEED_ID__') clause.value = firstFeedId 1586 + } 502 1587 503 - for (let i = 0; i < sampleQueries.length; i++) { 504 - const {name, spec} = sampleQueries[i] 505 - const plan = await planner.plan(spec) 506 - html += ` 507 - <div class="query"> 508 - <h3>${name}</h3> 509 - <pre class="query-spec">${JSON.stringify(spec.where, null, 2)}${spec.orderBy ? '\nORDER BY ' + spec.orderBy.key + ' ' + spec.orderBy.direction.toUpperCase() : ''}</pre> 510 - ${formatPlan(plan, i)} 511 - </div> 512 - ` 513 - } 1588 + resultsEl.innerHTML = '<div class="loading">Counting...</div>' 514 1589 515 - container.innerHTML = html 516 - attachQueryButtons() 1590 + const planner = storeName === 'feeds' ? 
feedsPlanner : entriesPlanner 1591 + const storeDef = storeName === 'feeds' ? feedsStore : entriesStore 517 1592 518 - // Update cardinality table 519 - const cardTable = document.getElementById('cardinality-table')! 520 - let cardHtml = '<tr><th>Field</th><th>Estimated Cardinality</th></tr>' 521 - const fields = ['feedId', 'status', 'authorId', '...tags.tag', '...tags.score'] 522 - for (const field of fields) { 523 - const card = await statsManager.getCardinality(entriesStore.name, field) 524 - cardHtml += `<tr><td>${field}</td><td>${card}</td></tr>` 525 - } 526 - cardTable.innerHTML = cardHtml 1593 + const startTime = performance.now() 1594 + const result = await executeCountQuery(planner, storeDef, spec) 1595 + const elapsed = performance.now() - startTime 527 1596 528 - // Update index sizes table from IDB 529 - const storeCounts = await getStoreCounts() 530 - const sizesTable = document.getElementById('index-sizes-table')! 531 - let sizesHtml = '<tr><th>Store</th><th>Row Count</th></tr>' 532 - for (const [storeName, count] of storeCounts) { 533 - const label = storeName === entriesStore.name ? 
`primary (${storeName})` : storeName 534 - sizesHtml += `<tr><td>${label}</td><td>${count}</td></tr>` 1597 + resultsEl.innerHTML = ` 1598 + ${formatPlan(result.plan)} 1599 + <div class="results-header">Count: ${result.count}</div> 1600 + <div>Time: ${elapsed.toFixed(2)}ms</div> 1601 + ` 1602 + }) 1603 + }) 1604 + } 1605 + 1606 + function dateReplacer(_key: string, value: unknown) { 1607 + if (value instanceof Date) { 1608 + return value.toISOString() 535 1609 } 536 - sizesTable.innerHTML = sizesHtml 1610 + return value 537 1611 } 538 1612 539 1613 // ============================================================================= ··· 550 1624 background: #1a1a1a; 551 1625 color: #e0e0e0; 552 1626 margin: 0; 1627 + font-family: system-ui, -apple-system, sans-serif; 553 1628 } 554 1629 555 - .demo { 556 - max-width: 1000px; 1630 + .app-container { 1631 + max-width: 1200px; 1632 + margin: 0 auto; 557 1633 padding: 20px; 558 - font-family: system-ui, -apple-system, sans-serif; 559 1634 } 560 1635 561 1636 h1 { 562 - border-bottom: 2px solid #555; 563 - padding-bottom: 10px; 1637 + margin-bottom: 5px; 564 1638 } 565 1639 566 - h2 { 567 - margin-top: 30px; 568 - color: #aaa; 1640 + .subtitle { 1641 + color: #888; 1642 + margin-top: 0; 1643 + margin-bottom: 20px; 569 1644 } 570 1645 571 - h3 { 572 - margin-top: 20px; 573 - color: #999; 1646 + .controls { 1647 + background: #252525; 1648 + padding: 15px; 1649 + border-radius: 8px; 1650 + margin-bottom: 20px; 574 1651 } 575 1652 576 - pre { 577 - background: #2a2a2a; 578 - padding: 15px; 579 - border-radius: 5px; 580 - overflow-x: auto; 581 - font-size: 13px; 582 - color: #d0d0d0; 583 - border: 1px solid #3a3a3a; 1653 + .control-row { 1654 + display: flex; 1655 + gap: 10px; 1656 + align-items: center; 1657 + flex-wrap: wrap; 584 1658 } 585 1659 586 - .idb-badge { 587 - display: inline-block; 588 - background: #2a5a2a; 589 - color: #8f8; 590 - padding: 5px 15px; 591 - border-radius: 15px; 1660 + .stats-row { 1661 + display: 
flex; 1662 + gap: 20px; 1663 + margin-top: 10px; 592 1664 font-size: 14px; 593 - margin-bottom: 15px; 1665 + color: #888; 594 1666 } 595 1667 596 - .controls { 1668 + .stats-row span { 1669 + padding: 4px 8px; 1670 + background: #333; 1671 + border-radius: 4px; 1672 + } 1673 + 1674 + #status-message { 1675 + color: #8a8; 1676 + flex: 1; 1677 + } 1678 + 1679 + .nav-row { 597 1680 display: flex; 598 - gap: 15px; 599 - align-items: center; 600 - padding: 15px; 601 - background: #252525; 602 - border-radius: 8px; 603 - margin-bottom: 20px; 1681 + gap: 10px; 1682 + margin-top: 15px; 1683 + border-top: 1px solid #333; 1684 + padding-top: 15px; 1685 + } 1686 + 1687 + .nav-btn { 1688 + background: #333; 1689 + border: 1px solid #444; 1690 + } 1691 + 1692 + .nav-btn.active { 1693 + background: #4a7a4a; 1694 + border-color: #5a8a5a; 1695 + } 1696 + 1697 + input[type="text"] { 1698 + background: #333; 1699 + border: 1px solid #444; 1700 + color: #e0e0e0; 1701 + padding: 8px 12px; 1702 + border-radius: 4px; 1703 + font-size: 14px; 1704 + } 1705 + 1706 + input[type="text"]:focus { 1707 + outline: none; 1708 + border-color: #5a8a5a; 604 1709 } 605 1710 606 1711 button { 607 1712 background: #3a5a8a; 608 1713 color: #fff; 609 1714 border: none; 610 - padding: 10px 20px; 611 - border-radius: 5px; 1715 + padding: 8px 16px; 1716 + border-radius: 4px; 612 1717 cursor: pointer; 613 1718 font-size: 14px; 614 1719 } ··· 617 1722 background: #4a6a9a; 618 1723 } 619 1724 620 - button:disabled { 621 - background: #444; 622 - cursor: not-allowed; 1725 + button.danger { 1726 + background: #8a4a4a; 623 1727 } 624 1728 625 - .run-query-btn { 626 - margin-top: 10px; 1729 + button.danger:hover { 1730 + background: #9a5a5a; 1731 + } 1732 + 1733 + .content-section { 1734 + background: #222; 1735 + padding: 20px; 1736 + border-radius: 8px; 1737 + } 1738 + 1739 + .content-section h2 { 1740 + margin-top: 0; 1741 + color: #aaa; 1742 + border-bottom: 1px solid #333; 1743 + padding-bottom: 10px; 1744 + 
} 1745 + 1746 + .empty-state { 1747 + color: #666; 1748 + font-style: italic; 1749 + text-align: center; 1750 + padding: 40px; 1751 + } 1752 + 1753 + /* Feeds list */ 1754 + .feeds-list { 1755 + display: flex; 1756 + flex-direction: column; 1757 + gap: 15px; 1758 + } 1759 + 1760 + .feed-item { 1761 + background: #2a2a2a; 1762 + border: 1px solid #333; 1763 + border-radius: 8px; 1764 + padding: 15px; 1765 + } 1766 + 1767 + .feed-header { 1768 + display: flex; 1769 + gap: 15px; 1770 + align-items: flex-start; 1771 + } 1772 + 1773 + .feed-image { 1774 + width: 60px; 1775 + height: 60px; 1776 + border-radius: 8px; 1777 + object-fit: cover; 1778 + } 1779 + 1780 + .feed-image-placeholder { 1781 + width: 60px; 1782 + height: 60px; 1783 + border-radius: 8px; 1784 + background: #333; 1785 + } 1786 + 1787 + .feed-info { 1788 + flex: 1; 1789 + } 1790 + 1791 + .feed-title { 1792 + margin: 0 0 5px 0; 1793 + font-size: 16px; 1794 + } 1795 + 1796 + .feed-url { 1797 + color: #6a9aca; 1798 + font-size: 12px; 1799 + text-decoration: none; 1800 + word-break: break-all; 1801 + } 1802 + 1803 + .feed-url:hover { 1804 + text-decoration: underline; 1805 + } 1806 + 1807 + .feed-meta { 1808 + display: flex; 1809 + gap: 15px; 1810 + font-size: 12px; 1811 + color: #888; 1812 + margin-top: 8px; 1813 + } 1814 + 1815 + .feed-tags { 1816 + display: flex; 1817 + flex-wrap: wrap; 1818 + gap: 5px; 1819 + margin-top: 8px; 1820 + } 1821 + 1822 + .tag { 1823 + background: #3a4a3a; 1824 + color: #8a8; 1825 + padding: 2px 8px; 1826 + border-radius: 10px; 1827 + font-size: 11px; 1828 + } 1829 + 1830 + .tag-more { 1831 + color: #666; 1832 + font-size: 11px; 1833 + } 1834 + 1835 + .refresh-feed-btn { 627 1836 background: #4a7a4a; 1837 + padding: 6px 12px; 1838 + font-size: 12px; 628 1839 } 629 1840 630 - .run-query-btn:hover { 631 - background: #5a8a5a; 1841 + /* Entries list */ 1842 + .entries-list { 1843 + display: flex; 1844 + flex-direction: column; 1845 + gap: 12px; 632 1846 } 633 1847 634 - 
#clear-db-btn { 635 - background: #8a4a4a; 1848 + .entry-item { 1849 + background: #2a2a2a; 1850 + border: 1px solid #333; 1851 + border-radius: 8px; 1852 + padding: 12px; 1853 + } 1854 + 1855 + .entry-header { 1856 + display: flex; 1857 + justify-content: space-between; 1858 + align-items: flex-start; 1859 + gap: 10px; 1860 + } 1861 + 1862 + .entry-title { 1863 + margin: 0; 1864 + font-size: 14px; 1865 + font-weight: 500; 1866 + } 1867 + 1868 + .entry-link { 1869 + color: #6a9aca; 1870 + font-size: 12px; 1871 + text-decoration: none; 1872 + white-space: nowrap; 636 1873 } 637 1874 638 - #clear-db-btn:hover { 639 - background: #9a5a5a; 1875 + .entry-meta { 1876 + display: flex; 1877 + gap: 15px; 1878 + font-size: 12px; 1879 + color: #888; 1880 + margin-top: 6px; 640 1881 } 641 1882 642 - #entry-count { 1883 + .entry-duration { 643 1884 color: #8a8; 644 - font-weight: bold; 645 1885 } 646 1886 647 - .query { 648 - border: 1px solid #444; 1887 + .entry-tags { 1888 + display: flex; 1889 + flex-wrap: wrap; 1890 + gap: 5px; 1891 + margin-top: 6px; 1892 + } 1893 + 1894 + .entry-content-preview { 1895 + font-size: 12px; 1896 + color: #666; 1897 + margin-top: 8px; 1898 + line-height: 1.4; 1899 + } 1900 + 1901 + /* Query playground */ 1902 + .queries-list { 1903 + display: flex; 1904 + flex-direction: column; 1905 + gap: 20px; 1906 + } 1907 + 1908 + .query-item { 1909 + background: #2a2a2a; 1910 + border: 1px solid #333; 649 1911 border-radius: 8px; 650 1912 padding: 15px; 651 - margin: 15px 0; 652 - background: #222; 1913 + } 1914 + 1915 + .query-item h3 { 1916 + margin: 0 0 10px 0; 1917 + font-size: 14px; 1918 + color: #aaa; 653 1919 } 654 1920 655 1921 .query-spec { 656 1922 background: #1e3a4a; 657 - border-color: #2a5a6a; 1923 + border: 1px solid #2a5a6a; 1924 + padding: 10px; 1925 + border-radius: 4px; 1926 + font-size: 12px; 1927 + overflow-x: auto; 1928 + margin: 0 0 10px 0; 1929 + } 1930 + 1931 + .query-buttons { 1932 + display: flex; 1933 + gap: 10px; 1934 + } 
1935 + 1936 + .query-buttons button { 1937 + font-size: 12px; 1938 + padding: 6px 12px; 1939 + } 1940 + 1941 + .run-rows-btn { 1942 + background: #4a7a4a; 1943 + } 1944 + 1945 + .run-docs-btn { 1946 + background: #3a5a8a; 1947 + } 1948 + 1949 + .run-count-btn { 1950 + background: #7a4a7a; 1951 + } 1952 + 1953 + .query-results { 1954 + margin-top: 10px; 1955 + } 1956 + 1957 + .query-results pre { 1958 + background: #1a1a1a; 1959 + padding: 10px; 1960 + border-radius: 4px; 1961 + font-size: 11px; 1962 + overflow-x: auto; 1963 + max-height: 300px; 1964 + overflow-y: auto; 1965 + } 1966 + 1967 + .results-header { 1968 + color: #8a8; 1969 + font-size: 12px; 1970 + margin-bottom: 5px; 1971 + } 1972 + 1973 + .loading { 1974 + color: #aa8; 1975 + font-style: italic; 658 1976 } 659 1977 660 1978 .plan { 661 1979 background: #1e2e1e; 662 - padding: 15px; 663 - border-radius: 5px; 1980 + padding: 10px; 1981 + border-radius: 4px; 664 1982 display: grid; 665 1983 grid-template-columns: 1fr 1fr; 666 - gap: 8px; 1984 + gap: 5px; 1985 + font-size: 11px; 1986 + margin-bottom: 10px; 667 1987 border: 1px solid #2a4a2a; 668 1988 } 669 1989 ··· 672 1992 border-color: #3a4a6a; 673 1993 } 674 1994 675 - .plan div { 676 - font-size: 14px; 1995 + .plan strong { 1996 + color: #8a8; 677 1997 } 678 1998 679 1999 .intersection-info { 680 - grid-column: 1 / -1; 681 2000 background: #3a4a3a; 682 - padding: 10px; 2001 + padding: 8px; 683 2002 border-radius: 4px; 684 - margin-top: 8px; 2003 + font-size: 11px; 2004 + margin-top: 5px; 685 2005 border: 1px dashed #5a7a5a; 686 2006 } 687 2007 688 - .intersection-details { 2008 + .intersection-info strong { 2009 + color: #afa; 2010 + } 2011 + 2012 + /* Tag Cloud */ 2013 + .tag-cloud-section h3 { 2014 + color: #aaa; 2015 + margin: 20px 0 10px 0; 2016 + font-size: 14px; 2017 + border-bottom: 1px solid #333; 2018 + padding-bottom: 8px; 2019 + } 2020 + 2021 + .tag-cloud-intro { 2022 + color: #888; 2023 + font-size: 13px; 2024 + margin-bottom: 20px; 2025 + 
} 2026 + 2027 + .tag-cloud-intro code { 2028 + background: #333; 2029 + padding: 2px 6px; 2030 + border-radius: 3px; 2031 + font-size: 12px; 2032 + color: #8cf; 2033 + } 2034 + 2035 + .tag-type-chips { 689 2036 display: flex; 690 - gap: 20px; 691 - margin-top: 5px; 692 - font-size: 13px; 693 - color: #8fa; 2037 + flex-wrap: wrap; 2038 + gap: 8px; 694 2039 } 695 2040 696 - .query-results { 697 - margin-top: 10px; 2041 + .tag-type-chip { 2042 + background: #2a3a4a; 2043 + border: 1px solid #3a5a6a; 2044 + border-radius: 20px; 2045 + padding: 6px 14px; 2046 + display: flex; 2047 + align-items: center; 2048 + gap: 8px; 2049 + cursor: pointer; 2050 + transition: all 0.2s; 698 2051 } 699 2052 700 - .results-header { 2053 + .tag-type-chip:hover { 2054 + background: #3a4a5a; 2055 + border-color: #5a7a8a; 2056 + } 2057 + 2058 + .tag-type-name { 2059 + color: #8cf; 2060 + font-weight: 500; 2061 + } 2062 + 2063 + .tag-type-count { 2064 + background: #1a2a3a; 2065 + color: #6a8a9a; 2066 + padding: 2px 8px; 2067 + border-radius: 10px; 2068 + font-size: 11px; 2069 + } 2070 + 2071 + .main-cloud { 2072 + margin: 20px 0; 2073 + } 2074 + 2075 + .cloud-container { 2076 + background: #1a1a1a; 2077 + border: 1px solid #333; 2078 + border-radius: 8px; 2079 + padding: 20px; 2080 + line-height: 2.2; 2081 + text-align: center; 2082 + } 2083 + 2084 + .cloud-tag { 2085 + display: inline-block; 2086 + margin: 4px 8px; 701 2087 color: #8a8; 702 - margin-bottom: 5px; 703 - font-weight: bold; 2088 + cursor: pointer; 2089 + transition: all 0.15s; 2090 + padding: 2px 4px; 2091 + border-radius: 3px; 704 2092 } 705 2093 706 - .loading { 707 - color: #aa8; 708 - font-style: italic; 2094 + .cloud-tag:hover { 2095 + color: #afa; 2096 + background: #2a3a2a; 2097 + } 2098 + 2099 + .cloud-tag[data-tag-type="category"] { color: #8cf; } 2100 + .cloud-tag[data-tag-type="category"]:hover { color: #aef; background: #2a3a4a; } 2101 + 2102 + .cloud-tag[data-tag-type="author"] { color: #f8a; } 2103 + 
.cloud-tag[data-tag-type="author"]:hover { color: #fac; background: #3a2a3a; } 2104 + 2105 + .cloud-tag[data-tag-type="media-type"] { color: #8fa; } 2106 + .cloud-tag[data-tag-type="media-type"]:hover { color: #afc; background: #2a3a2a; } 2107 + 2108 + .cloud-tag[data-tag-type="keyword"] { color: #fa8; } 2109 + .cloud-tag[data-tag-type="keyword"]:hover { color: #fca; background: #3a3a2a; } 2110 + 2111 + .category-breakdown { 2112 + margin: 20px 0; 709 2113 } 710 2114 711 - .ingest-result { 712 - margin-top: 10px; 2115 + .tag-category { 2116 + background: #252525; 2117 + border: 1px solid #333; 2118 + border-radius: 8px; 2119 + padding: 15px; 2120 + margin-bottom: 12px; 713 2121 } 714 2122 715 - .store-group { 716 - margin-top: 10px; 717 - padding: 5px; 718 - background: #333; 719 - border-radius: 3px; 2123 + .tag-category-title { 2124 + margin: 0 0 10px 0; 2125 + color: #8cf; 2126 + font-size: 13px; 720 2127 } 721 2128 722 - table { 723 - border-collapse: collapse; 724 - width: 100%; 2129 + .tag-category-count { 2130 + color: #666; 2131 + font-weight: normal; 725 2132 } 726 2133 727 - th, td { 728 - border: 1px solid #444; 729 - padding: 10px; 730 - text-align: left; 2134 + .tag-category-values { 2135 + display: flex; 2136 + flex-wrap: wrap; 2137 + gap: 6px; 731 2138 } 732 2139 733 - th { 2140 + .category-value { 734 2141 background: #2a2a2a; 2142 + border: 1px solid #3a3a3a; 2143 + border-radius: 4px; 2144 + padding: 4px 10px; 2145 + font-size: 12px; 2146 + color: #aaa; 2147 + cursor: pointer; 2148 + transition: all 0.15s; 735 2149 } 736 2150 737 - strong { 738 - color: #b0b0b0; 2151 + .category-value:hover { 2152 + background: #3a3a3a; 2153 + border-color: #4a4a4a; 2154 + color: #ddd; 2155 + } 2156 + 2157 + .value-count { 2158 + color: #666; 2159 + font-size: 10px; 2160 + margin-left: 4px; 2161 + } 2162 + 2163 + .more-values { 2164 + color: #666; 2165 + font-size: 12px; 2166 + font-style: italic; 2167 + padding: 4px 10px; 2168 + } 2169 + 2170 + .feed-tags-section 
{ 2171 + margin-top: 30px; 2172 + padding-top: 20px; 2173 + border-top: 1px solid #333; 2174 + } 2175 + 2176 + .feed-tag-cloud { 2177 + display: flex; 2178 + flex-wrap: wrap; 2179 + gap: 8px; 2180 + } 2181 + 2182 + .feed-cloud-tag { 2183 + background: #2a3a2a; 2184 + border: 1px solid #3a5a3a; 2185 + border-radius: 4px; 2186 + padding: 6px 12px; 2187 + font-size: 12px; 2188 + color: #8a8; 2189 + cursor: pointer; 2190 + transition: all 0.15s; 2191 + } 2192 + 2193 + .feed-cloud-tag:hover { 2194 + background: #3a4a3a; 2195 + border-color: #5a7a5a; 2196 + color: #afa; 2197 + } 2198 + 2199 + .feed-tag-count { 2200 + background: #1a2a1a; 2201 + color: #5a7a5a; 2202 + padding: 1px 6px; 2203 + border-radius: 8px; 2204 + font-size: 10px; 2205 + margin-left: 6px; 739 2206 } 740 2207 ` 741 2208 document.head.appendChild(style) ··· 745 2212 // ============================================================================= 746 2213 747 2214 async function init() { 748 - // Install the schema into IndexedDB 749 - // Version 3: using real StatsManager with idb 750 - db = await docstore.install('docstore-demo', entriesStore, {version: 3}) 751 - 752 - // Create stats manager with idb database 753 - statsManager = new StatsManager({db}) 2215 + // Install both stores in a single call to avoid version mismatch issues 2216 + db = await docstore.install('podcast-reader', [feedsStore, entriesStore], {version: 1}) 754 2217 755 - // Create query planner 756 - planner = new QueryPlanner(entriesStore, statsManager) 757 - 758 - // Check if we already have data 759 - const tx = db.transaction(entriesStore.name, 'readonly') 760 - entryCount = await tx.objectStore(entriesStore.name).count() 761 - await tx.done 2218 + // Create stats managers 2219 + feedsStatsManager = new StatsManager({db}) 2220 + entriesStatsManager = new StatsManager({db}) 762 2221 763 - // If empty, seed with initial entries 764 - if (entryCount === 0) { 765 - await addEntries(100) 766 - } else { 767 - // Update nextEntryId to 
avoid collisions 768 - nextEntryId = entryCount 769 - } 2222 + // Create query planners 2223 + feedsPlanner = new QueryPlanner(feedsStore, feedsStatsManager) 2224 + entriesPlanner = new QueryPlanner(entriesStore, entriesStatsManager) 770 2225 771 - // Render the demo 772 - await renderDemo() 2226 + // Render the UI 2227 + await render() 773 2228 } 774 2229 775 2230 init().catch(console.error)