Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.

Add _redirects support and use the PDS firehose; TODO: switch back to git once #20 is fixed

Changed files
+712 -131
cli
+31 -23
cli/Cargo.lock
··· 523 524 [[package]] 525 name = "clap" 526 - version = "4.5.51" 527 source = "registry+https://github.com/rust-lang/crates.io-index" 528 - checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" 529 dependencies = [ 530 "clap_builder", 531 "clap_derive", ··· 533 534 [[package]] 535 name = "clap_builder" 536 - version = "4.5.51" 537 source = "registry+https://github.com/rust-lang/crates.io-index" 538 - checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" 539 dependencies = [ 540 "anstream", 541 "anstyle", ··· 1752 1753 [[package]] 1754 name = "jacquard" 1755 - version = "0.9.0" 1756 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#d853091d7de59e18746a78532dc28cfc017079b0" 1757 dependencies = [ 1758 "bytes", 1759 "getrandom 0.2.16", ··· 1767 "jose-jwk", 1768 "miette", 1769 "regex", 1770 "reqwest", 1771 "serde", 1772 "serde_html_form", ··· 1781 1782 [[package]] 1783 name = "jacquard-api" 1784 - version = "0.9.0" 1785 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#d853091d7de59e18746a78532dc28cfc017079b0" 1786 dependencies = [ 1787 "bon", 1788 "bytes", ··· 1799 1800 [[package]] 1801 name = "jacquard-common" 1802 - version = "0.9.0" 1803 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#d853091d7de59e18746a78532dc28cfc017079b0" 1804 dependencies = [ 1805 "base64 0.22.1", 1806 "bon", ··· 1823 "p256", 1824 "rand 0.9.2", 1825 "regex", 1826 "reqwest", 1827 "serde", 1828 "serde_html_form", ··· 1840 1841 [[package]] 1842 name = "jacquard-derive" 1843 - version = "0.9.0" 1844 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#d853091d7de59e18746a78532dc28cfc017079b0" 1845 dependencies = [ 1846 "heck 0.5.0", 1847 "jacquard-lexicon", ··· 1852 1853 [[package]] 1854 name = "jacquard-identity" 1855 - version = "0.9.1" 1856 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#d853091d7de59e18746a78532dc28cfc017079b0" 1857 dependencies = [ 1858 "bon", 1859 
"bytes", ··· 1878 1879 [[package]] 1880 name = "jacquard-lexicon" 1881 - version = "0.9.1" 1882 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#d853091d7de59e18746a78532dc28cfc017079b0" 1883 dependencies = [ 1884 "cid", 1885 "dashmap", ··· 1904 1905 [[package]] 1906 name = "jacquard-oauth" 1907 - version = "0.9.0" 1908 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#d853091d7de59e18746a78532dc28cfc017079b0" 1909 dependencies = [ 1910 "base64 0.22.1", 1911 "bytes", ··· 1925 "serde_html_form", 1926 "serde_json", 1927 "sha2", 1928 - "signature", 1929 "smol_str", 1930 "thiserror 2.0.17", 1931 "tokio", ··· 2230 2231 [[package]] 2232 name = "mini-moka" 2233 - version = "0.11.0" 2234 - source = "git+https://github.com/moka-rs/mini-moka?rev=da864e849f5d034f32e02197fee9bb5d5af36d3d#da864e849f5d034f32e02197fee9bb5d5af36d3d" 2235 dependencies = [ 2236 "crossbeam-channel", 2237 "crossbeam-utils", ··· 2999 ] 3000 3001 [[package]] 3002 name = "regex-syntax" 3003 version = "0.8.8" 3004 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3051 3052 [[package]] 3053 name = "resolv-conf" 3054 - version = "0.7.5" 3055 source = "registry+https://github.com/rust-lang/crates.io-index" 3056 - checksum = "6b3789b30bd25ba102de4beabd95d21ac45b69b1be7d14522bab988c526d6799" 3057 3058 [[package]] 3059 name = "rfc6979" ··· 4954 "multibase", 4955 "multihash", 4956 "n0-future 0.3.1", 4957 "reqwest", 4958 "rustversion", 4959 "serde",
··· 523 524 [[package]] 525 name = "clap" 526 + version = "4.5.52" 527 source = "registry+https://github.com/rust-lang/crates.io-index" 528 + checksum = "aa8120877db0e5c011242f96806ce3c94e0737ab8108532a76a3300a01db2ab8" 529 dependencies = [ 530 "clap_builder", 531 "clap_derive", ··· 533 534 [[package]] 535 name = "clap_builder" 536 + version = "4.5.52" 537 source = "registry+https://github.com/rust-lang/crates.io-index" 538 + checksum = "02576b399397b659c26064fbc92a75fede9d18ffd5f80ca1cd74ddab167016e1" 539 dependencies = [ 540 "anstream", 541 "anstyle", ··· 1752 1753 [[package]] 1754 name = "jacquard" 1755 + version = "0.9.3" 1756 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#324cbb45078fe2f77b60ae2bd7765c5306ec8b5e" 1757 dependencies = [ 1758 "bytes", 1759 "getrandom 0.2.16", ··· 1767 "jose-jwk", 1768 "miette", 1769 "regex", 1770 + "regex-lite", 1771 "reqwest", 1772 "serde", 1773 "serde_html_form", ··· 1782 1783 [[package]] 1784 name = "jacquard-api" 1785 + version = "0.9.2" 1786 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#324cbb45078fe2f77b60ae2bd7765c5306ec8b5e" 1787 dependencies = [ 1788 "bon", 1789 "bytes", ··· 1800 1801 [[package]] 1802 name = "jacquard-common" 1803 + version = "0.9.2" 1804 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#324cbb45078fe2f77b60ae2bd7765c5306ec8b5e" 1805 dependencies = [ 1806 "base64 0.22.1", 1807 "bon", ··· 1824 "p256", 1825 "rand 0.9.2", 1826 "regex", 1827 + "regex-lite", 1828 "reqwest", 1829 "serde", 1830 "serde_html_form", ··· 1842 1843 [[package]] 1844 name = "jacquard-derive" 1845 + version = "0.9.3" 1846 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#324cbb45078fe2f77b60ae2bd7765c5306ec8b5e" 1847 dependencies = [ 1848 "heck 0.5.0", 1849 "jacquard-lexicon", ··· 1854 1855 [[package]] 1856 name = "jacquard-identity" 1857 + version = "0.9.2" 1858 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#324cbb45078fe2f77b60ae2bd7765c5306ec8b5e" 
1859 dependencies = [ 1860 "bon", 1861 "bytes", ··· 1880 1881 [[package]] 1882 name = "jacquard-lexicon" 1883 + version = "0.9.2" 1884 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#324cbb45078fe2f77b60ae2bd7765c5306ec8b5e" 1885 dependencies = [ 1886 "cid", 1887 "dashmap", ··· 1906 1907 [[package]] 1908 name = "jacquard-oauth" 1909 + version = "0.9.2" 1910 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#324cbb45078fe2f77b60ae2bd7765c5306ec8b5e" 1911 dependencies = [ 1912 "base64 0.22.1", 1913 "bytes", ··· 1927 "serde_html_form", 1928 "serde_json", 1929 "sha2", 1930 "smol_str", 1931 "thiserror 2.0.17", 1932 "tokio", ··· 2231 2232 [[package]] 2233 name = "mini-moka" 2234 + version = "0.10.99" 2235 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#324cbb45078fe2f77b60ae2bd7765c5306ec8b5e" 2236 dependencies = [ 2237 "crossbeam-channel", 2238 "crossbeam-utils", ··· 3000 ] 3001 3002 [[package]] 3003 + name = "regex-lite" 3004 + version = "0.1.8" 3005 + source = "registry+https://github.com/rust-lang/crates.io-index" 3006 + checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" 3007 + 3008 + [[package]] 3009 name = "regex-syntax" 3010 version = "0.8.8" 3011 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3058 3059 [[package]] 3060 name = "resolv-conf" 3061 + version = "0.7.6" 3062 source = "registry+https://github.com/rust-lang/crates.io-index" 3063 + checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" 3064 3065 [[package]] 3066 name = "rfc6979" ··· 4961 "multibase", 4962 "multihash", 4963 "n0-future 0.3.1", 4964 + "regex", 4965 "reqwest", 4966 "rustversion", 4967 "serde",
+8 -7
cli/Cargo.toml
··· 8 place_wisp = [] 9 10 [dependencies] 11 - jacquard = { git = "https://tangled.org/@nonbinary.computer/jacquard", features = ["loopback"] } 12 - jacquard-oauth = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 13 - jacquard-api = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 14 - jacquard-common = { git = "https://tangled.org/@nonbinary.computer/jacquard", features = ["websocket"] } 15 - jacquard-identity = { git = "https://tangled.org/@nonbinary.computer/jacquard", features = ["dns"] } 16 - jacquard-derive = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 17 - jacquard-lexicon = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 18 clap = { version = "4.5.51", features = ["derive"] } 19 tokio = { version = "1.48", features = ["full"] } 20 miette = { version = "7.6.0", features = ["fancy"] } ··· 39 n0-future = "0.3.1" 40 chrono = "0.4" 41 url = "2.5"
··· 8 place_wisp = [] 9 10 [dependencies] 11 + jacquard = { path = "/Users/regent/Developer/jacquard/crates/jacquard", features = ["loopback"] } 12 + jacquard-oauth = { path = "/Users/regent/Developer/jacquard/crates/jacquard-oauth" } 13 + jacquard-api = { path = "/Users/regent/Developer/jacquard/crates/jacquard-api", features = ["streaming"] } 14 + jacquard-common = { path = "/Users/regent/Developer/jacquard/crates/jacquard-common", features = ["websocket"] } 15 + jacquard-identity = { path = "/Users/regent/Developer/jacquard/crates/jacquard-identity", features = ["dns"] } 16 + jacquard-derive = { path = "/Users/regent/Developer/jacquard/crates/jacquard-derive" } 17 + jacquard-lexicon = { path = "/Users/regent/Developer/jacquard/crates/jacquard-lexicon" } 18 clap = { version = "4.5.51", features = ["derive"] } 19 tokio = { version = "1.48", features = ["full"] } 20 miette = { version = "7.6.0", features = ["fancy"] } ··· 39 n0-future = "0.3.1" 40 chrono = "0.4" 41 url = "2.5" 42 + regex = "1.11"
+61 -37
cli/src/main.rs
··· 7 mod pull; 8 mod serve; 9 mod subfs_utils; 10 11 use clap::{Parser, Subcommand}; 12 use jacquard::CowStr; ··· 168 site: Option<String>, 169 ) -> miette::Result<()> { 170 let (session, auth) = 171 - MemoryCredentialSession::authenticated(input, password, None).await?; 172 println!("Signed in as {}", auth.handle); 173 174 let agent: Agent<_> = Agent::from(session); ··· 556 /// Process a single file: gzip -> base64 -> upload blob (or reuse existing) 557 /// Returns (File, reused: bool) 558 /// file_path_key is the full path from the site root (e.g., "config/file.json") for blob map lookup 559 async fn process_file( 560 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>, 561 file_path: &Path, ··· 571 .first_or_octet_stream() 572 .to_string(); 573 574 - // Gzip compress 575 - let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); 576 - encoder.write_all(&file_data).into_diagnostic()?; 577 - let gzipped = encoder.finish().into_diagnostic()?; 578 579 - // Base64 encode the gzipped data 580 - let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); 581 582 - // Compute CID for this file (CRITICAL: on base64-encoded gzipped content) 583 - let file_cid = cid::compute_cid(&base64_bytes); 584 - 585 // Check if we have an existing blob with the same CID 586 let existing_blob = existing_blobs.get(file_path_key); 587 - 588 if let Some((existing_blob_ref, existing_cid)) = existing_blob { 589 if existing_cid == &file_cid { 590 // CIDs match - reuse existing blob 591 println!(" ✓ Reusing blob for {} (CID: {})", file_path_key, file_cid); 592 - return Ok(( 593 - File::new() 594 - .r#type(CowStr::from("file")) 595 - .blob(existing_blob_ref.clone()) 596 - .encoding(CowStr::from("gzip")) 597 - .mime_type(CowStr::from(original_mime)) 598 - .base64(true) 599 - .build(), 600 - true 601 - )); 602 } 603 } 604 - 605 // File is new or changed - upload it 606 - println!(" ↑ Uploading {} ({} bytes, CID: {})", file_path_key, 
base64_bytes.len(), file_cid); 607 - let blob = agent.upload_blob( 608 - base64_bytes, 609 - MimeType::new_static("application/octet-stream"), 610 - ).await?; 611 612 - Ok(( 613 - File::new() 614 - .r#type(CowStr::from("file")) 615 - .blob(blob) 616 - .encoding(CowStr::from("gzip")) 617 - .mime_type(CowStr::from(original_mime)) 618 - .base64(true) 619 - .build(), 620 - false 621 - )) 622 } 623 624 /// Convert fs::Directory to subfs::Directory
··· 7 mod pull; 8 mod serve; 9 mod subfs_utils; 10 + mod redirects; 11 12 use clap::{Parser, Subcommand}; 13 use jacquard::CowStr; ··· 169 site: Option<String>, 170 ) -> miette::Result<()> { 171 let (session, auth) = 172 + MemoryCredentialSession::authenticated(input, password, None, None).await?; 173 println!("Signed in as {}", auth.handle); 174 175 let agent: Agent<_> = Agent::from(session); ··· 557 /// Process a single file: gzip -> base64 -> upload blob (or reuse existing) 558 /// Returns (File, reused: bool) 559 /// file_path_key is the full path from the site root (e.g., "config/file.json") for blob map lookup 560 + /// 561 + /// Special handling: _redirects files are NOT compressed (uploaded as-is) 562 async fn process_file( 563 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>, 564 file_path: &Path, ··· 574 .first_or_octet_stream() 575 .to_string(); 576 577 + // Check if this is a _redirects file (don't compress it) 578 + let is_redirects_file = file_path.file_name() 579 + .and_then(|n| n.to_str()) 580 + .map(|n| n == "_redirects") 581 + .unwrap_or(false); 582 583 + let (upload_bytes, encoding, is_base64) = if is_redirects_file { 584 + // Don't compress _redirects - upload as-is 585 + (file_data.clone(), None, false) 586 + } else { 587 + // Gzip compress 588 + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); 589 + encoder.write_all(&file_data).into_diagnostic()?; 590 + let gzipped = encoder.finish().into_diagnostic()?; 591 592 + // Base64 encode the gzipped data 593 + let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); 594 + (base64_bytes, Some("gzip"), true) 595 + }; 596 + 597 + // Compute CID for this file 598 + let file_cid = cid::compute_cid(&upload_bytes); 599 + 600 // Check if we have an existing blob with the same CID 601 let existing_blob = existing_blobs.get(file_path_key); 602 + 603 if let Some((existing_blob_ref, existing_cid)) = existing_blob { 604 if existing_cid == 
&file_cid { 605 // CIDs match - reuse existing blob 606 println!(" ✓ Reusing blob for {} (CID: {})", file_path_key, file_cid); 607 + let mut file_builder = File::new() 608 + .r#type(CowStr::from("file")) 609 + .blob(existing_blob_ref.clone()) 610 + .mime_type(CowStr::from(original_mime)); 611 + 612 + if let Some(enc) = encoding { 613 + file_builder = file_builder.encoding(CowStr::from(enc)); 614 + } 615 + if is_base64 { 616 + file_builder = file_builder.base64(true); 617 + } 618 + 619 + return Ok((file_builder.build(), true)); 620 } 621 } 622 + 623 // File is new or changed - upload it 624 + let mime_type = if is_redirects_file { 625 + MimeType::new_static("text/plain") 626 + } else { 627 + MimeType::new_static("application/octet-stream") 628 + }; 629 + 630 + println!(" ↑ Uploading {} ({} bytes, CID: {})", file_path_key, upload_bytes.len(), file_cid); 631 + let blob = agent.upload_blob(upload_bytes, mime_type).await?; 632 + 633 + let mut file_builder = File::new() 634 + .r#type(CowStr::from("file")) 635 + .blob(blob) 636 + .mime_type(CowStr::from(original_mime)); 637 + 638 + if let Some(enc) = encoding { 639 + file_builder = file_builder.encoding(CowStr::from(enc)); 640 + } 641 + if is_base64 { 642 + file_builder = file_builder.base64(true); 643 + } 644 645 + Ok((file_builder.build(), false)) 646 } 647 648 /// Convert fs::Directory to subfs::Directory
+375
cli/src/redirects.rs
···
··· 1 + use regex::Regex; 2 + use std::collections::HashMap; 3 + use std::fs; 4 + use std::path::Path; 5 + 6 + /// Maximum number of redirect rules to prevent DoS attacks 7 + const MAX_REDIRECT_RULES: usize = 1000; 8 + 9 + #[derive(Debug, Clone)] 10 + pub struct RedirectRule { 11 + #[allow(dead_code)] 12 + pub from: String, 13 + pub to: String, 14 + pub status: u16, 15 + #[allow(dead_code)] 16 + pub force: bool, 17 + pub from_pattern: Regex, 18 + pub from_params: Vec<String>, 19 + pub query_params: Option<HashMap<String, String>>, 20 + } 21 + 22 + #[derive(Debug)] 23 + pub struct RedirectMatch { 24 + pub target_path: String, 25 + pub status: u16, 26 + pub force: bool, 27 + } 28 + 29 + /// Parse a _redirects file into an array of redirect rules 30 + pub fn parse_redirects_file(content: &str) -> Vec<RedirectRule> { 31 + let lines = content.lines(); 32 + let mut rules = Vec::new(); 33 + 34 + for (line_num, line_raw) in lines.enumerate() { 35 + if line_raw.trim().is_empty() || line_raw.trim().starts_with('#') { 36 + continue; 37 + } 38 + 39 + // Enforce max rules limit 40 + if rules.len() >= MAX_REDIRECT_RULES { 41 + eprintln!( 42 + "Redirect rules limit reached ({}), ignoring remaining rules", 43 + MAX_REDIRECT_RULES 44 + ); 45 + break; 46 + } 47 + 48 + match parse_redirect_line(line_raw.trim()) { 49 + Ok(Some(rule)) => rules.push(rule), 50 + Ok(None) => continue, 51 + Err(e) => { 52 + eprintln!( 53 + "Failed to parse redirect rule on line {}: {} ({})", 54 + line_num + 1, 55 + line_raw, 56 + e 57 + ); 58 + } 59 + } 60 + } 61 + 62 + rules 63 + } 64 + 65 + /// Parse a single redirect rule line 66 + /// Format: /from [query_params] /to [status] [conditions] 67 + fn parse_redirect_line(line: &str) -> Result<Option<RedirectRule>, String> { 68 + let parts: Vec<&str> = line.split_whitespace().collect(); 69 + 70 + if parts.len() < 2 { 71 + return Ok(None); 72 + } 73 + 74 + let mut idx = 0; 75 + let from = parts[idx]; 76 + idx += 1; 77 + 78 + let mut status = 301; // Default 
status 79 + let mut force = false; 80 + let mut query_params: HashMap<String, String> = HashMap::new(); 81 + 82 + // Parse query parameters that come before the destination path 83 + while idx < parts.len() { 84 + let part = parts[idx]; 85 + 86 + // If it starts with / or http, it's the destination path 87 + if part.starts_with('/') || part.starts_with("http://") || part.starts_with("https://") { 88 + break; 89 + } 90 + 91 + // If it contains = and comes before the destination, it's a query param 92 + if part.contains('=') { 93 + let split_index = part.find('=').unwrap(); 94 + let key = &part[..split_index]; 95 + let value = &part[split_index + 1..]; 96 + 97 + if !key.is_empty() && !value.is_empty() { 98 + query_params.insert(key.to_string(), value.to_string()); 99 + } 100 + idx += 1; 101 + } else { 102 + break; 103 + } 104 + } 105 + 106 + // Next part should be the destination 107 + if idx >= parts.len() { 108 + return Ok(None); 109 + } 110 + 111 + let to = parts[idx]; 112 + idx += 1; 113 + 114 + // Parse remaining parts for status code 115 + for part in parts.iter().skip(idx) { 116 + // Check for status code (with optional ! 
for force) 117 + if let Some(stripped) = part.strip_suffix('!') { 118 + if let Ok(s) = stripped.parse::<u16>() { 119 + force = true; 120 + status = s; 121 + } 122 + } else if let Ok(s) = part.parse::<u16>() { 123 + status = s; 124 + } 125 + // Note: We're ignoring conditional redirects (Country, Language, Cookie, Role) for now 126 + // They can be added later if needed 127 + } 128 + 129 + // Parse the 'from' pattern 130 + let (pattern, params) = convert_path_to_regex(from)?; 131 + 132 + Ok(Some(RedirectRule { 133 + from: from.to_string(), 134 + to: to.to_string(), 135 + status, 136 + force, 137 + from_pattern: pattern, 138 + from_params: params, 139 + query_params: if query_params.is_empty() { 140 + None 141 + } else { 142 + Some(query_params) 143 + }, 144 + })) 145 + } 146 + 147 + /// Convert a path pattern with placeholders and splats to a regex 148 + /// Examples: 149 + /// /blog/:year/:month/:day -> captures year, month, day 150 + /// /news/* -> captures splat 151 + fn convert_path_to_regex(pattern: &str) -> Result<(Regex, Vec<String>), String> { 152 + let mut params = Vec::new(); 153 + let mut regex_str = String::from("^"); 154 + 155 + // Split by query string if present 156 + let path_part = pattern.split('?').next().unwrap_or(pattern); 157 + 158 + // Escape special regex characters except * and : 159 + let mut escaped = String::new(); 160 + for ch in path_part.chars() { 161 + match ch { 162 + '.' 
| '+' | '^' | '$' | '{' | '}' | '(' | ')' | '|' | '[' | ']' | '\\' => { 163 + escaped.push('\\'); 164 + escaped.push(ch); 165 + } 166 + _ => escaped.push(ch), 167 + } 168 + } 169 + 170 + // Replace :param with named capture groups 171 + let param_regex = Regex::new(r":([a-zA-Z_][a-zA-Z0-9_]*)").map_err(|e| e.to_string())?; 172 + let mut last_end = 0; 173 + let mut result = String::new(); 174 + 175 + for cap in param_regex.captures_iter(&escaped) { 176 + let m = cap.get(0).unwrap(); 177 + result.push_str(&escaped[last_end..m.start()]); 178 + result.push_str("([^/?]+)"); 179 + params.push(cap[1].to_string()); 180 + last_end = m.end(); 181 + } 182 + result.push_str(&escaped[last_end..]); 183 + escaped = result; 184 + 185 + // Replace * with splat capture 186 + if escaped.contains('*') { 187 + escaped = escaped.replace('*', "(.*)"); 188 + params.push("splat".to_string()); 189 + } 190 + 191 + regex_str.push_str(&escaped); 192 + 193 + // Make trailing slash optional 194 + if !regex_str.ends_with(".*") { 195 + regex_str.push_str("/?"); 196 + } 197 + 198 + regex_str.push('$'); 199 + 200 + let pattern = Regex::new(&regex_str).map_err(|e| e.to_string())?; 201 + 202 + Ok((pattern, params)) 203 + } 204 + 205 + /// Match a request path against redirect rules 206 + pub fn match_redirect_rule( 207 + request_path: &str, 208 + rules: &[RedirectRule], 209 + query_params: Option<&HashMap<String, String>>, 210 + ) -> Option<RedirectMatch> { 211 + // Normalize path: ensure leading slash 212 + let normalized_path = if request_path.starts_with('/') { 213 + request_path.to_string() 214 + } else { 215 + format!("/{}", request_path) 216 + }; 217 + 218 + for rule in rules { 219 + // Check query parameter conditions first (if any) 220 + if let Some(required_params) = &rule.query_params { 221 + if let Some(actual_params) = query_params { 222 + let query_matches = required_params.iter().all(|(key, expected_value)| { 223 + if let Some(actual_value) = actual_params.get(key) { 224 + // If expected 
value is a placeholder (:name), any value is acceptable 225 + if expected_value.starts_with(':') { 226 + return true; 227 + } 228 + // Otherwise it must match exactly 229 + actual_value == expected_value 230 + } else { 231 + false 232 + } 233 + }); 234 + 235 + if !query_matches { 236 + continue; 237 + } 238 + } else { 239 + // Rule requires query params but none provided 240 + continue; 241 + } 242 + } 243 + 244 + // Match the path pattern 245 + if let Some(captures) = rule.from_pattern.captures(&normalized_path) { 246 + let mut target_path = rule.to.clone(); 247 + 248 + // Replace captured parameters 249 + for (i, param_name) in rule.from_params.iter().enumerate() { 250 + if let Some(param_value) = captures.get(i + 1) { 251 + let value = param_value.as_str(); 252 + 253 + if param_name == "splat" { 254 + target_path = target_path.replace(":splat", value); 255 + } else { 256 + target_path = target_path.replace(&format!(":{}", param_name), value); 257 + } 258 + } 259 + } 260 + 261 + // Handle query parameter replacements 262 + if let Some(required_params) = &rule.query_params { 263 + if let Some(actual_params) = query_params { 264 + for (key, placeholder) in required_params { 265 + if placeholder.starts_with(':') { 266 + if let Some(actual_value) = actual_params.get(key) { 267 + let param_name = &placeholder[1..]; 268 + target_path = target_path.replace( 269 + &format!(":{}", param_name), 270 + actual_value, 271 + ); 272 + } 273 + } 274 + } 275 + } 276 + } 277 + 278 + // Preserve query string for 200, 301, 302 redirects (unless target already has one) 279 + if [200, 301, 302].contains(&rule.status) 280 + && query_params.is_some() 281 + && !target_path.contains('?') 282 + { 283 + if let Some(params) = query_params { 284 + if !params.is_empty() { 285 + let query_string: String = params 286 + .iter() 287 + .map(|(k, v)| format!("{}={}", k, v)) 288 + .collect::<Vec<_>>() 289 + .join("&"); 290 + target_path = format!("{}?{}", target_path, query_string); 291 + } 292 + } 
293 + } 294 + 295 + return Some(RedirectMatch { 296 + target_path, 297 + status: rule.status, 298 + force: rule.force, 299 + }); 300 + } 301 + } 302 + 303 + None 304 + } 305 + 306 + /// Load redirect rules from a _redirects file 307 + pub fn load_redirect_rules(directory: &Path) -> Vec<RedirectRule> { 308 + let redirects_path = directory.join("_redirects"); 309 + 310 + if !redirects_path.exists() { 311 + return Vec::new(); 312 + } 313 + 314 + match fs::read_to_string(&redirects_path) { 315 + Ok(content) => parse_redirects_file(&content), 316 + Err(e) => { 317 + eprintln!("Failed to load _redirects file: {}", e); 318 + Vec::new() 319 + } 320 + } 321 + } 322 + 323 + #[cfg(test)] 324 + mod tests { 325 + use super::*; 326 + 327 + #[test] 328 + fn test_parse_simple_redirect() { 329 + let content = "/old-path /new-path"; 330 + let rules = parse_redirects_file(content); 331 + assert_eq!(rules.len(), 1); 332 + assert_eq!(rules[0].from, "/old-path"); 333 + assert_eq!(rules[0].to, "/new-path"); 334 + assert_eq!(rules[0].status, 301); 335 + assert!(!rules[0].force); 336 + } 337 + 338 + #[test] 339 + fn test_parse_with_status() { 340 + let content = "/temp /target 302"; 341 + let rules = parse_redirects_file(content); 342 + assert_eq!(rules[0].status, 302); 343 + } 344 + 345 + #[test] 346 + fn test_parse_force_redirect() { 347 + let content = "/force /target 301!"; 348 + let rules = parse_redirects_file(content); 349 + assert!(rules[0].force); 350 + } 351 + 352 + #[test] 353 + fn test_match_exact_path() { 354 + let rules = parse_redirects_file("/old-path /new-path"); 355 + let m = match_redirect_rule("/old-path", &rules, None); 356 + assert!(m.is_some()); 357 + assert_eq!(m.unwrap().target_path, "/new-path"); 358 + } 359 + 360 + #[test] 361 + fn test_match_splat() { 362 + let rules = parse_redirects_file("/news/* /blog/:splat"); 363 + let m = match_redirect_rule("/news/2024/01/15/post", &rules, None); 364 + assert!(m.is_some()); 365 + assert_eq!(m.unwrap().target_path, 
"/blog/2024/01/15/post"); 366 + } 367 + 368 + #[test] 369 + fn test_match_placeholders() { 370 + let rules = parse_redirects_file("/blog/:year/:month/:day /posts/:year-:month-:day"); 371 + let m = match_redirect_rule("/blog/2024/01/15", &rules, None); 372 + assert!(m.is_some()); 373 + assert_eq!(m.unwrap().target_path, "/posts/2024-01-15"); 374 + } 375 + }
+237 -64
cli/src/serve.rs
··· 1 use crate::pull::pull_site; 2 - use axum::Router; 3 use jacquard::CowStr; 4 - use jacquard_common::jetstream::{CommitOperation, JetstreamMessage, JetstreamParams}; 5 use jacquard_common::types::string::Did; 6 use jacquard_common::xrpc::{SubscriptionClient, TungsteniteSubscriptionClient}; 7 use miette::IntoDiagnostic; 8 use n0_future::StreamExt; 9 use std::path::PathBuf; 10 use std::sync::Arc; 11 use tokio::sync::RwLock; 12 use tower_http::compression::CompressionLayer; 13 use tower_http::services::ServeDir; 14 - use url::Url; 15 16 /// Shared state for the server 17 #[derive(Clone)] ··· 20 rkey: CowStr<'static>, 21 output_dir: PathBuf, 22 last_cid: Arc<RwLock<Option<String>>>, 23 } 24 25 /// Serve a site locally with real-time firehose updates ··· 54 let did_str = CowStr::from(did.as_str().to_string()); 55 pull_site(did_str.clone(), rkey.clone(), output_dir.clone()).await?; 56 57 // Create shared state 58 let state = ServerState { 59 did: did_str.clone(), 60 rkey: rkey.clone(), 61 output_dir: output_dir.clone(), 62 last_cid: Arc::new(RwLock::new(None)), 63 }; 64 65 // Start firehose listener in background ··· 70 } 71 }); 72 73 - // Create HTTP server with gzip compression 74 let app = Router::new() 75 - .fallback_service( 76 - ServeDir::new(&output_dir) 77 - .precompressed_gzip() 78 - ) 79 - .layer(CompressionLayer::new()) 80 - .with_state(state); 81 82 let addr = format!("0.0.0.0:{}", port); 83 let listener = tokio::net::TcpListener::bind(&addr) ··· 90 axum::serve(listener, app).await.into_diagnostic()?; 91 92 Ok(()) 93 } 94 95 /// Watch the firehose for updates to the specific site 96 fn watch_firehose(state: ServerState) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<()>> + Send>> { 97 Box::pin(async move { 98 - let jetstream_url = Url::parse("wss://jetstream1.us-east.fire.hose.cam") 99 - .into_diagnostic()?; 100 101 - println!("[Firehose] Connecting to Jetstream..."); 102 103 // Create subscription client 104 - let client = 
TungsteniteSubscriptionClient::from_base_uri(jetstream_url); 105 106 - // Subscribe with no filters (we'll filter manually) 107 - // Jetstream doesn't support filtering by collection in the params builder 108 - let params = JetstreamParams::new().build(); 109 110 let stream = client.subscribe(&params).await.into_diagnostic()?; 111 - println!("[Firehose] Connected! Watching for updates..."); 112 113 // Convert to typed message stream 114 let (_sink, mut messages) = stream.into_stream(); ··· 117 match messages.next().await { 118 Some(Ok(msg)) => { 119 if let Err(e) = handle_firehose_message(&state, msg).await { 120 - eprintln!("[Firehose] Error handling message: {}", e); 121 } 122 } 123 Some(Err(e)) => { 124 - eprintln!("[Firehose] Stream error: {}", e); 125 // Try to reconnect after a delay 126 tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; 127 return Box::pin(watch_firehose(state)).await; 128 } 129 None => { 130 - println!("[Firehose] Stream ended, reconnecting..."); 131 tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; 132 return Box::pin(watch_firehose(state)).await; 133 } ··· 137 } 138 139 /// Handle a firehose message 140 - async fn handle_firehose_message( 141 state: &ServerState, 142 - msg: JetstreamMessage<'_>, 143 ) -> miette::Result<()> { 144 match msg { 145 - JetstreamMessage::Commit { 146 - did, 147 - commit, 148 - .. 
149 - } => { 150 - // Check if this is our site 151 - if did.as_str() == state.did.as_str() 152 - && commit.collection.as_str() == "place.wisp.fs" 153 - && commit.rkey.as_str() == state.rkey.as_str() 154 - { 155 - match commit.operation { 156 - CommitOperation::Create | CommitOperation::Update => { 157 - let new_cid = commit.cid.as_ref().map(|c| c.to_string()); 158 - 159 - // Check if CID changed 160 - let should_update = { 161 - let last_cid = state.last_cid.read().await; 162 - new_cid != *last_cid 163 - }; 164 165 - if should_update { 166 - println!("\n[Update] Detected change to site {} (CID: {:?})", state.rkey, new_cid); 167 - println!("[Update] Pulling latest version..."); 168 169 - // Pull the updated site 170 - match pull_site( 171 - state.did.clone(), 172 - state.rkey.clone(), 173 - state.output_dir.clone(), 174 - ) 175 - .await 176 - { 177 - Ok(_) => { 178 - // Update last CID 179 - let mut last_cid = state.last_cid.write().await; 180 - *last_cid = new_cid; 181 - println!("[Update] ✓ Site updated successfully!\n"); 182 - } 183 - Err(e) => { 184 - eprintln!("[Update] Failed to pull site: {}", e); 185 - } 186 } 187 } 188 - } 189 - CommitOperation::Delete => { 190 println!("\n[Update] Site {} was deleted", state.rkey); 191 } 192 } 193 }
··· 1 use crate::pull::pull_site; 2 + use crate::redirects::{load_redirect_rules, match_redirect_rule, RedirectRule}; 3 + use axum::{ 4 + Router, 5 + extract::Request, 6 + response::{Response, IntoResponse, Redirect}, 7 + http::{StatusCode, Uri}, 8 + }; 9 use jacquard::CowStr; 10 + use jacquard::api::com_atproto::sync::subscribe_repos::{SubscribeRepos, SubscribeReposMessage}; 11 use jacquard_common::types::string::Did; 12 use jacquard_common::xrpc::{SubscriptionClient, TungsteniteSubscriptionClient}; 13 use miette::IntoDiagnostic; 14 use n0_future::StreamExt; 15 + use std::collections::HashMap; 16 use std::path::PathBuf; 17 use std::sync::Arc; 18 use tokio::sync::RwLock; 19 + use tower::Service; 20 use tower_http::compression::CompressionLayer; 21 use tower_http::services::ServeDir; 22 23 /// Shared state for the server 24 #[derive(Clone)] ··· 27 rkey: CowStr<'static>, 28 output_dir: PathBuf, 29 last_cid: Arc<RwLock<Option<String>>>, 30 + redirect_rules: Arc<RwLock<Vec<RedirectRule>>>, 31 } 32 33 /// Serve a site locally with real-time firehose updates ··· 62 let did_str = CowStr::from(did.as_str().to_string()); 63 pull_site(did_str.clone(), rkey.clone(), output_dir.clone()).await?; 64 65 + // Load redirect rules 66 + let redirect_rules = load_redirect_rules(&output_dir); 67 + if !redirect_rules.is_empty() { 68 + println!("Loaded {} redirect rules from _redirects", redirect_rules.len()); 69 + } 70 + 71 // Create shared state 72 let state = ServerState { 73 did: did_str.clone(), 74 rkey: rkey.clone(), 75 output_dir: output_dir.clone(), 76 last_cid: Arc::new(RwLock::new(None)), 77 + redirect_rules: Arc::new(RwLock::new(redirect_rules)), 78 }; 79 80 // Start firehose listener in background ··· 85 } 86 }); 87 88 + // Create HTTP server with gzip compression and redirect handling 89 + let serve_dir = ServeDir::new(&output_dir).precompressed_gzip(); 90 + 91 let app = Router::new() 92 + .fallback(move |req: Request| { 93 + let state = state.clone(); 94 + let mut serve_dir 
= serve_dir.clone(); 95 + async move { 96 + handle_request_with_redirects(req, state, &mut serve_dir).await 97 + } 98 + }) 99 + .layer(CompressionLayer::new()); 100 101 let addr = format!("0.0.0.0:{}", port); 102 let listener = tokio::net::TcpListener::bind(&addr) ··· 109 axum::serve(listener, app).await.into_diagnostic()?; 110 111 Ok(()) 112 + } 113 + 114 + /// Handle a request with redirect support 115 + async fn handle_request_with_redirects( 116 + req: Request, 117 + state: ServerState, 118 + serve_dir: &mut ServeDir, 119 + ) -> Response { 120 + let uri = req.uri().clone(); 121 + let path = uri.path(); 122 + let method = req.method().clone(); 123 + 124 + // Parse query parameters 125 + let query_params = uri.query().map(|q| { 126 + let mut params = HashMap::new(); 127 + for pair in q.split('&') { 128 + if let Some((key, value)) = pair.split_once('=') { 129 + params.insert(key.to_string(), value.to_string()); 130 + } 131 + } 132 + params 133 + }); 134 + 135 + // Check for redirect rules 136 + let redirect_rules = state.redirect_rules.read().await; 137 + if let Some(redirect_match) = match_redirect_rule(path, &redirect_rules, query_params.as_ref()) { 138 + let is_force = redirect_match.force; 139 + drop(redirect_rules); // Release the lock 140 + 141 + // If not forced, check if the file exists first 142 + if !is_force { 143 + // Try to serve the file normally first 144 + let test_req = Request::builder() 145 + .uri(uri.clone()) 146 + .method(&method) 147 + .body(axum::body::Body::empty()) 148 + .unwrap(); 149 + 150 + match serve_dir.call(test_req).await { 151 + Ok(response) if response.status().is_success() => { 152 + // File exists and was served successfully, return it 153 + return response.into_response(); 154 + } 155 + _ => { 156 + // File doesn't exist or error, apply redirect 157 + } 158 + } 159 + } 160 + 161 + // Handle different status codes 162 + match redirect_match.status { 163 + 200 => { 164 + // Rewrite: serve the target file but keep the URL the 
same 165 + if let Ok(target_uri) = redirect_match.target_path.parse::<Uri>() { 166 + let new_req = Request::builder() 167 + .uri(target_uri) 168 + .method(&method) 169 + .body(axum::body::Body::empty()) 170 + .unwrap(); 171 + 172 + match serve_dir.call(new_req).await { 173 + Ok(response) => response.into_response(), 174 + Err(_) => StatusCode::INTERNAL_SERVER_ERROR.into_response(), 175 + } 176 + } else { 177 + StatusCode::INTERNAL_SERVER_ERROR.into_response() 178 + } 179 + } 180 + 301 => { 181 + // Permanent redirect 182 + Redirect::permanent(&redirect_match.target_path).into_response() 183 + } 184 + 302 => { 185 + // Temporary redirect 186 + Redirect::temporary(&redirect_match.target_path).into_response() 187 + } 188 + 404 => { 189 + // Custom 404 page 190 + if let Ok(target_uri) = redirect_match.target_path.parse::<Uri>() { 191 + let new_req = Request::builder() 192 + .uri(target_uri) 193 + .method(&method) 194 + .body(axum::body::Body::empty()) 195 + .unwrap(); 196 + 197 + match serve_dir.call(new_req).await { 198 + Ok(mut response) => { 199 + *response.status_mut() = StatusCode::NOT_FOUND; 200 + response.into_response() 201 + } 202 + Err(_) => StatusCode::NOT_FOUND.into_response(), 203 + } 204 + } else { 205 + StatusCode::NOT_FOUND.into_response() 206 + } 207 + } 208 + _ => { 209 + // Unsupported status code, fall through to normal serving 210 + match serve_dir.call(req).await { 211 + Ok(response) => response.into_response(), 212 + Err(_) => StatusCode::INTERNAL_SERVER_ERROR.into_response(), 213 + } 214 + } 215 + } 216 + } else { 217 + drop(redirect_rules); 218 + // No redirect match, serve normally 219 + match serve_dir.call(req).await { 220 + Ok(response) => response.into_response(), 221 + Err(_) => StatusCode::NOT_FOUND.into_response(), 222 + } 223 + } 224 } 225 226 /// Watch the firehose for updates to the specific site 227 fn watch_firehose(state: ServerState) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<()>> + Send>> { 228 
Box::pin(async move { 229 + use jacquard_identity::PublicResolver; 230 + use jacquard::prelude::IdentityResolver; 231 + 232 + // Resolve DID to PDS URL 233 + let resolver = PublicResolver::default(); 234 + let did = Did::new(&state.did).into_diagnostic()?; 235 + let pds_url = resolver.pds_for_did(&did).await.into_diagnostic()?; 236 + 237 + println!("[PDS] Resolved DID to PDS: {}", pds_url); 238 + 239 + // Convert HTTP(S) URL to WebSocket URL 240 + let mut ws_url = pds_url.clone(); 241 + let scheme = if pds_url.scheme() == "https" { "wss" } else { "ws" }; 242 + ws_url.set_scheme(scheme) 243 + .map_err(|_| miette::miette!("Failed to set WebSocket scheme"))?; 244 245 + println!("[PDS] Connecting to {}...", ws_url); 246 247 // Create subscription client 248 + let client = TungsteniteSubscriptionClient::from_base_uri(ws_url); 249 250 + // Subscribe to the PDS firehose 251 + let params = SubscribeRepos::new().build(); 252 253 let stream = client.subscribe(&params).await.into_diagnostic()?; 254 + println!("[PDS] Connected! 
Watching for updates..."); 255 256 // Convert to typed message stream 257 let (_sink, mut messages) = stream.into_stream(); ··· 260 match messages.next().await { 261 Some(Ok(msg)) => { 262 if let Err(e) = handle_firehose_message(&state, msg).await { 263 + eprintln!("[PDS] Error handling message: {}", e); 264 } 265 } 266 Some(Err(e)) => { 267 + eprintln!("[PDS] Stream error: {}", e); 268 // Try to reconnect after a delay 269 tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; 270 return Box::pin(watch_firehose(state)).await; 271 } 272 None => { 273 + println!("[PDS] Stream ended, reconnecting..."); 274 tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; 275 return Box::pin(watch_firehose(state)).await; 276 } ··· 280 } 281 282 /// Handle a firehose message 283 + async fn handle_firehose_message<'a>( 284 state: &ServerState, 285 + msg: SubscribeReposMessage<'a>, 286 ) -> miette::Result<()> { 287 match msg { 288 + SubscribeReposMessage::Commit(commit_msg) => { 289 + // Check if this commit is from our DID 290 + if commit_msg.repo.as_str() != state.did.as_str() { 291 + return Ok(()); 292 + } 293 294 + // Check if any operation affects our site 295 + let target_path = format!("place.wisp.fs/{}", state.rkey); 296 + let has_site_update = commit_msg.ops.iter().any(|op| op.path.as_ref() == target_path); 297 298 + if has_site_update { 299 + // Debug: log all operations for this commit 300 + println!("[Debug] Commit has {} ops for {}", commit_msg.ops.len(), state.rkey); 301 + for op in &commit_msg.ops { 302 + if op.path.as_ref() == target_path { 303 + println!("[Debug] - {} {}", op.action.as_ref(), op.path.as_ref()); 304 + } 305 + } 306 + } 307 + 308 + if has_site_update { 309 + // Use the commit CID as the version tracker 310 + let commit_cid = commit_msg.commit.to_string(); 311 + 312 + // Check if this is a new commit 313 + let should_update = { 314 + let last_cid = state.last_cid.read().await; 315 + Some(commit_cid.clone()) != *last_cid 316 + }; 317 + 
318 + if should_update { 319 + // Check operation types 320 + let has_create_or_update = commit_msg.ops.iter().any(|op| { 321 + op.path.as_ref() == target_path && 322 + (op.action.as_ref() == "create" || op.action.as_ref() == "update") 323 + }); 324 + let has_delete = commit_msg.ops.iter().any(|op| { 325 + op.path.as_ref() == target_path && op.action.as_ref() == "delete" 326 + }); 327 + 328 + // If there's a create/update, pull the site (even if there's also a delete in the same commit) 329 + if has_create_or_update { 330 + println!("\n[Update] Detected change to site {} (commit: {})", state.rkey, commit_cid); 331 + println!("[Update] Pulling latest version..."); 332 + 333 + // Pull the updated site 334 + match pull_site( 335 + state.did.clone(), 336 + state.rkey.clone(), 337 + state.output_dir.clone(), 338 + ) 339 + .await 340 + { 341 + Ok(_) => { 342 + // Update last CID 343 + let mut last_cid = state.last_cid.write().await; 344 + *last_cid = Some(commit_cid); 345 + 346 + // Reload redirect rules 347 + let new_redirect_rules = load_redirect_rules(&state.output_dir); 348 + let mut redirect_rules = state.redirect_rules.write().await; 349 + *redirect_rules = new_redirect_rules; 350 + 351 + println!("[Update] ✓ Site updated successfully!\n"); 352 + } 353 + Err(e) => { 354 + eprintln!("[Update] Failed to pull site: {}", e); 355 } 356 } 357 + } else if has_delete { 358 + // Only a delete, no create/update 359 println!("\n[Update] Site {} was deleted", state.rkey); 360 + 361 + // Update last CID so we don't process this commit again 362 + let mut last_cid = state.last_cid.write().await; 363 + *last_cid = Some(commit_cid); 364 } 365 } 366 }