feat: add serde_json dep and implement item serialization for export formats

+1

Cargo.lock

··· 918 918 "owo-colors", 919 919 "pai-core", 920 920 "rusqlite", 921 + "serde_json", 921 922 ] 922 923 923 924 [[package]]

+1

cli/Cargo.toml

··· 14 14 chrono = "0.4" 15 15 dirs = "6.0" 16 16 owo-colors = "4.1" 17 + serde_json = "1.0"

+396 -37

cli/src/main.rs

··· 1 1 mod paths; 2 2 mod storage; 3 3 4 + use chrono::{DateTime, Duration, Utc}; 4 5 use clap::{Parser, Subcommand}; 5 6 use owo_colors::OwoColorize; 6 - use pai_core::{Config, ListFilter, PaiError, SourceKind}; 7 + use pai_core::{Config, Item, ListFilter, PaiError, SourceKind}; 8 + use std::fs::File; 9 + use std::io::{self, Write}; 7 10 use std::path::PathBuf; 11 + use std::str::FromStr; 8 12 use storage::SqliteStorage; 9 13 10 14 /// Personal Activity Index - POSIX-style CLI for content aggregation ··· 53 57 /// Output file (default: stdout) 54 58 #[arg(short = 'o', value_name = "FILE")] 55 59 output: Option<PathBuf>, 56 - } 57 - 58 - impl From<ExportOpts> for ListFilter { 59 - fn from(opts: ExportOpts) -> Self { 60 - ListFilter { 61 - source_kind: opts.kind, 62 - source_id: opts.source_id, 63 - limit: opts.limit, 64 - since: opts.since, 65 - query: opts.query, 66 - } 67 - } 68 60 } 69 61 70 62 #[derive(Subcommand, Debug)] ··· 186 178 let db_path = paths::resolve_db_path(db_path)?; 187 179 let storage = SqliteStorage::new(db_path)?; 188 180 181 + let since = normalize_since_input(since)?; 182 + let limit = ensure_positive_limit(limit)?; 183 + let source_id = normalize_optional_string(source_id); 184 + let query = normalize_optional_string(query); 185 + 189 186 let filter = ListFilter { source_kind: kind, source_id, limit: Some(limit), since, query }; 190 187 191 188 let items = pai_core::Storage::list_items(&storage, &filter)?; ··· 195 192 return Ok(()); 196 193 } 197 194 198 - println!("{} {}\n", "Found".cyan(), format!("{} items:", items.len()).bold()); 199 - for item in items { 200 - println!("{} {}", "ID:".bright_black(), item.id); 201 - println!( 202 - "{} {} {}", 203 - "Source:".bright_black(), 204 - item.source_kind.to_string().cyan(), 205 - format!("({})", item.source_id).bright_black() 206 - ); 207 - if let Some(title) = &item.title { 208 - println!("{} {}", "Title:".bright_black(), title.bold()); 209 - } 210 - if let Some(author) = &item.author { 211 
- println!("{} {}", "Author:".bright_black(), author); 212 - } 213 - println!("{} {}", "URL:".bright_black(), item.url.blue().underline()); 214 - println!("{} {}", "Published:".bright_black(), item.published_at); 215 - println!(); 216 - } 195 + println!("{} {}", "Found".cyan(), format!("{} item(s)", items.len()).bold()); 196 + println!(); 197 + render_items_table(&items)?; 217 198 218 199 Ok(()) 219 200 } 220 201 221 202 fn handle_export(db_path: Option<PathBuf>, opts: ExportOpts) -> Result<(), PaiError> { 222 203 let db_path = paths::resolve_db_path(db_path)?; 223 - let _storage = SqliteStorage::new(db_path)?; 204 + let storage = SqliteStorage::new(db_path)?; 205 + 206 + let ExportOpts { kind, source_id, limit, since, query, format, output } = opts; 207 + let since = normalize_since_input(since)?; 208 + let limit = ensure_optional_limit(limit)?; 209 + let source_id = normalize_optional_string(source_id); 210 + let query = normalize_optional_string(query); 211 + 212 + let filter = ListFilter { source_kind: kind, source_id, limit, since, query }; 213 + let items = pai_core::Storage::list_items(&storage, &filter)?; 224 214 225 - let format = opts.format.clone(); 226 - let output = opts.output.clone(); 227 - let filter: ListFilter = opts.into(); 215 + let export_format = ExportFormat::from_str(&format)?; 216 + let mut writer = create_output_writer(output.as_ref())?; 217 + export_items(&items, export_format, writer.as_mut())?; 228 218 229 - println!("export command - format: {format}, output: {output:?}, filter: {filter:?}"); 219 + match output { 220 + Some(path) => println!( 221 + "{} Exported {} item(s) to {}", 222 + "Success:".green(), 223 + items.len(), 224 + path.display() 225 + ), 226 + None => println!("{} Exported {} item(s) to stdout", "Success:".green(), items.len()), 227 + } 228 + 230 229 Ok(()) 231 230 } 232 231 ··· 299 298 300 299 Ok(()) 301 300 } 301 + 302 + fn normalize_since_input(since: Option<String>) -> Result<Option<String>, PaiError> { 303 + 
normalize_since_with_now(since, Utc::now()) 304 + } 305 + 306 + fn normalize_since_with_now(since: Option<String>, now: DateTime<Utc>) -> Result<Option<String>, PaiError> { 307 + let value = match since { 308 + Some(raw) => { 309 + let trimmed = raw.trim(); 310 + if trimmed.is_empty() { 311 + return Ok(None); 312 + } 313 + trimmed.to_string() 314 + } 315 + None => return Ok(None), 316 + }; 317 + 318 + if let Some(duration) = parse_relative_duration(&value) { 319 + let instant = now - duration; 320 + return Ok(Some(instant.to_rfc3339())); 321 + } 322 + 323 + if let Ok(dt) = DateTime::parse_from_rfc3339(&value) { 324 + return Ok(Some(dt.with_timezone(&Utc).to_rfc3339())); 325 + } 326 + 327 + if let Ok(dt) = DateTime::parse_from_rfc2822(&value) { 328 + return Ok(Some(dt.with_timezone(&Utc).to_rfc3339())); 329 + } 330 + 331 + Err(PaiError::InvalidArgument(format!( 332 + "Invalid since value '{value}'. Use ISO 8601 (e.g. 2024-01-01T00:00:00Z) or relative forms like 7d/24h/60m." 333 + ))) 334 + } 335 + 336 + fn parse_relative_duration(input: &str) -> Option<Duration> { 337 + if input.len() < 2 { 338 + return None; 339 + } 340 + 341 + let unit = input.chars().last()?.to_ascii_lowercase(); 342 + let magnitude: i64 = input[..input.len() - 1].parse().ok()?; 343 + 344 + match unit { 345 + 'm' => Some(Duration::minutes(magnitude)), 346 + 'h' => Some(Duration::hours(magnitude)), 347 + 'd' => Some(Duration::days(magnitude)), 348 + 'w' => Some(Duration::weeks(magnitude)), 349 + _ => None, 350 + } 351 + } 352 + 353 + fn ensure_positive_limit(limit: usize) -> Result<usize, PaiError> { 354 + if limit == 0 { 355 + return Err(PaiError::InvalidArgument("Limit must be greater than zero".to_string())); 356 + } 357 + Ok(limit) 358 + } 359 + 360 + fn ensure_optional_limit(limit: Option<usize>) -> Result<Option<usize>, PaiError> { 361 + match limit { 362 + Some(value) => Ok(Some(ensure_positive_limit(value)?)), 363 + None => Ok(None), 364 + } 365 + } 366 + 367 + fn 
normalize_optional_string(value: Option<String>) -> Option<String> { 368 + value.and_then(|input| { 369 + let trimmed = input.trim(); 370 + if trimmed.is_empty() { 371 + None 372 + } else { 373 + Some(trimmed.to_string()) 374 + } 375 + }) 376 + } 377 + 378 + enum ExportFormat { 379 + Json, 380 + Ndjson, 381 + Rss, 382 + } 383 + 384 + impl FromStr for ExportFormat { 385 + type Err = PaiError; 386 + 387 + fn from_str(s: &str) -> Result<Self, Self::Err> { 388 + match s.to_ascii_lowercase().as_str() { 389 + "json" => Ok(Self::Json), 390 + "ndjson" => Ok(Self::Ndjson), 391 + "rss" => Ok(Self::Rss), 392 + other => Err(PaiError::InvalidArgument(format!( 393 + "Unsupported export format '{other}'. Expected json, ndjson, or rss." 394 + ))), 395 + } 396 + } 397 + } 398 + 399 + fn create_output_writer(path: Option<&PathBuf>) -> Result<Box<dyn Write>, PaiError> { 400 + if let Some(path) = path { 401 + if let Some(parent) = path.parent() { 402 + if !parent.as_os_str().is_empty() { 403 + std::fs::create_dir_all(parent)?; 404 + } 405 + } 406 + let file = File::create(path)?; 407 + Ok(Box::new(file)) 408 + } else { 409 + Ok(Box::new(io::stdout())) 410 + } 411 + } 412 + 413 + fn export_items(items: &[Item], format: ExportFormat, writer: &mut dyn Write) -> Result<(), PaiError> { 414 + match format { 415 + ExportFormat::Json => write_json(items, writer)?, 416 + ExportFormat::Ndjson => write_ndjson(items, writer)?, 417 + ExportFormat::Rss => write_rss(items, writer)?, 418 + } 419 + 420 + writer.flush().map_err(PaiError::Io) 421 + } 422 + 423 + fn write_json(items: &[Item], writer: &mut dyn Write) -> Result<(), PaiError> { 424 + serde_json::to_writer_pretty(&mut *writer, items) 425 + .map_err(|e| PaiError::Parse(format!("Failed to serialize JSON export: {e}")))?; 426 + writer.write_all(b"\n").map_err(PaiError::Io) 427 + } 428 + 429 + fn write_ndjson(items: &[Item], writer: &mut dyn Write) -> Result<(), PaiError> { 430 + for item in items { 431 + serde_json::to_writer(&mut *writer, 
item) 432 + .map_err(|e| PaiError::Parse(format!("Failed to serialize JSON export: {e}")))?; 433 + writer.write_all(b"\n").map_err(PaiError::Io)?; 434 + } 435 + Ok(()) 436 + } 437 + 438 + fn write_rss(items: &[Item], writer: &mut dyn Write) -> Result<(), PaiError> { 439 + let feed = build_rss_feed(items)?; 440 + writer.write_all(feed.as_bytes()).map_err(PaiError::Io)?; 441 + writer.write_all(b"\n").map_err(PaiError::Io) 442 + } 443 + 444 + fn build_rss_feed(items: &[Item]) -> Result<String, PaiError> { 445 + const TITLE: &str = "Personal Activity Index"; 446 + const LINK: &str = "https://personal-activity-index.local/"; 447 + const DESCRIPTION: &str = "Aggregated feed exported by the Personal Activity Index CLI."; 448 + 449 + let mut feed = String::new(); 450 + feed.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); 451 + feed.push_str("<rss version=\"2.0\"><channel>"); 452 + feed.push_str(&format!("<title>{TITLE}</title>")); 453 + feed.push_str(&format!("<link>{LINK}</link>")); 454 + feed.push_str(&format!("<description>{DESCRIPTION}</description>")); 455 + 456 + for item in items { 457 + let title = item.title.as_deref().or(item.summary.as_deref()).unwrap_or(&item.url); 458 + let description = item.summary.as_deref().or(item.content_html.as_deref()).unwrap_or(""); 459 + let author = item.author.as_deref().unwrap_or("Unknown"); 460 + 461 + feed.push_str("<item>"); 462 + feed.push_str(&format!("<title>{}</title>", escape_xml(title))); 463 + feed.push_str(&format!("<link>{}</link>", escape_xml(&item.url))); 464 + feed.push_str(&format!("<guid isPermaLink=\"false\">{}</guid>", escape_xml(&item.id))); 465 + feed.push_str(&format!( 466 + "<category>{}</category>", 467 + escape_xml(&item.source_kind.to_string()) 468 + )); 469 + feed.push_str(&format!("<author>{}</author>", escape_xml(author))); 470 + feed.push_str(&format!("<description>{}</description>", escape_xml(description))); 471 + feed.push_str(&format!("<pubDate>{}</pubDate>", 
format_rss_date(&item.published_at))); 472 + feed.push_str("</item>"); 473 + } 474 + 475 + feed.push_str("</channel></rss>"); 476 + Ok(feed) 477 + } 478 + 479 + fn escape_xml(input: &str) -> String { 480 + input 481 + .replace('&', "&amp;") 482 + .replace('<', "&lt;") 483 + .replace('>', "&gt;") 484 + .replace('\"', "&quot;") 485 + .replace('\'', "&apos;") 486 + } 487 + 488 + fn format_rss_date(value: &str) -> String { 489 + if let Ok(dt) = DateTime::parse_from_rfc3339(value) { 490 + dt.to_rfc2822() 491 + } else if let Ok(dt) = DateTime::parse_from_rfc2822(value) { 492 + dt.to_rfc2822() 493 + } else { 494 + value.to_string() 495 + } 496 + } 497 + 498 + fn format_published_display(value: &str) -> String { 499 + if let Ok(dt) = DateTime::parse_from_rfc3339(value) { 500 + dt.with_timezone(&Utc).format("%Y-%m-%d %H:%M").to_string() 501 + } else if let Ok(dt) = DateTime::parse_from_rfc2822(value) { 502 + dt.with_timezone(&Utc).format("%Y-%m-%d %H:%M").to_string() 503 + } else { 504 + value.to_string() 505 + } 506 + } 507 + 508 + fn truncate_for_column(value: &str, max_chars: usize) -> String { 509 + let total_chars = value.chars().count(); 510 + if total_chars <= max_chars { 511 + return value.to_string(); 512 + } 513 + 514 + if max_chars <= 3 { 515 + return value.chars().take(max_chars).collect(); 516 + } 517 + 518 + let mut truncated = String::new(); 519 + for ch in value.chars().take(max_chars - 3) { 520 + truncated.push(ch); 521 + } 522 + truncated.push_str("..."); 523 + truncated 524 + } 525 + 526 + fn render_items_table(items: &[Item]) -> Result<(), PaiError> { 527 + let mut stdout = io::stdout(); 528 + write_items_table(items, &mut stdout).map_err(PaiError::Io) 529 + } 530 + 531 + fn write_items_table<W: Write>(items: &[Item], writer: &mut W) -> io::Result<()> { 532 + const PUBLISHED_WIDTH: usize = 19; 533 + const KIND_WIDTH: usize = 9; 534 + const SOURCE_WIDTH: usize = 24; 535 + const TITLE_WIDTH: usize = 60; 536 + 537 + let header = format!( 538 + "| {published:<pub_width$} | 
{kind:<kind_width$} | {source:<source_width$} | {title:<title_width$} |", 539 + published = "Published", 540 + kind = "Kind", 541 + source = "Source", 542 + title = "Title", 543 + pub_width = PUBLISHED_WIDTH, 544 + kind_width = KIND_WIDTH, 545 + source_width = SOURCE_WIDTH, 546 + title_width = TITLE_WIDTH, 547 + ); 548 + let separator = "-".repeat(header.len()); 549 + 550 + writeln!(writer, "{separator}")?; 551 + writeln!(writer, "{header}")?; 552 + writeln!(writer, "{}", separator.clone())?; 553 + 554 + for item in items { 555 + let published = truncate_for_column(&format_published_display(&item.published_at), PUBLISHED_WIDTH); 556 + let kind = truncate_for_column(&item.source_kind.to_string(), KIND_WIDTH); 557 + let source = truncate_for_column(&item.source_id, SOURCE_WIDTH); 558 + let title_text = item.title.as_deref().or(item.summary.as_deref()).unwrap_or(&item.url); 559 + let title = truncate_for_column(title_text, TITLE_WIDTH); 560 + 561 + let row = format!( 562 + "| {published:<PUBLISHED_WIDTH$} | {kind:<KIND_WIDTH$} | {source:<SOURCE_WIDTH$} | {title:<TITLE_WIDTH$} |", 563 + ); 564 + writeln!(writer, "{row}")?; 565 + } 566 + 567 + writeln!(writer, "{separator}") 568 + } 569 + 570 + #[cfg(test)] 571 + mod tests { 572 + use super::*; 573 + use chrono::TimeZone; 574 + 575 + fn sample_item() -> Item { 576 + Item { 577 + id: "sample-id".to_string(), 578 + source_kind: SourceKind::Substack, 579 + source_id: "patternmatched.substack.com".to_string(), 580 + author: Some("Pattern Matched".to_string()), 581 + title: Some("Test entry".to_string()), 582 + summary: Some("Summary".to_string()), 583 + url: "https://patternmatched.substack.com/p/test".to_string(), 584 + content_html: None, 585 + published_at: "2024-01-01T00:00:00Z".to_string(), 586 + created_at: "2024-01-01T00:00:00Z".to_string(), 587 + } 588 + } 589 + 590 + #[test] 591 + fn normalize_since_accepts_iso8601() { 592 + let now = Utc.with_ymd_and_hms(2024, 1, 10, 0, 0, 0).unwrap(); 593 + let since = 
normalize_since_with_now(Some("2024-01-01T00:00:00Z".to_string()), now).unwrap(); 594 + assert_eq!(since.unwrap(), "2024-01-01T00:00:00+00:00"); 595 + } 596 + 597 + #[test] 598 + fn normalize_since_accepts_relative_days() { 599 + let now = Utc.with_ymd_and_hms(2024, 1, 10, 0, 0, 0).unwrap(); 600 + let since = normalize_since_with_now(Some("3d".to_string()), now).unwrap(); 601 + assert_eq!(since.unwrap(), "2024-01-07T00:00:00+00:00"); 602 + } 603 + 604 + #[test] 605 + fn ensure_positive_limit_rejects_zero() { 606 + assert!(ensure_positive_limit(0).is_err()); 607 + assert!(ensure_optional_limit(Some(0)).is_err()); 608 + } 609 + 610 + #[test] 611 + fn export_format_parsing() { 612 + assert!(matches!(ExportFormat::from_str("json").unwrap(), ExportFormat::Json)); 613 + assert!(matches!( 614 + ExportFormat::from_str("NDJSON").unwrap(), 615 + ExportFormat::Ndjson 616 + )); 617 + assert!(matches!(ExportFormat::from_str("rss").unwrap(), ExportFormat::Rss)); 618 + assert!(ExportFormat::from_str("invalid").is_err()); 619 + } 620 + 621 + #[test] 622 + fn json_export_serializes_items() { 623 + let mut buffer = Vec::new(); 624 + export_items(&[sample_item()], ExportFormat::Json, &mut buffer).unwrap(); 625 + let output = String::from_utf8(buffer).unwrap(); 626 + assert!(output.trim_start().starts_with('[')); 627 + assert!(output.contains("sample-id")); 628 + } 629 + 630 + #[test] 631 + fn ndjson_export_serializes_items() { 632 + let mut buffer = Vec::new(); 633 + export_items(&[sample_item()], ExportFormat::Ndjson, &mut buffer).unwrap(); 634 + let output = String::from_utf8(buffer).unwrap(); 635 + assert!(output.lines().next().unwrap().contains("sample-id")); 636 + } 637 + 638 + #[test] 639 + fn rss_export_contains_items() { 640 + let feed = build_rss_feed(&[sample_item()]).unwrap(); 641 + assert!(feed.contains("<rss")); 642 + assert!(feed.contains("<item>")); 643 + assert!(feed.contains("sample-id")); 644 + } 645 + 646 + #[test] 647 + fn table_writer_emits_rows() { 648 + let mut 
buffer = Vec::new(); 649 + write_items_table(&[sample_item()], &mut buffer).unwrap(); 650 + let output = String::from_utf8(buffer).unwrap(); 651 + assert!(output.contains("Published")); 652 + assert!(output.contains("patternmatched")); 653 + } 654 + 655 + #[test] 656 + fn truncate_column_adds_ellipsis() { 657 + let truncated = truncate_for_column("abcdefghijklmnopqrstuvwxyz", 8); 658 + assert_eq!(truncated, "abcde..."); 659 + } 660 + }

+15 -10

core/src/fetchers/substack.rs

··· 1 1 use crate::{Item, PaiError, Result, SourceFetcher, SourceKind, Storage, SubstackConfig}; 2 2 use chrono::Utc; 3 3 use feed_rs::parser; 4 + use tokio::runtime::Runtime; 4 5 5 6 /// Fetcher for Substack RSS feeds 6 7 /// ··· 37 38 38 39 /// Extracts the source ID from the base URL (e.g., "patternmatched.substack.com") 39 40 fn extract_source_id(&self) -> String { 40 - self.config 41 - .base_url 41 + Self::normalize_source_id(&self.config.base_url) 42 + } 43 + 44 + pub(crate) fn normalize_source_id(base_url: &str) -> String { 45 + base_url 42 46 .trim_start_matches("https://") 43 47 .trim_start_matches("http://") 44 48 .trim_end_matches('/') ··· 48 52 49 53 impl SourceFetcher for SubstackFetcher { 50 54 fn sync(&self, storage: &dyn Storage) -> Result<()> { 51 - let runtime = 52 - tokio::runtime::Runtime::new().map_err(|e| PaiError::Fetch(format!("Failed to create runtime: {e}")))?; 55 + let runtime = Runtime::new().map_err(|e| PaiError::Fetch(format!("Failed to create runtime: {e}")))?; 53 56 54 57 runtime.block_on(async { 55 58 let feed = self.fetch_feed().await?; ··· 131 134 132 135 #[test] 133 136 fn extract_source_id_https() { 134 - let config = SubstackConfig { enabled: true, base_url: "https://patternmatched.substack.com".to_string() }; 135 - let fetcher = SubstackFetcher::new(config); 136 - assert_eq!(fetcher.extract_source_id(), "patternmatched.substack.com"); 137 + assert_eq!( 138 + SubstackFetcher::normalize_source_id("https://patternmatched.substack.com"), 139 + "patternmatched.substack.com" 140 + ); 137 141 } 138 142 139 143 #[test] 140 144 fn extract_source_id_http() { 141 - let config = SubstackConfig { enabled: true, base_url: "http://test.substack.com/".to_string() }; 142 - let fetcher = SubstackFetcher::new(config); 143 - assert_eq!(fetcher.extract_source_id(), "test.substack.com"); 145 + assert_eq!( 146 + SubstackFetcher::normalize_source_id("http://test.substack.com/"), 147 + "test.substack.com" 148 + ); 144 149 } 145 150 146 151 #[test]

+4 -1

core/src/lib.rs

··· 13 13 #[error("Unknown source kind: {0}")] 14 14 UnknownSourceKind(String), 15 15 16 + #[error("Invalid argument: {0}")] 17 + InvalidArgument(String), 18 + 16 19 #[error("Storage error: {0}")] 17 20 Storage(String), 18 21 ··· 64 67 } 65 68 66 69 /// Represents a single content item from any source 67 - #[derive(Debug, Clone)] 70 + #[derive(Debug, Clone, Serialize, Deserialize)] 68 71 pub struct Item { 69 72 /// Unique identifier for the item 70 73 pub id: String,

Configure Feed

Configure Feed