personal memory agent

Polish import content review: markdown rendering, enriched meta, complete fixtures

- Render markdown content with marked.js instead of raw <pre> tags
- Add sanitizeMarkdown() to strip dangerous HTML from user-imported content
- Add CSS for .import-content-markdown with constrained heading/list styling
- Strip markdown syntax from preview text in frontend (stripMarkdown helper)
- Enrich ICS importer manifest meta: time_range, attendee_names, recurrence
- Generate plain-text preview from structured data when description is empty
- Add event-specific card display: time range, attendee names, location
- Display tags for Obsidian notes in card meta
- Fix Gemini importer missing message_count in manifest meta
- Strip markdown from Obsidian preview text in importer
- Complete fixture chains for all 6 source types:
  - Add import.json + imported.json + content_manifest.jsonl for
    Kindle, Obsidian, Claude, and Gemini
  - Update ICS manifest with enriched meta fields
  - ChatGPT fixtures verified (already correct)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+238 -13
+42 -3
apps/import/_detail.html
··· 66 66 .import-content-item-body { display: none; padding: 0.75em 1em; border-top: 1px solid #e5e5e5; background: #fafafa; } 67 67 .import-content-item.expanded .import-content-item-body { display: block; } 68 68 .import-content-item-body pre { white-space: pre-wrap; font-size: 0.9em; margin: 0; } 69 + .import-content-markdown { font-size: 0.95em; line-height: 1.6; } 70 + .import-content-markdown h2 { font-size: 1.1em; margin: 0 0 0.5em 0; } 71 + .import-content-markdown h3 { font-size: 1em; margin: 0.75em 0 0.25em 0; } 72 + .import-content-markdown ul, .import-content-markdown ol { margin: 0.25em 0; padding-left: 1.5em; } 73 + .import-content-markdown li { margin: 0.15em 0; } 74 + .import-content-markdown blockquote { border-left: 3px solid #ddd; margin: 0.5em 0; padding: 0.25em 0.75em; color: #555; } 75 + .import-content-markdown p { margin: 0.4em 0; } 76 + .import-content-markdown hr { border: none; border-top: 1px solid #e5e5e5; margin: 1em 0; } 77 + .import-content-markdown code { background: #f0f0f0; padding: 1px 4px; border-radius: 3px; font-size: 0.9em; } 78 + .import-content-markdown pre code { background: none; padding: 0; } 69 79 .import-content-item-body .import-content-message { margin: 0.5em 0; } 70 80 .import-content-item-body .import-content-speaker { font-weight: 600; margin-right: 0.5em; } 71 81 .import-content-item-body .import-content-speaker.human { color: #007bff; } ··· 145 155 146 156 </div> 147 157 158 + <script src="{{ vendor_lib('marked') }}"></script> 148 159 <script> 160 + // Configure marked for safe rendering of imported content 161 + marked.setOptions({ breaks: true, gfm: true }); 162 + const markedRenderer = new marked.Renderer(); 163 + // Sanitize: strip raw HTML tags from markdown input to prevent XSS from user-imported content 164 + function sanitizeMarkdown(md) { 165 + return md.replace(/<(script|iframe|object|embed|form|input|textarea|button|select|style|link|meta)[^>]*>[\s\S]*?<\/\1>/gi, '') 166 + 
.replace(/<(script|iframe|object|embed|form|input|textarea|button|select|style|link|meta)[^>]*\/?>/gi, '') 167 + .replace(/\bon\w+\s*=\s*["'][^"']*["']/gi, '') 168 + .replace(/javascript\s*:/gi, 'blocked:'); 169 + } 170 + 149 171 // Tab switching 150 172 const tabs = document.querySelectorAll('.tab'); 151 173 const contents = document.querySelectorAll('.tab-content'); ··· 256 278 <div class="import-content-item-meta"> 257 279 ${item.meta?.message_count ? `${item.meta.message_count} messages` : ''} 258 280 ${item.meta?.highlight_count ? `${item.meta.highlight_count} highlights` : ''} 259 - ${item.meta?.duration_minutes ? `${item.meta.duration_minutes} min` : ''} 281 + ${item.meta?.time_range ? `${escapeContentHtml(item.meta.time_range)}` : (item.meta?.duration_minutes ? `${item.meta.duration_minutes} min` : '')} 282 + ${item.meta?.attendee_names?.length ? ` · ${item.meta.attendee_names.map(n => escapeContentHtml(n)).join(', ')}` : (item.meta?.attendee_count ? ` · ${item.meta.attendee_count} attendees` : '')} 283 + ${item.meta?.location ? ` · ${escapeContentHtml(item.meta.location)}` : ''} 260 284 ${item.meta?.author ? `· ${escapeContentHtml(item.meta.author)}` : ''} 285 + ${item.meta?.tags?.length ? `· ${item.meta.tags.map(t => escapeContentHtml(t)).join(', ')}` : ''} 261 286 ${calLink ? ` · <a href="${calLink}" class="import-content-cal-link" onclick="event.stopPropagation();">View in calendar →</a>` : ''} 262 287 </div> 263 - ${item.preview ? `<div class="import-content-item-preview">${escapeContentHtml(item.preview)}</div>` : ''} 288 + ${item.preview ? 
`<div class="import-content-item-preview">${escapeContentHtml(stripMarkdown(item.preview))}</div>` : ''} 264 289 <div class="import-content-item-body"><div class="no-data">Loading...</div></div> 265 290 </div>`; 266 291 }); ··· 299 324 300 325 return data.content.map(part => { 301 326 if (part.type === 'markdown') { 302 - return `<pre>${escapeContentHtml(part.content)}</pre>`; 327 + return `<div class="import-content-markdown">${marked.parse(sanitizeMarkdown(part.content))}</div>`; 303 328 } 304 329 const speaker = part.speaker || 'Unknown'; 305 330 const cls = speaker === 'Human' ? 'human' : 'assistant'; ··· 331 356 .replace(/</g, '&lt;') 332 357 .replace(/>/g, '&gt;') 333 358 .replace(/"/g, '&quot;'); 359 + } 360 + 361 + function stripMarkdown(str) { 362 + if (!str) return ''; 363 + return str 364 + .replace(/^#{1,6}\s+/gm, '') 365 + .replace(/\*\*([^*]+)\*\*/g, '$1') 366 + .replace(/\*([^*]+)\*/g, '$1') 367 + .replace(/^[-*+]\s+/gm, '') 368 + .replace(/^\d+\.\s+/gm, '') 369 + .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') 370 + .replace(/`([^`]+)`/g, '$1') 371 + .replace(/^>\s+/gm, '') 372 + .trim(); 334 373 } 335 374 336 375 document.getElementById('contentSearch').addEventListener('input', function() {
+5 -5
tests/fixtures/journal/imports/20260101_090000/content_manifest.jsonl
··· 1 - {"id": "event-0", "title": "Weekly Engineering Standup", "date": "20260101", "type": "event", "preview": "- **When:** Thursday, January 1, 2026 09:00–09:30 MST", "meta": {}, "segments": [{"day": "20260101", "key": "090000_300"}]} 2 - {"id": "event-1", "title": "1:1 with David Park (Betaworks)", "date": "20260101", "type": "event", "preview": "- **When:** Thursday, January 1, 2026 10:00–10:45 MST", "meta": {}, "segments": [{"day": "20260101", "key": "090000_300"}]} 3 - {"id": "event-2", "title": "Product Review — Solstone v0.4 Roadmap", "date": "20260101", "type": "event", "preview": "- **When:** Thursday, January 1, 2026 11:00–12:00 MST", "meta": {}, "segments": [{"day": "20260101", "key": "090000_300"}]} 4 - {"id": "event-3", "title": "Lunch with Sarah Chen", "date": "20260101", "type": "event", "preview": "- **When:** Thursday, January 1, 2026 12:00–13:00 MST", "meta": {}, "segments": [{"day": "20260101", "key": "093000_300"}]} 5 - {"id": "event-4", "title": "Demo Prep — Betaworks Studio Day", "date": "20260101", "type": "event", "preview": "- **When:** Thursday, January 1, 2026 14:00–15:00 MST", "meta": {}, "segments": [{"day": "20260101", "key": "093000_300"}]} 1 + {"id": "event-0", "title": "Weekly Engineering Standup", "date": "20260101", "type": "event", "preview": "9:00 AM–9:30 AM · Google Meet · jer, maya, erik, carlos · Weekly on Thu", "meta": {"time_range": "9:00 AM–9:30 AM", "location": "Google Meet (meet.google.com/xwr-knvq-paj)", "duration_minutes": 30, "attendee_count": 4, "attendee_names": ["jer", "maya", "erik", "carlos"], "recurrence": "Weekly on Thu"}, "segments": [{"day": "20260101", "key": "090000_300"}]} 2 + {"id": "event-1", "title": "1:1 with David Park (Betaworks)", "date": "20260101", "type": "event", "preview": "Quarterly check-in. 
David wants to discuss follow-on timing and intro to a potential design partner in the media space.", "meta": {"time_range": "10:00 AM–10:45 AM", "location": "Zoom (us06web.zoom.us/j/84729163055)", "duration_minutes": 45, "attendee_count": 2, "attendee_names": ["david@betaworks.com", "jer@solpbc.org"]}, "segments": [{"day": "20260101", "key": "090000_300"}]} 3 + {"id": "event-2", "title": "Product Review — Solstone v0.4 Roadmap", "date": "20260101", "type": "event", "preview": "Walk through v0.4 milestone plan. Key topics: journal segment format finalization, import pipeline reliability, and facet extraction accuracy targets.", "meta": {"time_range": "11:00 AM–12:00 PM", "location": "Google Meet (meet.google.com/bmf-tsyh-rea)", "duration_minutes": 60, "attendee_count": 4, "attendee_names": ["jer", "maya", "erik", "lin"]}, "segments": [{"day": "20260101", "key": "090000_300"}]} 4 + {"id": "event-3", "title": "Lunch with Sarah Chen", "date": "20260101", "type": "event", "preview": "Catch up with Sarah on outstanding items. Review updated operating agreement language around benefit purpose reporting obligations.", "meta": {"time_range": "12:00 PM–1:00 PM", "location": "The Kitchen, 1530 16th St, Denver, CO 80202", "duration_minutes": 60, "attendee_count": 2, "attendee_names": ["jer@solpbc.org", "sarah.chen@whitfield-law.com"]}, "segments": [{"day": "20260101", "key": "093000_300"}]} 5 + {"id": "event-4", "title": "Demo Prep — Betaworks Studio Day", "date": "20260101", "type": "event", "preview": "Dry run for the Betaworks Studio Day demo on Jan 15.", "meta": {"time_range": "2:00 PM–3:00 PM", "location": "Google Meet (meet.google.com/qjz-wmvx-dck)", "duration_minutes": 60, "attendee_count": 3, "attendee_names": ["jer", "maya", "erik"]}, "segments": [{"day": "20260101", "key": "093000_300"}]}
+1
tests/fixtures/journal/imports/20260101_110000/content_manifest.jsonl
··· 1 + {"id": "activity-0", "title": "Kubernetes vs Nomad for small teams", "date": "20260101", "type": "conversation", "preview": "We're a small team at Meridian Labs, about six engineers, and we're still running docker-compose in production. It's starting to fall apart as we add more services.", "meta": {"message_count": 8}, "segments": [{"day": "20260101", "key": "110000_300"}, {"day": "20260101", "key": "110500_300"}]}
+11
tests/fixtures/journal/imports/20260101_110000/import.json
··· 1 + { 2 + "original_filename": "My Activity.zip", 3 + "upload_timestamp": 1767265200000, 4 + "upload_datetime": "2026-01-01T11:00:00", 5 + "user_timestamp": "20260101_110000", 6 + "file_size": 52340, 7 + "mime_type": "application/zip", 8 + "facet": "work", 9 + "setting": "ai_conversation", 10 + "file_path": "imports/20260101_110000/My Activity.zip" 11 + }
+22
tests/fixtures/journal/imports/20260101_110000/imported.json
··· 1 + { 2 + "processed_timestamp": "20260101_110000", 3 + "processing_completed": "2026-01-01T11:03:00", 4 + "total_files_created": 2, 5 + "all_created_files": [ 6 + "20260101/import.gemini/110000_300/conversation_transcript.jsonl", 7 + "20260101/import.gemini/110500_300/conversation_transcript.jsonl" 8 + ], 9 + "segments": [ 10 + "110000_300", 11 + "110500_300" 12 + ], 13 + "source_type": "gemini", 14 + "source_display": "Gemini", 15 + "entries_written": 8, 16 + "entities_seeded": 0, 17 + "date_range": [ 18 + "20260101", 19 + "20260101" 20 + ], 21 + "target_day": "20260101" 22 + }
+1
tests/fixtures/journal/imports/20260101_140000/content_manifest.jsonl
··· 1 + {"id": "conv-0", "title": "Event sourcing trade-offs for Solstone", "date": "20260101", "type": "conversation", "preview": "I'm evaluating whether to store domain events in Postgres directly or use a dedicated event store like EventStoreDB. What are the main trade-offs?", "meta": {"message_count": 15}, "segments": [{"day": "20260101", "key": "140000_300"}, {"day": "20260101", "key": "140500_300"}, {"day": "20260101", "key": "141000_300"}]}
+11
tests/fixtures/journal/imports/20260101_140000/import.json
··· 1 + { 2 + "original_filename": "claude-conversations.json", 3 + "upload_timestamp": 1767276000000, 4 + "upload_datetime": "2026-01-01T14:00:00", 5 + "user_timestamp": "20260101_140000", 6 + "file_size": 67890, 7 + "mime_type": "application/json", 8 + "facet": "work", 9 + "setting": "ai_conversation", 10 + "file_path": "imports/20260101_140000/claude-conversations.json" 11 + }
+24
tests/fixtures/journal/imports/20260101_140000/imported.json
··· 1 + { 2 + "processed_timestamp": "20260101_140000", 3 + "processing_completed": "2026-01-01T14:04:00", 4 + "total_files_created": 3, 5 + "all_created_files": [ 6 + "20260101/import.claude/140000_300/conversation_transcript.jsonl", 7 + "20260101/import.claude/140500_300/conversation_transcript.jsonl", 8 + "20260101/import.claude/141000_300/conversation_transcript.jsonl" 9 + ], 10 + "segments": [ 11 + "140000_300", 12 + "140500_300", 13 + "141000_300" 14 + ], 15 + "source_type": "claude", 16 + "source_display": "Claude Chat", 17 + "entries_written": 15, 18 + "entities_seeded": 0, 19 + "date_range": [ 20 + "20260101", 21 + "20260101" 22 + ], 23 + "target_day": "20260101" 24 + }
+4
tests/fixtures/journal/imports/20260101_160000/content_manifest.jsonl
··· 1 + {"id": "note-0", "title": "Trust Architecture for Decentralized Identity", "date": "20260101", "type": "note", "preview": "Sketching out the trust layer for Solstone. The core idea is that identity verification shouldn't depend on a single authority — it needs to be distributed and composable.", "meta": {"tags": ["trust", "identity", "solstone", "architecture"]}, "segments": [{"day": "20260101", "key": "160000_300"}]} 2 + {"id": "note-1", "title": "Book Rec from Nadia", "date": "20260101", "type": "note", "preview": "Nadia recommended Governing the Commons by Elinor Ostrom over coffee today. Relevant to what we're building with Solstone.", "meta": {"tags": ["reading", "governance", "commons"]}, "segments": [{"day": "20260101", "key": "160000_300"}]} 3 + {"id": "note-2", "title": "Betaworks Call Prep", "date": "20260101", "type": "note", "preview": "Meeting with David Park from Betaworks on Thursday. Need to cover key points about composable trust signals, the protocol layer, and identity providers.", "meta": {"tags": ["meetings", "betaworks", "partnerships"]}, "segments": [{"day": "20260101", "key": "160500_300"}]} 4 + {"id": "note-3", "title": "Stream Processing Patterns", "date": "20260101", "type": "note", "preview": "Documenting the patterns we keep reaching for. These are emerging from actual implementation, not theoretical.", "meta": {"tags": ["engineering", "streams", "architecture", "solstone"]}, "segments": [{"day": "20260101", "key": "160500_300"}]}
+11
tests/fixtures/journal/imports/20260101_160000/import.json
··· 1 + { 2 + "original_filename": "vault-export.zip", 3 + "upload_timestamp": 1767283200000, 4 + "upload_datetime": "2026-01-01T16:00:00", 5 + "user_timestamp": "20260101_160000", 6 + "file_size": 89012, 7 + "mime_type": "application/zip", 8 + "facet": "work", 9 + "setting": "obsidian", 10 + "file_path": "imports/20260101_160000/vault-export.zip" 11 + }
+22
tests/fixtures/journal/imports/20260101_160000/imported.json
··· 1 + { 2 + "processed_timestamp": "20260101_160000", 3 + "processing_completed": "2026-01-01T16:03:00", 4 + "total_files_created": 2, 5 + "all_created_files": [ 6 + "20260101/import.obsidian/160000_300/note_transcript.md", 7 + "20260101/import.obsidian/160500_300/note_transcript.md" 8 + ], 9 + "segments": [ 10 + "160000_300", 11 + "160500_300" 12 + ], 13 + "source_type": "obsidian", 14 + "source_display": "Obsidian / Logseq Vault", 15 + "entries_written": 4, 16 + "entities_seeded": 7, 17 + "date_range": [ 18 + "20260101", 19 + "20260101" 20 + ], 21 + "target_day": "20260101" 22 + }
+2
tests/fixtures/journal/imports/20260101_200000/content_manifest.jsonl
··· 1 + {"id": "book-0", "title": "The Art of Doing Science and Engineering by Richard Hamming", "date": "20260101", "type": "highlight_group", "preview": "Most people spend their careers working on problems that don't really matter. They choose safe, incremental topics where they can guarantee results rather than risking failure on something significant.", "meta": {"author": "Richard Hamming", "highlight_count": 5}, "segments": [{"day": "20260101", "key": "200000_300"}]} 2 + {"id": "book-1", "title": "Governing the Commons by Elinor Ostrom", "date": "20260101", "type": "highlight_group", "preview": "Communities that have successfully managed shared resources over long periods almost never did so by following the prescriptions of outside experts.", "meta": {"author": "Elinor Ostrom", "highlight_count": 4}, "segments": [{"day": "20260101", "key": "200500_300"}]}
+11
tests/fixtures/journal/imports/20260101_200000/import.json
··· 1 + { 2 + "original_filename": "My Clippings.txt", 3 + "upload_timestamp": 1767297600000, 4 + "upload_datetime": "2026-01-01T20:00:00", 5 + "user_timestamp": "20260101_200000", 6 + "file_size": 34521, 7 + "mime_type": "text/plain", 8 + "facet": "reading", 9 + "setting": "kindle", 10 + "file_path": "imports/20260101_200000/My Clippings.txt" 11 + }
+22
tests/fixtures/journal/imports/20260101_200000/imported.json
··· 1 + { 2 + "processed_timestamp": "20260101_200000", 3 + "processing_completed": "2026-01-01T20:02:00", 4 + "total_files_created": 2, 5 + "all_created_files": [ 6 + "20260101/import.kindle/200000_300/highlights_transcript.md", 7 + "20260101/import.kindle/200500_300/highlights_transcript.md" 8 + ], 9 + "segments": [ 10 + "200000_300", 11 + "200500_300" 12 + ], 13 + "source_type": "kindle", 14 + "source_display": "Kindle Highlights", 15 + "entries_written": 9, 16 + "entities_seeded": 4, 17 + "date_range": [ 18 + "20260101", 19 + "20260101" 20 + ], 21 + "target_day": "20260101" 22 + }
+2 -1
think/importers/gemini.py
··· 257 257 "title": prompt[:80] or f"Activity {valid_idx + 1}", 258 258 "first_ts": activity_messages[0]["create_time"], 259 259 "preview": prompt, 260 + "message_count": len(activity_messages), 260 261 } 261 262 valid_idx += 1 262 263 ··· 351 352 "date": first_dt.strftime("%Y%m%d"), 352 353 "type": "conversation", 353 354 "preview": meta["preview"], 354 - "meta": {}, 355 + "meta": {"message_count": meta.get("message_count", 0)}, 355 356 "segments": [ 356 357 {"day": day, "key": key} 357 358 for day, key in sorted(
+31 -1
think/importers/ics.py
··· 477 477 entry["create_ts"], tz=dt.timezone.utc 478 478 ) 479 479 meta: dict[str, Any] = {} 480 + if entry.get("ts") and entry.get("end_ts"): 481 + try: 482 + start_dt = dt.datetime.fromisoformat(entry["ts"]) 483 + end_dt = dt.datetime.fromisoformat(entry["end_ts"]) 484 + meta["time_range"] = ( 485 + f"{start_dt.strftime('%I:%M %p').lstrip('0')}" 486 + f"–{end_dt.strftime('%I:%M %p').lstrip('0')}" 487 + ) 488 + except ValueError: 489 + pass 480 490 if entry.get("location"): 481 491 meta["location"] = entry["location"] 482 492 if entry.get("duration_minutes") is not None: 483 493 meta["duration_minutes"] = entry["duration_minutes"] 484 494 if entry.get("attendees"): 485 495 meta["attendee_count"] = len(entry["attendees"]) 496 + meta["attendee_names"] = [ 497 + a.get("name") or a.get("email", "") 498 + for a in entry["attendees"][:5] 499 + ] 500 + if entry.get("recurrence"): 501 + meta["recurrence"] = entry["recurrence"] 502 + 503 + # Build plain-text preview from structured data if description is empty 504 + preview = entry.get("content", "").strip() 505 + if not preview: 506 + parts: list[str] = [] 507 + if meta.get("time_range"): 508 + parts.append(meta["time_range"]) 509 + if entry.get("location"): 510 + parts.append(entry["location"]) 511 + if meta.get("attendee_names"): 512 + parts.append(", ".join(meta["attendee_names"])) 513 + if entry.get("recurrence"): 514 + parts.append(entry["recurrence"]) 515 + preview = " · ".join(parts) 486 516 manifest_entries.append( 487 517 { 488 518 "id": f"event-{i}", 489 519 "title": entry.get("title", "Untitled event"), 490 520 "date": create_dt.strftime("%Y%m%d"), 491 521 "type": "event", 492 - "preview": entry.get("content", "")[:200], 522 + "preview": preview[:200], 493 523 "meta": meta, 494 524 "segments": [], 495 525 }
+16 -3
think/importers/obsidian.py
··· 316 316 meta["tags"] = note["tags"] 317 317 if note.get("is_daily"): 318 318 meta["is_daily"] = True 319 + raw_preview = _strip_frontmatter(note.get("content", "")).strip()[:300] 320 + # Strip markdown syntax for clean plain-text preview 321 + clean_preview = re.sub(r"^#{1,6}\s+", "", raw_preview, flags=re.MULTILINE) 322 + clean_preview = re.sub(r"\*\*([^*]+)\*\*", r"\1", clean_preview) 323 + clean_preview = re.sub(r"\*([^*]+)\*", r"\1", clean_preview) 324 + clean_preview = re.sub(r"^[-*+]\s+", "", clean_preview, flags=re.MULTILINE) 325 + clean_preview = re.sub( 326 + r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]", 327 + lambda m: m.group(2) or m.group(1), 328 + clean_preview, 329 + ) 330 + clean_preview = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", clean_preview) 331 + clean_preview = re.sub(r"`([^`]+)`", r"\1", clean_preview) 332 + clean_preview = re.sub(r"^>\s+", "", clean_preview, flags=re.MULTILINE) 333 + clean_preview = " ".join(clean_preview.split())[:200] 319 334 note_manifest.append( 320 335 { 321 336 "id": f"note-{i}", 322 337 "title": note["title"], 323 338 "date": note_dt.strftime("%Y%m%d"), 324 339 "type": "note", 325 - "preview": _strip_frontmatter(note.get("content", "")).strip()[ 326 - :200 327 - ], 340 + "preview": clean_preview, 328 341 "meta": meta, 329 342 "segments": [], 330 343 }