Fetch bookmarks from Karakeep, download their videos with yt-dlp, compile them into a single MP4, and generate a report. Videos longer than 3 minutes are excluded.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

first commit

Alexandre Girard 29c3530e

+266
+4
.env.example
··· 1 + # Karakeep API Configuration 2 + KARAKEEP_BASE_URL=http://your-karakeep-instance:3080 3 + KARAKEEP_LIST_ID=your_list_id_here 4 + KARAKEEP_API_KEY=your_api_key_here
+31
.gitignore
··· 1 + # Environment variables 2 + CLAUDE.md 3 + .env 4 + 5 + # Python 6 + __pycache__/ 7 + *.py[cod] 8 + *$py.class 9 + *.so 10 + .Python 11 + .venv/ 12 + venv/ 13 + ENV/ 14 + env/ 15 + 16 + # Application outputs 17 + downloads/ 18 + compilation/ 19 + karakeep_response.json 20 + filelist.txt 21 + 22 + # IDE 23 + .vscode/ 24 + .idea/ 25 + *.swp 26 + *.swo 27 + *~ 28 + 29 + # OS 30 + .DS_Store 31 + Thumbs.db
+34
README.md
··· 1 + # Quick Compil 2 + 3 + Fetch bookmarks from Karakeep, download videos via yt-dlp, compile to single MP4, generate report. Filters videos >3min. 4 + 5 + ## Prerequisites 6 + 7 + - Python 3.7+: `pip install requests python-dotenv jinja2` 8 + - yt-dlp: `pip install yt-dlp` 9 + - FFmpeg: `sudo apt install ffmpeg` 10 + - Karakeep instance with API access 11 + 12 + ## Setup 13 + 14 + ```bash 15 + cp .env.example .env 16 + ``` 17 + 18 + Edit `.env`: 19 + ```env 20 + KARAKEEP_BASE_URL=http://localhost:3080 21 + KARAKEEP_LIST_ID=your_list_id 22 + KARAKEEP_API_KEY=ak2_xxxxx 23 + ``` 24 + 25 + ## Usage 26 + 27 + ```bash 28 + python main.py 29 + python main.py --start-date 2025-09-23 --end-date 2025-09-30 30 + ``` 31 + 32 + ## License 33 + 34 + MIT
+173
main.py
#!/usr/bin/env python3
"""
MIT License

Copyright (c) 2025

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

---

INDEX (each step is one function, called in this order by main()):
     1. Parse command line arguments .................... parse_args()
     2. Load environment variables ...................... load_config()
     3. Format dates .................................... to_epoch_ms()
     4. Fetch bookmarks from Karakeep API ............... fetch_bookmarks()
     5. Save raw response ............................... main()
     6. Extract bookmarks ............................... main()
     7. Filter bookmarks by date range .................. filter_by_date()
     8. Extract URLs from filtered bookmarks ............ extract_urls()
     9. Create downloads directory ...................... main()
    10. Download videos using yt-dlp .................... download_videos()
    11. Filter videos by duration (max 3 min) ........... select_short_videos()
    12. Create file list for ffmpeg ..................... write_concat_list()
    13. Create compilation directory .................... main()
    14. Compile videos using ffmpeg ..................... compile_videos()
    15. Generate bookmark report ........................ write_report()
"""

import argparse
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timedelta
from pathlib import Path

import requests
from dotenv import load_dotenv
from jinja2 import Template

# Videos strictly longer than this are deleted and left out of the compilation.
MAX_DURATION_SECONDS = 180

# Without a timeout, requests waits forever on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 30

REQUIRED_ENV_VARS = ('KARAKEEP_BASE_URL', 'KARAKEEP_LIST_ID', 'KARAKEEP_API_KEY')

URL_PATTERN = re.compile(r'https?://\S+')

RAW_RESPONSE_PATH = Path('karakeep_response.json')
CONCAT_LIST_PATH = Path('filelist.txt')
TEMPLATE_PATH = Path('templates/bookmark_report.md.j2')


def parse_args(argv=None):
    """Parse ``--start-date`` / ``--end-date`` (ISO 8601 strings).

    Defaults cover the last seven days up to now. Invalid dates abort with
    the usual argparse usage error instead of a traceback later on.

    NOTE: a date-only value such as ``2025-09-30`` means midnight at the
    start of that day in local time, so bookmarks created later that day
    fall outside the range.
    """
    now = datetime.now()
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--start-date',
        type=str,
        default=(now - timedelta(days=7)).isoformat(),
    )
    parser.add_argument(
        '--end-date',
        type=str,
        default=now.isoformat(),
    )
    args = parser.parse_args(argv)

    for option, value in (('--start-date', args.start_date),
                          ('--end-date', args.end_date)):
        try:
            datetime.fromisoformat(value)
        except ValueError:
            parser.error(f'{option}: {value!r} is not a valid ISO 8601 date')
    return args


def load_config():
    """Load ``.env`` and return ``(base_url, list_id, api_key)``.

    Exits with a readable message when a required variable is missing;
    otherwise the request would be sent to a URL containing ``None``.
    """
    load_dotenv()
    missing = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
    if missing:
        sys.exit(f"Missing required environment variable(s): {', '.join(missing)}")
    base_url, list_id, api_key = (os.getenv(name) for name in REQUIRED_ENV_VARS)
    return base_url.rstrip('/'), list_id, api_key


def to_epoch_ms(iso_string):
    """Convert an ISO 8601 string to milliseconds since the Unix epoch.

    Naive values are interpreted as local time by ``datetime.timestamp()``.
    """
    return datetime.fromisoformat(iso_string).timestamp() * 1000


def fetch_bookmarks(base_url, list_id, api_key):
    """Return the decoded JSON body of the list's bookmarks endpoint.

    Raises ``requests.HTTPError`` on a non-2xx status so an error payload is
    not mistaken for an empty bookmark list.

    NOTE(review): only a single request is made. If the API paginates its
    results, bookmarks beyond the first page are missed - confirm against
    the Karakeep API documentation.
    """
    response = requests.get(
        f"{base_url}/api/v1/lists/{list_id}/bookmarks",
        headers={'Authorization': f"Bearer {api_key}"},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    return response.json()


def filter_by_date(bookmarks, start_ts, end_ts):
    """Return the bookmarks whose ``createdAt`` lies in ``[start_ts, end_ts]``.

    Bounds are epoch milliseconds. A trailing ``Z`` is rewritten to
    ``+00:00`` because ``datetime.fromisoformat`` rejects it before
    Python 3.11.
    """
    return [
        bookmark for bookmark in bookmarks
        if start_ts <= to_epoch_ms(bookmark['createdAt'].replace('Z', '+00:00')) <= end_ts
    ]


def extract_urls(bookmarks):
    """Collect http(s) URLs from each bookmark's content URL and title.

    ``content``, ``url`` and ``title`` may each be absent or null. Duplicate
    URLs are dropped while keeping first-seen order.
    """
    urls = []
    for bookmark in bookmarks:
        link = (bookmark.get('content') or {}).get('url') or ''
        title = bookmark.get('title') or ''
        urls.extend(URL_PATTERN.findall(f'{link} {title}'))
    return list(dict.fromkeys(urls))


def download_videos(urls, download_dir):
    """Download every URL with yt-dlp into ``download_dir`` (best effort).

    A failing URL is reported on stderr and does not stop the run, since
    bookmarks routinely contain links yt-dlp cannot handle.
    """
    for url in urls:
        result = subprocess.run([
            'yt-dlp',
            '--cookies-from-browser', 'firefox',
            '-o', f'{download_dir.as_posix()}/%(id)s.%(ext)s',
            url,
        ], check=False)
        if result.returncode != 0:
            print(f'warning: yt-dlp exited with {result.returncode} for {url}',
                  file=sys.stderr)


def probe_duration(path):
    """Return the media duration in seconds via ffprobe, or ``None`` if unknown."""
    result = subprocess.run([
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        str(path),
    ], capture_output=True, text=True, check=False)
    try:
        return float(result.stdout.strip())
    except ValueError:
        return None


def select_short_videos(download_dir, max_seconds=MAX_DURATION_SECONDS):
    """Return the files in ``download_dir`` to compile, sorted by name.

    Files longer than ``max_seconds`` are deleted from disk. Files whose
    duration cannot be determined are kept.
    """
    kept = []
    for path in sorted(download_dir.glob('*')):
        if not path.is_file():
            continue
        duration = probe_duration(path)
        if duration is not None and duration > max_seconds:
            path.unlink()
        else:
            kept.append(path)
    return kept


def write_concat_list(files, list_path):
    """Write an ffmpeg concat-demuxer list with one ``file '...'`` line per input."""
    lines = []
    for path in files:
        # Inside single quotes, a literal quote must be written as '\''.
        escaped = str(path.resolve()).replace("'", "'\\''")
        lines.append(f"file '{escaped}'")
    list_path.write_text('\n'.join(lines), encoding='utf-8')


def compile_videos(list_path, output_path):
    """Concatenate and re-encode the listed files into ``output_path``.

    Returns ffmpeg's exit code.
    """
    result = subprocess.run([
        'ffmpeg',
        '-f', 'concat',
        '-safe', '0',
        '-i', str(list_path),
        '-c:v', 'libx264',
        '-preset', 'fast',
        '-crf', '23',
        '-c:a', 'aac',
        '-b:a', '128k',
        '-r', '30',
        '-g', '30',
        '-avoid_negative_ts', 'make_zero',
        '-fflags', '+genpts',
        '-movflags', '+faststart',
        '-y',
        str(output_path),
    ], check=False)
    return result.returncode


def write_report(bookmarks, start_date, end_date, output_path):
    """Render the Markdown bookmark report from the Jinja2 template."""
    template = Template(TEMPLATE_PATH.read_text(encoding='utf-8'))
    report = template.render(
        start_date=start_date,
        end_date=end_date,
        bookmarks=bookmarks,
    )
    output_path.write_text(report, encoding='utf-8')


def main():
    """Run the pipeline: fetch, filter, download, compile, report."""
    args = parse_args()
    base_url, list_id, api_key = load_config()

    formatted_end_date = datetime.fromisoformat(args.end_date).strftime('%Y_%m_%d')
    start_ts = to_epoch_ms(args.start_date)
    end_ts = to_epoch_ms(args.end_date)

    response = fetch_bookmarks(base_url, list_id, api_key)
    RAW_RESPONSE_PATH.write_text(json.dumps(response, indent=2), encoding='utf-8')

    bookmarks = response.get('bookmarks', [])
    filtered_bookmarks = filter_by_date(bookmarks, start_ts, end_ts)
    urls = extract_urls(filtered_bookmarks)

    download_dir = Path('downloads') / formatted_end_date
    download_dir.mkdir(parents=True, exist_ok=True)
    download_videos(urls, download_dir)

    files = select_short_videos(download_dir)
    write_concat_list(files, CONCAT_LIST_PATH)

    compilation_dir = Path('compilation')
    compilation_dir.mkdir(exist_ok=True)

    if files:
        exit_code = compile_videos(
            CONCAT_LIST_PATH, compilation_dir / f'{formatted_end_date}.mp4'
        )
        if exit_code != 0:
            print(f'warning: ffmpeg exited with {exit_code}', file=sys.stderr)
    else:
        # ffmpeg fails on an empty concat list, so skip straight to the report.
        print('No videos to compile; skipping ffmpeg.', file=sys.stderr)

    write_report(
        filtered_bookmarks,
        args.start_date,
        args.end_date,
        compilation_dir / f'{formatted_end_date}.md',
    )


if __name__ == '__main__':
    main()
+4
requirements.txt
··· 1 + requests 2 + python-dotenv 3 + jinja2 4 + yt-dlp
+20
templates/bookmark_report.md.j2
··· 1 + # Bookmark Report 2 + **Period:** {{ start_date }} to {{ end_date }} 3 + **Total Bookmarks:** {{ bookmarks|length }} 4 + 5 + --- 6 + 7 + {% for bookmark in bookmarks %} 8 + ## {{ loop.index }}. {{ bookmark.title or 'Untitled' }} 9 + 10 + **URL:** {{ bookmark.content.url }} 11 + **Created:** {{ bookmark.createdAt }} 12 + {% if bookmark.content.author %}**Author:** {{ bookmark.content.author }} {% endif %} 13 + {% if bookmark.content.publisher %}**Publisher:** {{ bookmark.content.publisher }} {% endif %} 14 + {% if bookmark.content.description %}**Description:** {{ bookmark.content.description }} {% endif %} 15 + {% if bookmark.tags %}**Tags:** {{ bookmark.tags|map(attribute='name')|join(', ') }} {% endif %} 16 + {% if bookmark.note %}**Note:** {{ bookmark.note }} {% endif %} 17 + 18 + --- 19 + 20 + {% endfor %}