A from-scratch atproto PDS implementation in Python (mirrors https://github.com/DavidBuchanan314/millipds)

wip account creation

+3
millipds_dev.dockerfile
··· 20 20 # init the db with dev presets 21 21 RUN python3 -m millipds init millipds.test --dev 22 22 23 + # create a test user 24 + RUN python3 -m millipds account create bob.test did:web:bob.test --unsafe_password=hunter2 25 + 23 26 # do the thing 24 27 CMD python3 -m millipds run --listen_host=0.0.0.0 --listen_port=8123 25 28
+41 -2
src/millipds/__main__.py
··· 3 3 Usage: 4 4 millipds init <hostname> [--dev|--sandbox] 5 5 millipds config [--pds_pfx=URL] [--pds_did=DID] [--bsky_appview_pfx=URL] [--bsky_appview_did=DID] 6 + millipds account create <did> <handle> [--unsafe_password=PW] 6 7 millipds run [--sock_path=PATH] [--listen_host=HOST] [--listen_port=PORT] 7 8 millipds (-h | --help) 8 9 millipds --version ··· 19 20 Any options not specified will be left at their previous values. Once changes 20 21 have been made (or even if they haven't), the new config will be printed. 21 22 23 + Do not change the config while the PDS is running (TODO: enforce this in code (or make sure it's harmless?)) 24 + 22 25 --pds_pfx=URL The HTTP URL prefix that this PDS is publicly accessible at (e.g. mypds.example) 23 26 --pds_did=DID This PDS's DID (e.g. did:web:mypds.example) 24 27 --bsky_appview_pfx=URL AppView URL prefix e.g. "https://api.bsky-sandbox.dev" 25 28 --bsky_appview_did=DID AppView DID e.g. did:web:api.bsky-sandbox.dev 26 29 30 + Account create: 31 + Create a new user account on the PDS. Bring your own DID and corresponding 32 + handle - millipds will not (yet?) attempt to validate either. 33 + You'll be prompted for a password interactively. 34 + 35 + TODO: consider bring-your-own signing key? 36 + 37 + --unsafe_password=PW Specify password non-iteractively, for use in test scripts etc. 38 + 27 39 Run: 28 40 Launch the service (in the foreground) 29 41 ··· 36 48 --version Show version. 37 49 """ 38 50 39 - from docopt import docopt 40 51 import importlib.metadata 41 52 import asyncio 53 + import sys 54 + import logging 55 + from getpass import getpass 56 + 57 + from docopt import docopt 42 58 43 59 from . import service 44 60 from . import database 61 + from . import crypto 62 + 63 + 64 + logging.basicConfig(level=logging.DEBUG) # TODO: make this configurable? 65 + 45 66 46 67 """ 47 68 This is the entrypoint for the `millipds` command (declared in project.scripts) ··· 75 96 bsky_appview_pfx="https://api.bsky-sandbox.dev", 76 97 bsky_appview_did="did:web:api.bsky-sandbox.dev", 77 98 ) 78 - else: 99 + else: # "prod" presets 79 100 db.update_config( 80 101 pds_pfx=f'https://{args["<hostname>"]}', 81 102 pds_did=f'did:web:{args["<hostname>"]}', ··· 98 119 bsky_appview_did=args["--bsky_appview_did"], 99 120 ) 100 121 db.print_config() 122 + elif args["account"]: 123 + if args["create"]: 124 + pw = args["--unsafe_password"] 125 + if pw: 126 + # rationale: only allow non-iteractive password input from scripts etc. 127 + if sys.stdin.buffer.isatty(): 128 + print("error: --unsafe_password can't be used from an interactive shell") 129 + return 130 + else: 131 + pw = getpass(f"Password for new account: ") 132 + db.account_create( 133 + did=args["<did>"], 134 + handle=args["<handle>"], 135 + password=pw, 136 + privkey=crypto.keygen_p256() # TODO: supply from arg 137 + ) 138 + else: 139 + print("CLI arg parse error?!") 101 140 elif args["run"]: 102 141 asyncio.run(service.run( 103 142 sock_path=args["--sock_path"],
+22
src/millipds/crypto.py
··· 1 1 from cryptography.hazmat.primitives.asymmetric import ec 2 2 from cryptography.hazmat.primitives import hashes 3 + from cryptography.hazmat.primitives import serialization 3 4 from cryptography.hazmat.primitives.asymmetric.utils import decode_dss_signature, encode_dss_signature 4 5 from cryptography.exceptions import InvalidSignature 5 6 ··· 47 48 ) 48 49 signature = r.to_bytes(32, "big") + s.to_bytes(32, "big") 49 50 return signature 51 + 52 + 53 + def keygen_p256() -> ec.EllipticCurvePrivateKey: 54 + return ec.generate_private_key(ec.SECP256R1()) 55 + 56 + 57 + def privkey_to_pem(privkey: ec.EllipticCurvePrivateKey) -> str: 58 + return privkey.private_bytes( 59 + encoding=serialization.Encoding.PEM, 60 + format=serialization.PrivateFormat.PKCS8, 61 + encryption_algorithm=serialization.NoEncryption() 62 + ).decode() 63 + 64 + 65 + def privkey_from_pem(pem: str) -> ec.EllipticCurvePrivateKey: 66 + privkey = serialization.load_pem_private_key(pem.encode(), password=None) 67 + if not isinstance(privkey, ec.EllipticCurvePrivateKey): 68 + raise TypeError("unsupported key type") 69 + if not isinstance(privkey.curve, (ec.SECP256R1, ec.SECP256K1)): 70 + raise TypeError("unsupported key type") 71 + return privkey
+99 -5
src/millipds/database.py
··· 1 1 """ 2 - Ideally, all SQL statements are contained within this file 2 + Ideally, all SQL statements are contained within this file. 3 + 4 + Password hashing also happens in here, because it doesn't make much sense to do 5 + it anywhere else. 3 6 """ 4 7 5 8 from typing import Optional, Dict 6 9 from functools import cached_property 7 10 import secrets 11 + import os 12 + import logging 8 13 14 + from argon2 import PasswordHasher # maybe this should come from .crypto? 9 15 import apsw 16 + import apsw.bestpractice 17 + 18 + from atmst.blockstore import BlockStore 10 19 11 20 from . import static_config 12 21 from . import util 22 + from . import crypto 23 + 24 + logger = logging.getLogger(__name__) 25 + 26 + # https://rogerbinns.github.io/apsw/bestpractice.html 27 + apsw.bestpractice.apply(apsw.bestpractice.recommended) 13 28 14 29 class Database: 15 30 def __init__(self, path: str=static_config.MAIN_DB_PATH) -> None: 16 31 util.mkdirs_for_file(path) 17 32 self.con = apsw.Connection(path) 33 + self.pw_hasher = PasswordHasher() 18 34 19 35 try: 20 36 if self.config["db_version"] != static_config.MILLIPDS_DB_VERSION: ··· 27 43 self._init_central_tables() 28 44 29 45 def _init_central_tables(self): 46 + logger.info("initing central tables") 30 47 self.con.execute( 31 48 """ 32 49 CREATE TABLE config( ··· 42 59 43 60 self.con.execute( 44 61 """ 45 - INSERT INTO config ( 62 + INSERT INTO config( 46 63 db_version, 47 64 jwt_access_secret 48 65 ) VALUES (?, ?) ··· 54 71 """ 55 72 CREATE TABLE user( 56 73 did TEXT PRIMARY KEY NOT NULL, 74 + handle TEXT NOT NULL, 57 75 prefs BLOB NOT NULL, 58 76 pw_hash TEXT NOT NULL, 59 77 repo_path TEXT NOT NULL, ··· 61 79 ) 62 80 """ 63 81 ) 82 + 83 + self.con.execute("CREATE UNIQUE INDEX user_by_handle ON user(handle)") 64 84 65 85 self.con.execute( 66 86 """ ··· 84 104 if pds_did is not None: 85 105 self.con.execute("UPDATE config SET pds_did=?", (pds_did,)) 86 106 if bsky_appview_pfx is not None: 87 - self.con.execute("UPDATE config SET bsky_appview_pfx=?", (bsky_appview_pfx,)) 107 + self.con.execute( 108 + "UPDATE config SET bsky_appview_pfx=?", 109 + (bsky_appview_pfx,) 110 + ) 88 111 if bsky_appview_did is not None: 89 - self.con.execute("UPDATE config SET bsky_appview_did=?", (bsky_appview_did,)) 112 + self.con.execute( 113 + "UPDATE config SET bsky_appview_did=?", 114 + (bsky_appview_did,) 115 + ) 90 116 91 - del self.config # invalidate the cached value 117 + try: 118 + del self.config # invalidate the cached value 119 + except AttributeError: 120 + pass 92 121 93 122 @cached_property 94 123 def config(self) -> Dict[str, object]: ··· 117 146 if redact_secrets and "secret" in k: 118 147 v = "[REDACTED]" 119 148 print(f"{k:<{maxlen}} : {v!r}") 149 + 150 + def account_create(self, 151 + did: str, 152 + handle: str, 153 + password: str, 154 + privkey: crypto.ec.EllipticCurvePrivateKey 155 + ) -> None: 156 + pw_hash = self.pw_hasher.hash(password) 157 + privkey_pem = crypto.privkey_to_pem(privkey) 158 + repo_path = f"{static_config.REPOS_DIR}/{util.did_to_safe_filename(did)}.sqlite3" 159 + logger.info( 160 + f"creating account for did={did}, handle={handle} at {repo_path}" 161 + ) 162 + with self.con: 163 + self.con.execute( 164 + """ 165 + INSERT INTO user( 166 + did, 167 + handle, 168 + prefs, 169 + pw_hash, 170 + repo_path, 171 + signing_key 172 + ) VALUES (?, ?, ?, ?, ?, ?) 173 + """, 174 + (did, handle, b"{}", pw_hash, repo_path, privkey_pem) 175 + ) 176 + UserDatabase.init_tables(self.con, did, repo_path) 177 + self.con.execute("DETACH spoke") 178 + 179 + 180 + class UserDBBlockStore(BlockStore): 181 + pass # TODO 182 + 183 + 184 + class UserDatabase: 185 + def __init__(self, wcon: apsw.Connection, did: str, path: str) -> None: 186 + self.wcon = wcon # writes go via the hub database connection, using ATTACH 187 + self.rcon = apsw.Connection(path, flags=apsw.SQLITE_OPEN_READONLY) 188 + 189 + # TODO: check db version and did match 190 + 191 + @staticmethod 192 + def init_tables(wcon: apsw.Connection, did: str, path: str) -> None: 193 + util.mkdirs_for_file(path) 194 + wcon.execute("ATTACH ? AS spoke", (path,)) 195 + 196 + wcon.execute( 197 + """ 198 + CREATE TABLE spoke.repo( 199 + db_version INTEGER NOT NULL, 200 + did TEXT NOT NULL 201 + ) 202 + """ 203 + ) 204 + 205 + wcon.execute( 206 + "INSERT INTO spoke.repo(db_version, did) VALUES (?, ?)", 207 + (static_config.MILLIPDS_DB_VERSION, did) 208 + ) 209 + 210 + # TODO: the other tables 211 + 212 + # nb: caller is responsible for running "DETACH spoke", after the end 213 + # of the transaction
+1 -3
src/millipds/service.py
··· 9 9 10 10 from . import static_config 11 11 12 - logging.basicConfig(level=logging.DEBUG) # TODO: make this configurable? 13 - 14 12 15 13 async def hello(request: web.Request): 16 14 version = importlib.metadata.version("millipds") ··· 46 44 This gets invoked via millipds.__main__.py 47 45 """ 48 46 async def run(sock_path: Optional[str], host: str, port: int): 49 - runner = web.AppRunner(app, access_log_format=static_config.LOG_FMT) 47 + runner = web.AppRunner(app, access_log_format=static_config.HTTP_LOG_FMT) 50 48 await runner.setup() 51 49 52 50 if sock_path is None:
+4 -4
src/millipds/static_config.py
··· 4 4 (some of this stuff might want to be broken out into a proper config file, eventually) 5 5 """ 6 6 7 - LOG_FMT = '%{X-Forwarded-For}i %t (%Tf) "%r" %s %b "%{Referer}i" "%{User-Agent}i"' 7 + HTTP_LOG_FMT = '%{X-Forwarded-For}i %t (%Tf) "%r" %s %b "%{Referer}i" "%{User-Agent}i"' 8 8 9 9 GROUPNAME = "millipds-sock" 10 10 11 11 MILLIPDS_DB_VERSION = 1 # this gets bumped if we make breaking changes to the db schema 12 12 13 - DATA_DIR = "./data/" 14 - MAIN_DB_PATH = DATA_DIR + "millipds.sqlite3" 15 - REPOS_DIR = DATA_DIR + "repos/" 13 + DATA_DIR = "./data" 14 + MAIN_DB_PATH = DATA_DIR + "/millipds.sqlite3" 15 + REPOS_DIR = DATA_DIR + "/repos"
+16
src/millipds/util.py
··· 1 1 import os 2 + import hashlib 2 3 3 4 def mkdirs_for_file(path: str) -> None: 4 5 os.makedirs(os.path.dirname(path), exist_ok=True) 6 + 7 + FILANEME_SAFE_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 8 + 9 + def did_to_safe_filename(did: str) -> str: 10 + """ 11 + The format is <sha256(did)>_<filtered_did> 12 + The former guarantees uniqueness, and the latter makes it human-recognizeable (ish) 13 + """ 14 + 15 + hexdigest = hashlib.sha256(did.encode()).hexdigest() 16 + filtered = "".join(char for char in did if char in FILANEME_SAFE_CHARS) 17 + 18 + # Truncate to make sure we're staying within PATH_MAX 19 + # (with room to spare, in case the caller appends a file extension) 20 + return f"{hexdigest}_{filtered}"[:200]