Merge pull request #106857 from m1cr0man/master

nixos/acme: Fixes for account creation and remove tmpfiles usage

authored by

Florian Klink and committed by
GitHub
10307455 0ffa153e

+132 -48
+9
nixos/doc/manual/release-notes/rl-2103.xml
··· 612 612 </listitem> 613 613 <listitem> 614 614 <para> 615 + In the ACME module, the data used to build the hash for the account 616 + directory has changed to accomodate new features to reduce account 617 + rate limit issues. This will trigger new account creation on the first 618 + rebuild following this update. No issues are expected to arise from this, 619 + thanks to the new account creation handling. 620 + </para> 621 + </listitem> 622 + <listitem> 623 + <para> 615 624 <xref linkend="opt-users.users._name_.createHome" /> now always ensures home directory permissions to be <literal>0700</literal>. 616 625 Permissions had previously been ignored for already existing home directories, possibly leaving them readable by others. 617 626 The option's description was incorrect regarding ownership management and has been simplified greatly.
+84 -43
nixos/modules/security/acme.nix
··· 7 7 numCerts = length (builtins.attrNames cfg.certs); 8 8 _24hSecs = 60 * 60 * 24; 9 9 10 + # Used to make unique paths for each cert/account config set 11 + mkHash = with builtins; val: substring 0 20 (hashString "sha256" val); 12 + mkAccountHash = acmeServer: data: mkHash "${toString acmeServer} ${data.keyType} ${data.email}"; 13 + accountDirRoot = "/var/lib/acme/.lego/accounts/"; 14 + 10 15 # There are many services required to make cert renewals work. 11 16 # They all follow a common structure: 12 17 # - They inherit this commonServiceConfig ··· 19 24 Type = "oneshot"; 20 25 User = "acme"; 21 26 Group = mkDefault "acme"; 22 - UMask = 0027; 27 + UMask = 0023; 23 28 StateDirectoryMode = 750; 24 29 ProtectSystem = "full"; 25 30 PrivateTmp = true; ··· 54 59 ''; 55 60 }; 56 61 57 - # Previously, all certs were owned by whatever user was configured in 58 - # config.security.acme.certs.<cert>.user. Now everything is owned by and 59 - # run by the acme user. 60 - userMigrationService = { 61 - description = "Fix owner and group of all ACME certificates"; 62 - 63 - script = with builtins; concatStringsSep "\n" (mapAttrsToList (cert: data: '' 64 - for fixpath in /var/lib/acme/${escapeShellArg cert} /var/lib/acme/.lego/${escapeShellArg cert}; do 62 + # Ensures that directories which are shared across all certs 63 + # exist and have the correct user and group, since group 64 + # is configurable on a per-cert basis. 65 + userMigrationService = let 66 + script = with builtins; '' 67 + chown -R acme .lego/accounts 68 + '' + (concatStringsSep "\n" (mapAttrsToList (cert: data: '' 69 + for fixpath in ${escapeShellArg cert} .lego/${escapeShellArg cert}; do 65 70 if [ -d "$fixpath" ]; then 66 71 chmod -R u=rwX,g=rX,o= "$fixpath" 67 72 chown -R acme:${data.group} "$fixpath" 68 73 fi 69 74 done 70 - '') certConfigs); 75 + '') certConfigs)); 76 + in { 77 + description = "Fix owner and group of all ACME certificates"; 78 + 79 + serviceConfig = commonServiceConfig // { 80 + # We don't want this to run every time a renewal happens 81 + RemainAfterExit = true; 82 + 83 + # These StateDirectory entries negate the need for tmpfiles 84 + StateDirectory = [ "acme" "acme/.lego" "acme/.lego/accounts" ]; 85 + StateDirectoryMode = 755; 86 + WorkingDirectory = "/var/lib/acme"; 71 87 72 - # We don't want this to run every time a renewal happens 73 - serviceConfig.RemainAfterExit = true; 88 + # Run the start script as root 89 + ExecStart = "+" + (pkgs.writeShellScript "acme-fixperms" script); 90 + }; 74 91 }; 75 92 76 93 certToConfig = cert: data: let ··· 101 118 ${toString acmeServer} ${toString data.dnsProvider} 102 119 ${toString data.ocspMustStaple} ${data.keyType} 103 120 ''; 104 - mkHash = with builtins; val: substring 0 20 (hashString "sha256" val); 105 121 certDir = mkHash hashData; 106 122 domainHash = mkHash "${concatStringsSep " " extraDomains} ${data.domain}"; 107 - othersHash = mkHash "${toString acmeServer} ${data.keyType} ${data.email}"; 108 - accountDir = "/var/lib/acme/.lego/accounts/" + othersHash; 123 + accountHash = (mkAccountHash acmeServer data); 124 + accountDir = accountDirRoot + accountHash; 109 125 110 126 protocolOpts = if useDns then ( 111 127 [ "--dns" data.dnsProvider ] ··· 142 158 ); 143 159 144 160 in { 145 - inherit accountDir selfsignedDeps; 161 + inherit accountHash cert selfsignedDeps; 146 162 147 - webroot = data.webroot; 148 163 group = data.group; 149 164 150 165 renewTimer = { ··· 184 199 185 200 StateDirectory = "acme/${cert}"; 186 201 187 - BindPaths = "/var/lib/acme/.minica:/tmp/ca /var/lib/acme/${cert}:/tmp/${keyName}"; 202 + BindPaths = [ 203 + "/var/lib/acme/.minica:/tmp/ca" 204 + "/var/lib/acme/${cert}:/tmp/${keyName}" 205 + ]; 188 206 }; 189 207 190 208 # Working directory will be /tmp ··· 222 240 serviceConfig = commonServiceConfig // { 223 241 Group = data.group; 224 242 225 - # AccountDir dir will be created by tmpfiles to ensure correct permissions 226 - # And to avoid deletion during systemctl clean 227 - # acme/.lego/${cert} is listed so that it is deleted during systemctl clean 228 - StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir}"; 243 + # Keep in mind that these directories will be deleted if the user runs 244 + # systemctl clean --what=state 245 + # acme/.lego/${cert} is listed for this reason. 246 + StateDirectory = [ 247 + "acme/${cert}" 248 + "acme/.lego/${cert}" 249 + "acme/.lego/${cert}/${certDir}" 250 + "acme/.lego/accounts/${accountHash}" 251 + ]; 229 252 230 253 # Needs to be space separated, but can't use a multiline string because that'll include newlines 231 - BindPaths = 232 - "${accountDir}:/tmp/accounts " + 233 - "/var/lib/acme/${cert}:/tmp/out " + 234 - "/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates "; 254 + BindPaths = [ 255 + "${accountDir}:/tmp/accounts" 256 + "/var/lib/acme/${cert}:/tmp/out" 257 + "/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates" 258 + ]; 235 259 236 260 # Only try loading the credentialsFile if the dns challenge is enabled 237 261 EnvironmentFile = mkIf useDns data.credentialsFile; ··· 248 272 249 273 # Working directory will be /tmp 250 274 script = '' 251 - set -euo pipefail 275 + set -euxo pipefail 276 + 277 + ${optionalString (data.webroot != null) '' 278 + # Ensure the webroot exists 279 + mkdir -p '${data.webroot}/.well-known/acme-challenge' 280 + chown 'acme:${data.group}' ${data.webroot}/{.well-known,.well-known/acme-challenge} 281 + ''} 252 282 253 283 echo '${domainHash}' > domainhash.txt 254 284 255 285 # Check if we can renew 256 - # Certificates and account credentials must exist 257 - if [ -e 'certificates/${keyName}.key' -a -e 'certificates/${keyName}.crt' -a "$(ls -1 accounts)" ]; then 286 + if [ -e 'certificates/${keyName}.key' -a -e 'certificates/${keyName}.crt' -a -n "$(ls -1 accounts)" ]; then 258 287 259 288 # When domains are updated, there's no need to do a full 260 289 # Lego run, but it's likely renew won't work if days is too low. ··· 664 693 665 694 systemd.timers = mapAttrs' (cert: conf: nameValuePair "acme-${cert}" conf.renewTimer) certConfigs; 666 695 667 - # .lego and .lego/accounts specified to fix any incorrect permissions 668 - systemd.tmpfiles.rules = [ 669 - "d /var/lib/acme/.lego - acme acme" 670 - "d /var/lib/acme/.lego/accounts - acme acme" 671 - ] ++ (unique (concatMap (conf: [ 672 - "d ${conf.accountDir} - acme acme" 673 - ] ++ (optional (conf.webroot != null) "d ${conf.webroot}/.well-known/acme-challenge - acme ${conf.group}") 674 - ) (attrValues certConfigs))); 696 + systemd.targets = let 697 + # Create some targets which can be depended on to be "active" after cert renewals 698 + finishedTargets = mapAttrs' (cert: conf: nameValuePair "acme-finished-${cert}" { 699 + wantedBy = [ "default.target" ]; 700 + requires = [ "acme-${cert}.service" ] ++ conf.selfsignedDeps; 701 + after = [ "acme-${cert}.service" ] ++ conf.selfsignedDeps; 702 + }) certConfigs; 675 703 676 - # Create some targets which can be depended on to be "active" after cert renewals 677 - systemd.targets = mapAttrs' (cert: conf: nameValuePair "acme-finished-${cert}" { 678 - wantedBy = [ "default.target" ]; 679 - requires = [ "acme-${cert}.service" ] ++ conf.selfsignedDeps; 680 - after = [ "acme-${cert}.service" ] ++ conf.selfsignedDeps; 681 - }) certConfigs; 704 + # Create targets to limit the number of simultaneous account creations 705 + # How it works: 706 + # - Pick a "leader" cert service, which will be in charge of creating the account, 707 + # and run first (requires + after) 708 + # - Make all other cert services sharing the same account wait for the leader to 709 + # finish before starting (requiredBy + before). 710 + # Using a target here is fine - account creation is a one time event. Even if 711 + # systemd clean --what=state is used to delete the account, so long as the user 712 + # then runs one of the cert services, there won't be any issues. 713 + accountTargets = mapAttrs' (hash: confs: let 714 + leader = "acme-${(builtins.head confs).cert}.service"; 715 + dependantServices = map (conf: "acme-${conf.cert}.service") (builtins.tail confs); 716 + in nameValuePair "acme-account-${hash}" { 717 + requiredBy = dependantServices; 718 + before = dependantServices; 719 + requires = [ leader ]; 720 + after = [ leader ]; 721 + }) (groupBy (conf: conf.accountHash) (attrValues certConfigs)); 722 + in finishedTargets // accountTargets; 682 723 }) 683 724 ]; 684 725
+8 -4
nixos/modules/security/acme.xml
··· 162 162 <xref linkend="opt-security.acme.certs"/>."foo.example.com" = { 163 163 <link linkend="opt-security.acme.certs._name_.webroot">webroot</link> = "/var/lib/acme/.challenges"; 164 164 <link linkend="opt-security.acme.certs._name_.email">email</link> = "foo@example.com"; 165 + # Ensure that the web server you use can read the generated certs 166 + # Take a look at the <link linkend="opt-services.nginx.group">group</link> option for the web server you choose. 167 + <link linkend="opt-security.acme.certs._name_.group">group</link> = "nginx"; 165 168 # Since we have a wildcard vhost to handle port 80, 166 169 # we can generate certs for anything! 167 170 # Just make sure your DNS resolves them. ··· 257 260 <para> 258 261 Should you need to regenerate a particular certificate in a hurry, such 259 262 as when a vulnerability is found in Let's Encrypt, there is now a convenient 260 - mechanism for doing so. Running <literal>systemctl clean acme-example.com.service</literal> 261 - will remove all certificate files for the given domain, allowing you to then 262 - <literal>systemctl start acme-example.com.service</literal> to generate fresh 263 - ones. 263 + mechanism for doing so. Running 264 + <literal>systemctl clean --what=state acme-example.com.service</literal> 265 + will remove all certificate files and the account data for the given domain, 266 + allowing you to then <literal>systemctl start acme-example.com.service</literal> 267 + to generate fresh ones. 264 268 </para> 265 269 </section> 266 270 <section xml:id="module-security-acme-fix-jws">
+31 -1
nixos/tests/acme.nix
··· 77 77 after = [ "acme-a.example.test.service" "nginx-config-reload.service" ]; 78 78 }; 79 79 80 + # Test that account creation is collated into one service 81 + specialisation.account-creation.configuration = { nodes, pkgs, lib, ... }: let 82 + email = "newhostmaster@example.test"; 83 + caDomain = nodes.acme.config.test-support.acme.caDomain; 84 + # Exit 99 to make it easier to track if this is the reason a renew failed 85 + testScript = '' 86 + test -e accounts/${caDomain}/${email}/account.json || exit 99 87 + ''; 88 + in { 89 + security.acme.email = lib.mkForce email; 90 + systemd.services."b.example.test".preStart = testScript; 91 + systemd.services."c.example.test".preStart = testScript; 92 + 93 + services.nginx.virtualHosts."b.example.test" = (vhostBase pkgs) // { 94 + enableACME = true; 95 + }; 96 + services.nginx.virtualHosts."c.example.test" = (vhostBase pkgs) // { 97 + enableACME = true; 98 + }; 99 + }; 100 + 80 101 # Cert config changes will not cause the nginx configuration to change. 81 102 # This tests that the reload service is correctly triggered. 82 103 # It also tests that postRun is exec'd as root ··· 289 310 acme.start() 290 311 webserver.start() 291 312 292 - acme.wait_for_unit("default.target") 313 + acme.wait_for_unit("network-online.target") 293 314 acme.wait_for_unit("pebble.service") 294 315 295 316 client.succeed("curl https://${caDomain}:15000/roots/0 > /tmp/ca.crt") ··· 313 334 webserver.succeed("systemctl start test-renew-nginx.target") 314 335 check_issuer(webserver, "a.example.test", "pebble") 315 336 check_connection(client, "a.example.test") 337 + 338 + with subtest("Runs 1 cert for account creation before others"): 339 + switch_to(webserver, "account-creation") 340 + webserver.wait_for_unit("acme-finished-a.example.test.target") 341 + check_connection(client, "a.example.test") 342 + webserver.wait_for_unit("acme-finished-b.example.test.target") 343 + webserver.wait_for_unit("acme-finished-c.example.test.target") 344 + check_connection(client, "b.example.test") 345 + check_connection(client, "c.example.test") 316 346 317 347 with subtest("Can reload web server when cert configuration changes"): 318 348 switch_to(webserver, "cert-change")