@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator

Lock resources briefly while acquiring leases on them to prevent acquiring near-death resources

Summary:
Depends on D19078. Ref T13073. Currently, there is a narrow window where we can acquire a resource after a reclaim has started against it.

To prevent this, briefly lock resources before acquiring them and make sure they're still good. If a resource isn't good, throw the lease back in the pool.

Test Plan:
This is tricky. You need:

- Hoax blueprint with limits and a rule where leases of a given "flavor" can only be satisfied by resources of the same flavor.
- Reduce the 3-minute "wait before resources can be released" to 3 seconds.
- Limit Hoaxes to 1.
- Allocate one "cherry" flavored Hoax and release the lease.
- Add a `sleep(15)` to `releaseResource()` in `DrydockResourceUpdateWorker`, after the `canReclaimResource()` check, with a `print`.

Now:

- Run `bin/phd debug task` in two windows.
- Run `bin/drydock lease --type host --attributes flavor=banana` in a third window.
- This will start to reclaim the existing "cherry" resource. Once one of the `phd` windows prints the "RECLAIMING" message run `bin/drydock lease --type host --attributes flavor=cherry` in a fourth window.
- Before patch: the "cherry" lease acquired immediately, then was released and destroyed moments later.
- After patch: the "cherry" lease yields.

Subscribers: PHID-OPKG-gm6ozazyms6q6i22gyam

Maniphest Tasks: T13073

Differential Revision: https://secure.phabricator.com/D19080

+89 -22
+2
src/__phutil_library_map__.php
··· 1113 1113 'DrydockResourceDatasource' => 'applications/drydock/typeahead/DrydockResourceDatasource.php', 1114 1114 'DrydockResourceListController' => 'applications/drydock/controller/DrydockResourceListController.php', 1115 1115 'DrydockResourceListView' => 'applications/drydock/view/DrydockResourceListView.php', 1116 + 'DrydockResourceLockException' => 'applications/drydock/exception/DrydockResourceLockException.php', 1116 1117 'DrydockResourcePHIDType' => 'applications/drydock/phid/DrydockResourcePHIDType.php', 1117 1118 'DrydockResourceQuery' => 'applications/drydock/query/DrydockResourceQuery.php', 1118 1119 'DrydockResourceReclaimLogType' => 'applications/drydock/logtype/DrydockResourceReclaimLogType.php', ··· 6344 6345 'DrydockResourceDatasource' => 'PhabricatorTypeaheadDatasource', 6345 6346 'DrydockResourceListController' => 'DrydockResourceController', 6346 6347 'DrydockResourceListView' => 'AphrontView', 6348 + 'DrydockResourceLockException' => 'Exception', 6347 6349 'DrydockResourcePHIDType' => 'PhabricatorPHIDType', 6348 6350 'DrydockResourceQuery' => 'DrydockQuery', 6349 6351 'DrydockResourceReclaimLogType' => 'DrydockLogType',
+4
src/applications/drydock/exception/DrydockResourceLockException.php
··· 1 + <?php 2 + 3 + final class DrydockResourceLockException 4 + extends Exception {}
+62 -17
src/applications/drydock/storage/DrydockLease.php
··· 213 213 } 214 214 } 215 215 216 - $this->openTransaction(); 216 + // Before we associate the lease with the resource, we lock the resource 217 + // and reload it to make sure it is still pending or active. If we don't 218 + // do this, the resource may have just been reclaimed. (Once we acquire 219 + // the resource that stops it from being released, so we're nearly safe.) 220 + 221 + $resource_phid = $resource->getPHID(); 222 + $hash = PhabricatorHash::digestForIndex($resource_phid); 223 + $lock_key = 'drydock.resource:'.$hash; 224 + $lock = PhabricatorGlobalLock::newLock($lock_key); 217 225 218 226 try { 219 - DrydockSlotLock::acquireLocks($this->getPHID(), $this->slotLocks); 220 - $this->slotLocks = array(); 221 - } catch (DrydockSlotLockException $ex) { 222 - $this->killTransaction(); 227 + $lock->lock(15); 228 + } catch (Exception $ex) { 229 + throw new DrydockResourceLockException( 230 + pht( 231 + 'Failed to acquire lock for resource ("%s") while trying to '. 232 + 'acquire lease ("%s").', 233 + $resource->getPHID(), 234 + $this->getPHID())); 235 + } 236 + 237 + $resource->reload(); 238 + 239 + if (($resource->getStatus() !== DrydockResourceStatus::STATUS_ACTIVE) && 240 + ($resource->getStatus() !== DrydockResourceStatus::STATUS_PENDING)) { 241 + throw new DrydockAcquiredBrokenResourceException( 242 + pht( 243 + 'Trying to acquire lease ("%s") on a resource ("%s") in the '. 244 + 'wrong status ("%s").', 245 + $this->getPHID(), 246 + $resource->getPHID(), 247 + $resource->getStatus())); 248 + } 249 + 250 + $caught = null; 251 + try { 252 + $this->openTransaction(); 253 + 254 + try { 255 + DrydockSlotLock::acquireLocks($this->getPHID(), $this->slotLocks); 256 + $this->slotLocks = array(); 257 + } catch (DrydockSlotLockException $ex) { 258 + $this->killTransaction(); 223 259 224 - $this->logEvent( 225 - DrydockSlotLockFailureLogType::LOGCONST, 226 - array( 227 - 'locks' => $ex->getLockMap(), 228 - )); 260 + $this->logEvent( 261 + DrydockSlotLockFailureLogType::LOGCONST, 262 + array( 263 + 'locks' => $ex->getLockMap(), 264 + )); 265 + 266 + throw $ex; 267 + } 268 + 269 + $this 270 + ->setResourcePHID($resource->getPHID()) 271 + ->attachResource($resource) 272 + ->setStatus($new_status) 273 + ->save(); 229 274 230 - throw $ex; 275 + $this->saveTransaction(); 276 + } catch (Exception $ex) { 277 + $caught = $ex; 231 278 } 232 279 233 - $this 234 - ->setResourcePHID($resource->getPHID()) 235 - ->attachResource($resource) 236 - ->setStatus($new_status) 237 - ->save(); 280 + $lock->unlock(); 238 281 239 - $this->saveTransaction(); 282 + if ($caught) { 283 + throw $caught; 284 + } 240 285 241 286 $this->isAcquired = true; 242 287
+21 -5
src/applications/drydock/worker/DrydockLeaseUpdateWorker.php
··· 53 53 54 54 $lease 55 55 ->setStatus(DrydockLeaseStatus::STATUS_PENDING) 56 + ->setResourcePHID(null) 56 57 ->save(); 57 58 58 59 $lease->logEvent( ··· 301 302 $resources = $this->rankResources($resources, $lease); 302 303 303 304 $exceptions = array(); 305 + $yields = array(); 304 306 $allocated = false; 305 307 foreach ($resources as $resource) { 306 308 try { 307 309 $this->acquireLease($resource, $lease); 308 310 $allocated = true; 309 311 break; 312 + } catch (DrydockResourceLockException $ex) { 313 + // We need to lock the resource to actually acquire it. If we aren't 314 + // able to acquire the lock quickly enough, we can yield and try again 315 + // later. 316 + $yields[] = $ex; 317 + } catch (DrydockAcquiredBrokenResourceException $ex) { 318 + // If a resource was reclaimed or destroyed by the time we actually 319 + // got around to acquiring it, we just got unlucky. We can yield and 320 + // try again later. 321 + $yields[] = $ex; 310 322 } catch (Exception $ex) { 311 323 $exceptions[] = $ex; 312 324 } 313 325 } 314 326 315 327 if (!$allocated) { 316 - throw new PhutilAggregateException( 317 - pht( 318 - 'Unable to acquire lease "%s" on any resource.', 319 - $lease->getPHID()), 320 - $exceptions); 328 + if ($yields) { 329 + throw new PhabricatorWorkerYieldException(15); 330 + } else { 331 + throw new PhutilAggregateException( 332 + pht( 333 + 'Unable to acquire lease "%s" on any resource.', 334 + $lease->getPHID()), 335 + $exceptions); 336 + } 321 337 } 322 338 } 323 339