@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator

Add a chunking storage engine for files

Summary:
Ref T7149. This isn't complete and isn't active yet, but does basically work. I'll shore it up in the next few diffs.

The new workflow goes like this:

> Client, file.allocate(): I'd like to upload a file with length L, metadata M, and hash H.

Then the server returns `upload` (a boolean) and `filePHID` (a PHID). These mean:

| upload | filePHID | means |
|---|---|---|
| false | false | Server can't accept file.
| false | true | File data already known, file created from hash.
| true | false | Just upload normally.
| true | true | Query chunks to start or resume a chunked upload.

All but the last case are uninteresting and work like existing uploads with `file.uploadhash` (which we can eventually deprecate).

In the last case:

> Client, file.querychunks(): Give me a list of chunks that I should upload.

This returns all the chunks for the file. Chunks have a start byte, an end byte, and a "complete" flag to indicate that the server already has the data.

Then, the client fills in chunks by sending them:

> Client, file.uploadchunk(): Here is the data for one chunk.

This stuff doesn't work yet or has some caveats:

- I haven't tested resume much.
- Files need an "isPartial()" flag for partial uploads, and the UI needs to respect it.
- The JS client needs to become chunk-aware.
- Chunk size is set crazy low to make testing easier.
- There are some debugging flags that I'll remove soon-ish.
- Downloading works, but still streams the whole file into memory.
- This storage engine is disabled by default (hardcoded as a unit test engine) because it's still sketchy.
- Need some code to remove the "isPartial" flag when the last chunk is uploaded.
- Maybe do checksumming on chunks.

Test Plan:
- Hacked up `arc upload` (see next diff) to be chunk-aware and uploaded a readme in 18 32-byte chunks. Then downloaded it. Got the same file back that I uploaded.
- File UI now shows some basic chunk info for chunked files:

{F336434}

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: joshuaspence, epriestley

Maniphest Tasks: T7149

Differential Revision: https://secure.phabricator.com/D12060

+887
+9
resources/sql/autopatches/20150312.filechunk.1.sql
-- One row per chunk of a chunked file. Rows sharing a chunkHandle belong to the same file, and dataFilePHID stays NULL until that chunk has been uploaded.
CREATE TABLE {$NAMESPACE}_file.file_chunk (
  id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  chunkHandle BINARY(12) NOT NULL,
  byteStart BIGINT UNSIGNED NOT NULL,
  byteEnd BIGINT UNSIGNED NOT NULL,
  dataFilePHID VARBINARY(64),
  KEY `key_file` (chunkHandle, byteStart, byteEnd),
  KEY `key_data` (dataFilePHID)
) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT};
+16
src/__phutil_library_map__.php
··· 747 747 'FeedPublisherWorker' => 'applications/feed/worker/FeedPublisherWorker.php', 748 748 'FeedPushWorker' => 'applications/feed/worker/FeedPushWorker.php', 749 749 'FeedQueryConduitAPIMethod' => 'applications/feed/conduit/FeedQueryConduitAPIMethod.php', 750 + 'FileAllocateConduitAPIMethod' => 'applications/files/conduit/FileAllocateConduitAPIMethod.php', 750 751 'FileConduitAPIMethod' => 'applications/files/conduit/FileConduitAPIMethod.php', 751 752 'FileCreateMailReceiver' => 'applications/files/mail/FileCreateMailReceiver.php', 752 753 'FileDownloadConduitAPIMethod' => 'applications/files/conduit/FileDownloadConduitAPIMethod.php', 753 754 'FileInfoConduitAPIMethod' => 'applications/files/conduit/FileInfoConduitAPIMethod.php', 754 755 'FileMailReceiver' => 'applications/files/mail/FileMailReceiver.php', 756 + 'FileQueryChunksConduitAPIMethod' => 'applications/files/conduit/FileQueryChunksConduitAPIMethod.php', 755 757 'FileReplyHandler' => 'applications/files/mail/FileReplyHandler.php', 758 + 'FileUploadChunkConduitAPIMethod' => 'applications/files/conduit/FileUploadChunkConduitAPIMethod.php', 756 759 'FileUploadConduitAPIMethod' => 'applications/files/conduit/FileUploadConduitAPIMethod.php', 757 760 'FileUploadHashConduitAPIMethod' => 'applications/files/conduit/FileUploadHashConduitAPIMethod.php', 758 761 'FilesDefaultViewCapability' => 'applications/files/capability/FilesDefaultViewCapability.php', ··· 1485 1488 'PhabricatorChatLogDAO' => 'applications/chatlog/storage/PhabricatorChatLogDAO.php', 1486 1489 'PhabricatorChatLogEvent' => 'applications/chatlog/storage/PhabricatorChatLogEvent.php', 1487 1490 'PhabricatorChatLogQuery' => 'applications/chatlog/query/PhabricatorChatLogQuery.php', 1491 + 'PhabricatorChunkedFileStorageEngine' => 'applications/files/engine/PhabricatorChunkedFileStorageEngine.php', 1488 1492 'PhabricatorClusterConfigOptions' => 'applications/config/option/PhabricatorClusterConfigOptions.php', 1489 1493 
'PhabricatorCommitBranchesField' => 'applications/repository/customfield/PhabricatorCommitBranchesField.php', 1490 1494 'PhabricatorCommitCustomField' => 'applications/repository/customfield/PhabricatorCommitCustomField.php', ··· 1790 1794 'PhabricatorFeedStoryReference' => 'applications/feed/storage/PhabricatorFeedStoryReference.php', 1791 1795 'PhabricatorFile' => 'applications/files/storage/PhabricatorFile.php', 1792 1796 'PhabricatorFileBundleLoader' => 'applications/files/query/PhabricatorFileBundleLoader.php', 1797 + 'PhabricatorFileChunk' => 'applications/files/storage/PhabricatorFileChunk.php', 1798 + 'PhabricatorFileChunkQuery' => 'applications/files/query/PhabricatorFileChunkQuery.php', 1793 1799 'PhabricatorFileCommentController' => 'applications/files/controller/PhabricatorFileCommentController.php', 1794 1800 'PhabricatorFileComposeController' => 'applications/files/controller/PhabricatorFileComposeController.php', 1795 1801 'PhabricatorFileController' => 'applications/files/controller/PhabricatorFileController.php', ··· 3908 3914 'FeedPublisherWorker' => 'FeedPushWorker', 3909 3915 'FeedPushWorker' => 'PhabricatorWorker', 3910 3916 'FeedQueryConduitAPIMethod' => 'FeedConduitAPIMethod', 3917 + 'FileAllocateConduitAPIMethod' => 'FileConduitAPIMethod', 3911 3918 'FileConduitAPIMethod' => 'ConduitAPIMethod', 3912 3919 'FileCreateMailReceiver' => 'PhabricatorMailReceiver', 3913 3920 'FileDownloadConduitAPIMethod' => 'FileConduitAPIMethod', 3914 3921 'FileInfoConduitAPIMethod' => 'FileConduitAPIMethod', 3915 3922 'FileMailReceiver' => 'PhabricatorObjectMailReceiver', 3923 + 'FileQueryChunksConduitAPIMethod' => 'FileConduitAPIMethod', 3916 3924 'FileReplyHandler' => 'PhabricatorMailReplyHandler', 3925 + 'FileUploadChunkConduitAPIMethod' => 'FileConduitAPIMethod', 3917 3926 'FileUploadConduitAPIMethod' => 'FileConduitAPIMethod', 3918 3927 'FileUploadHashConduitAPIMethod' => 'FileConduitAPIMethod', 3919 3928 'FilesDefaultViewCapability' => 
'PhabricatorPolicyCapability', ··· 4748 4757 'PhabricatorPolicyInterface', 4749 4758 ), 4750 4759 'PhabricatorChatLogQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', 4760 + 'PhabricatorChunkedFileStorageEngine' => 'PhabricatorFileStorageEngine', 4751 4761 'PhabricatorClusterConfigOptions' => 'PhabricatorApplicationConfigOptions', 4752 4762 'PhabricatorCommitBranchesField' => 'PhabricatorCommitCustomField', 4753 4763 'PhabricatorCommitCustomField' => 'PhabricatorCustomField', ··· 5081 5091 'PhabricatorPolicyInterface', 5082 5092 'PhabricatorDestructibleInterface', 5083 5093 ), 5094 + 'PhabricatorFileChunk' => array( 5095 + 'PhabricatorFileDAO', 5096 + 'PhabricatorPolicyInterface', 5097 + 'PhabricatorDestructibleInterface', 5098 + ), 5099 + 'PhabricatorFileChunkQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', 5084 5100 'PhabricatorFileCommentController' => 'PhabricatorFileController', 5085 5101 'PhabricatorFileComposeController' => 'PhabricatorFileController', 5086 5102 'PhabricatorFileController' => 'PhabricatorController',
+131
src/applications/files/conduit/FileAllocateConduitAPIMethod.php
<?php

/**
 * Conduit method `file.allocate`: the first step of an upload.
 *
 * The client describes the file it wants to upload (name, length and,
 * optionally, a content hash). The response is a map with two keys:
 *
 *   - `upload`: whether the client still needs to send file data.
 *   - `filePHID`: PHID of an existing or newly allocated file, or `null`.
 *
 * When both are set, the client is expected to continue with a chunked
 * upload against the returned file.
 */
final class FileAllocateConduitAPIMethod
  extends FileConduitAPIMethod {

  public function getAPIMethodName() {
    return 'file.allocate';
  }

  public function getMethodDescription() {
    return pht('Prepare to upload a file.');
  }

  public function defineParamTypes() {
    return array(
      'name' => 'string',
      'contentLength' => 'int',
      'contentHash' => 'optional string',
      'viewPolicy' => 'optional string',

      // TODO: Remove this, it's just here to make testing easier.
      'forceChunking' => 'optional bool',
    );
  }

  public function defineReturnType() {
    return 'map<string, wild>';
  }

  public function defineErrorTypes() {
    return array();
  }

  /**
   * Decide how the client should proceed with the upload.
   *
   * @param ConduitAPIRequest Request carrying the parameters declared in
   *   @{method:defineParamTypes}.
   * @return map<string, wild> Map with `upload` and `filePHID` keys.
   */
  protected function execute(ConduitAPIRequest $request) {
    $viewer = $request->getUser();

    $hash = $request->getValue('contentHash');
    $content_length = $request->getValue('contentLength');
    $force_chunking = $request->getValue('forceChunking');

    $properties = array(
      'name' => $request->getValue('name'),
      'authorPHID' => $viewer->getPHID(),
      'viewPolicy' => $request->getValue('viewPolicy'),
      'isExplicitUpload' => true,
    );

    // Only computed when the client supplied a content hash.
    $chunked_hash = null;

    if ($hash) {
      // If the data is already known by hash, a file is created from it and
      // no upload is required.
      $file = PhabricatorFile::newFileFromContentHash(
        $hash,
        $properties);

      if ($file && !$force_chunking) {
        return array(
          'upload' => false,
          'filePHID' => $file->getPHID(),
        );
      }

      // Otherwise, look for a chunked file this viewer allocated earlier
      // for the same claimed hash, so the upload can be resumed.
      $chunked_hash = PhabricatorChunkedFileStorageEngine::getChunkedHash(
        $viewer,
        $hash);
      $file = id(new PhabricatorFileQuery())
        ->setViewer($viewer)
        ->withContentHashes(array($chunked_hash))
        ->executeOne();

      if ($file) {
        return array(
          'upload' => $file->isPartial(),
          'filePHID' => $file->getPHID(),
        );
      }
    }

    $rejected = array(
      'upload' => false,
      'filePHID' => null,
    );

    $engines = PhabricatorFileStorageEngine::loadStorageEngines(
      $content_length);
    if (!$engines) {
      // None of the storage engines can accept this file.
      return $rejected;
    }

    if ($force_chunking) {
      foreach ($engines as $key => $engine) {
        if (!$engine->isChunkEngine()) {
          unset($engines[$key]);
        }
      }
    }

    // Pick the first engine. If the file is small enough to fit into a
    // single engine without chunking, this will be a non-chunk engine and
    // we'll just tell the client to upload the file.
    $engine = head($engines);
    if (!$engine) {
      return $rejected;
    }

    if (!$engine->isChunkEngine()) {
      return array(
        'upload' => true,
        'filePHID' => null,
      );
    }

    // Otherwise, this is a large file and we need to perform a chunked
    // upload. Pass the file properties along so the allocated file keeps
    // the requested name, author and view policy.
    $chunk_properties = $properties;
    if ($hash) {
      $chunk_properties['chunkedHash'] = $chunked_hash;
    }

    $file = $engine->allocateChunks($content_length, $chunk_properties);

    return array(
      'upload' => true,
      'filePHID' => $file->getPHID(),
    );
  }

}
+100
src/applications/files/conduit/FileConduitAPIMethod.php
··· 6 6 return PhabricatorApplication::getByClass('PhabricatorFilesApplication'); 7 7 } 8 8 9 + protected function loadFileByPHID(PhabricatorUser $viewer, $file_phid) { 10 + $file = id(new PhabricatorFileQuery()) 11 + ->setViewer($viewer) 12 + ->withPHIDs(array($file_phid)) 13 + ->executeOne(); 14 + if (!$file) { 15 + throw new Exception(pht('No such file "%s"!', $file_phid)); 16 + } 17 + 18 + return $file; 19 + } 20 + 21 + protected function loadFileChunks( 22 + PhabricatorUser $viewer, 23 + PhabricatorFile $file) { 24 + return $this->newChunkQuery($viewer, $file) 25 + ->execute(); 26 + } 27 + 28 + protected function loadFileChunkForUpload( 29 + PhabricatorUser $viewer, 30 + PhabricatorFile $file, 31 + $start, 32 + $end) { 33 + 34 + $start = (int)$start; 35 + $end = (int)$end; 36 + 37 + $chunks = $this->newChunkQuery($viewer, $file) 38 + ->withByteRange($start, $end) 39 + ->execute(); 40 + 41 + if (!$chunks) { 42 + throw new Exception( 43 + pht( 44 + 'There are no file data chunks in byte range %d - %d.', 45 + $start, 46 + $end)); 47 + } 48 + 49 + if (count($chunks) !== 1) { 50 + phlog($chunks); 51 + throw new Exception( 52 + pht( 53 + 'There are multiple chunks in byte range %d - %d.', 54 + $start, 55 + $end)); 56 + } 57 + 58 + $chunk = head($chunks); 59 + if ($chunk->getByteStart() != $start) { 60 + throw new Exception( 61 + pht( 62 + 'Chunk start byte is %d, not %d.', 63 + $chunk->getByteStart(), 64 + $start)); 65 + } 66 + 67 + if ($chunk->getByteEnd() != $end) { 68 + throw new Exception( 69 + pht( 70 + 'Chunk end byte is %d, not %d.', 71 + $chunk->getByteEnd(), 72 + $end)); 73 + } 74 + 75 + if ($chunk->getDataFilePHID()) { 76 + throw new Exception( 77 + pht( 78 + 'Chunk has already been uploaded.')); 79 + } 80 + 81 + return $chunk; 82 + } 83 + 84 + protected function decodeBase64($data) { 85 + $data = base64_decode($data, $strict = true); 86 + if ($data === false) { 87 + throw new Exception(pht('Unable to decode base64 data!')); 88 + } 89 + return $data; 90 + 
} 91 + 92 + private function newChunkQuery( 93 + PhabricatorUser $viewer, 94 + PhabricatorFile $file) { 95 + 96 + $engine = $file->instantiateStorageEngine(); 97 + if (!$engine->isChunkEngine()) { 98 + throw new Exception( 99 + pht( 100 + 'File "%s" does not have chunks!', 101 + $file->getPHID())); 102 + } 103 + 104 + return id(new PhabricatorFileChunkQuery()) 105 + ->setViewer($viewer) 106 + ->withChunkHandles(array($file->getStorageHandle())); 107 + } 108 + 9 109 }
+47
src/applications/files/conduit/FileQueryChunksConduitAPIMethod.php
<?php

/**
 * Conduit method `file.querychunks`: list the chunks of a chunked file.
 *
 * Each entry in the result reports a chunk's byte range and whether data
 * for that chunk has already been stored.
 */
final class FileQueryChunksConduitAPIMethod
  extends FileConduitAPIMethod {

  public function getAPIMethodName() {
    return 'file.querychunks';
  }

  public function getMethodDescription() {
    return pht('Get information about file chunks.');
  }

  public function defineParamTypes() {
    return array(
      'filePHID' => 'phid',
    );
  }

  public function defineReturnType() {
    return 'list<wild>';
  }

  public function defineErrorTypes() {
    return array();
  }

  /**
   * Load the file named by `filePHID` and describe each of its chunks.
   *
   * @param ConduitAPIRequest Request carrying `filePHID`.
   * @return list<wild> One map per chunk with `byteStart`, `byteEnd` and
   *   `complete` keys.
   */
  protected function execute(ConduitAPIRequest $request) {
    $viewer = $request->getUser();

    $file = $this->loadFileByPHID(
      $viewer,
      $request->getValue('filePHID'));

    $summaries = array();
    foreach ($this->loadFileChunks($viewer, $file) as $chunk) {
      // A chunk counts as complete once it points at a data file.
      $has_data = (bool)$chunk->getDataFilePHID();

      $summaries[] = array(
        'byteStart' => $chunk->getByteStart(),
        'byteEnd' => $chunk->getByteEnd(),
        'complete' => $has_data,
      );
    }

    return $summaries;
  }

}
+74
src/applications/files/conduit/FileUploadChunkConduitAPIMethod.php
<?php

/**
 * Conduit method `file.uploadchunk`: store the data for one chunk of a
 * chunked file.
 */
final class FileUploadChunkConduitAPIMethod
  extends FileConduitAPIMethod {

  public function getAPIMethodName() {
    return 'file.uploadchunk';
  }

  public function getMethodDescription() {
    return pht('Upload a chunk of file data to the server.');
  }

  public function defineParamTypes() {
    return array(
      'filePHID' => 'phid',
      'byteStart' => 'int',
      'data' => 'string',
      'dataEncoding' => 'string',
    );
  }

  public function defineReturnType() {
    return 'void';
  }

  public function defineErrorTypes() {
    return array();
  }

  /**
   * Decode the submitted data, find the matching empty chunk, write the
   * data into a new file and attach that file to the chunk.
   *
   * @param ConduitAPIRequest Request carrying `filePHID`, `byteStart`,
   *   `data` and `dataEncoding`.
   * @return null
   */
  protected function execute(ConduitAPIRequest $request) {
    $viewer = $request->getUser();

    $parent_file = $this->loadFileByPHID(
      $viewer,
      $request->getValue('filePHID'));

    $byte_start = $request->getValue('byteStart');

    $payload = $this->decodeChunkData(
      $request->getValue('data'),
      $request->getValue('dataEncoding'));

    // The chunk is located by its byte range: the start offset plus the
    // number of bytes actually received.
    $byte_end = $byte_start + strlen($payload);

    $chunk = $this->loadFileChunkForUpload(
      $viewer,
      $parent_file,
      $byte_start,
      $byte_end);

    // NOTE: These files have a view policy which prevents normal access. They
    // are only accessed through the storage engine.
    $chunk_name = $parent_file->getMonogram().'.chunk-'.$chunk->getID();
    $data_file = PhabricatorFile::newFromFileData(
      $payload,
      array(
        'name' => $chunk_name,
        'viewPolicy' => PhabricatorPolicies::POLICY_NOONE,
      ));

    $chunk->setDataFilePHID($data_file->getPHID());
    $chunk->save();

    // TODO: If all chunks are up, mark the file as complete.

    return null;
  }

  /**
   * Turn the submitted `data` parameter into raw bytes.
   *
   * @param string Data as submitted by the client.
   * @param wild Value of the `dataEncoding` parameter.
   * @return string Raw chunk data.
   */
  private function decodeChunkData($data, $encoding) {
    switch ($encoding) {
      case 'base64':
        return $this->decodeBase64($data);
      case null:
        // No encoding: the data is used as submitted.
        return $data;
      default:
        throw new Exception(pht('Unsupported data encoding.'));
    }
  }

}
+1
src/applications/files/conduit/FileUploadHashConduitAPIMethod.php
··· 3 3 final class FileUploadHashConduitAPIMethod extends FileConduitAPIMethod { 4 4 5 5 public function getAPIMethodName() { 6 + // TODO: Deprecate this in favor of `file.allocate`. 6 7 return 'file.uploadhash'; 7 8 } 8 9
+61
src/applications/files/controller/PhabricatorFileInfoController.php
··· 295 295 296 296 $box->addPropertyList($media); 297 297 } 298 + 299 + $engine = null; 300 + try { 301 + $engine = $file->instantiateStorageEngine(); 302 + } catch (Exception $ex) { 303 + // Don't bother raising this anywhere for now. 304 + } 305 + 306 + if ($engine) { 307 + if ($engine->isChunkEngine()) { 308 + $chunkinfo = new PHUIPropertyListView(); 309 + $box->addPropertyList($chunkinfo, pht('Chunks')); 310 + 311 + $chunks = id(new PhabricatorFileChunkQuery()) 312 + ->setViewer($user) 313 + ->withChunkHandles(array($file->getStorageHandle())) 314 + ->execute(); 315 + $chunks = msort($chunks, 'getByteStart'); 316 + 317 + $rows = array(); 318 + $completed = array(); 319 + foreach ($chunks as $chunk) { 320 + $is_complete = $chunk->getDataFilePHID(); 321 + 322 + $rows[] = array( 323 + $chunk->getByteStart(), 324 + $chunk->getByteEnd(), 325 + ($is_complete ? pht('Yes') : pht('No')), 326 + ); 327 + 328 + if ($is_complete) { 329 + $completed[] = $chunk; 330 + } 331 + } 332 + 333 + $table = id(new AphrontTableView($rows)) 334 + ->setHeaders( 335 + array( 336 + pht('Offset'), 337 + pht('End'), 338 + pht('Complete'), 339 + )) 340 + ->setColumnClasses( 341 + array( 342 + '', 343 + '', 344 + 'wide', 345 + )); 346 + 347 + $chunkinfo->addProperty( 348 + pht('Total Chunks'), 349 + count($chunks)); 350 + 351 + $chunkinfo->addProperty( 352 + pht('Completed Chunks'), 353 + count($completed)); 354 + 355 + $chunkinfo->addRawContent($table); 356 + } 357 + } 358 + 298 359 } 299 360 300 361 }
+171
src/applications/files/engine/PhabricatorChunkedFileStorageEngine.php
<?php

/**
 * Storage engine which represents a file as a list of chunks.
 *
 * The engine does not hold file data itself. Each chunk row points at a
 * separate data file, and reading the chunked file concatenates the data of
 * those files in byte order.
 */
final class PhabricatorChunkedFileStorageEngine
  extends PhabricatorFileStorageEngine {

  public function getEngineIdentifier() {
    return 'chunks';
  }

  public function getEnginePriority() {
    return 60000;
  }

  /**
   * We can write chunks if we have at least one valid storage engine
   * underneath us.
   *
   * This engine must not also be a chunk engine.
   */
  public function canWriteFiles() {
    return (bool)$this->getWritableEngine();
  }

  public function hasFilesizeLimit() {
    return false;
  }

  public function isChunkEngine() {
    return true;
  }

  public function isTestEngine() {
    // TODO: For now, prevent this from actually being selected.
    return true;
  }

  public function writeFile($data, array $params) {
    // The chunk engine does not support direct writes.
    throw new PhutilMethodNotImplementedException();
  }

  /**
   * Read a chunked file by concatenating the data of all of its chunks.
   *
   * @param string Chunk handle shared by the file's chunks.
   * @return string Complete file data.
   */
  public function readFile($handle) {
    // This is inefficient, but makes the API work as expected.
    $chunks = $this->loadAllChunks($handle, true);

    $buffer = '';
    foreach ($chunks as $chunk) {
      $data_file = $chunk->getDataFile();
      if (!$data_file) {
        // A chunk without a data file has not been uploaded yet.
        throw new Exception(pht('This file data is incomplete!'));
      }

      $buffer .= $data_file->loadFileData();
    }

    return $buffer;
  }

  /**
   * Destroy every chunk which belongs to a handle.
   *
   * @param string Chunk handle shared by the file's chunks.
   * @return void
   */
  public function deleteFile($handle) {
    $engine = new PhabricatorDestructionEngine();

    // The data files do not need to be attached here, so do not load them.
    $chunks = $this->loadAllChunks($handle, false);
    foreach ($chunks as $chunk) {
      $engine->destroyObject($chunk);
    }
  }

  /**
   * Load all chunks for a handle, ordered by start byte.
   *
   * @param string Chunk handle.
   * @param bool True to also load and attach each chunk's data file.
   * @return list<PhabricatorFileChunk> Chunks sorted by `getByteStart()`.
   */
  private function loadAllChunks($handle, $need_files) {
    $chunks = id(new PhabricatorFileChunkQuery())
      ->setViewer(PhabricatorUser::getOmnipotentUser())
      ->withChunkHandles(array($handle))
      ->needDataFiles($need_files)
      ->execute();

    $chunks = msort($chunks, 'getByteStart');

    return $chunks;
  }

  /**
   * Compute a chunked file hash for the viewer.
   *
   * We can not currently compute a real hash for chunked file uploads (because
   * no process sees all of the file data).
   *
   * We also can not trust the hash that the user claims to have computed. If
   * we trust the user, they can upload some `evil.exe` and claim it has the
   * same file hash as `good.exe`. When another user later uploads the real
   * `good.exe`, we'll just create a reference to the existing `evil.exe`. Users
   * who download `good.exe` will then receive `evil.exe`.
   *
   * Instead, we rehash the user's claimed hash with account secrets. This
   * allows users to resume file uploads, but not collide with other users.
   *
   * Ideally, we'd like to be able to verify hashes, but this is complicated
   * and time consuming and gives us a fairly small benefit.
   *
   * @param PhabricatorUser Viewing user.
   * @param string Claimed file hash.
   * @return string Rehashed file hash.
   */
  public static function getChunkedHash(PhabricatorUser $viewer, $hash) {
    if (!$viewer->getPHID()) {
      throw new Exception(
        pht('Unable to compute chunked hash without real viewer!'));
    }

    $input = $viewer->getAccountSecret().':'.$hash.':'.$viewer->getPHID();
    return PhabricatorHash::digest($input);
  }

  /**
   * Create a new chunked file and the empty chunk rows which cover it.
   *
   * Chunks are laid out back to back: each chunk ends where the next one
   * starts, and the final chunk ends at `$length`.
   *
   * @param int Total length of the file, in bytes.
   * @param map<string, wild> Properties passed to
   *   `PhabricatorFile::newChunkedFile()`.
   * @return PhabricatorFile The newly saved file.
   */
  public function allocateChunks($length, array $properties) {
    $file = PhabricatorFile::newChunkedFile($this, $length, $properties);

    $chunk_size = $this->getChunkSize();

    $handle = $file->getStorageHandle();

    $chunks = array();
    for ($ii = 0; $ii < $length; $ii += $chunk_size) {
      $chunks[] = PhabricatorFileChunk::initializeNewChunk(
        $handle,
        $ii,
        min($ii + $chunk_size, $length));
    }

    // Save the chunks and the file inside one transaction.
    $file->openTransaction();
      foreach ($chunks as $chunk) {
        $chunk->save();
      }
      $file->save();
    $file->saveTransaction();

    return $file;
  }

  /**
   * Test whether some other engine can store chunk data.
   *
   * An engine qualifies if it is not a chunk engine, is not a test engine,
   * can write files, and has no size limit below the chunk size.
   *
   * @return bool True if at least one engine qualifies.
   */
  private function getWritableEngine() {
    // NOTE: We can't just load writable engines or we'll loop forever.
    $engines = PhabricatorFileStorageEngine::loadAllEngines();

    foreach ($engines as $engine) {
      if ($engine->isChunkEngine()) {
        continue;
      }

      if ($engine->isTestEngine()) {
        continue;
      }

      if (!$engine->canWriteFiles()) {
        continue;
      }

      if ($engine->hasFilesizeLimit()) {
        if ($engine->getFilesizeLimit() < $this->getChunkSize()) {
          continue;
        }
      }

      return true;
    }

    return false;
  }

  /**
   * Get the size of a single chunk.
   *
   * @return int Chunk size, in bytes.
   */
  private function getChunkSize() {
    // TODO: This is an artificially small size to make it easier to
    // test chunking.
    return 32;
  }

}
+15
src/applications/files/engine/PhabricatorFileStorageEngine.php
··· 113 113 } 114 114 115 115 116 + /** 117 + * Identifies chunking storage engines. 118 + * 119 + * If this is a storage engine which splits files into chunks and stores the 120 + * chunks in other engines, it can return `true` to signal that other 121 + * chunking engines should not try to store data here. 122 + * 123 + * @return bool True if this is a chunk engine. 124 + * @task meta 125 + */ 126 + public function isChunkEngine() { 127 + return false; 128 + } 129 + 130 + 116 131 /* -( Managing File Data )------------------------------------------------- */ 117 132 118 133
+116
src/applications/files/query/PhabricatorFileChunkQuery.php
<?php

/**
 * Query for @{class:PhabricatorFileChunk} rows, optionally restricted to a
 * set of chunk handles and a byte range, and optionally with each chunk's
 * data file attached.
 */
final class PhabricatorFileChunkQuery
  extends PhabricatorCursorPagedPolicyAwareQuery {

  private $chunkHandles;
  private $rangeStart;
  private $rangeEnd;
  private $needDataFiles;

  public function withChunkHandles(array $handles) {
    $this->chunkHandles = $handles;
    return $this;
  }

  /**
   * Select chunks which overlap a byte range, i.e. chunks which end after
   * `$start` and begin before `$end`.
   */
  public function withByteRange($start, $end) {
    $this->rangeStart = $start;
    $this->rangeEnd = $end;
    return $this;
  }

  public function needDataFiles($need) {
    $this->needDataFiles = $need;
    return $this;
  }

  protected function loadPage() {
    $table = new PhabricatorFileChunk();
    $conn_r = $table->establishConnection('r');

    $table_name = $table->getTableName();
    $where_clause = $this->buildWhereClause($conn_r);
    $order_clause = $this->buildOrderClause($conn_r);
    $limit_clause = $this->buildLimitClause($conn_r);

    $rows = queryfx_all(
      $conn_r,
      'SELECT * FROM %T %Q %Q %Q',
      $table_name,
      $where_clause,
      $order_clause,
      $limit_clause);

    return $table->loadAllFromArray($rows);
  }

  /**
   * Attach data files when they were requested.
   *
   * Chunks without a data file PHID get `null` attached. Chunks whose data
   * file PHID does not load for the viewer are rejected from the page.
   */
  protected function willFilterPage(array $chunks) {
    if (!$this->needDataFiles) {
      return $chunks;
    }

    $files = array();
    $file_phids = array_filter(mpull($chunks, 'getDataFilePHID'));
    if ($file_phids) {
      $loaded = id(new PhabricatorFileQuery())
        ->setViewer($this->getViewer())
        ->setParentQuery($this)
        ->withPHIDs($file_phids)
        ->execute();
      $files = mpull($loaded, null, 'getPHID');
    }

    foreach ($chunks as $key => $chunk) {
      $data_phid = $chunk->getDataFilePHID();

      if (!$data_phid) {
        $chunk->attachDataFile(null);
        continue;
      }

      $data_file = idx($files, $data_phid);
      if ($data_file) {
        $chunk->attachDataFile($data_file);
        continue;
      }

      unset($chunks[$key]);
      $this->didRejectResult($chunk);
    }

    return $chunks;
  }

  private function buildWhereClause(AphrontDatabaseConnection $conn_r) {
    $clauses = array();

    if ($this->chunkHandles !== null) {
      $clauses[] = qsprintf(
        $conn_r,
        'chunkHandle IN (%Ls)',
        $this->chunkHandles);
    }

    if ($this->rangeStart !== null) {
      $clauses[] = qsprintf(
        $conn_r,
        'byteEnd > %d',
        $this->rangeStart);
    }

    if ($this->rangeEnd !== null) {
      $clauses[] = qsprintf(
        $conn_r,
        'byteStart < %d',
        $this->rangeEnd);
    }

    $clauses[] = $this->buildPagingClause($conn_r);

    return $this->formatWhereClause($clauses);
  }

  public function getQueryApplicationClass() {
    return 'PhabricatorFilesApplication';
  }

}
+41
src/applications/files/storage/PhabricatorFile.php
··· 33 33 const METADATA_IMAGE_HEIGHT = 'height'; 34 34 const METADATA_CAN_CDN = 'canCDN'; 35 35 const METADATA_BUILTIN = 'builtin'; 36 + const METADATA_PARTIAL = 'partial'; 36 37 37 38 protected $name; 38 39 protected $mimeType; ··· 260 261 261 262 return $new_file; 262 263 } 264 + 265 + return $file; 266 + } 267 + 268 + public static function newChunkedFile( 269 + PhabricatorFileStorageEngine $engine, 270 + $length, 271 + array $params) { 272 + 273 + $file = PhabricatorFile::initializeNewFile(); 274 + 275 + $file->setByteSize($length); 276 + 277 + // TODO: We might be able to test the first chunk in order to figure 278 + // this out more reliably, since MIME detection usually examines headers. 279 + // However, enormous files are probably always either actually raw data 280 + // or reasonable to treat like raw data. 281 + $file->setMimeType('application/octet-stream'); 282 + 283 + $chunked_hash = idx($params, 'chunkedHash'); 284 + if ($chunked_hash) { 285 + $file->setContentHash($chunked_hash); 286 + } else { 287 + // See PhabricatorChunkedFileStorageEngine::getChunkedHash() for some 288 + // discussion of this. 289 + $file->setContentHash( 290 + PhabricatorHash::digest( 291 + Filesystem::readRandomBytes(64))); 292 + } 293 + 294 + $file->setStorageEngine($engine->getEngineIdentifier()); 295 + $file->setStorageHandle(PhabricatorFileChunk::newChunkHandle()); 296 + $file->setStorageFormat(self::STORAGE_FORMAT_RAW); 297 + 298 + $file->readPropertiesFromParameters($params); 263 299 264 300 return $file; 265 301 } ··· 1132 1168 return id(new AphrontRedirectResponse()) 1133 1169 ->setIsExternal($is_external) 1134 1170 ->setURI($uri); 1171 + } 1172 + 1173 + public function isPartial() { 1174 + // TODO: Placeholder for resumable uploads. 1175 + return false; 1135 1176 } 1136 1177 1137 1178
+105
src/applications/files/storage/PhabricatorFileChunk.php
<?php

/**
 * One chunk of a chunked file: a byte range identified by a chunk handle,
 * plus the PHID of the file holding that range's data once it exists.
 */
final class PhabricatorFileChunk extends PhabricatorFileDAO
  implements
    PhabricatorPolicyInterface,
    PhabricatorDestructibleInterface {

  protected $chunkHandle;
  protected $byteStart;
  protected $byteEnd;
  protected $dataFilePHID;

  private $dataFile = self::ATTACHABLE;

  protected function getConfiguration() {
    $column_schema = array(
      'chunkHandle' => 'bytes12',
      'byteStart' => 'uint64',
      'byteEnd' => 'uint64',
      'dataFilePHID' => 'phid?',
    );

    $key_schema = array(
      'key_file' => array(
        'columns' => array('chunkHandle', 'byteStart', 'byteEnd'),
      ),
      'key_data' => array(
        'columns' => array('dataFilePHID'),
      ),
    );

    $config = array(
      self::CONFIG_TIMESTAMPS => false,
      self::CONFIG_COLUMN_SCHEMA => $column_schema,
      self::CONFIG_KEY_SCHEMA => $key_schema,
    );

    // Keys defined here take precedence over the parent configuration.
    return $config + parent::getConfiguration();
  }

  /**
   * Generate a new chunk handle from 64 random bytes.
   */
  public static function newChunkHandle() {
    return PhabricatorHash::digestForIndex(
      Filesystem::readRandomBytes(64));
  }

  /**
   * Build an unsaved chunk for a handle and byte range.
   */
  public static function initializeNewChunk($handle, $start, $end) {
    $chunk = new PhabricatorFileChunk();

    $chunk->setChunkHandle($handle);
    $chunk->setByteStart($start);
    $chunk->setByteEnd($end);

    return $chunk;
  }

  public function attachDataFile(PhabricatorFile $file = null) {
    $this->dataFile = $file;
    return $this;
  }

  public function getDataFile() {
    return $this->assertAttached($this->dataFile);
  }


/* -(  PhabricatorPolicyInterface  )----------------------------------------- */


  public function getCapabilities() {
    return array(
      PhabricatorPolicyCapability::CAN_VIEW,
    );
  }


  public function getPolicy($capability) {
    // These objects are low-level and only accessed through the storage
    // engine, so policies are mostly just in place to let us use the common
    // query infrastructure.
    return PhabricatorPolicies::getMostOpenPolicy();
  }


  public function hasAutomaticCapability($capability, PhabricatorUser $viewer) {
    return false;
  }


  public function describeAutomaticCapability($capability) {
    return null;
  }


/* -(  PhabricatorDestructibleInterface  )----------------------------------- */


  /**
   * Destroy the chunk's data file, if one exists and loads, then delete the
   * chunk row itself.
   */
  public function destroyObjectPermanently(
    PhabricatorDestructionEngine $engine) {

    $data_file = null;

    $data_phid = $this->getDataFilePHID();
    if ($data_phid) {
      $data_file = id(new PhabricatorFileQuery())
        ->setViewer(PhabricatorUser::getOmnipotentUser())
        ->withPHIDs(array($data_phid))
        ->executeOne();
    }

    if ($data_file) {
      $engine->destroyObject($data_file);
    }

    $this->delete();
  }

}