Build Reactive Signals for Bluesky's AT Protocol Firehose in Laravel

Add CBOR decoder with IEEE 754 float support

Changed files
+485
src
CBOR
Core
tests
+286
src/CBOR/Decoder.php
<?php

declare(strict_types=1);

namespace SocialDept\Signal\CBOR;

use RuntimeException;
use SocialDept\Signal\Binary\Reader;
use SocialDept\Signal\Core\CID;

/**
 * CBOR (Concise Binary Object Representation) decoder.
 *
 * Implements the definite-length subset of RFC 8949 CBOR with the DAG-CBOR
 * tag 42 extension for CID links. Indefinite-length items (additional
 * info 31) and simple values other than false/true/null are rejected with
 * a RuntimeException.
 *
 * Byte strings and text strings both decode to PHP strings, and maps decode
 * to PHP arrays, so integer-like string keys (e.g. "1") are subject to PHP's
 * usual array-key coercion.
 */
class Decoder
{
    private const MAJOR_TYPE_UNSIGNED = 0;

    private const MAJOR_TYPE_NEGATIVE = 1;

    private const MAJOR_TYPE_BYTES = 2;

    private const MAJOR_TYPE_TEXT = 3;

    private const MAJOR_TYPE_ARRAY = 4;

    private const MAJOR_TYPE_MAP = 5;

    private const MAJOR_TYPE_TAG = 6;

    private const MAJOR_TYPE_SPECIAL = 7;

    private const TAG_CID = 42;

    private Reader $reader;

    /**
     * @param  string  $data  Binary CBOR data to decode
     */
    public function __construct(string $data)
    {
        $this->reader = new Reader($data);
    }

    /**
     * Decode the next CBOR item.
     *
     * @return mixed Decoded value
     *
     * @throws RuntimeException If data is malformed
     */
    public function decode(): mixed
    {
        if (! $this->reader->hasMore()) {
            throw new RuntimeException('Unexpected end of CBOR data');
        }

        // Initial byte layout: high 3 bits = major type, low 5 bits = additional info.
        $initialByte = $this->reader->readByte();
        $majorType = $initialByte >> 5;
        $additionalInfo = $initialByte & 0x1F;

        return match ($majorType) {
            self::MAJOR_TYPE_UNSIGNED => $this->decodeUnsigned($additionalInfo),
            self::MAJOR_TYPE_NEGATIVE => $this->decodeNegative($additionalInfo),
            self::MAJOR_TYPE_BYTES => $this->decodeBytes($additionalInfo),
            self::MAJOR_TYPE_TEXT => $this->decodeText($additionalInfo),
            self::MAJOR_TYPE_ARRAY => $this->decodeArray($additionalInfo),
            self::MAJOR_TYPE_MAP => $this->decodeMap($additionalInfo),
            self::MAJOR_TYPE_TAG => $this->decodeTag($additionalInfo),
            self::MAJOR_TYPE_SPECIAL => $this->decodeSpecial($additionalInfo),
            default => throw new RuntimeException("Unknown major type: {$majorType}"),
        };
    }

    /**
     * Check if there's more data to decode.
     */
    public function hasMore(): bool
    {
        return $this->reader->hasMore();
    }

    /**
     * Get the reader's current position in the input.
     */
    public function getPosition(): int
    {
        return $this->reader->getPosition();
    }

    /**
     * Decode unsigned integer (major type 0).
     *
     * @throws RuntimeException If the value does not fit in a PHP int
     */
    private function decodeUnsigned(int $additionalInfo): int
    {
        return $this->decodeLength($additionalInfo);
    }

    /**
     * Decode negative integer (major type 1), encoded as -1 - n.
     *
     * @throws RuntimeException If the argument does not fit in a PHP int
     */
    private function decodeNegative(int $additionalInfo): int
    {
        $value = $this->decodeLength($additionalInfo);

        // decodeLength() never returns more than PHP_INT_MAX, so the result
        // is at worst PHP_INT_MIN and cannot overflow into a float.
        return -1 - $value;
    }

    /**
     * Decode byte string (major type 2).
     */
    private function decodeBytes(int $additionalInfo): string
    {
        $length = $this->decodeLength($additionalInfo);

        return $this->reader->readBytes($length);
    }

    /**
     * Decode text string (major type 3).
     *
     * The bytes are returned as-is; no UTF-8 validation is performed.
     */
    private function decodeText(int $additionalInfo): string
    {
        $length = $this->decodeLength($additionalInfo);

        return $this->reader->readBytes($length);
    }

    /**
     * Decode array (major type 4).
     *
     * @return list<mixed>
     */
    private function decodeArray(int $additionalInfo): array
    {
        $length = $this->decodeLength($additionalInfo);
        $array = [];

        for ($i = 0; $i < $length; $i++) {
            $array[] = $this->decode();
        }

        return $array;
    }

    /**
     * Decode map (major type 5) into an associative array.
     *
     * @return array<int|string, mixed>
     *
     * @throws RuntimeException If a key is neither a string nor an integer
     */
    private function decodeMap(int $additionalInfo): array
    {
        $length = $this->decodeLength($additionalInfo);
        $map = [];

        for ($i = 0; $i < $length; $i++) {
            $key = $this->decode();

            // Reject unusable keys before spending work on decoding the value.
            if (! is_string($key) && ! is_int($key)) {
                throw new RuntimeException('Map keys must be strings or integers');
            }

            $map[$key] = $this->decode();
        }

        return $map;
    }

    /**
     * Decode tagged value (major type 6).
     *
     * Tag 42 yields a CID; every other tag is dropped and the enclosed
     * item is returned unchanged.
     */
    private function decodeTag(int $additionalInfo): mixed
    {
        $tag = $this->decodeLength($additionalInfo);

        if ($tag === self::TAG_CID) {
            return $this->decodeCid();
        }

        // For other tags, just return the tagged value
        return $this->decode();
    }

    /**
     * Decode the payload of a tag 42 CID link (DAG-CBOR).
     *
     * The payload must be a byte string whose first byte is 0x00, followed
     * by the binary CID.
     *
     * @throws RuntimeException If the payload is missing, is not a byte
     *                          string, is empty, or lacks the 0x00 prefix
     */
    private function decodeCid(): CID
    {
        if (! $this->reader->hasMore()) {
            throw new RuntimeException('Unexpected end of CBOR data');
        }

        // Inspect the major type directly: byte strings and text strings
        // both decode to PHP strings, so is_string() cannot tell them apart.
        $initialByte = $this->reader->readByte();

        if (($initialByte >> 5) !== self::MAJOR_TYPE_BYTES) {
            throw new RuntimeException('CID tag must be followed by byte string');
        }

        $cidBytes = $this->decodeBytes($initialByte & 0x1F);

        // An empty payload has no prefix byte; indexing it would only raise
        // a warning and then compare equal to 0x00 via ord('').
        if ($cidBytes === '' || $cidBytes[0] !== "\x00") {
            throw new RuntimeException('Invalid CID byte string prefix');
        }

        return CID::fromBinary(substr($cidBytes, 1));
    }

    /**
     * Decode special values (major type 7): bool, null, floats.
     *
     * @throws RuntimeException For `undefined` (23) and any other simple value
     */
    private function decodeSpecial(int $additionalInfo): mixed
    {
        return match ($additionalInfo) {
            20 => false,
            21 => true,
            22 => null,
            23 => throw new RuntimeException('Undefined special value'),
            25 => $this->decodeFloat16(), // IEEE 754 Half-Precision (16-bit)
            26 => $this->decodeFloat32(), // IEEE 754 Single-Precision (32-bit)
            27 => $this->decodeFloat64(), // IEEE 754 Double-Precision (64-bit)
            default => throw new RuntimeException("Unsupported special value: {$additionalInfo}"),
        };
    }

    /**
     * Decode IEEE 754 half-precision float (16-bit).
     *
     * PHP's unpack() has no half-precision format, so the value is
     * assembled by hand from 1 sign bit, 5 exponent bits (bias 15) and
     * 10 mantissa bits.
     */
    private function decodeFloat16(): float
    {
        $bytes = $this->reader->readBytes(2);
        $bits = unpack('n', $bytes)[1];

        // Extract sign, exponent, and mantissa
        $sign = ($bits >> 15) & 1;
        $exponent = ($bits >> 10) & 0x1F;
        $mantissa = $bits & 0x3FF;

        if ($exponent === 0) {
            // Subnormal or zero: no implicit leading 1, fixed exponent of -14
            $value = $mantissa / 1024.0 * (2 ** -14);
        } elseif ($exponent === 31) {
            // Infinity or NaN
            return $mantissa === 0 ? ($sign ? -INF : INF) : NAN;
        } else {
            // Normalized value: implicit leading 1, biased exponent
            $value = (1 + $mantissa / 1024.0) * (2 ** ($exponent - 15));
        }

        return $sign ? -$value : $value;
    }

    /**
     * Decode IEEE 754 single-precision float (32-bit).
     */
    private function decodeFloat32(): float
    {
        $bytes = $this->reader->readBytes(4);

        return unpack('G', $bytes)[1]; // Big-endian float
    }

    /**
     * Decode IEEE 754 double-precision float (64-bit).
     */
    private function decodeFloat64(): float
    {
        $bytes = $this->reader->readBytes(8);

        return unpack('E', $bytes)[1]; // Big-endian double
    }

    /**
     * Decode the argument (length, integer value or tag number) selected
     * by the additional info bits.
     *
     * Values below 24 are the argument itself; 24-27 select a 1/2/4/8-byte
     * big-endian unsigned integer that follows. Anything else, including
     * the indefinite-length marker 31, is rejected.
     *
     * @return int Non-negative argument
     *
     * @throws RuntimeException If the additional info is not 0-27 or the
     *                          value does not fit in a PHP int
     */
    private function decodeLength(int $additionalInfo): int
    {
        if ($additionalInfo < 24) {
            return $additionalInfo;
        }

        return match ($additionalInfo) {
            24 => $this->reader->readByte(),
            25 => unpack('n', $this->reader->readBytes(2))[1],
            26 => unpack('N', $this->reader->readBytes(4))[1],
            27 => $this->readUint64(),
            default => throw new RuntimeException("Invalid additional info: {$additionalInfo}"),
        };
    }

    /**
     * Read 64-bit big-endian unsigned integer.
     *
     * @throws RuntimeException If the value is larger than PHP_INT_MAX
     */
    private function readUint64(): int
    {
        $bytes = $this->reader->readBytes(8);
        $unpacked = unpack('J', $bytes)[1];

        // PHP ints are signed: unpack('J') wraps anything above PHP_INT_MAX
        // to a negative number, which would silently become a negative
        // length or a wrong integer value.
        if ($unpacked < 0) {
            throw new RuntimeException('64-bit integer exceeds PHP_INT_MAX');
        }

        return $unpacked;
    }
}
+64
src/Core/CBOR.php
<?php

declare(strict_types=1);

namespace SocialDept\Signal\Core;

use SocialDept\Signal\CBOR\Decoder;

/**
 * Static convenience wrapper around the CBOR Decoder.
 *
 * Provides static methods matching the interface needed by FirehoseConsumer.
 * Each call builds its own Decoder, so no state is shared between calls.
 */
class CBOR
{
    /**
     * Decode the leading CBOR item and hand back whatever follows it.
     *
     * @param string $data Binary CBOR data
     * @return array{0: mixed, 1: string} [decoded value, remaining data]
     */
    public static function decodeFirst(string $data): array
    {
        $decoder = new Decoder($data);
        $first = $decoder->decode();

        // Everything from the decoder's position onwards was not consumed.
        return [$first, substr($data, $decoder->getPosition())];
    }

    /**
     * Decode a single CBOR item from the start of the data.
     *
     * Only the first item is decoded; any bytes after it are ignored.
     *
     * @param string $data Binary CBOR data
     * @return mixed Decoded value
     */
    public static function decode(string $data): mixed
    {
        return (new Decoder($data))->decode();
    }

    /**
     * Decode every CBOR item in the data, in order.
     *
     * @param string $data Binary CBOR data
     * @return array All decoded values
     */
    public static function decodeAll(string $data): array
    {
        $decoder = new Decoder($data);
        $decoded = [];

        while (true) {
            if (! $decoder->hasMore()) {
                return $decoded;
            }

            $decoded[] = $decoder->decode();
        }
    }
}
+135
tests/Unit/CBORTest.php
<?php

declare(strict_types=1);

namespace SocialDept\Signal\Tests\Unit;

use PHPUnit\Framework\TestCase;
use RuntimeException;
use SocialDept\Signal\Core\CBOR;
use SocialDept\Signal\Core\CID;

/**
 * Unit tests for the CBOR facade and the decoder behind it.
 *
 * Byte sequences for integers and floats are taken from the examples in
 * RFC 8949 Appendix A.
 */
class CBORTest extends TestCase
{
    public function test_decode_unsigned_integers(): void
    {
        // Small value (0-23)
        $this->assertSame(0, CBOR::decode("\x00"));
        $this->assertSame(1, CBOR::decode("\x01"));
        $this->assertSame(23, CBOR::decode("\x17"));

        // 1-byte value
        $this->assertSame(24, CBOR::decode("\x18\x18"));
        $this->assertSame(255, CBOR::decode("\x18\xFF"));

        // 2-byte value
        $this->assertSame(256, CBOR::decode("\x19\x01\x00"));
        $this->assertSame(1000, CBOR::decode("\x19\x03\xE8"));
    }

    public function test_decode_wide_unsigned_integers(): void
    {
        // 4-byte value
        $this->assertSame(1000000, CBOR::decode("\x1A\x00\x0F\x42\x40"));

        // 8-byte value
        $this->assertSame(1000000000000, CBOR::decode("\x1B\x00\x00\x00\xE8\xD4\xA5\x10\x00"));
    }

    public function test_decode_negative_integers(): void
    {
        // -1 is encoded as 0x20 (major type 1, value 0)
        $this->assertSame(-1, CBOR::decode("\x20"));

        // -10 is encoded as 0x29 (major type 1, value 9)
        $this->assertSame(-10, CBOR::decode("\x29"));

        // -100 is encoded as 0x38 0x63 (major type 1, 1-byte value 99)
        $this->assertSame(-100, CBOR::decode("\x38\x63"));
    }

    public function test_decode_byte_strings(): void
    {
        // Empty byte string
        $this->assertSame('', CBOR::decode("\x40"));

        // 4-byte string
        $this->assertSame("\x01\x02\x03\x04", CBOR::decode("\x44\x01\x02\x03\x04"));
    }

    public function test_decode_text_strings(): void
    {
        // Empty text string
        $this->assertSame('', CBOR::decode("\x60"));

        // "hello"
        $this->assertSame('hello', CBOR::decode("\x65hello"));

        // "IETF"
        $this->assertSame('IETF', CBOR::decode("\x64IETF"));
    }

    public function test_decode_arrays(): void
    {
        // Empty array
        $this->assertSame([], CBOR::decode("\x80"));

        // [1, 2, 3]
        $this->assertSame([1, 2, 3], CBOR::decode("\x83\x01\x02\x03"));

        // Mixed array [1, "two", 3]
        $result = CBOR::decode("\x83\x01\x63two\x03");
        $this->assertSame([1, 'two', 3], $result);
    }

    public function test_decode_maps(): void
    {
        // Empty map
        $this->assertSame([], CBOR::decode("\xA0"));

        // {"a": 1, "b": 2}
        $result = CBOR::decode("\xA2\x61a\x01\x61b\x02");
        $this->assertSame(['a' => 1, 'b' => 2], $result);
    }

    public function test_decode_special_values(): void
    {
        // false
        $this->assertFalse(CBOR::decode("\xF4"));

        // true
        $this->assertTrue(CBOR::decode("\xF5"));

        // null
        $this->assertNull(CBOR::decode("\xF6"));
    }

    public function test_decode_half_precision_floats(): void
    {
        // Zero and normalized values
        $this->assertSame(0.0, CBOR::decode("\xF9\x00\x00"));
        $this->assertSame(1.0, CBOR::decode("\xF9\x3C\x00"));
        $this->assertSame(1.5, CBOR::decode("\xF9\x3E\x00"));
        $this->assertSame(-4.0, CBOR::decode("\xF9\xC4\x00"));

        // Largest finite half-precision value
        $this->assertSame(65504.0, CBOR::decode("\xF9\x7B\xFF"));

        // Smallest normalized value (2^-14) and smallest subnormal (2^-24)
        $this->assertSame(0.00006103515625, CBOR::decode("\xF9\x04\x00"));
        $this->assertSame(2 ** -24, CBOR::decode("\xF9\x00\x01"));
    }

    public function test_decode_half_precision_infinity_and_nan(): void
    {
        $this->assertSame(INF, CBOR::decode("\xF9\x7C\x00"));
        $this->assertSame(-INF, CBOR::decode("\xF9\xFC\x00"));
        $this->assertNan(CBOR::decode("\xF9\x7E\x00"));
    }

    public function test_decode_single_precision_floats(): void
    {
        // 100000.0 is exactly representable in 32 bits
        $this->assertSame(100000.0, CBOR::decode("\xFA\x47\xC3\x50\x00"));
    }

    public function test_decode_double_precision_floats(): void
    {
        $this->assertSame(1.1, CBOR::decode("\xFB\x3F\xF1\x99\x99\x99\x99\x99\x9A"));
        $this->assertSame(-4.1, CBOR::decode("\xFB\xC0\x10\x66\x66\x66\x66\x66\x66"));
    }

    public function test_decode_first_returns_value_and_remainder(): void
    {
        [$value, $remainder] = CBOR::decodeFirst("\x01\x02\x03");

        $this->assertSame(1, $value);
        $this->assertSame("\x02\x03", $remainder);
    }

    public function test_decode_all_returns_every_item(): void
    {
        // Three consecutive single-byte integers
        $this->assertSame([1, 2, 3], CBOR::decodeAll("\x01\x02\x03"));

        // An integer followed by the text string "a"
        $this->assertSame([1, 'a'], CBOR::decodeAll("\x01\x61a"));
    }

    public function test_decode_empty_input_throws(): void
    {
        $this->expectException(RuntimeException::class);

        CBOR::decode('');
    }

    public function test_decode_nested_structures(): void
    {
        // {"key": [1, 2, {"inner": true}]}
        $cbor = "\xA1\x63key\x83\x01\x02\xA1\x65inner\xF5";
        $result = CBOR::decode($cbor);

        $expected = [
            'key' => [1, 2, ['inner' => true]],
        ];

        $this->assertSame($expected, $result);
    }

    public function test_decode_cid_tag(): void
    {
        // Tag 42 (CID) followed by byte string with CID data
        // CID bytes: 0x00 prefix + version + codec + multihash
        $cidBinary = "\x01\x71\x12\x20" . str_repeat("\x00", 32); // version 1, codec 0x71, sha256, 32 zero bytes
        $cidBytes = "\x00" . $cidBinary; // Add 0x00 prefix for CBOR tag 42

        // CBOR: tag 42 (0xD8 0x2A) + byte string with 1-byte length (0x58 = major type 2, additional info 24)
        $length = strlen($cidBytes);
        $cbor = "\xD8\x2A\x58" . chr($length) . $cidBytes;

        $result = CBOR::decode($cbor);

        $this->assertInstanceOf(CID::class, $result);
        $this->assertSame(1, $result->version);
        $this->assertSame(0x71, $result->codec);
    }
}