Absolute hinky bare-bones implementation of multiformats in Perl

added CID parsing

+6
.gitignore
··· 1 + .build/ 2 + build/ 3 + Multiformats-* 4 + .sw? 5 + .*.sw? 6 + package-version.txt
+3 -3
lib/Multiformats.pm
··· 1 1 package Multiformats; 2 2 use strict; 3 3 4 - # ABSTRACT: Implementation of multiformats as per https://multiformats.io/ 4 + # ABSTRACT: Implementation of several multiformats as per https://multiformats.io/ for use with ATprotocol 5 5 6 6 # VERSION 7 7 8 - # this package exists purely as a little placeholder for various abstracts and versions; 9 - # the real meat is in the various sub-modules 8 + # this package exists purely as a little placeholder for various abstracts and versions; as well as some 9 + # of the documentation 10 10 11 11 1;
+70
lib/Multiformats/CID.pm
··· 1 + package 2 + Multiformats::CID { 3 + 4 + use feature 'signatures'; 5 + use Exporter 'import'; 6 + our @EXPORT_OK = qw/cid/; 7 + use Multiformats::Varint qw/varint_decode_raw/; 8 + use Multiformats::Multicodec qw/multicodec_get_codec multicodec_wrap/; 9 + use Multiformats::Multibase qw/multibase_decode/; 10 + use Multiformats::Multihash qw/multihash_unwrap/; 11 + 12 + sub cid($bytes) { 13 + utf8::downgrade($bytes, 1); 14 + 15 + # so a v0 and v1 cid in binary should start with either 0x00 or 0x01 - if that isn't the case 16 + # assume we have a string cid 17 + if(substr($bytes, 0, 1) ne "\0" && substr($bytes, 0, 1) ne "\1") { 18 + my $binary = multibase_decode($bytes); 19 + return cid_from_binary($binary); 20 + } else { 21 + # binary 22 + return cid_from_binary($bytes); 23 + } 24 + } 25 + 26 + sub cid_from_binary($bytes) { 27 + utf8::downgrade($bytes, 1); 28 + my ($version, $bread) = varint_decode_raw($bytes); 29 + die 'Unsupported CID version ', $version, ', ' unless $version == 1; 30 + 31 + my ($mc_codec, $bread_codec) = varint_decode_raw(substr($bytes, $bread)); 32 + 33 + my $mc = Multiformats::Multicodec::_get_by_tag($mc_codec); 34 + 35 + # not sure what that codec tag does in here because it doesn't appear to do 36 + # anything short of encoding, well, nothing - the remaining data is the multihash 37 + my ($mh, $hash) = multihash_unwrap(substr($bytes, $bread + $bread_codec)); 38 + 39 + return Multiformats::CID::CIDv1->new(version => 1, codec => $mc->[0], hash_function => $mh->[0], hash => $hash); 40 + } 41 + } 42 + 43 + package 44 + Multiformats::CID::CIDv1 { 45 + use Mojo::Base -base, -signatures; 46 + use Multiformats::Multicodec qw/multicodec_wrap multicodec_unwrap/; 47 + use Multiformats::Multibase qw/multibase_encode/; 48 + use Multiformats::Varint qw/varint_encode/; 49 + use Multiformats::Multihash qw/multihash_wrap/; 50 + use overload bool => sub {1}, '""' => sub { shift->to_str }, fallback => 1; 51 + 52 + # note that the codecs are the tag values, not the names, we need to take this into account 53 + # in multibase_encode and multihash_encode 54 + has [qw/version codec hash_function hash/] => undef; 55 + 56 + sub to_str($self, $codec = 'base32') { 57 + return multibase_encode($codec, $self->to_bytes); 58 + } 59 + 60 + sub to_bytes($self) { 61 + my $hash = multihash_wrap($self->hash_function, $self->hash); 62 + my $content = multicodec_wrap($self->codec, $hash); 63 + my $version = varint_encode($self->version); 64 + return $version . $content; 65 + } 66 + } 67 + 68 + 1; 69 + 70 +
+42 -19
lib/Multiformats/Multibase.pm
··· 22 22 return bless({}, $pkg); 23 23 } 24 24 25 - # these 2 maps map the actual encoding and decoding 26 - # to a subroutine that takes the to be decoded/encoded values as first argument 27 - # please note that only a few formats are implemented by default 28 - use constant MB_ENCODE_MAP => { 29 - 'none' => sub { return "\0" . shift }, 30 - 'base32' => sub { return 'b' . encode_b32r(shift) }, 31 - 'base36' => sub { return 'k' . encode_base36(shift) }, 32 - 'base58btc' => sub { return 'z' . encode_b58b(shift) }, 33 - }; 25 + # this map holds the various encodings and decodings 26 + use constant MB_MAP => [ 27 + [ 'none', "\0", sub { return shift }, sub { return shift } ], 28 + [ 'base32', 'b', sub { return lc(encode_b32r(shift)) }, sub { return decode_b32r(uc(shift)) } ], 29 + [ 'base32upper','B', sub { return encode_b32r(shift) }, sub { return decode_b32r(shift) } ], 30 + [ 'base36', 'k', sub { return lc(encode_base36(shift)) }, sub { return decode_base36(shift) } ], 31 + [ 'base58btc', 'z', sub { return encode_b58b(shift) }, sub { return decode_b58b(shift) } ], 32 + ]; 33 + 34 + sub _map_by_tag($tag) { 35 + foreach my $entry (@{__PACKAGE__->MB_MAP}) { 36 + return $entry if($entry->[1] eq $tag); 37 + } 38 + return undef; 39 + } 34 40 35 - use constant MB_DECODE_MAP => { 36 - "\0" => sub { return shift }, 37 - 'b' => sub { return decode_b32r(shift) }, 38 - 'k' => sub { return decode_base36(shift) }, 39 - 'z' => sub { return decode_b58b(shift) }, 40 - }; 41 + sub _map_by_name($name) { 42 + if(length($name) == 1) { 43 + return _map_by_tag($name); 44 + } else { 45 + foreach my $entry (@{__PACKAGE__->MB_MAP}) { 46 + return $entry if($entry->[0] eq $name); 47 + } 48 + return undef; 49 + } 50 + } 41 51 42 52 sub multibase_decode($bytes) { 53 + # make sure it's actual bytes 54 + utf8::downgrade($bytes, 1); 43 55 my $t = substr($bytes, 0, 1); 44 - die 'unknown format ' . $t . ', ' unless exists MB_DECODE_MAP->{$t}; 45 - return MB_DECODE_MAP->{$t}->(substr($bytes, 1)); 56 + if(my $e = _map_by_tag($t)) { 57 + my $decoded = $e->[3]->(substr($bytes, 1)); 58 + return wantarray 59 + ? ($t, $decoded) 60 + : $decoded; 61 + } else { 62 + die 'unknown format ' . $t . ', '; 63 + } 46 64 } 47 65 48 66 sub multibase_encode($as, $bytes) { 49 - die 'unknown format ' . $as . ', ' unless exists MB_ENCODE_MAP->{$as}; 50 - return MB_ENCODE_MAP->{$as}->($bytes); 67 + utf8::downgrade($bytes, 1); 68 + if(my $e = _map_by_name($as)) { 69 + my $encoded = $e->[1] . $e->[2]->($bytes); 70 + return $encoded; 71 + } else { 72 + die 'unknown format ' . $as . ', '; 73 + } 51 74 } 52 75 } 53 76
+58
lib/Multiformats/Multicodec.pm
··· 1 + package 2 + Multiformats::Multicodec { 3 + use strict; 4 + use warnings; 5 + use feature 'signatures'; 6 + use Multiformats::Varint qw/varint_decode_raw varint_encode/; 7 + 8 + use Exporter 'import'; 9 + our @EXPORT_OK = qw/multicodec_wrap multicodec_unwrap multicodec_get_codec/; 10 + 11 + use constant MULTICODEC_MAP => [ 12 + [ 'raw', 0x55 ], 13 + [ 'dag-cbor', 0x71 ], 14 + ]; 15 + 16 + sub _get_by_name($as) { 17 + foreach my $entry (@{__PACKAGE__->MULTICODEC_MAP}) { 18 + return $entry if($entry->[0] eq $as); 19 + } 20 + return _get_by_tag($as); 21 + } 22 + 23 + sub _get_by_tag($tag) { 24 + foreach my $entry (@{__PACKAGE__->MULTICODEC_MAP}) { 25 + return $entry if($entry->[1] == $tag); 26 + } 27 + return undef; 28 + } 29 + 30 + sub multicodec_wrap($as, $value) { 31 + utf8::downgrade($value, 1); 32 + if(my $e = _get_by_name($as)) { 33 + my $id = varint_encode($e->[1]); 34 + return $id . $value; 35 + } else { 36 + die 'Unsupported multicodec type ', $as, ', '; 37 + } 38 + } 39 + 40 + sub multicodec_unwrap($value) { 41 + utf8::downgrade($value, 1); 42 + my ($id, $bytes) = varint_decode_raw($value); 43 + return substr($value, $bytes); 44 + 45 + } 46 + 47 + sub multicodec_get_codec($value) { 48 + utf8::downgrade($value, 1); 49 + my ($id, $bytes) = varint_decode_raw($value); 50 + if(my $e = _get_by_tag($id)) { 51 + return $e; 52 + } else { 53 + die 'Unsupported multicodec type ', $id, ', '; 54 + } 55 + } 56 + } 57 + 58 + 1;
+103
lib/Multiformats/Multihash.pm
··· 1 + package 2 + Multiformats::Multihash { 3 + use strict; 4 + use warnings; 5 + use feature 'signatures'; 6 + 7 + use Exporter 'import'; 8 + our @EXPORT_OK = qw/multihash_encode multihash_decode multihash_wrap multihash_unwrap/; 9 + 10 + use Digest::SHA qw/sha1 sha256 sha384 sha512/; # SHA2 11 + use Digest::SHA3 qw/sha3_224 sha3_384 sha3_256/; 12 + use Multiformats::Varint qw/varint_decode_raw varint_encode/; 13 + 14 + sub decode($self, $value) { 15 + return multihash_decode($value); 16 + } 17 + 18 + sub encode($self, $as, $value) { 19 + return multihash_encode($as, $value); 20 + } 21 + 22 + sub new($pkg) { 23 + return bless({}, $pkg); 24 + } 25 + 26 + # this map holds the various encodings and decodings 27 + use constant MULTIFORMAT_MAP => [ 28 + [ 'identity', 0x00, undef, sub { return shift } ], 29 + [ 'sha1', 0x11, undef, sub { return sha1(shift) } ], 30 + [ 'sha2-256', 0x12, undef, sub { return sha256(shift) } ], 31 + [ 'sha2-512', 0x13, undef, sub { return sha512(shift) } ], 32 + [ 'sha3-384', 0x15, undef, sub { return sha3_384(shift) } ], 33 + [ 'sha3-256', 0x16, undef, sub { return sha3_256(shift) } ], 34 + [ 'sha3-224', 0x17, undef, sub { return sha3_224(shift) } ], 35 + [ 'sha2-384', 0x20, undef, sub { return sha_384(shift) } ], 36 + ]; 37 + 38 + sub codecs { 39 + return __PACKAGE__->MULTIFORMAT_MAP; 40 + } 41 + 42 + sub _map_by_tag($tag) { 43 + foreach my $entry (@{__PACKAGE__->MULTIFORMAT_MAP}) { 44 + return $entry if($entry->[1] == $tag); 45 + } 46 + return undef; 47 + } 48 + 49 + sub _map_by_name($name) { 50 + foreach my $entry (@{__PACKAGE__->MULTIFORMAT_MAP}) { 51 + return $entry if($entry->[0] eq $name); 52 + } 53 + return _map_by_tag($name); 54 + } 55 + 56 + sub multihash_decode($bytes) { 57 + # make sure it's actual bytes 58 + utf8::downgrade($bytes, 1); 59 + 60 + my ($t, $bread_type) = varint_decode_raw($bytes); 61 + if(my $e = _map_by_tag($t)) { 62 + my ($l, $bread_len) = varint_decode_raw(substr($bytes, $bread_type)); 63 + return substr($bytes, $bread_type + $bread_len); # there isn't any decoding since hashes are a one-way street so we just return the actual value 64 + } else { 65 + die 'unknown format ' . $t . ', '; 66 + } 67 + } 68 + 69 + sub multihash_unwrap($bytes) { 70 + utf8::downgrade($bytes, 1); 71 + 72 + my ($t, $bread_type) = varint_decode_raw($bytes); 73 + if(my $e = _map_by_tag($t)) { 74 + my ($l, $bread_len) = varint_decode_raw(substr($bytes, $bread_type)); 75 + return wantarray 76 + ? ($e, substr($bytes, $bread_type + $bread_len)) # allows us to get the whole kit and kaboodle in one sitting 77 + : substr($bytes, $bread_type + $bread_len) 78 + } else { 79 + die 'unknown format ' . $t . ', '; 80 + } 81 + } 82 + 83 + sub multihash_wrap($as, $bytes) { 84 + utf8::downgrade($bytes, 1); 85 + if(my $e = _map_by_name($as)) { 86 + return varint_encode($e->[1]) . varint_encode(length($bytes)) . $bytes; 87 + } else { 88 + die 'unknown format ' . $as . ', '; 89 + } 90 + } 91 + 92 + sub multihash_encode($as, $bytes) { 93 + utf8::downgrade($bytes, 1); 94 + if(my $e = _map_by_name($as)) { 95 + my $hash = $e->[3]->($bytes); 96 + return varint_encode($e->[1]) . varint_encode(length($hash)) . $hash; 97 + } else { 98 + die 'unknown format ' . $as . ', '; 99 + } 100 + } 101 + } 102 + 103 + 1;
+4 -4
lib/Multiformats/Varint.pm
··· 25 25 26 26 # varint_encode, varint_decode_raw and varint_decode lifted from python multiformats https://github.com/hashberg-io/multiformats 27 27 sub varint_encode($value) { 28 - die 'PerlDS::Encoding::varint_encode: cannot encode negative values' unless $value >= 0; 28 + die 'Multiformats::Varint::varint_encode: cannot encode negative values' unless $value >= 0; 29 29 my @out = (); 30 30 while(1) { 31 31 my $next_byte = $value & 0b01111111; ··· 37 37 last; 38 38 } 39 39 } 40 - die 'PerlDS::Encoding::varint_encode: encoded varint > 9 bytes' unless scalar(@out) <= 9; 40 + die 'Multiformats::Varint::varint_encode: encoded varint > 9 bytes' unless scalar(@out) <= 9; 41 41 return wantarray 42 42 ? (pack('C*', @out), scalar(@out)) 43 43 : pack('C*', @out) ··· 46 46 47 47 sub varint_decode($value) { 48 48 my ($x, $read) = varint_decode_raw($value); 49 - die 'PerlDS::Encoding::varint_decode: not all bytes used by encoding' if($read > length($value)); 49 + die 'Multiformats::Varint::varint_decode: not all bytes used by encoding' if($read > length($value)); 50 50 return $x; 51 51 } 52 52 ··· 59 59 # via the num_bytes_read later 60 60 61 61 while($expect_next) { 62 - die 'PerlDS::Encoding::varint_decode: no next byte to read' if $num_bytes_read >= scalar(@buf); 62 + die 'Multiformats::Varint::varint_decode_raw: no next byte to read' if $num_bytes_read >= scalar(@buf); 63 63 my $next_byte = $buf[$num_bytes_read]; 64 64 $x += ($next_byte & 0b01111111) << (7 * $num_bytes_read); 65 65 $expect_next = ($next_byte >> 7 == 0b1) ? 1 : undef;
+5
t/003_multihash.t
··· 1 + use Test::More; 2 + 3 + use_ok('Multiformats::Multihash'); 4 + 5 + done_testing();
+5
t/004_multicodec.t
··· 1 + use Test::More; 2 + 3 + use_ok('Multiformats::Multicodec'); 4 + 5 + done_testing();
+21
t/005_cid.t
··· 1 + use Test::More; 2 + 3 + use_ok('Multiformats::CID'); 4 + 5 + my $binary = "\1q\22 |\b\217\356\37\274\0\216H\230\361\"\234\4\243ZFc\225`\26k\316\$\363\tfQ\234\307\3550"; 6 + 7 + my $cid = Multiformats::CID::cid($binary); 8 + 9 + is(ref($cid), 'Multiformats::CID::CIDv1', 'cid returns proper object'); 10 + 11 + my $str = $cid->to_str; 12 + 13 + my $cid2 = Multiformats::CID::cid($str); 14 + 15 + is(ref($cid2), 'Multiformats::CID::CIDv1', 'cid returns proper object for string cid'); 16 + 17 + is($cid2->hash, $cid->hash, 'hashes match'); 18 + 19 + 20 + 21 + done_testing();