···1616| --- | --- |
1717| [TypeScript](#typescript) | [`typescript/plcbundle.ts`](typescript/plcbundle.ts) |
1818| [Python](#python) | [`python/plcbundle.py`](python/plcbundle.py) |
1919+| [Ruby](#ruby) | [`ruby/plcbundle.rb`](ruby/plcbundle.rb) |
19202021## TypeScript
2122···65666667---
67686969+7070+6871## Python
69727073File: [`python/plcbundle.py`](python/plcbundle.py)
71747275### Usage
73767474-TODO7777+TODO
7878+7979+## Ruby
8080+8181+File: [`ruby/plcbundle.rb`](ruby/plcbundle.rb)
8282+8383+### Prerequisites
8484+8585+You need Ruby installed (version 3+ recommended).
8686+8787+### Installation
8888+8989+The script relies on one external gem, `zstd-ruby`, for Zstandard compression. HTTP requests use `net/http` from Ruby's standard library, so nothing else needs to be installed.
9090+9191+```sh
9292+# Install required gems
9393+gem install zstd-ruby
9494+```
9595+9696+### Usage
9797+9898+Run the script from your terminal. It accepts one optional argument: the path to the output directory.
9999+100100+```sh
101101+# Run and save bundles to the default './plc_bundles_rb' directory
102102+ruby plcbundle.rb
103103+104104+# Run and save to a custom directory
105105+ruby plcbundle.rb ./my_ruby_bundles
106106+```
+182
ruby/plcbundle.rb
···11+#!/usr/bin/env ruby
22+# frozen_string_literal: true
33+44+# plcbundle.rb - Ruby implementation of plcbundle V1 specification
55+# Creates compressed, cryptographically-chained bundles of DID PLC operations
66+77+require 'json'
88+require 'digest'
99+require 'net/http'
1010+require 'uri'
1111+require 'fileutils'
1212+require 'time'
1313+require 'set'
1414+require 'zstd-ruby'
1515+1616+# Configuration constants
1717+BUNDLE_SIZE = 10_000 # Number of operations taken from the mempool for each bundle file
1818+INDEX_FILE = 'plc_bundles.json' # Name of the JSON index file stored inside the output directory
1919+PLC_URL = 'https://plc.directory' # Base URL; the script requests "#{PLC_URL}/export"
# Builds plcbundle V1 archives from the PLC directory export feed.
#
# Operations are fetched in pages, de-duplicated by CID, collected in an
# in-memory pool and, every BUNDLE_SIZE operations, written out as a
# Zstandard-compressed JSONL file whose hash is chained to the previous
# bundle. Metadata for every bundle is kept in a JSON index (INDEX_FILE)
# inside the output directory.
class PlcBundle
  # @param dir [String] output directory for bundle files and the index;
  #   created if it does not exist
  def initialize(dir)
    @dir = dir
    @pool = []      # Mempool of operations waiting to be bundled
    @seen = Set.new # CID deduplication set (pruned after each bundle)

    FileUtils.mkdir_p(@dir)
    @idx = load_idx
    puts "plcbundle v1 | Dir: #{@dir} | Last: #{@idx[:last_bundle]}\n"

    # Seed deduplication set with boundary CIDs from previous bundle
    seed_boundary if @idx[:bundles].any?
  end

  # Fetches operations until the export feed is exhausted (or a fetch fails),
  # writing a bundle each time the pool holds BUNDLE_SIZE operations.
  #
  # Any StandardError raised while running is reported on stdout and the
  # index is saved before returning, so completed bundles are never lost.
  #
  # @return [void]
  def run
    cursor = @idx[:bundles].last&.dig(:end_time)

    loop do
      puts "\nFetch: #{cursor || 'start'}"
      ops = fetch(cursor)

      # nil means the request failed; an empty page means we are caught up.
      # Both end the run. (`puts` returns nil, so this cannot be chained
      # with `and break`.)
      if ops.nil? || ops.empty?
        puts 'Done.'
        break
      end

      add_ops(ops) # Validate and add to mempool
      cursor = ops.last[:time]
      create_bundle while @pool.size >= BUNDLE_SIZE # Create bundles when ready

      sleep 0.2 # Rate limiting
    end

    save_idx
    total_mb = format('%.1f', @idx[:total_size_bytes] / 1e6)
    puts "\nBundles: #{@idx[:bundles].size} | Pool: #{@pool.size} | Size: #{total_mb}MB"
  rescue StandardError => e
    puts "\nError: #{e.message}"
    save_idx
  end

  private

  # Fetches one page of operations from the PLC directory export endpoint.
  #
  # @param after [String, nil] cursor timestamp; omitted from the query when nil
  # @return [Array<Hash>, nil] parsed operations (symbol keys) with two extra
  #   keys, :raw (the original JSON line) and :time (its createdAt value);
  #   nil when the request or parsing fails
  def fetch(after)
    query = { count: 1000 }
    query[:after] = after if after
    uri = URI("#{PLC_URL}/export")
    uri.query = URI.encode_www_form(query) # escape the cursor safely

    res = Net::HTTP.get_response(uri)
    unless res.is_a?(Net::HTTPSuccess)
      warn "Fetch failed: HTTP #{res.code}"
      return nil
    end

    # Parse each line once and preserve raw JSON for reproducibility (Spec 4.2)
    res.body.strip.split("\n").map do |line|
      op = JSON.parse(line, symbolize_names: true)
      op.merge(raw: line, time: op[:createdAt])
    end
  rescue StandardError => e
    # Best effort: the caller treats nil as "stop", but say why on stderr.
    warn "Fetch failed: #{e.message}"
    nil
  end

  # Adds previously unseen operations to the mempool.
  #
  # @param ops [Array<Hash>] operations as returned by #fetch
  # @raise [RuntimeError] when an operation is older than the one before it
  # @return [void]
  def add_ops(ops)
    last_t = @pool.last&.dig(:time) || @idx[:bundles].last&.dig(:end_time) || ''
    added = 0

    ops.each do |op|
      next if @seen.include?(op[:cid]) # Skip duplicates (boundary + within-batch)

      # Spec 3: Validate chronological order
      raise 'Order fail' if op[:time] < last_t

      @pool << op
      @seen << op[:cid]
      last_t = op[:time]
      added += 1
    end

    puts "  +#{added} ops"
  end

  # Removes BUNDLE_SIZE operations from the pool, writes them as a compressed
  # bundle file, appends the bundle's metadata to the index and saves it.
  #
  # @return [void]
  def create_bundle
    ops = @pool.shift(BUNDLE_SIZE)
    previous = @idx[:bundles].last
    parent = previous&.dig(:hash) || ''

    # Spec 4.2: Serialize using raw JSON strings for reproducibility
    jsonl = ops.map { |o| "#{o[:raw]}\n" }.join

    # Spec 6.3: Calculate hashes
    ch = sha(jsonl) # Content hash
    h = sha(parent.empty? ? "plcbundle:genesis:#{ch}" : "#{parent}:#{ch}") # Chain hash
    zst = Zstd.compress(jsonl)

    # Write bundle file
    num = @idx[:last_bundle] + 1
    file = bundle_file(num)
    File.binwrite(File.join(@dir, file), zst)

    # Create metadata entry
    @idx[:bundles] << {
      bundle_number: num,
      start_time: ops.first[:time],
      end_time: ops.last[:time],
      operation_count: ops.size,
      did_count: ops.map { |o| o[:did] }.uniq.size,
      hash: h,
      content_hash: ch,
      parent: parent,
      compressed_hash: sha(zst),
      compressed_size: zst.bytesize,
      uncompressed_size: jsonl.bytesize,
      cursor: previous&.dig(:end_time) || '',
      created_at: Time.now.utc.iso8601
    }

    @idx[:last_bundle] = num
    @idx[:total_size_bytes] += zst.bytesize

    # Prune seen CIDs: only keep boundary + mempool (memory efficient)
    @seen = boundary_cids(ops) | @pool.map { |o| o[:cid] }.to_set

    save_idx
    puts "✓ #{file} | #{h[0..12]}... | seen:#{@seen.size}"
  end

  # File name used for the bundle with the given number.
  def bundle_file(num)
    format('%06d.jsonl.zst', num)
  end

  # Loads the index from disk; falls back to a fresh, empty index when the
  # file cannot be read or parsed.
  #
  # @return [Hash] index with symbol keys
  def load_idx
    JSON.parse(File.read(File.join(@dir, INDEX_FILE)), symbolize_names: true)
  rescue StandardError
    { version: '1.0', last_bundle: 0, updated_at: '', total_size_bytes: 0, bundles: [] }
  end

  # Saves the index by writing a temp file and renaming it over the old one,
  # so readers never see a partially written index.
  #
  # @return [void]
  def save_idx
    @idx[:updated_at] = Time.now.utc.iso8601
    target = File.join(@dir, INDEX_FILE)
    tmp = "#{target}.tmp"
    File.write(tmp, JSON.pretty_generate(@idx))
    File.rename(tmp, target)
  end

  # Seeds the deduplication set with the boundary CIDs of the last bundle on
  # disk. Failures (e.g. a missing bundle file) only produce a warning.
  #
  # @return [void]
  def seed_boundary
    last = @idx[:bundles].last
    file = bundle_file(last[:bundle_number])

    data = Zstd.decompress(File.binread(File.join(@dir, file)))
    ops = data.strip.split("\n").map do |line|
      op = JSON.parse(line)
      { time: op['createdAt'], cid: op['cid'] }
    end

    @seen = boundary_cids(ops)
    puts "Seeded: #{@seen.size} CIDs from bundle #{last[:bundle_number]}"
  rescue StandardError
    puts "Warning: couldn't seed boundary"
  end

  # CIDs of the trailing operations that share the last operation's timestamp.
  #
  # @param ops [Array<Hash>] operations with :time and :cid keys
  # @return [Set] boundary CIDs; empty when ops is empty
  def boundary_cids(ops)
    return Set.new if ops.empty?

    t = ops.last[:time]
    ops.reverse_each.take_while { |o| o[:time] == t }.map { |o| o[:cid] }.to_set
  end

  # SHA-256 hex digest helper
  def sha(data)
    Digest::SHA256.hexdigest(data)
  end
end

# Entry point: runs only when this file is executed directly. The first
# command-line argument selects the output directory.
if __FILE__ == $PROGRAM_NAME
  output_dir = ARGV[0] || './plc_bundles_rb'
  PlcBundle.new(output_dir).run
end