Template of a custom feed generator service for the Bluesky network in Ruby

extracted code from private repo

+2
.gitignore
···
··· 1 + db/*.sqlite3 2 + log
+2
Capfile
···
··· 1 + load 'deploy' 2 + load 'config/deploy'
+14
Gemfile
···
··· 1 + source "https://rubygems.org" 2 + 3 + gem 'blue_factory', '~> 0.1.2' 4 + gem 'skyfall', '~> 0.1.2' 5 + 6 + gem 'activerecord', '~> 6.0' 7 + gem 'sinatra-activerecord', '~> 2.0' 8 + gem 'sqlite3' 9 + gem 'rake' 10 + 11 + group :development do 12 + gem 'webrick' 13 + gem 'capistrano', '~> 2.0' 14 + end
+84
Gemfile.lock
···
··· 1 + GEM 2 + remote: https://rubygems.org/ 3 + specs: 4 + activemodel (6.1.7.3) 5 + activesupport (= 6.1.7.3) 6 + activerecord (6.1.7.3) 7 + activemodel (= 6.1.7.3) 8 + activesupport (= 6.1.7.3) 9 + activesupport (6.1.7.3) 10 + concurrent-ruby (~> 1.0, >= 1.0.2) 11 + i18n (>= 1.6, < 2) 12 + minitest (>= 5.1) 13 + tzinfo (~> 2.0) 14 + zeitwerk (~> 2.3) 15 + base32 (0.3.4) 16 + blue_factory (0.1.2) 17 + sinatra (~> 3.0) 18 + capistrano (2.15.10) 19 + highline 20 + net-scp (>= 1.0.0) 21 + net-sftp (>= 2.0.0) 22 + net-ssh (>= 2.0.14) 23 + net-ssh-gateway (>= 1.1.0) 24 + cbor (0.5.9.6) 25 + concurrent-ruby (1.2.2) 26 + event_emitter (0.2.6) 27 + highline (2.1.0) 28 + i18n (1.14.1) 29 + concurrent-ruby (~> 1.0) 30 + minitest (5.18.0) 31 + mustermann (3.0.0) 32 + ruby2_keywords (~> 0.0.1) 33 + net-scp (4.0.0) 34 + net-ssh (>= 2.6.5, < 8.0.0) 35 + net-sftp (4.0.0) 36 + net-ssh (>= 5.0.0, < 8.0.0) 37 + net-ssh (7.1.0) 38 + net-ssh-gateway (2.0.0) 39 + net-ssh (>= 4.0.0) 40 + rack (2.2.7) 41 + rack-protection (3.0.6) 42 + rack 43 + rake (13.0.6) 44 + ruby2_keywords (0.0.5) 45 + sinatra (3.0.6) 46 + mustermann (~> 3.0) 47 + rack (~> 2.2, >= 2.2.4) 48 + rack-protection (= 3.0.6) 49 + tilt (~> 2.0) 50 + sinatra-activerecord (2.0.26) 51 + activerecord (>= 4.1) 52 + sinatra (>= 1.0) 53 + skyfall (0.1.2) 54 + base32 (~> 0.3, >= 0.3.4) 55 + cbor (~> 0.5, >= 0.5.9.6) 56 + websocket-client-simple (~> 0.6, >= 0.6.1) 57 + sqlite3 (1.6.3-arm64-darwin) 58 + sqlite3 (1.6.3-x86_64-linux) 59 + tilt (2.2.0) 60 + tzinfo (2.0.6) 61 + concurrent-ruby (~> 1.0) 62 + webrick (1.8.1) 63 + websocket (1.2.9) 64 + websocket-client-simple (0.6.1) 65 + event_emitter 66 + websocket 67 + zeitwerk (2.6.8) 68 + 69 + PLATFORMS 70 + arm64-darwin 71 + x86_64-linux 72 + 73 + DEPENDENCIES 74 + activerecord (~> 6.0) 75 + blue_factory (~> 0.1.2) 76 + capistrano (~> 2.0) 77 + rake 78 + sinatra-activerecord (~> 2.0) 79 + skyfall (~> 0.1.2) 80 + sqlite3 81 + webrick 82 + 83 + BUNDLED WITH 84 + 2.4.14
+60
Rakefile
···
··· 1 + require 'bundler/setup' 2 + 3 + require 'blue_factory/rake' 4 + require 'sinatra/activerecord' 5 + require 'sinatra/activerecord/rake' 6 + 7 + require_relative 'app/config' 8 + 9 + def get_feed 10 + if ENV['KEY'].to_s == '' 11 + puts "Please specify feed key as KEY=feedname (the part of the feed's at:// URI after the last slash)" 12 + exit 1 13 + end 14 + 15 + feed_key = ENV['KEY'] 16 + feed = BlueFactory.get_feed(feed_key) 17 + 18 + if feed.nil? 19 + puts "No feed configured for key '#{feed_key}' - use `BlueFactory.add_feed '#{feed_key}', MyFeed.new`" 20 + exit 1 21 + end 22 + 23 + feed 24 + end 25 + 26 + task :print_feed do 27 + feed = get_feed 28 + limit = ENV['N'] ? ENV['N'].to_i : 100 29 + 30 + posts = FeedPost.where(feed_id: feed.feed_id).joins(:post).order('feed_posts.time DESC').limit(limit).map(&:post) 31 + 32 + posts.each do |s| 33 + puts "#{s.time} * https://bsky.app/profile/#{s.repo}/post/#{s.rkey}" 34 + puts s.text 35 + puts 36 + end 37 + end 38 + 39 + task :rebuild_feed do 40 + feed = get_feed 41 + 42 + puts "Cleaning up feed..." 43 + FeedPost.where(feed_id: feed.feed_id).delete_all 44 + 45 + total = Post.count 46 + 47 + puts "Loading posts..." 48 + posts = Post.all.to_a 49 + 50 + posts.each_with_index do |post, i| 51 + print "Processing posts... [#{i + 1}/#{total}]\r" 52 + $stdout.flush 53 + 54 + if feed.post_matches?(post) 55 + FeedPost.create!(feed_id: feed.feed_id, post: post, time: post.time) 56 + end 57 + end 58 + 59 + puts "Processing posts... Done." + " " * 30 60 + end
+10
app/config.rb
···
··· 1 + Dir[File.join(__dir__, 'feeds', '*.rb')].each { |f| require(f) } 2 + 3 + require 'blue_factory' 4 + require 'sinatra/activerecord' 5 + 6 + BlueFactory.set :publisher_did, 'did:plc:<your_identifier_here>' 7 + BlueFactory.set :hostname, 'feeds.example.com' 8 + 9 + BlueFactory.add_feed 'linux', LinuxFeed.new 10 + BlueFactory.add_feed 'starwars', StarWarsFeed.new
+79
app/feeds/feed.rb
···
··· 1 + require 'blue_factory/errors' 2 + require 'time' 3 + 4 + require_relative '../models/feed_post' 5 + 6 + class Feed 7 + DEFAULT_LIMIT = 50 8 + MAX_LIMIT = 100 9 + 10 + # any unique number to use as a key in the database 11 + def feed_id 12 + raise NotImplementedError 13 + end 14 + 15 + def post_matches?(post) 16 + raise NotImplementedError 17 + end 18 + 19 + # name of your feed, e.g. "What's Hot" 20 + def display_name 21 + raise NotImplementedError 22 + end 23 + 24 + # (optional) description of the feed, e.g. "Top trending content from the whole network" 25 + def description 26 + nil 27 + end 28 + 29 + # (optional) path of the feed avatar file 30 + def avatar_file 31 + nil 32 + end 33 + 34 + def get_posts(params) 35 + limit = check_query_limit(params) 36 + query = FeedPost.where(feed_id: feed_id).joins(:post).select('posts.repo, posts.rkey, feed_posts.time, post_id') 37 + .order('feed_posts.time DESC, post_id DESC').limit(limit) 38 + 39 + if params[:cursor].to_s != "" 40 + time, last_id = parse_cursor(params) 41 + query = query.where("feed_posts.time < ? OR (feed_posts.time = ? AND post_id < ?)", time, time, last_id) 42 + end 43 + 44 + posts = query.to_a 45 + last = posts.last 46 + 47 + cursor = last && sprintf('%.06f', last.time.to_f) + ':' + last.post_id.to_s 48 + 49 + { cursor: cursor, posts: posts.map { |p| 'at://' + p.repo + '/app.bsky.feed.post/' + p.rkey }} 50 + end 51 + 52 + 53 + private 54 + 55 + def check_query_limit(params) 56 + if params[:limit] 57 + limit = params[:limit].to_i 58 + (limit < 0) ? 0 : [limit, MAX_LIMIT].min 59 + else 60 + DEFAULT_LIMIT 61 + end 62 + end 63 + 64 + def parse_cursor(params) 65 + parts = params[:cursor].split(':') 66 + 67 + if parts.length != 2 || parts[0] !~ /^\d+(\.\d+)?$/ || parts[1] !~ /^\d+$/ 68 + raise BlueFactory::InvalidRequestError.new("Malformed cursor") 69 + end 70 + 71 + sec = parts[0].to_i 72 + usec = (parts[0].to_f * 1_000_000).to_i % 1_000_000 73 + time = Time.at(sec, usec) 74 + 75 + last_id = parts[1].to_i 76 + 77 + [time, last_id] 78 + end 79 + end
+23
app/feeds/linux_feed.rb
···
··· 1 + require_relative 'feed' 2 + 3 + class LinuxFeed < Feed 4 + def feed_id 5 + 2 6 + end 7 + 8 + def display_name 9 + "Linux" 10 + end 11 + 12 + def description 13 + "Feed with posts about Linux" 14 + end 15 + 16 + def avatar_file 17 + "images/linux_tux.png" 18 + end 19 + 20 + def post_matches?(post) 21 + post.text =~ /linux/i 22 + end 23 + end
+27
app/feeds/star_wars_feed.rb
···
··· 1 + require_relative 'feed' 2 + 3 + class StarWarsFeed < Feed 4 + REGEXPS = [ 5 + /star ?wars/i, /mandalorian/i, /\bandor\b/i, /\bjedi\b/i, /\bsith\b/i, /\byoda\b/i 6 + ] 7 + 8 + def feed_id 9 + 1 10 + end 11 + 12 + def display_name 13 + "Star Wars" 14 + end 15 + 16 + def description 17 + "Feed with posts about Star Wars" 18 + end 19 + 20 + def avatar_file 21 + "images/babyyoda.jpg" 22 + end 23 + 24 + def post_matches?(post) 25 + REGEXPS.any? { |r| post.text =~ r } 26 + end 27 + end
+94
app/firehose_stream.rb
···
··· 1 + require 'json' 2 + require 'sinatra/activerecord' 3 + require 'skyfall' 4 + 5 + require_relative 'config' 6 + require_relative 'models/feed_post' 7 + require_relative 'models/post' 8 + 9 + class FirehoseStream 10 + attr_accessor :show_progress, :log_status, :log_posts, :save_posts 11 + 12 + def initialize 13 + @env = (ENV['APP_ENV'] || ENV['RACK_ENV'] || :development).to_sym 14 + 15 + @show_progress = (@env == :development) ? true : false 16 + @log_status = true 17 + @log_posts = (@env == :development) ? :matching : false 18 + @save_posts = (@env == :development) ? :all : :matching 19 + 20 + @feeds = BlueFactory.all_feeds 21 + end 22 + 23 + def start 24 + return if @sky 25 + 26 + @sky = Skyfall::Stream.new('bsky.social', :subscribe_repos) 27 + 28 + @sky.on_message do |m| 29 + handle_message(m) 30 + end 31 + 32 + if @log_status 33 + @sky.on_connect { puts "Connected #{Time.now} ✓" } 34 + @sky.on_disconnect { puts; puts "Disconnected #{Time.now}" } 35 + @sky.on_reconnect { puts "Reconnecting..." } 36 + @sky.on_error { |e| puts "ERROR: #{Time.now} #{e}" } 37 + end 38 + 39 + @sky.connect 40 + end 41 + 42 + def stop 43 + @sky&.disconnect 44 + @sky = nil 45 + end 46 + 47 + def handle_message(msg) 48 + return if msg.type != :commit 49 + 50 + msg.operations.each do |op| 51 + return unless op.type == :bsky_post 52 + 53 + if op.action == :delete 54 + if post = Post.find_by(repo: op.repo, rkey: op.rkey) 55 + post.destroy 56 + end 57 + end 58 + 59 + return unless op.action == :create 60 + 61 + begin 62 + text = op.raw_record['text'] 63 + post = Post.new( 64 + repo: op.repo, 65 + time: msg.time, 66 + text: text, 67 + rkey: op.rkey, 68 + data: JSON.generate(op.raw_record) 69 + ) 70 + 71 + matched = false 72 + 73 + @feeds.each do |feed| 74 + if feed.post_matches?(post) 75 + FeedPost.create!(feed_id: feed.feed_id, post: post, time: msg.time) unless !@save_posts 76 + matched = true 77 + end 78 + end 79 + 80 + if @log_posts == :all || @log_posts && matched 81 + puts 82 + puts text 83 + end 84 + 85 + post.save! if @save_posts == :all 86 + rescue StandardError => e 87 + puts "Error: #{e}" 88 + p msg unless @env == :production || e.message == "nesting of 100 is too deep" 89 + end 90 + 91 + print '.' if @show_progress && @log_posts != :all 92 + end 93 + end 94 + end
+7
app/models/feed_post.rb
···
··· 1 + require 'active_record' 2 + require_relative 'post' 3 + 4 + class FeedPost < ActiveRecord::Base 5 + belongs_to :post 6 + validates_presence_of :feed_id, :time 7 + end
+6
app/models/post.rb
···
··· 1 + require 'active_record' 2 + 3 + class Post < ActiveRecord::Base 4 + validates_presence_of :repo, :time, :data, :rkey 5 + validates :text, length: { minimum: 0, allow_nil: false } 6 + end
+10
config.ru
···
··· 1 + require_relative 'app/config' 2 + 3 + Dir.mkdir('log') unless Dir.exist?('log') 4 + 5 + log = File.new("log/sinatra.log", "a+") 6 + log.sync = true 7 + 8 + use Rack::CommonLogger, log 9 + 10 + run BlueFactory::Server
+11
config/database.yml
···
··· 1 + development: 2 + adapter: sqlite3 3 + database: db/blueskydev.sqlite3 4 + pool: 5 5 + timeout: 10000 6 + 7 + production: 8 + adapter: sqlite3 9 + database: db/bluesky.sqlite3 10 + pool: 5 11 + timeout: 10000
+38
config/deploy.rb
···
··· 1 + # TODO: migrate to capistrano3 bundler integration 2 + require 'bundler/capistrano' 3 + set :bundle_dir, '' 4 + set :bundle_flags, '--quiet' 5 + set :bundle_without, [] 6 + 7 + set :application, "bsky_feeds" 8 + set :repository, "git@github.com:mackuba/bluesky-feeds-rb.git" 9 + set :scm, :git 10 + set :keep_releases, 5 11 + set :use_sudo, false 12 + set :deploy_to, "/var/www/bsky_feeds" 13 + set :deploy_via, :remote_cache 14 + set :migrate_env, "RACK_ENV=production" 15 + 16 + server "feeds.example.com", :app, :web, :db, :primary => true 17 + 18 + before 'bundle:install', 'deploy:set_bundler_options' 19 + after 'deploy:update_code', 'deploy:link_shared' 20 + 21 + after 'deploy', 'deploy:cleanup' 22 + after 'deploy:migrations', 'deploy:cleanup' 23 + 24 + namespace :deploy do 25 + task :restart, :roles => :web do 26 + run "touch #{current_path}/tmp/restart.txt" 27 + end 28 + 29 + task :set_bundler_options do 30 + run "cd #{release_path} && bundle config set --local deployment 'true'" 31 + run "cd #{release_path} && bundle config set --local path '#{shared_path}/bundle'" 32 + run "cd #{release_path} && bundle config set --local without 'development test'" 33 + end 34 + 35 + task :link_shared do 36 + run "ln -s #{shared_path}/bluesky.sqlite3 #{release_path}/db/bluesky.sqlite3" 37 + end 38 + end
+21
db/migrate/20230615155215_create_tables.rb
···
··· 1 + class CreateTables < ActiveRecord::Migration[6.1] 2 + def change 3 + create_table :posts do |t| 4 + t.string :repo, null: false 5 + t.datetime :time, null: false 6 + t.string :text, null: false 7 + t.text :data, null: false 8 + t.string :rkey, null: false 9 + end 10 + 11 + add_index :posts, :rkey 12 + 13 + create_table :feed_posts do |t| 14 + t.integer :feed_id, null: false 15 + t.integer :post_id, null: false 16 + t.datetime :time, null: false 17 + end 18 + 19 + add_index :feed_posts, [:feed_id, :time] 20 + end 21 + end
+31
db/schema.rb
···
··· 1 + # This file is auto-generated from the current state of the database. Instead 2 + # of editing this file, please use the migrations feature of Active Record to 3 + # incrementally modify your database, and then regenerate this schema definition. 4 + # 5 + # This file is the source Rails uses to define your schema when running `bin/rails 6 + # db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to 7 + # be faster and is potentially less error prone than running all of your 8 + # migrations from scratch. Old migrations may fail to apply correctly if those 9 + # migrations use external dependencies or application code. 10 + # 11 + # It's strongly recommended that you check this file into your version control system. 12 + 13 + ActiveRecord::Schema.define(version: 2023_06_15_155215) do 14 + 15 + create_table "feed_posts", force: :cascade do |t| 16 + t.integer "feed_id", null: false 17 + t.integer "post_id", null: false 18 + t.datetime "time", null: false 19 + t.index ["feed_id", "time"], name: "index_feed_posts_on_feed_id_and_time" 20 + end 21 + 22 + create_table "posts", force: :cascade do |t| 23 + t.string "repo", null: false 24 + t.datetime "time", null: false 25 + t.string "text", null: false 26 + t.text "data", null: false 27 + t.string "rkey", null: false 28 + t.index ["rkey"], name: "index_posts_on_rkey" 29 + end 30 + 31 + end
+63
firehose.rb
···
··· 1 + #!/usr/bin/env ruby 2 + 3 + require 'bundler/setup' 4 + require_relative 'app/firehose_stream' 5 + 6 + ActiveRecord::Base.logger = nil 7 + 8 + def print_help 9 + puts "Usage: #{$0} [options...]" 10 + puts "Options:" 11 + puts 12 + puts " * Showing progress: [default: show in development]" 13 + puts " -p = show progress dots for each received message" 14 + puts " -np = don't show progress dots" 15 + puts 16 + puts " * Logging status changes: [default: log in any mode]" 17 + puts " -ns = don't log status changes" 18 + puts 19 + puts " * Logging post text: [default: -lm in development, -nl in production]" 20 + puts " -lm = log text of matching posts" 21 + puts " -la = log text of every post" 22 + puts " -nl = don't log posts" 23 + puts 24 + puts " * Saving posts to db: [default: -da in development, -dm in production]" 25 + puts " -da = save all posts to database" 26 + puts " -dm = save only matching posts to database" 27 + puts " -nd = don't save any posts" 28 + end 29 + 30 + firehose = FirehoseStream.new 31 + 32 + ARGV.each do |arg| 33 + case arg 34 + when '-p' 35 + firehose.show_progress = true 36 + when '-np' 37 + firehose.show_progress = false 38 + when '-ns' 39 + firehose.log_status = false 40 + when '-lm' 41 + firehose.log_posts = :matching 42 + when '-la' 43 + firehose.log_posts = :all 44 + when '-nl' 45 + firehose.log_posts = false 46 + when '-dm' 47 + firehose.save_posts = :matching 48 + when '-da' 49 + firehose.save_posts = :all 50 + when '-nd' 51 + firehose.save_posts = false 52 + when '-h', '--help' 53 + print_help 54 + exit 0 55 + else 56 + puts "Unrecognized option: #{arg}" 57 + print_help 58 + exit 1 59 + end 60 + end 61 + 62 + firehose.start 63 + sleep
images/babyyoda.jpg

This is a binary file and will not be displayed.

images/linux_tux.png

This is a binary file and will not be displayed.

+8
server.rb
···
··· 1 + #!/usr/bin/env ruby 2 + 3 + require 'bundler/setup' 4 + 5 + require_relative 'app/config' 6 + 7 + BlueFactory::Server.set :port, 3000 8 + BlueFactory::Server.run!