Template of a custom feed generator service for the Bluesky network in Ruby

split rakefile into multiple task files

Changed files
+206 -196
lib
+1 -196
Rakefile
··· 1 1 require 'bundler/setup' 2 - 3 2 require 'blue_factory/rake' 4 3 require 'sinatra/activerecord' 5 4 require 'sinatra/activerecord/rake' 6 5 7 - require_relative 'app/config' 8 - require_relative 'app/post_console_printer' 9 - require_relative 'app/utils' 10 - 11 - def get_feed 12 - if ENV['KEY'].to_s == '' 13 - puts "Please specify feed key as KEY=feedname (the part of the feed's at:// URI after the last slash)" 14 - exit 1 15 - end 16 - 17 - feed_key = ENV['KEY'] 18 - feed = BlueFactory.get_feed(feed_key) 19 - 20 - if feed.nil? 21 - puts "No feed configured for key '#{feed_key}' - use `BlueFactory.add_feed '#{feed_key}', MyFeed.new`" 22 - exit 1 23 - end 24 - 25 - feed 26 - end 27 - 28 - desc "Print posts in the feed, starting from the newest ones (limit = N)" 29 - task :print_feed do 30 - feed = get_feed 31 - limit = ENV['N'] ? ENV['N'].to_i : 100 32 - 33 - posts = FeedPost.where(feed_id: feed.feed_id).joins(:post).order('feed_posts.time DESC').limit(limit).map(&:post) 34 - 35 - # this fixes an error when piping a long output to less and then closing without reading it all 36 - Signal.trap("SIGPIPE", "SYSTEM_DEFAULT") 37 - 38 - printer = PostConsolePrinter.new(feed) 39 - 40 - posts.each do |s| 41 - printer.display(s) 42 - end 43 - end 44 - 45 - desc "Remove a single post from a feed" 46 - task :delete_feed_item do 47 - feed = get_feed 48 - 49 - if ENV['URL'].to_s == '' 50 - puts "Please specify post url as URL=https://bsky.app/..." 
51 - exit 1 52 - end 53 - 54 - url = ENV['URL'] 55 - parts = url.gsub(/^https:\/\//, '').split('/') 56 - author = parts[2] 57 - rkey = parts[4] 58 - 59 - if author.start_with?('did:') 60 - did = author 61 - handle = Utils.handle_from_did(did) 62 - else 63 - handle = author 64 - did = Utils.did_from_handle(handle) 65 - end 66 - 67 - if item = FeedPost.joins(:post).find_by(feed_id: feed.feed_id, post: { repo: did, rkey: rkey }) 68 - item.destroy 69 - puts "Deleted post by @#{handle} from #{feed.display_name} feed" 70 - else 71 - puts "Post not found in the feed" 72 - end 73 - end 74 - 75 - desc "Rescan all posts and rebuild the feed from scratch (DAYS = number of days)" 76 - task :rebuild_feed do 77 - feed = get_feed 78 - method = ENV['UNSAFE'] ? :tap : :transaction 79 - dry = ENV['DRY_RUN'] ? true : false 80 - 81 - ActiveRecord::Base.send(method) do 82 - if ENV['ONLY_EXISTING'] 83 - feed_posts = FeedPost.where(feed_id: feed.feed_id).includes(:post).to_a 84 - total = feed_posts.length 85 - 86 - puts "Processing posts..." 87 - 88 - deleted = 0 89 - 90 - feed_posts.each do |fp| 91 - if !feed.post_matches?(fp.post) 92 - if dry 93 - puts "Post would be deleted: ##{fp.post.id} \"#{fp.post.text}\"" 94 - else 95 - puts "Deleting from feed: ##{fp.post.id} \"#{fp.post.text}\"" 96 - fp.destroy 97 - end 98 - deleted += 1 99 - end 100 - end 101 - 102 - if dry 103 - puts "#{deleted} post(s) would be deleted." 104 - else 105 - puts "Done (#{deleted} post(s) deleted)." 106 - end 107 - else 108 - days = ENV['DAYS'] ? ENV['DAYS'].to_i : 7 109 - 110 - posts = Post.order('time, id') 111 - start = posts.where("time <= DATETIME('now', '-#{days} days')").last 112 - stop = posts.last 113 - first = posts.first 114 - total = start ? (stop.id - start.id + 1) : (stop.id - first.id + 1) 115 - 116 - if ENV['APPEND_ONLY'] 117 - current_post_ids = FeedPost.where(feed_id: feed.feed_id).pluck('post_id') 118 - elsif !dry 119 - print "This will erase and replace the contents of the feed. Continue? 
[y/n]: " 120 - answer = STDIN.readline 121 - exit unless answer.strip.downcase == 'y' 122 - 123 - puts "Cleaning up feed..." 124 - FeedPost.where(feed_id: feed.feed_id).delete_all 125 - current_post_ids = [] 126 - end 127 - 128 - offset = 0 129 - page = 100000 130 - matched_posts = [] 131 - 132 - loop do 133 - batch = if start 134 - posts.where("time > ? OR (time = ? AND id > ?)", start.time, start.time, start.id).limit(page).to_a 135 - else 136 - posts.limit(page).to_a 137 - end 138 - 139 - break if batch.empty? 140 - 141 - batch.each_with_index do |post, i| 142 - $stderr.print "Processing posts... [#{offset + i + 1}/#{total}]\r" 143 - $stderr.flush 144 - 145 - if !current_post_ids.include?(post.id) && feed.post_matches?(post) 146 - if dry 147 - matched_posts << post 148 - else 149 - FeedPost.create!(feed_id: feed.feed_id, post: post, time: post.time) 150 - end 151 - end 152 - end 153 - 154 - offset += page 155 - start = batch.last 156 - end 157 - 158 - $stderr.puts "Processing posts... Done." + " " * 30 159 - 160 - if dry 161 - if ENV['APPEND_ONLY'] 162 - puts "Added posts:" 163 - puts "==============================" 164 - puts 165 - end 166 - 167 - Signal.trap("SIGPIPE", "SYSTEM_DEFAULT") 168 - printer = PostConsolePrinter.new(feed) 169 - matched_posts.each do |p| 170 - printer.display(p) 171 - end 172 - 173 - if ENV['TO_FILE'] 174 - File.write(ENV['TO_FILE'], matched_posts.map(&:id).to_json) 175 - end 176 - end 177 - end 178 - end 179 - end 180 - 181 - desc "Delete posts older than N days that aren't included in a feed" 182 - task :cleanup_posts do 183 - days = ENV['DAYS'].to_i 184 - if days <= 0 185 - puts "Please specify number of days as e.g. 
DAYS=30 to delete posts older than that" 186 - exit 1 187 - end 188 - 189 - result = ActiveRecord::Base.connection.execute("SELECT DATETIME('now', '-#{days} days') AS time_limit") 190 - time_limit = result.first['time_limit'] 191 - 192 - subquery = %{ 193 - SELECT posts.id FROM posts 194 - LEFT JOIN feed_posts ON (feed_posts.post_id = posts.id) 195 - WHERE feed_posts.id IS NULL AND posts.time < DATETIME('now', '-#{days} days') 196 - } 197 - 198 - result = Post.where("id IN (#{subquery})").delete_all 199 - 200 - puts "Deleted #{result} posts older than #{time_limit}" 201 - end 6 + Rake.add_rakelib File.join(__dir__, 'lib', 'tasks')
+178
lib/tasks/feeds.rake
$LOAD_PATH.unshift(File.expand_path('../..', __dir__))

require 'set'

require 'app/config'
require 'app/models/feed_post'
require 'app/models/post'
require 'app/post_console_printer'
require 'app/utils'


# Looks up the feed selected by the KEY environment variable.
#
# Prints a hint and exits with status 1 when KEY is unset/empty, or when
# BlueFactory.get_feed returns nil for that key.
#
# @return the feed object returned by BlueFactory.get_feed
def get_feed
  if ENV['KEY'].to_s == ''
    puts "Please specify feed key as KEY=feedname (the part of the feed's at:// URI after the last slash)"
    exit 1
  end

  feed_key = ENV['KEY']
  feed = BlueFactory.get_feed(feed_key)

  if feed.nil?
    puts "No feed configured for key '#{feed_key}' - use `BlueFactory.add_feed '#{feed_key}', MyFeed.new`"
    exit 1
  end

  feed
end

# Environment: KEY (required, see get_feed), N (optional number of posts, defaults to 100).
desc "Print posts in the feed, starting from the newest ones (limit = N)"
task :print_feed do
  feed = get_feed
  limit = ENV['N'] ? ENV['N'].to_i : 100

  posts = FeedPost.where(feed_id: feed.feed_id).joins(:post).order('feed_posts.time DESC').limit(limit).map(&:post)

  # this fixes an error when piping a long output to less and then closing without reading it all
  Signal.trap("SIGPIPE", "SYSTEM_DEFAULT")

  printer = PostConsolePrinter.new(feed)
  posts.each { |post| printer.display(post) }
end

# Environment: KEY (required, see get_feed), URL (required, link to the post).
desc "Remove a single post from a feed"
task :delete_feed_item do
  feed = get_feed

  if ENV['URL'].to_s == ''
    puts "Please specify post url as URL=https://bsky.app/..."
    exit 1
  end

  url = ENV['URL']

  # After dropping the scheme, the author is the 3rd and the record key the 5th
  # slash-separated segment (presumably bsky.app/profile/<author>/post/<rkey> - confirm).
  parts = url.sub(%r{\Ahttps://}, '').split('/')
  author = parts[2]
  rkey = parts[4]

  # A URL that is too short would otherwise crash below with NoMethodError on nil.
  if author.nil? || rkey.nil?
    puts "Could not find the post author and record key in URL '#{url}'"
    exit 1
  end

  # The author segment may be either a DID or a handle; resolve whichever one is missing.
  if author.start_with?('did:')
    did = author
    handle = Utils.handle_from_did(did)
  else
    handle = author
    did = Utils.did_from_handle(handle)
  end

  if item = FeedPost.joins(:post).find_by(feed_id: feed.feed_id, post: { repo: did, rkey: rkey })
    item.destroy
    puts "Deleted post by @#{handle} from #{feed.display_name} feed"
  else
    puts "Post not found in the feed"
  end
end

# Environment:
#   KEY           - required, see get_feed
#   DAYS          - how many days back to rescan (defaults to 7)
#   DRY_RUN       - only report what would change, don't write anything
#   ONLY_EXISTING - only re-check posts already in the feed, removing those that no longer match
#   APPEND_ONLY   - keep the current feed contents and only add newly matching posts
#   UNSAFE        - don't wrap the whole run in a database transaction
#   TO_FILE       - in a dry run, also write the matched post ids as JSON to this path
desc "Rescan all posts and rebuild the feed from scratch (DAYS = number of days)"
task :rebuild_feed do
  feed = get_feed

  # With UNSAFE set, `tap` just yields to the block, so no transaction is opened.
  method = ENV['UNSAFE'] ? :tap : :transaction
  dry = ENV['DRY_RUN'] ? true : false

  ActiveRecord::Base.send(method) do
    if ENV['ONLY_EXISTING']
      feed_posts = FeedPost.where(feed_id: feed.feed_id).includes(:post).to_a

      puts "Processing posts..."

      deleted = 0

      feed_posts.each do |fp|
        next if feed.post_matches?(fp.post)

        if dry
          puts "Post would be deleted: ##{fp.post.id} \"#{fp.post.text}\""
        else
          puts "Deleting from feed: ##{fp.post.id} \"#{fp.post.text}\""
          fp.destroy
        end

        deleted += 1
      end

      if dry
        puts "#{deleted} post(s) would be deleted."
      else
        puts "Done (#{deleted} post(s) deleted)."
      end
    else
      days = ENV['DAYS'] ? ENV['DAYS'].to_i : 7

      posts = Post.order('time, id')

      # `start` is the last post at or before the cutoff; scanning resumes right after it.
      # It is nil when no post is that old, in which case scanning begins at the first post.
      start = posts.where("time <= DATETIME('now', '-#{days} days')").last
      stop = posts.last
      first = posts.first

      # Only used for the progress display; derived from the id range rather than a COUNT.
      # `stop` is nil when the posts table is empty, so fall back to 0 instead of crashing.
      total = stop ? (stop.id - (start || first).id + 1) : 0

      # Ids of posts that must not be added again. This stays empty unless APPEND_ONLY is set,
      # which also covers a dry run of a full rebuild (previously left as nil, crashing below).
      current_post_ids = Set.new

      if ENV['APPEND_ONLY']
        current_post_ids = FeedPost.where(feed_id: feed.feed_id).pluck('post_id').to_set
      elsif !dry
        print "This will erase and replace the contents of the feed. Continue? [y/n]: "
        answer = STDIN.readline
        exit unless answer.strip.downcase == 'y'

        puts "Cleaning up feed..."
        FeedPost.where(feed_id: feed.feed_id).delete_all
      end

      offset = 0
      page = 100000
      matched_posts = []

      # Page through the posts in (time, id) order, each time continuing after the last seen post.
      loop do
        batch = if start
          posts.where("time > ? OR (time = ? AND id > ?)", start.time, start.time, start.id).limit(page).to_a
        else
          posts.limit(page).to_a
        end

        break if batch.empty?

        batch.each_with_index do |post, i|
          $stderr.print "Processing posts... [#{offset + i + 1}/#{total}]\r"
          $stderr.flush

          if !current_post_ids.include?(post.id) && feed.post_matches?(post)
            if dry
              matched_posts << post
            else
              FeedPost.create!(feed_id: feed.feed_id, post: post, time: post.time)
            end
          end
        end

        offset += page
        start = batch.last
      end

      # The trailing spaces overwrite what is left of the longer progress line.
      $stderr.puts "Processing posts... Done." + " " * 30

      if dry
        if ENV['APPEND_ONLY']
          puts "Added posts:"
          puts "=============================="
          puts
        end

        Signal.trap("SIGPIPE", "SYSTEM_DEFAULT")
        printer = PostConsolePrinter.new(feed)
        matched_posts.each { |post| printer.display(post) }

        if ENV['TO_FILE']
          File.write(ENV['TO_FILE'], matched_posts.map(&:id).to_json)
        end
      end
    end
  end
end
+27
lib/tasks/posts.rake
$LOAD_PATH.unshift(File.expand_path('../..', __dir__))

require 'app/config'
require 'app/models/post'


# Environment: DAYS (required, positive integer) - posts older than this many days
# that have no feed_posts row pointing at them are deleted.
desc "Delete posts older than N days that aren't included in a feed"
task :cleanup_posts do
  # A missing or non-numeric DAYS turns into 0 here and is rejected below.
  days = ENV['DAYS'].to_i

  unless days > 0
    puts "Please specify number of days as e.g. DAYS=30 to delete posts older than that"
    exit 1
  end

  # Ask the database for the cutoff timestamp; it is only used in the summary message.
  cutoff_rows = ActiveRecord::Base.connection.execute("SELECT DATETIME('now', '-#{days} days') AS time_limit")
  time_limit = cutoff_rows.first['time_limit']

  # Ids of old posts with no matching feed_posts row (the LEFT JOIN leaves feed_posts.id NULL).
  subquery = %{
    SELECT posts.id FROM posts
    LEFT JOIN feed_posts ON (feed_posts.post_id = posts.id)
    WHERE feed_posts.id IS NULL AND posts.time < DATETIME('now', '-#{days} days')
  }

  deleted_count = Post.where("id IN (#{subquery})").delete_all

  puts "Deleted #{deleted_count} posts older than #{time_limit}"
end