Template of a custom feed generator service for the Bluesky network in Ruby

split rebuild task into two methods

Changed files
+86 -74
lib
tasks
+86 -74
lib/tasks/feeds.rake
# Rake task that either re-checks the posts already in a feed or rebuilds the
# feed from stored posts.
#
# Environment switches read here:
#   UNSAFE        - wrap the work in `tap` instead of `transaction`
#   DRY_RUN       - report what would change without writing anything
#   ONLY_EXISTING - only re-check posts already in the feed (rescan_feed_items)
#   DAYS          - how many days back to scan when rebuilding (default: 7)
#   APPEND_ONLY   - keep the current feed contents and only add missing posts
#   TO_FILE       - path that receives the matched post ids as JSON, when
#                   rebuild_feed returns a list (i.e. on a dry run)
task :rebuild_feed do
  feed = get_feed

  # With UNSAFE set, `tap` simply yields, so no transaction is opened.
  method = ENV['UNSAFE'] ? :tap : :transaction
  dry = !!ENV['DRY_RUN']

  ActiveRecord::Base.send(method) do
    if ENV['ONLY_EXISTING']
      rescan_feed_items(feed, dry)
    else
      days = ENV['DAYS'] ? ENV['DAYS'].to_i : 7
      append_only = !!ENV['APPEND_ONLY']

      matched_posts = rebuild_feed(feed, days, append_only, dry)

      if matched_posts && (filename = ENV['TO_FILE'])
        File.write(filename, matched_posts.map(&:id).to_json)
      end
    end
  end
end

# Re-checks every post currently in the feed against feed.post_matches? and
# removes the ones that no longer match.
#
# feed - object responding to feed_id and post_matches?(post)
# dry  - when true, only prints what would be deleted
def rescan_feed_items(feed, dry = false)
  feed_posts = FeedPost.where(feed_id: feed.feed_id).includes(:post).to_a

  puts "Processing posts..."

  deleted = 0

  feed_posts.each do |fp|
    next if feed.post_matches?(fp.post)

    if dry
      puts "Post would be deleted: ##{fp.post.id} \"#{fp.post.text}\""
    else
      puts "Deleting from feed: ##{fp.post.id} \"#{fp.post.text}\""
      fp.destroy
    end

    deleted += 1
  end

  if dry
    puts "#{deleted} post(s) would be deleted."
  else
    puts "Done (#{deleted} post(s) deleted)."
  end
end

# Scans stored posts newer than `days` days and adds the matching ones to the
# feed.
#
# feed        - object responding to feed_id and post_matches?(post)
# days        - size of the scanned window, in days
# append_only - when true, the existing feed contents are kept and posts that
#               are already in the feed are skipped; otherwise (and unless
#               dry) the feed is emptied first, after a confirmation prompt
# dry         - when true, nothing is written; matching posts are printed
#               through PostConsolePrinter instead
#
# Returns the array of matched posts on a dry run, nil otherwise.
def rebuild_feed(feed, days, append_only, dry = false)
  posts = Post.order('time, id')

  # The cutoff is bound as a parameter rather than interpolated into the SQL.
  # DATETIME('now', '-N days') looks like SQLite date syntax - confirm if the
  # database ever changes.
  start = posts.where("time <= DATETIME('now', ?)", "-#{days.to_i} days").last
  stop = posts.last
  first = posts.first

  # Only used for the progress display; 0 when there are no posts at all
  # (previously this raised on nil).
  total = stop ? stop.id - (start || first).id + 1 : 0

  if append_only
    current_post_ids = FeedPost.where(feed_id: feed.feed_id).pluck('post_id')
  else
    # A dry run without APPEND_ONLY used to leave this unassigned (nil), which
    # made the include? check below raise NoMethodError on the first post.
    current_post_ids = []

    unless dry
      print "This will erase and replace the contents of the feed. Continue? [y/n]: "

      # gets returns nil at EOF, so a closed stdin counts as "no" instead of
      # raising EOFError like readline does.
      answer = STDIN.gets.to_s
      exit unless answer.strip.downcase == 'y'

      puts "Cleaning up feed..."
      FeedPost.where(feed_id: feed.feed_id).delete_all
    end
  end

  offset = 0
  page = 100_000
  matched_posts = []
  cursor = start

  loop do
    # Page through posts in (time, id) order, continuing after the last post
    # seen so far; with no cursor yet, start from the beginning.
    scope = if cursor
      posts.where("time > ? OR (time = ? AND id > ?)", cursor.time, cursor.time, cursor.id)
    else
      posts
    end

    batch = scope.limit(page).to_a
    break if batch.empty?

    batch.each_with_index do |post, i|
      $stderr.print "Processing posts... [#{offset + i + 1}/#{total}]\r"
      $stderr.flush

      next if current_post_ids.include?(post.id) || !feed.post_matches?(post)

      if dry
        matched_posts << post
      else
        FeedPost.create!(feed_id: feed.feed_id, post: post, time: post.time)
      end
    end

    offset += page
    cursor = batch.last
  end

  # Trailing spaces overwrite what is left of the "\r" progress line.
  $stderr.puts "Processing posts... Done." + " " * 30

  return unless dry

  if append_only
    puts "Added posts:"
    puts "=============================="
    puts
  end

  # Restore the default SIGPIPE handling before printing the matched posts.
  Signal.trap("SIGPIPE", "SYSTEM_DEFAULT")
  printer = PostConsolePrinter.new(feed)
  matched_posts.each { |post| printer.display(post) }

  matched_posts
end