+1
-196
Rakefile
+1
-196
Rakefile
···
1
1
require 'bundler/setup'
2
-
3
2
require 'blue_factory/rake'
4
3
require 'sinatra/activerecord'
5
4
require 'sinatra/activerecord/rake'
6
5
7
-
require_relative 'app/config'
8
-
require_relative 'app/post_console_printer'
9
-
require_relative 'app/utils'
10
-
11
-
# Resolves the feed selected through the KEY environment variable.
#
# Prints a hint and exits with status 1 when KEY is missing or empty, or when
# BlueFactory.get_feed returns nil for that key.
#
# @return the object returned by BlueFactory.get_feed for the requested key
def get_feed
  key = ENV['KEY'].to_s

  if key.empty?
    puts "Please specify feed key as KEY=feedname (the part of the feed's at:// URI after the last slash)"
    exit 1
  end

  found = BlueFactory.get_feed(key)
  return found unless found.nil?

  puts "No feed configured for key '#{key}' - use `BlueFactory.add_feed '#{key}', MyFeed.new`"
  exit 1
end
27
-
28
-
# Prints the newest posts of the feed selected with KEY.
#
# Environment:
#   KEY - feed key, resolved by get_feed
#   N   - maximum number of posts to print (defaults to 100)
desc "Print posts in the feed, starting from the newest ones (limit = N)"
task :print_feed do
  feed = get_feed
  limit = ENV['N'] ? ENV['N'].to_i : 100

  # joins(:post) keeps only entries that have a post row; preload(:post) loads those
  # posts in one additional query instead of one query per entry when mapping below.
  posts = FeedPost.where(feed_id: feed.feed_id)
    .joins(:post)
    .preload(:post)
    .order('feed_posts.time DESC')
    .limit(limit)
    .map(&:post)

  # this fixes an error when piping a long output to less and then closing without reading it all
  Signal.trap("SIGPIPE", "SYSTEM_DEFAULT")

  printer = PostConsolePrinter.new(feed)
  posts.each { |post| printer.display(post) }
end
44
-
45
-
# Removes one post from the feed selected with KEY.
#
# Environment:
#   KEY - feed key, resolved by get_feed
#   URL - web URL of the post; the author (handle or DID) is read from the third
#         path segment and the record key from the fifth
#
# Exits with status 1 when URL is missing or does not contain both segments.
desc "Remove a single post from a feed"
task :delete_feed_item do
  feed = get_feed

  url = ENV['URL'].to_s
  if url.empty?
    puts "Please specify post url as URL=https://bsky.app/..."
    exit 1
  end

  # e.g. bsky.app/profile/<author>/post/<rkey> once the scheme is stripped
  parts = url.sub(%r{\Ahttps://}, '').split('/')
  author = parts[2]
  rkey = parts[4]

  # A truncated or unrelated URL would otherwise fail later with a NoMethodError on nil.
  if author.to_s.empty? || rkey.to_s.empty?
    puts "Invalid post url '#{url}' - expected https://bsky.app/profile/<handle or DID>/post/<rkey>"
    exit 1
  end

  if author.start_with?('did:')
    did = author
    handle = Utils.handle_from_did(did)
  else
    handle = author
    did = Utils.did_from_handle(handle)
  end

  item = FeedPost.joins(:post).find_by(feed_id: feed.feed_id, post: { repo: did, rkey: rkey })

  if item
    item.destroy
    puts "Deleted post by @#{handle} from #{feed.display_name} feed"
  else
    puts "Post not found in the feed"
  end
end
74
-
75
-
require 'set'

# Rebuilds the feed selected with KEY by re-running feed.post_matches? on stored posts.
#
# Environment:
#   KEY           - feed key, resolved by get_feed
#   DAYS          - how many days back to scan (defaults to 7)
#   DRY_RUN       - only report what would change, do not modify the feed
#   ONLY_EXISTING - only re-check posts already in the feed and drop those that no longer match
#   APPEND_ONLY   - keep the current feed contents and only add newly matching posts
#   UNSAFE        - do not wrap the work in a database transaction
#   TO_FILE       - with DRY_RUN, also write the matched post ids as JSON to this path
desc "Rescan all posts and rebuild the feed from scratch (DAYS = number of days)"
task :rebuild_feed do
  feed = get_feed
  # `tap` simply yields, so with UNSAFE the block runs without a surrounding transaction
  method = ENV['UNSAFE'] ? :tap : :transaction
  dry = ENV['DRY_RUN'] ? true : false

  ActiveRecord::Base.send(method) do
    if ENV['ONLY_EXISTING']
      feed_posts = FeedPost.where(feed_id: feed.feed_id).includes(:post).to_a

      puts "Processing posts..."

      deleted = 0

      feed_posts.each do |fp|
        next if feed.post_matches?(fp.post)

        if dry
          puts "Post would be deleted: ##{fp.post.id} \"#{fp.post.text}\""
        else
          puts "Deleting from feed: ##{fp.post.id} \"#{fp.post.text}\""
          fp.destroy
        end

        deleted += 1
      end

      if dry
        puts "#{deleted} post(s) would be deleted."
      else
        puts "Done (#{deleted} post(s) deleted)."
      end
    else
      days = ENV['DAYS'] ? ENV['DAYS'].to_i : 7

      posts = Post.order('time, id')
      # newest post that is already older than the scan window; scanning resumes right after it
      start = posts.where("time <= DATETIME('now', '-#{days} days')").last
      stop = posts.last
      first = posts.first
      # id span used only for the progress display; 0 when there are no posts at all
      total = stop ? stop.id - (start || first).id + 1 : 0

      # Ids of posts to skip while scanning. Starts out empty so that a DRY_RUN without
      # APPEND_ONLY has a collection to check against; a Set keeps each lookup constant-time.
      current_post_ids = Set.new

      if ENV['APPEND_ONLY']
        current_post_ids = FeedPost.where(feed_id: feed.feed_id).pluck('post_id').to_set
      elsif !dry
        print "This will erase and replace the contents of the feed. Continue? [y/n]: "
        # gets returns nil on a closed stdin, which is treated like any non-"y" answer
        answer = STDIN.gets.to_s
        exit unless answer.strip.downcase == 'y'

        puts "Cleaning up feed..."
        FeedPost.where(feed_id: feed.feed_id).delete_all
      end

      offset = 0
      page = 100000
      matched_posts = []

      loop do
        # keyset pagination on (time, id), continuing after the last post already seen
        batch = if start
          posts.where("time > ? OR (time = ? AND id > ?)", start.time, start.time, start.id).limit(page).to_a
        else
          posts.limit(page).to_a
        end

        break if batch.empty?

        batch.each_with_index do |post, i|
          $stderr.print "Processing posts... [#{offset + i + 1}/#{total}]\r"
          $stderr.flush

          next if current_post_ids.include?(post.id) || !feed.post_matches?(post)

          if dry
            matched_posts << post
          else
            FeedPost.create!(feed_id: feed.feed_id, post: post, time: post.time)
          end
        end

        offset += batch.length
        start = batch.last
      end

      # trailing spaces overwrite what is left of the progress line
      $stderr.puts "Processing posts... Done." + " " * 30

      if dry
        if ENV['APPEND_ONLY']
          puts "Added posts:"
          puts "=============================="
          puts
        end

        Signal.trap("SIGPIPE", "SYSTEM_DEFAULT")
        printer = PostConsolePrinter.new(feed)
        matched_posts.each { |post| printer.display(post) }

        if ENV['TO_FILE']
          File.write(ENV['TO_FILE'], matched_posts.map(&:id).to_json)
        end
      end
    end
  end
end
180
-
181
-
# Deletes stored posts older than DAYS days that have no feed_posts row.
#
# Environment:
#   DAYS - age threshold in days; must be a positive integer, otherwise the task exits with status 1
desc "Delete posts older than N days that aren't included in a feed"
task :cleanup_posts do
  days = ENV['DAYS'].to_i
  if days <= 0
    puts "Please specify number of days as e.g. DAYS=30 to delete posts older than that"
    exit 1
  end

  # The cutoff is resolved once by the database and reused below, so the deletion and the
  # final message refer to exactly the same moment. `days` went through to_i above, which
  # makes the interpolation safe.
  result = ActiveRecord::Base.connection.execute("SELECT DATETIME('now', '-#{days} days') AS time_limit")
  time_limit = result.first['time_limit']

  # posts without any matching feed_posts row that are older than the cutoff
  subquery = %{
    SELECT posts.id FROM posts
    LEFT JOIN feed_posts ON (feed_posts.post_id = posts.id)
    WHERE feed_posts.id IS NULL AND posts.time < ?
  }

  deleted = Post.where("id IN (#{subquery})", time_limit).delete_all

  puts "Deleted #{deleted} posts older than #{time_limit}"
end
6
+
Rake.add_rakelib File.join(__dir__, 'lib', 'tasks')
+178
lib/tasks/feeds.rake
+178
lib/tasks/feeds.rake
···
1
+
$LOAD_PATH.unshift(File.expand_path('../..', __dir__))
2
+
3
+
require 'app/config'
4
+
require 'app/models/feed_post'
5
+
require 'app/models/post'
6
+
require 'app/post_console_printer'
7
+
require 'app/utils'
8
+
9
+
10
+
# Resolves the feed selected through the KEY environment variable.
#
# Prints a hint and exits with status 1 when KEY is missing or empty, or when
# BlueFactory.get_feed returns nil for that key.
#
# @return the object returned by BlueFactory.get_feed for the requested key
def get_feed
  key = ENV['KEY'].to_s

  if key.empty?
    puts "Please specify feed key as KEY=feedname (the part of the feed's at:// URI after the last slash)"
    exit 1
  end

  found = BlueFactory.get_feed(key)
  return found unless found.nil?

  puts "No feed configured for key '#{key}' - use `BlueFactory.add_feed '#{key}', MyFeed.new`"
  exit 1
end
26
+
27
+
# Prints the newest posts of the feed selected with KEY.
#
# Environment:
#   KEY - feed key, resolved by get_feed
#   N   - maximum number of posts to print (defaults to 100)
desc "Print posts in the feed, starting from the newest ones (limit = N)"
task :print_feed do
  feed = get_feed
  limit = ENV['N'] ? ENV['N'].to_i : 100

  # joins(:post) keeps only entries that have a post row; preload(:post) loads those
  # posts in one additional query instead of one query per entry when mapping below.
  posts = FeedPost.where(feed_id: feed.feed_id)
    .joins(:post)
    .preload(:post)
    .order('feed_posts.time DESC')
    .limit(limit)
    .map(&:post)

  # this fixes an error when piping a long output to less and then closing without reading it all
  Signal.trap("SIGPIPE", "SYSTEM_DEFAULT")

  printer = PostConsolePrinter.new(feed)
  posts.each { |post| printer.display(post) }
end
43
+
44
+
# Removes one post from the feed selected with KEY.
#
# Environment:
#   KEY - feed key, resolved by get_feed
#   URL - web URL of the post; the author (handle or DID) is read from the third
#         path segment and the record key from the fifth
#
# Exits with status 1 when URL is missing or does not contain both segments.
desc "Remove a single post from a feed"
task :delete_feed_item do
  feed = get_feed

  url = ENV['URL'].to_s
  if url.empty?
    puts "Please specify post url as URL=https://bsky.app/..."
    exit 1
  end

  # e.g. bsky.app/profile/<author>/post/<rkey> once the scheme is stripped
  parts = url.sub(%r{\Ahttps://}, '').split('/')
  author = parts[2]
  rkey = parts[4]

  # A truncated or unrelated URL would otherwise fail later with a NoMethodError on nil.
  if author.to_s.empty? || rkey.to_s.empty?
    puts "Invalid post url '#{url}' - expected https://bsky.app/profile/<handle or DID>/post/<rkey>"
    exit 1
  end

  if author.start_with?('did:')
    did = author
    handle = Utils.handle_from_did(did)
  else
    handle = author
    did = Utils.did_from_handle(handle)
  end

  item = FeedPost.joins(:post).find_by(feed_id: feed.feed_id, post: { repo: did, rkey: rkey })

  if item
    item.destroy
    puts "Deleted post by @#{handle} from #{feed.display_name} feed"
  else
    puts "Post not found in the feed"
  end
end
73
+
74
+
require 'set'

# Rebuilds the feed selected with KEY by re-running feed.post_matches? on stored posts.
#
# Environment:
#   KEY           - feed key, resolved by get_feed
#   DAYS          - how many days back to scan (defaults to 7)
#   DRY_RUN       - only report what would change, do not modify the feed
#   ONLY_EXISTING - only re-check posts already in the feed and drop those that no longer match
#   APPEND_ONLY   - keep the current feed contents and only add newly matching posts
#   UNSAFE        - do not wrap the work in a database transaction
#   TO_FILE       - with DRY_RUN, also write the matched post ids as JSON to this path
desc "Rescan all posts and rebuild the feed from scratch (DAYS = number of days)"
task :rebuild_feed do
  feed = get_feed
  # `tap` simply yields, so with UNSAFE the block runs without a surrounding transaction
  method = ENV['UNSAFE'] ? :tap : :transaction
  dry = ENV['DRY_RUN'] ? true : false

  ActiveRecord::Base.send(method) do
    if ENV['ONLY_EXISTING']
      feed_posts = FeedPost.where(feed_id: feed.feed_id).includes(:post).to_a

      puts "Processing posts..."

      deleted = 0

      feed_posts.each do |fp|
        next if feed.post_matches?(fp.post)

        if dry
          puts "Post would be deleted: ##{fp.post.id} \"#{fp.post.text}\""
        else
          puts "Deleting from feed: ##{fp.post.id} \"#{fp.post.text}\""
          fp.destroy
        end

        deleted += 1
      end

      if dry
        puts "#{deleted} post(s) would be deleted."
      else
        puts "Done (#{deleted} post(s) deleted)."
      end
    else
      days = ENV['DAYS'] ? ENV['DAYS'].to_i : 7

      posts = Post.order('time, id')
      # newest post that is already older than the scan window; scanning resumes right after it
      start = posts.where("time <= DATETIME('now', '-#{days} days')").last
      stop = posts.last
      first = posts.first
      # id span used only for the progress display; 0 when there are no posts at all
      total = stop ? stop.id - (start || first).id + 1 : 0

      # Ids of posts to skip while scanning. Starts out empty so that a DRY_RUN without
      # APPEND_ONLY has a collection to check against; a Set keeps each lookup constant-time.
      current_post_ids = Set.new

      if ENV['APPEND_ONLY']
        current_post_ids = FeedPost.where(feed_id: feed.feed_id).pluck('post_id').to_set
      elsif !dry
        print "This will erase and replace the contents of the feed. Continue? [y/n]: "
        # gets returns nil on a closed stdin, which is treated like any non-"y" answer
        answer = STDIN.gets.to_s
        exit unless answer.strip.downcase == 'y'

        puts "Cleaning up feed..."
        FeedPost.where(feed_id: feed.feed_id).delete_all
      end

      offset = 0
      page = 100000
      matched_posts = []

      loop do
        # keyset pagination on (time, id), continuing after the last post already seen
        batch = if start
          posts.where("time > ? OR (time = ? AND id > ?)", start.time, start.time, start.id).limit(page).to_a
        else
          posts.limit(page).to_a
        end

        break if batch.empty?

        batch.each_with_index do |post, i|
          $stderr.print "Processing posts... [#{offset + i + 1}/#{total}]\r"
          $stderr.flush

          next if current_post_ids.include?(post.id) || !feed.post_matches?(post)

          if dry
            matched_posts << post
          else
            FeedPost.create!(feed_id: feed.feed_id, post: post, time: post.time)
          end
        end

        offset += batch.length
        start = batch.last
      end

      # trailing spaces overwrite what is left of the progress line
      $stderr.puts "Processing posts... Done." + " " * 30

      if dry
        if ENV['APPEND_ONLY']
          puts "Added posts:"
          puts "=============================="
          puts
        end

        Signal.trap("SIGPIPE", "SYSTEM_DEFAULT")
        printer = PostConsolePrinter.new(feed)
        matched_posts.each { |post| printer.display(post) }

        if ENV['TO_FILE']
          File.write(ENV['TO_FILE'], matched_posts.map(&:id).to_json)
        end
      end
    end
  end
end
+27
lib/tasks/posts.rake
+27
lib/tasks/posts.rake
···
1
+
$LOAD_PATH.unshift(File.expand_path('../..', __dir__))
2
+
3
+
require 'app/config'
4
+
require 'app/models/post'
5
+
6
+
7
+
# Deletes stored posts older than DAYS days that have no feed_posts row.
#
# Environment:
#   DAYS - age threshold in days; must be a positive integer, otherwise the task exits with status 1
desc "Delete posts older than N days that aren't included in a feed"
task :cleanup_posts do
  days = ENV['DAYS'].to_i
  if days <= 0
    puts "Please specify number of days as e.g. DAYS=30 to delete posts older than that"
    exit 1
  end

  # The cutoff is resolved once by the database and reused below, so the deletion and the
  # final message refer to exactly the same moment. `days` went through to_i above, which
  # makes the interpolation safe.
  result = ActiveRecord::Base.connection.execute("SELECT DATETIME('now', '-#{days} days') AS time_limit")
  time_limit = result.first['time_limit']

  # posts without any matching feed_posts row that are older than the cutoff
  subquery = %{
    SELECT posts.id FROM posts
    LEFT JOIN feed_posts ON (feed_posts.post_id = posts.id)
    WHERE feed_posts.id IS NULL AND posts.time < ?
  }

  deleted = Post.where("id IN (#{subquery})", time_limit).delete_all

  puts "Deleted #{deleted} posts older than #{time_limit}"
end