+10
.env.example
+10
.env.example
···
1
+
# hostname on which you're running the Lycan server
2
+
SERVER_HOSTNAME=lycan.feeds.blue
3
+
4
+
# customizing servers we connect to
5
+
RELAY_HOST=bsky.network
6
+
# or: JETSTREAM_HOST=jetstream2.us-east.bsky.network
7
+
APPVIEW_HOST=public.api.bsky.app
8
+
9
+
# put your handle here
10
+
# FIREHOSE_USER_AGENT="Lycan (@my.handle)"
+4
-3
Gemfile
+4
-3
Gemfile
···
2
2
3
3
gem 'activerecord', '~> 7.2'
4
4
gem 'sinatra-activerecord', '~> 2.0'
5
+
gem 'sinatra'
5
6
gem 'pg'
6
7
gem 'rake'
7
8
gem 'irb'
9
+
8
10
gem 'rainbow'
9
-
10
-
gem 'sinatra'
11
+
gem 'dotenv'
11
12
12
13
gem 'minisky', '~> 0.5'
13
14
gem 'didkit', '~> 0.2', git: 'https://tangled.sh/@mackuba.eu/didkit'
···
17
18
gem 'jwt'
18
19
19
20
group :development do
20
-
gem 'puma'
21
+
gem 'thin'
21
22
gem 'rackup'
22
23
gem 'capistrano', '~> 2.0'
23
24
+41
-34
Gemfile.lock
+41
-34
Gemfile.lock
···
7
7
GEM
8
8
remote: https://rubygems.org/
9
9
specs:
10
-
activemodel (7.2.2.2)
11
-
activesupport (= 7.2.2.2)
12
-
activerecord (7.2.2.2)
13
-
activemodel (= 7.2.2.2)
14
-
activesupport (= 7.2.2.2)
10
+
activemodel (7.2.3)
11
+
activesupport (= 7.2.3)
12
+
activerecord (7.2.3)
13
+
activemodel (= 7.2.3)
14
+
activesupport (= 7.2.3)
15
15
timeout (>= 0.4.0)
16
-
activesupport (7.2.2.2)
16
+
activesupport (7.2.3)
17
17
base64
18
18
benchmark (>= 0.3)
19
19
bigdecimal
···
29
29
base58 (0.2.3)
30
30
base64 (0.3.0)
31
31
bcrypt_pbkdf (1.1.1)
32
-
benchmark (0.4.1)
33
-
bigdecimal (3.2.2)
32
+
benchmark (0.5.0)
33
+
bigdecimal (3.3.1)
34
34
capistrano (2.15.11)
35
35
highline
36
36
net-scp (>= 1.0.0)
···
39
39
net-ssh-gateway (>= 1.1.0)
40
40
cbor (0.5.10.1)
41
41
concurrent-ruby (1.3.5)
42
-
connection_pool (2.5.3)
43
-
date (3.4.1)
42
+
connection_pool (2.5.4)
43
+
daemons (1.4.1)
44
+
date (3.5.0)
45
+
dotenv (3.1.8)
44
46
drb (2.2.3)
45
47
ed25519 (1.4.0)
46
-
erb (5.0.2)
48
+
erb (6.0.0)
47
49
eventmachine (1.2.7)
48
50
faye-websocket (0.12.0)
49
51
eventmachine (>= 0.12.0)
···
53
55
i18n (1.14.7)
54
56
concurrent-ruby (~> 1.0)
55
57
io-console (0.8.1)
56
-
irb (1.15.2)
58
+
irb (1.15.3)
57
59
pp (>= 0.6.0)
58
60
rdoc (>= 4.0.0)
59
61
reline (>= 0.4.2)
···
62
64
logger (1.7.0)
63
65
minisky (0.5.0)
64
66
base64 (~> 0.1)
65
-
minitest (5.25.5)
67
+
minitest (5.26.1)
66
68
mustermann (3.0.4)
67
69
ruby2_keywords (~> 0.0.1)
68
70
net-scp (4.1.0)
···
72
74
net-ssh (7.3.0)
73
75
net-ssh-gateway (2.0.0)
74
76
net-ssh (>= 4.0.0)
75
-
nio4r (2.7.4)
76
-
pg (1.6.1)
77
-
pg (1.6.1-aarch64-linux)
78
-
pg (1.6.1-aarch64-linux-musl)
79
-
pg (1.6.1-arm64-darwin)
80
-
pg (1.6.1-x86_64-darwin)
81
-
pg (1.6.1-x86_64-linux)
82
-
pg (1.6.1-x86_64-linux-musl)
83
-
pp (0.6.2)
77
+
pg (1.6.2)
78
+
pg (1.6.2-aarch64-linux)
79
+
pg (1.6.2-aarch64-linux-musl)
80
+
pg (1.6.2-arm64-darwin)
81
+
pg (1.6.2-x86_64-darwin)
82
+
pg (1.6.2-x86_64-linux)
83
+
pg (1.6.2-x86_64-linux-musl)
84
+
pp (0.6.3)
84
85
prettyprint
85
86
prettyprint (0.2.0)
86
87
psych (5.2.6)
87
88
date
88
89
stringio
89
-
puma (6.6.1)
90
-
nio4r (~> 2.0)
91
-
rack (3.2.0)
92
-
rack-protection (4.1.1)
90
+
rack (3.2.4)
91
+
rack-protection (4.2.1)
93
92
base64 (>= 0.1.0)
94
93
logger (>= 1.6.0)
95
94
rack (>= 3.0.0, < 4)
···
99
98
rackup (2.2.1)
100
99
rack (>= 3)
101
100
rainbow (3.1.1)
102
-
rake (13.3.0)
103
-
rdoc (6.14.2)
101
+
rake (13.3.1)
102
+
rdoc (6.15.1)
104
103
erb
105
104
psych (>= 4.0.0)
106
-
reline (0.6.2)
105
+
tsort
106
+
reline (0.6.3)
107
107
io-console (~> 0.5)
108
108
ruby2_keywords (0.0.5)
109
109
securerandom (0.4.1)
110
-
sinatra (4.1.1)
110
+
sinatra (4.2.1)
111
111
logger (>= 1.6.0)
112
112
mustermann (~> 3.0)
113
113
rack (>= 3.0.0, < 4)
114
-
rack-protection (= 4.1.1)
114
+
rack-protection (= 4.2.1)
115
115
rack-session (>= 2.0.0, < 3)
116
116
tilt (~> 2.0)
117
117
sinatra-activerecord (2.0.28)
···
123
123
cbor (~> 0.5, >= 0.5.9.6)
124
124
eventmachine (~> 1.2, >= 1.2.7)
125
125
faye-websocket (~> 0.12)
126
-
stringio (3.1.7)
126
+
stringio (3.1.8)
127
+
thin (2.0.1)
128
+
daemons (~> 1.0, >= 1.0.9)
129
+
eventmachine (~> 1.0, >= 1.0.4)
130
+
logger
131
+
rack (>= 1, < 4)
127
132
tilt (2.6.1)
128
-
timeout (0.4.3)
133
+
timeout (0.4.4)
134
+
tsort (0.2.0)
129
135
tzinfo (2.0.6)
130
136
concurrent-ruby (~> 1.0)
131
137
websocket-driver (0.8.0)
···
148
154
bcrypt_pbkdf (>= 1.0, < 2.0)
149
155
capistrano (~> 2.0)
150
156
didkit (~> 0.2)!
157
+
dotenv
151
158
ed25519 (>= 1.2, < 2.0)
152
159
irb
153
160
jwt
154
161
minisky (~> 0.5)
155
162
pg
156
-
puma
157
163
rackup
158
164
rainbow
159
165
rake
160
166
sinatra
161
167
sinatra-activerecord (~> 2.0)
162
168
skyfall (~> 0.6)
169
+
thin
163
170
164
171
BUNDLED WITH
165
172
2.7.0
+110
README.md
+110
README.md
···
1
+
# Lycan 🐺
2
+
3
+
A service which downloads and indexes the Bluesky posts you've liked, reposted, quoted or bookmarked, and allows you to search in that archive.
4
+
5
+
6
+
## How it works
7
+
8
+
Lycan is kind of like a tiny specialized AppView, which only indexes some specific things from some specific people. To avoid having to keep a full-network AppView, it only indexes posts and likes on demand from people who request to use it. So the first time you want to use it, you need to ask it to run an import process, which can take anything between a few minutes and an hour, depending on how much data there is to download. After that, new likes are being indexed live from the firehose.
9
+
10
+
At the moment, Lycan indexes four types of content:
11
+
12
+
- posts you've liked
13
+
- posts you've reposted
14
+
- posts you've quoted
15
+
- your old-style bookmarks (using the 📌 emoji method)
16
+
17
+
New bookmarks are private data, so at the moment they can't be imported until support for OAuth is added.
18
+
19
+
Lycan is written in Ruby, using Sinatra and ActiveRecord, with Postgres as the database. The official instance runs at [lycan.feeds.blue](https://lycan.feeds.blue) (this service only implements an XRPC API — the UI is implemented as part of [Skythread](https://skythread.mackuba.eu)).
20
+
21
+
The service consists of three separate components:
22
+
23
+
- a **firehose client**, which streams events from a relay/Jetstream and saves new data for the users whose data is/has been imported
24
+
- a **background worker**, which runs the import process
25
+
- an **HTTP server**, which serves the XRPC endpoints (currently there are 3: `startImport`, `getImportStatus` and `searchPosts`, plus a `did.json`); all the endpoints require service authentication through PDS proxying
26
+
27
+
28
+
## Setting up on localhost
29
+
30
+
This app should run on any somewhat recent version of Ruby, though of course it's recommended to run one that's still getting maintenance updates, ideally the latest one. It's also recommended to install it with [YJIT support](https://shopify.engineering/ruby-yjit-is-production-ready), and on Linux also with [jemalloc](https://scalingo.com/blog/improve-ruby-application-memory-jemalloc). You will probably need to have some familiarity with the Ruby ecosystem in order to set it up and run it.
31
+
32
+
A Postgres database is also required (again, any non-ancient version should work).
33
+
34
+
Download or clone the repository, then install the dependencies:
35
+
36
+
```
37
+
bundle install
38
+
```
39
+
40
+
Next, create the database — the configuration is defined in [`config/database.yml`](config/database.yml), for development it's `lycan_development`. Create it either manually, or with a rake task:
41
+
42
+
```
43
+
bundle exec rake db:create
44
+
```
45
+
46
+
Then, run the migrations:
47
+
48
+
```
49
+
bundle exec rake db:migrate
50
+
```
51
+
52
+
To run an import, you will need to run three separate processes, probably in separate terminal tabs:
53
+
54
+
1) the firehose client, [`bin/firehose`](bin/firehose)
55
+
2) the background worker, [`bin/worker`](bin/worker)
56
+
3) the Sinatra HTTP server, [`bin/server`](bin/server)
57
+
58
+
The UI can be accessed through Skythread, either on the official site on [skythread.mackuba.eu](https://skythread.mackuba.eu), or a copy you can download [from the repo](https://tangled.org/mackuba.eu/skythread). Log in and open "[Archive search](https://skythread.mackuba.eu/?page=search&mode=likes)" from the account menu — but importantly, to use the `localhost` Lycan instance, add `&lycan=local` to the URL.
59
+
60
+
You should then be able to start an import from there, and see the worker process printing some logs as it starts to download the data. (The firehose process needs to be running too, because the import job needs to pass through it first.)
61
+
62
+
63
+
## Configuration
64
+
65
+
There are a few things you can configure through ENV variables:
66
+
67
+
- `RELAY_HOST` — hostname of the relay to use for the firehose (default: `bsky.network`)
68
+
- `JETSTREAM_HOST` — alternatively, instead of `RELAY_HOST`, set this to a hostname of a [Jetstream](https://github.com/bluesky-social/jetstream) instance
69
+
- `FIREHOSE_USER_AGENT` — when running in production, it's recommended that you set this to some name that identifies who is running the service
70
+
- `APPVIEW_HOST` — hostname of the AppView used to download posts (default: `public.api.bsky.app`)
71
+
- `SERVER_HOSTNAME` — hostname of the server on which you're running the service in production
72
+
73
+
74
+
## Rake tasks
75
+
76
+
Some Rake tasks that might be useful:
77
+
78
+
```
79
+
bundle exec rake enqueue_user DID=did:plc:qweqwe
80
+
```
81
+
82
+
- request an import of the given account (to be handled by firehose + worker)
83
+
84
+
```
85
+
bundle exec rake import_user DID=did:plc:qweqwe COLLECTION=likes/reposts/posts/all
86
+
```
87
+
88
+
- run a complete import synchronously
89
+
90
+
```
91
+
bundle exec rake process_posts
92
+
```
93
+
94
+
- process all previously queued and unfinished or failed items
95
+
96
+
97
+
## Running in production
98
+
99
+
This will probably heavily depend on where and how you prefer to run it; I'm using a Capistrano deploy config in [`config/deploy.rb`](config/deploy.rb) to deploy to a VPS at [lycan.feeds.blue](https://lycan.feeds.blue). To use something like Docker or a service like Fly or Railway, you'll need to adapt the config for your specific setup.
100
+
101
+
On the server, you need to make sure that the firehose & worker processes are always running and are restarted if necessary. One option to do this (which I'm using) may be writing a `systemd` service config file and adding it to `/etc/systemd/system`. To run the HTTP server, you need Nginx/Caddy/Apache and a Ruby app server — my recommendation is Nginx with either Passenger (runs your app automatically from Nginx) or something like Puma (needs to be started by e.g. systemd like the firehose).
102
+
103
+
104
+
## Credits
105
+
106
+
Copyright © 2025 Kuba Suder ([@mackuba.eu](https://bsky.app/profile/did:plc:oio4hkxaop4ao4wz2pp3f4cr)).
107
+
108
+
The code is available under the terms of the [zlib license](https://choosealicense.com/licenses/zlib/) (permissive, similar to MIT).
109
+
110
+
Bug reports and pull requests are welcome 🙂
+1
Rakefile
+1
Rakefile
+14
-2
app/authenticator.rb
+14
-2
app/authenticator.rb
···
6
6
require 'openssl'
7
7
8
8
class Authenticator
9
+
class InvalidTokenError < StandardError
10
+
end
11
+
9
12
def initialize(hostname:)
10
13
@@pkey_cache ||= {}
11
14
@hostname = hostname
···
15
18
return nil unless auth_header.start_with?('Bearer ')
16
19
17
20
token = auth_header.gsub(/\ABearer /, '')
18
-
data = JSON.parse(Base64.decode64(token.split('.')[1]))
21
+
parts = token.split('.')
22
+
raise InvalidTokenError, "Invalid JWT token" if parts.length != 3
23
+
24
+
begin
25
+
decoded_data = Base64.decode64(parts[1])
26
+
data = JSON.parse(decoded_data)
27
+
rescue StandardError => e
28
+
raise InvalidTokenError, "Invalid JWT token"
29
+
end
30
+
19
31
did = data['iss']
20
-
return nil if data['aud'] != "did:web:#{@hostname}" || data['lxm'] != endpoint
32
+
return nil if did.nil? || data['aud'] != "did:web:#{@hostname}" || data['lxm'] != endpoint
21
33
22
34
pkey = pkey_for_user(did)
23
35
+35
-4
app/firehose_client.rb
+35
-4
app/firehose_client.rb
···
13
13
14
14
def initialize
15
15
@env = (ENV['APP_ENV'] || ENV['RACK_ENV'] || :development).to_sym
16
-
@service = DEFAULT_RELAY
16
+
17
+
if ENV['RELAY_HOST']
18
+
@service = ENV['RELAY_HOST']
19
+
elsif ENV['JETSTREAM_HOST']
20
+
@service = ENV['JETSTREAM_HOST']
21
+
@jetstream = true
22
+
else
23
+
@service = DEFAULT_RELAY
24
+
end
17
25
end
18
26
19
27
def start
···
26
34
last_cursor = load_or_init_cursor
27
35
cursor = @start_cursor || last_cursor
28
36
29
-
@sky = Skyfall::Firehose.new(@service, :subscribe_repos, cursor)
30
-
@sky.user_agent = "Lycan (https://tangled.sh/@mackuba.eu/lycan) #{@sky.version_string}"
37
+
@sky = if @jetstream
38
+
Skyfall::Jetstream.new(@service, { cursor: cursor, wanted_collections: [:bsky_post, :bsky_like, :bsky_repost] })
39
+
else
40
+
Skyfall::Firehose.new(@service, :subscribe_repos, cursor)
41
+
end
42
+
43
+
@sky.user_agent = (ENV['FIREHOSE_USER_AGENT'] || "Lycan (https://tangled.sh/@mackuba.eu/lycan)") + ' ' + @sky.version_string
31
44
@sky.check_heartbeat = true
32
45
33
46
@sky.on_message do |m|
···
138
151
end
139
152
end
140
153
end
154
+
rescue CBOR::UnpackError
155
+
# ignore invalid records
141
156
end
142
157
143
158
def process_account_event(msg)
144
159
if msg.status == :deleted
145
160
if user = User.find_by(did: msg.repo)
146
161
user.destroy
147
-
@active_users.delete_if { |u| u.id == user.id }
162
+
@active_users.delete_if { |k, u| u.id == user.id }
148
163
end
149
164
end
150
165
end
···
153
168
return unless @current_user
154
169
155
170
if op.action == :create
171
+
return if op.raw_record.nil?
156
172
@current_user.likes.import_from_record(op.uri, op.raw_record, queue: :firehose)
157
173
elsif op.action == :delete
158
174
@current_user.likes.where(rkey: op.rkey).delete_all
159
175
end
176
+
rescue StandardError => e
177
+
log "Error in #process_like (#{msg.seq}, #{op.uri}): #{e}"
178
+
log e.backtrace.reject { |x| x.include?('/ruby/') }
179
+
sleep 5 if e.is_a?(ActiveRecord::ConnectionFailed)
160
180
end
161
181
162
182
def process_repost(msg, op)
163
183
return unless @current_user
164
184
165
185
if op.action == :create
186
+
return if op.raw_record.nil?
166
187
@current_user.reposts.import_from_record(op.uri, op.raw_record, queue: :firehose)
167
188
elsif op.action == :delete
168
189
@current_user.reposts.where(rkey: op.rkey).delete_all
169
190
end
191
+
rescue StandardError => e
192
+
log "Error in #process_repost (#{msg.seq}, #{op.uri}): #{e}"
193
+
log e.backtrace.reject { |x| x.include?('/ruby/') }
194
+
sleep 5 if e.is_a?(ActiveRecord::ConnectionFailed)
170
195
end
171
196
172
197
def process_post(msg, op)
173
198
if op.action == :create
199
+
return if op.raw_record.nil?
200
+
174
201
if @current_user
175
202
@current_user.quotes.import_from_record(op.uri, op.raw_record, queue: :firehose)
176
203
@current_user.pins.import_from_record(op.uri, op.raw_record, queue: :firehose)
···
185
212
post.destroy
186
213
end
187
214
end
215
+
rescue StandardError => e
216
+
log "Error in #process_post (#{msg.seq}, #{op.uri}): #{e}"
217
+
log e.backtrace.reject { |x| x.include?('/ruby/') }
218
+
sleep 5 if e.is_a?(ActiveRecord::ConnectionFailed)
188
219
end
189
220
190
221
def log(text)
+20
-4
app/import_manager.rb
+20
-4
app/import_manager.rb
···
5
5
require_relative 'post_downloader'
6
6
7
7
class ImportManager
8
-
attr_accessor :report, :time_limit
8
+
attr_accessor :report, :time_limit, :logger, :log_status_updates
9
9
10
10
def initialize(user)
11
11
@user = user
···
34
34
end
35
35
end
36
36
37
-
queued_items = @user.all_items_in_queue(:import)
37
+
queued_items = @user.all_items_in_queue(:import).sort_by(&:time).reverse
38
38
queue = ItemQueue.new(queued_items)
39
39
40
40
downloader = PostDownloader.new
41
41
downloader.report = @report
42
+
downloader.logger = @logger
42
43
43
-
download_thread = Thread.new { downloader.import_from_queue(queue) }
44
+
download_thread = Thread.new do
45
+
@logger&.info "Starting downloader thread for #{@user}" if @log_status_updates
46
+
47
+
downloader.import_from_queue(queue)
48
+
49
+
@logger&.info "Ended downloader thread for #{@user}" if @log_status_updates
50
+
end
44
51
45
52
import_threads = importers.map do |import|
46
53
import.item_queue = queue
47
54
import.report = @report
55
+
import.logger = @logger
48
56
49
-
Thread.new { import.run_import(@time_limit) }
57
+
Thread.new do
58
+
@logger&.info "Starting #{import.class} thread for #{@user}" if @log_status_updates
59
+
60
+
import.run_import(@time_limit)
61
+
62
+
@logger&.info "Ended #{import.class} thread for #{@user}" if @log_status_updates
63
+
end
50
64
end
51
65
52
66
import_threads.each { |i| i.join }
67
+
68
+
@logger&.info "Finished all importer threads for #{@user}, waiting for downloader" if @log_status_updates
53
69
54
70
downloader.stop_when_empty = true
55
71
download_thread.join
+33
-5
app/import_worker.rb
+33
-5
app/import_worker.rb
···
1
1
require 'active_record'
2
+
require 'minisky'
3
+
require 'time'
2
4
3
5
require_relative 'init'
4
6
require_relative 'import_manager'
···
7
9
require_relative 'reports/basic_report'
8
10
9
11
class ImportWorker
10
-
attr_accessor :verbose
12
+
attr_accessor :verbose, :logger
11
13
12
14
class UserThread < Thread
13
-
def initialize(user, collections, verbose = false)
15
+
def initialize(user, collections, logger, verbose = false)
14
16
@user = user
15
17
@verbose = verbose
18
+
@logger = logger
16
19
17
20
super { run(collections) }
18
21
end
···
22
25
end
23
26
24
27
def run(collections)
28
+
@logger&.info "Starting import thread for #{@user}"
29
+
30
+
if @user.registered_at.nil?
31
+
registration_time = get_registration_time(@user)
32
+
@user.update!(registered_at: registration_time)
33
+
end
34
+
25
35
import = ImportManager.new(@user)
26
-
import.report = BasicReport.new if @verbose
36
+
37
+
if @logger
38
+
import.report = BasicReport.new(@logger) if @verbose
39
+
import.logger = @logger
40
+
import.log_status_updates = true
41
+
end
42
+
27
43
import.start(collections)
44
+
45
+
@logger&.info "Ended import thread for #{@user}"
46
+
end
47
+
48
+
def get_registration_time(user)
49
+
sky = Minisky.new(ENV['APPVIEW_HOST'] || 'public.api.bsky.app', nil)
50
+
profile = sky.get_request('app.bsky.actor.getProfile', { actor: user.did })
51
+
52
+
Time.parse(profile['createdAt'])
28
53
end
29
54
end
30
55
···
33
58
34
59
@firehose_thread = Thread.new { process_firehose_items }
35
60
@downloader = PostDownloader.new
61
+
@downloader.logger = @logger
36
62
37
63
loop do
38
64
@user_threads.delete_if { |t| !t.alive? }
39
65
40
-
if user = User.with_unfinished_import.where.not(id: @user_threads.map(&:user_id)).first
66
+
users = User.with_unfinished_import.where.not(id: @user_threads.map(&:user_id)).to_a
67
+
68
+
users.each do |user|
41
69
collections = user.imports.unfinished.map(&:collection)
42
-
thread = UserThread.new(user, collections, @verbose)
70
+
thread = UserThread.new(user, collections, @logger, @verbose)
43
71
@user_threads << thread
44
72
end
45
73
+11
-3
app/importers/base_importer.rb
+11
-3
app/importers/base_importer.rb
···
1
1
require 'didkit'
2
2
require 'minisky'
3
+
require 'time'
3
4
4
5
require_relative '../at_uri'
6
+
require_relative '../errors'
5
7
require_relative '../models/post'
6
8
require_relative '../models/user'
7
9
8
10
class BaseImporter
9
-
attr_accessor :item_queue, :report
11
+
attr_accessor :item_queue, :report, :logger
10
12
11
13
def initialize(user)
12
14
@did = DID.new(user.did)
···
31
33
end
32
34
33
35
@time_limit = requested_time_limit || @import.last_completed
34
-
puts "Fetching until: #{@time_limit}" if @time_limit
36
+
@logger&.info "Fetching until: #{@time_limit}" if @time_limit
35
37
36
38
import_items
37
39
38
40
@import.update!(last_completed: @import.started_from) unless requested_time_limit
39
-
@import.update!(cursor: nil, started_from: nil)
41
+
@import.update!(cursor: nil, started_from: nil, fetched_until: nil)
40
42
@report&.update(importers: { importer_name => { :finished => true }})
41
43
end
42
44
43
45
def import_items
44
46
raise NotImplementedError
47
+
end
48
+
49
+
def created_at(record)
50
+
Time.parse(record['createdAt'])
51
+
rescue StandardError
52
+
raise InvalidRecordError
45
53
end
46
54
end
+12
-9
app/importers/likes_importer.rb
+12
-9
app/importers/likes_importer.rb
···
1
-
require 'time'
2
1
require_relative 'base_importer'
3
2
4
3
class LikesImporter < BaseImporter
···
11
10
12
11
records = response['records']
13
12
cursor = response['cursor']
14
-
15
-
@imported_count += records.length
16
-
@report&.update(importers: { importer_name => { :imported_items => @imported_count }})
17
-
@report&.update(importers: { importer_name => { :oldest_date => Time.parse(records.last['value']['createdAt']) }}) unless records.empty?
13
+
oldest_date = nil
18
14
19
15
records.each do |record|
20
16
begin
21
17
like = @user.likes.import_from_record(record['uri'], record['value'], queue: :import)
22
18
19
+
record_date = like&.time || created_at(record['value'])
20
+
oldest_date = [oldest_date, record_date].compact.min
21
+
23
22
if like && like.pending? && @item_queue
24
23
@item_queue.push(like)
25
24
@report&.update(queue: { length: @item_queue.length })
26
25
end
27
-
rescue StandardError => e
28
-
puts "Error in LikesImporter: #{record['uri']}: #{e}"
26
+
rescue InvalidRecordError => e
27
+
@logger&.warn "Error in LikesImporter: #{record['uri']}: #{e}"
29
28
end
30
29
end
31
30
31
+
@imported_count += records.length
32
+
@report&.update(importers: { importer_name => { :imported_items => @imported_count }})
33
+
@report&.update(importers: { importer_name => { :oldest_date => oldest_date }}) if oldest_date
34
+
32
35
params[:cursor] = cursor
33
-
@import.update!(cursor: cursor)
36
+
@import.update!(cursor: cursor, fetched_until: oldest_date)
34
37
35
38
break if !cursor
36
-
break if @time_limit && records.any? { |x| Time.parse(x['value']['createdAt']) < @time_limit }
39
+
break if @time_limit && oldest_date && oldest_date < @time_limit
37
40
end
38
41
end
39
42
end
+12
-9
app/importers/posts_importer.rb
+12
-9
app/importers/posts_importer.rb
···
1
-
require 'time'
2
1
require_relative 'base_importer'
3
2
4
3
class PostsImporter < BaseImporter
···
11
10
12
11
records = response['records']
13
12
cursor = response['cursor']
14
-
15
-
@imported_count += records.length
16
-
@report&.update(importers: { importer_name => { :imported_items => @imported_count }})
17
-
@report&.update(importers: { importer_name => { :oldest_date => Time.parse(records.last['value']['createdAt']) }}) unless records.empty?
13
+
oldest_date = nil
18
14
19
15
records.each do |record|
20
16
begin
21
17
quote = @user.quotes.import_from_record(record['uri'], record['value'], queue: :import)
22
18
pin = @user.pins.import_from_record(record['uri'], record['value'], queue: :import)
23
19
20
+
record_date = quote&.time || pin&.time || created_at(record['value'])
21
+
oldest_date = [oldest_date, record_date].compact.min
22
+
24
23
if @item_queue
25
24
if quote && quote.pending?
26
25
@item_queue.push(quote)
···
32
31
33
32
@report&.update(queue: { length: @item_queue.length })
34
33
end
35
-
rescue StandardError => e
36
-
puts "Error in PostsImporter: #{record['uri']}: #{e}"
34
+
rescue InvalidRecordError => e
35
+
@logger&.warn "Error in PostsImporter: #{record['uri']}: #{e}"
37
36
end
38
37
end
39
38
39
+
@imported_count += records.length
40
+
@report&.update(importers: { importer_name => { :imported_items => @imported_count }})
41
+
@report&.update(importers: { importer_name => { :oldest_date => oldest_date }}) if oldest_date
42
+
40
43
params[:cursor] = cursor
41
-
@import.update!(cursor: cursor)
44
+
@import.update!(cursor: cursor, fetched_until: oldest_date)
42
45
43
46
break if !cursor
44
-
break if @time_limit && records.any? { |x| Time.parse(x['value']['createdAt']) < @time_limit }
47
+
break if @time_limit && oldest_date && oldest_date < @time_limit
45
48
end
46
49
end
47
50
end
+12
-9
app/importers/reposts_importer.rb
+12
-9
app/importers/reposts_importer.rb
···
1
-
require 'time'
2
1
require_relative 'base_importer'
3
2
4
3
class RepostsImporter < BaseImporter
···
11
10
12
11
records = response['records']
13
12
cursor = response['cursor']
14
-
15
-
@imported_count += records.length
16
-
@report&.update(importers: { importer_name => { :imported_items => @imported_count }})
17
-
@report&.update(importers: { importer_name => { :oldest_date => Time.parse(records.last['value']['createdAt']) }}) unless records.empty?
13
+
oldest_date = nil
18
14
19
15
records.each do |record|
20
16
begin
21
17
repost = @user.reposts.import_from_record(record['uri'], record['value'], queue: :import)
22
18
19
+
record_date = repost&.time || created_at(record['value'])
20
+
oldest_date = [oldest_date, record_date].compact.min
21
+
23
22
if repost && repost.pending? && @item_queue
24
23
@item_queue.push(repost)
25
24
@report&.update(queue: { length: @item_queue.length })
26
25
end
27
-
rescue StandardError => e
28
-
puts "Error in RepostsImporter: #{record['uri']}: #{e}"
26
+
rescue InvalidRecordError => e
27
+
@logger&.warn "Error in RepostsImporter: #{record['uri']}: #{e}"
29
28
end
30
29
end
31
30
31
+
@imported_count += records.length
32
+
@report&.update(importers: { importer_name => { :imported_items => @imported_count }})
33
+
@report&.update(importers: { importer_name => { :oldest_date => oldest_date }}) if oldest_date
34
+
32
35
params[:cursor] = cursor
33
-
@import.update!(cursor: cursor)
36
+
@import.update!(cursor: cursor, fetched_until: oldest_date)
34
37
35
38
break if !cursor
36
-
break if @time_limit && records.any? { |x| Time.parse(x['value']['createdAt']) < @time_limit }
39
+
break if @time_limit && oldest_date && oldest_date < @time_limit
37
40
end
38
41
end
39
42
end
+1
app/init.rb
+1
app/init.rb
+26
app/models/import.rb
+26
app/models/import.rb
···
9
9
validates_uniqueness_of :collection, scope: :user_id
10
10
11
11
scope :unfinished, -> { where('(started_from IS NOT NULL) OR (last_completed IS NULL)') }
12
+
13
+
IMPORT_END = Time.at(0)
14
+
15
+
def imported_until
16
+
return nil if cursor.nil? && last_completed.nil?
17
+
18
+
groups = case collection
19
+
when 'likes'
20
+
[:likes]
21
+
when 'reposts'
22
+
[:reposts]
23
+
when 'posts'
24
+
[:pins, :quotes]
25
+
end
26
+
27
+
newest_queued_items = groups.map { |g| user.send(g).where(queue: :import).order(:time).last }
28
+
newest_queued = newest_queued_items.compact.sort_by(&:time).last
29
+
30
+
if newest_queued
31
+
newest_queued.time
32
+
elsif fetched_until
33
+
fetched_until
34
+
else
35
+
IMPORT_END
36
+
end
37
+
end
12
38
end
+7
app/models/post.rb
+7
app/models/post.rb
···
10
10
validates_length_of :text, maximum: 1000
11
11
validates_length_of :data, maximum: 10000
12
12
13
+
validate :check_null_bytes
14
+
13
15
belongs_to :user
14
16
15
17
has_many :likes, dependent: :delete_all
···
26
28
27
29
def at_uri
28
30
"at://#{user.did}/app.bsky.feed.post/#{rkey}"
31
+
end
32
+
33
+
def check_null_bytes
34
+
# Postgres doesn't allow null bytes in strings
35
+
errors.add(:text, 'must not contain a null byte') if text.include?("\u0000")
29
36
end
30
37
end
+27
-35
app/models/user.rb
+27
-35
app/models/user.rb
···
7
7
require_relative 'pin'
8
8
require_relative 'post'
9
9
require_relative 'repost'
10
+
require_relative 'user_importable'
10
11
11
12
class User < ActiveRecord::Base
12
13
validates_presence_of :did
13
14
validates_length_of :did, maximum: 260
15
+
validates_format_of :did, with: /\Adid:(plc:[0-9a-z]{24}|web:[0-9a-z\-]+(\.[0-9a-z\-]+)+)\Z/
14
16
15
17
has_many :posts
16
18
has_many :imports, dependent: :delete_all
···
18
20
19
21
before_destroy :delete_posts_cascading
20
22
21
-
has_many :likes, foreign_key: 'actor_id', dependent: :delete_all do
22
-
def import_from_record(like_uri, record, **args)
23
-
like = self.new_from_record(like_uri, record)
24
-
return nil if like.nil? || self.where(rkey: like.rkey).exists?
25
-
26
-
like.import_item!(args)
27
-
end
28
-
end
29
-
30
-
has_many :reposts, foreign_key: 'actor_id', dependent: :delete_all do
31
-
def import_from_record(repost_uri, record, **args)
32
-
repost = self.new_from_record(repost_uri, record)
33
-
return nil if repost.nil? || self.where(rkey: repost.rkey).exists?
34
-
35
-
repost.import_item!(args)
36
-
end
37
-
end
38
-
39
-
has_many :quotes, foreign_key: 'actor_id', dependent: :delete_all do
40
-
def import_from_record(post_uri, record, **args)
41
-
quote = self.new_from_record(post_uri, record)
42
-
return nil if quote.nil? || self.where(rkey: quote.rkey).exists?
43
-
44
-
quote.import_item!(args)
45
-
end
46
-
end
47
-
48
-
has_many :pins, foreign_key: 'actor_id', dependent: :delete_all do
49
-
def import_from_record(post_uri, record, **args)
50
-
pin = self.new_from_record(post_uri, record)
51
-
return nil if pin.nil? || self.where(rkey: pin.rkey).exists?
52
-
53
-
pin.import_item!(args)
54
-
end
55
-
end
23
+
has_many :likes, foreign_key: 'actor_id', dependent: :delete_all, extend: UserImportable
24
+
has_many :reposts, foreign_key: 'actor_id', dependent: :delete_all, extend: UserImportable
25
+
has_many :quotes, foreign_key: 'actor_id', dependent: :delete_all, extend: UserImportable
26
+
has_many :pins, foreign_key: 'actor_id', dependent: :delete_all, extend: UserImportable
56
27
57
28
def self.active
58
29
self.joins(:imports).distinct
···
78
49
[:likes, :reposts, :quotes, :pins].map { |x| self.send(x).in_queue(queue).to_a }.reduce(&:+)
79
50
end
80
51
52
+
def imported_until
53
+
import_positions = self.imports.map(&:imported_until)
54
+
55
+
if import_positions.empty? || import_positions.any? { |x| x.nil? }
56
+
nil
57
+
else
58
+
import_positions.sort.last
59
+
end
60
+
end
61
+
62
+
def erase_imports!
63
+
[:likes, :reposts, :quotes, :pins].map { |x| self.send(x).delete_all }
64
+
65
+
self.import_job&.destroy
66
+
self.imports.delete_all
67
+
end
68
+
81
69
def delete_posts_cascading
82
70
posts_subquery = self.posts.select(:id)
83
71
···
87
75
Quote.where(post_id: posts_subquery).delete_all
88
76
89
77
Post.where(user: self).delete_all
78
+
end
79
+
80
+
def to_s
81
+
%(<User id: #{id}, did: "#{did}">)
90
82
end
91
83
end
+20
app/models/user_importable.rb
+20
app/models/user_importable.rb
···
1
+
require_relative '../errors'
2
+
3
+
module UserImportable
4
+
def import_from_record(item_uri, record, **args)
5
+
item = try_build_from_record(item_uri, record)
6
+
return nil if item.nil? || already_imported?(item)
7
+
8
+
item.import_item!(args)
9
+
end
10
+
11
+
def try_build_from_record(item_uri, record)
12
+
self.new_from_record(item_uri, record)
13
+
rescue StandardError
14
+
raise InvalidRecordError
15
+
end
16
+
17
+
def already_imported?(item)
18
+
self.where(rkey: item.rkey).exists?
19
+
end
20
+
end
+52
-39
app/post_downloader.rb
+52
-39
app/post_downloader.rb
···
6
6
require_relative 'models/user'
7
7
8
8
class PostDownloader
9
-
attr_accessor :report, :stop_when_empty
9
+
attr_accessor :report, :logger, :stop_when_empty
10
10
11
11
def initialize
12
-
@sky = Minisky.new(ENV['APPVIEW'] || 'public.api.bsky.app', nil)
12
+
@sky = Minisky.new(ENV['APPVIEW_HOST'] || 'public.api.bsky.app', nil)
13
13
14
14
@total_count = 0
15
15
@oldest_imported = Time.now
···
36
36
end
37
37
38
38
def process_items(items)
39
-
#
40
-
existing_posts = Post.where(rkey: items.map { |x| AT_URI(x.post_uri).rkey }).to_a
39
+
existing_posts = Post.where(rkey: items.map { |x| AT_URI(x.post_uri).rkey }).to_a
41
40
42
-
items.dup.each do |item|
43
-
if post = existing_posts.detect { |post| post.at_uri == item.post_uri }
44
-
update_item(item, post)
45
-
items.delete(item)
46
-
end
41
+
items.dup.each do |item|
42
+
if post = existing_posts.detect { |post| post.at_uri == item.post_uri }
43
+
update_item(item, post)
44
+
items.delete(item)
47
45
end
46
+
end
48
47
49
-
return if items.empty?
48
+
return if items.empty?
50
49
51
-
begin
52
-
response = @sky.get_request('app.bsky.feed.getPosts', { uris: items.map(&:post_uri).uniq })
50
+
begin
51
+
response = @sky.get_request('app.bsky.feed.getPosts', { uris: items.map(&:post_uri).uniq })
53
52
54
-
response['posts'].each do |data|
55
-
begin
56
-
item = items.detect { |x| x.post_uri == data['uri'] }
57
-
items.delete(item)
53
+
response['posts'].each do |data|
54
+
current_items = items.select { |x| x.post_uri == data['uri'] }
55
+
items -= current_items
58
56
59
-
post = save_post(data['uri'], data['record'])
57
+
begin
58
+
post = save_post(data['uri'], data['record'])
60
59
61
-
if post.valid?
62
-
update_item(item, post)
63
-
else
64
-
puts "Invalid post #{item.post_uri}: #{post.errors.full_messages.join("; ")}"
65
-
invalidate_item(item)
66
-
end
67
-
rescue StandardError => e
68
-
puts "Error in PostDownloader: #{item.post_uri}: #{e.class}: #{e}"
60
+
if post.valid?
61
+
current_items.each { |i| update_item(i, post) }
62
+
else
63
+
@logger&.warn "Invalid post #{data['uri']}: #{post.errors.full_messages.join("; ")}"
64
+
current_items.each { |i| invalidate_item(i) }
69
65
end
66
+
rescue InvalidRecordError => e
67
+
@logger&.warn "Error in PostDownloader: #{data['uri']}: #{e.class}: #{e}"
68
+
current_items.each { |i| i.update!(queue: nil) }
70
69
end
71
-
72
-
check_missing_items(items)
73
-
rescue StandardError => e
74
-
puts "Error in PostDownloader: #{e.class}: #{e}"
75
70
end
76
-
#
71
+
72
+
check_missing_items(items)
73
+
rescue StandardError => e
74
+
@logger&.warn "Error in PostDownloader: #{e.class}: #{e}"
75
+
end
77
76
end
78
77
79
78
def save_post(post_uri, record)
80
79
did, _, rkey = AT_URI(post_uri)
81
80
82
-
text = record.delete('text')
83
-
created = record.delete('createdAt')
84
-
85
-
author = User.find_or_create_by!(did: did)
81
+
begin
82
+
author = User.find_or_create_by!(did: did)
83
+
rescue ActiveRecord::RecordInvalid => e
84
+
raise InvalidRecordError
85
+
end
86
86
87
87
if post = Post.find_by(user: author, rkey: rkey)
88
88
return post
89
+
else
90
+
post = build_post(author, rkey, record)
91
+
post.save
92
+
post
89
93
end
94
+
end
90
95
91
-
Post.create!(
96
+
def build_post(author, rkey, record)
97
+
text = record.delete('text')
98
+
created = record.delete('createdAt')
99
+
100
+
record.delete('$type')
101
+
102
+
Post.new(
92
103
user: author,
93
104
rkey: rkey,
94
105
time: Time.parse(created),
95
106
text: text,
96
107
data: JSON.generate(record)
97
108
)
109
+
rescue StandardError
110
+
raise InvalidRecordError
98
111
end
99
112
100
113
def update_item(item, post)
···
140
153
case status
141
154
when :active
142
155
# account is active but wasn't returned in getProfiles, probably was suspended on the AppView
143
-
puts "#{item.post_uri}: account #{did} exists on the PDS, account must have been taken down"
156
+
# puts "#{item.post_uri}: account #{did} exists on the PDS, account must have been taken down"
144
157
item.destroy
145
158
when nil
146
159
# account was deleted, so all posts were deleted too
147
-
puts "#{item.post_uri}: account #{did} doesn't exist on the PDS, post must have been deleted"
160
+
# puts "#{item.post_uri}: account #{did} doesn't exist on the PDS, post must have been deleted"
148
161
item.destroy
149
162
else
150
163
# account is inactive/suspended, but could come back, so leave it for now
151
-
puts "#{item.post_uri}: account #{did} is inactive: #{status}"
164
+
# puts "#{item.post_uri}: account #{did} is inactive: #{status}"
152
165
end
153
166
rescue StandardError => e
154
167
hostname = did_obj.document.pds_host rescue "???"
155
-
puts "#{item.post_uri}: couldn't check account status for #{did} on #{hostname}: #{e.class}: #{e}"
168
+
@logger&.warn "#{item.post_uri}: couldn't check account status for #{did} on #{hostname}: #{e.class}: #{e}"
156
169
157
170
# delete reference if the account's PDS is the old bsky.social (so it must have been deleted pre Nov 2023)
158
171
item.destroy if hostname == 'bsky.social'
+6
-2
app/reports/basic_report.rb
+6
-2
app/reports/basic_report.rb
···
1
1
class BasicReport
2
+
def initialize(logger)
3
+
@logger = logger
4
+
end
5
+
2
6
def update(data)
3
7
data.each do |k, v|
4
8
if k == :downloader
5
-
p ({ k => v }) if v[:downloaded_posts] && v[:downloaded_posts] % 100 == 0
9
+
@logger.info({ k => v }.inspect) if v[:downloaded_posts] && v[:downloaded_posts] % 100 == 0
6
10
elsif k == :queue
7
11
next
8
12
else
9
-
p ({ k => v})
13
+
@logger.info({ k => v}.inspect)
10
14
end
11
15
end
12
16
end
+9
app/reports/simple_logger.rb
+9
app/reports/simple_logger.rb
+105
-23
app/server.rb
+105
-23
app/server.rb
···
8
8
9
9
class Server < Sinatra::Application
10
10
register Sinatra::ActiveRecordExtension
11
-
set :port, 3000
11
+
set :port, ENV['PORT'] || 3000
12
12
13
13
PAGE_LIMIT = 25
14
-
HOSTNAME = 'lycan.feeds.blue'
14
+
HOSTNAME = ENV['SERVER_HOSTNAME'] || 'lycan.feeds.blue'
15
15
16
16
helpers do
17
17
def json_response(data)
···
23
23
content_type :json
24
24
[status, JSON.generate({ error: name, message: message })]
25
25
end
26
+
27
+
def get_user_did
28
+
if settings.development?
29
+
did = if request.post?
30
+
json = JSON.parse(request.body.read)
31
+
request.body.rewind
32
+
json['user']
33
+
else
34
+
params[:user]
35
+
end
36
+
37
+
if did
38
+
return did
39
+
else
40
+
halt json_error('AuthMissing', 'Missing "user" parameter', status: 401)
41
+
end
42
+
else
43
+
auth_header = env['HTTP_AUTHORIZATION']
44
+
if auth_header.to_s.strip.empty?
45
+
halt json_error('AuthMissing', 'Missing authentication header', status: 401)
46
+
end
47
+
48
+
begin
49
+
method = request.path.split('/')[2]
50
+
did = @authenticator.decode_user_from_jwt(auth_header, method)
51
+
rescue StandardError => e
52
+
halt json_error('InvalidToken', e.message, status: 401)
53
+
end
54
+
55
+
if did
56
+
return did
57
+
else
58
+
halt json_error('InvalidToken', "Invalid JWT token", status: 401)
59
+
end
60
+
end
61
+
end
62
+
63
+
def load_user
64
+
did = get_user_did
65
+
66
+
if user = User.find_by(did: did)
67
+
return user
68
+
else
69
+
halt json_error('AccountNotFound', 'Account not found', status: 401)
70
+
end
71
+
end
26
72
end
27
73
28
74
before do
29
75
@authenticator = Authenticator.new(hostname: HOSTNAME)
30
-
end
31
-
32
-
get '/xrpc/blue.feeds.lycan.searchPosts' do
33
-
headers['access-control-allow-origin'] = '*'
34
76
35
77
if settings.development?
36
-
user = User.find_by(did: params[:user])
37
-
return json_error('UserNotFound', 'Missing "user" parameter') if user.nil?
38
-
else
39
-
begin
40
-
auth_header = env['HTTP_AUTHORIZATION']
41
-
did = @authenticator.decode_user_from_jwt(auth_header, 'blue.feeds.lycan.searchPosts')
42
-
rescue StandardError => e
43
-
p e
44
-
end
45
-
46
-
user = did && User.find_by(did: did)
47
-
return json_error('UserNotFound', 'Missing authentication header') if user.nil?
78
+
headers['Access-Control-Allow-Origin'] = '*'
48
79
end
80
+
end
81
+
82
+
get '/' do
83
+
"Awoo ๐บ"
84
+
end
85
+
86
+
get '/xrpc/blue.feeds.lycan.searchPosts' do
87
+
@user = load_user
49
88
50
89
if params[:query].to_s.strip.empty?
51
90
return json_error('MissingParameter', 'Missing "query" parameter')
···
58
97
query = QueryParser.new(params[:query])
59
98
60
99
collection = case params[:collection]
61
-
when 'likes' then user.likes
62
-
when 'pins' then user.pins
63
-
when 'quotes' then user.quotes
64
-
when 'reposts' then user.reposts
100
+
when 'likes' then @user.likes
101
+
when 'pins' then @user.pins
102
+
when 'quotes' then @user.quotes
103
+
when 'reposts' then @user.reposts
65
104
else return json_error('InvalidParameter', 'Invalid search collection')
66
105
end
67
106
···
80
119
81
120
post_uris = case params[:collection]
82
121
when 'quotes'
83
-
items.map { |x| "at://#{user.did}/app.bsky.feed.post/#{x.rkey}" }
122
+
items.map { |x| "at://#{@user.did}/app.bsky.feed.post/#{x.rkey}" }
84
123
else
85
124
items.map(&:post).map(&:at_uri)
86
125
end
87
126
88
127
json_response(terms: query.terms, posts: post_uris, cursor: items.last&.cursor)
128
+
end
129
+
130
+
if settings.development?
131
+
options '/xrpc/blue.feeds.lycan.startImport' do
132
+
headers['Access-Control-Allow-Origin'] = '*'
133
+
headers['Access-Control-Allow-Headers'] = 'content-type'
134
+
end
135
+
end
136
+
137
+
post '/xrpc/blue.feeds.lycan.startImport' do
138
+
did = get_user_did
139
+
user = User.find_or_create_by(did: did)
140
+
141
+
if !user.valid?
142
+
json_error('InvalidRequest', 'Invalid DID')
143
+
elsif user.import_job || user.active?
144
+
json_response(message: "Import has already started")
145
+
else
146
+
user.create_import_job!
147
+
148
+
status 202
149
+
json_response(message: "Import has been scheduled")
150
+
end
151
+
end
152
+
153
+
get '/xrpc/blue.feeds.lycan.getImportStatus' do
154
+
did = get_user_did
155
+
user = User.find_by(did: did)
156
+
until_date = user&.imported_until
157
+
158
+
case until_date
159
+
when nil
160
+
if user.import_job || user.imports.exists?
161
+
json_response(status: 'scheduled')
162
+
else
163
+
json_response(status: 'not_started')
164
+
end
165
+
when Import::IMPORT_END
166
+
json_response(status: 'finished')
167
+
else
168
+
progress = 1 - (until_date - user.registered_at) / (Time.now - user.registered_at)
169
+
json_response(status: 'in_progress', position: until_date.iso8601, progress: progress)
170
+
end
89
171
end
90
172
91
173
get '/.well-known/did.json' do
+1
bin/server
+1
bin/server
+7
-2
bin/worker
+7
-2
bin/worker
···
2
2
3
3
require 'bundler/setup'
4
4
require_relative '../app/import_worker'
5
+
require_relative '../app/reports/simple_logger'
5
6
6
7
$stdout.sync = true
7
8
···
17
18
puts " -v = verbose"
18
19
end
19
20
21
+
logger = SimpleLogger.new
20
22
worker = ImportWorker.new
23
+
worker.logger = logger
21
24
22
25
args = ARGV.dup
23
26
···
36
39
end
37
40
38
41
trap("SIGINT") {
39
-
puts "Stopping..."
42
+
puts
43
+
puts "[#{Time.now}] Stopping..."
40
44
exit
41
45
}
42
46
43
47
trap("SIGTERM") {
44
-
puts "Shutting down the service..."
48
+
puts "[#{Time.now}] Shutting down the service..."
45
49
exit
46
50
}
47
51
52
+
puts "[#{Time.now}] Starting background worker..."
48
53
worker.run
+5
config/deploy.rb
+5
config/deploy.rb
···
22
22
23
23
after 'deploy', 'deploy:cleanup'
24
24
after 'deploy:migrations', 'deploy:cleanup'
25
+
after 'deploy:update_code', 'deploy:link_shared'
25
26
26
27
namespace :deploy do
27
28
task :restart, :roles => :web do
···
32
33
run "cd #{release_path} && bundle config set --local deployment 'true'"
33
34
run "cd #{release_path} && bundle config set --local path '#{shared_path}/bundle'"
34
35
run "cd #{release_path} && bundle config set --local without 'development test'"
36
+
end
37
+
38
+
task :link_shared do
39
+
run "ln -s #{shared_path}/env #{release_path}/.env"
35
40
end
36
41
end
+5
db/migrate/20250923180153_add_user_created_at.rb
+5
db/migrate/20250923180153_add_user_created_at.rb
+5
db/migrate/20251027134657_add_fetched_until_to_imports.rb
+5
db/migrate/20251027134657_add_fetched_until_to_imports.rb
+3
-1
db/schema.rb
+3
-1
db/schema.rb
···
10
10
#
11
11
# It's strongly recommended that you check this file into your version control system.
12
12
13
-
ActiveRecord::Schema[7.2].define(version: 2025_09_23_014702) do
13
+
ActiveRecord::Schema[7.2].define(version: 2025_10_27_134657) do
14
14
# These are extensions that must be enabled in order to support this database
15
15
enable_extension "plpgsql"
16
16
···
25
25
t.datetime "started_from"
26
26
t.datetime "last_completed"
27
27
t.string "collection", limit: 20, null: false
28
+
t.datetime "fetched_until"
28
29
t.index ["user_id", "collection"], name: "index_imports_on_user_id_and_collection", unique: true
29
30
end
30
31
···
91
92
92
93
create_table "users", id: :serial, force: :cascade do |t|
93
94
t.string "did", limit: 260, null: false
95
+
t.datetime "registered_at"
94
96
t.index ["did"], name: "index_users_on_did", unique: true
95
97
end
96
98
end
+2
lib/tasks/import.rake
+2
lib/tasks/import.rake
···
3
3
require_relative '../../app/models/user'
4
4
require_relative '../../app/post_downloader'
5
5
require_relative '../../app/reports/console_report'
6
+
require_relative '../../app/reports/simple_logger'
6
7
7
8
task :enqueue_user do
8
9
unless ENV['DID']
···
34
35
35
36
import = ImportManager.new(user)
36
37
import.report = ConsoleReport.new
38
+
import.logger = SimpleLogger.new
37
39
import.time_limit = ENV['UNTIL']
38
40
39
41
trap("SIGINT") {