package org.tm.archive.database

import android.annotation.SuppressLint
import android.content.Context
import android.database.Cursor
import android.text.TextUtils
import net.zetetic.database.sqlcipher.SQLiteDatabase
import org.intellij.lang.annotations.Language
import org.signal.core.util.SqlUtil
import org.signal.core.util.ThreadUtil
import org.signal.core.util.logging.Log
import org.signal.core.util.withinTransaction
import org.tm.archive.jobs.RebuildMessageSearchIndexJob

15/**
16 * Contains all databases necessary for full-text search (FTS).
17 */
18@SuppressLint("RecipientIdDatabaseReferenceUsage", "ThreadIdDatabaseReferenceUsage") // Handles updates via triggers
19class SearchTable(context: Context, databaseHelper: SignalDatabase) : DatabaseTable(context, databaseHelper) {
20 companion object {
21 private val TAG = Log.tag(SearchTable::class.java)
22
23 const val FTS_TABLE_NAME = "message_fts"
24 const val ID = "rowid"
25 const val BODY = MessageTable.BODY
26 const val THREAD_ID = MessageTable.THREAD_ID
27 const val SNIPPET = "snippet"
28 const val CONVERSATION_RECIPIENT = "conversation_recipient"
29 const val MESSAGE_RECIPIENT = "message_recipient"
30 const val IS_MMS = "is_mms"
31 const val MESSAGE_ID = "message_id"
32 const val SNIPPET_WRAP = "..."
33
34 @Language("sql")
35 val CREATE_TABLE = arrayOf(
36 "CREATE VIRTUAL TABLE $FTS_TABLE_NAME USING fts5($BODY, $THREAD_ID UNINDEXED, content=${MessageTable.TABLE_NAME}, content_rowid=${MessageTable.ID})"
37 )
38
39 private const val TRIGGER_AFTER_INSERT = "message_ai"
40 private const val TRIGGER_AFTER_DELETE = "message_ad"
41 private const val TRIGGER_AFTER_UPDATE = "message_au"
42
43 @Language("sql")
44 val CREATE_TRIGGERS = arrayOf(
45 """
46 CREATE TRIGGER $TRIGGER_AFTER_INSERT AFTER INSERT ON ${MessageTable.TABLE_NAME} BEGIN
47 INSERT INTO $FTS_TABLE_NAME($ID, $BODY, $THREAD_ID) VALUES (new.${MessageTable.ID}, new.${MessageTable.BODY}, new.${MessageTable.THREAD_ID});
48 END;
49 """,
50 """
51 CREATE TRIGGER $TRIGGER_AFTER_DELETE AFTER DELETE ON ${MessageTable.TABLE_NAME} BEGIN
52 INSERT INTO $FTS_TABLE_NAME($FTS_TABLE_NAME, $ID, $BODY, $THREAD_ID) VALUES('delete', old.${MessageTable.ID}, old.${MessageTable.BODY}, old.${MessageTable.THREAD_ID});
53 END;
54 """,
55 """
56 CREATE TRIGGER $TRIGGER_AFTER_UPDATE AFTER UPDATE ON ${MessageTable.TABLE_NAME} BEGIN
57 INSERT INTO $FTS_TABLE_NAME($FTS_TABLE_NAME, $ID, $BODY, $THREAD_ID) VALUES('delete', old.${MessageTable.ID}, old.${MessageTable.BODY}, old.${MessageTable.THREAD_ID});
58 INSERT INTO $FTS_TABLE_NAME($ID, $BODY, $THREAD_ID) VALUES (new.${MessageTable.ID}, new.${MessageTable.BODY}, new.${MessageTable.THREAD_ID});
59 END;
60 """
61 )
62
63 @Language("sql")
64 private const val MESSAGES_QUERY = """
65 SELECT
66 ${ThreadTable.TABLE_NAME}.${ThreadTable.RECIPIENT_ID} AS $CONVERSATION_RECIPIENT,
67 ${MessageTable.TABLE_NAME}.${MessageTable.FROM_RECIPIENT_ID} AS $MESSAGE_RECIPIENT,
68 snippet($FTS_TABLE_NAME, -1, '', '', '$SNIPPET_WRAP', 7) AS $SNIPPET,
69 ${MessageTable.TABLE_NAME}.${MessageTable.DATE_RECEIVED},
70 $FTS_TABLE_NAME.$THREAD_ID,
71 $FTS_TABLE_NAME.$BODY,
72 $FTS_TABLE_NAME.$ID AS $MESSAGE_ID,
73 1 AS $IS_MMS
74 FROM
75 ${MessageTable.TABLE_NAME}
76 INNER JOIN $FTS_TABLE_NAME ON $FTS_TABLE_NAME.$ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}
77 INNER JOIN ${ThreadTable.TABLE_NAME} ON $FTS_TABLE_NAME.$THREAD_ID = ${ThreadTable.TABLE_NAME}.${ThreadTable.ID}
78 WHERE
79 $FTS_TABLE_NAME MATCH ? AND
80 ${MessageTable.TABLE_NAME}.${MessageTable.TYPE} & ${MessageTypes.GROUP_V2_BIT} = 0 AND
81 ${MessageTable.TABLE_NAME}.${MessageTable.TYPE} & ${MessageTypes.SPECIAL_TYPE_PAYMENTS_NOTIFICATION} = 0 AND
82 ${MessageTable.TABLE_NAME}.${MessageTable.SCHEDULED_DATE} < 0 AND
83 ${MessageTable.TABLE_NAME}.${MessageTable.LATEST_REVISION_ID} IS NULL
84 ORDER BY ${MessageTable.DATE_RECEIVED} DESC
85 LIMIT 500
86 """
87
88 @Language("sql")
89 private const val MESSAGES_FOR_THREAD_QUERY = """
90 SELECT
91 ${ThreadTable.TABLE_NAME}.${ThreadTable.RECIPIENT_ID} AS $CONVERSATION_RECIPIENT,
92 ${MessageTable.TABLE_NAME}.${MessageTable.FROM_RECIPIENT_ID} AS $MESSAGE_RECIPIENT,
93 snippet($FTS_TABLE_NAME, -1, '', '', '$SNIPPET_WRAP', 7) AS $SNIPPET,
94 ${MessageTable.TABLE_NAME}.${MessageTable.DATE_RECEIVED},
95 $FTS_TABLE_NAME.$THREAD_ID,
96 $FTS_TABLE_NAME.$BODY,
97 $FTS_TABLE_NAME.$ID AS $MESSAGE_ID,
98 1 AS $IS_MMS
99 FROM
100 ${MessageTable.TABLE_NAME}
101 INNER JOIN $FTS_TABLE_NAME ON $FTS_TABLE_NAME.$ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}
102 INNER JOIN ${ThreadTable.TABLE_NAME} ON $FTS_TABLE_NAME.$THREAD_ID = ${ThreadTable.TABLE_NAME}.${ThreadTable.ID}
103 WHERE
104 $FTS_TABLE_NAME MATCH ? AND
105 ${MessageTable.TABLE_NAME}.${MessageTable.THREAD_ID} = ? AND
106 ${MessageTable.TABLE_NAME}.${MessageTable.TYPE} & ${MessageTypes.GROUP_V2_BIT} = 0 AND
107 ${MessageTable.TABLE_NAME}.${MessageTable.TYPE} & ${MessageTypes.SPECIAL_TYPE_PAYMENTS_NOTIFICATION} = 0 AND
108 ${MessageTable.TABLE_NAME}.${MessageTable.SCHEDULED_DATE} < 0 AND
109 ${MessageTable.TABLE_NAME}.${MessageTable.LATEST_REVISION_ID} IS NULL
110 ORDER BY ${MessageTable.DATE_RECEIVED} DESC
111 LIMIT 500
112 """
113 }
114
115 fun queryMessages(query: String): Cursor? {
116 val fullTextSearchQuery = createFullTextSearchQuery(query)
117 return if (fullTextSearchQuery.isEmpty()) {
118 null
119 } else {
120 readableDatabase.rawQuery(MESSAGES_QUERY, SqlUtil.buildArgs(fullTextSearchQuery))
121 }
122 }
123
124 fun queryMessages(query: String, threadId: Long): Cursor? {
125 val fullTextSearchQuery = createFullTextSearchQuery(query)
126 return if (TextUtils.isEmpty(fullTextSearchQuery)) {
127 null
128 } else {
129 readableDatabase.rawQuery(MESSAGES_FOR_THREAD_QUERY, SqlUtil.buildArgs(fullTextSearchQuery, threadId))
130 }
131 }
132
133 /**
134 * Re-adds every message to the index. It's fine to insert the same message twice; the table will naturally de-dupe.
135 *
136 * In order to prevent the database from locking up with super large inserts, this will perform the re-index in batches of the size you specify.
137 * It is not guaranteed that every batch will be the same size, but rather that the batches will be _no larger_ than the specified size.
138 *
139 * Warning: This is a potentially extremely-costly operation! It can take 10+ seconds on large installs and/or slow devices.
140 * Be smart about where you call this.
141 */
142 fun rebuildIndex(batchSize: Long = 10_000L) {
143 val maxId: Long = SignalDatabase.messages.getNextId()
144
145 Log.i(TAG, "Re-indexing. Operating on ID's 1-$maxId in steps of $batchSize.")
146
147 for (i in 1..maxId step batchSize) {
148 Log.i(TAG, "Reindexing ID's [$i, ${i + batchSize})")
149 writableDatabase.execSQL(
150 """
151 INSERT INTO $FTS_TABLE_NAME ($ID, $BODY)
152 SELECT
153 ${MessageTable.ID},
154 ${MessageTable.BODY}
155 FROM
156 ${MessageTable.TABLE_NAME}
157 WHERE
158 ${MessageTable.ID} >= $i AND
159 ${MessageTable.ID} < ${i + batchSize}
160 """
161 )
162 }
163 }
164
165 /**
166 * This performs the same thing as the `optimize` command in SQLite, but broken into iterative stages to avoid locking up the database for too long.
167 * If what's going on in this method seems weird, that's because it is, but please read the sqlite docs -- we're following their algorithm:
168 * https://www.sqlite.org/fts5.html#the_optimize_command
169 *
170 * Note that in order for the [SqlUtil.getTotalChanges] call to work, we have to be within a transaction, or else the connection pool screws everything up
171 * (the stats are on a per-connection basis).
172 *
173 * There's this double-batching mechanism happening here to strike a balance between making individual transactions short while also not hammering the
174 * database with a ton of independent transactions.
175 *
176 * To give you some ballpark numbers, on a large database (~400k messages), it takes ~75 iterations to fully optimize everything.
177 */
178 fun optimizeIndex(timeout: Long): Boolean {
179 val pageSize = 64 // chosen through experimentation
180 val batchSize = 10 // chosen through experimentation
181 val noChangeThreshold = 2 // if less changes occurred than this, operation is considered no-op (see sqlite docs ref'd in kdoc)
182
183 val startTime = System.currentTimeMillis()
184 var totalIterations = 0
185 var totalBatches = 0
186 var actualWorkTime = 0L
187 var finished = false
188
189 while (!finished) {
190 var batchIterations = 0
191 val batchStartTime = System.currentTimeMillis()
192
193 writableDatabase.withinTransaction { db ->
194 // Note the negative page size -- see sqlite docs ref'd in kdoc
195 db.execSQL("INSERT INTO $FTS_TABLE_NAME ($FTS_TABLE_NAME, rank) values ('merge', -$pageSize)")
196 var previousCount = SqlUtil.getTotalChanges(db)
197
198 val iterativeStatement = db.compileStatement("INSERT INTO $FTS_TABLE_NAME ($FTS_TABLE_NAME, rank) values ('merge', $pageSize)")
199 iterativeStatement.execute()
200 var count = SqlUtil.getTotalChanges(db)
201
202 while (batchIterations < batchSize && count - previousCount >= noChangeThreshold) {
203 previousCount = count
204 iterativeStatement.execute()
205
206 count = SqlUtil.getTotalChanges(db)
207 batchIterations++
208 }
209
210 if (count - previousCount < noChangeThreshold) {
211 finished = true
212 }
213 }
214
215 totalIterations += batchIterations
216 totalBatches++
217 actualWorkTime += System.currentTimeMillis() - batchStartTime
218
219 if (actualWorkTime >= timeout) {
220 Log.w(TAG, "Timed out during optimization! We did $totalIterations iterations across $totalBatches batches, taking ${System.currentTimeMillis() - startTime} ms. Bailed out to avoid database lockup.")
221 return false
222 }
223
224 // We want to sleep in between batches to give other db operations a chance to run
225 ThreadUtil.sleep(50)
226 }
227
228 Log.d(TAG, "Took ${System.currentTimeMillis() - startTime} ms and $totalIterations iterations across $totalBatches batches to optimize. Of that time, $actualWorkTime ms were spent actually working (~${actualWorkTime / totalBatches} ms/batch). The rest was spent sleeping.")
229 return true
230 }
231
232 /**
233 * Drops all tables and recreates them.
234 */
235 @JvmOverloads
236 fun fullyResetTables(db: SQLiteDatabase = writableDatabase.sqlCipherDatabase) {
237 Log.w(TAG, "[fullyResetTables] Dropping tables and triggers...")
238 db.execSQL("DROP TABLE IF EXISTS $FTS_TABLE_NAME")
239 db.execSQL("DROP TABLE IF EXISTS ${FTS_TABLE_NAME}_config")
240 db.execSQL("DROP TABLE IF EXISTS ${FTS_TABLE_NAME}_content")
241 db.execSQL("DROP TABLE IF EXISTS ${FTS_TABLE_NAME}_data")
242 db.execSQL("DROP TABLE IF EXISTS ${FTS_TABLE_NAME}_idx")
243 db.execSQL("DROP TRIGGER IF EXISTS $TRIGGER_AFTER_INSERT")
244 db.execSQL("DROP TRIGGER IF EXISTS $TRIGGER_AFTER_DELETE")
245 db.execSQL("DROP TRIGGER IF EXISTS $TRIGGER_AFTER_UPDATE")
246
247 Log.w(TAG, "[fullyResetTables] Recreating table...")
248 CREATE_TABLE.forEach { db.execSQL(it) }
249
250 Log.w(TAG, "[fullyResetTables] Recreating triggers...")
251 CREATE_TRIGGERS.forEach { db.execSQL(it) }
252
253 RebuildMessageSearchIndexJob.enqueue()
254
255 Log.w(TAG, "[fullyResetTables] Done. Index will be rebuilt asynchronously)")
256 }
257
258 /**
259 * We want to turn the user's query into something that works well in a MATCH query.
260 * Most users expect some amount of fuzzy search, so what we do is break the string
261 * into tokens, escape each token (to allow the user to search for punctuation), and
262 * then append a * to the end of each token to turn it into a prefix query.
263 */
264 private fun createFullTextSearchQuery(query: String): String {
265 return query
266 .split(" ")
267 .map { it.trim() }
268 .filter { it.isNotEmpty() }
269 .map { fullTextSearchEscape(it) }
270 .joinToString(
271 separator = " ",
272 transform = { "$it*" }
273 )
274 }
275
276 /**
277 * If you wrap a string in quotes, sqlite considers it a string literal when making a MATCH query.
278 * In order to distinguish normal quotes, you turn all " into "".
279 */
280 private fun fullTextSearchEscape(s: String): String {
281 val quotesEscaped = s.replace("\"", "\"\"")
282 return "\"$quotesEscaped\""
283 }
284}