package org.tm.archive.database

import android.annotation.SuppressLint
import android.content.Context
import android.database.Cursor
import android.text.TextUtils
import net.zetetic.database.sqlcipher.SQLiteDatabase
import org.intellij.lang.annotations.Language
import org.signal.core.util.SqlUtil
import org.signal.core.util.ThreadUtil
import org.signal.core.util.logging.Log
import org.signal.core.util.withinTransaction
import org.tm.archive.jobs.RebuildMessageSearchIndexJob

/**
 * Contains all databases necessary for full-text search (FTS).
 */
@SuppressLint("RecipientIdDatabaseReferenceUsage", "ThreadIdDatabaseReferenceUsage") // Handles updates via triggers
class SearchTable(context: Context, databaseHelper: SignalDatabase) : DatabaseTable(context, databaseHelper) {
  companion object {
    private val TAG = Log.tag(SearchTable::class.java)

    const val FTS_TABLE_NAME = "message_fts"
    const val ID = "rowid"
    const val BODY = MessageTable.BODY
    const val THREAD_ID = MessageTable.THREAD_ID
    const val SNIPPET = "snippet"
    const val CONVERSATION_RECIPIENT = "conversation_recipient"
    const val MESSAGE_RECIPIENT = "message_recipient"
    const val IS_MMS = "is_mms"
    const val MESSAGE_ID = "message_id"
    const val SNIPPET_WRAP = "..."

    /**
     * External-content FTS5 table: it indexes [MessageTable.BODY] but stores no copy of the
     * message rows itself ([MessageTable] is the content table, keyed by rowid).
     */
    @Language("sql")
    val CREATE_TABLE = arrayOf(
      "CREATE VIRTUAL TABLE $FTS_TABLE_NAME USING fts5($BODY, $THREAD_ID UNINDEXED, content=${MessageTable.TABLE_NAME}, content_rowid=${MessageTable.ID})"
    )

    private const val TRIGGER_AFTER_INSERT = "message_ai"
    private const val TRIGGER_AFTER_DELETE = "message_ad"
    private const val TRIGGER_AFTER_UPDATE = "message_au"

    /**
     * Triggers that keep the FTS index in sync with the message table. Because this is an
     * external-content table, deletes/updates are communicated with the special
     * `INSERT INTO table(table, ...) VALUES('delete', ...)` form required by FTS5.
     */
    @Language("sql")
    val CREATE_TRIGGERS = arrayOf(
      """
        CREATE TRIGGER $TRIGGER_AFTER_INSERT AFTER INSERT ON ${MessageTable.TABLE_NAME} BEGIN
          INSERT INTO $FTS_TABLE_NAME($ID, $BODY, $THREAD_ID) VALUES (new.${MessageTable.ID}, new.${MessageTable.BODY}, new.${MessageTable.THREAD_ID});
        END;
      """,
      """
        CREATE TRIGGER $TRIGGER_AFTER_DELETE AFTER DELETE ON ${MessageTable.TABLE_NAME} BEGIN
          INSERT INTO $FTS_TABLE_NAME($FTS_TABLE_NAME, $ID, $BODY, $THREAD_ID) VALUES('delete', old.${MessageTable.ID}, old.${MessageTable.BODY}, old.${MessageTable.THREAD_ID});
        END;
      """,
      """
        CREATE TRIGGER $TRIGGER_AFTER_UPDATE AFTER UPDATE ON ${MessageTable.TABLE_NAME} BEGIN
          INSERT INTO $FTS_TABLE_NAME($FTS_TABLE_NAME, $ID, $BODY, $THREAD_ID) VALUES('delete', old.${MessageTable.ID}, old.${MessageTable.BODY}, old.${MessageTable.THREAD_ID});
          INSERT INTO $FTS_TABLE_NAME($ID, $BODY, $THREAD_ID) VALUES (new.${MessageTable.ID}, new.${MessageTable.BODY}, new.${MessageTable.THREAD_ID});
        END;
      """
    )

    /** Global message search. Filters out GV2 system/payment messages, scheduled sends, and non-latest edit revisions. */
    @Language("sql")
    private const val MESSAGES_QUERY = """
      SELECT
        ${ThreadTable.TABLE_NAME}.${ThreadTable.RECIPIENT_ID} AS $CONVERSATION_RECIPIENT,
        ${MessageTable.TABLE_NAME}.${MessageTable.FROM_RECIPIENT_ID} AS $MESSAGE_RECIPIENT,
        snippet($FTS_TABLE_NAME, -1, '', '', '$SNIPPET_WRAP', 7) AS $SNIPPET,
        ${MessageTable.TABLE_NAME}.${MessageTable.DATE_RECEIVED},
        $FTS_TABLE_NAME.$THREAD_ID,
        $FTS_TABLE_NAME.$BODY,
        $FTS_TABLE_NAME.$ID AS $MESSAGE_ID,
        1 AS $IS_MMS
      FROM
        ${MessageTable.TABLE_NAME}
          INNER JOIN $FTS_TABLE_NAME ON $FTS_TABLE_NAME.$ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}
          INNER JOIN ${ThreadTable.TABLE_NAME} ON $FTS_TABLE_NAME.$THREAD_ID = ${ThreadTable.TABLE_NAME}.${ThreadTable.ID}
      WHERE
        $FTS_TABLE_NAME MATCH ? AND
        ${MessageTable.TABLE_NAME}.${MessageTable.TYPE} & ${MessageTypes.GROUP_V2_BIT} = 0 AND
        ${MessageTable.TABLE_NAME}.${MessageTable.TYPE} & ${MessageTypes.SPECIAL_TYPE_PAYMENTS_NOTIFICATION} = 0 AND
        ${MessageTable.TABLE_NAME}.${MessageTable.SCHEDULED_DATE} < 0 AND
        ${MessageTable.TABLE_NAME}.${MessageTable.LATEST_REVISION_ID} IS NULL
      ORDER BY ${MessageTable.DATE_RECEIVED} DESC
      LIMIT 500
    """

    /** Same as [MESSAGES_QUERY], but additionally constrained to a single thread (second `?` arg). */
    @Language("sql")
    private const val MESSAGES_FOR_THREAD_QUERY = """
      SELECT
        ${ThreadTable.TABLE_NAME}.${ThreadTable.RECIPIENT_ID} AS $CONVERSATION_RECIPIENT,
        ${MessageTable.TABLE_NAME}.${MessageTable.FROM_RECIPIENT_ID} AS $MESSAGE_RECIPIENT,
        snippet($FTS_TABLE_NAME, -1, '', '', '$SNIPPET_WRAP', 7) AS $SNIPPET,
        ${MessageTable.TABLE_NAME}.${MessageTable.DATE_RECEIVED},
        $FTS_TABLE_NAME.$THREAD_ID,
        $FTS_TABLE_NAME.$BODY,
        $FTS_TABLE_NAME.$ID AS $MESSAGE_ID,
        1 AS $IS_MMS
      FROM
        ${MessageTable.TABLE_NAME}
          INNER JOIN $FTS_TABLE_NAME ON $FTS_TABLE_NAME.$ID = ${MessageTable.TABLE_NAME}.${MessageTable.ID}
          INNER JOIN ${ThreadTable.TABLE_NAME} ON $FTS_TABLE_NAME.$THREAD_ID = ${ThreadTable.TABLE_NAME}.${ThreadTable.ID}
      WHERE
        $FTS_TABLE_NAME MATCH ? AND
        ${MessageTable.TABLE_NAME}.${MessageTable.THREAD_ID} = ? AND
        ${MessageTable.TABLE_NAME}.${MessageTable.TYPE} & ${MessageTypes.GROUP_V2_BIT} = 0 AND
        ${MessageTable.TABLE_NAME}.${MessageTable.TYPE} & ${MessageTypes.SPECIAL_TYPE_PAYMENTS_NOTIFICATION} = 0 AND
        ${MessageTable.TABLE_NAME}.${MessageTable.SCHEDULED_DATE} < 0 AND
        ${MessageTable.TABLE_NAME}.${MessageTable.LATEST_REVISION_ID} IS NULL
      ORDER BY ${MessageTable.DATE_RECEIVED} DESC
      LIMIT 500
    """
  }

  /**
   * Searches all message bodies for [query] (tokenized into prefix-match terms).
   *
   * @return a cursor over up to 500 matches (newest first), or null if the query is empty after tokenization.
   */
  fun queryMessages(query: String): Cursor? {
    val fullTextSearchQuery = createFullTextSearchQuery(query)
    return if (fullTextSearchQuery.isEmpty()) {
      null
    } else {
      readableDatabase.rawQuery(MESSAGES_QUERY, SqlUtil.buildArgs(fullTextSearchQuery))
    }
  }

  /**
   * Like [queryMessages], but restricted to the thread with id [threadId].
   *
   * @return a cursor over up to 500 matches (newest first), or null if the query is empty after tokenization.
   */
  fun queryMessages(query: String, threadId: Long): Cursor? {
    val fullTextSearchQuery = createFullTextSearchQuery(query)
    // Consistent with the overload above: the tokenized query is non-null, so a plain isEmpty() suffices.
    return if (fullTextSearchQuery.isEmpty()) {
      null
    } else {
      readableDatabase.rawQuery(MESSAGES_FOR_THREAD_QUERY, SqlUtil.buildArgs(fullTextSearchQuery, threadId))
    }
  }

  /**
   * Re-adds every message to the index. It's fine to insert the same message twice; the table will naturally de-dupe.
   *
   * In order to prevent the database from locking up with super large inserts, this will perform the re-index in batches of the size you specify.
   * It is not guaranteed that every batch will be the same size, but rather that the batches will be _no larger_ than the specified size.
   *
   * Warning: This is a potentially extremely-costly operation! It can take 10+ seconds on large installs and/or slow devices.
   * Be smart about where you call this.
   */
  fun rebuildIndex(batchSize: Long = 10_000L) {
    val maxId: Long = SignalDatabase.messages.getNextId()

    Log.i(TAG, "Re-indexing. Operating on ID's 1-$maxId in steps of $batchSize.")

    for (i in 1..maxId step batchSize) {
      Log.i(TAG, "Reindexing ID's [$i, ${i + batchSize})")
      writableDatabase.execSQL(
        """
        INSERT INTO $FTS_TABLE_NAME ($ID, $BODY)
        SELECT
          ${MessageTable.ID},
          ${MessageTable.BODY}
        FROM
          ${MessageTable.TABLE_NAME}
        WHERE
          ${MessageTable.ID} >= $i AND
          ${MessageTable.ID} < ${i + batchSize}
        """
      )
    }
  }

  /**
   * This performs the same thing as the `optimize` command in SQLite, but broken into iterative stages to avoid locking up the database for too long.
   * If what's going on in this method seems weird, that's because it is, but please read the sqlite docs -- we're following their algorithm:
   * https://www.sqlite.org/fts5.html#the_optimize_command
   *
   * Note that in order for the [SqlUtil.getTotalChanges] call to work, we have to be within a transaction, or else the connection pool screws everything up
   * (the stats are on a per-connection basis).
   *
   * There's this double-batching mechanism happening here to strike a balance between making individual transactions short while also not hammering the
   * database with a ton of independent transactions.
   *
   * To give you some ballpark numbers, on a large database (~400k messages), it takes ~75 iterations to fully optimize everything.
   *
   * @param timeout max cumulative working time (ms, excluding inter-batch sleeps) before bailing out.
   * @return true if optimization finished, false if it timed out.
   */
  fun optimizeIndex(timeout: Long): Boolean {
    val pageSize = 64 // chosen through experimentation
    val batchSize = 10 // chosen through experimentation
    val noChangeThreshold = 2 // if less changes occurred than this, operation is considered no-op (see sqlite docs ref'd in kdoc)

    val startTime = System.currentTimeMillis()
    var totalIterations = 0
    var totalBatches = 0
    var actualWorkTime = 0L
    var finished = false

    while (!finished) {
      var batchIterations = 0
      val batchStartTime = System.currentTimeMillis()

      writableDatabase.withinTransaction { db ->
        // Note the negative page size -- see sqlite docs ref'd in kdoc
        db.execSQL("INSERT INTO $FTS_TABLE_NAME ($FTS_TABLE_NAME, rank) values ('merge', -$pageSize)")
        var previousCount = SqlUtil.getTotalChanges(db)

        val iterativeStatement = db.compileStatement("INSERT INTO $FTS_TABLE_NAME ($FTS_TABLE_NAME, rank) values ('merge', $pageSize)")
        iterativeStatement.execute()
        var count = SqlUtil.getTotalChanges(db)

        // Keep merging until the batch budget is spent or a merge stops changing rows.
        while (batchIterations < batchSize && count - previousCount >= noChangeThreshold) {
          previousCount = count
          iterativeStatement.execute()

          count = SqlUtil.getTotalChanges(db)
          batchIterations++
        }

        if (count - previousCount < noChangeThreshold) {
          finished = true
        }
      }

      totalIterations += batchIterations
      totalBatches++
      actualWorkTime += System.currentTimeMillis() - batchStartTime

      if (actualWorkTime >= timeout) {
        Log.w(TAG, "Timed out during optimization! We did $totalIterations iterations across $totalBatches batches, taking ${System.currentTimeMillis() - startTime} ms. Bailed out to avoid database lockup.")
        return false
      }

      // We want to sleep in between batches to give other db operations a chance to run
      ThreadUtil.sleep(50)
    }

    Log.d(TAG, "Took ${System.currentTimeMillis() - startTime} ms and $totalIterations iterations across $totalBatches batches to optimize. Of that time, $actualWorkTime ms were spent actually working (~${actualWorkTime / totalBatches} ms/batch). The rest was spent sleeping.")
    return true
  }

  /**
   * Drops all tables and recreates them, then schedules an async rebuild of the index.
   */
  @JvmOverloads
  fun fullyResetTables(db: SQLiteDatabase = writableDatabase.sqlCipherDatabase) {
    Log.w(TAG, "[fullyResetTables] Dropping tables and triggers...")
    db.execSQL("DROP TABLE IF EXISTS $FTS_TABLE_NAME")
    // FTS5 shadow tables -- dropped explicitly in case the virtual table itself is corrupt.
    db.execSQL("DROP TABLE IF EXISTS ${FTS_TABLE_NAME}_config")
    db.execSQL("DROP TABLE IF EXISTS ${FTS_TABLE_NAME}_content")
    db.execSQL("DROP TABLE IF EXISTS ${FTS_TABLE_NAME}_data")
    db.execSQL("DROP TABLE IF EXISTS ${FTS_TABLE_NAME}_idx")
    db.execSQL("DROP TRIGGER IF EXISTS $TRIGGER_AFTER_INSERT")
    db.execSQL("DROP TRIGGER IF EXISTS $TRIGGER_AFTER_DELETE")
    db.execSQL("DROP TRIGGER IF EXISTS $TRIGGER_AFTER_UPDATE")

    Log.w(TAG, "[fullyResetTables] Recreating table...")
    CREATE_TABLE.forEach { db.execSQL(it) }

    Log.w(TAG, "[fullyResetTables] Recreating triggers...")
    CREATE_TRIGGERS.forEach { db.execSQL(it) }

    RebuildMessageSearchIndexJob.enqueue()

    Log.w(TAG, "[fullyResetTables] Done. Index will be rebuilt asynchronously.")
  }

  /**
   * We want to turn the user's query into something that works well in a MATCH query.
   * Most users expect some amount of fuzzy search, so what we do is break the string
   * into tokens, escape each token (to allow the user to search for punctuation), and
   * then append a * to the end of each token to turn it into a prefix query.
   */
  private fun createFullTextSearchQuery(query: String): String {
    return query
      .split(" ")
      .map { it.trim() }
      .filter { it.isNotEmpty() }
      .map { fullTextSearchEscape(it) }
      .joinToString(
        separator = " ",
        transform = { "$it*" }
      )
  }

  /**
   * If you wrap a string in quotes, sqlite considers it a string literal when making a MATCH query.
   * In order to distinguish normal quotes, you turn all " into "".
   */
  private fun fullTextSearchEscape(s: String): String {
    val quotesEscaped = s.replace("\"", "\"\"")
    return "\"$quotesEscaped\""
  }
}