This repository has no description.
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge pull request #297 from ri72miieop/main

EPI-136 - Add username snapshot to archive_upload table

authored by

ri72miieop and committed by
GitHub
62294454 5250ae6a

+66 -31
+6 -4
scripts/import_from_files_to_db.ts
··· 36 36 } 37 37 38 38 39 - const GLOBAL_ARCHIVE_PATH = process.env.ARCHIVE_PATH || './data/downloads/archives' 39 + const GLOBAL_ARCHIVE_PATH = process.env.ARCHIVE_PATH || '../data/downloads/archives' 40 40 console.log('Using ARCHIVE_PATH:', GLOBAL_ARCHIVE_PATH) 41 41 42 42 // Parse command line arguments for limiting archives ··· 378 378 } 379 379 async function Upsert_Archive_Upload(data: any, fileRoot:string) { 380 380 const accountId = data.account[0].account.accountId; 381 + const username = data.account[0].account.username; 381 382 const tweetsDate=data.tweets.map((t:any)=>new Date(t.tweet.created_at)).sort((a:any, b:any) => a - b);; 382 383 const start_date = tweetsDate[0]; 383 384 const end_date = tweetsDate[tweetsDate.length-1]; 384 385 385 - const archive_upload : InsertArchiveUpload ={ 386 - account_id:accountId, 386 + const archive_upload = { 387 + account_id:accountId, 388 + username:username, 387 389 start_date:start_date.toISOString(), 388 390 end_date:end_date.toISOString(), 389 391 archive_at:new Date().toISOString(), 390 - keep_private:false} 392 + keep_private:false} as InsertArchiveUpload 391 393 392 394 let getItem = (item: any) => { 393 395 return item as InsertArchiveUpload;
+2 -2
services/process_archive/process_archive_upload.ts
··· 928 928 logger.info('Fetching archive_upload records ready for processing...') 929 929 930 930 const ready = await sql` 931 - SELECT au.id, au.account_id, a.username, au.archive_at 932 - FROM public.archive_upload au left join all_account a on au.account_id=a.account_id 931 + SELECT au.id, au.account_id, au.username, au.archive_at 932 + FROM public.archive_upload au 933 933 WHERE upload_phase IN ('ready_for_commit') 934 934 ORDER BY archive_at ASC 935 935 `
+4 -1
sql/functions/archive/04_insert_temp_archive_upload.sql
··· 1 1 CREATE OR REPLACE FUNCTION public.insert_temp_archive_upload( 2 2 p_account_id TEXT, 3 + p_username TEXT, 3 4 p_archive_at timestamp with time zone, 4 5 p_keep_private BOOLEAN, 5 6 p_upload_likes BOOLEAN, ··· 27 28 EXECUTE format(' 28 29 INSERT INTO temp.archive_upload_%s ( 29 30 account_id, 31 + username, 30 32 archive_at, 31 33 keep_private, 32 34 upload_likes, 33 35 start_date, 34 36 end_date 35 37 ) 36 - VALUES ($1, $2, $3, $4, $5, $6) 38 + VALUES ($1, $2, $3, $4, $5, $6, $7) 37 39 RETURNING id 38 40 ', p_suffix) 39 41 USING 40 42 p_account_id, 43 + p_username, 41 44 p_archive_at, 42 45 p_keep_private, 43 46 p_upload_likes,
+5 -1
src/lib/db_insert.ts
··· 104 104 select('id,archive_at').eq('account_id', accountId).in('upload_phase', ['uploading', 'ready_for_commit']) 105 105 .order('created_at', { ascending: false }).limit(1).maybeSingle() 106 106 107 + const username = archiveData.account[0].account.username; 108 + 107 109 let supabaseUpsertQuery; 108 110 if (lastUploadedArchive) { 109 111 supabaseUpsertQuery = supabase 110 112 .from('archive_upload') 111 113 .update({ 114 + username: username, 112 115 archive_at: latestTweetDate, 113 116 keep_private: uploadOptions.keepPrivate, 114 117 upload_likes: uploadOptions.uploadLikes, ··· 126 129 .from('archive_upload') 127 130 .insert({ 128 131 account_id: accountId, 132 + username: username, 129 133 archive_at: latestTweetDate, 130 134 keep_private: uploadOptions.keepPrivate, 131 135 upload_likes: uploadOptions.uploadLikes, 132 136 start_date: uploadOptions.startDate, 133 137 end_date: uploadOptions.endDate, 134 - upload_phase: 'uploading', 138 + upload_phase: 'uploading' 135 139 }) 136 140 .select('id') 137 141 .single()
+17
supabase/migrations/20251009160144_add_username_to_archive_upload.sql
··· 1 + -- Add username column to archive_upload table to store the handle at time of upload 2 + -- This prevents issues when users change their handle after uploading archives 3 + 4 + ALTER TABLE "public"."archive_upload" 5 + ADD COLUMN "username" text; 6 + 7 + -- Add a comment to explain the purpose 8 + COMMENT ON COLUMN "public"."archive_upload"."username" IS 'Username/handle at the time of archive upload - used to locate archive files in storage'; 9 + 10 + -- Populate username column in existing archive_upload records 11 + -- This migration updates existing records to have the username from all_account table 12 + 13 + UPDATE "public"."archive_upload" 14 + SET "username" = "all_account"."username" 15 + FROM "public"."all_account" 16 + WHERE "archive_upload"."account_id" = "all_account"."account_id" 17 + AND "archive_upload"."username" IS NULL;
+3
supabase/migrations/20251023163841_add_indexes_for_performance.sql
··· 1 + CREATE INDEX IF NOT EXISTS idx_archive_upload_username ON public.archive_upload USING btree (username); 2 + 3 + CREATE INDEX IF NOT EXISTS idx_tweets_account_created ON public.tweets USING btree (account_id, created_at);
+4 -9
supabase/schemas/020_tables.sql
··· 54 54 "upload_likes" boolean DEFAULT true, 55 55 "start_date" "date", 56 56 "end_date" "date", 57 - "upload_phase" "public"."upload_phase_enum" DEFAULT 'uploading'::"public"."upload_phase_enum" 57 + "upload_phase" "public"."upload_phase_enum" DEFAULT 'uploading'::"public"."upload_phase_enum", 58 + "username" "text" 58 59 ); 59 60 ALTER TABLE "public"."archive_upload" OWNER TO "postgres"; 60 61 ··· 218 219 quoted_tweet_id TEXT NOT NULL, 219 220 220 221 -- Composite primary key 221 - PRIMARY KEY (tweet_id, quoted_tweet_id), 222 - 223 - -- Foreign key constraints 224 - CONSTRAINT fk_quote_tweets_tweet_id FOREIGN KEY (tweet_id) REFERENCES public.tweets (tweet_id) ON DELETE CASCADE 222 + PRIMARY KEY (tweet_id, quoted_tweet_id) 225 223 ); 226 224 227 225 ALTER TABLE "public"."quote_tweets" OWNER TO "postgres"; 228 226 229 227 CREATE TABLE IF NOT EXISTS public.retweets ( 230 228 tweet_id TEXT NOT NULL PRIMARY KEY, 231 - retweeted_tweet_id TEXT NULL, 232 - 233 - CONSTRAINT fk_retweets_tweet_id FOREIGN KEY (tweet_id) REFERENCES public.tweets (tweet_id) ON DELETE CASCADE, 234 - CONSTRAINT fk_retweets_retweeted_tweet_id FOREIGN KEY (retweeted_tweet_id) REFERENCES public.tweets (tweet_id) ON DELETE SET NULL 229 + retweeted_tweet_id TEXT NULL 235 230 ); 236 231 237 232 ALTER TABLE "public"."retweets" OWNER TO "postgres";
+2
supabase/schemas/030_indexes.sql
··· 5 5 6 6 -- public.archive_upload 7 7 CREATE INDEX "idx_archive_upload_account_id" ON "public"."archive_upload" USING "btree" ("account_id"); 8 + CREATE INDEX "idx_archive_upload_username" ON "public"."archive_upload" USING "btree" ("username"); 8 9 9 10 -- public.conversations 10 11 CREATE INDEX "idx_conversation_id" ON "public"."conversations" USING "btree" ("conversation_id"); ··· 58 59 CREATE INDEX "text_fts" ON "public"."tweets" USING "gin" ("fts"); 59 60 CREATE INDEX "tweets_account_id_favorite_idx" ON "public"."tweets" USING "btree" ("account_id", "favorite_count" DESC); 60 61 CREATE INDEX "tweets_account_id_retweet_idx" ON "public"."tweets" USING "btree" ("account_id", "retweet_count" DESC); 62 + CREATE INDEX "idx_tweets_account_created" ON public.tweets (account_id, created_at); 61 63 62 64 -- public.user_mentions 63 65 CREATE INDEX "idx_user_mentions_mentioned_user_id" ON "public"."user_mentions" USING "btree" ("mentioned_user_id");
-10
supabase/schemas/040_views.sql
··· 2 2 3 3 -- public.account moved to 032_views_prereq.sql 4 4 5 - -- public.quote_tweets 6 - CREATE OR REPLACE VIEW "public"."quote_tweets" AS 7 - SELECT "t"."tweet_id", 8 - "substring"("tu"."expanded_url", 'status/([0-9]+)'::"text") AS "quoted_tweet_id", 9 - "substring"("tu"."expanded_url", 'https?://(?:www\\.)?twitter\\.com/([^/]+)/status/'::"text") AS "quoted_tweet_username" 10 - FROM ("public"."tweet_urls" "tu" 11 - JOIN "public"."tweets" "t" ON (("tu"."tweet_id" = "t"."tweet_id"))) 12 - WHERE (("tu"."expanded_url" ~~ 'https://twitter.com/%/status/%'::"text") OR ("tu"."expanded_url" ~~ 'https://x.com/%/status/%'::"text")); 13 - ALTER TABLE "public"."quote_tweets" OWNER TO "postgres"; 14 - 15 5 -- public.enriched_tweets 16 6 CREATE OR REPLACE VIEW "public"."enriched_tweets" AS 17 7 SELECT "t"."tweet_id",
+13
supabase/schemas/050_constraints.sql
··· 177 177 ADD CONSTRAINT "optin_username_key" UNIQUE ("username"); 178 178 ALTER TABLE ONLY "public"."optin" 179 179 ADD CONSTRAINT "optin_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "auth"."users"("id") ON DELETE CASCADE; 180 + 181 + 182 + -- public.quote_tweets foreign keys 183 + ALTER TABLE ONLY "public"."quote_tweets" 184 + ADD CONSTRAINT "fk_quote_tweets_tweet_id" FOREIGN KEY ("tweet_id") REFERENCES "public"."tweets"("tweet_id") ON DELETE CASCADE; 185 + ALTER TABLE ONLY "public"."quote_tweets" 186 + ADD CONSTRAINT "fk_quote_tweets_quoted_tweet_id" FOREIGN KEY ("quoted_tweet_id") REFERENCES "public"."tweets"("tweet_id") ON DELETE CASCADE; 187 + 188 + -- public.retweets foreign keys 189 + ALTER TABLE ONLY "public"."retweets" 190 + ADD CONSTRAINT "fk_retweets_tweet_id" FOREIGN KEY ("tweet_id") REFERENCES "public"."tweets"("tweet_id") ON DELETE CASCADE; 191 + ALTER TABLE ONLY "public"."retweets" 192 + ADD CONSTRAINT "fk_retweets_retweeted_tweet_id" FOREIGN KEY ("retweeted_tweet_id") REFERENCES "public"."tweets"("tweet_id") ON DELETE SET NULL;
+3 -2
supabase/schemas/070_functions.sql
··· 934 934 DECLARE 935 935 v_archive_upload_id BIGINT; 936 936 v_account_id TEXT; 937 + v_username TEXT; 937 938 v_archive_at TIMESTAMP WITH TIME ZONE; 938 939 v_keep_private BOOLEAN; 939 940 v_upload_likes BOOLEAN; ··· 974 975 v_phase_start := clock_timestamp(); 975 976 RAISE NOTICE 'Phase 2: Getting archive upload data'; 976 977 -- Get the archive upload that's ready for commit 977 - SELECT id, archive_at, keep_private, upload_likes, start_date, end_date 978 - INTO v_archive_upload_id, v_archive_at, v_keep_private, v_upload_likes, v_start_date, v_end_date 978 + SELECT id, archive_at, keep_private, upload_likes, start_date, end_date, username 979 + INTO v_archive_upload_id, v_archive_at, v_keep_private, v_upload_likes, v_start_date, v_end_date, v_username 979 980 FROM public.archive_upload 980 981 WHERE account_id = v_account_id 981 982 AND upload_phase = 'ready_for_commit'
+6 -2
supabase/schemas/prod.sql
··· 34 34 DECLARE 35 35 v_archive_upload_id BIGINT; 36 36 v_account_id TEXT; 37 + v_username TEXT; 37 38 v_archive_at TIMESTAMP WITH TIME ZONE; 38 39 v_keep_private BOOLEAN; 39 40 v_upload_likes BOOLEAN; ··· 71 72 RAISE NOTICE 'Phase 2: Getting archive upload data'; 72 73 -- 2. Get the latest archive upload data from temp.archive_upload 73 74 EXECUTE format(' 74 - SELECT archive_at, keep_private, upload_likes, start_date, end_date 75 + SELECT archive_at, keep_private, upload_likes, start_date, end_date, username 75 76 FROM temp.archive_upload_%s 76 77 ORDER BY archive_at DESC 77 78 LIMIT 1 78 - ', p_suffix) INTO v_archive_at, v_keep_private, v_upload_likes, v_start_date, v_end_date; 79 + ', p_suffix) INTO v_archive_at, v_keep_private, v_upload_likes, v_start_date, v_end_date, v_username; 79 80 80 81 RAISE NOTICE 'Phase 3: Inserting archive upload data'; 81 82 -- 3. Insert or update archive_upload and get the ID 82 83 INSERT INTO public.archive_upload ( 83 84 account_id, 85 + username, 84 86 archive_at, 85 87 created_at, 86 88 keep_private, ··· 91 93 ) 92 94 VALUES ( 93 95 v_account_id, 96 + v_username, 94 97 v_archive_at, 95 98 CURRENT_TIMESTAMP, 96 99 v_keep_private, ··· 102 105 ON CONFLICT (account_id, archive_at) 103 106 DO UPDATE SET 104 107 account_id = EXCLUDED.account_id, 108 + username = EXCLUDED.username, 105 109 created_at = CURRENT_TIMESTAMP, 106 110 keep_private = EXCLUDED.keep_private, 107 111 upload_likes = EXCLUDED.upload_likes,
+1
tests/db-insertion/db-insertion.test.ts
··· 122 122 .from('archive_upload') 123 123 .insert({ 124 124 account_id: accountId, 125 + username: username, 125 126 archive_at: latestTweetDate, 126 127 keep_private: uploadOptions.keepPrivate, 127 128 upload_likes: uploadOptions.uploadLikes,