script to retroactively add commitids to past openbsd commits

initial revision

jcs.org 81349ca5

+291
+291
openbsd-commitid.rb
#!/usr/bin/ruby
#
# Retroactively assigns commitids to revisions in a CVS repository that
# predate commitid support.
#
# Revisions parsed out of `rlog` are stored in an SQLite database, grouped
# into changesets (by existing commitid, or by author/log/time proximity),
# each changeset lacking a commitid is given a random one, and finally
# `cvs admin -C` is run for every revision that had no commitid of its own.

require "sqlite3"

# All revisions of a single RCS ",v" file, as reported by `rlog`.
class RCSFile
  # Hash of revision number (String, e.g. "1.7") => RCSRevision
  attr_accessor :revisions

  # file: path to the ,v file to hand to rlog
  #
  # Raises RuntimeError when the rlog output does not start with an
  # "RCS file" header block or when a revision number shows up twice.
  def initialize(file)
    @revisions = {}

    # argv form: no shell is involved, so paths containing spaces or shell
    # metacharacters are passed to rlog untouched
    output = IO.popen([ "rlog", file ], &:read).force_encoding("binary")

    # rcs modified to end revs in ###
    # (the separators themselves are captured by the split and dropped again)
    blocks = output.split(/^(-{28}|={77})###\n?$/).
      reject{|b| b.match(/^(-{28}|={77})$/) }

    # blocks is empty when rlog printed nothing, hence the to_s
    if !blocks.first.to_s.match(/^RCS file/)
      raise "file #{file} didn't come out of rlog properly"
    end

    # first block is the file header, the rest are one block per revision
    blocks.shift
    blocks.each do |block|
      rev = RCSRevision.new(block)
      if @revisions[rev.revision]
        raise "duplicate revision #{rev.revision} in #{file}"
      end
      @revisions[rev.revision] = rev
    end
  end
end

# One revision block of rlog output.
class RCSRevision
  attr_accessor :revision, :date, :author, :state, :lines, :commitid, :log

  # "key: value" fields of the header line that are copied into attributes;
  # any other field rlog may print there is ignored
  HEADER_FIELDS = %w[date author state lines commitid].freeze

  # str: "revision 1.7\ndate: 1996/12/14 12:17:33; author: mickey; state: Exp; lines: +3 -3;\n-Wall'ing."
  #
  # Raises RuntimeError when the revision line, the header line or the date
  # cannot be parsed.
  def initialize(str)
    @revision = nil
    @date = 0
    @author = nil
    @state = nil
    @lines = nil
    @commitid = nil
    @log = nil

    # note: besides the leading newline this also drops indentation and blank
    # lines inside the log message; kept as-is because logs are compared
    # against ones already stored this way
    lines = str.gsub(/^\s*/, "").split("\n")
    # -> [
    #   "revision 1.7",
    #   "date: 1996/12/14 12:17:33; author: mickey; state: Exp; lines: +3 -3;",
    #   "-Wall'ing."
    # ]

    # strip out possible branches line in log
    if lines[2].to_s.match(/^branches:\s+([\d\.]+)/)
      lines.delete_at(2)
    end

    m = lines.first.to_s.match(/^revision ([\d\.]+)($|\tlocked by)/)
    if !m
      raise "no revision number in #{lines.first.inspect}"
    end
    @revision = m[1]
    # -> "1.7"

    # date/author/state/lines/commitid line
    if !lines[1]
      raise "revision #{@revision} has no date/author line"
    end

    lines[1].split(/;[ \t]*/).each do |piece|
      # limit of 2 keeps values that themselves contain ": " intact
      key, value = piece.split(": ", 2)
      next if !HEADER_FIELDS.include?(key)

      public_send(key + "=", value)
    end
    # -> @date = "1996/12/14 12:17:33", @author = "mickey", ...

    # @date is still the Integer 0 when the header had no date field
    m = @date.to_s.match(/\A(\d\d\d\d)\/(\d\d)\/(\d\d) (\d\d):(\d\d):(\d\d)\z/)
    if !m
      raise "invalid date #{@date}"
    end

    # the timestamp is interpreted as UTC
    @date = Time.utc(*m.captures.map(&:to_i)).to_i
    # -> @date = 850565853

    @log = (lines[2..-1] || []).join("\n")
  end
end

# Drives the whole process: scanning ,v files into the database, grouping
# revisions into changesets and writing commitids back with cvs.
class Scanner
  # characters a generated commitid is made of
  COMMITID_CHARS = [ *("0".."9"), *("A".."Z"), *("a".."z") ].freeze
  COMMITID_LENGTH = 16

  # dbf:  path of the SQLite database file (tables are created if missing)
  # root: directory containing the ,v files; scanned file names are stored
  #       with this prefix cut off
  def initialize(dbf, root)
    @db = SQLite3::Database.new dbf

    @db.execute "CREATE TABLE IF NOT EXISTS changesets
      (id integer primary key, date integer, author text, commitid text,
      log text)"
    @db.execute "CREATE UNIQUE INDEX IF NOT EXISTS u_commitid ON changesets
      (commitid)"

    @db.execute "CREATE TABLE IF NOT EXISTS files
      (id integer primary key, file text)"
    @db.execute "CREATE UNIQUE INDEX IF NOT EXISTS u_file ON files
      (file)"

    @db.execute "CREATE TABLE IF NOT EXISTS revisions
      (id integer primary key, file_id integer, changeset_id integer,
      date integer, version text, author text, commitid text, log text)"
    @db.execute "CREATE UNIQUE INDEX IF NOT EXISTS u_revision ON revisions
      (file_id, version)"
    @db.execute "CREATE INDEX IF NOT EXISTS empty_changesets ON revisions
      (changeset_id)"
    @db.execute "CREATE INDEX IF NOT EXISTS cs_by_commitid ON revisions
      (commitid, changeset_id)"
    @db.execute "CREATE INDEX IF NOT EXISTS all_revs_by_author ON revisions
      (author, date)"

    # every query below reads columns by name
    @db.results_as_hash = true

    @root = root
  end

  # Walks dir (default: the root given to the constructor) depth-first and
  # scans every file whose name ends in ",v".
  def recurse(dir = nil)
    dir ||= @root

    puts "recursing into #{dir}"

    Dir.glob(File.join(dir, "*")).each do |f|
      # Dir.exists? was removed in ruby 3.2
      if Dir.exist?(f)
        recurse(f)
      elsif f.match(/,v$/)
        scan(f)
      end
    end
  end

  # Parses the ,v file f with rlog and inserts its file row and any revision
  # rows that are not in the database yet.
  def scan(f)
    # name relative to the root, with "/Attic/" path components folded away
    canfile = f[@root.length..-1].gsub(/\/Attic\//, "/")
    puts " scanning file #{canfile}"

    rcs = RCSFile.new(f)

    fid = @db.execute("SELECT id FROM files WHERE file = ?", [ canfile ]).first
    if !fid
      @db.execute("INSERT INTO files (file) VALUES (?)", [ canfile ])
      fid = @db.execute("SELECT id FROM files WHERE file = ?",
        [ canfile ]).first
    end
    raise "could not create files row for #{canfile}" if !fid

    rcs.revisions.each do |r,rev|
      rid = @db.execute("SELECT id FROM revisions WHERE file_id = ? AND " +
        "version = ?", [ fid["id"], r ]).first
      next if rid

      @db.execute("INSERT INTO revisions (file_id, date, version, author, " +
        "commitid, log) VALUES (?, ?, ?, ?, ?, ?)", [ fid["id"], rev.date,
        rev.revision, rev.author, rev.commitid, rev.log ])

      puts "  inserted #{r}, authored #{rev.date} by #{rev.author}" +
        (rev.commitid ? ", commitid #{rev.commitid}" : "")
    end
  end

  # Attaches revisions that already carry a commitid, but no changeset yet,
  # to the changeset of that commitid (creating it when needed).
  def stray_commitids_to_changesets
    stray_commitids = @db.execute("SELECT DISTINCT author, commitid FROM " +
      "revisions WHERE commitid IS NOT NULL AND changeset_id IS NULL")
    stray_commitids.each do |row|
      csid = @db.execute("SELECT id FROM changesets WHERE commitid = ?",
        [ row["commitid"] ]).first
      if !csid
        @db.execute("INSERT INTO changesets (author, commitid) VALUES (?, ?)",
          [ row["author"], row["commitid"] ])
        csid = @db.execute("SELECT id FROM changesets WHERE commitid = ?",
          [ row["commitid"] ]).first
      end
      raise "could not create changeset for #{row["commitid"]}" if !csid

      puts "commitid #{row["commitid"]} -> changeset #{csid["id"]}"

      @db.execute("UPDATE revisions SET changeset_id = ? WHERE commitid = ?",
        [ csid["id"], row["commitid"] ])
    end
  end

  # Puts every revision that has no changeset into a newly created one.
  #
  # Raises RuntimeError if revisions without a changeset remain afterwards.
  def group_into_changesets
    new_sets = []
    last_row = {}
    cur_set = []

    @db.execute("SELECT * FROM revisions WHERE changeset_id IS NULL ORDER " +
    "BY author ASC, date ASC") do |row|
      # commits by the same author with the same log message (unless they're
      # initial imports - 1.1.1.1) within 30 seconds of each other are grouped
      # together
      same_commit = last_row.any? && row["author"] == last_row["author"] &&
        (row["log"] == last_row["log"] || row["log"] == "Initial revision" ||
        last_row["log"] == "Initial revision") &&
        row["date"].to_i - last_row["date"].to_i <= 30

      # anything but the very first row that doesn't continue the running set
      # closes it and starts a new one
      if last_row.any? && !same_commit
        new_sets.push cur_set if cur_set.any?
        cur_set = []
      end

      cur_set.push row["id"].to_i
      last_row = row
    end

    if cur_set.any?
      new_sets.push cur_set
    end

    new_sets.each do |s|
      puts "new set with revision ids #{s.inspect}"
      @db.execute("INSERT INTO changesets (id) VALUES (NULL)")
      id = @db.execute("SELECT last_insert_rowid() AS id").first["id"]
      raise "could not create changeset for revisions #{s.inspect}" if !id

      @db.execute("UPDATE revisions SET changeset_id = ? WHERE id IN (" +
        s.map{|a| "?" }.join(",") + ")", [ id ] + s)
    end

    # one row is enough to know something was left behind
    if @db.execute("SELECT 1 FROM revisions WHERE changeset_id IS NULL " +
    "LIMIT 1").any?
      raise "still have revisions with empty changesets"
    end
  end

  # Fills in date, author, log and commitid of every changeset whose date is
  # still NULL from its revisions; changesets without a commitid get a
  # random one.
  #
  # Raises RuntimeError when the revisions of a changeset disagree on the
  # author or when a changeset has no dated revision.
  def fill_in_changeset_data
    # collect first, the changesets table is updated while walking the list
    cses = {}
    @db.execute("SELECT id, commitid FROM changesets WHERE date IS NULL") do |c|
      cses[c["id"]] = c["commitid"]
    end

    cses.each do |csid,comid|
      date = nil
      commitid = comid
      log = nil
      author = nil

      @db.execute("SELECT * FROM revisions WHERE changeset_id = ? ORDER BY " +
      "date ASC", [ csid ]) do |rev|
        # rows come oldest first, so this keeps the earliest date
        date ||= rev["date"]

        # the log of the latest revision that isn't an initial import wins
        if rev["log"] != "Initial revision"
          log = rev["log"]
        end

        if author && rev["author"] != author
          raise "authors different between revs of #{csid}"
        end
        author = rev["author"]
      end

      if commitid.to_s == ""
        commitid = Array.new(COMMITID_LENGTH) { COMMITID_CHARS.sample }.join
      end

      if !date
        raise "no date for changeset #{csid}"
      end

      puts "changeset #{csid} -> commitid #{commitid}"

      @db.execute("UPDATE changesets SET date = ?, commitid = ?, log = ?, " +
        "author = ? WHERE id = ?", [ date, commitid, log, author, csid ])
    end
  end

  # Updates the checkout in checked_out_dir, then runs `cvs admin -C` with
  # the changeset's commitid for every revision that had no commitid of its
  # own when it was scanned.
  #
  # A failing cvs command is reported on stderr and does not stop the run.
  def repo_surgery(checked_out_dir)
    Dir.chdir(checked_out_dir)
    if !system("cvs", "-q", "up", "-PACd")
      warn "cvs up failed in #{checked_out_dir}"
    end

    @db.execute("SELECT files.file, changesets.commitid, revisions.version " +
    "FROM revisions LEFT OUTER JOIN files ON files.id = file_id " +
    "LEFT OUTER JOIN changesets ON revisions.changeset_id = changesets.id " +
    "WHERE revisions.commitid IS NULL ORDER BY files.id") do |rev|
      # stored names end in ",v", cvs wants the working file name
      working_file = rev["file"].gsub(/,v$/, "")
      revspec = "#{rev["version"]}:#{rev["commitid"]}"

      if !system("cvs", "-q", "admin", "-C", revspec, working_file)
        warn "cvs admin -C #{revspec} failed for #{working_file}"
      end
    end
  end
end

sc = Scanner.new("openbsdv.db", "/var/cvs/src/")
#sc.recurse
sc.group_into_changesets
sc.stray_commitids_to_changesets
sc.fill_in_changeset_data
sc.repo_surgery("/usr/src.local")