script to retroactively add commitids to past openbsd commits

properly deal with files with dead 1.1 revisions

files added on branches/imports have a dead 1.1 revision, so our
checkout of -r1.1 fails to bring in these files. when scanning each
file, record which version is the first we see that isn't dead, and
then individually check out those revisions of those files.

also fixes an issue with weird files in the openbsd src tree like
sbin/isakmpd/pkcs.c which have no versions before 1.4 for some
reason.

with these changes, a full import and writing back of commitids on
the openbsd src tree is possible

+67 -38
+12 -5
rcsfile.rb
··· 1 1 class RCSFile 2 - attr_accessor :revisions 2 + attr_accessor :revisions, :first_undead_version 3 3 4 4 def initialize(file) 5 5 @revisions = {} ··· 7 7 # rcs modified to end revs in ### 8 8 blocks = [] 9 9 IO.popen([ "rlog", file ]) do |rlog| 10 + # rlog modified to end revision and file separators with ### 10 11 blocks = rlog.read.force_encoding("binary"). 11 - split(/^(-{28}|={77})###\n?$/).reject{|b| b.match(/^(-{28}|={77})$/) } 12 + split(/^(-{28}|={77})###\n?$/). 13 + reject{|b| b.match(/\A(-{28}|={77})\z/) } 12 14 end 13 15 14 16 if !blocks.first.match(/^RCS file/) ··· 18 20 blocks.shift 19 21 blocks.each do |block| 20 22 rev = RCSRevision.new(block) 21 - if @revisions[rev.revision] 22 - raise "duplicate revision #{rev.revision} in #{file}" 23 + if @revisions[rev.version] 24 + raise "duplicate revision #{rev.version} in #{file}" 23 25 end 24 - @revisions[rev.revision] = rev 26 + @revisions[rev.version] = rev 25 27 end 28 + 29 + @first_undead_version = @revisions.values. 30 + # this has nothing to do with Gem, but it has a version comparator 31 + sort{|a,b| Gem::Version.new(a.version) <=> Gem::Version.new(b.version) }. 32 + select{|r| r.state != "dead" }.first.version 26 33 end 27 34 end
+3 -3
rcsrevision.rb
··· 1 1 require "date" 2 2 3 3 class RCSRevision 4 - attr_accessor :revision, :date, :author, :state, :lines, :commitid, :log 4 + attr_accessor :version, :date, :author, :state, :lines, :commitid, :log 5 5 6 6 # str: "revision 1.7\ndate: 1996/12/14 12:17:33; author: mickey; state: Exp; lines: +3 -3;\n-Wall'ing." 7 7 def initialize(str) 8 - @revision = nil 8 + @version = nil 9 9 @date = 0 10 10 @author = nil 11 11 @state = nil ··· 25 25 lines.delete_at(2) 26 26 end 27 27 28 - @revision = lines.first.scan(/^revision ([\d\.]+)($|\tlocked by)/).first.first 28 + @version = lines.first.scan(/^revision ([\d\.]+)($|\tlocked by)/).first.first 29 29 # -> "1.7" 30 30 31 31 # date/author/state/lines/commitid line
+52 -30
scanner.rb
··· 5 5 @db = SQLite3::Database.new dbf 6 6 7 7 @db.execute "CREATE TABLE IF NOT EXISTS changesets 8 - (id integer primary key, date integer, author text, commitid text, 9 - log text)" 8 + (id INTEGER PRIMARY KEY, date INTEGER, author TEXT, commitid TEXT, 9 + log TEXT)" 10 10 @db.execute "CREATE UNIQUE INDEX IF NOT EXISTS u_commitid ON changesets 11 11 (commitid)" 12 12 13 13 @db.execute "CREATE TABLE IF NOT EXISTS files 14 - (id integer primary key, file text, in_attic integer)" 14 + (id INTEGER PRIMARY KEY, file TEXT, first_undead_version TEXT)" 15 15 @db.execute "CREATE UNIQUE INDEX IF NOT EXISTS u_file ON files 16 16 (file)" 17 17 18 18 @db.execute "CREATE TABLE IF NOT EXISTS revisions 19 - (id integer primary key, file_id integer, changeset_id integer, 20 - date integer, version text, author text, commitid text, log text)" 19 + (id INTEGER PRIMARY KEY, file_id INTEGER, changeset_id INTEGER, 20 + date INTEGER, version TEXT, author TEXT, commitid TEXT, log TEXT, 21 + state TEXT)" 21 22 @db.execute "CREATE UNIQUE INDEX IF NOT EXISTS u_revision ON revisions 22 23 (file_id, version)" 23 24 @db.execute "CREATE INDEX IF NOT EXISTS empty_changesets ON revisions ··· 26 27 (commitid, changeset_id)" 27 28 @db.execute "CREATE INDEX IF NOT EXISTS all_revs_by_author ON revisions 28 29 (author, date)" 30 + @db.execute "CREATE INDEX IF NOT EXISTS all_revs_by_version_and_state ON 31 + revisions (version, state)" 29 32 30 33 @db.results_as_hash = true 31 34 ··· 50 53 51 54 def scan(f) 52 55 canfile = f[@root.length, f.length - @root.length].gsub(/(^|\/)Attic\//, 53 - "/") 54 - in_attic = !!f.match(/(^|\/)Attic\//) 55 - puts " scanning file #{canfile}" + (in_attic ? 
" (in attic)" : "") 56 + "/").gsub(/^\/*/, "") 57 + puts " scanning file #{canfile}" 56 58 57 59 rcs = RCSFile.new(f) 58 60 59 - fid = @db.execute("SELECT id, in_attic FROM files WHERE file = ?", 60 - [ canfile ]).first 61 + fid = @db.execute("SELECT id, first_undead_version FROM files WHERE " + 62 + "file = ?", [ canfile ]).first 61 63 if fid 62 - if fid["in_attic"].to_i != (in_attic ? 1 : 0) 63 - @db.execute("UPDATE files SET in_attic = ? WHERE id = ?", 64 - [ (in_attic ? 1 : 0), fid["id"] ]) 64 + if fid["first_undead_version"] != rcs.first_undead_version 65 + @db.execute("UPDATE files SET first_undead_version = ? WHERE id = ?", 66 + [ rcs.first_undead_version, fid["id"] ]) 65 67 end 66 68 else 67 - @db.execute("INSERT INTO files (file, in_attic) VALUES (?, ?)", 68 - [ canfile, in_attic ? 1 : 0 ]) 69 + @db.execute("INSERT INTO files (file, first_undead_version) VALUES " + 70 + "(?, ?)", [ canfile, rcs.first_undead_version ]) 69 71 fid = @db.execute("SELECT id FROM files WHERE file = ?", 70 72 [ canfile ]).first 71 73 end ··· 81 83 (rid["commitid"].to_s == "" ? "" : " from #{rid["commitid"]}") 82 84 83 85 @db.execute("UPDATE revisions SET commitid = ? WHERE file_id = ? " + 84 - "AND version = ?", [ rev.commitid, fid["id"], rev.revision ]) 86 + "AND version = ?", [ rev.commitid, fid["id"], rev.version ]) 85 87 end 86 88 else 87 89 puts " inserted #{r}, authored #{rev.date} by #{rev.author}" + 88 90 (rev.commitid ? 
", commitid #{rev.commitid}" : "") 89 91 90 92 @db.execute("INSERT INTO revisions (file_id, date, version, author, " + 91 - "commitid, log) VALUES (?, ?, ?, ?, ?, ?)", [ fid["id"], rev.date, 92 - rev.revision, rev.author, rev.commitid, rev.log ]) 93 + "commitid, state, log) VALUES (?, ?, ?, ?, ?, ?, ?)", 94 + [ fid["id"], rev.date, rev.version, rev.author, rev.commitid, 95 + rev.state, rev.log ]) 93 96 end 94 97 end 95 98 end ··· 219 222 end 220 223 221 224 def repo_surgery(tmp_dir, cvs_root, tree) 222 - puts "checking out repo \"#{tree}\" to #{tmp_dir}" 225 + puts "checking out #{tree} from #{cvs_root} to #{tmp_dir}" 223 226 224 227 Dir.chdir(tmp_dir) 225 - system("cvs", "-Q", "-d", cvs_root, "co", tree) 228 + 229 + # for a deleted file to be operated by with cvs admin, it must be 230 + # present in the CVS/Entries files, so check out all files at rev 1.1 so we 231 + # know they will not be deleted. otherwise cvs admin will fail silently 232 + system("cvs", "-Q", "-d", cvs_root, "co", "-r1.1", tree) || 233 + raise("cvs checkout returned non-zero") 234 + 235 + # but if any files were added on a branch or somehow have a weird history, 236 + # their 1.1 revision will be dead so check out any non-dead revision of 237 + # those files 238 + dead11s = {} 239 + @db.execute("SELECT 240 + file, first_undead_version 241 + FROM files 242 + WHERE first_undead_version NOT LIKE '1.1'") do |rev| 243 + dead11s[rev["file"]] = rev["first_undead_version"] 244 + end 245 + 246 + dead11s.each do |file,rev| 247 + confile = file.gsub(/,v$/, "") 248 + 249 + puts " checking out non-dead revision #{rev} of #{confile}" 226 250 251 + system("cvs", "-Q", "-d", cvs_root, "co", "-r#{rev}", 252 + "#{tree}/#{confile}") || 253 + raise("cvs co -r#{rev} #{confile} failed") 254 + end 227 255 Dir.chdir(tmp_dir + "/#{tree}") 228 256 229 257 csid = nil 230 258 @db.execute("SELECT 231 - files.file, files.in_attic, changesets.commitid, changesets.author, 232 - changesets.date, revisions.version 259 + 
files.file, changesets.commitid, changesets.author, changesets.date, 260 + revisions.version 233 261 FROM revisions 234 262 LEFT OUTER JOIN files ON files.id = file_id 235 263 LEFT OUTER JOIN changesets ON revisions.changeset_id = changesets.id ··· 243 271 244 272 puts " #{rev["file"]} #{rev["version"]}" 245 273 246 - if rev["in_attic"].to_i == 1 247 - # for a deleted file to be operated by with cvs admin, it must be 248 - # present in the CVS/Entries files, so just check out a known version 249 - # that will put it there. otherwise cvs admin will fail silently 250 - system("cvs", "-Q", "up", "-r1.1", rev["file"].gsub(/,v$/, "")) 251 - end 252 - 253 274 output = nil 254 275 IO.popen(ca = [ "cvs", "admin", "-C", 255 276 "#{rev["version"]}:#{rev["commitid"]}", ··· 264 285 265 286 puts "cleaning up #{tmp_dir}/#{tree}" 266 287 267 - system("rm", "-rf", tmp_dir + "/#{tree}") 288 + system("rm", "-rf", tmp_dir + "/#{tree}") || 289 + raise("rm of #{tmp_dir}/#{tree} failed") 268 290 end 269 291 end