jcs's openbsd hax
openbsd
1#!/bin/ksh
2# $OpenBSD: check_sym,v 1.14 2024/12/24 18:14:49 tb Exp $
3#
4# Copyright (c) 2016,2019,2022 Philip Guenther <guenther@openbsd.org>
5#
6# Permission to use, copy, modify, and distribute this software for any
7# purpose with or without fee is hereby granted, provided that the above
8# copyright notice and this permission notice appear in all copies.
9#
10# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17#
18#
19# check_sym -- compare the symbols and external function references in two
20# versions of a library
21#
22# SYNOPSIS
23# check_sym [-chkSv] [old [new]]
24#
25# DESCRIPTION
26# Library developers need to be aware when they have changed the
27# ABI of a library. To assist them, check_sym examines two versions
28# of a shared library and reports changes to the following:
29# * the set of exported symbols and their strengths
30# * the set of undefined symbols referenced
31# * the set of lazily-resolved functions (PLT)
32#
33# In each case, additions and removals are reported; for exported
34# symbols it also reports when a symbol is weakened or strengthened.
35#
36# With the -S option, a similar analysis is done but for the static lib.
37#
38# The shared libraries to compare can be specified on the
39# command-line. Otherwise, check_sym expects to be run from the
40# source directory of a library with a shlib_version file specifying
41# the version being built and the new library in the obj subdirectory.
42# If the old library to compare against wasn't specified either then
43# check_sym will take the highest version of that library in the
44# *current* directory, or the highest version of that library in
45# /usr/lib if it wasn't present in the current directory.
46#
47# By default, check_sym places all its intermediate files in a
48# temporary directory and removes it on exit. They contain useful
49# details for understanding what changed, so if the -k option is used
50# they will instead be placed in /tmp/ and left behind. If any of
51# them cannot be created by the user, the command will fail. The
52# files left behind by the -k option can be cleaned up by invoking
# check_sym with the -c option.
54#
55# The -v option enables verbose output, showing relocation counts.
56#
57# The *basic* rules of thumb for library versions are: if you
58# * stop exporting a symbol, or
59# * change the size of a data symbol
60# * start exporting a symbol that an inter-dependent library needs
61# then you need to bump the MAJOR version of the library.
62#
63# Otherwise, if you:
64# * start exporting a symbol
65# then you need to bump the MINOR version of the library.
66#
67# SEE ALSO
68# readelf(1), elf(5)
69#
70# AUTHORS
71# Philip Guenther <guenther@openbsd.org>
72#
73# CAVEATS
74# The elf format is infinitely extendable, but check_sym only
75# handles a few weirdnesses. Running it on or against new archs
76# may result in meaningless results.
77#
78# BUGS
# While the author still finds the intermediate files useful,
80# most people won't. By default they should be placed in a
81# temp directory and removed.
82#
83
# get_lib_name file ...
#	Print the value of the first "LIB = value" assignment found in the
#	given makefile(s), then stop reading.  Blanks (spaces or tabs) before
#	LIB, around the "=" and after the value are ignored, so the common
#	"LIB=<TAB>foo" layout yields "foo".  Prints nothing if no such
#	assignment is found.
get_lib_name()
{
	sed -n '/^[[:blank:]]*LIB[[:blank:]]*=/{
		s/^[^=]*=[[:blank:]]*\([^[:blank:]]*\).*/\1/p
		q
	}' "$@"
}
88
# pick_highest path ...
#	Choose, among the arguments that are existing regular files, the one
#	whose name ends in the highest ".MAJOR.MINOR" pair and store it in
#	the global $old.  Arguments whose last two dot-separated components
#	are not both plain decimal numbers (backup copies such as
#	"libfoo.so.5.bak", or an unmatched glob pattern) are skipped instead
#	of being fed to the arithmetic comparison.
#	Returns 0 if a library was chosen; otherwise returns non-zero and
#	leaves $old empty.
pick_highest()
{
	typeset i maj min omaj=-1 omin=0

	old=
	for i
	do
		[[ -f $i ]] || continue
		maj=${i%.*}; maj=${maj##*.}
		min=${i##*.}
		# only accept purely numeric version components
		case $maj in ''|*[!0-9]*) continue;; esac
		case $min in ''|*[!0-9]*) continue;; esac
		if [[ $maj -gt $omaj || ( $maj -eq $omaj && $min -gt $omin ) ]]
		then
			old=$i
			omaj=$maj
			omin=$min
		fi
	done
	[[ -n $old ]]
}
108
# fail message ...
#	Write the arguments, joined by single spaces, to stderr followed by a
#	newline and exit the script with status 1.  printf is used rather
#	than echo so that a message such as "-n", or one containing
#	backslashes, is printed verbatim.
fail()
{
	printf '%s\n' "$*" >&2
	exit 1
}
110
# usage [message ...]
#	Without arguments, print the synopsis on stdout and exit 0.
#	With arguments, hand "check_sym: <message>" followed by the synopsis
#	to fail, which reports it on stderr and exits non-zero.
usage()
{
	typeset synopsis

	synopsis="usage: check_sym [-chkSv] [old [new]]"
	if [[ $# -ne 0 ]]
	then
		fail "check_sym: $*
$synopsis"
	fi
	echo "$synopsis"
	exit 0
}
119
120
121#
122# Output helpers
123#
# data_sym_changes file1 file2
#	The arguments are passed straight to join(1); each joined line is
#	expected to read "name old-size new-size".  Print
#	"name old --> new" for every symbol whose two sizes differ.
data_sym_changes()
{
	join "$@" |
	    awk '{ if ($2 != $3) printf "%s %s --> %s\n", $1, $2, $3 }'
}
128
# output_if_not_empty leader command [arg ...]
#	Run the command once and capture its output.  If that output has at
#	least one non-empty line, print the leader, then the output with
#	every line indented, then an empty line.  Otherwise print nothing.
#	The command used to be run twice (once to probe, once to print);
#	running it a single time halves the work and guarantees that the
#	text that was tested is the text that gets shown.
output_if_not_empty()
{
	typeset leader output

	leader=$1
	shift
	# as before, the command's own exit status is deliberately ignored
	output=$("$@") || :
	if printf '%s\n' "$output" | grep -q .
	then
		echo "$leader"
		printf '%s\n' "$output" | sed 's:^: :'
		echo
	fi
}
140
141
142#
143# Dynamic library routines
144#
145
# dynamic_collect
#	Gather the raw readelf data for the shared libraries $old and $new
#	into the work directory $odir:
#	  Ds1/Ds2	symbol tables (readelf -sW, passed through filt_symtab)
#	  r1/r2		relocations (readelf -rW)
#	Also sets $cpu from the ELF header "Machine:" line of $new to
#	mips64, hppa or dontcare; on MIPS it additionally records the
#	MIPS_GOTSYM dynamic-section value of each library in $gotsym1 and
#	$gotsym2.
#	All path expansions are quoted so that library or work-directory
#	names containing whitespace reach readelf and the shell intact.
dynamic_collect()
{
	readelf -sW "$old" | filt_symtab > "$odir/Ds1"
	readelf -sW "$new" | filt_symtab > "$odir/Ds2"

	readelf -rW "$old" > "$odir/r1"
	readelf -rW "$new" > "$odir/r2"

	case $(readelf -h "$new" | grep '^ *Machine:') in
	*MIPS*)	cpu=mips64
		gotsym1=$(readelf -d "$old" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
		gotsym2=$(readelf -d "$new" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
		;;
	*HPPA*)	cpu=hppa;;
	*)	cpu=dontcare;;
	esac
}
163
# jump_slots n
#	Write the sorted names of library n's (1 or 2) lazily bound functions
#	to j<n> in the current directory.  How they are found depends on $cpu:
#	  hppa		symbol field of the IPLT relocations listed in r<n>
#	  mips64	FUNC symbols in the first symbol table of Ds<n> whose
#			index is at least $gotsym<n>, skipping PROTECTED ones
#	  otherwise	symbol field of the JUMP_SLOT/JMP_SLOT relocations
#			listed in r<n>
jump_slots()
{
	typeset n first_got

	n=$1
	{
		if [[ $cpu = hppa ]]
		then
			awk '/IPLT/ && $5 != "" { print $5 }' "r$n"
		elif [[ $cpu = mips64 ]]
		then
			# arithmetic expansion turns the hex value into decimal
			first_got=$((gotsym$n))
			awk -v g="$first_got" '
				/^Symbol table ..symtab/ { exit }
				$6 == "PROTECTED" { next }
				$1+0 >= g && $4 == "FUNC" { print $8 }
			' "Ds$n"
		else
			awk '/JU*MP_SL/ && $5 != "" { print $5 }' "r$n"
		fi
	} | sort -o "j$n"
}
179
# dynamic_sym n
#	Split the first symbol table of Ds<n> (reading stops at the .symtab
#	header) into sorted lists in the current directory.  Symbol 0, header
#	lines and LOCAL symbols are ignored.
#	  DU<n>	names of undefined symbols
#	  DS<n>	names of defined GLOBAL symbols
#	  DW<n>	names of defined WEAK symbols
#	  DO<n>	"name size" of defined OBJECT symbols
#	  D<n>	names of all defined symbols
#	  d<n>	"type bind visibility name" of all defined symbols
dynamic_sym()
{
	awk -v s="$1" '
	BEGIN {
		to_undef  = "sort -o DU" s
		to_global = "sort -o DS" s
		to_weak   = "sort -o DW" s
		to_object = "sort -o DO" s
		to_all    = "sort -o D" s
	}
	/^Symbol table ..symtab/ { exit }
	!/^ *[1-9]/ { next }
	$5 == "LOCAL" { next }
	$7 == "UND" {
		print $8 | to_undef
		next
	}
	$5 == "GLOBAL" { print $8 | to_global }
	$5 == "WEAK" { print $8 | to_weak }
	$4 == "OBJECT" { print $8, $3 | to_object }
	{
		print $8 | to_all
		print $4, $5, $6, $8
	}' "Ds$1" | sort -o "d$1"
}
192
# static_sym n
#	From Ds<n>, take the numbered entries (other than entry 0) that
#	follow the .symtab header, drop every line mentioning LOCAL, and
#	write "type bind visibility name" for the rest, sorted, to s<n>.
static_sym()
{
	awk '
	/^Symbol table ..symtab/ { in_symtab = 1 }
	/LOCAL/ { next }
	in_symtab && /^ *[1-9]/ { print $4, $5, $6, $8 }
	' "Ds$1" | sort -o "s$1"
}
199
# dynamic_analysis n
#	Run the per-library dynamic passes for library n (1 or 2): build the
#	jump slot list j<n> and the symbol lists, then write to J<n> the jump
#	slots that are not also listed in DU<n>.  Always returns 0.
dynamic_analysis()
{
	typeset n

	n=$1
	jump_slots "$n"
	dynamic_sym "$n"
	#static_sym "$n"
	comm -23 "j$n" "DU$n" > "J$n"
	return 0
}
208
# dynamic_output
#	Print the shared library report from the lists in the current
#	directory: export changes (or a note that there are none), changes in
#	the undefined symbols referenced, the relocation summary lines when
#	$verbose is set, and finally PLT additions and removals.
dynamic_output()
{
	# exports changed if either the per-symbol attribute lists (d1/d2)
	# or the data object size lists (DO1/DO2) differ
	if ! cmp -s d[12] || ! cmp -s DO[12]
	then
		printf "Dynamic export changes:\n"
		output_if_not_empty "added:" comm -13 D[12]
		output_if_not_empty "removed:" comm -23 D[12]
		output_if_not_empty "weakened:" comm -12 DS1 DW2
		output_if_not_empty "strengthened:" comm -12 DW1 DS2
		output_if_not_empty "data object sizes changes:" \
			data_sym_changes DO[12]
	else
		printf "No dynamic export changes\n"
	fi

	if ! cmp -s DU[12]
	then
		printf "External reference changes:\n"
		output_if_not_empty "added:" comm -13 DU[12]
		output_if_not_empty "removed:" comm -23 DU[12]
	fi

	if $verbose
	then
		# lines of the readelf -r output that start with "R"
		printf "\nReloc counts:\nbefore:\n"
		grep ^R r1
		printf "\nafter:\n"
		grep ^R r2
	fi

	output_if_not_empty "PLT added:" comm -13 J[12]
	output_if_not_empty "PLT removed:" comm -23 J[12]
}
240
241
242#
243# Static library routines
244#
# static_collect
#	Dump the symbol tables of the static libraries $old and $new
#	(readelf -sW, passed through filt_ret and then filt_symtab) into
#	Ss1 and Ss2 in the work directory $odir.
#	All path expansions are quoted so that library or work-directory
#	names containing whitespace reach readelf and the shell intact.
static_collect()
{
	readelf -sW "$old" | filt_ret | filt_symtab > "$odir/Ss1"
	readelf -sW "$new" | filt_ret | filt_symtab > "$odir/Ss2"
}
250
# static_analysis n
#	Split the symbol dump Ss<n> into sorted lists in the current
#	directory.  Symbol 0, header lines and LOCAL symbols are ignored.
#	  SU<n>	names of undefined symbols, duplicates removed
#	  SH<n>	names of defined HIDDEN symbols, duplicates removed
#	  Sh<n>	the SH<n> names that do not start with an underscore
#	  SS<n>	names of defined GLOBAL symbols
#	  SW<n>	names of defined WEAK symbols
#	  SO<n>	"name size" of defined OBJECT symbols
#	  S<n>	names of all defined symbols
#	  s<n>	"type bind visibility name" of all defined symbols
static_analysis()
{
	awk -v s="$1" '
	BEGIN {
		to_undef  = "sort -uo SU" s
		to_hidden = "sort -uo SH" s
		to_global = "sort -o SS" s
		to_weak   = "sort -o SW" s
		to_object = "sort -o SO" s
		to_all    = "sort -o S" s
	}
	!/^ *[1-9]/ { next }
	$5 == "LOCAL" { next }
	$7 == "UND" {
		print $8 | to_undef
		next
	}
	$6 == "HIDDEN" { print $8 | to_hidden }
	$5 == "GLOBAL" { print $8 | to_global }
	$5 == "WEAK" { print $8 | to_weak }
	$4 == "OBJECT" { print $8, $3 | to_object }
	{
		print $8 | to_all
		print $4, $5, $6, $8
	}' "Ss$1" | sort -o "s$1"

	# grep exits non-zero when every hidden name is underscore-prefixed
	# (or there are none); that is not an error
	grep -v '^_' "SH$1" > "Sh$1" || :
}
264
# static_output
#	Print the static library report from the lists in the current
#	directory: hidden symbols new in the second library whose names do
#	not start with an underscore, export changes (or a note that there
#	are none), and changes in the undefined symbols referenced.
static_output()
{
	output_if_not_empty "hidden but not reserved:" comm -13 Sh[12]

	# exports changed if either the per-symbol attribute lists (s1/s2)
	# or the data object size lists (SO1/SO2) differ
	if ! cmp -s s[12] || ! cmp -s SO[12]
	then
		printf "Static export changes:\n"
		output_if_not_empty "added:" comm -13 S[12]
		output_if_not_empty "removed:" comm -23 S[12]
		output_if_not_empty "weakened:" comm -12 SS1 SW2
		output_if_not_empty "strengthened:" comm -12 SW1 SS2
		output_if_not_empty "data object sizes changes:" \
			data_sym_changes SO[12]
	else
		printf "No static export changes\n"
	fi

	if ! cmp -s SU[12]
	then
		printf "External reference changes:\n"
		output_if_not_empty "added:" comm -13 SU[12]
		output_if_not_empty "removed:" comm -23 SU[12]
	fi
}
287
288
unset odir

# Names of the intermediate files, kept as unexpanded brace patterns; they
# are expanded where the variables are used unquoted (the -c cleanup and
# the loop that precreates the files).
file_list={D{,O,S,s,W,U},J,d,j,r}{1,2}
# Ss1/Ss2 are written by static_collect, so they must be listed here as
# well: otherwise -k does not precreate them under noclobber and -c leaves
# them behind.  The unused U1/U2 entries are kept so that -c still removes
# files precreated by earlier versions of this script.
static_file_list={S{,H,h,O,S,s,U,W},U,s}{1,2}

# Defaults: analyse shared libraries, remove intermediate files, be terse
keep_temp=false
dynamic=true
static=false
verbose=false

# do_static
#	Switch to static library mode and to the static intermediate file list.
do_static() { static=true dynamic=false file_list=$static_file_list; }
299
# Parse the command line options, then drop them so that only the
# optional old and new library arguments remain.
while getopts :chkSv opt "$@"
do
	case $opt in
	S)	do_static;;
	k)	keep_temp=true;;
	v)	verbose=true;;
	c)	# remove the files of the mode selected so far and stop
		rm -f /tmp/$file_list
		exit 0;;
	h)	usage;;
	\?)	usage "unknown option -- $OPTARG";;
	esac
done
shift $((OPTIND - 1))
if [[ $# -gt 2 ]]
then
	usage "too many arguments"
fi
314
# Old library?
# Sets $old to the library to compare against and $lib to its name
# without the version or .a suffix.
if ! $static && [[ $1 = ?(*/)lib*.so* ]]
then
	# a shared library was named explicitly
	if [[ ! -f $1 ]]
	then
		fail "$1 doesn't exist"
	fi
	old=$1
	lib=${old##*/}
	lib=${lib%%.so.*}
	shift
elif [[ $1 = ?(*/)lib*.a ]]
then
	# woo hoo, static library mode
	do_static
	if [[ -f $1 ]]
	then
		old=$1
		lib=${old##*/}
	elif [[ $1 = lib*.a && -f /usr/lib/$1 ]]
	then
		# fall back to the installed copy of that archive
		old=/usr/lib/$1
		lib=$1
	else
		fail "$1 doesn't exist"
	fi
	lib=${lib%%.a}
	shift
else
	# try determining it from the current directory
	if [[ -f Makefile ]] && lib=$(get_lib_name Makefile) &&
	    [[ -n $lib ]]
	then
		lib=lib$lib
	else
		lib=libc
	fi

	if $static
	then
		old=/usr/lib/${lib}.a
		[[ -f $old ]] || fail "$old doesn't exist"
	elif ! pick_highest $lib.so.* &&
	    ! pick_highest /usr/lib/$lib.so.*
	then
		# Neither the current directory nor /usr/lib has a copy;
		# otherwise pick_highest has left the highest numbered
		# one in $old
		fail "unable to find $lib.so.*"
	fi
fi
363
# New library?
# Sets $new either from the next argument or to the freshly built
# library in the obj subdirectory.
if [[ $1 = ?(*/)lib*.so* ]] ||
    { $static && [[ $1 = ?(*/)lib*.a ]]; }
then
	new=$1
	shift
elif $static
then
	new=obj/${lib}.a
else
	# Dig info out of the just built library: shlib_version is
	# sourced for the $major and $minor used below
	. ./shlib_version
	new=obj/${lib}.so.${major}.${minor}
fi
if [[ ! -f $new ]]
then
	fail "$new doesn't exist"
fi
379
# filt_symtab
#	Make readelf -s output easier to parse: copy stdin to stdout with the
#	first "[<other>: NN]" field of each line removed, since that field
#	only appears on some symbols.
#	Not really arch-specific, but I've only seen it on alpha
filt_symtab()
{
	sed -e 's|\[<other>: [0-9a-f]*\]||'
}
# filt_ret
#	Copy stdin to stdout, dropping the lines that end in a
#	__retguard_<digits> or __llvm_retpoline_<letters>[<digits>] symbol
#	name.  Uses "grep -E" instead of egrep: egrep is not specified by
#	POSIX and current GNU grep prints an obsolescence warning for it.
filt_ret()
{
	grep -Ev ' (__retguard_[0-9]+|__llvm_retpoline_[a-z]+[0-9]*)$'
}
385
# Choose the work directory and arrange for cleanup.
if $keep_temp
then
	# -k: work in /tmp and leave the files behind on a normal exit;
	# they are only removed again on a signal or a failing command
	odir=/tmp
	files=
	trap 'ret=$?; rm -f $files; exit $ret' 1 2 15 ERR
else
	# private directory, removed on every exit path
	trap 'ret=$?; rm -rf "$odir"; exit $ret' 0 1 2 15 ERR
	odir=$(mktemp -dt check_sym.XXXXXXXXXX)
fi

# precreate all the files we'll use, but with noclobber set to avoid
# symlink attacks
set -C
for i in $odir/$file_list
do
	rm -f "$i"
	3>"$i"
	files="${files} ${i}"
done
set +C
405
406
#
# Collect data
#
if $dynamic
then
	dynamic_collect
fi
if $static
then
	static_collect
fi

# Now that we're done accessing $old and $new (which could be
# relative paths), chdir into our work directory, whatever it is
cd "$odir"

#
# Do The Job
#
# one analysis pass per library: 1 is the old one, 2 the new one
for i in 1 2
do
	if $dynamic
	then
		dynamic_analysis $i
	fi
	if $static
	then
		static_analysis $i
	fi
done

# the report proper
{
	echo "$old --> $new"
	if $dynamic
	then
		dynamic_output
	fi
	if $static
	then
		static_output
	fi
}
431