Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

blkcg: add tools/cgroup/iocost_coef_gen.py

Add a script which can be used to generate device-specific iocost
linear model coefficients.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Tejun Heo and committed by
Jens Axboe
8504dea7 6954ff18

+184
+3
Documentation/admin-guide/cgroup-v2.rst
··· 1529 1529 The IO cost model isn't expected to be accurate in absolute 1530 1530 sense and is scaled to the device behavior dynamically. 1531 1531 1532 + If needed, tools/cgroup/iocost_coef_gen.py can be used to 1533 + generate device-specific coefficients. 1534 + 1532 1535 io.weight 1533 1536 A read-write flat-keyed file which exists on non-root cgroups. 1534 1537 The default is "default 100".
+3
block/blk-iocost.c
··· 46 46 * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate 47 47 * device-specific coefficients. 48 48 * 49 + * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate 50 + * device-specific coefficients. 51 + * 49 52 * 2. Control Strategy 50 53 * 51 54 * The device virtual time (vtime) is used as the primary control metric.
+178
tools/cgroup/iocost_coef_gen.py
#!/usr/bin/env python3
#
# Copyright (C) 2019 Tejun Heo <tj@kernel.org>
# Copyright (C) 2019 Andy Newell <newella@fb.com>
# Copyright (C) 2019 Facebook

desc = """
Generate linear IO cost model coefficients used by the blk-iocost
controller. If the target raw testdev is specified, destructive tests
are performed against the whole device; otherwise, on
./iocost-coef-fio.testfile. The result can be written directly to
/sys/fs/cgroup/io.cost.model.

On high performance devices, --numjobs > 1 is needed to achieve
saturation.

See Documentation/admin-guide/cgroup-v2.rst and block/blk-iocost.c
for more details.
"""

import argparse
import re
import json
import glob
import os
import sys
import atexit
import shlex
import shutil
import tempfile
import subprocess

parser = argparse.ArgumentParser(description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('--testdev', metavar='DEV',
                    help='Raw block device to use for testing, ignores --testfile-size-gb')
parser.add_argument('--testfile-size-gb', type=float, metavar='GIGABYTES', default=16,
                    help='Testfile size in gigabytes (default: %(default)s)')
parser.add_argument('--duration', type=int, metavar='SECONDS', default=120,
                    help='Individual test run duration in seconds (default: %(default)s)')
parser.add_argument('--seqio-block-mb', metavar='MEGABYTES', type=int, default=128,
                    help='Sequential test block size in megabytes (default: %(default)s)')
parser.add_argument('--seq-depth', type=int, metavar='DEPTH', default=64,
                    help='Sequential test queue depth (default: %(default)s)')
parser.add_argument('--rand-depth', type=int, metavar='DEPTH', default=64,
                    help='Random test queue depth (default: %(default)s)')
parser.add_argument('--numjobs', type=int, metavar='JOBS', default=1,
                    help='Number of parallel fio jobs to run (default: %(default)s)')
parser.add_argument('--quiet', action='store_true')
parser.add_argument('--verbose', action='store_true')

# parsed command line; populated by main() before any helper is called
args = None

# print @msg unless --quiet is specified
def info(msg):
    if not args.quiet:
        print(msg)

# print @msg only if --verbose is specified and --quiet is not
def dbg(msg):
    if args.verbose and not args.quiet:
        print(msg)

# determine ('DEVNAME', 'MAJ:MIN') for @path
def dir_to_dev(path):
    # find the block device @path is on; check=True so that a findmnt
    # failure is reported instead of yielding an empty device name
    out = subprocess.run(['findmnt', '-nvo', 'SOURCE', '-T', path],
                         stdout=subprocess.PIPE, check=True).stdout
    devname = os.path.basename(out.decode('utf-8').strip())
    if not devname:
        # an empty name would make the glob below match every block
        # device and silently pick an unrelated one
        raise RuntimeError(f'Failed to determine the block device of "{path}"')

    # partition -> whole device
    parents = glob.glob('/sys/block/*/' + glob.escape(devname))
    if parents:
        devname = os.path.basename(os.path.dirname(parents[0]))
    rdev = os.stat(f'/dev/{devname}').st_rdev
    return (devname, f'{os.major(rdev)}:{os.minor(rdev)}')

# make sure @path is a regular file of exactly @size bytes, (re)creating
# it with random contents if it is missing or has a different size
def create_testfile(path, size):
    global args

    if os.path.isfile(path) and os.stat(path).st_size == size:
        return

    info(f'Creating testfile {path}')
    subprocess.check_call(['rm', '-f', path])
    subprocess.check_call(['touch', path])
    # best-effort: request the No_COW attribute on the empty file; both
    # a failing and a missing chattr are ignored
    try:
        subprocess.call(['chattr', '+C', path])
    except OSError:
        pass
    # the pv | dd pipeline needs a shell; quote the only free-form part
    subprocess.check_call(
        f'pv -s {size} -pr /dev/urandom {"-q" if args.quiet else ""} | '
        f'dd of={shlex.quote(path)} count={size} '
        f'iflag=count_bytes,fullblock oflag=direct bs=16M status=none',
        shell=True)

# return the summed read + write 'bw_bytes' of all jobs in @result, the
# parsed JSON output of a fio run
def sum_fio_bandwidth(result):
    return sum(j['read']['bw_bytes'] + j['write']['bw_bytes']
               for j in result['jobs'])

# run a time based fio job of @iotype against @testfile for @duration
# seconds and return the aggregate bandwidth of all @jobs in bytes/sec
def run_fio(testfile, duration, iotype, iodepth, blocksize, jobs):
    global args

    eta = 'never' if args.quiet else 'always'
    # the temporary file only receives fio's JSON report and is removed
    # when the with block exits
    with tempfile.NamedTemporaryFile() as outfile:
        cmd = ['fio', '--direct=1', '--ioengine=libaio', '--name=coef',
               f'--filename={testfile}', f'--runtime={round(duration)}',
               f'--readwrite={iotype}', f'--iodepth={iodepth}',
               f'--blocksize={blocksize}', f'--eta={eta}',
               '--output-format', 'json', f'--output={outfile.name}',
               '--time_based', f'--numjobs={jobs}']
        cmdline = ' '.join(cmd)
        dbg(f'Running {cmdline}')
        subprocess.check_call(cmd)
        with open(outfile.name, 'r') as f:
            result = json.load(f)
    return sum_fio_bandwidth(result)

# extract the bracketed entry from @text, the content of a queue/scheduler
# sysfs file (e.g. 'mq-deadline [none]' -> 'none'); text without brackets
# is returned stripped but otherwise unchanged
def parse_active_elevator(text):
    return re.sub(r'.*\[(.*)\].*', r'\1', text.strip())

# write the elevator and nomerges settings saved by main() back to sysfs
def restore_elevator_nomerges():
    global elevator_path, nomerges_path, elevator, nomerges

    info(f'Restoring elevator to {elevator} and nomerges to {nomerges}')
    with open(elevator_path, 'w') as f:
        f.write(elevator)
    with open(nomerges_path, 'w') as f:
        f.write(nomerges)

# parse the command line, run the six fio measurements and print the
# resulting model line on stdout
def main():
    global args, elevator_path, nomerges_path, elevator, nomerges

    args = parser.parse_args()

    missing = False
    for cmd in ['findmnt', 'pv', 'dd', 'fio']:
        if not shutil.which(cmd):
            print(f'Required command "{cmd}" is missing', file=sys.stderr)
            missing = True
    if missing:
        sys.exit(1)

    if args.testdev:
        devname = os.path.basename(args.testdev)
        rdev = os.stat(f'/dev/{devname}').st_rdev
        devno = f'{os.major(rdev)}:{os.minor(rdev)}'
        testfile = f'/dev/{devname}'
        info(f'Test target: {devname}({devno})')
    else:
        devname, devno = dir_to_dev('.')
        testfile = 'iocost-coef-fio.testfile'
        testfile_size = int(args.testfile_size_gb * 2 ** 30)
        create_testfile(testfile, testfile_size)
        info(f'Test target: {testfile} on {devname}({devno})')

    elevator_path = f'/sys/block/{devname}/queue/scheduler'
    nomerges_path = f'/sys/block/{devname}/queue/nomerges'

    # remember the current settings so that they can be restored
    with open(elevator_path, 'r') as f:
        elevator = parse_active_elevator(f.read())
    with open(nomerges_path, 'r') as f:
        nomerges = f.read().strip()

    info('Temporarily disabling elevator and merges')
    # registered before the settings are touched so that they are
    # restored even if a later step raises or exits
    atexit.register(restore_elevator_nomerges)
    with open(elevator_path, 'w') as f:
        f.write('none')
    with open(nomerges_path, 'w') as f:
        f.write('1')

    info('Determining rbps...')
    rbps = run_fio(testfile, args.duration, 'read',
                   1, args.seqio_block_mb * (2 ** 20), args.numjobs)
    info(f'\nrbps={rbps}, determining rseqiops...')
    # 4k tests report bytes/sec; divide by the block size to get IOPS
    rseqiops = round(run_fio(testfile, args.duration, 'read',
                             args.seq_depth, 4096, args.numjobs) / 4096)
    info(f'\nrseqiops={rseqiops}, determining rrandiops...')
    rrandiops = round(run_fio(testfile, args.duration, 'randread',
                              args.rand_depth, 4096, args.numjobs) / 4096)
    info(f'\nrrandiops={rrandiops}, determining wbps...')
    wbps = run_fio(testfile, args.duration, 'write',
                   1, args.seqio_block_mb * (2 ** 20), args.numjobs)
    info(f'\nwbps={wbps}, determining wseqiops...')
    wseqiops = round(run_fio(testfile, args.duration, 'write',
                             args.seq_depth, 4096, args.numjobs) / 4096)
    info(f'\nwseqiops={wseqiops}, determining wrandiops...')
    wrandiops = round(run_fio(testfile, args.duration, 'randwrite',
                              args.rand_depth, 4096, args.numjobs) / 4096)
    info(f'\nwrandiops={wrandiops}')
    # restore right away and drop the exit hook so it does not run twice
    restore_elevator_nomerges()
    atexit.unregister(restore_elevator_nomerges)
    info('')

    print(f'{devno} rbps={rbps} rseqiops={rseqiops} rrandiops={rrandiops} '
          f'wbps={wbps} wseqiops={wseqiops} wrandiops={wrandiops}')

if __name__ == '__main__':
    main()