Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Squashfs: add multi-threaded decompression using percpu variable

Add a multi-threaded decompression implementation which uses
percpu variables.

Using percpu variables has advantages and disadvantages over
implementations which do not use percpu variables.

Advantages:
* the nature of percpu variables ensures decompression is
load-balanced across the multiple cores.
* simplicity.

Disadvantages: it limits decompression to one thread per core.

Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>

+145 -20
+44 -13
fs/squashfs/Kconfig
··· 25 25 26 26 If unsure, say N. 27 27 28 + choice 29 + prompt "Decompressor parallelisation options" 30 + depends on SQUASHFS 31 + help 32 + Squashfs now supports three parallelisation options for 33 + decompression. Each one exhibits various trade-offs between 34 + decompression performance and CPU and memory usage. 35 + 36 + If in doubt, select "Single threaded compression" 37 + 38 + config SQUASHFS_DECOMP_SINGLE 39 + bool "Single threaded compression" 40 + help 41 + Traditionally Squashfs has used single-threaded decompression. 42 + Only one block (data or metadata) can be decompressed at any 43 + one time. This limits CPU and memory usage to a minimum. 44 + 45 + config SQUASHFS_DECOMP_MULTI 46 + bool "Use multiple decompressors for parallel I/O" 47 + help 48 + By default Squashfs uses a single decompressor but it gives 49 + poor performance on parallel I/O workloads when using multiple CPU 50 + machines due to waiting on decompressor availability. 51 + 52 + If you have a parallel I/O workload and your system has enough memory, 53 + using this option may improve overall I/O performance. 54 + 55 + This decompressor implementation uses up to two parallel 56 + decompressors per core. It dynamically allocates decompressors 57 + on a demand basis. 58 + 59 + config SQUASHFS_DECOMP_MULTI_PERCPU 60 + bool "Use percpu multiple decompressors for parallel I/O" 61 + help 62 + By default Squashfs uses a single decompressor but it gives 63 + poor performance on parallel I/O workloads when using multiple CPU 64 + machines due to waiting on decompressor availability. 65 + 66 + This decompressor implementation uses a maximum of one 67 + decompressor per core. It uses percpu variables to ensure 68 + decompression is load-balanced across the cores. 69 + 70 + endchoice 71 + 28 72 config SQUASHFS_XATTR 29 73 bool "Squashfs XATTR support" 30 74 depends on SQUASHFS ··· 104 60 105 61 LZO is not the standard compression used in Squashfs and so most 106 62 file systems will be readable without selecting this option. 107 - 108 - If unsure, say N. 109 - 110 - config SQUASHFS_MULTI_DECOMPRESSOR 111 - bool "Use multiple decompressors for handling parallel I/O" 112 - depends on SQUASHFS 113 - help 114 - By default Squashfs uses a single decompressor but it gives 115 - poor performance on parallel I/O workloads when using multiple CPU 116 - machines due to waiting on decompressor availability. 117 - 118 - If you have a parallel I/O workload and your system has enough memory, 119 - using this option may improve overall I/O performance. 120 63 121 64 If unsure, say N. 122 65
+3 -7
fs/squashfs/Makefile
··· 5 5 obj-$(CONFIG_SQUASHFS) += squashfs.o 6 6 squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o 7 7 squashfs-y += namei.o super.o symlink.o decompressor.o 8 - 8 + squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o 9 + squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o 10 + squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o 9 11 squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o 10 12 squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o 11 13 squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o 12 14 squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o 13 - 14 - ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR 15 - squashfs-y += decompressor_multi.o 16 - else 17 - squashfs-y += decompressor_single.o 18 - endif
+98
fs/squashfs/decompressor_multi_percpu.c
··· 1 + /* 2 + * Copyright (c) 2013 3 + * Phillip Lougher <phillip@squashfs.org.uk> 4 + * 5 + * This work is licensed under the terms of the GNU GPL, version 2. See 6 + * the COPYING file in the top-level directory. 7 + */ 8 + 9 + #include <linux/types.h> 10 + #include <linux/slab.h> 11 + #include <linux/percpu.h> 12 + #include <linux/buffer_head.h> 13 + 14 + #include "squashfs_fs.h" 15 + #include "squashfs_fs_sb.h" 16 + #include "decompressor.h" 17 + #include "squashfs.h" 18 + 19 + /* 20 + * This file implements multi-threaded decompression using percpu 21 + * variables, one thread per cpu core. 22 + */ 23 + 24 + struct squashfs_stream { 25 + void *stream; 26 + }; 27 + 28 + void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, 29 + void *comp_opts) 30 + { 31 + struct squashfs_stream *stream; 32 + struct squashfs_stream __percpu *percpu; 33 + int err, cpu; 34 + 35 + percpu = alloc_percpu(struct squashfs_stream); 36 + if (percpu == NULL) 37 + return ERR_PTR(-ENOMEM); 38 + 39 + for_each_possible_cpu(cpu) { 40 + stream = per_cpu_ptr(percpu, cpu); 41 + stream->stream = msblk->decompressor->init(msblk, comp_opts); 42 + if (IS_ERR(stream->stream)) { 43 + err = PTR_ERR(stream->stream); 44 + goto out; 45 + } 46 + } 47 + 48 + kfree(comp_opts); 49 + return (__force void *) percpu; 50 + 51 + out: 52 + for_each_possible_cpu(cpu) { 53 + stream = per_cpu_ptr(percpu, cpu); 54 + if (!IS_ERR_OR_NULL(stream->stream)) 55 + msblk->decompressor->free(stream->stream); 56 + } 57 + free_percpu(percpu); 58 + return ERR_PTR(err); 59 + } 60 + 61 + void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) 62 + { 63 + struct squashfs_stream __percpu *percpu = 64 + (struct squashfs_stream __percpu *) msblk->stream; 65 + struct squashfs_stream *stream; 66 + int cpu; 67 + 68 + if (msblk->stream) { 69 + for_each_possible_cpu(cpu) { 70 + stream = per_cpu_ptr(percpu, cpu); 71 + msblk->decompressor->free(stream->stream); 72 + } 73 + free_percpu(percpu); 74 + } 75 + } 76 + 77 + int squashfs_decompress(struct squashfs_sb_info *msblk, 78 + void **buffer, struct buffer_head **bh, int b, int offset, int length, 79 + int srclength, int pages) 80 + { 81 + struct squashfs_stream __percpu *percpu = 82 + (struct squashfs_stream __percpu *) msblk->stream; 83 + struct squashfs_stream *stream = get_cpu_ptr(percpu); 84 + int res = msblk->decompressor->decompress(msblk, stream->stream, buffer, 85 + bh, b, offset, length, srclength, pages); 86 + put_cpu_ptr(stream); 87 + 88 + if (res < 0) 89 + ERROR("%s decompression failed, data probably corrupt\n", 90 + msblk->decompressor->name); 91 + 92 + return res; 93 + } 94 + 95 + int squashfs_max_decompressors(void) 96 + { 97 + return num_possible_cpus(); 98 + }