Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lib/lzo: Update LZO compression to current upstream version

This commit updates the kernel LZO code to the current upstream version
which features a significant speed improvement - benchmarking the Calgary
and Silesia test corpora typically shows a doubled performance in
both compression and decompression on modern i386/x86_64/powerpc machines.

Signed-off-by: Markus F.X.J. Oberhumer <markus@oberhumer.com>

+405 -353
+8 -7
include/linux/lzo.h
··· 4 4 * LZO Public Kernel Interface 5 5 * A mini subset of the LZO real-time data compression library 6 6 * 7 - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> 7 + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> 8 8 * 9 9 * The full LZO package can be found at: 10 10 * http://www.oberhumer.com/opensource/lzo/ 11 11 * 12 - * Changed for kernel use by: 12 + * Changed for Linux kernel use by: 13 13 * Nitin Gupta <nitingupta910@gmail.com> 14 14 * Richard Purdie <rpurdie@openedhand.com> 15 15 */ 16 16 17 - #define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *)) 18 - #define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS 17 + #define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short)) 18 + #define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS 19 19 20 20 #define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) 21 21 22 - /* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */ 22 + /* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ 23 23 int lzo1x_1_compress(const unsigned char *src, size_t src_len, 24 - unsigned char *dst, size_t *dst_len, void *wrkmem); 24 + unsigned char *dst, size_t *dst_len, void *wrkmem); 25 25 26 26 /* safe decompression with overrun testing */ 27 27 int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, 28 - unsigned char *dst, size_t *dst_len); 28 + unsigned char *dst, size_t *dst_len); 29 29 30 30 /* 31 31 * Return values (< 0 = Error) ··· 40 40 #define LZO_E_EOF_NOT_FOUND (-7) 41 41 #define LZO_E_INPUT_NOT_CONSUMED (-8) 42 42 #define LZO_E_NOT_YET_IMPLEMENTED (-9) 43 + #define LZO_E_INVALID_ARGUMENT (-10) 43 44 44 45 #endif
+201 -148
lib/lzo/lzo1x_compress.c
··· 1 1 /* 2 - * LZO1X Compressor from MiniLZO 2 + * LZO1X Compressor from LZO 3 3 * 4 - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> 4 + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> 5 5 * 6 6 * The full LZO package can be found at: 7 7 * http://www.oberhumer.com/opensource/lzo/ 8 8 * 9 - * Changed for kernel use by: 9 + * Changed for Linux kernel use by: 10 10 * Nitin Gupta <nitingupta910@gmail.com> 11 11 * Richard Purdie <rpurdie@openedhand.com> 12 12 */ 13 13 14 14 #include <linux/module.h> 15 15 #include <linux/kernel.h> 16 - #include <linux/lzo.h> 17 16 #include <asm/unaligned.h> 17 + #include <linux/lzo.h> 18 18 #include "lzodefs.h" 19 19 20 20 static noinline size_t 21 - _lzo1x_1_do_compress(const unsigned char *in, size_t in_len, 22 - unsigned char *out, size_t *out_len, void *wrkmem) 21 + lzo1x_1_do_compress(const unsigned char *in, size_t in_len, 22 + unsigned char *out, size_t *out_len, 23 + size_t ti, void *wrkmem) 23 24 { 25 + const unsigned char *ip; 26 + unsigned char *op; 24 27 const unsigned char * const in_end = in + in_len; 25 - const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; 26 - const unsigned char ** const dict = wrkmem; 27 - const unsigned char *ip = in, *ii = ip; 28 - const unsigned char *end, *m, *m_pos; 29 - size_t m_off, m_len, dindex; 30 - unsigned char *op = out; 28 + const unsigned char * const ip_end = in + in_len - 20; 29 + const unsigned char *ii; 30 + lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; 31 31 32 - ip += 4; 32 + op = out; 33 + ip = in; 34 + ii = ip; 35 + ip += ti < 4 ? 
4 - ti : 0; 33 36 34 37 for (;;) { 35 - dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; 36 - m_pos = dict[dindex]; 37 - 38 - if (m_pos < in) 39 - goto literal; 40 - 41 - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) 42 - goto literal; 43 - 44 - m_off = ip - m_pos; 45 - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) 46 - goto try_match; 47 - 48 - dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); 49 - m_pos = dict[dindex]; 50 - 51 - if (m_pos < in) 52 - goto literal; 53 - 54 - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) 55 - goto literal; 56 - 57 - m_off = ip - m_pos; 58 - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) 59 - goto try_match; 60 - 61 - goto literal; 62 - 63 - try_match: 64 - if (get_unaligned((const unsigned short *)m_pos) 65 - == get_unaligned((const unsigned short *)ip)) { 66 - if (likely(m_pos[2] == ip[2])) 67 - goto match; 68 - } 69 - 38 + const unsigned char *m_pos; 39 + size_t t, m_len, m_off; 40 + u32 dv; 70 41 literal: 71 - dict[dindex] = ip; 72 - ++ip; 42 + ip += 1 + ((ip - ii) >> 5); 43 + next: 73 44 if (unlikely(ip >= ip_end)) 74 45 break; 75 - continue; 46 + dv = get_unaligned_le32(ip); 47 + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; 48 + m_pos = in + dict[t]; 49 + dict[t] = (lzo_dict_t) (ip - in); 50 + if (unlikely(dv != get_unaligned_le32(m_pos))) 51 + goto literal; 76 52 77 - match: 78 - dict[dindex] = ip; 79 - if (ip != ii) { 80 - size_t t = ip - ii; 81 - 53 + ii -= ti; 54 + ti = 0; 55 + t = ip - ii; 56 + if (t != 0) { 82 57 if (t <= 3) { 83 58 op[-2] |= t; 84 - } else if (t <= 18) { 59 + COPY4(op, ii); 60 + op += t; 61 + } else if (t <= 16) { 85 62 *op++ = (t - 3); 63 + COPY8(op, ii); 64 + COPY8(op + 8, ii + 8); 65 + op += t; 86 66 } else { 87 - size_t tt = t - 18; 88 - 89 - *op++ = 0; 90 - while (tt > 255) { 91 - tt -= 255; 67 + if (t <= 18) { 68 + *op++ = (t - 3); 69 + } else { 70 + size_t tt = t - 18; 92 71 *op++ = 0; 93 - } 94 - *op++ = tt; 95 - } 96 - do { 97 - *op++ = 
*ii++; 98 - } while (--t > 0); 99 - } 100 - 101 - ip += 3; 102 - if (m_pos[3] != *ip++ || m_pos[4] != *ip++ 103 - || m_pos[5] != *ip++ || m_pos[6] != *ip++ 104 - || m_pos[7] != *ip++ || m_pos[8] != *ip++) { 105 - --ip; 106 - m_len = ip - ii; 107 - 108 - if (m_off <= M2_MAX_OFFSET) { 109 - m_off -= 1; 110 - *op++ = (((m_len - 1) << 5) 111 - | ((m_off & 7) << 2)); 112 - *op++ = (m_off >> 3); 113 - } else if (m_off <= M3_MAX_OFFSET) { 114 - m_off -= 1; 115 - *op++ = (M3_MARKER | (m_len - 2)); 116 - goto m3_m4_offset; 117 - } else { 118 - m_off -= 0x4000; 119 - 120 - *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) 121 - | (m_len - 2)); 122 - goto m3_m4_offset; 123 - } 124 - } else { 125 - end = in_end; 126 - m = m_pos + M2_MAX_LEN + 1; 127 - 128 - while (ip < end && *m == *ip) { 129 - m++; 130 - ip++; 131 - } 132 - m_len = ip - ii; 133 - 134 - if (m_off <= M3_MAX_OFFSET) { 135 - m_off -= 1; 136 - if (m_len <= 33) { 137 - *op++ = (M3_MARKER | (m_len - 2)); 138 - } else { 139 - m_len -= 33; 140 - *op++ = M3_MARKER | 0; 141 - goto m3_m4_len; 142 - } 143 - } else { 144 - m_off -= 0x4000; 145 - if (m_len <= M4_MAX_LEN) { 146 - *op++ = (M4_MARKER 147 - | ((m_off & 0x4000) >> 11) 148 - | (m_len - 2)); 149 - } else { 150 - m_len -= M4_MAX_LEN; 151 - *op++ = (M4_MARKER 152 - | ((m_off & 0x4000) >> 11)); 153 - m3_m4_len: 154 - while (m_len > 255) { 155 - m_len -= 255; 72 + while (unlikely(tt > 255)) { 73 + tt -= 255; 156 74 *op++ = 0; 157 75 } 158 - 159 - *op++ = (m_len); 76 + *op++ = tt; 160 77 } 78 + do { 79 + COPY8(op, ii); 80 + COPY8(op + 8, ii + 8); 81 + op += 16; 82 + ii += 16; 83 + t -= 16; 84 + } while (t >= 16); 85 + if (t > 0) do { 86 + *op++ = *ii++; 87 + } while (--t > 0); 161 88 } 162 - m3_m4_offset: 163 - *op++ = ((m_off & 63) << 2); 164 - *op++ = (m_off >> 6); 165 89 } 166 90 167 - ii = ip; 168 - if (unlikely(ip >= ip_end)) 169 - break; 170 - } 91 + m_len = 4; 92 + { 93 + #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) 94 + u64 v; 95 + 
v = get_unaligned((const u64 *) (ip + m_len)) ^ 96 + get_unaligned((const u64 *) (m_pos + m_len)); 97 + if (unlikely(v == 0)) { 98 + do { 99 + m_len += 8; 100 + v = get_unaligned((const u64 *) (ip + m_len)) ^ 101 + get_unaligned((const u64 *) (m_pos + m_len)); 102 + if (unlikely(ip + m_len >= ip_end)) 103 + goto m_len_done; 104 + } while (v == 0); 105 + } 106 + # if defined(__LITTLE_ENDIAN) 107 + m_len += (unsigned) __builtin_ctzll(v) / 8; 108 + # elif defined(__BIG_ENDIAN) 109 + m_len += (unsigned) __builtin_clzll(v) / 8; 110 + # else 111 + # error "missing endian definition" 112 + # endif 113 + #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) 114 + u32 v; 115 + v = get_unaligned((const u32 *) (ip + m_len)) ^ 116 + get_unaligned((const u32 *) (m_pos + m_len)); 117 + if (unlikely(v == 0)) { 118 + do { 119 + m_len += 4; 120 + v = get_unaligned((const u32 *) (ip + m_len)) ^ 121 + get_unaligned((const u32 *) (m_pos + m_len)); 122 + if (v != 0) 123 + break; 124 + m_len += 4; 125 + v = get_unaligned((const u32 *) (ip + m_len)) ^ 126 + get_unaligned((const u32 *) (m_pos + m_len)); 127 + if (unlikely(ip + m_len >= ip_end)) 128 + goto m_len_done; 129 + } while (v == 0); 130 + } 131 + # if defined(__LITTLE_ENDIAN) 132 + m_len += (unsigned) __builtin_ctz(v) / 8; 133 + # elif defined(__BIG_ENDIAN) 134 + m_len += (unsigned) __builtin_clz(v) / 8; 135 + # else 136 + # error "missing endian definition" 137 + # endif 138 + #else 139 + if (unlikely(ip[m_len] == m_pos[m_len])) { 140 + do { 141 + m_len += 1; 142 + if (ip[m_len] != m_pos[m_len]) 143 + break; 144 + m_len += 1; 145 + if (ip[m_len] != m_pos[m_len]) 146 + break; 147 + m_len += 1; 148 + if (ip[m_len] != m_pos[m_len]) 149 + break; 150 + m_len += 1; 151 + if (ip[m_len] != m_pos[m_len]) 152 + break; 153 + m_len += 1; 154 + if (ip[m_len] != m_pos[m_len]) 155 + break; 156 + m_len += 1; 157 + if (ip[m_len] != m_pos[m_len]) 158 + break; 159 + m_len += 1; 160 + if (ip[m_len] != m_pos[m_len]) 161 + 
break; 162 + m_len += 1; 163 + if (unlikely(ip + m_len >= ip_end)) 164 + goto m_len_done; 165 + } while (ip[m_len] == m_pos[m_len]); 166 + } 167 + #endif 168 + } 169 + m_len_done: 171 170 171 + m_off = ip - m_pos; 172 + ip += m_len; 173 + ii = ip; 174 + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { 175 + m_off -= 1; 176 + *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); 177 + *op++ = (m_off >> 3); 178 + } else if (m_off <= M3_MAX_OFFSET) { 179 + m_off -= 1; 180 + if (m_len <= M3_MAX_LEN) 181 + *op++ = (M3_MARKER | (m_len - 2)); 182 + else { 183 + m_len -= M3_MAX_LEN; 184 + *op++ = M3_MARKER | 0; 185 + while (unlikely(m_len > 255)) { 186 + m_len -= 255; 187 + *op++ = 0; 188 + } 189 + *op++ = (m_len); 190 + } 191 + *op++ = (m_off << 2); 192 + *op++ = (m_off >> 6); 193 + } else { 194 + m_off -= 0x4000; 195 + if (m_len <= M4_MAX_LEN) 196 + *op++ = (M4_MARKER | ((m_off >> 11) & 8) 197 + | (m_len - 2)); 198 + else { 199 + m_len -= M4_MAX_LEN; 200 + *op++ = (M4_MARKER | ((m_off >> 11) & 8)); 201 + while (unlikely(m_len > 255)) { 202 + m_len -= 255; 203 + *op++ = 0; 204 + } 205 + *op++ = (m_len); 206 + } 207 + *op++ = (m_off << 2); 208 + *op++ = (m_off >> 6); 209 + } 210 + goto next; 211 + } 172 212 *out_len = op - out; 173 - return in_end - ii; 213 + return in_end - (ii - ti); 174 214 } 175 215 176 - int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, 177 - size_t *out_len, void *wrkmem) 216 + int lzo1x_1_compress(const unsigned char *in, size_t in_len, 217 + unsigned char *out, size_t *out_len, 218 + void *wrkmem) 178 219 { 179 - const unsigned char *ii; 220 + const unsigned char *ip = in; 180 221 unsigned char *op = out; 181 - size_t t; 222 + size_t l = in_len; 223 + size_t t = 0; 182 224 183 - if (unlikely(in_len <= M2_MAX_LEN + 5)) { 184 - t = in_len; 185 - } else { 186 - t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); 225 + while (l > 20) { 226 + size_t ll = l <= (M4_MAX_OFFSET + 1) ? 
l : (M4_MAX_OFFSET + 1); 227 + uintptr_t ll_end = (uintptr_t) ip + ll; 228 + if ((ll_end + ((t + ll) >> 5)) <= ll_end) 229 + break; 230 + BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); 231 + memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); 232 + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); 233 + ip += ll; 187 234 op += *out_len; 235 + l -= ll; 188 236 } 237 + t += l; 189 238 190 239 if (t > 0) { 191 - ii = in + in_len - t; 240 + const unsigned char *ii = in + in_len - t; 192 241 193 242 if (op == out && t <= 238) { 194 243 *op++ = (17 + t); ··· 247 198 *op++ = (t - 3); 248 199 } else { 249 200 size_t tt = t - 18; 250 - 251 201 *op++ = 0; 252 202 while (tt > 255) { 253 203 tt -= 255; 254 204 *op++ = 0; 255 205 } 256 - 257 206 *op++ = tt; 258 207 } 259 - do { 208 + if (t >= 16) do { 209 + COPY8(op, ii); 210 + COPY8(op + 8, ii + 8); 211 + op += 16; 212 + ii += 16; 213 + t -= 16; 214 + } while (t >= 16); 215 + if (t > 0) do { 260 216 *op++ = *ii++; 261 217 } while (--t > 0); 262 218 } ··· 277 223 278 224 MODULE_LICENSE("GPL"); 279 225 MODULE_DESCRIPTION("LZO1X-1 Compressor"); 280 -
+169 -187
lib/lzo/lzo1x_decompress_safe.c
··· 1 1 /* 2 - * LZO1X Decompressor from MiniLZO 2 + * LZO1X Decompressor from LZO 3 3 * 4 - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> 4 + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> 5 5 * 6 6 * The full LZO package can be found at: 7 7 * http://www.oberhumer.com/opensource/lzo/ 8 8 * 9 - * Changed for kernel use by: 9 + * Changed for Linux kernel use by: 10 10 * Nitin Gupta <nitingupta910@gmail.com> 11 11 * Richard Purdie <rpurdie@openedhand.com> 12 12 */ ··· 15 15 #include <linux/module.h> 16 16 #include <linux/kernel.h> 17 17 #endif 18 - 19 18 #include <asm/unaligned.h> 20 19 #include <linux/lzo.h> 21 20 #include "lzodefs.h" 22 21 23 - #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) 24 - #define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) 25 - #define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) 26 - 27 - #define COPY4(dst, src) \ 28 - put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) 22 + #define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) 23 + #define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) 24 + #define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun 25 + #define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun 26 + #define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun 29 27 30 28 int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, 31 - unsigned char *out, size_t *out_len) 29 + unsigned char *out, size_t *out_len) 32 30 { 31 + unsigned char *op; 32 + const unsigned char *ip; 33 + size_t t, next; 34 + size_t state = 0; 35 + const unsigned char *m_pos; 33 36 const unsigned char * const ip_end = in + in_len; 34 37 unsigned char * const op_end = out + *out_len; 35 - const unsigned char *ip = in, *m_pos; 36 - unsigned char *op = out; 37 - size_t t; 38 38 39 - *out_len = 0; 39 + op = out; 40 + ip = in; 40 41 42 + if (unlikely(in_len < 3)) 43 + goto input_overrun; 41 44 if (*ip > 17) { 42 45 t = *ip++ - 17; 43 - if (t < 4) 46 
+ if (t < 4) { 47 + next = t; 44 48 goto match_next; 45 - if (HAVE_OP(t, op_end, op)) 46 - goto output_overrun; 47 - if (HAVE_IP(t + 1, ip_end, ip)) 48 - goto input_overrun; 49 - do { 50 - *op++ = *ip++; 51 - } while (--t > 0); 52 - goto first_literal_run; 49 + } 50 + goto copy_literal_run; 53 51 } 54 52 55 - while ((ip < ip_end)) { 53 + for (;;) { 56 54 t = *ip++; 57 - if (t >= 16) 58 - goto match; 59 - if (t == 0) { 60 - if (HAVE_IP(1, ip_end, ip)) 61 - goto input_overrun; 62 - while (*ip == 0) { 63 - t += 255; 64 - ip++; 65 - if (HAVE_IP(1, ip_end, ip)) 66 - goto input_overrun; 67 - } 68 - t += 15 + *ip++; 69 - } 70 - if (HAVE_OP(t + 3, op_end, op)) 71 - goto output_overrun; 72 - if (HAVE_IP(t + 4, ip_end, ip)) 73 - goto input_overrun; 74 - 75 - COPY4(op, ip); 76 - op += 4; 77 - ip += 4; 78 - if (--t > 0) { 79 - if (t >= 4) { 80 - do { 81 - COPY4(op, ip); 82 - op += 4; 83 - ip += 4; 84 - t -= 4; 85 - } while (t >= 4); 86 - if (t > 0) { 55 + if (t < 16) { 56 + if (likely(state == 0)) { 57 + if (unlikely(t == 0)) { 58 + while (unlikely(*ip == 0)) { 59 + t += 255; 60 + ip++; 61 + NEED_IP(1); 62 + } 63 + t += 15 + *ip++; 64 + } 65 + t += 3; 66 + copy_literal_run: 67 + #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 68 + if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { 69 + const unsigned char *ie = ip + t; 70 + unsigned char *oe = op + t; 71 + do { 72 + COPY8(op, ip); 73 + op += 8; 74 + ip += 8; 75 + COPY8(op, ip); 76 + op += 8; 77 + ip += 8; 78 + } while (ip < ie); 79 + ip = ie; 80 + op = oe; 81 + } else 82 + #endif 83 + { 84 + NEED_OP(t); 85 + NEED_IP(t + 3); 87 86 do { 88 87 *op++ = *ip++; 89 88 } while (--t > 0); 90 89 } 91 - } else { 92 - do { 93 - *op++ = *ip++; 94 - } while (--t > 0); 95 - } 96 - } 97 - 98 - first_literal_run: 99 - t = *ip++; 100 - if (t >= 16) 101 - goto match; 102 - m_pos = op - (1 + M2_MAX_OFFSET); 103 - m_pos -= t >> 2; 104 - m_pos -= *ip++ << 2; 105 - 106 - if (HAVE_LB(m_pos, out, op)) 107 - goto lookbehind_overrun; 108 - 109 - if 
(HAVE_OP(3, op_end, op)) 110 - goto output_overrun; 111 - *op++ = *m_pos++; 112 - *op++ = *m_pos++; 113 - *op++ = *m_pos; 114 - 115 - goto match_done; 116 - 117 - do { 118 - match: 119 - if (t >= 64) { 120 - m_pos = op - 1; 121 - m_pos -= (t >> 2) & 7; 122 - m_pos -= *ip++ << 3; 123 - t = (t >> 5) - 1; 124 - if (HAVE_LB(m_pos, out, op)) 125 - goto lookbehind_overrun; 126 - if (HAVE_OP(t + 3 - 1, op_end, op)) 127 - goto output_overrun; 128 - goto copy_match; 129 - } else if (t >= 32) { 130 - t &= 31; 131 - if (t == 0) { 132 - if (HAVE_IP(1, ip_end, ip)) 133 - goto input_overrun; 134 - while (*ip == 0) { 135 - t += 255; 136 - ip++; 137 - if (HAVE_IP(1, ip_end, ip)) 138 - goto input_overrun; 139 - } 140 - t += 31 + *ip++; 141 - } 142 - m_pos = op - 1; 143 - m_pos -= get_unaligned_le16(ip) >> 2; 144 - ip += 2; 145 - } else if (t >= 16) { 146 - m_pos = op; 147 - m_pos -= (t & 8) << 11; 148 - 149 - t &= 7; 150 - if (t == 0) { 151 - if (HAVE_IP(1, ip_end, ip)) 152 - goto input_overrun; 153 - while (*ip == 0) { 154 - t += 255; 155 - ip++; 156 - if (HAVE_IP(1, ip_end, ip)) 157 - goto input_overrun; 158 - } 159 - t += 7 + *ip++; 160 - } 161 - m_pos -= get_unaligned_le16(ip) >> 2; 162 - ip += 2; 163 - if (m_pos == op) 164 - goto eof_found; 165 - m_pos -= 0x4000; 166 - } else { 90 + state = 4; 91 + continue; 92 + } else if (state != 4) { 93 + next = t & 3; 167 94 m_pos = op - 1; 168 95 m_pos -= t >> 2; 169 96 m_pos -= *ip++ << 2; 170 - 171 - if (HAVE_LB(m_pos, out, op)) 172 - goto lookbehind_overrun; 173 - if (HAVE_OP(2, op_end, op)) 174 - goto output_overrun; 175 - 176 - *op++ = *m_pos++; 177 - *op++ = *m_pos; 178 - goto match_done; 179 - } 180 - 181 - if (HAVE_LB(m_pos, out, op)) 182 - goto lookbehind_overrun; 183 - if (HAVE_OP(t + 3 - 1, op_end, op)) 184 - goto output_overrun; 185 - 186 - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { 187 - COPY4(op, m_pos); 188 - op += 4; 189 - m_pos += 4; 190 - t -= 4 - (3 - 1); 191 - do { 192 - COPY4(op, m_pos); 193 - op += 4; 194 - 
m_pos += 4; 195 - t -= 4; 196 - } while (t >= 4); 197 - if (t > 0) 198 - do { 199 - *op++ = *m_pos++; 200 - } while (--t > 0); 97 + TEST_LB(m_pos); 98 + NEED_OP(2); 99 + op[0] = m_pos[0]; 100 + op[1] = m_pos[1]; 101 + op += 2; 102 + goto match_next; 201 103 } else { 202 - copy_match: 203 - *op++ = *m_pos++; 204 - *op++ = *m_pos++; 104 + next = t & 3; 105 + m_pos = op - (1 + M2_MAX_OFFSET); 106 + m_pos -= t >> 2; 107 + m_pos -= *ip++ << 2; 108 + t = 3; 109 + } 110 + } else if (t >= 64) { 111 + next = t & 3; 112 + m_pos = op - 1; 113 + m_pos -= (t >> 2) & 7; 114 + m_pos -= *ip++ << 3; 115 + t = (t >> 5) - 1 + (3 - 1); 116 + } else if (t >= 32) { 117 + t = (t & 31) + (3 - 1); 118 + if (unlikely(t == 2)) { 119 + while (unlikely(*ip == 0)) { 120 + t += 255; 121 + ip++; 122 + NEED_IP(1); 123 + } 124 + t += 31 + *ip++; 125 + NEED_IP(2); 126 + } 127 + m_pos = op - 1; 128 + next = get_unaligned_le16(ip); 129 + ip += 2; 130 + m_pos -= next >> 2; 131 + next &= 3; 132 + } else { 133 + m_pos = op; 134 + m_pos -= (t & 8) << 11; 135 + t = (t & 7) + (3 - 1); 136 + if (unlikely(t == 2)) { 137 + while (unlikely(*ip == 0)) { 138 + t += 255; 139 + ip++; 140 + NEED_IP(1); 141 + } 142 + t += 7 + *ip++; 143 + NEED_IP(2); 144 + } 145 + next = get_unaligned_le16(ip); 146 + ip += 2; 147 + m_pos -= next >> 2; 148 + next &= 3; 149 + if (m_pos == op) 150 + goto eof_found; 151 + m_pos -= 0x4000; 152 + } 153 + TEST_LB(m_pos); 154 + #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 155 + if (op - m_pos >= 8) { 156 + unsigned char *oe = op + t; 157 + if (likely(HAVE_OP(t + 15))) { 158 + do { 159 + COPY8(op, m_pos); 160 + op += 8; 161 + m_pos += 8; 162 + COPY8(op, m_pos); 163 + op += 8; 164 + m_pos += 8; 165 + } while (op < oe); 166 + op = oe; 167 + if (HAVE_IP(6)) { 168 + state = next; 169 + COPY4(op, ip); 170 + op += next; 171 + ip += next; 172 + continue; 173 + } 174 + } else { 175 + NEED_OP(t); 205 176 do { 206 177 *op++ = *m_pos++; 207 - } while (--t > 0); 178 + } while (op < oe); 208 179 } 
209 - match_done: 210 - t = ip[-2] & 3; 211 - if (t == 0) 212 - break; 180 + } else 181 + #endif 182 + { 183 + unsigned char *oe = op + t; 184 + NEED_OP(t); 185 + op[0] = m_pos[0]; 186 + op[1] = m_pos[1]; 187 + op += 2; 188 + m_pos += 2; 189 + do { 190 + *op++ = *m_pos++; 191 + } while (op < oe); 192 + } 213 193 match_next: 214 - if (HAVE_OP(t, op_end, op)) 215 - goto output_overrun; 216 - if (HAVE_IP(t + 1, ip_end, ip)) 217 - goto input_overrun; 218 - 219 - *op++ = *ip++; 220 - if (t > 1) { 194 + state = next; 195 + t = next; 196 + #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 197 + if (likely(HAVE_IP(6) && HAVE_OP(4))) { 198 + COPY4(op, ip); 199 + op += t; 200 + ip += t; 201 + } else 202 + #endif 203 + { 204 + NEED_IP(t + 3); 205 + NEED_OP(t); 206 + while (t > 0) { 221 207 *op++ = *ip++; 222 - if (t > 2) 223 - *op++ = *ip++; 208 + t--; 224 209 } 225 - 226 - t = *ip++; 227 - } while (ip < ip_end); 210 + } 228 211 } 229 - 230 - *out_len = op - out; 231 - return LZO_E_EOF_NOT_FOUND; 232 212 233 213 eof_found: 234 214 *out_len = op - out; 235 - return (ip == ip_end ? LZO_E_OK : 236 - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); 215 + return (t != 3 ? LZO_E_ERROR : 216 + ip == ip_end ? LZO_E_OK : 217 + ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN); 218 + 237 219 input_overrun: 238 220 *out_len = op - out; 239 221 return LZO_E_INPUT_OVERRUN;
+27 -11
lib/lzo/lzodefs.h
··· 1 1 /* 2 2 * lzodefs.h -- architecture, OS and compiler specific defines 3 3 * 4 - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> 4 + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> 5 5 * 6 6 * The full LZO package can be found at: 7 7 * http://www.oberhumer.com/opensource/lzo/ 8 8 * 9 - * Changed for kernel use by: 9 + * Changed for Linux kernel use by: 10 10 * Nitin Gupta <nitingupta910@gmail.com> 11 11 * Richard Purdie <rpurdie@openedhand.com> 12 12 */ 13 13 14 - #define LZO_VERSION 0x2020 15 - #define LZO_VERSION_STRING "2.02" 16 - #define LZO_VERSION_DATE "Oct 17 2005" 14 + 15 + #define COPY4(dst, src) \ 16 + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) 17 + #if defined(__x86_64__) 18 + #define COPY8(dst, src) \ 19 + put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) 20 + #else 21 + #define COPY8(dst, src) \ 22 + COPY4(dst, src); COPY4((dst) + 4, (src) + 4) 23 + #endif 24 + 25 + #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) 26 + #error "conflicting endian definitions" 27 + #elif defined(__x86_64__) 28 + #define LZO_USE_CTZ64 1 29 + #define LZO_USE_CTZ32 1 30 + #elif defined(__i386__) || defined(__powerpc__) 31 + #define LZO_USE_CTZ32 1 32 + #elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) 33 + #define LZO_USE_CTZ32 1 34 + #endif 17 35 18 36 #define M1_MAX_OFFSET 0x0400 19 37 #define M2_MAX_OFFSET 0x0800 ··· 52 34 #define M3_MARKER 32 53 35 #define M4_MARKER 16 54 36 55 - #define D_BITS 14 56 - #define D_MASK ((1u << D_BITS) - 1) 37 + #define lzo_dict_t unsigned short 38 + #define D_BITS 13 39 + #define D_SIZE (1u << D_BITS) 40 + #define D_MASK (D_SIZE - 1) 57 41 #define D_HIGH ((D_MASK >> 1) + 1) 58 - 59 - #define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ 60 - << (s1)) ^ (p)[0]) 61 - #define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0])