crypto/gf128mul.c at v4.14-rc1

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / crypto / gf128mul.c
at v4.14-rc1 405 lines 12 kB view raw
wrap content
  1/* gf128mul.c - GF(2^128) multiplication functions
  2 *
  3 * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
  4 * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
  5 *
  6 * Based on Dr Brian Gladman's (GPL'd) work published at
  7 * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
  8 * See the original copyright notice below.
  9 *
 10 * This program is free software; you can redistribute it and/or modify it
 11 * under the terms of the GNU General Public License as published by the Free
 12 * Software Foundation; either version 2 of the License, or (at your option)
 13 * any later version.
 14 */
 15
 16/*
 17 ---------------------------------------------------------------------------
 18 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
 19
 20 LICENSE TERMS
 21
 22 The free distribution and use of this software in both source and binary
 23 form is allowed (with or without changes) provided that:
 24
 25   1. distributions of this source code include the above copyright
 26      notice, this list of conditions and the following disclaimer;
 27
 28   2. distributions in binary form include the above copyright
 29      notice, this list of conditions and the following disclaimer
 30      in the documentation and/or other associated materials;
 31
 32   3. the copyright holder's name is not used to endorse products
 33      built using this software without specific written permission.
 34
 35 ALTERNATIVELY, provided that this notice is retained in full, this product
 36 may be distributed under the terms of the GNU General Public License (GPL),
 37 in which case the provisions of the GPL apply INSTEAD OF those given above.
 38
 39 DISCLAIMER
 40
 41 This software is provided 'as is' with no explicit or implied warranties
 42 in respect of its properties, including, but not limited to, correctness
 43 and/or fitness for purpose.
 44 ---------------------------------------------------------------------------
 45 Issue 31/01/2006
 46
 47 This file provides fast multiplication in GF(2^128) as required by several
 48 cryptographic authentication modes
 49*/
 50
 51#include <crypto/gf128mul.h>
 52#include <linux/kernel.h>
 53#include <linux/module.h>
 54#include <linux/slab.h>
 55
 56#define gf128mul_dat(q) { \
 57	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
 58	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
 59	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
 60	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
 61	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
 62	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
 63	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
 64	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
 65	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
 66	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
 67	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
 68	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
 69	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
 70	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
 71	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
 72	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
 73	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
 74	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
 75	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
 76	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
 77	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
 78	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
 79	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
 80	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
 81	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
 82	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
 83	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
 84	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
 85	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
 86	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
 87	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
 88	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
 89}
 90
 91/*
 92 * Given a value i in 0..255 as the byte overflow when a field element
 93 * in GF(2^128) is multiplied by x^8, the following macro returns the
 94 * 16-bit value that must be XOR-ed into the low-degree end of the
 95 * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
 96 *
 97 * There are two versions of the macro, and hence two tables: one for
 98 * the "be" convention where the highest-order bit is the coefficient of
 99 * the highest-degree polynomial term, and one for the "le" convention
100 * where the highest-order bit is the coefficient of the lowest-degree
101 * polynomial term.  In both cases the values are stored in CPU byte
102 * endianness such that the coefficients are ordered consistently across
103 * bytes, i.e. in the "be" table bits 15..0 of the stored value
104 * correspond to the coefficients of x^15..x^0, and in the "le" table
105 * bits 15..0 correspond to the coefficients of x^0..x^15.
106 *
107 * Therefore, provided that the appropriate byte endianness conversions
108 * are done by the multiplication functions (and these must be in place
109 * anyway to support both little endian and big endian CPUs), the "be"
110 * table can be used for multiplications of both "bbe" and "ble"
111 * elements, and the "le" table can be used for multiplications of both
112 * "lle" and "lbe" elements.
113 */
114
115#define xda_be(i) ( \
116	(i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
117	(i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
118	(i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
119	(i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
120)
121
122#define xda_le(i) ( \
123	(i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
124	(i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
125	(i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
126	(i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
127)
128
129static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
130static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
131
132/*
133 * The following functions multiply a field element by x^8 in
134 * the polynomial field representation.  They use 64-bit word operations
135 * to gain speed but compensate for machine endianness and hence work
136 * correctly on both styles of machine.
137 */
138
139static void gf128mul_x8_lle(be128 *x)
140{
141	u64 a = be64_to_cpu(x->a);
142	u64 b = be64_to_cpu(x->b);
143	u64 _tt = gf128mul_table_le[b & 0xff];
144
145	x->b = cpu_to_be64((b >> 8) | (a << 56));
146	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
147}
148
149static void gf128mul_x8_bbe(be128 *x)
150{
151	u64 a = be64_to_cpu(x->a);
152	u64 b = be64_to_cpu(x->b);
153	u64 _tt = gf128mul_table_be[a >> 56];
154
155	x->a = cpu_to_be64((a << 8) | (b >> 56));
156	x->b = cpu_to_be64((b << 8) ^ _tt);
157}
158
159void gf128mul_lle(be128 *r, const be128 *b)
160{
161	be128 p[8];
162	int i;
163
164	p[0] = *r;
165	for (i = 0; i < 7; ++i)
166		gf128mul_x_lle(&p[i + 1], &p[i]);
167
168	memset(r, 0, sizeof(*r));
169	for (i = 0;;) {
170		u8 ch = ((u8 *)b)[15 - i];
171
172		if (ch & 0x80)
173			be128_xor(r, r, &p[0]);
174		if (ch & 0x40)
175			be128_xor(r, r, &p[1]);
176		if (ch & 0x20)
177			be128_xor(r, r, &p[2]);
178		if (ch & 0x10)
179			be128_xor(r, r, &p[3]);
180		if (ch & 0x08)
181			be128_xor(r, r, &p[4]);
182		if (ch & 0x04)
183			be128_xor(r, r, &p[5]);
184		if (ch & 0x02)
185			be128_xor(r, r, &p[6]);
186		if (ch & 0x01)
187			be128_xor(r, r, &p[7]);
188
189		if (++i >= 16)
190			break;
191
192		gf128mul_x8_lle(r);
193	}
194}
195EXPORT_SYMBOL(gf128mul_lle);
196
197void gf128mul_bbe(be128 *r, const be128 *b)
198{
199	be128 p[8];
200	int i;
201
202	p[0] = *r;
203	for (i = 0; i < 7; ++i)
204		gf128mul_x_bbe(&p[i + 1], &p[i]);
205
206	memset(r, 0, sizeof(*r));
207	for (i = 0;;) {
208		u8 ch = ((u8 *)b)[i];
209
210		if (ch & 0x80)
211			be128_xor(r, r, &p[7]);
212		if (ch & 0x40)
213			be128_xor(r, r, &p[6]);
214		if (ch & 0x20)
215			be128_xor(r, r, &p[5]);
216		if (ch & 0x10)
217			be128_xor(r, r, &p[4]);
218		if (ch & 0x08)
219			be128_xor(r, r, &p[3]);
220		if (ch & 0x04)
221			be128_xor(r, r, &p[2]);
222		if (ch & 0x02)
223			be128_xor(r, r, &p[1]);
224		if (ch & 0x01)
225			be128_xor(r, r, &p[0]);
226
227		if (++i >= 16)
228			break;
229
230		gf128mul_x8_bbe(r);
231	}
232}
233EXPORT_SYMBOL(gf128mul_bbe);
234
235/*      This version uses 64k bytes of table space.
236    A 16 byte buffer has to be multiplied by a 16 byte key
237    value in GF(2^128).  If we consider a GF(2^128) value in
238    the buffer's lowest byte, we can construct a table of
239    the 256 16 byte values that result from the 256 values
240    of this byte.  This requires 4096 bytes. But we also
241    need tables for each of the 16 higher bytes in the
242    buffer as well, which makes 64 kbytes in total.
243*/
244/* additional explanation
245 * t[0][BYTE] contains g*BYTE
246 * t[1][BYTE] contains g*x^8*BYTE
247 *  ..
248 * t[15][BYTE] contains g*x^120*BYTE */
249struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
250{
251	struct gf128mul_64k *t;
252	int i, j, k;
253
254	t = kzalloc(sizeof(*t), GFP_KERNEL);
255	if (!t)
256		goto out;
257
258	for (i = 0; i < 16; i++) {
259		t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
260		if (!t->t[i]) {
261			gf128mul_free_64k(t);
262			t = NULL;
263			goto out;
264		}
265	}
266
267	t->t[0]->t[1] = *g;
268	for (j = 1; j <= 64; j <<= 1)
269		gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
270
271	for (i = 0;;) {
272		for (j = 2; j < 256; j += j)
273			for (k = 1; k < j; ++k)
274				be128_xor(&t->t[i]->t[j + k],
275					  &t->t[i]->t[j], &t->t[i]->t[k]);
276
277		if (++i >= 16)
278			break;
279
280		for (j = 128; j > 0; j >>= 1) {
281			t->t[i]->t[j] = t->t[i - 1]->t[j];
282			gf128mul_x8_bbe(&t->t[i]->t[j]);
283		}
284	}
285
286out:
287	return t;
288}
289EXPORT_SYMBOL(gf128mul_init_64k_bbe);
290
291void gf128mul_free_64k(struct gf128mul_64k *t)
292{
293	int i;
294
295	for (i = 0; i < 16; i++)
296		kzfree(t->t[i]);
297	kzfree(t);
298}
299EXPORT_SYMBOL(gf128mul_free_64k);
300
301void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t)
302{
303	u8 *ap = (u8 *)a;
304	be128 r[1];
305	int i;
306
307	*r = t->t[0]->t[ap[15]];
308	for (i = 1; i < 16; ++i)
309		be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
310	*a = *r;
311}
312EXPORT_SYMBOL(gf128mul_64k_bbe);
313
314/*      This version uses 4k bytes of table space.
315    A 16 byte buffer has to be multiplied by a 16 byte key
316    value in GF(2^128).  If we consider a GF(2^128) value in a
317    single byte, we can construct a table of the 256 16 byte
318    values that result from the 256 values of this byte.
319    This requires 4096 bytes. If we take the highest byte in
320    the buffer and use this table to get the result, we then
321    have to multiply by x^120 to get the final value. For the
322    next highest byte the result has to be multiplied by x^112
323    and so on. But we can do this by accumulating the result
324    in an accumulator starting with the result for the top
325    byte.  We repeatedly multiply the accumulator value by
326    x^8 and then add in (i.e. xor) the 16 bytes of the next
327    lower byte in the buffer, stopping when we reach the
328    lowest byte. This requires a 4096 byte table.
329*/
330struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
331{
332	struct gf128mul_4k *t;
333	int j, k;
334
335	t = kzalloc(sizeof(*t), GFP_KERNEL);
336	if (!t)
337		goto out;
338
339	t->t[128] = *g;
340	for (j = 64; j > 0; j >>= 1)
341		gf128mul_x_lle(&t->t[j], &t->t[j+j]);
342
343	for (j = 2; j < 256; j += j)
344		for (k = 1; k < j; ++k)
345			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
346
347out:
348	return t;
349}
350EXPORT_SYMBOL(gf128mul_init_4k_lle);
351
352struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
353{
354	struct gf128mul_4k *t;
355	int j, k;
356
357	t = kzalloc(sizeof(*t), GFP_KERNEL);
358	if (!t)
359		goto out;
360
361	t->t[1] = *g;
362	for (j = 1; j <= 64; j <<= 1)
363		gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
364
365	for (j = 2; j < 256; j += j)
366		for (k = 1; k < j; ++k)
367			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
368
369out:
370	return t;
371}
372EXPORT_SYMBOL(gf128mul_init_4k_bbe);
373
374void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t)
375{
376	u8 *ap = (u8 *)a;
377	be128 r[1];
378	int i = 15;
379
380	*r = t->t[ap[15]];
381	while (i--) {
382		gf128mul_x8_lle(r);
383		be128_xor(r, r, &t->t[ap[i]]);
384	}
385	*a = *r;
386}
387EXPORT_SYMBOL(gf128mul_4k_lle);
388
389void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t)
390{
391	u8 *ap = (u8 *)a;
392	be128 r[1];
393	int i = 0;
394
395	*r = t->t[ap[0]];
396	while (++i < 16) {
397		gf128mul_x8_bbe(r);
398		be128_xor(r, r, &t->t[ap[i]]);
399	}
400	*a = *r;
401}
402EXPORT_SYMBOL(gf128mul_4k_bbe);
403
404MODULE_LICENSE("GPL");
405MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
Configure Feed

Configure Feed