crypto/gf128mul.c at v2.6.22-rc5

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / crypto / gf128mul.c
at v2.6.22-rc5 466 lines 13 kB view raw
wrap content
  1/* gf128mul.c - GF(2^128) multiplication functions
  2 *
  3 * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
  4 * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
  5 *
  6 * Based on Dr Brian Gladman's (GPL'd) work published at
  7 * http://fp.gladman.plus.com/cryptography_technology/index.htm
  8 * See the original copyright notice below.
  9 *
 10 * This program is free software; you can redistribute it and/or modify it
 11 * under the terms of the GNU General Public License as published by the Free
 12 * Software Foundation; either version 2 of the License, or (at your option)
 13 * any later version.
 14 */
 15
 16/*
 17 ---------------------------------------------------------------------------
 18 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
 19
 20 LICENSE TERMS
 21
 22 The free distribution and use of this software in both source and binary
 23 form is allowed (with or without changes) provided that:
 24
 25   1. distributions of this source code include the above copyright
 26      notice, this list of conditions and the following disclaimer;
 27
 28   2. distributions in binary form include the above copyright
 29      notice, this list of conditions and the following disclaimer
 30      in the documentation and/or other associated materials;
 31
 32   3. the copyright holder's name is not used to endorse products
 33      built using this software without specific written permission.
 34
 35 ALTERNATIVELY, provided that this notice is retained in full, this product
 36 may be distributed under the terms of the GNU General Public License (GPL),
 37 in which case the provisions of the GPL apply INSTEAD OF those given above.
 38
 39 DISCLAIMER
 40
 41 This software is provided 'as is' with no explicit or implied warranties
 42 in respect of its properties, including, but not limited to, correctness
 43 and/or fitness for purpose.
 44 ---------------------------------------------------------------------------
 45 Issue 31/01/2006
 46
 47 This file provides fast multiplication in GF(128) as required by several
 48 cryptographic authentication modes
 49*/
 50
 51#include <crypto/gf128mul.h>
 52#include <linux/kernel.h>
 53#include <linux/module.h>
 54#include <linux/slab.h>
 55
 56#define gf128mul_dat(q) { \
 57	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
 58	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
 59	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
 60	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
 61	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
 62	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
 63	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
 64	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
 65	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
 66	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
 67	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
 68	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
 69	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
 70	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
 71	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
 72	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
 73	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
 74	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
 75	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
 76	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
 77	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
 78	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
 79	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
 80	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
 81	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
 82	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
 83	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
 84	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
 85	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
 86	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
 87	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
 88	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
 89}
 90
/*	Given the value i in 0..255 as the byte overflow when a field element
    in GHASH is multiplied by x^8, the macros below return the values that
    are generated in the lo 16-bit word of the field value by applying the
    modular polynomial. The two bytes of each value are passed to the
    macro xx(p, q) so that they can be assembled into memory as required
    by a suitable definition of this macro, operating on the table above.
*/
 99
/* Paste two hex byte spellings p and q into one 16-bit constant 0xpq. */
#define xx(p, q)	0x##p##q

/*
 * xda_bbe(i) / xda_lle(i): XOR together one 16-bit constant for every bit
 * that is set in the overflow byte i (0..255).  Each set bit contributes
 * its own constant, so the result is the combined 16-bit value for the
 * whole byte.
 *
 * The argument is parenthesized at every use so that an expression such
 * as (a | b) is tested as a whole; without the parentheses '&' would bind
 * to only the last operand of the argument.
 */
#define xda_bbe(i) ( \
	((i) & 0x80 ? xx(43, 80) : 0) ^ ((i) & 0x40 ? xx(21, c0) : 0) ^ \
	((i) & 0x20 ? xx(10, e0) : 0) ^ ((i) & 0x10 ? xx(08, 70) : 0) ^ \
	((i) & 0x08 ? xx(04, 38) : 0) ^ ((i) & 0x04 ? xx(02, 1c) : 0) ^ \
	((i) & 0x02 ? xx(01, 0e) : 0) ^ ((i) & 0x01 ? xx(00, 87) : 0) \
)

#define xda_lle(i) ( \
	((i) & 0x80 ? xx(e1, 00) : 0) ^ ((i) & 0x40 ? xx(70, 80) : 0) ^ \
	((i) & 0x20 ? xx(38, 40) : 0) ^ ((i) & 0x10 ? xx(1c, 20) : 0) ^ \
	((i) & 0x08 ? xx(0e, 10) : 0) ^ ((i) & 0x04 ? xx(07, 08) : 0) ^ \
	((i) & 0x02 ? xx(03, 84) : 0) ^ ((i) & 0x01 ? xx(01, c2) : 0) \
)
115
116static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
117static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
118
/* These functions multiply a field element by x and by x^8
 * in the polynomial field representation. They use 64-bit word operations
 * to gain speed but compensate for machine endianness and hence work
 * correctly on both styles of machine.
 */
124
125static void gf128mul_x_lle(be128 *r, const be128 *x)
126{
127	u64 a = be64_to_cpu(x->a);
128	u64 b = be64_to_cpu(x->b);
129	u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
130
131	r->b = cpu_to_be64((b >> 1) | (a << 63));
132	r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
133}
134
135static void gf128mul_x_bbe(be128 *r, const be128 *x)
136{
137	u64 a = be64_to_cpu(x->a);
138	u64 b = be64_to_cpu(x->b);
139	u64 _tt = gf128mul_table_bbe[a >> 63];
140
141	r->a = cpu_to_be64((a << 1) | (b >> 63));
142	r->b = cpu_to_be64((b << 1) ^ _tt);
143}
144
145static void gf128mul_x8_lle(be128 *x)
146{
147	u64 a = be64_to_cpu(x->a);
148	u64 b = be64_to_cpu(x->b);
149	u64 _tt = gf128mul_table_lle[b & 0xff];
150
151	x->b = cpu_to_be64((b >> 8) | (a << 56));
152	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
153}
154
155static void gf128mul_x8_bbe(be128 *x)
156{
157	u64 a = be64_to_cpu(x->a);
158	u64 b = be64_to_cpu(x->b);
159	u64 _tt = gf128mul_table_bbe[a >> 56];
160
161	x->a = cpu_to_be64((a << 8) | (b >> 56));
162	x->b = cpu_to_be64((b << 8) ^ _tt);
163}
164
165void gf128mul_lle(be128 *r, const be128 *b)
166{
167	be128 p[8];
168	int i;
169
170	p[0] = *r;
171	for (i = 0; i < 7; ++i)
172		gf128mul_x_lle(&p[i + 1], &p[i]);
173
174	memset(r, 0, sizeof(r));
175	for (i = 0;;) {
176		u8 ch = ((u8 *)b)[15 - i];
177
178		if (ch & 0x80)
179			be128_xor(r, r, &p[0]);
180		if (ch & 0x40)
181			be128_xor(r, r, &p[1]);
182		if (ch & 0x20)
183			be128_xor(r, r, &p[2]);
184		if (ch & 0x10)
185			be128_xor(r, r, &p[3]);
186		if (ch & 0x08)
187			be128_xor(r, r, &p[4]);
188		if (ch & 0x04)
189			be128_xor(r, r, &p[5]);
190		if (ch & 0x02)
191			be128_xor(r, r, &p[6]);
192		if (ch & 0x01)
193			be128_xor(r, r, &p[7]);
194
195		if (++i >= 16)
196			break;
197
198		gf128mul_x8_lle(r);
199	}
200}
201EXPORT_SYMBOL(gf128mul_lle);
202
203void gf128mul_bbe(be128 *r, const be128 *b)
204{
205	be128 p[8];
206	int i;
207
208	p[0] = *r;
209	for (i = 0; i < 7; ++i)
210		gf128mul_x_bbe(&p[i + 1], &p[i]);
211
212	memset(r, 0, sizeof(r));
213	for (i = 0;;) {
214		u8 ch = ((u8 *)b)[i];
215
216		if (ch & 0x80)
217			be128_xor(r, r, &p[7]);
218		if (ch & 0x40)
219			be128_xor(r, r, &p[6]);
220		if (ch & 0x20)
221			be128_xor(r, r, &p[5]);
222		if (ch & 0x10)
223			be128_xor(r, r, &p[4]);
224		if (ch & 0x08)
225			be128_xor(r, r, &p[3]);
226		if (ch & 0x04)
227			be128_xor(r, r, &p[2]);
228		if (ch & 0x02)
229			be128_xor(r, r, &p[1]);
230		if (ch & 0x01)
231			be128_xor(r, r, &p[0]);
232
233		if (++i >= 16)
234			break;
235
236		gf128mul_x8_bbe(r);
237	}
238}
239EXPORT_SYMBOL(gf128mul_bbe);
240
/*      This version uses 64k bytes of table space.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(2^128).  If we consider a GF(2^128) value in
    the buffer's lowest byte, we can construct a table of
    the 256 16 byte values that result from the 256 values
    of this byte.  This requires 4096 bytes. But we also
    need tables for each of the 15 higher bytes in the
    buffer as well, which makes 64 kbytes in total.
*/
250/* additional explanation
251 * t[0][BYTE] contains g*BYTE
252 * t[1][BYTE] contains g*x^8*BYTE
253 *  ..
254 * t[15][BYTE] contains g*x^120*BYTE */
255struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g)
256{
257	struct gf128mul_64k *t;
258	int i, j, k;
259
260	t = kzalloc(sizeof(*t), GFP_KERNEL);
261	if (!t)
262		goto out;
263
264	for (i = 0; i < 16; i++) {
265		t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
266		if (!t->t[i]) {
267			gf128mul_free_64k(t);
268			t = NULL;
269			goto out;
270		}
271	}
272
273	t->t[0]->t[128] = *g;
274	for (j = 64; j > 0; j >>= 1)
275		gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]);
276
277	for (i = 0;;) {
278		for (j = 2; j < 256; j += j)
279			for (k = 1; k < j; ++k)
280				be128_xor(&t->t[i]->t[j + k],
281					  &t->t[i]->t[j], &t->t[i]->t[k]);
282
283		if (++i >= 16)
284			break;
285
286		for (j = 128; j > 0; j >>= 1) {
287			t->t[i]->t[j] = t->t[i - 1]->t[j];
288			gf128mul_x8_lle(&t->t[i]->t[j]);
289		}
290	}
291
292out:
293	return t;
294}
295EXPORT_SYMBOL(gf128mul_init_64k_lle);
296
297struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
298{
299	struct gf128mul_64k *t;
300	int i, j, k;
301
302	t = kzalloc(sizeof(*t), GFP_KERNEL);
303	if (!t)
304		goto out;
305
306	for (i = 0; i < 16; i++) {
307		t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
308		if (!t->t[i]) {
309			gf128mul_free_64k(t);
310			t = NULL;
311			goto out;
312		}
313	}
314
315	t->t[0]->t[1] = *g;
316	for (j = 1; j <= 64; j <<= 1)
317		gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
318
319	for (i = 0;;) {
320		for (j = 2; j < 256; j += j)
321			for (k = 1; k < j; ++k)
322				be128_xor(&t->t[i]->t[j + k],
323					  &t->t[i]->t[j], &t->t[i]->t[k]);
324
325		if (++i >= 16)
326			break;
327
328		for (j = 128; j > 0; j >>= 1) {
329			t->t[i]->t[j] = t->t[i - 1]->t[j];
330			gf128mul_x8_bbe(&t->t[i]->t[j]);
331		}
332	}
333
334out:
335	return t;
336}
337EXPORT_SYMBOL(gf128mul_init_64k_bbe);
338
339void gf128mul_free_64k(struct gf128mul_64k *t)
340{
341	int i;
342
343	for (i = 0; i < 16; i++)
344		kfree(t->t[i]);
345	kfree(t);
346}
347EXPORT_SYMBOL(gf128mul_free_64k);
348
349void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t)
350{
351	u8 *ap = (u8 *)a;
352	be128 r[1];
353	int i;
354
355	*r = t->t[0]->t[ap[0]];
356	for (i = 1; i < 16; ++i)
357		be128_xor(r, r, &t->t[i]->t[ap[i]]);
358	*a = *r;
359}
360EXPORT_SYMBOL(gf128mul_64k_lle);
361
362void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
363{
364	u8 *ap = (u8 *)a;
365	be128 r[1];
366	int i;
367
368	*r = t->t[0]->t[ap[15]];
369	for (i = 1; i < 16; ++i)
370		be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
371	*a = *r;
372}
373EXPORT_SYMBOL(gf128mul_64k_bbe);
374
/*      This version uses 4k bytes of table space.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(2^128).  If we consider a GF(2^128) value in a
    single byte, we can construct a table of the 256 16 byte
    values that result from the 256 values of this byte.
    This requires 4096 bytes. If we take the highest byte in
    the buffer and use this table to get the result, we then
    have to multiply by x^120 to get the final value. For the
    next highest byte the result has to be multiplied by x^112
    and so on. But we can do this by accumulating the result
    in an accumulator starting with the result for the top
    byte.  We repeatedly multiply the accumulator value by
    x^8 and then add in (i.e. xor) the 16 bytes of the next
    lower byte in the buffer, stopping when we reach the
    lowest byte. This requires a 4096 byte table.
*/
391struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
392{
393	struct gf128mul_4k *t;
394	int j, k;
395
396	t = kzalloc(sizeof(*t), GFP_KERNEL);
397	if (!t)
398		goto out;
399
400	t->t[128] = *g;
401	for (j = 64; j > 0; j >>= 1)
402		gf128mul_x_lle(&t->t[j], &t->t[j+j]);
403
404	for (j = 2; j < 256; j += j)
405		for (k = 1; k < j; ++k)
406			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
407
408out:
409	return t;
410}
411EXPORT_SYMBOL(gf128mul_init_4k_lle);
412
413struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
414{
415	struct gf128mul_4k *t;
416	int j, k;
417
418	t = kzalloc(sizeof(*t), GFP_KERNEL);
419	if (!t)
420		goto out;
421
422	t->t[1] = *g;
423	for (j = 1; j <= 64; j <<= 1)
424		gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
425
426	for (j = 2; j < 256; j += j)
427		for (k = 1; k < j; ++k)
428			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
429
430out:
431	return t;
432}
433EXPORT_SYMBOL(gf128mul_init_4k_bbe);
434
435void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
436{
437	u8 *ap = (u8 *)a;
438	be128 r[1];
439	int i = 15;
440
441	*r = t->t[ap[15]];
442	while (i--) {
443		gf128mul_x8_lle(r);
444		be128_xor(r, r, &t->t[ap[i]]);
445	}
446	*a = *r;
447}
448EXPORT_SYMBOL(gf128mul_4k_lle);
449
450void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
451{
452	u8 *ap = (u8 *)a;
453	be128 r[1];
454	int i = 0;
455
456	*r = t->t[ap[0]];
457	while (++i < 16) {
458		gf128mul_x8_bbe(r);
459		be128_xor(r, r, &t->t[ap[i]]);
460	}
461	*a = *r;
462}
463EXPORT_SYMBOL(gf128mul_4k_bbe);
464
465MODULE_LICENSE("GPL");
466MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
Configure Feed

Configure Feed