about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rw-r--r--Makefile10
-rw-r--r--README27
m---------chacha-opt0
-rw-r--r--rdd.127
-rw-r--r--rdd.c28
-rw-r--r--sosemanuk.c1283
-rw-r--r--sosemanuk.h176
8 files changed, 41 insertions, 1513 deletions
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..bb70641
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "chacha-opt"]
+	path = chacha-opt
+	url = https://github.com/floodyberry/chacha-opt
diff --git a/Makefile b/Makefile
index ea91cfa..f172b30 100644
--- a/Makefile
+++ b/Makefile
@@ -1,19 +1,23 @@
 ALL=rdd
 
 CFLAGS=-g -O3 -Wall -Wno-switch -Wextra -Wwrite-strings
+CPPFLAGS=-I./chacha-opt/app/include
 
 DESTDIR=
 PREFIX=/usr/local
 BINDIR=$(PREFIX)/bin
 MANDIR=$(PREFIX)/share/man
 
-all: $(ALL)
+all: $(ALL) chacha-opt/bin/chacha.lib
 
 clean: FRC
 	rm -f $(ALL) *.o
+	make -C chacha-opt clean
 
-rdd: sosemanuk.o rdd.o
-rdd.c: sosemanuk.h
+rdd: rdd.o chacha-opt/bin/chacha.lib
+
+chacha-opt/bin/chacha.lib:
+	cd chacha-opt && ./configure && make
 
 install: FRC all
 	mkdir -p $(DESTDIR)$(BINDIR) $(DESTDIR)$(MANDIR)/man1 $(DESTDIR)$(ZSHCOMPDIR)
diff --git a/README b/README
index d18358f..d91c6c8 100644
--- a/README
+++ b/README
@@ -8,10 +8,10 @@ SYNOPSIS
 
 DESCRIPTION
      rdd is a high-speed, cryptographically safe random data generator using
-     the reference implementation of the eSTREAM Profile 1 SOSEMANUK stream
-     cipher.  It is designed to fill crypto devices with random data.
+     optimized versions of the ChaCha8 stream cipher, depending on your CPU.
+     It is designed to fill crypto devices with random data.
 
-     Random binary data is outputted to standard output.
+     Random binary data is written to standard output.
 
      The options are as follows:
 
@@ -30,11 +30,11 @@ SEE ALSO
      dd(1), random(4), random(7)
 
 NOTES
-     rdd reads 32 random bytes for the initial key and 16 bytes for every
+     rdd reads 40 random bytes for the initial key and 32 bytes for every
      rekeying from the random source.  Setting rekeymb to -1 never rekeys.
 
      This implementation passed dieharder 3.31.1 "-a" and PractRand/RNG_test
-     0.93 (tested up to 32 TB).
+     0.95 (tested up to 32 TB).
 
 AUTHORS
      Leah Neukirchen <leah@vuxu.org>
@@ -45,19 +45,8 @@ LICENSE
      To the extent possible under law, the creator of this work has waived all
      copyright and related or neighboring rights to this work.
 
-           http://creativecommons.org/publicdomain/zero/1.0/
+     http://creativecommons.org/publicdomain/zero/1.0/
 
-     This program includes code taken from
-           http://www.ecrypt.eu.org/stream/p3ciphers/sosemanuk/sosemanuk_p3source.zip
-     as of 2013-04-13.  An error message which can never occur in rdd usage
-     has been #ifdef'ed out.
+     This program uses the public domain library chacha-opt.
 
-     © 2005 X-CRYPT project.  This software is provided 'as-is', without any
-     express or implied warranty.  In no event will the authors be held liable
-     for any damages arising from the use of this software.
-
-     Permission is granted to anyone to use this software for any purpose,
-     including commercial applications, and to alter it and redistribute it
-     freely, subject to no restriction.
-
-Void Linux                      January 3, 2018                     Void Linux
+Void Linux                      April 19, 2020                      Void Linux
diff --git a/chacha-opt b/chacha-opt
new file mode 160000
+Subproject d69e5acbc632ded477fa7056564c8a830c457de
diff --git a/rdd.1 b/rdd.1
index a9b7364..d5012ab 100644
--- a/rdd.1
+++ b/rdd.1
@@ -1,4 +1,4 @@
-.Dd January 3, 2018
+.Dd April 19, 2020
 .Dt RDD 1
 .Os
 .Sh NAME
@@ -12,11 +12,11 @@
 .Sh DESCRIPTION
 .Nm
 is a high-speed, cryptographically safe random data generator
-using the reference implementation of the eSTREAM Profile 1 SOSEMANUK
-stream cipher.
+using optimized versions of the ChaCha8 stream cipher,
+depending on your CPU.
 It is designed to fill crypto devices with random data.
 .Pp
-Random binary data is outputted to standard output.
+Random binary data is written to standard output.
 .Pp
 The options are as follows:
 .Bl -tag -width Ds
@@ -41,14 +41,14 @@ to standard error on every rekey.
 .Xr random 4 ,
 .Xr random 7
 .Sh NOTES
-rdd reads 32 random bytes for the initial key and 16 bytes for every rekeying
+rdd reads 40 random bytes for the initial key and 32 bytes for every rekeying
 from the random source.
 Setting
 .Ar rekeymb
 to -1 never rekeys.
 .Pp
 This implementation passed dieharder 3.31.1 "-a" and
-PractRand/RNG_test 0.93 (tested up to 32 TB).
+PractRand/RNG_test 0.95 (tested up to 32 TB).
 .Sh AUTHORS
 .An Leah Neukirchen Aq Mt leah@vuxu.org
 .Sh LICENSE
@@ -62,17 +62,4 @@ neighboring rights to this work.
 .Pp
 .Lk http://creativecommons.org/publicdomain/zero/1.0/
 .Pp
-This program includes code taken from
-.Lk http://www.ecrypt.eu.org/stream/p3ciphers/sosemanuk/sosemanuk_p3source.zip
-as of 2013-04-13.
-An error message which can never occur in rdd usage
-has been #ifdef'ed out.
-.Pp
-\[co] 2005 X-CRYPT project.
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising
-from the use of this software.
-.Pp
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to no restriction.
+This program uses the public domain library chacha-opt.
diff --git a/rdd.c b/rdd.c
index c1ed8d6..8f0af11 100644
--- a/rdd.c
+++ b/rdd.c
@@ -5,16 +5,17 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-#include "sosemanuk.h"
+#include "chacha.h"
 
 #define fail(i, s) write(2, "rdd: " s, 5+(sizeof s)-1), exit(i);
 
 int
 main(int argc, char *argv[])
 {
-	unsigned char key[32], iv[16], buf[120*512];
-	sosemanuk_key_context kc;
-	sosemanuk_run_context rc;
+	chacha_state state;
+	chacha_key key;
+	chacha_iv iv;
+	unsigned char buf[16*4096];
 
 	int fd, c, v = 0;
 	long i, r = 4;
@@ -33,28 +34,31 @@ usage:
 	if (argc > optind)
 		goto usage;
 
+	if (chacha_startup() != 0)
+		fail(255, "self-test failed\n");
+
 	if (isatty(1))
 		fail(5, "cowardly not dumping random data to tty\n");
 
 	if ((fd = open(src, O_RDONLY)) < 0)
 		fail(2, "failed to open random source\n");
 
-	if (read(fd, key, sizeof key) != sizeof key)
-		fail(3, "failed to read key from random source\n");
-	sosemanuk_schedule(&kc, key, sizeof key);
+	if (read(fd, iv.b, sizeof iv.b) != sizeof iv.b)
+		fail(3, "failed to read iv from random source\n");
 
 	while (1) {
-		if (read(fd, iv, sizeof iv) != sizeof iv)
-			fail(3, "failed to read iv from random source\n");
-		sosemanuk_init(&rc, &kc, iv, sizeof iv);
+		if (read(fd, key.b, sizeof key.b) != sizeof key.b)
+			fail(3, "failed to read key from random source\n");
+		chacha_init(&state, &key, &iv, 8);
+
 		if (v)
 			write(2, ".", 1);
 
 		for (i = 0; r < 0 || i < r*1024*1024; i += sizeof buf) {
-			sosemanuk_prng(&rc, buf, sizeof buf);
+			chacha_update(&state, 0, buf, sizeof buf);
 			while (write(1, buf, sizeof buf) != sizeof buf)
 				if (errno) {
-					if (errno == ENOSPC)
+					if (errno == ENOSPC || errno == EPIPE)
 						exit(0);
 					if (errno != EINTR)
 						fail(4, "write error\n");
diff --git a/sosemanuk.c b/sosemanuk.c
deleted file mode 100644
index 033ea53..0000000
--- a/sosemanuk.c
+++ /dev/null
@@ -1,1283 +0,0 @@
-/*
- * SOSEMANUK reference implementation.
- *
- * This code is supposed to run on any conforming C implementation (C90
- * or later). When compiled with the SOSEMANUK_VECTOR macro defined, this
- * is a stand-alone program which outputs detailed test vectors. When
- * compiled with the SOSEMANUK_SPEED macro defined, this is a stand-alone
- * program which performs an implementation speed measure.
- *
- * (c) 2005 X-CRYPT project. This software is provided 'as-is', without
- * any express or implied warranty. In no event will the authors be held
- * liable for any damages arising from the use of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to no restriction.
- *
- * Technical remarks and questions can be addressed to
- * <thomas.pornin@cryptolog.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifdef SOSEMANUK_SPEED
-#include <time.h>
-#endif
-
-#include "sosemanuk.h"
-
-/* ======================================================================== */
-
-#ifdef SOSEMANUK_ECRYPT
-
-/*
- * No local speed testing when using the ECRYPT mode.
- */
-#undef SOSEMANUK_SPEED
-
-/*
- * If we are using the ECRYPT API, then we rely on the ECRYPT portability
- * macros and types.
- */
-
-#define unum32    u32
-#define T32(x)    U32V(x)
-
-#define decode32le(data)       U8TO32_LITTLE(data)
-#define encode32le(dst, val)   do { \
-		u8 *encode_dst = (dst); \
-		u32 encode_val = (val); \
-		U32TO8_LITTLE(encode_dst, encode_val); \
-	} while (0)
-
-#define ROTL(x, n)    ROTL32(x, n)
-#define INLINE
-
-#else
-
-/*
- * 32-bit data decoding, little endian.
- */
-static INLINE unum32
-decode32le(unsigned char *data)
-{
-#ifdef __i386
-	/*
-	 * On i386, we prefer accessing data directly. Unaligned accesses
-	 * imply only a one-cycle penalty; even with that penalty, this
-	 * method is considerably faster than the generic one. Note that i486
-	 * and later may be set in a mode where unaligned accesses trigger
-	 * exceptions; but such a mode is not compatible with usual ABIs
-	 * (which require only 4-byte alignment for "double" and "long
-	 * double"), hence operating systems do not set that "alignment
-	 * check" flag.
-	 *
-	 * If this optimized access proves to be a problem, replace the
-	 * test above by "#if 0".
-	 */
-	return *(unum32 *)data;
-#else
-	return (unum32)data[0]
-		| ((unum32)data[1] << 8)
-		| ((unum32)data[2] << 16)
-		| ((unum32)data[3] << 24);
-#endif
-}
-
-/*
- * 32-bit data encoding, little-endian.
- */
-static INLINE void
-encode32le(unsigned char *dst, unum32 val)
-{
-#ifdef __i386__
-	/*
-	 * Optimized version for i386. See comments in decode32le().
-	 */
-	*(unum32 *)dst = val;
-#else
-	dst[0] = val & 0xFF;
-	dst[1] = (val >> 8) & 0xFF;
-	dst[2] = (val >> 16) & 0xFF;
-	dst[3] = (val >> 24) & 0xFF;
-#endif
-}
-
-/*
- * Left-rotation by n bits (0 < n < 32).
- */
-#define ROTL(x, n)    (T32(((x) << (n)) | T32((x) >> (32 - (n)))))
-
-#endif
-
-/* ======================================================================== */
-
-/*
- * Serpent S-boxes, implemented in bitslice mode. These circuits have
- * been published by Dag Arne Osvik ("Speeding up Serpent", published in
- * the 3rd AES Candidate Conference) and work on five 32-bit registers:
- * the four inputs, and a fifth scratch register. They are meant to be
- * quite fast on Pentium-class processors. These are not the fastest
- * published, but they are "fast enough" and they are unencumbered as
- * far as intellectual property is concerned (note: these are rewritten
- * from the article itself, and hence are not covered by the GPL on
- * Dag's code, which was not used here).
- *
- * The output bits are permuted. Here is the correspondence:
- *   S0:  1420
- *   S1:  2031
- *   S2:  2314
- *   S3:  1234
- *   S4:  1403
- *   S5:  1302
- *   S6:  0142
- *   S7:  4310
- * (for instance, the output of S0 is in "r1, r4, r2, r0").
- */
-
-#define S0(r0, r1, r2, r3, r4)   do { \
-		r3 ^= r0;  r4  = r1; \
-		r1 &= r3;  r4 ^= r2; \
-		r1 ^= r0;  r0 |= r3; \
-		r0 ^= r4;  r4 ^= r3; \
-		r3 ^= r2;  r2 |= r1; \
-		r2 ^= r4;  r4 = ~r4; \
-		r4 |= r1;  r1 ^= r3; \
-		r1 ^= r4;  r3 |= r0; \
-		r1 ^= r3;  r4 ^= r3; \
-	} while (0)
-
-#define S1(r0, r1, r2, r3, r4)   do { \
-		r0 = ~r0;  r2 = ~r2; \
-		r4  = r0;  r0 &= r1; \
-		r2 ^= r0;  r0 |= r3; \
-		r3 ^= r2;  r1 ^= r0; \
-		r0 ^= r4;  r4 |= r1; \
-		r1 ^= r3;  r2 |= r0; \
-		r2 &= r4;  r0 ^= r1; \
-		r1 &= r2; \
-		r1 ^= r0;  r0 &= r2; \
-		r0 ^= r4; \
-	} while (0)
-
-#define S2(r0, r1, r2, r3, r4)   do { \
-		r4  = r0;  r0 &= r2; \
-		r0 ^= r3;  r2 ^= r1; \
-		r2 ^= r0;  r3 |= r4; \
-		r3 ^= r1;  r4 ^= r2; \
-		r1  = r3;  r3 |= r4; \
-		r3 ^= r0;  r0 &= r1; \
-		r4 ^= r0;  r1 ^= r3; \
-		r1 ^= r4;  r4 = ~r4; \
-	} while (0)
-
-#define S3(r0, r1, r2, r3, r4)   do { \
-		r4  = r0;  r0 |= r3; \
-		r3 ^= r1;  r1 &= r4; \
-		r4 ^= r2;  r2 ^= r3; \
-		r3 &= r0;  r4 |= r1; \
-		r3 ^= r4;  r0 ^= r1; \
-		r4 &= r0;  r1 ^= r3; \
-		r4 ^= r2;  r1 |= r0; \
-		r1 ^= r2;  r0 ^= r3; \
-		r2  = r1;  r1 |= r3; \
-		r1 ^= r0; \
-	} while (0)
-
-#define S4(r0, r1, r2, r3, r4)   do { \
-		r1 ^= r3;  r3 = ~r3; \
-		r2 ^= r3;  r3 ^= r0; \
-		r4  = r1;  r1 &= r3; \
-		r1 ^= r2;  r4 ^= r3; \
-		r0 ^= r4;  r2 &= r4; \
-		r2 ^= r0;  r0 &= r1; \
-		r3 ^= r0;  r4 |= r1; \
-		r4 ^= r0;  r0 |= r3; \
-		r0 ^= r2;  r2 &= r3; \
-		r0 = ~r0;  r4 ^= r2; \
-	} while (0)
-
-#define S5(r0, r1, r2, r3, r4)   do { \
-		r0 ^= r1;  r1 ^= r3; \
-		r3 = ~r3;  r4  = r1; \
-		r1 &= r0;  r2 ^= r3; \
-		r1 ^= r2;  r2 |= r4; \
-		r4 ^= r3;  r3 &= r1; \
-		r3 ^= r0;  r4 ^= r1; \
-		r4 ^= r2;  r2 ^= r0; \
-		r0 &= r3;  r2 = ~r2; \
-		r0 ^= r4;  r4 |= r3; \
-		r2 ^= r4; \
-	} while (0)
-
-#define S6(r0, r1, r2, r3, r4)   do { \
-		r2 = ~r2;  r4  = r3; \
-		r3 &= r0;  r0 ^= r4; \
-		r3 ^= r2;  r2 |= r4; \
-		r1 ^= r3;  r2 ^= r0; \
-		r0 |= r1;  r2 ^= r1; \
-		r4 ^= r0;  r0 |= r3; \
-		r0 ^= r2;  r4 ^= r3; \
-		r4 ^= r0;  r3 = ~r3; \
-		r2 &= r4; \
-		r2 ^= r3; \
-	} while (0)
-
-#define S7(r0, r1, r2, r3, r4)   do { \
-		r4  = r1;  r1 |= r2; \
-		r1 ^= r3;  r4 ^= r2; \
-		r2 ^= r1;  r3 |= r4; \
-		r3 &= r0;  r4 ^= r2; \
-		r3 ^= r1;  r1 |= r4; \
-		r1 ^= r0;  r0 |= r4; \
-		r0 ^= r2;  r1 ^= r4; \
-		r2 ^= r1;  r1 &= r0; \
-		r1 ^= r4;  r2 = ~r2; \
-		r2 |= r0; \
-		r4 ^= r2; \
-	} while (0)
-
-/*
- * The Serpent linear transform.
- */
-#define SERPENT_LT(x0, x1, x2, x3)  do { \
-		x0 = ROTL(x0, 13); \
-		x2 = ROTL(x2, 3); \
-		x1 = x1 ^ x0 ^ x2; \
-		x3 = x3 ^ x2 ^ T32(x0 << 3); \
-		x1 = ROTL(x1, 1); \
-		x3 = ROTL(x3, 7); \
-		x0 = x0 ^ x1 ^ x3; \
-		x2 = x2 ^ x3 ^ T32(x1 << 7); \
-		x0 = ROTL(x0, 5); \
-		x2 = ROTL(x2, 22); \
-	} while (0)
-
-/* ======================================================================== */
-
-#ifdef SOSEMANUK_ECRYPT
-void
-ECRYPT_init(void)
-{
-	return;
-}
-#endif
-
-#ifdef SOSEMANUK_ECRYPT
-void
-ECRYPT_keysetup(ECRYPT_ctx *kc, const u8 *key, u32 keysize, u32 ivsize)
-#else
-/* see sosemanuk.h */
-void
-sosemanuk_schedule(sosemanuk_key_context *kc,
-	unsigned char *key, size_t key_len)
-#endif
-{
-	/*
-	 * This key schedule is actually a truncated Serpent key schedule.
-	 * The key-derived words (w_i) are computed within the eight
-	 * local variables w0 to w7, which are reused again and again.
-	 */
-
-#define SKS(S, o0, o1, o2, o3, d0, d1, d2, d3)   do { \
-		unum32 r0, r1, r2, r3, r4; \
-		r0 = w ## o0; \
-		r1 = w ## o1; \
-		r2 = w ## o2; \
-		r3 = w ## o3; \
-		S(r0, r1, r2, r3, r4); \
-		kc->sk[i ++] = r ## d0; \
-		kc->sk[i ++] = r ## d1; \
-		kc->sk[i ++] = r ## d2; \
-		kc->sk[i ++] = r ## d3; \
-	} while (0)
-
-#define SKS0    SKS(S0, 4, 5, 6, 7, 1, 4, 2, 0)
-#define SKS1    SKS(S1, 0, 1, 2, 3, 2, 0, 3, 1)
-#define SKS2    SKS(S2, 4, 5, 6, 7, 2, 3, 1, 4)
-#define SKS3    SKS(S3, 0, 1, 2, 3, 1, 2, 3, 4)
-#define SKS4    SKS(S4, 4, 5, 6, 7, 1, 4, 0, 3)
-#define SKS5    SKS(S5, 0, 1, 2, 3, 1, 3, 0, 2)
-#define SKS6    SKS(S6, 4, 5, 6, 7, 0, 1, 4, 2)
-#define SKS7    SKS(S7, 0, 1, 2, 3, 4, 3, 1, 0)
-
-#define WUP(wi, wi5, wi3, wi1, cc)   do { \
-		unum32 tt = (wi) ^ (wi5) ^ (wi3) \
-			^ (wi1) ^ (0x9E3779B9 ^ (unum32)(cc)); \
-		(wi) = ROTL(tt, 11); \
-	} while (0)
-
-#define WUP0(cc)   do { \
-		WUP(w0, w3, w5, w7, cc); \
-		WUP(w1, w4, w6, w0, cc + 1); \
-		WUP(w2, w5, w7, w1, cc + 2); \
-		WUP(w3, w6, w0, w2, cc + 3); \
-	} while (0)
-
-#define WUP1(cc)   do { \
-		WUP(w4, w7, w1, w3, cc); \
-		WUP(w5, w0, w2, w4, cc + 1); \
-		WUP(w6, w1, w3, w5, cc + 2); \
-		WUP(w7, w2, w4, w6, cc + 3); \
-	} while (0)
-
-	unsigned char wbuf[32];
-	register unum32 w0, w1, w2, w3, w4, w5, w6, w7;
-	int i = 0;
-#ifdef SOSEMANUK_ECRYPT
-	size_t key_len = keysize / 8;
-
-	kc->ivlen = ivsize / 8;
-#endif
-
-	/*
-	 * The key is copied into the wbuf[] buffer and padded to 256 bits
-	 * as described in the Serpent specification.
-	 */
-#ifdef WASTE_SPACE_ON_STDIO
-	if (key_len == 0 || key_len > 32) {
-		fprintf(stderr, "invalid key size: %lu\n",
-			(unsigned long)key_len);
-		exit(EXIT_FAILURE);
-	}
-#endif
-	memcpy(wbuf, key, key_len);
-	if (key_len < 32) {
-		wbuf[key_len] = 0x01;
-		if (key_len < 31)
-			memset(wbuf + key_len + 1, 0, 31 - key_len);
-	}
-
-#ifdef SOSEMANUK_VECTOR
-	{
-		size_t u;
-
-		printf("key = ");
-		for (u = 0; u < key_len; u ++)
-			printf("%02X", key[u]);
-		printf("\n");
-	}
-#endif
-
-	w0 = decode32le(wbuf);
-	w1 = decode32le(wbuf + 4);
-	w2 = decode32le(wbuf + 8);
-	w3 = decode32le(wbuf + 12);
-	w4 = decode32le(wbuf + 16);
-	w5 = decode32le(wbuf + 20);
-	w6 = decode32le(wbuf + 24);
-	w7 = decode32le(wbuf + 28);
-
-#ifdef SOSEMANUK_VECTOR
-	printf("  -> %08lX %08lX %08lX %08lX %08lX %08lX %08lX %08lX\n",
-		(unsigned long)w7, (unsigned long)w6,
-		(unsigned long)w5, (unsigned long)w4,
-		(unsigned long)w3, (unsigned long)w2,
-		(unsigned long)w1, (unsigned long)w0);
-#endif
-
-	WUP0(0);   SKS3;
-	WUP1(4);   SKS2;
-	WUP0(8);   SKS1;
-	WUP1(12);  SKS0;
-	WUP0(16);  SKS7;
-	WUP1(20);  SKS6;
-	WUP0(24);  SKS5;
-	WUP1(28);  SKS4;
-	WUP0(32);  SKS3;
-	WUP1(36);  SKS2;
-	WUP0(40);  SKS1;
-	WUP1(44);  SKS0;
-	WUP0(48);  SKS7;
-	WUP1(52);  SKS6;
-	WUP0(56);  SKS5;
-	WUP1(60);  SKS4;
-	WUP0(64);  SKS3;
-	WUP1(68);  SKS2;
-	WUP0(72);  SKS1;
-	WUP1(76);  SKS0;
-	WUP0(80);  SKS7;
-	WUP1(84);  SKS6;
-	WUP0(88);  SKS5;
-	WUP1(92);  SKS4;
-	WUP0(96);  SKS3;
-
-#ifdef SOSEMANUK_VECTOR
-	{
-		unsigned u;
-
-		for (u = 0; u < 100; u += 4) {
-			printf("Serpent24 subkey %2u:"
-				" %08lX %08lX %08lX %08lX\n", u / 4,
-				(unsigned long)kc->sk[u + 3],
-				(unsigned long)kc->sk[u + 2],
-				(unsigned long)kc->sk[u + 1],
-				(unsigned long)kc->sk[u + 0]);
-		}
-	}
-#endif
-
-#undef SKS
-#undef SKS0
-#undef SKS1
-#undef SKS2
-#undef SKS3
-#undef SKS4
-#undef SKS5
-#undef SKS6
-#undef SKS7
-#undef WUP
-#undef WUP0
-#undef WUP1
-}
-
-#ifdef SOSEMANUK_ECRYPT
-void
-ECRYPT_ivsetup(ECRYPT_ctx *ctx, const u8 *iv)
-#else
-/* see sosemanuk.h */
-void
-sosemanuk_init(sosemanuk_run_context *rc, sosemanuk_key_context *kc,
-	unsigned char *iv, size_t iv_len)
-#endif
-{
-
-#ifdef SOSEMANUK_ECRYPT
-#define rc       ctx
-#define kc       ctx
-#define iv_len   (ctx->ivlen)
-#endif
-
-	/*
-	 * The Serpent key addition step.
-	 */
-#define KA(zc, x0, x1, x2, x3)  do { \
-		x0 ^= kc->sk[(zc)]; \
-		x1 ^= kc->sk[(zc) + 1]; \
-		x2 ^= kc->sk[(zc) + 2]; \
-		x3 ^= kc->sk[(zc) + 3]; \
-	} while (0)
-
-	/*
-	 * One Serpent round.
-	 *   zc = current subkey counter
-	 *   S = S-box macro for this round
-	 *   i0 to i4 = input register numbers (the fifth is a scratch register)
-	 *   o0 to o3 = output register numbers
-	 */
-#define FSS(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3)  do { \
-		KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
-		S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
-		SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
-	} while (0)
-
-	/*
-	 * Last Serpent round. Contrary to the "true" Serpent, we keep
-	 * the linear transformation for that last round.
-	 */
-#define FSF(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3)  do { \
-		KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
-		S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
-		SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
-		KA(zc + 4, r ## o0, r ## o1, r ## o2, r ## o3); \
-	} while (0)
-
-	register unum32 r0, r1, r2, r3, r4;
-	unsigned char ivtmp[16];
-
-	if (iv_len >= sizeof ivtmp) {
-		memcpy(ivtmp, iv, sizeof ivtmp);
-	} else {
-		if (iv_len > 0)
-			memcpy(ivtmp, iv, iv_len);
-		memset(ivtmp + iv_len, 0, (sizeof ivtmp) - iv_len);
-	}
-
-#ifdef SOSEMANUK_VECTOR
-	{
-		size_t u;
-
-		printf("IV = ");
-		for (u = 0; u < 16; u ++)
-			printf("%02X", ivtmp[u]);
-		printf("\n");
-	}
-#endif
-
-	/*
-	 * Decode IV into four 32-bit words (little-endian).
-	 */
-	r0 = decode32le(ivtmp);
-	r1 = decode32le(ivtmp + 4);
-	r2 = decode32le(ivtmp + 8);
-	r3 = decode32le(ivtmp + 12);
-
-#ifdef SOSEMANUK_VECTOR
-	printf("  -> %08lX %08lX %08lX %08lX\n",
-		(unsigned long)r3, (unsigned long)r2,
-		(unsigned long)r1, (unsigned long)r0);
-#endif
-
-	/*
-	 * Encrypt IV with Serpent24. Some values are extracted from the
-	 * output of the twelfth, eighteenth and twenty-fourth rounds.
-	 */
-	FSS(0, S0, 0, 1, 2, 3, 4, 1, 4, 2, 0);
-	FSS(4, S1, 1, 4, 2, 0, 3, 2, 1, 0, 4);
-	FSS(8, S2, 2, 1, 0, 4, 3, 0, 4, 1, 3);
-	FSS(12, S3, 0, 4, 1, 3, 2, 4, 1, 3, 2);
-	FSS(16, S4, 4, 1, 3, 2, 0, 1, 0, 4, 2);
-	FSS(20, S5, 1, 0, 4, 2, 3, 0, 2, 1, 4);
-	FSS(24, S6, 0, 2, 1, 4, 3, 0, 2, 3, 1);
-	FSS(28, S7, 0, 2, 3, 1, 4, 4, 1, 2, 0);
-	FSS(32, S0, 4, 1, 2, 0, 3, 1, 3, 2, 4);
-	FSS(36, S1, 1, 3, 2, 4, 0, 2, 1, 4, 3);
-	FSS(40, S2, 2, 1, 4, 3, 0, 4, 3, 1, 0);
-	FSS(44, S3, 4, 3, 1, 0, 2, 3, 1, 0, 2);
-	rc->s09 = r3;
-	rc->s08 = r1;
-	rc->s07 = r0;
-	rc->s06 = r2;
-
-	FSS(48, S4, 3, 1, 0, 2, 4, 1, 4, 3, 2);
-	FSS(52, S5, 1, 4, 3, 2, 0, 4, 2, 1, 3);
-	FSS(56, S6, 4, 2, 1, 3, 0, 4, 2, 0, 1);
-	FSS(60, S7, 4, 2, 0, 1, 3, 3, 1, 2, 4);
-	FSS(64, S0, 3, 1, 2, 4, 0, 1, 0, 2, 3);
-	FSS(68, S1, 1, 0, 2, 3, 4, 2, 1, 3, 0);
-	rc->r1  = r2;
-	rc->s04 = r1;
-	rc->r2  = r3;
-	rc->s05 = r0;
-
-	FSS(72, S2, 2, 1, 3, 0, 4, 3, 0, 1, 4);
-	FSS(76, S3, 3, 0, 1, 4, 2, 0, 1, 4, 2);
-	FSS(80, S4, 0, 1, 4, 2, 3, 1, 3, 0, 2);
-	FSS(84, S5, 1, 3, 0, 2, 4, 3, 2, 1, 0);
-	FSS(88, S6, 3, 2, 1, 0, 4, 3, 2, 4, 1);
-	FSF(92, S7, 3, 2, 4, 1, 0, 0, 1, 2, 3);
-	rc->s03 = r0;
-	rc->s02 = r1;
-	rc->s01 = r2;
-	rc->s00 = r3;
-
-#ifdef SOSEMANUK_VECTOR
-	printf("Initial LFSR state:\n");
-	printf("      s1  = %08lX\n", (unsigned long)rc->s00);
-	printf("      s2  = %08lX\n", (unsigned long)rc->s01);
-	printf("      s3  = %08lX\n", (unsigned long)rc->s02);
-	printf("      s4  = %08lX\n", (unsigned long)rc->s03);
-	printf("      s5  = %08lX\n", (unsigned long)rc->s04);
-	printf("      s6  = %08lX\n", (unsigned long)rc->s05);
-	printf("      s7  = %08lX\n", (unsigned long)rc->s06);
-	printf("      s8  = %08lX\n", (unsigned long)rc->s07);
-	printf("      s9  = %08lX\n", (unsigned long)rc->s08);
-	printf("      s10 = %08lX\n", (unsigned long)rc->s09);
-	printf("Initial FSM state:  r1 = %08lX   r2 = %08lX\n",
-		(unsigned long)rc->r1, (unsigned long)rc->r2);
-#endif
-
-#ifndef SOSEMANUK_ECRYPT
-	rc->ptr = sizeof rc->buf;
-#endif
-
-#undef KA
-#undef FSS
-#undef FSF
-
-#ifdef SOSEMANUK_ECRYPT
-#undef rc
-#undef kc
-#undef iv_len
-#endif
-}
-
-/*
- * Multiplication by alpha: alpha * x = T32(x << 8) ^ mul_a[x >> 24]
- */
-static unum32 mul_a[] = {
-	0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
-	0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
-	0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
-	0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
-	0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
-	0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
-	0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
-	0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
-	0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
-	0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
-	0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
-	0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
-	0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
-	0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
-	0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
-	0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
-	0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
-	0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
-	0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
-	0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
-	0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
-	0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
-	0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
-	0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
-	0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
-	0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
-	0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
-	0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
-	0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
-	0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
-	0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
-	0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
-	0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
-	0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
-	0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
-	0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
-	0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
-	0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
-	0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
-	0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
-	0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
-	0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
-	0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
-	0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
-	0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
-	0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
-	0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
-	0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
-	0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
-	0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
-	0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
-	0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
-	0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
-	0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
-	0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
-	0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
-	0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
-	0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
-	0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
-	0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
-	0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
-	0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
-	0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
-	0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB
-};
-
-/*
- * Multiplication by 1/alpha: 1/alpha * x = (x >> 8) ^ mul_ia[x & 0xFF]
- */
-static unum32 mul_ia[] = {
-	0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
-	0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
-	0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
-	0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
-	0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
-	0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
-	0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
-	0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
-	0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
-	0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
-	0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
-	0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
-	0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
-	0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
-	0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
-	0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
-	0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
-	0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
-	0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
-	0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
-	0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
-	0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
-	0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
-	0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
-	0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
-	0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
-	0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
-	0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
-	0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
-	0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
-	0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
-	0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
-	0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
-	0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
-	0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
-	0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
-	0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
-	0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
-	0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
-	0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
-	0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
-	0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
-	0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
-	0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
-	0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
-	0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
-	0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
-	0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
-	0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
-	0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
-	0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
-	0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
-	0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
-	0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
-	0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
-	0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
-	0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
-	0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
-	0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
-	0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
-	0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
-	0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
-	0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
-	0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
-};
-
-/*
- * Compute the next block of bits of output stream. This is equivalent
- * to one full rotation of the shift register.
- *
- * If SOSEMANUK_SPEED is defined, this function takes an extra parameter
- * "counter". The function then returns the sum of all produced
- * 32-bit words, in an "unum32". That sum prevents the compiler from
- * optimizing out part of the computation.
- */
-#if defined SOSEMANUK_ECRYPT
-static void
-sosemanuk_internal(ECRYPT_ctx *rc, u8 *dst)
-#elif defined SOSEMANUK_SPEED
-static unum32
-sosemanuk_internal(sosemanuk_run_context *rc, unsigned long counter)
-#else
-static void
-sosemanuk_internal(sosemanuk_run_context *rc)
-#endif
-{
-	/*
-	 * MUL_A(x) computes alpha * x (in F_{2^32}).
-	 * MUL_G(x) computes 1/alpha * x (in F_{2^32}).
-	 */
-#define MUL_A(x)    (T32((x) << 8) ^ mul_a[(x) >> 24])
-#define MUL_G(x)    (((x) >> 8) ^ mul_ia[(x) & 0xFF])
-
-	/*
-	 * This macro computes the special multiplexer, which chooses
-	 * between "x" and "x xor y", depending on the least significant
-	 * bit of the control word. We use the C "?:" selection operator
-	 * (which most compilers know how to optimise) except for Alpha,
-	 * where the manual sign extension seems to perform equally well
-	 * with DEC/Compaq/HP compiler, and much better with gcc.
-	 */
-#ifdef __alpha
-#define XMUX(c, x, y)   ((((signed int)((c) << 31) >> 31) & (y)) ^ (x))
-#else
-#define XMUX(c, x, y)   (((c) & 0x1) ? ((x) ^ (y)) : (x))
-#endif
-
-	/*
-	 * FSM() updates the finite state machine.
-	 */
-#define FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9)   do { \
-		unum32 tt, or1; \
-		tt = XMUX(r1, s ## x1, s ## x8); \
-		or1 = r1; \
-		r1 = T32(r2 + tt); \
-		tt = T32(or1 * 0x54655307); \
-		r2 = ROTL(tt, 7); \
-		PFSM; \
-	} while (0)
-
-	/*
-	 * LRU updates the shift register; the dropped value is stored
-	 * in variable "dd".
-	 */
-#define LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd)   do { \
-		dd = s ## x0; \
-		s ## x0 = MUL_A(s ## x0) ^ MUL_G(s ## x3) ^ s ## x9; \
-		PLFSR(dd, s ## x1, s ## x2, s ## x3, s ## x4, s ## x5, \
-			s ## x6, s ## x7, s ## x8, s ## x9, s ## x0); \
-	} while (0)
-
-	/*
-	 * CC1 stores into variable "ee" the next intermediate word
-	 * (combination of the new states of the LFSR and the FSM).
-	 */
-#define CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee)   do { \
-		ee = T32(s ## x9 + r1) ^ r2; \
-		PCCVAL(ee); \
-	} while (0)
-
-	/*
-	 * STEP computes one internal round. "dd" receives the "s_t"
-	 * value (dropped from the LFSR) and "ee" gets the value computed
-	 * from the LFSR and FSM.
-	 */
-#define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd, ee)   do { \
-		FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); \
-		LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd); \
-		CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee); \
-	} while (0)
-
-	/*
-	 * Apply one Serpent round (with the provided S-box macro), XOR
-	 * the result with the "v" values, and encode the result into
-	 * the destination buffer, at the provided offset. The "x*"
-	 * arguments encode the output permutation of the "S" macro.
-	 */
-#ifdef SOSEMANUK_SPEED
-
-#define SRD(S, x0, x1, x2, x3, ooff)   do { \
-		S(u0, u1, u2, u3, u4); \
-		speed_acc += u ## x0 ^ v0; \
-		speed_acc += u ## x1 ^ v1; \
-		speed_acc += u ## x2 ^ v2; \
-		speed_acc += u ## x3 ^ v3; \
-	} while (0)
-
-#else
-
-#ifdef SOSEMANUK_ECRYPT
-#define OUTWORD_BASE   dst
-#else
-#define OUTWORD_BASE   (rc->buf)
-#endif
-
-#define SRD(S, x0, x1, x2, x3, ooff)   do { \
-		PSPIN(u0, u1, u2, u3); \
-		S(u0, u1, u2, u3, u4); \
-		PSPOUT(u ## x0, u ## x1, u ## x2, u ## x3); \
-		encode32le(OUTWORD_BASE + ooff, u ## x0 ^ v0); \
-		encode32le(OUTWORD_BASE + ooff + 4, u ## x1 ^ v1); \
-		encode32le(OUTWORD_BASE + ooff + 8, u ## x2 ^ v2); \
-		encode32le(OUTWORD_BASE + ooff + 12, u ## x3 ^ v3); \
-		POUT(OUTWORD_BASE + ooff); \
-	} while (0)
-
-#endif
-
-	/*
-	 * Audit code; used for detailed test vectors.
-	 */
-#ifdef SOSEMANUK_VECTOR
-
-#define PFSM   do { \
-		printf("New FSM state:  r1 = %08lX   r2 = %08lX\n", \
-			(unsigned long)r1, (unsigned long)r2); \
-	} while (0)
-
-#define PLFSR(dd, x1, x2, x3, x4, x5, x6, x7, x8, x9, x0)   do { \
-		printf("New LFSR state:\n"); \
-		printf("   dropped (s_t): %08lX\n", (unsigned long)dd); \
-		printf("         s_t+1  = %08lX\n", (unsigned long)x1); \
-		printf("         s_t+2  = %08lX\n", (unsigned long)x2); \
-		printf("         s_t+3  = %08lX\n", (unsigned long)x3); \
-		printf("         s_t+4  = %08lX\n", (unsigned long)x4); \
-		printf("         s_t+5  = %08lX\n", (unsigned long)x5); \
-		printf("         s_t+6  = %08lX\n", (unsigned long)x6); \
-		printf("         s_t+7  = %08lX\n", (unsigned long)x7); \
-		printf("         s_t+8  = %08lX\n", (unsigned long)x8); \
-		printf("         s_t+9  = %08lX\n", (unsigned long)x9); \
-		printf("         s_t+10 = %08lX\n", (unsigned long)x0); \
-	} while (0)
-
-#define PCCVAL(ee)   do { \
-		printf("Intermediate output: %08lX\n", (unsigned long)ee); \
-	} while (0)
-
-#define PSPIN(x0, x1, x2, x3)    do { \
-		printf("Serpent1 input:  %08lX %08lX %08lX %08lX\n", \
-			(unsigned long)x3, (unsigned long)x2, \
-			(unsigned long)x1, (unsigned long)x0); \
-	} while (0)
-
-#define PSPOUT(x0, x1, x2, x3)    do { \
-		printf("Serpent1 output: %08lX %08lX %08lX %08lX\n", \
-			(unsigned long)x3, (unsigned long)x2, \
-			(unsigned long)x1, (unsigned long)x0); \
-	} while (0)
-
-#define POUT(buf)   do { \
-		size_t j; \
-		printf("Stream output: "); \
-		for (j = 0; j < 16; j ++) \
-			printf("%02X", (buf)[j]); \
-		printf("\n"); \
-	} while (0)
-
-#else
-
-#define PFSM                      (void)0
-#define PLFSR(dd, x1, x2, x3, x4, x5, x6, x7, x8, x9, x0)   (void)0
-#define PCCVAL(ee)                (void)0
-#define PSPIN(x0, x1, x2, x3)     (void)0
-#define PSPOUT(x0, x1, x2, x3)    (void)0
-#define POUT(buf)                 (void)0
-
-#endif
-
-	unum32 s00 = rc->s00;
-	unum32 s01 = rc->s01;
-	unum32 s02 = rc->s02;
-	unum32 s03 = rc->s03;
-	unum32 s04 = rc->s04;
-	unum32 s05 = rc->s05;
-	unum32 s06 = rc->s06;
-	unum32 s07 = rc->s07;
-	unum32 s08 = rc->s08;
-	unum32 s09 = rc->s09;
-	unum32 r1 = rc->r1;
-	unum32 r2 = rc->r2;
-	unum32 u0, u1, u2, u3, u4;
-	unum32 v0, v1, v2, v3;
-#ifdef SOSEMANUK_SPEED
-	unum32 speed_acc = 0;
-#endif
-
-#ifdef SOSEMANUK_SPEED
-	while (counter -- > 0) {
-#endif
-
-	STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v0, u0);
-	STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v1, u1);
-	STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v2, u2);
-	STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v3, u3);
-	SRD(S2, 2, 3, 1, 4, 0);
-	STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v0, u0);
-	STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v1, u1);
-	STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v2, u2);
-	STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v3, u3);
-	SRD(S2, 2, 3, 1, 4, 16);
-	STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v0, u0);
-	STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v1, u1);
-	STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v2, u2);
-	STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v3, u3);
-	SRD(S2, 2, 3, 1, 4, 32);
-	STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v0, u0);
-	STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v1, u1);
-	STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v2, u2);
-	STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v3, u3);
-	SRD(S2, 2, 3, 1, 4, 48);
-	STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v0, u0);
-	STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v1, u1);
-	STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v2, u2);
-	STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v3, u3);
-	SRD(S2, 2, 3, 1, 4, 64);
-
-#ifdef SOSEMANUK_SPEED
-	}
-#endif
-
-	rc->s00 = s00;
-	rc->s01 = s01;
-	rc->s02 = s02;
-	rc->s03 = s03;
-	rc->s04 = s04;
-	rc->s05 = s05;
-	rc->s06 = s06;
-	rc->s07 = s07;
-	rc->s08 = s08;
-	rc->s09 = s09;
-	rc->r1 = r1;
-	rc->r2 = r2;
-
-#ifdef SOSEMANUK_SPEED
-	return T32(speed_acc);
-#endif
-}
-
-/*
- * Combine buffers in1[] and in2[] by XOR, result in out[]. The length
- * is "data_len" (in bytes). Partial overlap of out[] with either in1[]
- * or in2[] is not allowed. Total overlap (out == in1 and/or out == in2)
- * is allowed.
- */
-static INLINE void
-xorbuf(const unsigned char *in1, const unsigned char *in2,
-	unsigned char *out, size_t data_len)
-{
-	while (data_len -- > 0)
-		*out ++ = *in1 ++ ^ *in2 ++;
-}
-
-/* ======================================================================== */
-/*
- * External API.
- */
-
-#if defined SOSEMANUK_ECRYPT
-
-/* see ecrypt-sync.h */
-void
-ECRYPT_process_bytes(int action, ECRYPT_ctx *ctx,
-	const u8 *input, u8 *output, u32 msglen)
-{
-	(void)action;
-
-	while (msglen > 0) {
-		unsigned char tbuf[ECRYPT_BLOCKLENGTH];
-		size_t len;
-
-		sosemanuk_internal(ctx, tbuf);
-		len = sizeof tbuf;
-		if (len > msglen)
-			len = msglen;
-		xorbuf(input, tbuf, output, len);
-		input += len;
-		output += len;
-		msglen -= len;
-	}
-}
-
-/* see ecrypt-sync.h */
-void
-ECRYPT_keystream_bytes(ECRYPT_ctx *ctx, u8 *keystream, u32 length)
-{
-	while (length > 0) {
-		if (length >= ECRYPT_BLOCKLENGTH) {
-			sosemanuk_internal(ctx, keystream);
-			keystream += ECRYPT_BLOCKLENGTH;
-			length -= ECRYPT_BLOCKLENGTH;
-		} else {
-			unsigned char tbuf[ECRYPT_BLOCKLENGTH];
-
-			sosemanuk_internal(ctx, tbuf);
-			memcpy(keystream, tbuf, length);
-			return;
-		}
-	}
-}
-
-/* see ecrypt-sync.h */
-void
-ECRYPT_process_blocks(int action, ECRYPT_ctx *ctx,
-	const u8 *input, u8 *output, u32 blocks)
-{
-	(void)action;
-
-	while (blocks -- > 0) {
-		unsigned char tbuf[ECRYPT_BLOCKLENGTH];
-
-		sosemanuk_internal(ctx, tbuf);
-		xorbuf(input, tbuf, output, ECRYPT_BLOCKLENGTH);
-		input += ECRYPT_BLOCKLENGTH;
-		output += ECRYPT_BLOCKLENGTH;
-	}
-}
-
-/* see ecrypt-sync.h */
-void
-ECRYPT_keystream_blocks(ECRYPT_ctx *ctx, u8 *keystream, u32 blocks)
-{
-	while (blocks -- > 0) {
-		sosemanuk_internal(ctx, keystream);
-		keystream += ECRYPT_BLOCKLENGTH;
-	}
-}
-
-#elif !defined SOSEMANUK_SPEED
-
-/* see sosemanuk.h */
-void
-sosemanuk_prng(sosemanuk_run_context *rc, unsigned char *out, size_t out_len)
-{
-	if (rc->ptr < (sizeof rc->buf)) {
-		size_t rlen = (sizeof rc->buf) - rc->ptr;
-
-		if (rlen > out_len)
-			rlen = out_len;
-		memcpy(out, rc->buf + rc->ptr, rlen);
-		out += rlen;
-		out_len -= rlen;
-		rc->ptr += rlen;
-	}
-	while (out_len > 0) {
-		sosemanuk_internal(rc);
-		if (out_len >= sizeof rc->buf) {
-			memcpy(out, rc->buf, sizeof rc->buf);
-			out += sizeof rc->buf;
-			out_len -= sizeof rc->buf;
-		} else {
-			memcpy(out, rc->buf, out_len);
-			rc->ptr = out_len;
-			out_len = 0;
-		}
-	}
-}
-
-/* see sosemanuk.h */
-void
-sosemanuk_encrypt(sosemanuk_run_context *rc,
-	unsigned char *in, unsigned char *out, size_t data_len)
-{
-	if (rc->ptr < (sizeof rc->buf)) {
-		size_t rlen = (sizeof rc->buf) - rc->ptr;
-
-		if (rlen > data_len)
-			rlen = data_len;
-		xorbuf(rc->buf + rc->ptr, in, out, rlen);
-		in += rlen;
-		out += rlen;
-		data_len -= rlen;
-		rc->ptr += rlen;
-	}
-	while (data_len > 0) {
-		sosemanuk_internal(rc);
-		if (data_len >= sizeof rc->buf) {
-			xorbuf(rc->buf, in, out, sizeof rc->buf);
-			in += sizeof rc->buf;
-			out += sizeof rc->buf;
-			data_len -= sizeof rc->buf;
-		} else {
-			xorbuf(rc->buf, in, out, data_len);
-			rc->ptr = data_len;
-			data_len = 0;
-		}
-	}
-}
-
-#endif
-
-#if defined SOSEMANUK_VECTOR
-
-/* ======================================================================== */
-/*
- * Test code. This code is used to generate test vectors, with the
- * SOSEMANUK_VECTOR macro defined.
- */
-
-/*
- * Generate 160 bytes of stream with the provided key and IV.
- */
-static void
-maketest(int tvn, unsigned char *key, size_t key_len,
-	unsigned char *iv, size_t iv_len)
-{
-#ifdef SOSEMANUK_ECRYPT
-	ECRYPT_ctx ctx;
-#else
-	sosemanuk_key_context kc;
-	sosemanuk_run_context rc;
-#endif
-	unsigned char tmp[160];
-	unsigned u;
-
-	printf("=====================================================\n");
-	printf("Detailed test vector %d:\n", tvn);
-
-#ifdef SOSEMANUK_ECRYPT
-	ECRYPT_init();
-	ECRYPT_keysetup(&ctx, key, key_len * 8, iv_len * 8);
-	ECRYPT_ivsetup(&ctx, iv);
-#if defined SOSEMANUK_TEST_ENCRYPT_BYTES
-	memset(tmp, 0, sizeof tmp);
-	ECRYPT_encrypt_bytes(&ctx, tmp, tmp, sizeof tmp);
-#elif defined SOSEMANUK_TEST_DECRYPT_BYTES
-	memset(tmp, 0, sizeof tmp);
-	ECRYPT_decrypt_bytes(&ctx, tmp, tmp, sizeof tmp);
-#elif defined SOSEMANUK_TEST_ENCRYPT_BLOCKS
-	memset(tmp, 0, sizeof tmp);
-	ECRYPT_encrypt_blocks(&ctx, tmp, tmp, 2);
-#elif defined SOSEMANUK_TEST_DECRYPT_BLOCKS
-	memset(tmp, 0, sizeof tmp);
-	ECRYPT_decrypt_blocks(&ctx, tmp, tmp, 2);
-#elif defined SOSEMANUK_TEST_KEYSTREAM_BLOCKS
-	ECRYPT_keystream_blocks(&ctx, tmp, 2);
-#else
-	ECRYPT_keystream_bytes(&ctx, tmp, sizeof tmp);
-#endif
-#else
-	sosemanuk_schedule(&kc, key, key_len);
-	sosemanuk_init(&rc, &kc, iv, iv_len);
-	sosemanuk_prng(&rc, tmp, sizeof tmp);
-#endif
-
-	printf("\n");
-	printf("Total output:");
-	for (u = 0; u < sizeof tmp; u ++) {
-		if ((u & 0x0F) == 0)
-			printf("\n");
-		printf(" %02X", (unsigned)tmp[u]);
-	}
-	printf("\n\n");
-}
-
-int
-main(void)
-{
-	static unsigned char key1[] = { 0xA7, 0xC0, 0x83, 0xFE, 0xB7 };
-	static unsigned char iv1[] = {
-		0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
-		0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF
-	};
-
-	static unsigned char key2[] = {
-		0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
-		0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF
-	};
-	static unsigned char iv2[] = {
-		0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF,
-		0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77
-	};
-
-	maketest(1, key1, sizeof key1, iv1, sizeof iv1);
-	maketest(2, key2, sizeof key2, iv2, sizeof iv2);
-	return 0;
-}
-
-#elif defined SOSEMANUK_SPEED
-
-/* ======================================================================== */
-/*
- * Test code. This code is used to measure implementation speed. The
- * provided argument is the size of benched output stream, in megabytes.
- */
-
-static void
-usage(void)
-{
-	fprintf(stderr, "missing argument: output length (in megabytes)\n");
-	exit(EXIT_FAILURE);
-}
-
-int
-main(int argc, char *argv[])
-{
-	static unsigned char key[] = { 0xA7, 0xC0, 0x83, 0xFE, 0xB7 };
-	static unsigned char iv[] = {
-		0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
-		0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF
-	};
-	sosemanuk_key_context kc;
-	sosemanuk_run_context rc;
-	unsigned long speed_counter;
-	clock_t orig, end;
-	double nw, ts;
-	unum32 sum;
-
-	if (argc < 2)
-		usage();
-	speed_counter = strtoul(argv[1], 0, 0);
-	speed_counter = (speed_counter * 65536UL) / 5;
-	if (speed_counter == 0)
-		usage();
-	nw = (double)speed_counter * 20.0;
-	printf("number of 32-bit words: %.0f\n", nw);
-	sosemanuk_schedule(&kc, key, sizeof key);
-	sosemanuk_init(&rc, &kc, iv, sizeof iv);
-	sosemanuk_internal(&rc, 16);
-	orig = clock();
-	sum = sosemanuk_internal(&rc, speed_counter);
-	end = clock();
-	ts = (double)end / CLOCKS_PER_SEC - (double)orig / CLOCKS_PER_SEC;
-	if (ts <= 1.0) {
-		printf("too fast: no meaningful result\n");
-	} else {
-		printf("elapsed time: %.4f seconds\n", ts);
-		printf("32-bit words per second: %.0f\n", nw / ts);
-	}
-	printf("sum = %08lX\n", (unsigned long)sum);
-	return 0;
-}
-
-#endif
diff --git a/sosemanuk.h b/sosemanuk.h
deleted file mode 100644
index 3cee5a8..0000000
--- a/sosemanuk.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * SOSEMANUK reference API.
- *
- * This file documents the reference implementation API. If the
- * macro SOSEMANUK_ECRYPT is defined, the API follows the ECRYPT
- * conventions (types, function names...) and uses the ECRYPT files;
- * otherwise, a simpler API is used.
- *
- * (c) 2005 X-CRYPT project. This software is provided 'as-is', without
- * any express or implied warranty. In no event will the authors be held
- * liable for any damages arising from the use of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to no restriction.
- *
- * Technical remarks and questions can be addressed to
- * <thomas.pornin@cryptolog.com>
- */
-
-#ifndef SOSEMANUK_H__
-#define SOSEMANUK_H__
-
-/*
- * This macro enables the ECRYPT API, and disables the local API.
- * It is defined by default, for ECRYPT processing.
- */
-//#define SOSEMANUK_ECRYPT
-
-#ifdef SOSEMANUK_ECRYPT
-
-#include "ecrypt-sync.h"
-
-#else
-
-#include <limits.h>
-
-/*
- * Input/Output is defined in terms of octets, but C provides only
- * the C notion of "byte". We require that C bytes are actually octets.
- */
-#if CHAR_BIT != 8
-#error We need 8-bit bytes
-#endif
-
-/*
- * We want an unsigned integer type with at least (and possibly exactly)
- * 32 bits. Such a type implements arithmetics modulo 2^n for a value
- * n greater than or equal to 32. The type is named "unum32".
- *
- * Note: we try to use C99 features such as <stdint.h>. This may prove
- * problematic on architectures which claim C99 conformance, but fail
- * to actually conform. If necessary, define the macro BROKEN_C99 to
- * fall back to C90, whatever the environment claims:
-#define BROKEN_C99  1
- */
-
-#if !defined BROKEN_C99 && defined __STDC__ && __STDC_VERSION__ >= 199901L
-
-/*
- * C99 implementation. We use "uint_least32_t" which has the required
- * semantics.
- */
-#include <stdint.h>
-typedef uint_least32_t unum32;
-
-#else
-
-/*
- * Pre-C99 implementation. "unsigned long" is guaranteed to be wide
- * enough, but we want to use "unsigned int" if possible (especially
- * for 64-bit architectures).
- */
-#if UINT_MAX >= 0xFFFFFFFF
-typedef unsigned int unum32;
-#else
-typedef unsigned long unum32;
-#endif
-
-#endif
-
-/*
- * We want (and sometimes need) to perform explicit truncations to 32 bits.
- */
-#define ONE32    ((unum32)0xFFFFFFFF)
-#define T32(x)   ((x) & ONE32)
-
-/*
- * Some of our functions will be tagged as "inline" to help the compiler
- * optimize things. We use "inline" only if the compiler is advanced
- * enough to understand it; C99 compilers, and pre-C99 versions of gcc,
- * understand enough "inline" for our purposes.
- */
-#if (!defined BROKEN_C99 && defined __STDC__ && __STDC_VERSION__ >= 199901L) \
-	|| defined __GNUC__
-#define INLINE inline
-#else
-#define INLINE
-#endif
-
-/*
- * API description:
- *
- * The SOSEMANUK algorithm works with a secret key and an initial value (IV).
- * Two context structures are used:
- *
- * -- "sosemanuk_key_context" holds the processed secret key. The contents
- * of this structure depends only on the key, not the IV.
- *
- * -- "sosemanuk_run_context" holds the current cipher internal state. This
- * structure is initialized using the "sosemanuk_key_context" structure, and
- * the IV; it is updated each time some output is produced.
- *
- * Both structures may be allocated as local variables. There is no
- * other external allocation (using malloc() or any similar function).
- * There is no global state; hence, this code is thread-safe and
- * reentrant.
- */
-
-typedef struct {
-	/*
-	 * Sub-keys for Serpent24.
-	 */
-	unum32 sk[100];
-} sosemanuk_key_context;
-
-typedef struct {
-	/*
-	 * Internal cipher state.
-	 */
-	unum32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09;
-	unum32 r1, r2;
-
-	/*
-	 * Buffering: the stream cipher produces output data by
-	 * blocks of 640 bits. buf[] contains such a block, and
-	 * "ptr" is the index of the next output byte.
-	 */
-	unsigned char buf[80];
-	unsigned ptr;
-} sosemanuk_run_context;
-
-/*
- * Key schedule: initialize the key context structure with the provided
- * secret key. The secret key is an array of 1 to 32 bytes.
- */
-void sosemanuk_schedule(sosemanuk_key_context *kc,
-	unsigned char *key, size_t key_len);
-
-/*
- * Cipher initialization: the cipher internal state is initialized, using
- * the provided key context and IV. The IV length is up to 16 bytes. If
- * "iv_len" is 0 (no IV), then the "iv" parameter can be NULL.
- */
-void sosemanuk_init(sosemanuk_run_context *rc,
-	sosemanuk_key_context *kc, unsigned char *iv, size_t iv_len);
-
-/*
- * Cipher operation, as a PRNG: the provided output buffer is filled with
- * pseudo-random bytes as output from the stream cipher.
- */
-void sosemanuk_prng(sosemanuk_run_context *rc,
-	unsigned char *out, size_t out_len);
-
-/*
- * Cipher operation, as a stream cipher: data is read from the "in"
- * buffer, combined by XOR with the stream, and the result is written
- * in the "out" buffer. "in" and "out" must be either equal, or
- * reference distinct buffers (no partial overlap is allowed).
- */
-void sosemanuk_encrypt(sosemanuk_run_context *rc,
-	unsigned char *in, unsigned char *out, size_t data_len);
-
-#endif
-
-#endif