From 25dfb4687ecaa4e3b9e11f678c62eb5cbed75657 Mon Sep 17 00:00:00 2001
From: ZC <dave@onewebway.com>
Date: Sun, 2 Dec 2018 18:49:35 -0500
Subject: [PATCH] x20r algo with saphir panama and radiogatun (#33)

X20r algo - this is x16r extended with the addition of haval, gost, radiogatun and panama

+ space/tabs cleanup and algo order and missing vstudio files...
---
 Makefile.am              |    5 +-
 algo/x20r.c              |  258 ++++++++++
 cpu-miner.c              |    8 +
 cpuminer.vcxproj         |    4 +
 cpuminer.vcxproj.filters |   15 +
 miner.h                  |    2 +
 sha3/sph_panama.c        |  334 +++++++++++++
 sha3/sph_panama.h        |  118 +++++
 sha3/sph_radiogatun.c    | 1003 ++++++++++++++++++++++++++++++++++++++
 sha3/sph_radiogatun.h    |  186 +++++++
 util.c                   |    3 +
 11 files changed, 1935 insertions(+), 1 deletion(-)
 create mode 100644 algo/x20r.c
 create mode 100644 sha3/sph_panama.c
 create mode 100644 sha3/sph_panama.h
 create mode 100644 sha3/sph_radiogatun.c
 create mode 100644 sha3/sph_radiogatun.h

diff --git a/Makefile.am b/Makefile.am
index 0d389dd33..7933a2a13 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -32,9 +32,11 @@ cpuminer_SOURCES = \
   sha3/sph_cubehash.c \
   sha3/sph_simd.c \
   sha3/sph_echo.c \
+  sha3/sph_fugue.c \
   sha3/sph_hamsi.c \
   sha3/sph_haval.c \
-  sha3/sph_fugue.c \
+  sha3/sph_panama.c \
+  sha3/sph_radiogatun.c \
   sha3/sph_ripemd.c \
   sha3/sph_sha2.c \
   sha3/sph_sha2big.c \
@@ -107,6 +109,7 @@ cpuminer_SOURCES = \
   algo/x15.c \
   algo/x16r.c \
   algo/x16s.c \
+  algo/x20r.c \
   algo/x17.c \
   algo/xevan.c \
   algo/yescrypt.c \
diff --git a/algo/x20r.c b/algo/x20r.c
new file mode 100644
index 000000000..fc5c10468
--- /dev/null
+++ b/algo/x20r.c
@@ -0,0 +1,258 @@
+#include "miner.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sha3/sph_blake.h>
+#include <sha3/sph_bmw.h>
+#include <sha3/sph_groestl.h>
+#include <sha3/sph_jh.h>
+#include <sha3/sph_keccak.h>
+#include <sha3/sph_skein.h>
+#include <sha3/sph_luffa.h>
+#include <sha3/sph_cubehash.h>
+#include <sha3/sph_shavite.h>
+#include <sha3/sph_simd.h>
+#include <sha3/sph_echo.h>
+#include <sha3/sph_hamsi.h>
+#include <sha3/sph_fugue.h>
+#include <sha3/sph_shabal.h>
+#include <sha3/sph_whirlpool.h>
+#include <sha3/sph_sha2.h>
+#include <sha3/sph_haval.h>
+#include <sha3/sph_radiogatun.h>
+#include <sha3/sph_panama.h>
+#include <sha3/gost_streebog.h>
+
+enum Algo { /* round selectors: the value is the digit decoded from hashOrder */
+	BLAKE = 0,
+	BMW,
+	GROESTL,
+	JH,
+	KECCAK,
+	SKEIN,
+	LUFFA,
+	CUBEHASH,
+	SHAVITE,
+	SIMD,
+	ECHO,
+	HAMSI,
+	FUGUE,
+	SHABAL,
+	WHIRLPOOL,
+	SHA512, /* 15: the largest value a single hex digit can encode */
+	HAVAL,
+	GOST,
+	RADIOGATUN,
+	PANAMA,
+	HASH_FUNC_COUNT /* number of entries above; also the digit count of hashOrder */
+};
+
+static __thread uint32_t s_ntime = UINT32_MAX; /* UINT32_MAX: no hash order computed on this thread yet */
+static __thread char hashOrder[HASH_FUNC_COUNT + 1] = { 0 }; /* one digit ('0'-'9', 'A'-'F') per round, NUL-terminated */
+
+static void getAlgoString(const uint8_t* prevblock, char *output)
+{
+	/* Digit j is a nibble of prevblock[(19 - j) >> 1]: bytes 9 down to 0,
+	 * high nibble first, written as uppercase hex. A nibble is 0..15, so
+	 * Algo values above SHA512 are never produced here. */
+	static const char hexDigits[] = "0123456789ABCDEF";
+
+	for (int j = 0; j < HASH_FUNC_COUNT; j++) {
+		const uint8_t byte = prevblock[(19 - j) >> 1];
+		const uint8_t algoDigit = (j & 1) ? (byte & 0xF) : (byte >> 4);
+		/* Plain store; sprintf() is not needed for a single character. */
+		output[j] = hexDigits[algoDigit];
+	}
+	output[HASH_FUNC_COUNT] = '\0';
+}
+
+/* Run the HASH_FUNC_COUNT chained hash rounds selected by hashOrder over an
+ * 80-byte input and copy the first 32 bytes of the last digest to output.
+ * Round 1 reads the 80 input bytes; each later round reads 64 bytes of hash[]. */
+void x20r_hash(void* output, const void* input)
+{
+	uint32_t _ALIGN(128) hash[64/4];
+	sph_blake512_context     ctx_blake;
+	sph_bmw512_context       ctx_bmw;
+	sph_groestl512_context   ctx_groestl;
+	sph_skein512_context     ctx_skein;
+	sph_jh512_context        ctx_jh;
+	sph_keccak512_context    ctx_keccak;
+	sph_luffa512_context     ctx_luffa;
+	sph_cubehash512_context  ctx_cubehash;
+	sph_shavite512_context   ctx_shavite;
+	sph_simd512_context      ctx_simd;
+	sph_echo512_context      ctx_echo;
+	sph_hamsi512_context     ctx_hamsi;
+	sph_fugue512_context     ctx_fugue;
+	sph_shabal512_context    ctx_shabal;
+	sph_whirlpool_context    ctx_whirlpool;
+	sph_sha512_context       ctx_sha512;
+	sph_haval256_5_context   ctx_haval;
+	sph_gost512_context      ctx_gost;
+	sph_radiogatun64_context ctx_radiogatun;
+	sph_panama_context       ctx_panama;
+
+	const void *in = input; /* message of the current round; stays const */
+	int size = 80;
+
+	if (s_ntime == UINT32_MAX) { /* no order on this thread yet: use input bytes 4..13 */
+		const uint8_t* in8 = (const uint8_t*) input;
+		getAlgoString(&in8[4], hashOrder);
+	}
+
+	for (int i = 0; i < HASH_FUNC_COUNT; i++) {
+		const char elem = hashOrder[i];
+		const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; /* hex digit -> 0..15 */
+		switch (algo) {
+		case BLAKE:
+			sph_blake512_init(&ctx_blake);
+			sph_blake512(&ctx_blake, in, size);
+			sph_blake512_close(&ctx_blake, hash);
+			break;
+		case BMW:
+			sph_bmw512_init(&ctx_bmw);
+			sph_bmw512(&ctx_bmw, in, size);
+			sph_bmw512_close(&ctx_bmw, hash);
+			break;
+		case GROESTL:
+			sph_groestl512_init(&ctx_groestl);
+			sph_groestl512(&ctx_groestl, in, size);
+			sph_groestl512_close(&ctx_groestl, hash);
+			break;
+		case SKEIN:
+			sph_skein512_init(&ctx_skein);
+			sph_skein512(&ctx_skein, in, size);
+			sph_skein512_close(&ctx_skein, hash);
+			break;
+		case JH:
+			sph_jh512_init(&ctx_jh);
+			sph_jh512(&ctx_jh, in, size);
+			sph_jh512_close(&ctx_jh, hash);
+			break;
+		case KECCAK:
+			sph_keccak512_init(&ctx_keccak);
+			sph_keccak512(&ctx_keccak, in, size);
+			sph_keccak512_close(&ctx_keccak, hash);
+			break;
+		case LUFFA:
+			sph_luffa512_init(&ctx_luffa);
+			sph_luffa512(&ctx_luffa, in, size);
+			sph_luffa512_close(&ctx_luffa, hash);
+			break;
+		case CUBEHASH:
+			sph_cubehash512_init(&ctx_cubehash);
+			sph_cubehash512(&ctx_cubehash, in, size);
+			sph_cubehash512_close(&ctx_cubehash, hash);
+			break;
+		case SHAVITE:
+			sph_shavite512_init(&ctx_shavite);
+			sph_shavite512(&ctx_shavite, in, size);
+			sph_shavite512_close(&ctx_shavite, hash);
+			break;
+		case SIMD:
+			sph_simd512_init(&ctx_simd);
+			sph_simd512(&ctx_simd, in, size);
+			sph_simd512_close(&ctx_simd, hash);
+			break;
+		case ECHO:
+			sph_echo512_init(&ctx_echo);
+			sph_echo512(&ctx_echo, in, size);
+			sph_echo512_close(&ctx_echo, hash);
+			break;
+		case HAMSI:
+			sph_hamsi512_init(&ctx_hamsi);
+			sph_hamsi512(&ctx_hamsi, in, size);
+			sph_hamsi512_close(&ctx_hamsi, hash);
+			break;
+		case FUGUE:
+			sph_fugue512_init(&ctx_fugue);
+			sph_fugue512(&ctx_fugue, in, size);
+			sph_fugue512_close(&ctx_fugue, hash);
+			break;
+		case SHABAL:
+			sph_shabal512_init(&ctx_shabal);
+			sph_shabal512(&ctx_shabal, in, size);
+			sph_shabal512_close(&ctx_shabal, hash);
+			break;
+		case WHIRLPOOL:
+			sph_whirlpool_init(&ctx_whirlpool);
+			sph_whirlpool(&ctx_whirlpool, in, size);
+			sph_whirlpool_close(&ctx_whirlpool, hash);
+			break;
+		case SHA512:
+			sph_sha512_init(&ctx_sha512);
+			sph_sha512(&ctx_sha512, in, size);
+			sph_sha512_close(&ctx_sha512, hash);
+			break;
+		case HAVAL:
+			sph_haval256_5_init(&ctx_haval);
+			sph_haval256_5(&ctx_haval, in, size);
+			sph_haval256_5_close(&ctx_haval, hash);
+			break;
+		case GOST:
+			sph_gost512_init(&ctx_gost);
+			sph_gost512(&ctx_gost, in, size);
+			sph_gost512_close(&ctx_gost, hash);
+			break;
+		case RADIOGATUN:
+			sph_radiogatun64_init(&ctx_radiogatun);
+			sph_radiogatun64(&ctx_radiogatun, in, size);
+			sph_radiogatun64_close(&ctx_radiogatun, hash);
+			break;
+		case PANAMA:
+			sph_panama_init(&ctx_panama);
+			sph_panama(&ctx_panama, in, size);
+			sph_panama_close(&ctx_panama, hash);
+			break;
+		}
+		in = hash;
+		size = 64;
+	}
+	memcpy(output, hash, 32);
+}
+
+/* Try nonces from pdata[19] until max_nonce or a work restart. Returns 1 and
+ * stores the nonce in pdata[19] when a hash meets work->target, otherwise 0. */
+int scanhash_x20r(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
+{
+	uint32_t _ALIGN(128) hash32[8];
+	uint32_t _ALIGN(128) endiandata[20];
+	uint32_t *pdata = work->data;
+	uint32_t *ptarget = work->target;
+	const uint32_t first_nonce = pdata[19];
+	uint32_t nonce = first_nonce;
+	volatile uint8_t *restart = &(work_restart[thr_id].restart);
+
+	/* Apply the benchmark target before caching it, so Htarg is never stale. */
+	if (opt_benchmark)
+		ptarget[7] = 0x0cff;
+	const uint32_t Htarg = ptarget[7];
+
+	for (int k=0; k < 19; k++)
+		be32enc(&endiandata[k], pdata[k]);
+
+	/* The order is read from header bytes 4..13, not from ntime: rebuild it on
+	 * every call instead of caching on a byte-swapped ntime comparison. */
+	getAlgoString((const uint8_t*) &endiandata[1], hashOrder);
+	s_ntime = swab32(pdata[17]); /* any value but UINT32_MAX marks hashOrder as valid */
+	if (opt_debug && !thr_id) applog(LOG_DEBUG, "hash order %s (%08x)", hashOrder, s_ntime);
+
+	do {
+		be32enc(&endiandata[19], nonce);
+		x20r_hash(hash32, endiandata);
+		if (hash32[7] <= Htarg && fulltest(hash32, ptarget)) {
+			work_set_target_ratio(work, hash32);
+			pdata[19] = nonce;
+			*hashes_done = pdata[19] - first_nonce;
+			return 1;
+		}
+		nonce++;
+	} while (nonce < max_nonce && !(*restart));
+
+	pdata[19] = nonce;
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/cpu-miner.c b/cpu-miner.c
index 8c538e988..30a3eabb5 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -134,6 +134,7 @@ enum algos {
 	ALGO_X16R,
 	ALGO_X16S,
 	ALGO_X17,         /* X17 */
+	ALGO_X20R,
 	ALGO_XEVAN,
 	ALGO_YESCRYPT,
 	ALGO_ZR5,
@@ -198,6 +199,7 @@ static const char *algo_names[] = {
 	"x16r",
 	"x16s",
 	"x17",
+	"x20r",
 	"xevan",
 	"yescrypt",
 	"zr5",
@@ -359,6 +361,7 @@ Options:\n\
                           x16r         X16R (Raven)\n\
                           x16s         X16S (Pigeon)\n\
                           x17          X17\n\
+                          x20r         X20R\n\
                           xevan        Xevan (BitSend)\n\
                           yescrypt     Yescrypt\n\
                           zr5          ZR5\n\
@@ -1853,6 +1856,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 			case ALGO_XEVAN:
 			case ALGO_X16R:
 			case ALGO_X16S:
+			case ALGO_X20R:
 				work_set_target(work, sctx->job.diff / (256.0 * opt_diff_factor));
 				break;
 			case ALGO_KECCAK:
@@ -2205,6 +2209,7 @@ static void *miner_thread(void *userdata)
 			case ALGO_X16R:
 			case ALGO_X16S:
 			case ALGO_X17:
+			case ALGO_X20R:
 			case ALGO_ZR5:
 				max64 = 0x1ffff;
 				break;
@@ -2404,6 +2409,9 @@ static void *miner_thread(void *userdata)
 		case ALGO_X16R:
 			rc = scanhash_x16r(thr_id, &work, max_nonce, &hashes_done);
 			break;
+		case ALGO_X20R:
+			rc = scanhash_x20r(thr_id, &work, max_nonce, &hashes_done);
+			break;
 		case ALGO_X16S:
 			rc = scanhash_x16s(thr_id, &work, max_nonce, &hashes_done);
 			break;
diff --git a/cpuminer.vcxproj b/cpuminer.vcxproj
index 0a4d4ec71..12005153f 100644
--- a/cpuminer.vcxproj
+++ b/cpuminer.vcxproj
@@ -289,6 +289,8 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="sha3\sph_haval.c" />
+    <ClCompile Include="sha3\sph_panama.c" />
+    <ClCompile Include="sha3\sph_radiogatun.c" />
     <ClCompile Include="sha3\sph_whirlpool.c" />
     <ClCompile Include="sha3\gost_streebog.c" />
     <ClCompile Include="sha3\md_helper.c">
@@ -358,6 +360,8 @@
     <ClInclude Include="sha3\sph_simd.h" />
     <ClInclude Include="sha3\sph_skein.h" />
     <ClInclude Include="sha3\sph_hamsi.h" />
+    <ClInclude Include="sha3\sph_panama.h" />
+    <ClInclude Include="sha3\sph_radiogatun.h" />
     <ClInclude Include="sha3\sph_types.h" />
     <ClInclude Include="sha3\sph_whirlpool.h" />
     <ClInclude Include="yescrypt\sha256_Y.h" />
diff --git a/cpuminer.vcxproj.filters b/cpuminer.vcxproj.filters
index dd46391b7..22d53c034 100644
--- a/cpuminer.vcxproj.filters
+++ b/cpuminer.vcxproj.filters
@@ -46,6 +46,12 @@
     <ClCompile Include="sha3\sph_luffa.c">
       <Filter>sph</Filter>
     </ClCompile>
+    <ClCompile Include="sha3\sph_panama.c">
+      <Filter>sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sha3\sph_radiogatun.c">
+      <Filter>sph</Filter>
+    </ClCompile>
     <ClCompile Include="sha3\sph_ripemd.c">
       <Filter>sph</Filter>
     </ClCompile>
@@ -303,6 +309,9 @@
     <ClCompile Include="algo\x16r.c">
       <Filter>algo</Filter>
     </ClCompile>
+    <ClCompile Include="algo\x20r.c">
+      <Filter>algo</Filter>
+    </ClCompile>
     <ClCompile Include="algo\x16s.c">
       <Filter>algo</Filter>
     </ClCompile>
@@ -395,6 +404,12 @@
     <ClInclude Include="sha3\sph_luffa.h">
       <Filter>sph</Filter>
     </ClInclude>
+    <ClInclude Include="sha3\sph_panama.h">
+      <Filter>sph</Filter>
+    </ClInclude>
+    <ClInclude Include="sha3\sph_radiogatun.h">
+      <Filter>sph</Filter>
+    </ClInclude>
     <ClInclude Include="sha3\sph_sha2.h">
       <Filter>sph</Filter>
     </ClInclude>
diff --git a/miner.h b/miner.h
index 9ecbb88a6..897ec7ba5 100644
--- a/miner.h
+++ b/miner.h
@@ -256,6 +256,7 @@ int scanhash_x15(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *ha
 int scanhash_x16r(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done);
 int scanhash_x16s(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done);
 int scanhash_x17(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done);
+int scanhash_x20r(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done);
 int scanhash_xevan(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done);
 int scanhash_yescrypt(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done);
 int scanhash_zr5(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done);
@@ -550,6 +551,7 @@ void x15hash(void *output, const void *input);
 void x16r_hash(void *output, const void *input);
 void x16s_hash(void *output, const void *input);
 void x17hash(void *output, const void *input);
+void x20r_hash(void *output, const void *input);
 void zr5hash(void *output, const void *input);
 void yescrypthash(void *output, const void *input);
 void zr5hash_pok(void *output, uint32_t *pdata);
diff --git a/sha3/sph_panama.c b/sha3/sph_panama.c
new file mode 100644
index 000000000..f3c27c77a
--- /dev/null
+++ b/sha3/sph_panama.c
@@ -0,0 +1,334 @@
+/* $Id: panama.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * PANAMA implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_panama.h"
+
+#define LVAR17(b)  sph_u32 \
+	b ## 0, b ## 1, b ## 2, b ## 3, b ## 4, b ## 5, \
+	b ## 6, b ## 7, b ## 8, b ## 9, b ## 10, b ## 11, \
+	b ## 12, b ## 13, b ## 14, b ## 15, b ## 16;
+
+#define LVARS   \
+	LVAR17(a) \
+	LVAR17(g) \
+	LVAR17(p) \
+	LVAR17(t)
+
+#define M17(macro)   do { \
+		macro( 0,  1,  2,  4); \
+		macro( 1,  2,  3,  5); \
+		macro( 2,  3,  4,  6); \
+		macro( 3,  4,  5,  7); \
+		macro( 4,  5,  6,  8); \
+		macro( 5,  6,  7,  9); \
+		macro( 6,  7,  8, 10); \
+		macro( 7,  8,  9, 11); \
+		macro( 8,  9, 10, 12); \
+		macro( 9, 10, 11, 13); \
+		macro(10, 11, 12, 14); \
+		macro(11, 12, 13, 15); \
+		macro(12, 13, 14, 16); \
+		macro(13, 14, 15,  0); \
+		macro(14, 15, 16,  1); \
+		macro(15, 16,  0,  2); \
+		macro(16,  0,  1,  3); \
+	} while (0)
+
+#define BUPDATE1(n0, n2)   do { \
+		sc->buffer[ptr24][n0] ^= sc->buffer[ptr31][n2]; \
+		sc->buffer[ptr31][n2] ^= INW1(n2); \
+	} while (0)
+
+#define BUPDATE   do { \
+		BUPDATE1(0, 2); \
+		BUPDATE1(1, 3); \
+		BUPDATE1(2, 4); \
+		BUPDATE1(3, 5); \
+		BUPDATE1(4, 6); \
+		BUPDATE1(5, 7); \
+		BUPDATE1(6, 0); \
+		BUPDATE1(7, 1); \
+	} while (0)
+
+#define RSTATE(n0, n1, n2, n4)    (a ## n0 = sc->state[n0])
+
+#define WSTATE(n0, n1, n2, n4)    (sc->state[n0] = a ## n0)
+
+#define GAMMA(n0, n1, n2, n4)   \
+	(g ## n0 = a ## n0 ^ (a ## n1 | SPH_T32(~a ## n2)))
+
+#define PI_ALL   do { \
+		p0  = g0; \
+		p1  = SPH_ROTL32( g7,  1); \
+		p2  = SPH_ROTL32(g14,  3); \
+		p3  = SPH_ROTL32( g4,  6); \
+		p4  = SPH_ROTL32(g11, 10); \
+		p5  = SPH_ROTL32( g1, 15); \
+		p6  = SPH_ROTL32( g8, 21); \
+		p7  = SPH_ROTL32(g15, 28); \
+		p8  = SPH_ROTL32( g5,  4); \
+		p9  = SPH_ROTL32(g12, 13); \
+		p10 = SPH_ROTL32( g2, 23); \
+		p11 = SPH_ROTL32( g9,  2); \
+		p12 = SPH_ROTL32(g16, 14); \
+		p13 = SPH_ROTL32( g6, 27); \
+		p14 = SPH_ROTL32(g13,  9); \
+		p15 = SPH_ROTL32( g3, 24); \
+		p16 = SPH_ROTL32(g10,  8); \
+	} while (0)
+
+#define THETA(n0, n1, n2, n4)   \
+	(t ## n0 = p ## n0 ^ p ## n1 ^ p ## n4)
+
+#define SIGMA_ALL   do { \
+		a0 = t0 ^ 1; \
+		a1 = t1 ^ INW2(0); \
+		a2 = t2 ^ INW2(1); \
+		a3 = t3 ^ INW2(2); \
+		a4 = t4 ^ INW2(3); \
+		a5 = t5 ^ INW2(4); \
+		a6 = t6 ^ INW2(5); \
+		a7 = t7 ^ INW2(6); \
+		a8 = t8 ^ INW2(7); \
+		a9  =  t9 ^ sc->buffer[ptr16][0]; \
+		a10 = t10 ^ sc->buffer[ptr16][1]; \
+		a11 = t11 ^ sc->buffer[ptr16][2]; \
+		a12 = t12 ^ sc->buffer[ptr16][3]; \
+		a13 = t13 ^ sc->buffer[ptr16][4]; \
+		a14 = t14 ^ sc->buffer[ptr16][5]; \
+		a15 = t15 ^ sc->buffer[ptr16][6]; \
+		a16 = t16 ^ sc->buffer[ptr16][7]; \
+	} while (0)
+
+#define PANAMA_STEP   do { \
+		unsigned ptr16, ptr24, ptr31; \
+ \
+		ptr24 = (ptr0 - 8) & 31; \
+		ptr31 = (ptr0 - 1) & 31; \
+		BUPDATE; \
+		M17(GAMMA); \
+		PI_ALL; \
+		M17(THETA); \
+		ptr16 = ptr0 ^ 16; \
+		SIGMA_ALL; \
+		ptr0 = ptr31; \
+	} while (0)
+
+/*
+ * INCi expands to i + 1, so that panama_pull() can paste "a" with i + 1.
+ */
+#define INC0     1
+#define INC1     2
+#define INC2     3
+#define INC3     4
+#define INC4     5
+#define INC5     6
+#define INC6     7
+#define INC7     8
+
+/*
+ * Push data by blocks of 32 bytes. "pbuf" must be 32-bit aligned. Each
+ * iteration processes 32 data bytes; "num" contains the number of
+ * iterations.
+ */
+static void
+panama_push(sph_panama_context *sc, const unsigned char *pbuf, size_t num)
+{
+	LVARS /* declares the locals a0..a16, g0..g16, p0..p16 and t0..t16 */
+	unsigned ptr0;
+#if SPH_LITTLE_FAST
+#define INW1(i)   sph_dec32le_aligned(pbuf + 4 * (i))
+#else
+	sph_u32 X_var[8];
+#define INW1(i)   X_var[i]
+#endif
+#define INW2(i)   INW1(i)
+
+	M17(RSTATE); /* a0..a16 = sc->state[0..16] */
+	ptr0 = sc->buffer_ptr;
+	while (num -- > 0) {
+#if !SPH_LITTLE_FAST
+		int i;
+
+		for (i = 0; i < 8; i ++) /* decode the eight input words of this block once */
+			X_var[i] = sph_dec32le_aligned(pbuf + 4 * (i));
+#endif
+		PANAMA_STEP; /* one round; leaves ptr0 = (ptr0 - 1) & 31 */
+		pbuf = (const unsigned char *)pbuf + 32;
+	}
+	M17(WSTATE); /* sc->state[0..16] = a0..a16 */
+	sc->buffer_ptr = ptr0; /* keep the rotated buffer position for the next call */
+
+#undef INW1
+#undef INW2
+}
+
+/*
+ * Perform the "pull" operation repeatedly ("num" times). The hash output
+ * will be extracted from the state afterwards; sc->buffer_ptr is not updated.
+ */
+static void
+panama_pull(sph_panama_context *sc, unsigned num)
+{
+	LVARS /* declares the locals a0..a16, g0..g16, p0..p16 and t0..t16 */
+	unsigned ptr0;
+#define INW1(i)     INW_H1(INC ## i)
+#define INW_H1(i)   INW_H2(i)
+#define INW_H2(i)   a ## i
+#define INW2(i)     sc->buffer[ptr4][i]
+
+	M17(RSTATE); /* a0..a16 = sc->state[0..16] */
+	ptr0 = sc->buffer_ptr;
+	while (num -- > 0) {
+		unsigned ptr4;
+
+		ptr4 = (ptr0 + 4) & 31; /* buffer slot read through INW2 in this round */
+		PANAMA_STEP; /* one round; leaves ptr0 = (ptr0 - 1) & 31 */
+	}
+	M17(WSTATE); /* sc->state[0..16] = a0..a16 */
+
+#undef INW1
+#undef INW_H1
+#undef INW_H2
+#undef INW2
+}
+
+/* see sph_panama.h; resets buffer, state and both positions to zero */
+void
+sph_panama_init(void *cc)
+{
+	sph_panama_context *sc;
+
+	sc = cc;
+	/*
+	 * This is not completely conformant, but "it will work
+	 * everywhere". Initial state consists of zeroes everywhere.
+	 * Conceptually, the sph_u32 type may have padding bits which
+	 * must not be set to 0; but such an architecture remains to
+	 * be seen.
+	 */
+	sc->data_ptr = 0; /* no pending input bytes; sc->data itself is not cleared */
+	memset(sc->buffer, 0, sizeof sc->buffer);
+	sc->buffer_ptr = 0;
+	memset(sc->state, 0, sizeof sc->state);
+}
+
+#ifdef SPH_UPTR
+static void
+panama_short(void *cc, const void *data, size_t len)
+#else
+void
+sph_panama(void *cc, const void *data, size_t len)
+#endif
+{
+	sph_panama_context *sc;
+	unsigned current;
+	/* Accumulate input in sc->data and push each time 32 bytes are collected. */
+	sc = cc;
+	current = sc->data_ptr;
+	while (len > 0) {
+		unsigned clen;
+
+		clen = (sizeof sc->data) - current; /* room left in the pending block */
+		if (clen > len)
+			clen = len;
+		memcpy(sc->data + current, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		current += clen;
+		if (current == sizeof sc->data) { /* block complete: absorb it */
+			current = 0;
+			panama_push(sc, sc->data, 1);
+		}
+	}
+	sc->data_ptr = current; /* bytes left pending for the next call */
+}
+
+#ifdef SPH_UPTR
+/* see sph_panama.h */
+void
+sph_panama(void *cc, const void *data, size_t len)
+{
+	sph_panama_context *sc;
+	unsigned current;
+	size_t rlen;
+
+	if (len < (2 * sizeof sc->data)) { /* fewer than 64 bytes: use the buffered path */
+		panama_short(cc, data, len);
+		return;
+	}
+	sc = cc;
+	current = sc->data_ptr;
+	if (current > 0) { /* complete the pending partial block first */
+		unsigned t;
+
+		t = (sizeof sc->data) - current;
+		panama_short(sc, data, t);
+		data = (const unsigned char *)data + t;
+		len -= t;
+	}
+#if !SPH_UNALIGNED
+	if (((SPH_UPTR)data & 3) != 0) { /* data is not 4-byte aligned: go through sc->data */
+		panama_short(sc, data, len);
+		return;
+	}
+#endif
+	panama_push(sc, data, len >> 5); /* whole 32-byte blocks, read in place */
+	rlen = len & 31; /* leftover bytes, kept in sc->data for the next call */
+	if (rlen > 0)
+		memcpy(sc->data,
+			(const unsigned char *)data + len - rlen, rlen);
+	sc->data_ptr = rlen;
+}
+#endif
+
+/* see sph_panama.h; pads, finalizes, writes 32 bytes to dst and resets the context */
+void
+sph_panama_close(void *cc, void *dst)
+{
+	sph_panama_context *sc;
+	unsigned current;
+	int i;
+
+	sc = cc;
+	current = sc->data_ptr;
+	sc->data[current ++] = 0x01; /* padding: one 0x01 byte ... */
+	memset(sc->data + current, 0, (sizeof sc->data) - current); /* ... then zeros up to 32 bytes */
+	panama_push(sc, sc->data, 1); /* absorb the padded last block */
+	panama_pull(sc, 32); /* 32 pull rounds before reading the output */
+	for (i = 0; i < 8; i ++) /* the output is state words 9..16 */
+		sph_enc32le((unsigned char *)dst + 4 * i, sc->state[i + 9]);
+	sph_panama_init(sc); /* leave the context ready for a new computation */
+}
diff --git a/sha3/sph_panama.h b/sha3/sph_panama.h
new file mode 100644
index 000000000..e4dc1073a
--- /dev/null
+++ b/sha3/sph_panama.h
@@ -0,0 +1,118 @@
+/* $Id: sph_panama.h 154 2010-04-26 17:00:24Z tp $ */
+/**
+ * PANAMA interface.
+ *
+ * PANAMA has been published in: J. Daemen and C. Clapp, "Fast Hashing
+ * and Stream Encryption with PANAMA", Fast Software Encryption -
+ * FSE'98, LNCS 1372, Springer (1998), pp. 60--74.
+ *
+ * PANAMA is not fully defined with regards to endianness and related
+ * topics. This implementation follows strict little-endian conventions:
+ * <ul>
+ * <li>Each 32-byte input block is split into eight 32-bit words, the
+ * first (leftmost) word being numbered 0.</li>
+ * <li>Each such 32-bit word is decoded from memory in little-endian
+ * convention.</li>
+ * <li>The additional padding bit equal to "1" is added by considering
+ * the least significant bit in a byte to come first; practically, this
+ * means that a single byte of value 0x01 is appended to the (byte-oriented)
+ * message, and then 0 to 31 bytes of value 0x00.</li>
+ * <li>The output consists of eight 32-bit words; the word numbered 0 is
+ * written first (in leftmost position) and it is encoded in little-endian
+ * convention.</li>
+ * </ul>
+ * With these conventions, PANAMA is sometimes known as "PANAMA-LE". The
+ * PANAMA reference implementation uses our conventions for input, but
+ * prescribes no convention for output.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_panama.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_PANAMA_H__
+#define SPH_PANAMA_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for PANAMA.
+ */
+#define SPH_SIZE_panama   256
+
+/**
+ * This structure is a context for PANAMA computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a PANAMA computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running PANAMA computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char data[32];   /* first field, for alignment; holds the pending partial block */
+	unsigned data_ptr; /* number of bytes currently pending in data[] */
+
+	sph_u32 buffer[32][8]; /* 32 slots of 8 words, addressed modulo 32 */
+	unsigned buffer_ptr; /* slot index the next push round starts from */
+
+	sph_u32 state[17]; /* the 17 state words */
+#endif
+} sph_panama_context;
+
+/**
+ * Initialize a PANAMA context. This process performs no memory allocation.
+ *
+ * @param cc   the PANAMA context (pointer to a <code>sph_panama_context</code>)
+ */
+void sph_panama_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the PANAMA context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_panama(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current PANAMA computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the PANAMA context
+ * @param dst   the destination buffer
+ */
+void sph_panama_close(void *cc, void *dst);
+
+#endif
diff --git a/sha3/sph_radiogatun.c b/sha3/sph_radiogatun.c
new file mode 100644
index 000000000..888b028f9
--- /dev/null
+++ b/sha3/sph_radiogatun.c
@@ -0,0 +1,1003 @@
+/* $Id: radiogatun.c 226 2010-06-16 17:28:08Z tp $ */
+/*
+ * RadioGatun implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_radiogatun.h"
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_RADIOGATUN
+#define SPH_SMALL_FOOTPRINT_RADIOGATUN   1
+#endif
+
+/* ======================================================================= */
+/*
+ * The core macros. We want to unroll 13 successive rounds so that the
+ * belt rotation becomes pure routing, solved at compilation time, with
+ * no unnecessary copying. We also wish all state variables to be
+ * independent local variables, so that the C compiler becomes free to
+ * map these on registers as it sees fit. This requires some heavy
+ * preprocessor trickeries, including a full addition macro modulo 13.
+ *
+ * These macros are size-independent. Some macros must be defined before
+ * use:
+ *   WT           evaluates to the type for a word (32-bit or 64-bit)
+ *   T            truncates a value to the proper word size
+ *   ROR(x, n)    right rotation of a word x, with explicit modular
+ *                reduction of the rotation count n by the word size
+ *   INW(i, j)    input word j (0, 1, or 2) of block i (0 to 12)
+ *
+ * For INW, the input buffer is pointed to by "buf" which has type
+ * "const unsigned char *".
+ */
+/* Expand action(i) once for every mill index i = 0..18, in order. */
+#define MUL19(action)   do { \
+		action(0); \
+		action(1); \
+		action(2); \
+		action(3); \
+		action(4); \
+		action(5); \
+		action(6); \
+		action(7); \
+		action(8); \
+		action(9); \
+		action(10); \
+		action(11); \
+		action(12); \
+		action(13); \
+		action(14); \
+		action(15); \
+		action(16); \
+		action(17); \
+		action(18); \
+	} while (0)
+/* Comma-separated identifiers b0 .. b18 (19 names), for use in a declaration. */
+#define DECL19(b)   b ## 0, b ## 1, b ## 2, b ## 3, b ## 4, b ## 5, \
+                    b ## 6, b ## 7, b ## 8, b ## 9, b ## 10, b ## 11, \
+                    b ## 12, b ## 13, b ## 14, b ## 15, b ## 16, \
+                    b ## 17, b ## 18
+/* M19_T7(i) = (7 * i) mod 19, produced as a literal token usable in ## pasting. */
+#define M19_T7(i)    M19_T7_(i)
+#define M19_T7_(i)   M19_T7_ ## i
+#define M19_T7_0     0
+#define M19_T7_1     7
+#define M19_T7_2     14
+#define M19_T7_3     2
+#define M19_T7_4     9
+#define M19_T7_5     16
+#define M19_T7_6     4
+#define M19_T7_7     11
+#define M19_T7_8     18
+#define M19_T7_9     6
+#define M19_T7_10    13
+#define M19_T7_11    1
+#define M19_T7_12    8
+#define M19_T7_13    15
+#define M19_T7_14    3
+#define M19_T7_15    10
+#define M19_T7_16    17
+#define M19_T7_17    5
+#define M19_T7_18    12
+/* M19_A1(i) = (i + 1) mod 19, produced as a literal token usable in ## pasting. */
+#define M19_A1(i)    M19_A1_(i)
+#define M19_A1_(i)   M19_A1_ ## i
+#define M19_A1_0     1
+#define M19_A1_1     2
+#define M19_A1_2     3
+#define M19_A1_3     4
+#define M19_A1_4     5
+#define M19_A1_5     6
+#define M19_A1_6     7
+#define M19_A1_7     8
+#define M19_A1_8     9
+#define M19_A1_9     10
+#define M19_A1_10    11
+#define M19_A1_11    12
+#define M19_A1_12    13
+#define M19_A1_13    14
+#define M19_A1_14    15
+#define M19_A1_15    16
+#define M19_A1_16    17
+#define M19_A1_17    18
+#define M19_A1_18    0
+/* M19_A2(i) = (i + 2) mod 19, produced as a literal token usable in ## pasting. */
+#define M19_A2(i)    M19_A2_(i)
+#define M19_A2_(i)   M19_A2_ ## i
+#define M19_A2_0     2
+#define M19_A2_1     3
+#define M19_A2_2     4
+#define M19_A2_3     5
+#define M19_A2_4     6
+#define M19_A2_5     7
+#define M19_A2_6     8
+#define M19_A2_7     9
+#define M19_A2_8     10
+#define M19_A2_9     11
+#define M19_A2_10    12
+#define M19_A2_11    13
+#define M19_A2_12    14
+#define M19_A2_13    15
+#define M19_A2_14    16
+#define M19_A2_15    17
+#define M19_A2_16    18
+#define M19_A2_17    0
+#define M19_A2_18    1
+/* M19_A4(i) = (i + 4) mod 19, produced as a literal token usable in ## pasting. */
+#define M19_A4(i)    M19_A4_(i)
+#define M19_A4_(i)   M19_A4_ ## i
+#define M19_A4_0     4
+#define M19_A4_1     5
+#define M19_A4_2     6
+#define M19_A4_3     7
+#define M19_A4_4     8
+#define M19_A4_5     9
+#define M19_A4_6     10
+#define M19_A4_7     11
+#define M19_A4_8     12
+#define M19_A4_9     13
+#define M19_A4_10    14
+#define M19_A4_11    15
+#define M19_A4_12    16
+#define M19_A4_13    17
+#define M19_A4_14    18
+#define M19_A4_15    0
+#define M19_A4_16    1
+#define M19_A4_17    2
+#define M19_A4_18    3
+/* Two-step pasting: the index argument is macro-expanded first, then glued to "a" / "atmp". */
+#define ACC_a(i)    ACC_a_(i)
+#define ACC_a_(i)   a ## i
+#define ACC_atmp(i)    ACC_atmp_(i)
+#define ACC_atmp_(i)   atmp ## i
+/* The four per-word mill steps for word i; neighbour indices (i+1, i+2, i+4, 7i) are taken mod 19. */
+#define MILL1(i)   (atmp ## i = a ## i ^ T(ACC_a(M19_A1(i)) \
+                   | ~ACC_a(M19_A2(i))))
+#define MILL2(i)   (a ## i = ROR(ACC_atmp(M19_T7(i)), ((i * (i + 1)) >> 1)))
+#define MILL3(i)   (atmp ## i = a ## i ^ ACC_a(M19_A1(i)) ^ ACC_a(M19_A4(i)))
+#define MILL4(i)   (a ## i = atmp ## i ^ (i == 0))   /* XORs 1 into word 0 only */
+/* One mill pass: each step is applied to all 19 words before the next step begins. */
+#define MILL   do { \
+		WT DECL19(atmp); \
+		MUL19(MILL1); \
+		MUL19(MILL2); \
+		MUL19(MILL3); \
+		MUL19(MILL4); \
+	} while (0)
+/* Comma-separated belt identifiers b<i>_<k> for i = 0..12, k = 0..2 (39 names). */
+#define DECL13(b)   b ## 0 ## _0, b ## 0 ## _1, b ## 0 ## _2, \
+                    b ## 1 ## _0, b ## 1 ## _1, b ## 1 ## _2, \
+                    b ## 2 ## _0, b ## 2 ## _1, b ## 2 ## _2, \
+                    b ## 3 ## _0, b ## 3 ## _1, b ## 3 ## _2, \
+                    b ## 4 ## _0, b ## 4 ## _1, b ## 4 ## _2, \
+                    b ## 5 ## _0, b ## 5 ## _1, b ## 5 ## _2, \
+                    b ## 6 ## _0, b ## 6 ## _1, b ## 6 ## _2, \
+                    b ## 7 ## _0, b ## 7 ## _1, b ## 7 ## _2, \
+                    b ## 8 ## _0, b ## 8 ## _1, b ## 8 ## _2, \
+                    b ## 9 ## _0, b ## 9 ## _1, b ## 9 ## _2, \
+                    b ## 10 ## _0, b ## 10 ## _1, b ## 10 ## _2, \
+                    b ## 11 ## _0, b ## 11 ## _1, b ## 11 ## _2, \
+                    b ## 12 ## _0, b ## 12 ## _1, b ## 12 ## _2
+/* M13_A(i, j) = (i + j) mod 13, produced as a literal token usable in ## pasting. */
+#define M13_A(i, j)    M13_A_(i, j)
+#define M13_A_(i, j)   M13_A_ ## i ## _ ## j
+#define M13_A_0_0      0
+#define M13_A_0_1      1
+#define M13_A_0_2      2
+#define M13_A_0_3      3
+#define M13_A_0_4      4
+#define M13_A_0_5      5
+#define M13_A_0_6      6
+#define M13_A_0_7      7
+#define M13_A_0_8      8
+#define M13_A_0_9      9
+#define M13_A_0_10     10
+#define M13_A_0_11     11
+#define M13_A_0_12     12
+#define M13_A_1_0      1
+#define M13_A_1_1      2
+#define M13_A_1_2      3
+#define M13_A_1_3      4
+#define M13_A_1_4      5
+#define M13_A_1_5      6
+#define M13_A_1_6      7
+#define M13_A_1_7      8
+#define M13_A_1_8      9
+#define M13_A_1_9      10
+#define M13_A_1_10     11
+#define M13_A_1_11     12
+#define M13_A_1_12     0
+#define M13_A_2_0      2
+#define M13_A_2_1      3
+#define M13_A_2_2      4
+#define M13_A_2_3      5
+#define M13_A_2_4      6
+#define M13_A_2_5      7
+#define M13_A_2_6      8
+#define M13_A_2_7      9
+#define M13_A_2_8      10
+#define M13_A_2_9      11
+#define M13_A_2_10     12
+#define M13_A_2_11     0
+#define M13_A_2_12     1
+#define M13_A_3_0      3
+#define M13_A_3_1      4
+#define M13_A_3_2      5
+#define M13_A_3_3      6
+#define M13_A_3_4      7
+#define M13_A_3_5      8
+#define M13_A_3_6      9
+#define M13_A_3_7      10
+#define M13_A_3_8      11
+#define M13_A_3_9      12
+#define M13_A_3_10     0
+#define M13_A_3_11     1
+#define M13_A_3_12     2
+#define M13_A_4_0      4
+#define M13_A_4_1      5
+#define M13_A_4_2      6
+#define M13_A_4_3      7
+#define M13_A_4_4      8
+#define M13_A_4_5      9
+#define M13_A_4_6      10
+#define M13_A_4_7      11
+#define M13_A_4_8      12
+#define M13_A_4_9      0
+#define M13_A_4_10     1
+#define M13_A_4_11     2
+#define M13_A_4_12     3
+#define M13_A_5_0      5
+#define M13_A_5_1      6
+#define M13_A_5_2      7
+#define M13_A_5_3      8
+#define M13_A_5_4      9
+#define M13_A_5_5      10
+#define M13_A_5_6      11
+#define M13_A_5_7      12
+#define M13_A_5_8      0
+#define M13_A_5_9      1
+#define M13_A_5_10     2
+#define M13_A_5_11     3
+#define M13_A_5_12     4
+#define M13_A_6_0      6
+#define M13_A_6_1      7
+#define M13_A_6_2      8
+#define M13_A_6_3      9
+#define M13_A_6_4      10
+#define M13_A_6_5      11
+#define M13_A_6_6      12
+#define M13_A_6_7      0
+#define M13_A_6_8      1
+#define M13_A_6_9      2
+#define M13_A_6_10     3
+#define M13_A_6_11     4
+#define M13_A_6_12     5
+#define M13_A_7_0      7
+#define M13_A_7_1      8
+#define M13_A_7_2      9
+#define M13_A_7_3      10
+#define M13_A_7_4      11
+#define M13_A_7_5      12
+#define M13_A_7_6      0
+#define M13_A_7_7      1
+#define M13_A_7_8      2
+#define M13_A_7_9      3
+#define M13_A_7_10     4
+#define M13_A_7_11     5
+#define M13_A_7_12     6
+#define M13_A_8_0      8
+#define M13_A_8_1      9
+#define M13_A_8_2      10
+#define M13_A_8_3      11
+#define M13_A_8_4      12
+#define M13_A_8_5      0
+#define M13_A_8_6      1
+#define M13_A_8_7      2
+#define M13_A_8_8      3
+#define M13_A_8_9      4
+#define M13_A_8_10     5
+#define M13_A_8_11     6
+#define M13_A_8_12     7
+#define M13_A_9_0      9
+#define M13_A_9_1      10
+#define M13_A_9_2      11
+#define M13_A_9_3      12
+#define M13_A_9_4      0
+#define M13_A_9_5      1
+#define M13_A_9_6      2
+#define M13_A_9_7      3
+#define M13_A_9_8      4
+#define M13_A_9_9      5
+#define M13_A_9_10     6
+#define M13_A_9_11     7
+#define M13_A_9_12     8
+#define M13_A_10_0     10
+#define M13_A_10_1     11
+#define M13_A_10_2     12
+#define M13_A_10_3     0
+#define M13_A_10_4     1
+#define M13_A_10_5     2
+#define M13_A_10_6     3
+#define M13_A_10_7     4
+#define M13_A_10_8     5
+#define M13_A_10_9     6
+#define M13_A_10_10    7
+#define M13_A_10_11    8
+#define M13_A_10_12    9
+#define M13_A_11_0     11
+#define M13_A_11_1     12
+#define M13_A_11_2     0
+#define M13_A_11_3     1
+#define M13_A_11_4     2
+#define M13_A_11_5     3
+#define M13_A_11_6     4
+#define M13_A_11_7     5
+#define M13_A_11_8     6
+#define M13_A_11_9     7
+#define M13_A_11_10    8
+#define M13_A_11_11    9
+#define M13_A_11_12    10
+#define M13_A_12_0     12
+#define M13_A_12_1     0
+#define M13_A_12_2     1
+#define M13_A_12_3     2
+#define M13_A_12_4     3
+#define M13_A_12_5     4
+#define M13_A_12_6     5
+#define M13_A_12_7     6
+#define M13_A_12_8     7
+#define M13_A_12_9     8
+#define M13_A_12_10    9
+#define M13_A_12_11    10
+#define M13_A_12_12    11
+/* M13_N(i) = 12 - i, produced as a literal token usable in ## pasting. */
+#define M13_N(i)    M13_N_(i)
+#define M13_N_(i)   M13_N_ ## i
+#define M13_N_0     12
+#define M13_N_1     11
+#define M13_N_2     10
+#define M13_N_3     9
+#define M13_N_4     8
+#define M13_N_5     7
+#define M13_N_6     6
+#define M13_N_7     5
+#define M13_N_8     4
+#define M13_N_9     3
+#define M13_N_10    2
+#define M13_N_11    1
+#define M13_N_12    0
+/* Belt variable b<i>_<k>; both indices are macro-expanded before being pasted. */
+#define ACC_b(i, k)    ACC_b_(i, k)
+#define ACC_b_(i, k)   b ## i ## _ ## k
+/* Advance bj by one 3-word belt block (wrapping 39 -> 0), then XOR a<k> into word s of that block. */
+#define ROUND_ELT(k, s)   do { \
+		if ((bj += 3) == 39) \
+			bj = 0; \
+		sc->b[bj + s] ^= a ## k; \
+	} while (0)
+/* One round with the belt kept in sc->b, base block j: XOR a1..a12 into the belt, run MILL, XOR block j into a13..a15. */
+#define ROUND_SF(j)   do { \
+		size_t bj = (j) * 3; \
+		ROUND_ELT(1, 0); \
+		ROUND_ELT(2, 1); \
+		ROUND_ELT(3, 2); \
+		ROUND_ELT(4, 0); \
+		ROUND_ELT(5, 1); \
+		ROUND_ELT(6, 2); \
+		ROUND_ELT(7, 0); \
+		ROUND_ELT(8, 1); \
+		ROUND_ELT(9, 2); \
+		ROUND_ELT(10, 0); \
+		ROUND_ELT(11, 1); \
+		ROUND_ELT(12, 2); \
+		MILL; \
+		bj = (j) * 3; \
+		a ## 13 ^= sc->b[bj + 0]; \
+		a ## 14 ^= sc->b[bj + 1]; \
+		a ## 15 ^= sc->b[bj + 2]; \
+	} while (0)
+/* XOR input words p0..p2 into belt block (j + 1) mod 13 of sc->b and into mill words a16..a18. */
+#define INPUT_SF(j, p0, p1, p2)   do { \
+		size_t bj = ((j) + 1) * 3; \
+		if (bj == 39) \
+			bj = 0; \
+		sc->b[bj + 0] ^= (p0); \
+		sc->b[bj + 1] ^= (p1); \
+		sc->b[bj + 2] ^= (p2); \
+		a16 ^= (p0); \
+		a17 ^= (p1); \
+		a18 ^= (p2); \
+	} while (0)
+
+
+#if SPH_SMALL_FOOTPRINT_RADIOGATUN
+
+#define ROUND   ROUND_SF
+#define INPUT   INPUT_SF
+
+#else
+
+/*
+ * Round function R, on base j. The value j is such that B[0] is actually
+ * b[j] after the initial rotation. On the 13-round macro, j has the
+ * successive values 12, 11, 10... 1, 0.
+ */
+#define ROUND(j)   do { \
+		ACC_b(M13_A(1, j), 0) ^= a ## 1; \
+		ACC_b(M13_A(2, j), 1) ^= a ## 2; \
+		ACC_b(M13_A(3, j), 2) ^= a ## 3; \
+		ACC_b(M13_A(4, j), 0) ^= a ## 4; \
+		ACC_b(M13_A(5, j), 1) ^= a ## 5; \
+		ACC_b(M13_A(6, j), 2) ^= a ## 6; \
+		ACC_b(M13_A(7, j), 0) ^= a ## 7; \
+		ACC_b(M13_A(8, j), 1) ^= a ## 8; \
+		ACC_b(M13_A(9, j), 2) ^= a ## 9; \
+		ACC_b(M13_A(10, j), 0) ^= a ## 10; \
+		ACC_b(M13_A(11, j), 1) ^= a ## 11; \
+		ACC_b(M13_A(12, j), 2) ^= a ## 12; \
+		MILL; \
+		a ## 13 ^= ACC_b(j, 0); \
+		a ## 14 ^= ACC_b(j, 1); \
+		a ## 15 ^= ACC_b(j, 2); \
+	} while (0)
+/* XOR input words p0..p2 into belt block (j + 1) mod 13 (local variables b<i>_<k>) and into a16..a18. */
+#define INPUT(j, p0, p1, p2)   do { \
+		ACC_b(M13_A(1, j), 0) ^= (p0); \
+		ACC_b(M13_A(1, j), 1) ^= (p1); \
+		ACC_b(M13_A(1, j), 2) ^= (p2); \
+		a16 ^= (p0); \
+		a17 ^= (p1); \
+		a18 ^= (p2); \
+	} while (0)
+
+#endif
+/* Expand action(i) once for every index i = 0..12, in order. */
+#define MUL13(action)   do { \
+		action(0); \
+		action(1); \
+		action(2); \
+		action(3); \
+		action(4); \
+		action(5); \
+		action(6); \
+		action(7); \
+		action(8); \
+		action(9); \
+		action(10); \
+		action(11); \
+		action(12); \
+	} while (0)
+/* Load mill word i from the context into the local variable a<i>. */
+#define MILL_READ_ELT(i)   do { \
+		a ## i = sc->a[i]; \
+	} while (0)
+/* Store the local variable a<i> back into mill word i of the context. */
+#define MILL_WRITE_ELT(i)   do { \
+		sc->a[i] = a ## i; \
+	} while (0)
+/* Small-footprint state load: only the 19 mill words are copied to locals. */
+#define STATE_READ_SF   do { \
+		MUL19(MILL_READ_ELT); \
+	} while (0)
+/* Small-footprint state store: writes the 19 mill locals back to sc->a. */
+#define STATE_WRITE_SF   do { \
+		MUL19(MILL_WRITE_ELT); \
+	} while (0)
+/* Small-footprint body of the push13 functions: processes whole chunks of sizeof sc->data bytes and returns the leftover length. */
+#define PUSH13_SF   do { \
+		WT DECL19(a); \
+		const unsigned char *buf; \
+ \
+		buf = data; \
+		STATE_READ_SF; \
+		while (len >= sizeof sc->data) { \
+			size_t mk; \
+			for (mk = 13; mk > 0; mk --) { \
+				WT p0 = INW(0, 0); \
+				WT p1 = INW(0, 1); \
+				WT p2 = INW(0, 2); \
+				INPUT_SF(mk - 1, p0, p1, p2); \
+				ROUND_SF(mk - 1); \
+				buf += (sizeof sc->data) / 13; \
+				len -= (sizeof sc->data) / 13; \
+			} \
+		} \
+		STATE_WRITE_SF; \
+		return len; \
+	} while (0)
+
+#if SPH_SMALL_FOOTPRINT_RADIOGATUN
+
+#define STATE_READ    STATE_READ_SF
+#define STATE_WRITE   STATE_WRITE_SF
+#define PUSH13        PUSH13_SF
+
+#else
+/* Load the three words of belt block i from sc->b into locals b<i>_0..b<i>_2. */
+#define BELT_READ_ELT(i)   do { \
+		b ## i ## _0 = sc->b[3 * i + 0]; \
+		b ## i ## _1 = sc->b[3 * i + 1]; \
+		b ## i ## _2 = sc->b[3 * i + 2]; \
+	} while (0)
+/* Store locals b<i>_0..b<i>_2 back into belt block i of sc->b. */
+#define BELT_WRITE_ELT(i)   do { \
+		sc->b[3 * i + 0] = b ## i ## _0; \
+		sc->b[3 * i + 1] = b ## i ## _1; \
+		sc->b[3 * i + 2] = b ## i ## _2; \
+	} while (0)
+/* Copy the whole state (13 belt blocks, 19 mill words) from the context into locals. */
+#define STATE_READ   do { \
+		MUL13(BELT_READ_ELT); \
+		MUL19(MILL_READ_ELT); \
+	} while (0)
+/* Copy the whole state (13 belt blocks, 19 mill words) from locals back to the context. */
+#define STATE_WRITE   do { \
+		MUL13(BELT_WRITE_ELT); \
+		MUL19(MILL_WRITE_ELT); \
+	} while (0)
+
+/*
+ * Input data by chunks of 13 blocks (3 words each). This is the body of the
+ * radiogatun32_push13() and radiogatun64_push13() functions.
+ */
+#define PUSH13   do { \
+		WT DECL19(a), DECL13(b); \
+		const unsigned char *buf; \
+ \
+		buf = data; \
+		STATE_READ; \
+		while (len >= sizeof sc->data) { \
+			WT p0, p1, p2; \
+			MUL13(PUSH13_ELT); \
+			buf += sizeof sc->data; \
+			len -= sizeof sc->data; \
+		} \
+		STATE_WRITE; \
+		return len; \
+	} while (0)
+/* Process 3-word block k of the current chunk, using belt base 12 - k. */
+#define PUSH13_ELT(k)   do { \
+		p0 = INW(k, 0); \
+		p1 = INW(k, 1); \
+		p2 = INW(k, 2); \
+		INPUT(M13_N(k), p0, p1, p2); \
+		ROUND(M13_N(k)); \
+	} while (0)
+
+#endif
+/* Thirteen rounds without input, with the belt base running from 12 down to 0. */
+#define BLANK13_SF   do { \
+		size_t mk = 13; \
+		while (mk -- > 0) \
+			ROUND_SF(mk); \
+	} while (0)
+/* One round without input at base 12, then rotate sc->b: blocks 0..11 move up one slot, old block 12 becomes block 0. */
+#define BLANK1_SF   do { \
+		WT tmp0, tmp1, tmp2; \
+		ROUND_SF(12); \
+		tmp0 = sc->b[36]; \
+		tmp1 = sc->b[37]; \
+		tmp2 = sc->b[38]; \
+		memmove(sc->b + 3, sc->b, 36 * sizeof sc->b[0]); \
+		sc->b[0] = tmp0; \
+		sc->b[1] = tmp1; \
+		sc->b[2] = tmp2; \
+	} while (0)
+
+#if SPH_SMALL_FOOTPRINT_RADIOGATUN
+
+#define BLANK13   BLANK13_SF
+#define BLANK1    BLANK1_SF
+
+#else
+
+/*
+ * Run 13 blank rounds. This macro expects the "a" and "b" state variables
+ * to be already declared.
+ */
+#define BLANK13   MUL13(BLANK13_ELT)
+
+#define BLANK13_ELT(k)   ROUND(M13_N(k))
+/* Expand action(i) once for every index i = 0..11, in order. */
+#define MUL12(action)   do { \
+		action(0); \
+		action(1); \
+		action(2); \
+		action(3); \
+		action(4); \
+		action(5); \
+		action(6); \
+		action(7); \
+		action(8); \
+		action(9); \
+		action(10); \
+		action(11); \
+	} while (0)
+
+/*
+ * Run a single blank round, and physically rotate the belt. This is used
+ * for the last blank rounds, and the output rounds. This macro expects the
+ * "a" and "b" state variables to be already declared.
+ */
+#define BLANK1   do { \
+		WT tmp0, tmp1, tmp2; \
+		ROUND(12); \
+		tmp0 = b0_0; \
+		tmp1 = b0_1; \
+		tmp2 = b0_2; \
+		MUL12(BLANK1_ELT); \
+		b1_0 = tmp0; \
+		b1_1 = tmp1; \
+		b1_2 = tmp2; \
+	} while (0)
+/* Copy belt block 12 - i into block (13 - i) mod 13, i.e. one slot forward with wrap-around. */
+#define BLANK1_ELT(i)   do { \
+		ACC_b(M13_A(M13_N(i), 1), 0) = ACC_b(M13_N(i), 0); \
+		ACC_b(M13_A(M13_N(i), 1), 1) = ACC_b(M13_N(i), 1); \
+		ACC_b(M13_A(M13_N(i), 1), 2) = ACC_b(M13_N(i), 2); \
+	} while (0)
+
+#endif
+/* Expands to nothing; passed to CLOSE_GEN when no belt locals need to be declared. */
+#define NO_TOKEN
+
+/*
+ * Perform padding, then blank rounds, then output some words. This is
+ * the body of sph_radiogatun32_close() and sph_radiogatun64_close().
+ */
+#define CLOSE_SF(width)   CLOSE_GEN(width, \
+                          NO_TOKEN, STATE_READ_SF, BLANK1_SF, BLANK13_SF)
+
+#if SPH_SMALL_FOOTPRINT_RADIOGATUN
+#define CLOSE          CLOSE_SF
+#else
+#define CLOSE(width)   CLOSE_GEN(width, \
+                       WT DECL13(b);, STATE_READ, BLANK1, BLANK13)
+#endif
+/* Shared body of the close functions; WTb13 is an optional declaration, the other arguments name the macros to use. */
+#define CLOSE_GEN(width, WTb13, state_read, blank1, blank13)   do { \
+		unsigned ptr, num; \
+		unsigned char *out; \
+		WT DECL19(a); \
+		WTb13 \
+		/* Pad with one 0x01 byte, then zeros up to a full chunk, and process it. */ \
+		ptr = sc->data_ptr; \
+		sc->data[ptr ++] = 0x01; \
+		memset(sc->data + ptr, 0, (sizeof sc->data) - ptr); \
+		radiogatun ## width ## _push13(sc, sc->data, sizeof sc->data); \
+		/* num = 17 minus the whole all-zero blocks after the padding byte (push13 already ran a round for each). */ \
+		num = 17; \
+		for (;;) { \
+			ptr += 3 * (width >> 3); \
+			if (ptr > sizeof sc->data) \
+				break; \
+			num --; \
+		} \
+		/* Run the remaining num rounds without input. */ \
+		state_read; \
+		if (num >= 13) { \
+			blank13; \
+			num -= 13; \
+		} \
+		while (num -- > 0) \
+			blank1; \
+		/* Emit a1 and a2, one more round between pairs, until 32 bytes are written; then reset the context. */ \
+		num = 0; \
+		out = dst; \
+		for (;;) { \
+			OUTW(out, a1); \
+			out += width >> 3; \
+			OUTW(out, a2); \
+			out += width >> 3; \
+			num += 2 * (width >> 3); \
+			if (num >= 32) \
+				break; \
+			blank1; \
+		} \
+		INIT; \
+	} while (0)
+
+/*
+ * Initialize context structure.
+ */
+#if SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN
+
+#define INIT   do { \
+		memset(sc->a, 0, sizeof sc->a); \
+		memset(sc->b, 0, sizeof sc->b); \
+		sc->data_ptr = 0; \
+	} while (0)
+
+#else
+
+#define INIT   do { \
+		size_t u; \
+		for (u = 0; u < 19; u ++) \
+			sc->a[u] = 0; \
+		for (u = 0; u < 39; u ++) \
+			sc->b[u] = 0; \
+		sc->data_ptr = 0; \
+	} while (0)
+
+#endif
+
+/* ======================================================================= */
+/*
+ * RadioGatun[32].
+ */
+
+#if !SPH_NO_RG32
+
+#undef WT
+#define WT           sph_u32
+#undef T
+#define T            SPH_T32
+#undef ROR
+#define ROR(x, n)    SPH_T32(((x) << ((32 - (n)) & 31)) | ((x) >> ((n) & 31)))
+#undef INW
+#define INW(i, j)    sph_dec32le_aligned(buf + (4 * (3 * (i) + (j))))
+#undef OUTW
+#define OUTW(b, v)   sph_enc32le(b, v)
+
+/*
+ * Insert data by big chunks of 13*12 = 156 bytes. Returned value is the
+ * number of remaining bytes (between 0 and 155). This method assumes that
+ * the input data is suitably aligned.
+ */
+static size_t
+radiogatun32_push13(sph_radiogatun32_context *sc, const void *data, size_t len)
+{
+	PUSH13;   /* whole body; uses sc, data, len by name and ends with "return len" */
+}
+
+/* see sph_radiogatun.h */
+void
+sph_radiogatun32_init(void *cc)
+{
+	sph_radiogatun32_context *sc;
+	/* INIT reaches the context through the local name "sc". */
+	sc = cc;
+	INIT;
+}
+
+#ifdef SPH_UPTR
+static void
+radiogatun32_short(void *cc, const void *data, size_t len)
+#else
+/* see sph_radiogatun.h */
+void
+sph_radiogatun32(void *cc, const void *data, size_t len)
+#endif
+{
+	sph_radiogatun32_context *sc;
+	unsigned ptr;
+	/* Copy input into sc->data; process the buffer each time it becomes full. */
+	sc = cc;
+	ptr = sc->data_ptr;
+	while (len > 0) {
+		size_t clen;
+		/* clen: room left in sc->data, capped to the remaining input */
+		clen = (sizeof sc->data) - ptr;
+		if (clen > len)
+			clen = len;
+		memcpy(sc->data + ptr, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		ptr += clen;
+		if (ptr == sizeof sc->data) {
+			radiogatun32_push13(sc, sc->data, sizeof sc->data);
+			ptr = 0;
+		}
+	}
+	sc->data_ptr = ptr;   /* bytes left pending in sc->data */
+}
+
+#ifdef SPH_UPTR
+/* see sph_radiogatun.h */
+void
+sph_radiogatun32(void *cc, const void *data, size_t len)
+{
+	sph_radiogatun32_context *sc;
+	unsigned ptr;
+	size_t rlen;
+	/* Inputs shorter than two chunks always take the buffered path. */
+	if (len < (2 * sizeof sc->data)) {
+		radiogatun32_short(cc, data, len);
+		return;
+	}
+	sc = cc;
+	ptr = sc->data_ptr;
+	if (ptr > 0) {
+		unsigned t;
+		/* Top up the partly filled buffer so that it gets processed. */
+		t = (sizeof sc->data) - ptr;
+		radiogatun32_short(sc, data, t);
+		data = (const unsigned char *)data + t;
+		len -= t;
+	}
+#if !SPH_UNALIGNED
+	if (((SPH_UPTR)data & 3) != 0) {   /* not 4-byte aligned: buffered path */
+		radiogatun32_short(sc, data, len);
+		return;
+	}
+#endif
+	rlen = radiogatun32_push13(sc, data, len);   /* whole chunks, read in place */
+	memcpy(sc->data, (const unsigned char *)data + len - rlen, rlen);   /* keep the tail */
+	sc->data_ptr = rlen;
+}
+#endif
+
+/* see sph_radiogatun.h */
+void
+sph_radiogatun32_close(void *cc, void *dst)
+{
+	sph_radiogatun32_context *sc;
+	/* CLOSE uses the locals "sc" and "dst" by name. */
+	sc = cc;
+	CLOSE(32);
+}
+
+#endif
+
+/* ======================================================================= */
+/*
+ * RadioGatun[64]. Compiled only if a 64-bit or more type is available.
+ */
+
+#if SPH_64
+
+#if !SPH_NO_RG64
+
+#undef WT
+#define WT           sph_u64
+#undef T
+#define T            SPH_T64
+#undef ROR
+#define ROR(x, n)    SPH_T64(((x) << ((64 - (n)) & 63)) | ((x) >> ((n) & 63)))
+#undef INW
+#define INW(i, j)    sph_dec64le_aligned(buf + (8 * (3 * (i) + (j))))
+#undef OUTW
+#define OUTW(b, v)   sph_enc64le(b, v)
+
+/*
+ * On 32-bit x86, register pressure is such that using the small
+ * footprint version is a net gain (x2 speed), because that variant
+ * uses fewer local variables.
+ */
+#if SPH_I386_MSVC || SPH_I386_GCC || defined __i386__
+#undef PUSH13
+#define PUSH13   PUSH13_SF
+#undef CLOSE
+#define CLOSE    CLOSE_SF
+#endif
+
+/*
+ * Insert data by big chunks of 13*24 = 312 bytes. Returned value is the
+ * number of remaining bytes (between 0 and 311). This method assumes that
+ * the input data is suitably aligned.
+ */
+static size_t
+radiogatun64_push13(sph_radiogatun64_context *sc, const void *data, size_t len)
+{
+	PUSH13;   /* whole body; uses sc, data, len by name and ends with "return len" */
+}
+
+/* see sph_radiogatun.h */
+void
+sph_radiogatun64_init(void *cc)
+{
+	sph_radiogatun64_context *sc;
+	/* INIT reaches the context through the local name "sc". */
+	sc = cc;
+	INIT;
+}
+
+#ifdef SPH_UPTR
+static void
+radiogatun64_short(void *cc, const void *data, size_t len)
+#else
+/* see sph_radiogatun.h */
+void
+sph_radiogatun64(void *cc, const void *data, size_t len)
+#endif
+{
+	sph_radiogatun64_context *sc;
+	unsigned ptr;
+	/* Copy input into sc->data; process the buffer each time it becomes full. */
+	sc = cc;
+	ptr = sc->data_ptr;
+	while (len > 0) {
+		size_t clen;
+		/* clen: room left in sc->data, capped to the remaining input */
+		clen = (sizeof sc->data) - ptr;
+		if (clen > len)
+			clen = len;
+		memcpy(sc->data + ptr, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		ptr += clen;
+		if (ptr == sizeof sc->data) {
+			radiogatun64_push13(sc, sc->data, sizeof sc->data);
+			ptr = 0;
+		}
+	}
+	sc->data_ptr = ptr;   /* bytes left pending in sc->data */
+}
+
+#ifdef SPH_UPTR
+/* see sph_radiogatun.h */
+void
+sph_radiogatun64(void *cc, const void *data, size_t len)
+{
+	sph_radiogatun64_context *sc;
+	unsigned ptr;
+	size_t rlen;
+	/* Inputs shorter than two chunks always take the buffered path. */
+	if (len < (2 * sizeof sc->data)) {
+		radiogatun64_short(cc, data, len);
+		return;
+	}
+	sc = cc;
+	ptr = sc->data_ptr;
+	if (ptr > 0) {
+		unsigned t;
+		/* Top up the partly filled buffer so that it gets processed. */
+		t = (sizeof sc->data) - ptr;
+		radiogatun64_short(sc, data, t);
+		data = (const unsigned char *)data + t;
+		len -= t;
+	}
+#if !SPH_UNALIGNED
+	if (((SPH_UPTR)data & 7) != 0) {   /* not 8-byte aligned: buffered path */
+		radiogatun64_short(sc, data, len);
+		return;
+	}
+#endif
+	rlen = radiogatun64_push13(sc, data, len);   /* whole chunks, read in place */
+	memcpy(sc->data, (const unsigned char *)data + len - rlen, rlen);   /* keep the tail */
+	sc->data_ptr = rlen;
+}
+#endif
+
+/* see sph_radiogatun.h */
+void
+sph_radiogatun64_close(void *cc, void *dst)
+{
+	sph_radiogatun64_context *sc;
+	/* CLOSE uses the locals "sc" and "dst" by name. */
+	sc = cc;
+	CLOSE(64);
+}
+
+#endif
+
+#endif
diff --git a/sha3/sph_radiogatun.h b/sha3/sph_radiogatun.h
new file mode 100644
index 000000000..763839771
--- /dev/null
+++ b/sha3/sph_radiogatun.h
@@ -0,0 +1,186 @@
+/* $Id: sph_radiogatun.h 226 2010-06-16 17:28:08Z tp $ */
+/**
+ * RadioGatun interface.
+ *
+ * RadioGatun has been published in: G. Bertoni, J. Daemen, M. Peeters
+ * and G. Van Assche, "RadioGatun, a belt-and-mill hash function",
+ * presented at the Second Cryptographic Hash Workshop, Santa Barbara,
+ * August 24-25, 2006. The main Web site, containing that article, the
+ * reference code and some test vectors, appears to be currently located
+ * at the following URL: http://radiogatun.noekeon.org/
+ *
+ * The presentation article does not specify endianness or padding. The
+ * reference code uses the following conventions, which we also apply
+ * here:
+ * <ul>
+ * <li>The input message is an integral number of sequences of three
+ * words. Each word is either a 32-bit or 64-bit word (depending on
+ * the version of RadioGatun).</li>
+ * <li>Input bytes are decoded into words using little-endian
+ * convention.</li>
+ * <li>Padding consists of a single bit of value 1, using little-endian
+ * convention within bytes (i.e. for a byte-oriented input, a single
+ * byte of value 0x01 is appended), then enough bits of value 0 to finish
+ * the current block.</li>
+ * <li>Output consists of 256 bits. Successive output words are encoded
+ * with little-endian convention.</li>
+ * </ul>
+ * These conventions are very close to those we use for PANAMA, which is
+ * a close ancestor of RadioGatun.
+ *
+ * RadioGatun is actually a family of functions, depending on some
+ * internal parameters. We implement here two functions, with a "belt
+ * length" of 13, a "belt width" of 3, and a "mill length" of 19. The
+ * RadioGatun[32] version uses 32-bit words, while the RadioGatun[64]
+ * variant uses 64-bit words.
+ *
+ * Strictly speaking, the name "RadioGatun" should use an acute accent
+ * on the "u", which we omitted here to keep strict ASCII-compatibility
+ * of this file.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_radiogatun.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_RADIOGATUN_H__
+#define SPH_RADIOGATUN_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for RadioGatun[32].
+ */
+#define SPH_SIZE_radiogatun32   256
+
+/**
+ * This structure is a context for RadioGatun[32] computations: it
+ * contains intermediate values and some data from the last entered
+ * block. Once a RadioGatun[32] computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running RadioGatun[32]
+ * computation can be cloned by copying the context (e.g. with a
+ * simple <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char data[156];   /* first field, for alignment */
+	unsigned data_ptr;   /* number of bytes currently pending in data[] */
+	sph_u32 a[19], b[39];   /* a: mill words; b: belt, 13 blocks of 3 words */
+#endif
+} sph_radiogatun32_context;
+
+/**
+ * Initialize a RadioGatun[32] context. This process performs no
+ * memory allocation.
+ *
+ * @param cc   the RadioGatun[32] context (pointer to a
+ *             <code>sph_radiogatun32_context</code>)
+ */
+void sph_radiogatun32_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the RadioGatun[32] context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_radiogatun32(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current RadioGatun[32] computation and output the
+ * result into the provided buffer. The destination buffer must be wide
+ * enough to accommodate the result (32 bytes). The context is
+ * automatically reinitialized.
+ *
+ * @param cc    the RadioGatun[32] context
+ * @param dst   the destination buffer
+ */
+void sph_radiogatun32_close(void *cc, void *dst);
+
+#if SPH_64
+
+/**
+ * Output size (in bits) for RadioGatun[64].
+ */
+#define SPH_SIZE_radiogatun64   256
+
+/**
+ * This structure is a context for RadioGatun[64] computations: it
+ * contains intermediate values and some data from the last entered
+ * block. Once a RadioGatun[64] computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running RadioGatun[64]
+ * computation can be cloned by copying the context (e.g. with a
+ * simple <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char data[312];   /* first field, for alignment */
+	unsigned data_ptr;   /* number of bytes currently pending in data[] */
+	sph_u64 a[19], b[39];   /* a: mill words; b: belt, 13 blocks of 3 words */
+#endif
+} sph_radiogatun64_context;
+
+/**
+ * Initialize a RadioGatun[64] context. This process performs no
+ * memory allocation.
+ *
+ * @param cc   the RadioGatun[64] context (pointer to a
+ *             <code>sph_radiogatun64_context</code>)
+ */
+void sph_radiogatun64_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the RadioGatun[64] context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_radiogatun64(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current RadioGatun[64] computation and output the
+ * result into the provided buffer. The destination buffer must be wide
+ * enough to accommodate the result (32 bytes). The context is
+ * automatically reinitialized.
+ *
+ * @param cc    the RadioGatun[64] context
+ * @param dst   the destination buffer
+ */
+void sph_radiogatun64_close(void *cc, void *dst);
+
+#endif
+
+#endif
diff --git a/util.c b/util.c
index e28f2c9bd..0bdbf7543 100644
--- a/util.c
+++ b/util.c
@@ -2496,6 +2496,9 @@ void print_hash_tests(void)
 	x17hash(&hash[0], &buf[0]);
 	printpfx("x17", hash);
 
+	x20r_hash(&hash[0], &buf[0]);
+	printpfx("x20r", hash);
+
 	yescrypthash(&hash[0], &buf[0]);
 	printpfx("yescrypt", hash);