From bc2a0ed23bd7db8ed9849e71fc3e36abdba7b99b Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Thu, 23 Jan 2025 13:32:31 +0100
Subject: [PATCH 01/11] Add HS1-SIV

---
 Cargo.lock             |   8 +
 Cargo.toml             |   1 +
 benches/Cargo.toml     |   6 +
 benches/src/hs1-siv.rs |  66 +++++
 hs1-siv/Cargo.toml     |  24 ++
 hs1-siv/LICENSE-APACHE | 201 ++++++++++++++
 hs1-siv/LICENSE-MIT    |  25 ++
 hs1-siv/README.md      |  32 +++
 hs1-siv/ref/.gitignore |   1 +
 hs1-siv/ref/encrypt.c  | 541 +++++++++++++++++++++++++++++++++++++
 hs1-siv/ref/main.c     | 124 +++++++++
 hs1-siv/ref/run.sh     |   1 +
 hs1-siv/src/hash.rs    | 191 +++++++++++++
 hs1-siv/src/lib.rs     | 599 +++++++++++++++++++++++++++++++++++++++++
 14 files changed, 1820 insertions(+)
 create mode 100644 benches/src/hs1-siv.rs
 create mode 100644 hs1-siv/Cargo.toml
 create mode 100644 hs1-siv/LICENSE-APACHE
 create mode 100644 hs1-siv/LICENSE-MIT
 create mode 100644 hs1-siv/README.md
 create mode 100644 hs1-siv/ref/.gitignore
 create mode 100644 hs1-siv/ref/encrypt.c
 create mode 100644 hs1-siv/ref/main.c
 create mode 100755 hs1-siv/ref/run.sh
 create mode 100644 hs1-siv/src/hash.rs
 create mode 100644 hs1-siv/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index d824e9e3..ed0842c3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -334,6 +334,14 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46"
 
+[[package]]
+name = "hs1-siv"
+version = "0.1.1"
+dependencies = [
+ "aead",
+ "chacha20",
+]
+
 [[package]]
 name = "hybrid-array"
 version = "0.2.1"
diff --git a/Cargo.toml b/Cargo.toml
index a5c5c263..0c4e82c3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,7 @@ members = [
     "chacha20poly1305",
     "deoxys",
     "eax",
+    "hs1-siv",
     "ocb3",
     "xaes-256-gcm",
 ]
diff --git a/benches/Cargo.toml b/benches/Cargo.toml
index 03564426..d1e85365 100644
--- a/benches/Cargo.toml
+++ b/benches/Cargo.toml
@@ -20,6 +20,7 @@ ascon-aead = { path = "../ascon-aead/" }
 chacha20poly1305 = { path = "../chacha20poly1305/" }
 deoxys = { path = "../deoxys/" }
 eax = { path = "../eax/" }
+hs1-siv = { path = "../hs1-siv/" }
 
 [target.'cfg(any(target_arch = "x86_64", target_arch = "x86"))'.dependencies]
 criterion-cycles-per-byte = "0.4.0"
@@ -53,3 +54,8 @@ harness = false
 name = "eax"
 path = "src/eax.rs"
 harness = false
+
+[[bench]]
+name = "hs1-siv"
+path = "src/hs1-siv.rs"
+harness = false
diff --git a/benches/src/hs1-siv.rs b/benches/src/hs1-siv.rs
new file mode 100644
index 00000000..20e47379
--- /dev/null
+++ b/benches/src/hs1-siv.rs
@@ -0,0 +1,66 @@
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+
+use hs1_siv::aead::{Aead, KeyInit};
+use hs1_siv::{Hs1SivLo, Hs1SivMe, Hs1SivHi};
+
+const KB: usize = 1024;
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
+type Benchmarker = Criterion;
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+type Benchmarker = Criterion<criterion_cycles_per_byte::CyclesPerByte>;
+
+fn bench(c: &mut Benchmarker) {
+    let mut group = c.benchmark_group("hs1-siv");
+
+    for size in &[KB, 2 * KB, 4 * KB, 8 * KB, 16 * KB] {
+        let buf = vec![0u8; *size];
+
+        group.throughput(Throughput::Bytes(*size as u64));
+
+        group.bench_function(BenchmarkId::new("encrypt-lo", size), |b| {
+            let cipher = Hs1SivLo::new(&Default::default());
+            b.iter(|| cipher.encrypt(&Default::default(), &*buf))
+        });
+        group.bench_function(BenchmarkId::new("decrypt-lo", size), |b| {
+            let cipher = Hs1SivLo::new(&Default::default());
+            b.iter(|| cipher.decrypt(&Default::default(), &*buf))
+        });
+
+        group.bench_function(BenchmarkId::new("encrypt-me", size), |b| {
+            let cipher = Hs1SivMe::new(&Default::default());
+            b.iter(|| cipher.encrypt(&Default::default(), &*buf))
+        });
+        group.bench_function(BenchmarkId::new("decrypt-me", size), |b| {
+            let cipher = Hs1SivMe::new(&Default::default());
+            b.iter(|| cipher.decrypt(&Default::default(), &*buf))
+        });
+
+        group.bench_function(BenchmarkId::new("encrypt-hi", size), |b| {
+            let cipher = Hs1SivHi::new(&Default::default());
+            b.iter(|| cipher.encrypt(&Default::default(), &*buf))
+        });
+        group.bench_function(BenchmarkId::new("decrypt-hi", size), |b| {
+            let cipher = Hs1SivHi::new(&Default::default());
+            b.iter(|| cipher.decrypt(&Default::default(), &*buf))
+        });
+    }
+
+    group.finish();
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
+criterion_group!(
+    name = benches;
+    config = Criterion::default();
+    targets = bench
+);
+
+#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+criterion_group!(
+    name = benches;
+    config = Criterion::default().with_measurement(criterion_cycles_per_byte::CyclesPerByte);
+    targets = bench
+);
+
+criterion_main!(benches);
diff --git a/hs1-siv/Cargo.toml b/hs1-siv/Cargo.toml
new file mode 100644
index 00000000..c6040571
--- /dev/null
+++ b/hs1-siv/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "hs1-siv"
+version = "0.1.1"
+edition = "2021"
+description = """
+Pure Rust implementation of the HS1-SIV Authenticated Encryption
+with Additional Data Cipher. Based on ChaCha.
+"""
+authors = ["David Hoppenbrouwers"]
+license = "Apache-2.0 OR MIT"
+readme = "README.md"
+documentation = "https://docs.rs/hs1-siv"
+homepage = "https://codeberg.org/Demindiro/rust-hs1-siv"
+repository = "https://codeberg.org/Demindiro/rust-hs1-siv"
+keywords = ["aead", "hs1-siv", "hs1", "siv"]
+categories = ["cryptography", "no-std"]
+rust-version = "1.81"
+
+[dependencies]
+aead = { version = "0.6.0-rc.0", default-features = false }
+chacha20 = { version = "=0.10.0-pre.2" }
+
+[dev-dependencies]
+aead = { version = "0.6.0-rc.0", features = ["alloc"] }
diff --git a/hs1-siv/LICENSE-APACHE b/hs1-siv/LICENSE-APACHE
new file mode 100644
index 00000000..78173fa2
--- /dev/null
+++ b/hs1-siv/LICENSE-APACHE
@@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/hs1-siv/LICENSE-MIT b/hs1-siv/LICENSE-MIT
new file mode 100644
index 00000000..1bae9ba5
--- /dev/null
+++ b/hs1-siv/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2025 David Hoppenbrouwers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/hs1-siv/README.md b/hs1-siv/README.md
new file mode 100644
index 00000000..daa8f9dd
--- /dev/null
+++ b/hs1-siv/README.md
@@ -0,0 +1,32 @@
+Pure Rust implementation of [HS1-SIV][0].
+
+HS1-SIV is based on the [ChaCha][1] stream cipher.
+The tag is generated using a new hashing algorithm.
+The tag also doubles as the SIV (synthetic IV),
+providing resistance against nonce reuse.
+
+The algorithm is configurable:
+- `B`: Block size, in units of 16 bytes (so `B = 4` means 64-byte blocks).
+- `T`: "collision level" (higher is more secure).
+- `R`: ChaCha rounds.
+- `L`: Tag length in bytes.
+
+3 standard settings are provided:
+
+| Name       | `B` | `T` | `R` | `L` |
+|------------|-----|-----|-----|-----|
+| `Hs1SivLo` |   4 |   2 |   8 |   8 |
+| `Hs1SivMe` |   4 |   4 |  12 |  16 |
+| `Hs1SivHi` |   4 |   6 |  20 |  32 |
+
+Security bounds per setting (`n` = number of messages generated):
+
+| Name       | Key search  | SIV collision                   |
+|------------|-------------|---------------------------------|
+| `Hs1SivLo` | `n/(2^256)` | `(n^2)/(2^56)  + (n^2)/(2^64) ` |
+| `Hs1SivMe` | `n/(2^256)` | `(n^2)/(2^112) + (n^2)/(2^128)` |
+| `Hs1SivHi` | `n/(2^256)` | `(n^2)/(2^168) + (n^2)/(2^256)` |
+
+
+[0]: https://krovetz.net/csus/papers/hs1-siv_v2.2.pdf
+[1]: https://docs.rs/chacha20/
diff --git a/hs1-siv/ref/.gitignore b/hs1-siv/ref/.gitignore
new file mode 100644
index 00000000..ba2906d0
--- /dev/null
+++ b/hs1-siv/ref/.gitignore
@@ -0,0 +1 @@
+main
diff --git a/hs1-siv/ref/encrypt.c b/hs1-siv/ref/encrypt.c
new file mode 100644
index 00000000..c2ed49bc
--- /dev/null
+++ b/hs1-siv/ref/encrypt.c
@@ -0,0 +1,541 @@
+/*
+// HS1-SIV v2 reference code.
+//
+// Note: This implements HS1-SIV v2, and not v1 or Draft v2.
+//
+// ** This version is slow and susceptible to side-channel attacks. **
+// ** Do not use for any purpose other than to understand HS1-SIV.  **
+//
+// Written by Ted Krovetz (ted@krovetz.net). Last modified 28 July 2016.
+//
+// To the extent possible under law, the author has dedicated all copyright
+// and related and neighboring rights to this software to the public
+// domain worldwide. This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication
+// along with this software. If not, see
+// <http://creativecommons.org/publicdomain/zero/1.0/>
+//
+// The author knows of no intellectual property claims relevant to this work.
+*/
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ *     I n c l u d e s   a n d   u t i l i t i e s
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#if HS1_SIV_LO
+#define HS1_SIV_NH_LEN      64
+#define HS1_SIV_HASH_RNDS    2
+#define HS1_SIV_CHACHA_RNDS  8
+#define HS1_SIV_SIV_LEN      8
+#elif HS1_SIV
+#define HS1_SIV_NH_LEN      64
+#define HS1_SIV_HASH_RNDS    4
+#define HS1_SIV_CHACHA_RNDS 12
+#define HS1_SIV_SIV_LEN     16
+#elif HS1_SIV_HI
+#define HS1_SIV_NH_LEN      64
+#define HS1_SIV_HASH_RNDS    6
+#define HS1_SIV_CHACHA_RNDS 20
+#define HS1_SIV_SIV_LEN     32
+#endif
+
+#define __STDC_LIMIT_MACROS
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#if __GNUC__
+    #define HS1_SIV_ALIGN(n) __attribute__ ((aligned(n)))
+#elif _MSC_VER
+    #define HS1_SIV_ALIGN(n) __declspec(align(n))
+#elif (__STDC_VERSION >= 201112L) || (__cplusplus >= 201103L)
+    #define HS1_SIV_ALIGN(n) alignas(n)
+#else /* Not GNU/Microsoft/C11: delete alignment uses.     */
+    #pragma message ( "Struct alignment not guaranteed" )
+    #define HS1_SIV_ALIGN(n)
+#endif
+
+HS1_SIV_ALIGN(16)
+typedef struct {
+    unsigned char chacha_key[32];
+    unsigned char nh_key[HS1_SIV_NH_LEN+16*(HS1_SIV_HASH_RNDS-1)];
+    unsigned char poly_key[HS1_SIV_HASH_RNDS*8];
+    #if (HS1_SIV_HASH_RNDS > 4) /* ASU */
+    unsigned char asu_key[HS1_SIV_HASH_RNDS*24];
+    #else
+    unsigned char asu_key[];
+    #endif
+} hs1siv_ctx_t;
+
+/* Little-endian reads and writes. */
+
+static uint32_t swap32(uint32_t x) {
+    return (((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8)  |
+            ((x & 0x00ff0000u) >> 8)  | ((x & 0xff000000u) >> 24));
+}
+
+static uint64_t swap64(uint64_t x) {
+    return ((x & UINT64_C(0x00000000000000ff)) << 56) |
+           ((x & UINT64_C(0x000000000000ff00)) << 40) |
+           ((x & UINT64_C(0x0000000000ff0000)) << 24) |
+           ((x & UINT64_C(0x00000000ff000000)) <<  8) |
+           ((x & UINT64_C(0x000000ff00000000)) >>  8) |
+           ((x & UINT64_C(0x0000ff0000000000)) >> 24) |
+           ((x & UINT64_C(0x00ff000000000000)) >> 40) |
+           ((x & UINT64_C(0xff00000000000000)) >> 56);
+}
+
+static int le() { const union { int x; char e; } l = { 1 }; return l.e; }
+static uint32_t read32le(uint32_t *p) { return (le()?*p:swap32(*p)); }
+static uint64_t read64le(uint64_t *p) { return (le()?*p:swap64(*p)); }
+static void write32le(uint32_t *p, uint32_t w) { *p = (le()?w:swap32(w)); }
+static void write64le(uint64_t *p, uint64_t w) { *p = (le()?w:swap64(w)); }
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ *     C h a c h a   S e c t i o n -- Implementation borrowed from D Bernstein
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/* This chacha implementation was adapted from a public domain implementation
+ * found at http://cr.yp.to/chacha.html. It has been modified to accommodate
+ * 12-byte IVs as specified in RFC 7539.
+ */
+
+typedef struct { uint32_t input[16]; } chacha_ctx_t;
+
+static uint32_t rotl(uint32_t x, unsigned n) { return (x<<n) | (x>>(32-n)); }
+
+#define QUARTERROUND(a,b,c,d) \
+  x[a] += x[b]; x[d] = rotl(x[d] ^ x[a],16); \
+  x[c] += x[d]; x[b] = rotl(x[b] ^ x[c],12); \
+  x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); \
+  x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
+
+static void salsa20_wordtobyte(unsigned char output[64], uint32_t input[16])
+{
+  uint32_t i, x[16];
+
+  for (i = 0;i < 16;++i) x[i] = input[i];
+  for (i = HS1_SIV_CHACHA_RNDS;i != 0;i -= 2) {
+    QUARTERROUND( 0, 4, 8,12)
+    QUARTERROUND( 1, 5, 9,13)
+    QUARTERROUND( 2, 6,10,14)
+    QUARTERROUND( 3, 7,11,15)
+    QUARTERROUND( 0, 5,10,15)
+    QUARTERROUND( 1, 6,11,12)
+    QUARTERROUND( 2, 7, 8,13)
+    QUARTERROUND( 3, 4, 9,14)
+  }
+  for (i = 0;i < 16;++i) x[i] += input[i];
+  for (i = 0;i < 16;++i) write32le((uint32_t *)(output + 4 * i),x[i]);
+}
+
+static const char sigma[] = "expand 32-byte k";
+static const char tau[] = "expand 16-byte k";
+
+void chacha_keysetup(chacha_ctx_t *x, const unsigned char *k, unsigned kbits)
+{
+  const char *constants;
+
+  x->input[4] = read32le((uint32_t *)(k + 0));
+  x->input[5] = read32le((uint32_t *)(k + 4));
+  x->input[6] = read32le((uint32_t *)(k + 8));
+  x->input[7] = read32le((uint32_t *)(k + 12));
+  if (kbits == 256) { /* recommended */
+    k += 16;
+    constants = sigma;
+  } else { /* kbits == 128 */
+    constants = tau;
+  }
+  x->input[8] = read32le((uint32_t *)(k + 0));
+  x->input[9] = read32le((uint32_t *)(k + 4));
+  x->input[10] = read32le((uint32_t *)(k + 8));
+  x->input[11] = read32le((uint32_t *)(k + 12));
+  x->input[0] = read32le((uint32_t *)(constants + 0));
+  x->input[1] = read32le((uint32_t *)(constants + 4));
+  x->input[2] = read32le((uint32_t *)(constants + 8));
+  x->input[3] = read32le((uint32_t *)(constants + 12));
+}
+
+void chacha_ivsetup(chacha_ctx_t *x,const unsigned char *iv)
+{
+  x->input[12] = 0;
+  x->input[13] = read32le((uint32_t *)(iv + 0)); /* Modified for 12-byte iv */
+  x->input[14] = read32le((uint32_t *)(iv + 4));
+  x->input[15] = read32le((uint32_t *)(iv + 8));
+}
+
+void chacha(chacha_ctx_t *x,unsigned char *out,unsigned bytes)
+{
+  unsigned char output[64];
+  unsigned i;
+
+  if (!bytes) return;
+  for (;;) {
+    salsa20_wordtobyte(output,x->input);
+    x->input[12] += 1;
+    if (bytes <= 64) {
+      for (i = 0;i < bytes;++i) out[i] = output[i];
+      return;
+    }
+    for (i = 0;i < 64;++i) out[i] = output[i];
+    bytes -= 64;
+    out += 64;
+  }
+}
+
+void hs1siv_chacha256(void *out, unsigned outbytes,
+                      unsigned char *iv, void *user_key)
+{
+    chacha_ctx_t ctx;
+
+    chacha_keysetup(&ctx, user_key, 256);
+    chacha_ivsetup(&ctx,iv);
+    chacha(&ctx,out,outbytes);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ *     H S 1 - H a s h   S e c t i o n
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static const uint64_t m60 = ((uint64_t)1 << 60) - 1;
+static const uint64_t m61 = ((uint64_t)1 << 61) - 1;
+
+/* Return 62 bits congruent to a*k+b mod (2^61-1). Assumes 60-bit k,b; 62-bit a */
+static uint64_t poly_step(uint64_t a, uint64_t b, uint64_t k) {
+    #if (__SIZEOF_INT128__)  /* 128-bit type available */
+        unsigned __int128 tmp = (unsigned __int128)a * (unsigned __int128)k;
+        return ((uint64_t)tmp & m61) + (uint64_t)(tmp >> 61) + b;
+    #else
+        uint64_t m = (uint64_t)(uint32_t)(a>>32) * (uint64_t)(uint32_t)k
+                   + (uint64_t)(uint32_t)(k>>32) * (uint64_t)(uint32_t)a;
+        uint64_t h = (uint64_t)(uint32_t)(a>>32) * (uint64_t)(uint32_t)(k>>32);
+        uint64_t l = (uint64_t)(uint32_t)a * (uint64_t)(uint32_t)k;
+        h += (m >> 32); l += (m << 32);  /* h:l += (m>>32):(m<<32)      */
+        /* CAUTION: Potential timing leak. Good compiler will eliminate */
+        if (l < (m << 32)) h += 1;       /* Check for carry from l to h */
+        return (l & m61) + ((h << 3) | (l >> 61)) + b;
+    #endif
+}
+
+static uint64_t poly_finalize(uint64_t a) {
+    a = (a & m61) + (a >> 61);   /* a may be 62 bits, so one final reduction */
+    if (a == m61) a = 0;
+    return a;
+}
+
+#if (HS1_SIV_HASH_RNDS > 4)
+static uint32_t asu_hash(uint64_t x, uint64_t *k) {
+    uint64_t t = k[0] + k[1] * (uint32_t)x + k[2] * (uint32_t)(x >> 32);
+    return (uint32_t)(t >> 32);
+}
+#endif
+
+// Single-lane rewrite of prf_hash2: keeps one accumulator (s0) instead of two
+// interleaved ones (s0, s1), which makes the algorithm easier to follow.
+void prf_hash1(uint64_t *h, uint32_t *in, unsigned inbytes, uint32_t *nhkey,
+               uint64_t polykey, uint64_t *asukey) {
+    uint64_t s0 = 1;
+    unsigned i=0, j;
+
+    /* Hash full blocks of HS1_SIV_NH_LEN bytes */
+    while (inbytes >= HS1_SIV_NH_LEN) {
+        uint64_t a0 = 0;
+        for (i=0;i<HS1_SIV_NH_LEN/4;i+=4) {
+            a0 += (uint64_t)(read32le(in+i+0) + nhkey[i+0]) * (read32le(in+i+2) + nhkey[i+2]);
+            a0 += (uint64_t)(read32le(in+i+1) + nhkey[i+1]) * (read32le(in+i+3) + nhkey[i+3]);
+        }
+        s0 = poly_step(s0, a0&m60, polykey);
+        inbytes -= HS1_SIV_NH_LEN;
+        in += HS1_SIV_NH_LEN/4;
+    }
+    /* If partial block remains, hash it */
+    i=0;
+    if (inbytes != 0) {
+        uint64_t a0 = 0;
+        while (inbytes >= 16) {
+            a0 += (uint64_t)(read32le(in+i+0) + nhkey[i+0]) * (read32le(in+i+2) + nhkey[i+2]);
+            a0 += (uint64_t)(read32le(in+i+1) + nhkey[i+1]) * (read32le(in+i+3) + nhkey[i+3]);
+            i += 4; inbytes -= 16;
+        }
+        if (inbytes) {
+            uint32_t tail[4] = {0,0,0,0};
+            for (j=0;j<inbytes;j++)
+                ((unsigned char *)tail)[j] = ((unsigned char *)(in+i))[j];
+            a0 += (uint64_t)(read32le(tail+0) + nhkey[i+0]) * (read32le(tail+2) + nhkey[i+2]);
+            a0 += (uint64_t)(read32le(tail+1) + nhkey[i+1]) * (read32le(tail+3) + nhkey[i+3]);
+            a0 += inbytes;
+        }
+        s0 = poly_step(s0, a0&m60, polykey);
+    }
+    s0 = poly_finalize(s0);
+    #if (HS1_SIV_HASH_RNDS > 4)
+    write64le(h, asu_hash(s0, asukey));
+    #else
+    (void)asukey;  /* Suppress warning */
+    write64le(h,s0);
+    #endif
+}
+
+#include <stdio.h>
+void prf_hash2(uint64_t *h, uint32_t *in, unsigned inbytes, uint32_t *nhkey,
+               uint64_t *polykey, uint64_t *asukey) {
+    uint64_t s0 = 1, s1 = 1;
+    unsigned i=0, j;
+    printf("- - %llu,%llu,\n", s0, s1);
+
+    /* Hash full blocks of HS1_SIV_NH_LEN bytes */
+    while (inbytes >= HS1_SIV_NH_LEN) {
+        uint64_t a0 = 0, a1 = 0;
+        for (i=0;i<HS1_SIV_NH_LEN/4;i+=8) {
+            a0 += (uint64_t)(read32le(in+i+0) + nhkey[i+0]) *
+                            (read32le(in+i+2) + nhkey[i+2]);
+            a0 += (uint64_t)(read32le(in+i+1) + nhkey[i+1]) *
+                            (read32le(in+i+3) + nhkey[i+3]);
+            a1 += (uint64_t)(read32le(in+i+0) + nhkey[i+4]) *
+                            (read32le(in+i+2) + nhkey[i+6]);
+            a1 += (uint64_t)(read32le(in+i+1) + nhkey[i+5]) *
+                            (read32le(in+i+3) + nhkey[i+7]);
+            a0 += (uint64_t)(read32le(in+i+4) + nhkey[i+4]) *
+                            (read32le(in+i+6) + nhkey[i+6]);
+            a0 += (uint64_t)(read32le(in+i+5) + nhkey[i+5]) *
+                            (read32le(in+i+7) + nhkey[i+7]);
+            a1 += (uint64_t)(read32le(in+i+4) + nhkey[i+8]) *
+                            (read32le(in+i+6) + nhkey[i+10]);
+            a1 += (uint64_t)(read32le(in+i+5) + nhkey[i+9]) *
+                            (read32le(in+i+7) + nhkey[i+11]);
+        }
+        s0 = poly_step(s0, a0&m60, polykey[0]);
+        s1 = poly_step(s1, a1&m60, polykey[1]);
+        inbytes -= HS1_SIV_NH_LEN;
+        in += HS1_SIV_NH_LEN/4;
+        printf("-A- %llu,%llu,\n", s0, s1);
+    }
+    /* If partial block remains, hash it */
+    i=0;
+    if (inbytes != 0) {
+        uint64_t a0 = 0, a1 = 0;
+        while (inbytes >= 16) {
+            a0 += (uint64_t)(read32le(in+i+0) + nhkey[i+0]) *
+                            (read32le(in+i+2) + nhkey[i+2]);
+            a0 += (uint64_t)(read32le(in+i+1) + nhkey[i+1]) *
+                            (read32le(in+i+3) + nhkey[i+3]);
+            a1 += (uint64_t)(read32le(in+i+0) + nhkey[i+4]) *
+                            (read32le(in+i+2) + nhkey[i+6]);
+            a1 += (uint64_t)(read32le(in+i+1) + nhkey[i+5]) *
+                            (read32le(in+i+3) + nhkey[i+7]);
+            i += 4; inbytes -= 16;
+        }
+        if (inbytes) {
+            uint32_t tail[4] = {0,0,0,0};
+            for (j=0;j<inbytes;j++)
+                ((unsigned char *)tail)[j] = ((unsigned char *)(in+i))[j];
+            a0 += (uint64_t)(read32le(tail+0) + nhkey[i+0]) *
+                            (read32le(tail+2) + nhkey[i+2]);
+            a0 += (uint64_t)(read32le(tail+1) + nhkey[i+1]) *
+                            (read32le(tail+3) + nhkey[i+3]);
+            a1 += (uint64_t)(read32le(tail+0) + nhkey[i+4]) *
+                            (read32le(tail+2) + nhkey[i+6]);
+            a1 += (uint64_t)(read32le(tail+1) + nhkey[i+5]) *
+                            (read32le(tail+3) + nhkey[i+7]);
+            a0 += inbytes;
+            a1 += inbytes;
+        }
+        s0 = poly_step(s0, a0&m60, polykey[0]);
+        s1 = poly_step(s1, a1&m60, polykey[1]);
+        printf("-C- %llu,%llu,\n", s0, s1);
+    }
+    s0 = poly_finalize(s0);
+    s1 = poly_finalize(s1);
+    #if (HS1_SIV_HASH_RNDS > 4)
+    write64le(h, (uint64_t)asu_hash(s1, asukey+3) << 32 | asu_hash(s0, asukey));
+    #else
+    (void)asukey;  /* Suppress warning */
+    write64le(h,s0);
+    write64le(h+1,s1);
+    #endif
+}
+
+void hs1_hash(hs1siv_ctx_t *ctx, void *in, unsigned inbytes, void *out) {
+    uint64_t *h = (uint64_t *)out;
+    unsigned k = (HS1_SIV_HASH_RNDS > 4 ? 1 : 2);
+
+    prf_hash2(h, (uint32_t *)in, inbytes, (uint32_t *)ctx->nh_key,
+              (uint64_t *)ctx->poly_key, (uint64_t *)ctx->asu_key);
+    #if HS1_SIV_HASH_RNDS > 2
+    prf_hash2(h+k, (uint32_t *)in, inbytes, (uint32_t *)ctx->nh_key+8,
+              (uint64_t *)ctx->poly_key+2, (uint64_t *)ctx->asu_key+6);
+    #if HS1_SIV_HASH_RNDS > 4
+    prf_hash2(h+2*k, (uint32_t *)in, inbytes, (uint32_t *)ctx->nh_key+16,
+              (uint64_t *)ctx->poly_key+4, (uint64_t *)ctx->asu_key+12);
+    #if HS1_SIV_HASH_RNDS > 6
+    prf_hash2(h+3*k, (uint32_t *)in, inbytes, (uint32_t *)ctx->nh_key+24,
+              (uint64_t *)ctx->poly_key+6, (uint64_t *)ctx->asu_key+18);
+    #endif
+    #endif
+    #endif
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ *     P R F   S e c t i o n
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void hs1siv_subkeygen(hs1siv_ctx_t *ctx, void *user_key, unsigned key_bytes)
+{
+    unsigned char chacha_key[32];
+    unsigned char iv[12] = {0};
+    unsigned i=0;
+
+    /* Copy user_key as many times as needed to fill 32 byte chacha key */
+    while (i < 32) {
+        unsigned nbytes = 32-i;
+        if (nbytes > key_bytes) nbytes = key_bytes;
+        memcpy(chacha_key+i,user_key,nbytes);
+        i += nbytes;
+    }
+
+    /* Build key-derivation nonce and fill context */
+    iv[0] = key_bytes;
+    iv[2] = HS1_SIV_SIV_LEN;
+    iv[4] = HS1_SIV_CHACHA_RNDS;
+    iv[5] = HS1_SIV_HASH_RNDS;
+    iv[6] = HS1_SIV_NH_LEN;
+    hs1siv_chacha256(ctx, sizeof(hs1siv_ctx_t), iv, chacha_key);
+
+    /* Pre-process internal keys: make future reads little-endian, mod poly */
+    for (i=0; i<sizeof(ctx->nh_key)/4; i++)
+        ((uint32_t *)ctx->nh_key)[i] = read32le(((uint32_t *)ctx->nh_key)+i);
+    for (i=0; i<sizeof(ctx->poly_key)/8; i++)
+        ((uint64_t *)ctx->poly_key)[i] = read64le(((uint64_t *)ctx->poly_key)+i)
+                                       & m60;
+    #if (HS1_SIV_HASH_RNDS > 4)
+    for (i=0; i<sizeof(ctx->asu_key)/8; i++)
+        ((uint64_t *)ctx->asu_key)[i] = read64le(((uint64_t *)ctx->asu_key)+i);
+    #endif
+}
+
+void hs1(
+    hs1siv_ctx_t *hs1_ctx,
+    void *in, unsigned inbytes,
+    void *iv,
+    void *out, unsigned outbytes
+)
+{
+    #if (HS1_SIV_HASH_RNDS > 4)
+    uint64_t h[HS1_SIV_HASH_RNDS/2];
+    #else
+    uint64_t h[HS1_SIV_HASH_RNDS];
+    #endif
+
+    unsigned i;
+    unsigned char key[32];
+    chacha_ctx_t chacha_ctx;
+
+    hs1_hash(hs1_ctx, in, inbytes, h);
+    memcpy(key, hs1_ctx->chacha_key, 32);
+    for (i=0; i<sizeof(h)/8;i++) ((uint64_t *)key)[i] ^= h[i];
+    chacha_keysetup(&chacha_ctx, key, 256);
+    chacha_ivsetup(&chacha_ctx,(unsigned char *)iv);
+    chacha(&chacha_ctx, (unsigned char *)out, outbytes);
+}
+
+void hs1siv_encrypt(hs1siv_ctx_t *ctx, void *m, unsigned mbytes,
+                    void *a, unsigned abytes, void *n, void *t, void *c)
+{
+    unsigned i;
+    unsigned abuflen = (abytes+HS1_SIV_NH_LEN-1)/HS1_SIV_NH_LEN*HS1_SIV_NH_LEN;
+    unsigned buflen = abuflen + (mbytes+15)/16*16 + 16;
+    uint32_t tmp_t[HS1_SIV_SIV_LEN/4];
+    unsigned char *buf = (unsigned char *)malloc(buflen);
+    memset(buf, 0, buflen);
+    memcpy(buf, a, abytes);
+    memcpy(buf+abuflen, m, mbytes);
+    write32le((uint32_t *)(buf+buflen-16), abytes);
+    write32le((uint32_t *)(buf+buflen-8), mbytes);
+    hs1(ctx, buf, buflen, n, tmp_t, HS1_SIV_SIV_LEN);
+    free(buf);
+    buf = (unsigned char *)malloc(mbytes+64);
+    hs1(ctx, tmp_t, HS1_SIV_SIV_LEN, n, buf, mbytes+64);
+    for (i=0; i<mbytes; i++)
+        buf[64+i] ^= ((unsigned char *)m)[i];
+    memcpy(c,buf+64,mbytes);
+    memcpy(t,tmp_t,HS1_SIV_SIV_LEN);
+    free(buf);
+}
+
+int hs1siv_decrypt(hs1siv_ctx_t *ctx, void *c, unsigned cbytes,
+                   void *a, unsigned abytes, void *n, void *t, void *m)
+{
+    unsigned i;
+    unsigned abuflen = (abytes+HS1_SIV_NH_LEN-1)/HS1_SIV_NH_LEN*HS1_SIV_NH_LEN;
+    unsigned buflen = abuflen + (cbytes+15)/16*16 + 16;
+    unsigned char *maybe_m = (unsigned char *)malloc(cbytes);
+    uint32_t maybe_t[HS1_SIV_SIV_LEN/4];
+    unsigned char *buf = (unsigned char *)malloc(cbytes+64);
+    memcpy(maybe_t,t,HS1_SIV_SIV_LEN);  /* move to aligned buffer */
+    hs1(ctx, maybe_t, HS1_SIV_SIV_LEN, n, buf, cbytes+64);
+    for (i=0; i<cbytes; i++)
+        ((unsigned char *)maybe_m)[i] = ((unsigned char *)c)[i] ^ buf[64+i];
+    free(buf);
+    buf = (unsigned char *)malloc(buflen);
+    memset(buf, 0, buflen);
+    memcpy(buf, a, abytes);
+    memcpy(buf+abuflen, maybe_m, cbytes);
+    write32le((uint32_t *)(buf+buflen-16), abytes);
+    write32le((uint32_t *)(buf+buflen-8), cbytes);
+    hs1(ctx, buf, buflen, n, maybe_t, HS1_SIV_SIV_LEN);
+    free(buf);
+    if (memcmp(t,maybe_t,HS1_SIV_SIV_LEN) == 0) {
+        memcpy(m,maybe_m,cbytes);
+        free(maybe_m);
+        return 0;
+    } else {
+        free(maybe_m);
+        return -1;
+    }
+}
+
+int crypto_aead_encrypt(
+    unsigned char *c,unsigned long long *clen,
+    const unsigned char *m,unsigned long long mlen,
+    const unsigned char *ad,unsigned long long adlen,
+    const unsigned char *nsec,
+    const unsigned char *npub,
+    const unsigned char *k
+)
+{
+    hs1siv_ctx_t ctx;
+    (void)nsec;
+    hs1siv_subkeygen(&ctx, (void *)k, CRYPTO_KEYBYTES);
+    if (clen) *clen = mlen+CRYPTO_ABYTES;
+    hs1siv_encrypt(&ctx, (void *)m, (unsigned)mlen, (void *)ad,
+            (unsigned)adlen, (void *)npub, c+mlen, c);
+    return 0;
+}
+
+int crypto_aead_decrypt(
+    unsigned char *m,unsigned long long *mlen,
+    unsigned char *nsec,
+    const unsigned char *c,unsigned long long clen,
+    const unsigned char *ad,unsigned long long adlen,
+    const unsigned char *npub,
+    const unsigned char *k
+)
+{
+    hs1siv_ctx_t ctx;
+    (void)nsec;
+    if (mlen) *mlen = clen-CRYPTO_ABYTES;
+    hs1siv_subkeygen(&ctx, (void *)k, CRYPTO_KEYBYTES);
+    return hs1siv_decrypt(&ctx, (void *)c, (unsigned)clen-CRYPTO_ABYTES,
+    	    (void *)ad, (unsigned)adlen, (void *)npub,
+    	    (void *)(c+clen-CRYPTO_ABYTES), m);
+}
diff --git a/hs1-siv/ref/main.c b/hs1-siv/ref/main.c
new file mode 100644
index 00000000..e7a51e9e
--- /dev/null
+++ b/hs1-siv/ref/main.c
@@ -0,0 +1,124 @@
+#include <stdio.h>
+
+/*
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 12
+#define CRYPTO_ABYTES 16
+*/
+
+/* Exactly one of the following should be set */
+#define HS1_SIV_LO  0
+#define HS1_SIV     1
+#define HS1_SIV_HI  0
+
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 12
+#if HS1_SIV_LO
+# define CRYPTO_ABYTES 8
+#elif HS1_SIV
+# define CRYPTO_ABYTES 16
+#elif HS1_SIV_HI
+# define CRYPTO_ABYTES 32
+#else
+# error "one of HS1_SIV, HS1_SIV_LO or HS1_SIV_HI must be 1"
+#endif
+
+#include "encrypt.c"
+
+#if CRYPTO_ABYTES != HS1_SIV_SIV_LEN
+# error "CRYPTO_ABYTES must equal HS1_SIV_SIV_LEN"
+#endif
+
+#define MSG "Hello to the entire wide, round, global globe!"
+#define MSG64 "Hello to the entire wide, round, global globe!!! okookokokokokok"
+//#define MSG ""
+#define KEY "Short keys? Use long for testing"
+#define NONCE "Quack quack!"
+
+void hs1siv_subkeygen(hs1siv_ctx_t *ctx, void *user_key, unsigned key_bytes);
+
+void print_bytes(const unsigned char *bytes, unsigned long long len, unsigned long long wrap) {
+    for (unsigned long long i = 0; i < len; i++) {
+        printf("0x%02x,", bytes[i]);
+        if (i % wrap == wrap - 1)
+            printf("\n");
+    }
+    if (len % wrap != 0)
+        printf("\n");
+}
+
+void print_words(const uint32_t *words, unsigned long long len, unsigned long long wrap) {
+    for (unsigned long long i = 0; i < len; i++) {
+        printf("0x%08x,", words[i]);
+        if (i % wrap == wrap - 1)
+            printf("\n");
+    }
+    if (len % wrap != 0)
+        printf("\n");
+}
+
+void print_doubles(const uint64_t *doubles, unsigned long long len, unsigned long long wrap) {
+    for (unsigned long long i = 0; i < len; i++) {
+        printf("0x%016llx,", doubles[i]);
+        if (i % wrap == wrap - 1)
+            printf("\n");
+    }
+    if (len % wrap != 0)
+        printf("\n");
+}
+
+void subkeygen(hs1siv_ctx_t *ctx) {
+    hs1siv_subkeygen(ctx, KEY, sizeof(KEY) - 1);
+    printf("chacha_key:\n");
+    print_bytes(ctx->chacha_key, sizeof(ctx->chacha_key), 8);
+    printf("nh_key:\n");
+    print_words((void *)ctx->nh_key, sizeof(ctx->nh_key) / 4, 2);
+    printf("poly_key:\n");
+    print_doubles((void *)ctx->poly_key, sizeof(ctx->poly_key) / 8, 1);
+#if (HS1_SIV_HASH_RNDS > 4)
+    printf("asu_key:\n");
+    print_doubles((void *)ctx->asu_key, sizeof(ctx->asu_key) / 8, 1);
+#endif
+}
+
+void hash(hs1siv_ctx_t *ctx) {
+    #if (HS1_SIV_HASH_RNDS > 4)
+    uint64_t h[HS1_SIV_HASH_RNDS/2];
+    #else
+    uint64_t h[HS1_SIV_HASH_RNDS];
+    #endif
+    hs1_hash(ctx, MSG, sizeof(MSG) - 1, h);
+    printf("h:\n");
+#if (HS1_SIV_HASH_RNDS > 4)
+    print_words((uint32_t *)h, sizeof(h) / 4, 1);
+#else
+    print_doubles(h, sizeof(h) / 8, 1);
+#endif
+}
+
+void ciphertext(void) {
+	unsigned char cbuf[1024];
+	unsigned long long clen;
+
+	crypto_aead_encrypt(
+        cbuf, &clen, 
+        MSG, sizeof(MSG) - 1,
+        "", 0,
+        (void *)0,
+        NONCE,
+        KEY
+    );
+
+    printf("ciphertext length: %llu\n", clen);
+    print_bytes(cbuf, clen, 8);
+}
+
+int main() {
+    hs1siv_ctx_t ctx;
+    subkeygen(&ctx);
+    hash(&ctx);
+    ciphertext();
+    return 0;
+}
diff --git a/hs1-siv/ref/run.sh b/hs1-siv/ref/run.sh
new file mode 100755
index 00000000..00066dfd
--- /dev/null
+++ b/hs1-siv/ref/run.sh
@@ -0,0 +1 @@
+cc main.c -o main && ./main
diff --git a/hs1-siv/src/hash.rs b/hs1-siv/src/hash.rs
new file mode 100644
index 00000000..87108996
--- /dev/null
+++ b/hs1-siv/src/hash.rs
@@ -0,0 +1,191 @@
+use super::{
+    mask, Array, ArraySize, False, Gr, Hs1HashKey, Hs1Params, PhantomData, Quot, True, B16, U4,
+};
+use aead::array::typenum::Unsigned;
+use core::mem;
+
+#[derive(Clone)]
+pub struct Hasher<P: Hs1Params> {
+    k: Hs1HashKey<P>,
+    h: Array<u64, P::T>,
+    block: Array<u32, Quot<B16<P>, U4>>,
+    bytes: u8,
+    _marker: PhantomData<P>,
+}
+
+pub(crate) mod sealed {
+    pub trait Hs1HashFinal {
+        type Output: Copy;
+        type Asu: Copy + AsMut<[u64]> + Default;
+
+        fn compute(h: u64, k_a: &Self::Asu) -> Self::Output;
+    }
+}
+
+impl sealed::Hs1HashFinal for False {
+    type Output = [u8; 8];
+    type Asu = [u64; 0];
+
+    fn compute(h: u64, []: &Self::Asu) -> Self::Output {
+        h.to_le_bytes()
+    }
+}
+
+impl sealed::Hs1HashFinal for True {
+    type Output = [u8; 4];
+    type Asu = [u64; 3];
+
+    fn compute(h: u64, &[k_a0, k_a1, k_a2]: &Self::Asu) -> Self::Output {
+        let (h1, h2) = (h & 0xffff_ffff, h >> 32);
+        let res = k_a0
+            .wrapping_add(k_a1.wrapping_mul(h1))
+            .wrapping_add(k_a2.wrapping_mul(h2));
+        let [_, _, _, _, res @ ..] = res.to_le_bytes();
+        res
+    }
+}
+
+type Hs1Hash<P> = Gr<<P as Hs1Params>::T, U4>;
+pub type Output<P> = <Hs1Hash<P> as sealed::Hs1HashFinal>::Output;
+pub type Asu<P> = <Hs1Hash<P> as sealed::Hs1HashFinal>::Asu;
+
+impl<P: Hs1Params> Hasher<P> {
+    pub fn new(k: &Hs1HashKey<P>) -> Self {
+        Self {
+            k: k.clone(),
+            h: array_from_iter(core::iter::repeat(1)),
+            block: Array::default(),
+            bytes: 0,
+            _marker: PhantomData,
+        }
+    }
+
+    /// Folds the buffered block (zero-padded to a 16-byte multiple) into every poly state.
+    fn update_block(&mut self) -> &mut Self {
+        assert!(usize::from(self.bytes) <= self.block_u8().len());
+        // Zero the unused tail so stale bytes from an earlier block never act as padding.
+        let used = usize::from(self.bytes);
+        self.block_u8()[used..].fill(0);
+
+        #[inline(always)]
+        fn nh(v1: &[u32], v2: &[u32]) -> u64 {
+            debug_assert_eq!(v1.len(), v2.len());
+            debug_assert_eq!(v1.len() % 4, 0);
+            // A plain loop is used instead of a compact iterator chain because the chain
+            // optimized poorly (and this is still fairly compact).
+            let mut s = 0u64;
+            for (x, y) in v1.chunks_exact(4).zip(v2.chunks_exact(4)) {
+                let d = u64::from(x[3].wrapping_add(y[3]));
+                let c = u64::from(x[2].wrapping_add(y[2]));
+                let b = u64::from(x[1].wrapping_add(y[1]));
+                let a = u64::from(x[0].wrapping_add(y[0]));
+                s = s.wrapping_add(a * c).wrapping_add(b * d);
+            }
+            s
+        }
+
+        let m_ints = &self.block;
+        let block16_count = usize::from(((self.bytes + 15) / 16).max(1));
+        self.k
+            .nh
+            .windows(B16::<P>::to_usize() / 4)
+            .step_by(4)
+            .map(|k_n_i| nh(&k_n_i[..block16_count * 4], &m_ints[..block16_count * 4]))
+            .map(|nh_i| nh_i.wrapping_add(u64::from(self.bytes) & mask(4)) & mask(60))
+            .zip(self.k.poly.iter())
+            .zip(self.h.iter_mut())
+            .for_each(|((a_i, &k_p_i), h_i)| *h_i = poly_step(*h_i, a_i, k_p_i));
+        self.bytes = 0;
+        self
+    }
+
+    pub fn update<'a>(&'a mut self, bytes: &[u8]) -> &'a mut Self {
+        assert!(usize::from(self.bytes) < self.block_u8().len());
+        let start = usize::from(self.bytes);
+        let fill = bytes.len().min(self.block_u8().len() - start);
+        let end = start + fill;
+        let (now, rest) = bytes.split_at(fill);
+        self.block_u8()[start..end].copy_from_slice(now);
+        self.bytes = end as u8;
+        if end < self.block_u8().len() {
+            return self;
+        }
+        self.update_block();
+
+        let mut it = rest.chunks_exact(self.block_u8().len());
+        for blk in &mut it {
+            self.block_u8().copy_from_slice(blk);
+            self.bytes = B16::<P>::to_u8();
+            self.update_block();
+        }
+
+        let rest = it.remainder();
+        self.block_u8()[..rest.len()].copy_from_slice(rest);
+        self.bytes = rest.len() as u8;
+        self
+    }
+
+    pub(crate) fn pad_to(&mut self, bits: u8) -> &mut Self {
+        debug_assert!(1 << bits <= B16::<P>::to_u8());
+        let m = mask(bits) as u8;
+        let fill = (m + 1).wrapping_sub(self.bytes) & m;
+        self.update(&[0; 256][..usize::from(fill)])
+    }
+
+    // TODO &mut self helps avoid needing to clone(), but might be unintuitive
+    pub fn finalize(&mut self) -> Array<Output<P>, P::T> {
+        // TODO we need to handle empty data properly
+        // However, see the note in crate::test::test_vectors::hash_me_empty
+        use sealed::Hs1HashFinal;
+        if self.bytes != 0 {
+            self.update_block();
+        }
+        let mut out = Array::<Output<P>, P::T>::default();
+        for ((out_i, h_i), k_a_i) in out.iter_mut().zip(&self.h).zip(&self.k.asu) {
+            let h_i = poly_finalize(*h_i);
+            *out_i = Hs1Hash::<P>::compute(h_i, k_a_i);
+        }
+        out
+    }
+
+    fn block_u8(&mut self) -> &mut Array<u8, B16<P>> {
+        const {
+            assert!(
+                mem::size_of::<Array<u32, Quot<B16<P>, U4>>>()
+                    == mem::size_of::<Array<u8, B16<P>>>()
+            )
+        }
+        // SAFETY:
+        // - the alignment is correct
+        // - the lengths are equal
+        // - there is no padding
+        // - there are no invalid bit patterns
+        unsafe { mem::transmute(&mut self.block) }
+    }
+}
+
+#[inline(always)]
+const fn poly_step(a: u64, b: u64, k: u64) -> u64 {
+    let tmp = a as u128 * k as u128;
+    (tmp as u64 & mask(61))
+        .wrapping_add((tmp >> 61) as u64)
+        .wrapping_add(b)
+}
+
+#[inline(always)]
+const fn poly_finalize(a: u64) -> u64 {
+    let a = (a & mask(61)).wrapping_add(a >> 61);
+    [a, 0][(a == mask(61)) as usize]
+}
+
+#[inline(always)]
+fn array_from_iter<I, L>(it: I) -> Array<I::Item, L>
+where
+    I: IntoIterator,
+    L: ArraySize,
+    I::Item: Default,
+{
+    let mut v = Array::<I::Item, L>::default();
+    v.iter_mut().zip(it).for_each(|(w, r)| *w = r);
+    v
+}
diff --git a/hs1-siv/src/lib.rs b/hs1-siv/src/lib.rs
new file mode 100644
index 00000000..086df762
--- /dev/null
+++ b/hs1-siv/src/lib.rs
@@ -0,0 +1,599 @@
+#![no_std]
+#![doc = include_str!("../README.md")]
+#![warn(missing_docs, rust_2018_idioms)]
+
+mod hash;
+
+pub use aead;
+
+use aead::{
+    array::{
+        typenum::{Gr, IsGreater, Prod, Quot, Sub1, Sum, Unsigned},
+        Array, ArraySize,
+    },
+    consts::{False, True, B1, U0, U12, U16, U2, U32, U4, U6, U8},
+    AeadCore, AeadInPlace, KeyInit, KeySizeUser,
+};
+use chacha20::{
+    cipher::{StreamCipher, StreamCipherSeek},
+    ChaCha12, ChaCha20, ChaCha8, KeyIvInit,
+};
+use core::{
+    marker::PhantomData,
+    mem,
+    ops::{Add, Div, Mul, Sub},
+};
+use hash::Hasher;
+
+/// Implementation of HS1-SIV.
+///
+/// While HS1-SIV takes a key between 1 and 32 bytes,
+/// this structure instead stores the derived key,
+/// which is substantially larger:
+///
+/// - `Hs1SivLo`: 128 bytes.
+/// - `Hs1SivMe`: 176 bytes.
+/// - `Hs1SivHi`: 368 bytes.
+#[derive(Clone)]
+pub struct Hs1Siv<P>
+where
+    P: Hs1Params,
+{
+    key: Hs1Key<P>,
+    _marker: PhantomData<P>,
+}
+
+/// | `B` | `T` | `C`        | `L` |
+/// |-----|-----|------------|-----|
+/// |   4 |   2 | `ChaCha8`  |   8 |
+///
+/// | Key search  | SIV collision                   |
+/// |-------------|---------------------------------|
+/// | `n/(2^256)` | `(n^2)/(2^56)  + (n^2)/(2^64) ` |
+pub type Hs1SivLo = Hs1Siv<params::Hs1SivLo>;
+
+/// | `B` | `T` | `C`        | `L` |
+/// |-----|-----|------------|-----|
+/// |   4 |   4 | `ChaCha12` |  16 |
+///
+/// | Key search  | SIV collision                   |
+/// |-------------|---------------------------------|
+/// | `n/(2^256)` | `(n^2)/(2^112) + (n^2)/(2^128)` |
+pub type Hs1SivMe = Hs1Siv<params::Hs1SivMe>;
+
+/// | `B` | `T` | `C`        | `L` |
+/// |-----|-----|------------|-----|
+/// |   4 |   6 | `ChaCha20` |  32 |
+///
+/// | Key search  | SIV collision                   |
+/// |-------------|---------------------------------|
+/// | `n/(2^256)` | `(n^2)/(2^168) + (n^2)/(2^256)` |
+pub type Hs1SivHi = Hs1Siv<params::Hs1SivHi>;
+
+impl<P> AeadCore for Hs1Siv<P>
+where
+    P: Hs1Params,
+{
+    type TagSize = P::L;
+    type NonceSize = U12;
+    type CiphertextOverhead = U0;
+}
+
+impl<P> AeadInPlace for Hs1Siv<P>
+where
+    P: Hs1Params,
+{
+    fn encrypt_in_place_detached(
+        &self,
+        nonce: &aead::Nonce<Self>,
+        associated_data: &[u8],
+        buffer: &mut [u8],
+    ) -> aead::Result<aead::Tag<Self>> {
+        hs1_siv_encrypt::<P>(&self.key, nonce, associated_data, buffer)
+    }
+
+    fn decrypt_in_place_detached(
+        &self,
+        nonce: &aead::Nonce<Self>,
+        associated_data: &[u8],
+        buffer: &mut [u8],
+        tag: &aead::Tag<Self>,
+    ) -> aead::Result<()> {
+        hs1_siv_decrypt::<P>(&self.key, nonce, associated_data, buffer, tag)
+    }
+}
+
+impl<P> KeySizeUser for Hs1Siv<P>
+where
+    P: Hs1Params,
+{
+    type KeySize = U32;
+}
+
+impl<P> KeyInit for Hs1Siv<P>
+where
+    P: Hs1Params,
+{
+    fn new(key: &aead::Key<Self>) -> Self {
+        assert!((1..=32).contains(&key.len()));
+        let key = hs1_subkeygen::<P>(key);
+        Self {
+            key,
+            _marker: PhantomData,
+        }
+    }
+}
+
+/// Definitions of standard parameters for use with HS1-SIV.
+///
+/// Prefer using the type aliases at the root of the crate instead.
+pub mod params {
+    use super::*;
+
+    /// | `B` | `T` | `C`        | `L` |
+    /// |-----|-----|------------|-----|
+    /// |   4 |   2 | `ChaCha8`  |   8 |
+    ///
+    /// | Key search  | SIV collision                   |
+    /// |-------------|---------------------------------|
+    /// | `n/(2^256)` | `(n^2)/(2^56)  + (n^2)/(2^64) ` |
+    #[derive(Clone, Copy)]
+    pub struct Hs1SivLo;
+
+    /// | `B` | `T` | `C`        | `L` |
+    /// |-----|-----|------------|-----|
+    /// |   4 |   4 | `ChaCha12` |  16 |
+    ///
+    /// | Key search  | SIV collision                   |
+    /// |-------------|---------------------------------|
+    /// | `n/(2^256)` | `(n^2)/(2^112) + (n^2)/(2^128)` |
+    #[derive(Clone, Copy)]
+    pub struct Hs1SivMe;
+
+    /// | `B` | `T` | `C`        | `L` |
+    /// |-----|-----|------------|-----|
+    /// |   4 |   6 | `ChaCha20` |  32 |
+    ///
+    /// | Key search  | SIV collision                   |
+    /// |-------------|---------------------------------|
+    /// | `n/(2^256)` | `(n^2)/(2^168) + (n^2)/(2^256)` |
+    #[derive(Clone, Copy)]
+    pub struct Hs1SivHi;
+
+    impl Hs1Params for Hs1SivLo {
+        type B = U4;
+        type T = U2;
+        type C = ChaCha8;
+        type L = U8;
+    }
+
+    impl Hs1Params for Hs1SivMe {
+        type B = U4;
+        type T = U4;
+        type C = ChaCha12;
+        type L = U16;
+    }
+
+    impl Hs1Params for Hs1SivHi {
+        type B = U4;
+        type T = U6;
+        type C = ChaCha20;
+        type L = U32;
+    }
+}
+
+#[derive(Clone)]
+#[repr(C, align(16))]
+struct Hs1Key<P: Hs1Params> {
+    chacha: Array<u8, U32>,
+    hash: Hs1HashKey<P>,
+}
+
+#[derive(Clone)]
+#[repr(C, align(16))]
+struct Hs1HashKey<P: Hs1Params> {
+    nh: Array<u32, NhLen<P>>,
+    poly: Array<u64, P::T>,
+    asu: Array<hash::Asu<P>, P::T>,
+}
+
+impl<P: Hs1Params> Hs1Key<P> {
+    fn as_bytes_mut(&mut self) -> &mut [u8] {
+        // Ensure that all fields have a size which is a multiple of 16.
+        // This trivializes the safety proof, since padding is impossible if the check passes.
+        const {
+            const fn chk<T, L: ArraySize>() {
+                assert!(mem::size_of::<Array<T, L>>() % 16 == 0);
+            }
+            chk::<u8, U32>();
+            chk::<u32, NhLen<P>>();
+            chk::<u64, P::T>();
+            chk::<hash::Asu<P>, P::T>();
+        }
+        // SAFETY:
+        // - There are no padding bytes
+        // - There are no invalid bit patterns
+        unsafe {
+            let len = mem::size_of_val(self);
+            let ptr = self as *mut Self as *mut u8;
+            core::slice::from_raw_parts_mut(ptr, len)
+        }
+    }
+}
+
+type B16<P> = Prod<<P as Hs1Params>::B, U16>;
+type NhLen<P> = Sum<Quot<B16<P>, U4>, Prod<Sub1<<P as Hs1Params>::T>, U4>>;
+
+/// HS1 parameters.
+// hey, as long as it works!
+pub trait Hs1Params: Copy + Sync + Send
+where
+    Self::B: Mul<U16> + 'static,
+    B16<Self>: ArraySize,
+    Self::T: ArraySize,
+    Self::L: ArraySize,
+    Quot<B16<Self>, U4>: ArraySize,
+    // Hs1Key
+    Self::T: Sub<B1>,
+    Sub1<Self::T>: Mul<U4>,
+    B16<Self>: Div<U4>,
+    Quot<B16<Self>, U4>: Add<Prod<Sub1<Self::T>, U4>>,
+    NhLen<Self>: ArraySize,
+    // hs1_hash
+    Self::T: IsGreater<U4>,
+    Gr<Self::T, U4>: hash::sealed::Hs1HashFinal,
+    hash::Output<Self>: Default + AsRef<[u8]>,
+{
+    /// Block size, in terms of 16 bytes.
+    type B;
+    /// "collision level" (higher is more secure).
+    type T;
+    /// ChaCha implementation.
+    type C: KeyIvInit<KeySize = U32, IvSize = U12>
+        + StreamCipher
+        + StreamCipherSeek
+        + sealed::ChaChaImpl;
+    /// Tag length in bytes.
+    type L;
+}
+
+mod sealed {
+    // Necessary for subkeygen
+    pub trait ChaChaImpl {
+        const ROUNDS: u8;
+    }
+}
+
+impl sealed::ChaChaImpl for ChaCha8 {
+    const ROUNDS: u8 = 8;
+}
+impl sealed::ChaChaImpl for ChaCha12 {
+    const ROUNDS: u8 = 12;
+}
+impl sealed::ChaChaImpl for ChaCha20 {
+    const ROUNDS: u8 = 20;
+}
+
+/// Encrypts `m` in place and returns the SIV tag computed over `a`, `n` and the plaintext.
+/// Fails with `aead::Error` if `m.len()` exceeds `2**38` bytes.
+fn hs1_siv_encrypt<P: Hs1Params>(
+    k: &Hs1Key<P>,
+    n: &Array<u8, U12>,
+    a: &[u8],
+    m: &mut [u8],
+) -> Result<Array<u8, P::L>, aead::Error> {
+    // Compare as `u64`: `1 << 38` does not fit in `usize` on 32-bit targets.
+    if m.len() as u64 > 1 << 38 {
+        return Err(aead::Error);
+    }
+    let t = hs1_tag::<P>(k, a, n, &*m);
+    hs1::<P>(k, &[&*t], n, 64, m);
+    Ok(t)
+}
+
+fn hs1_siv_decrypt<P: Hs1Params>(
+    k: &Hs1Key<P>,
+    n: &Array<u8, U12>,
+    a: &[u8],
+    m: &mut [u8],
+    t: &Array<u8, P::L>,
+) -> Result<(), aead::Error> {
+    hs1::<P>(k, &[t], n, 64, m);
+    let t2 = hs1_tag::<P>(k, a, n, m);
+    let diff = t.iter().zip(t2.iter()).fold(0, |s, (x, y)| s | (x ^ y));
+    (diff == 0).then_some(()).ok_or_else(|| {
+        // Never leave unauthenticated plaintext in the caller's buffer (cf. CVE-2023-42811).
+        // Rather than running the cipher again to restore the input, just zero the buffer.
+        m.fill(0);
+        aead::Error
+    })
+}
+
+fn hs1_tag<P: Hs1Params>(k: &Hs1Key<P>, a: &[u8], n: &Array<u8, U12>, m: &[u8]) -> Array<u8, P::L> {
+    let a_m_len = &mut [0; 16];
+    a_m_len[..8].copy_from_slice(&(a.len() as u64).to_le_bytes());
+    a_m_len[8..].copy_from_slice(&(m.len() as u64).to_le_bytes());
+    let m2 = &[a, m, a_m_len];
+    let mut t = Array::<u8, P::L>::default();
+    hs1::<P>(k, m2, n, 0, &mut t);
+    t
+}
+
+#[inline(always)]
+fn hs1<P: Hs1Params>(k: &Hs1Key<P>, m: &[&[u8]], n: &Array<u8, U12>, y_offset: u32, y: &mut [u8]) {
+    let mut key = k.chacha;
+
+    let mut hasher = Hasher::<P>::new(&k.hash);
+    for (i, b) in m.iter().enumerate() {
+        if i > 0 {
+            hasher.pad_to(4);
+        }
+        hasher.update(b);
+    }
+    let input = hasher.finalize();
+
+    key.iter_mut()
+        .zip(input.iter().flat_map(|x| x.as_ref()))
+        .for_each(|(w, r)| *w ^= *r);
+    let mut cipher = P::C::new(&key, n);
+    cipher.seek(y_offset);
+    cipher.apply_keystream(y)
+}
+
+fn hs1_subkeygen<P: Hs1Params>(k: &[u8]) -> Hs1Key<P> {
+    assert!((1..=32).contains(&k.len()));
+
+    let k2 = &mut Array::<u8, U32>::default();
+    k2.iter_mut()
+        .zip(k.iter().cycle())
+        .for_each(|(w, r)| *w = *r);
+
+    let n = &mut Array::<u8, U12>::default();
+    debug_assert!(k.len() < 256);
+    debug_assert!(P::L::U64 < 256);
+    debug_assert!(P::T::U64 < 256);
+    debug_assert!(B16::<P>::U64 < 256);
+    n[0] = k.len() as u8;
+    n[2] = P::L::to_u8();
+    n[4] = <P::C as sealed::ChaChaImpl>::ROUNDS;
+    n[5] = P::T::to_u8();
+    n[6] = B16::<P>::to_u8();
+
+    let mut k = Hs1Key {
+        chacha: Array::default(),
+        hash: Hs1HashKey {
+            nh: Array::default(),
+            poly: Array::default(),
+            asu: Array::default(),
+        },
+    };
+
+    <P::C as KeyIvInit>::new(k2, n).apply_keystream(k.as_bytes_mut());
+    k.hash.poly.iter_mut().for_each(|p| *p &= mask(60));
+
+    k
+}
+
+#[inline(always)]
+const fn mask(bits: u8) -> u64 {
+    (1u64 << bits).wrapping_sub(1)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use aead::{Aead, KeyInit};
+
+    const MSG: &[u8] = b"Hello to the entire wide, round, global globe!";
+    const KEY: &[u8; 32] = b"Short keys? Use long for testing";
+    const NONCE: &[u8; 12] = b"Quack quack!";
+
+    fn hs1siv<P: Hs1Params>() {
+        let hs1 = Hs1Siv::<P>::new(KEY.into());
+        let cph = hs1.encrypt(NONCE.into(), MSG).unwrap();
+        let msg = hs1.decrypt(NONCE.into(), &*cph).unwrap();
+        assert_eq!(&msg, MSG);
+    }
+
+    #[test]
+    fn hs1siv_me() {
+        hs1siv::<params::Hs1SivMe>();
+    }
+
+    #[test]
+    fn hs1siv_lo() {
+        hs1siv::<params::Hs1SivLo>();
+    }
+
+    #[test]
+    fn hs1siv_hi() {
+        hs1siv::<params::Hs1SivHi>();
+    }
+
+    /// Custom generated vectors using the [reference implementation][0].
+    ///
+    /// [0]: https://bench.cr.yp.to/supercop.html
+    mod test_vectors {
+        use super::*;
+
+        #[test]
+        fn subkeygen_me() {
+            let k = hs1_subkeygen::<params::Hs1SivMe>(KEY);
+            assert_eq!(
+                k.chacha,
+                [
+                    0x02, 0xea, 0xb5, 0x34, 0x85, 0x3e, 0xf7, 0xf4, 0x81, 0x3f, 0x87, 0xd8, 0xd2,
+                    0x63, 0x1e, 0x05, 0xf9, 0x68, 0x91, 0xd0, 0x8a, 0x03, 0x34, 0xfc, 0x64, 0xbe,
+                    0x6b, 0x3a, 0x89, 0xfe, 0x20, 0x8d,
+                ]
+            );
+            assert_eq!(
+                k.hash.nh,
+                [
+                    0x74e7102f, 0x374603b7, 0xf470c90c, 0x8c829c82, 0x07d6f293, 0xf9e7e569,
+                    0xcd590406, 0xe6bdc9ad, 0xa2687cda, 0xfc1a8b80, 0x501efbee, 0x0df51d32,
+                    0x7fd3f594, 0xc3d1520b, 0x1b83db2f, 0x0791c054, 0x66583c46, 0xcb096241,
+                    0x7afc8085, 0x4b37d47a, 0x540287e0, 0xe1ace58b, 0x4f125f3b, 0xb69b5935,
+                    0x6cb2cf06, 0xbf86407b, 0x18a6a2e5, 0xe1eaa248,
+                ]
+            );
+            assert_eq!(
+                k.hash.poly,
+                [
+                    0x09aad6627602f656,
+                    0x07f2089068131f87,
+                    0x0a982e724caf2722,
+                    0x004f2d42b1092d0a,
+                ]
+            );
+            assert_eq!(k.hash.asu, [[]; 4]);
+        }
+
+        #[test]
+        fn subkeygen_lo() {
+            let k = hs1_subkeygen::<params::Hs1SivLo>(KEY);
+            assert_eq!(
+                k.chacha,
+                [
+                    0xab, 0x1b, 0x65, 0x62, 0xe5, 0x4c, 0x79, 0x27, 0x30, 0xa3, 0x4c, 0xa6, 0x7e,
+                    0x79, 0x0f, 0xb9, 0xa9, 0x85, 0x62, 0xb2, 0x17, 0x2e, 0x47, 0x99, 0xe3, 0x7a,
+                    0x0c, 0x63, 0x77, 0xc4, 0x85, 0xca,
+                ]
+            );
+            assert_eq!(
+                k.hash.nh,
+                [
+                    0xd743ff76, 0x64b9e928, 0x0effa5ae, 0xf850ec1d, 0xda1249a9, 0x29afefcf,
+                    0x18bb4916, 0x35d0b524, 0x2036f9c4, 0x0ae224a6, 0x98f18f97, 0x3aad32e2,
+                    0x85256859, 0x30e4ad2e, 0x63b08461, 0x13c97c7d, 0xe4d45609, 0x0ca44ba2,
+                    0x6c4b356e, 0x9b960e6b,
+                ]
+            );
+            assert_eq!(k.hash.poly, [0x0ef85bac983cb194, 0x0a584b5179c75231]);
+            assert_eq!(k.hash.asu, [[]; 2]);
+        }
+
+        #[test]
+        fn hash_me() {
+            let k = hs1_subkeygen::<params::Hs1SivMe>(KEY);
+            let h = Hasher::new(&k.hash).update(MSG).finalize();
+            assert_eq!(
+                h,
+                [
+                    0x1808a23d991ae22c,
+                    0x08f96bf01b438f3b,
+                    0x194ee1ffd24b84a0,
+                    0x0b25578352a73e9d,
+                ]
+                .map(u64::to_le_bytes)
+            );
+        }
+
+        #[test]
+        fn hash_me_64() {
+            const MSG64: &[u8; 64] =
+                b"Hello to the entire wide, round, global globe!!! okookokokokokok";
+            let k = hs1_subkeygen::<params::Hs1SivMe>(KEY);
+            let h = Hasher::new(&k.hash).update(MSG64).finalize();
+            assert_eq!(
+                h,
+                [
+                    0x0f128a7f7b601324,
+                    0x0dc82e748a2a1395,
+                    0x106966138221d2ba,
+                    0x09f86f41d6677d4d,
+                ]
+                .map(u64::to_le_bytes)
+            );
+        }
+
+        #[test]
+        fn hash_lo() {
+            let k = hs1_subkeygen::<params::Hs1SivLo>(KEY);
+            let h = Hasher::new(&k.hash).update(MSG).finalize();
+            assert_eq!(
+                h,
+                [0x1afa0c19eba9a66b, 0x15ceb31a087f2657,].map(u64::to_le_bytes)
+            );
+        }
+
+        #[test]
+        fn hash_hi() {
+            let k = hs1_subkeygen::<params::Hs1SivHi>(KEY);
+            let h = Hasher::new(&k.hash).update(MSG).finalize();
+            assert_eq!(
+                h,
+                [0xcf452c22, 0x452317a2, 0x7fa1f1d6, 0x100d9702, 0xcf1defb0, 0x4c73da69,]
+                    .map(u32::to_le_bytes)
+            );
+        }
+
+        // TODO: the expected all-ones hash of an empty input is most likely wrong
+        // according to the paper, but that shouldn't be an issue as long as the
+        // hasher is not exposed publicly.
+        #[test]
+        fn hash_me_empty() {
+            let k = hs1_subkeygen::<params::Hs1SivMe>(KEY);
+            let h = Hasher::new(&k.hash).finalize();
+            assert_eq!(
+                h,
+                [
+                    0x0000000000000001,
+                    0x0000000000000001,
+                    0x0000000000000001,
+                    0x0000000000000001,
+                ]
+                .map(u64::to_le_bytes)
+            );
+        }
+
+        #[test]
+        fn hs1siv_me() {
+            let hs1 = Hs1SivMe::new(KEY.into());
+            let cph = hs1.encrypt(NONCE.into(), MSG).unwrap();
+            assert_eq!(
+                &cph,
+                &[
+                    0x1b, 0x26, 0x40, 0x4d, 0xe3, 0x46, 0xb3, 0x65, 0x07, 0xa7, 0x93, 0xf3, 0x6e,
+                    0xab, 0xb5, 0xcb, 0x1a, 0x99, 0x7c, 0xbf, 0xdf, 0x6c, 0xed, 0x15, 0xd9, 0xd0,
+                    0x26, 0x37, 0xf7, 0xcc, 0xd4, 0xb1, 0x20, 0xee, 0x02, 0x52, 0x3c, 0xee, 0xcc,
+                    0x41, 0x04, 0xbf, 0x42, 0xa9, 0xfc, 0x2e, 0x45, 0x06, 0x67, 0xa6, 0xfe, 0x07,
+                    0x2f, 0x00, 0x81, 0x72, 0x52, 0xa8, 0xb0, 0xb1, 0x2e, 0xd6,
+                ]
+            );
+        }
+
+        #[test]
+        fn hs1siv_lo() {
+            let hs1 = Hs1SivLo::new(KEY.into());
+            let cph = hs1.encrypt(NONCE.into(), MSG).unwrap();
+            assert_eq!(
+                &cph,
+                &[
+                    0xa8, 0xac, 0xcd, 0x91, 0x09, 0x39, 0xac, 0x6a, 0x13, 0x81, 0xa3, 0xa4, 0xbe,
+                    0xa1, 0xc9, 0x97, 0xa7, 0xda, 0xe6, 0x5e, 0x73, 0xd6, 0x0f, 0x2e, 0x87, 0xcf,
+                    0xe7, 0x20, 0xaf, 0x0d, 0x94, 0x45, 0xaa, 0x9b, 0x91, 0xf2, 0x11, 0x33, 0x48,
+                    0xc5, 0x7d, 0x0f, 0xd8, 0xda, 0xd7, 0x9a, 0x3d, 0xcf, 0x63, 0xea, 0xda, 0x32,
+                    0x7c, 0xa6,
+                ]
+            );
+        }
+
+        #[test]
+        fn hs1siv_hi() {
+            let hs1 = Hs1SivHi::new(KEY.into());
+            let cph = hs1.encrypt(NONCE.into(), MSG).unwrap();
+            assert_eq!(
+                &cph,
+                &[
+                    0xbc, 0x5d, 0xbb, 0x49, 0x52, 0x97, 0xb8, 0xb0, 0xab, 0x3a, 0x0b, 0x69, 0xb0,
+                    0x60, 0xd2, 0x75, 0xd1, 0x4e, 0x14, 0x73, 0x8f, 0xe3, 0xb6, 0x14, 0xb0, 0x06,
+                    0x01, 0x96, 0x4f, 0x90, 0x6e, 0x6a, 0x67, 0x71, 0xd0, 0x71, 0xf0, 0x4b, 0xc9,
+                    0xf8, 0x14, 0x54, 0x30, 0xe3, 0x33, 0xb0, 0x09, 0x97, 0x47, 0xf4, 0x8c, 0xd0,
+                    0x60, 0xae, 0x68, 0x40, 0xcb, 0x58, 0x64, 0x6b, 0xf9, 0x66, 0x5f, 0x58, 0xfa,
+                    0xdf, 0xd0, 0x50, 0xa7, 0x00, 0x43, 0x55, 0x5e, 0x63, 0xe9, 0x89, 0x31, 0x29,
+                ]
+            );
+        }
+    }
+}

From 68271ff04bb102fdd3f191bcd3bd0094eb79058c Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Thu, 23 Jan 2025 20:18:03 +0100
Subject: [PATCH 02/11] hs1-siv: fix cargo meta, bump version

---
 hs1-siv/Cargo.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hs1-siv/Cargo.toml b/hs1-siv/Cargo.toml
index c6040571..6f3cb8fa 100644
--- a/hs1-siv/Cargo.toml
+++ b/hs1-siv/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "hs1-siv"
-version = "0.1.1"
+version = "0.2.0-pre.1"
 edition = "2021"
 description = """
 Pure Rust implementation of the HS1-SIV Authenticated Encryption
@@ -10,8 +10,8 @@ authors = ["David Hoppenbrouwers"]
 license = "Apache-2.0 OR MIT"
 readme = "README.md"
 documentation = "https://docs.rs/hs1-siv"
-homepage = "https://codeberg.org/Demindiro/rust-hs1-siv"
-repository = "https://codeberg.org/Demindiro/rust-hs1-siv"
+homepage = "https://github.com/RustCrypto/AEADs/tree/master/hs1-siv"
+repository = "https://github.com/RustCrypto/AEADs"
 keywords = ["aead", "hs1-siv", "hs1", "siv"]
 categories = ["cryptography", "no-std"]
 rust-version = "1.81"

From f5c1815d81bc94a8e52b1df92950a0380bc3b00a Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Fri, 24 Jan 2025 09:43:57 +0100
Subject: [PATCH 03/11] hs1-siv: remove ref/

---
 hs1-siv/ref/.gitignore |   1 -
 hs1-siv/ref/encrypt.c  | 541 -----------------------------------------
 hs1-siv/ref/main.c     | 124 ----------
 hs1-siv/ref/run.sh     |   1 -
 4 files changed, 667 deletions(-)
 delete mode 100644 hs1-siv/ref/.gitignore
 delete mode 100644 hs1-siv/ref/encrypt.c
 delete mode 100644 hs1-siv/ref/main.c
 delete mode 100755 hs1-siv/ref/run.sh

diff --git a/hs1-siv/ref/.gitignore b/hs1-siv/ref/.gitignore
deleted file mode 100644
index ba2906d0..00000000
--- a/hs1-siv/ref/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-main
diff --git a/hs1-siv/ref/encrypt.c b/hs1-siv/ref/encrypt.c
deleted file mode 100644
index c2ed49bc..00000000
--- a/hs1-siv/ref/encrypt.c
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
-// HS1-SIV v2 reference code.
-//
-// Note: This implements HS1-SIV v2, and not v1 or Draft v2.
-//
-// ** This version is slow and susceptible to side-channel attacks. **
-// ** Do not use for any purpose other than to understand HS1-SIV.  **
-//
-// Written by Ted Krovetz (ted@krovetz.net). Last modified 28 July 2016.
-//
-// To the extent possible under law, the author has dedicated all copyright
-// and related and neighboring rights to this software to the public
-// domain worldwide. This software is distributed without any warranty.
-//
-// You should have received a copy of the CC0 Public Domain Dedication
-// along with this software. If not, see
-// <http://creativecommons.org/publicdomain/zero/1.0/>
-//
-// The author knows of no intellectual property claims relevant to this work.
-*/
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- *
- *     I n c l u d e s   a n d   u t i l i t i e s
- *
- * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-#if HS1_SIV_LO
-#define HS1_SIV_NH_LEN      64
-#define HS1_SIV_HASH_RNDS    2
-#define HS1_SIV_CHACHA_RNDS  8
-#define HS1_SIV_SIV_LEN      8
-#elif HS1_SIV
-#define HS1_SIV_NH_LEN      64
-#define HS1_SIV_HASH_RNDS    4
-#define HS1_SIV_CHACHA_RNDS 12
-#define HS1_SIV_SIV_LEN     16
-#elif HS1_SIV_HI
-#define HS1_SIV_NH_LEN      64
-#define HS1_SIV_HASH_RNDS    6
-#define HS1_SIV_CHACHA_RNDS 20
-#define HS1_SIV_SIV_LEN     32
-#endif
-
-#define __STDC_LIMIT_MACROS
-#include <string.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#if __GNUC__
-    #define HS1_SIV_ALIGN(n) __attribute__ ((aligned(n)))
-#elif _MSC_VER
-    #define HS1_SIV_ALIGN(n) __declspec(align(n))
-#elif (__STDC_VERSION >= 201112L) || (__cplusplus >= 201103L)
-    #define HS1_SIV_ALIGN(n) alignas(n)
-#else /* Not GNU/Microsoft/C11: delete alignment uses.     */
-    #pragma message ( "Struct alignment not guaranteed" )
-    #define HS1_SIV_ALIGN(n)
-#endif
-
-HS1_SIV_ALIGN(16)
-typedef struct {
-    unsigned char chacha_key[32];
-    unsigned char nh_key[HS1_SIV_NH_LEN+16*(HS1_SIV_HASH_RNDS-1)];
-    unsigned char poly_key[HS1_SIV_HASH_RNDS*8];
-    #if (HS1_SIV_HASH_RNDS > 4) /* ASU */
-    unsigned char asu_key[HS1_SIV_HASH_RNDS*24];
-    #else
-    unsigned char asu_key[];
-    #endif
-} hs1siv_ctx_t;
-
-/* Little-endian reads and writes. */
-
-static uint32_t swap32(uint32_t x) {
-    return (((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8)  |
-            ((x & 0x00ff0000u) >> 8)  | ((x & 0xff000000u) >> 24));
-}
-
-static uint64_t swap64(uint64_t x) {
-    return ((x & UINT64_C(0x00000000000000ff)) << 56) |
-           ((x & UINT64_C(0x000000000000ff00)) << 40) |
-           ((x & UINT64_C(0x0000000000ff0000)) << 24) |
-           ((x & UINT64_C(0x00000000ff000000)) <<  8) |
-           ((x & UINT64_C(0x000000ff00000000)) >>  8) |
-           ((x & UINT64_C(0x0000ff0000000000)) >> 24) |
-           ((x & UINT64_C(0x00ff000000000000)) >> 40) |
-           ((x & UINT64_C(0xff00000000000000)) >> 56);
-}
-
-static int le() { const union { int x; char e; } l = { 1 }; return l.e; }
-static uint32_t read32le(uint32_t *p) { return (le()?*p:swap32(*p)); }
-static uint64_t read64le(uint64_t *p) { return (le()?*p:swap64(*p)); }
-static void write32le(uint32_t *p, uint32_t w) { *p = (le()?w:swap32(w)); }
-static void write64le(uint64_t *p, uint64_t w) { *p = (le()?w:swap64(w)); }
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- *
- *     C h a c h a   S e c t i o n -- Implementation borrowed from D Bernstein
- *
- * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-/* This chacha implementation was adapted from a public domain implementation
- * found at http://cr.yp.to/chacha.html. It has been modified to accommodate
- * 12-byte IVs as specified in RFC 7539.
- */
-
-typedef struct { uint32_t input[16]; } chacha_ctx_t;
-
-static uint32_t rotl(uint32_t x, unsigned n) { return (x<<n) | (x>>(32-n)); }
-
-#define QUARTERROUND(a,b,c,d) \
-  x[a] += x[b]; x[d] = rotl(x[d] ^ x[a],16); \
-  x[c] += x[d]; x[b] = rotl(x[b] ^ x[c],12); \
-  x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); \
-  x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
-
-static void salsa20_wordtobyte(unsigned char output[64], uint32_t input[16])
-{
-  uint32_t i, x[16];
-
-  for (i = 0;i < 16;++i) x[i] = input[i];
-  for (i = HS1_SIV_CHACHA_RNDS;i != 0;i -= 2) {
-    QUARTERROUND( 0, 4, 8,12)
-    QUARTERROUND( 1, 5, 9,13)
-    QUARTERROUND( 2, 6,10,14)
-    QUARTERROUND( 3, 7,11,15)
-    QUARTERROUND( 0, 5,10,15)
-    QUARTERROUND( 1, 6,11,12)
-    QUARTERROUND( 2, 7, 8,13)
-    QUARTERROUND( 3, 4, 9,14)
-  }
-  for (i = 0;i < 16;++i) x[i] += input[i];
-  for (i = 0;i < 16;++i) write32le((uint32_t *)(output + 4 * i),x[i]);
-}
-
-static const char sigma[] = "expand 32-byte k";
-static const char tau[] = "expand 16-byte k";
-
-void chacha_keysetup(chacha_ctx_t *x, const unsigned char *k, unsigned kbits)
-{
-  const char *constants;
-
-  x->input[4] = read32le((uint32_t *)(k + 0));
-  x->input[5] = read32le((uint32_t *)(k + 4));
-  x->input[6] = read32le((uint32_t *)(k + 8));
-  x->input[7] = read32le((uint32_t *)(k + 12));
-  if (kbits == 256) { /* recommended */
-    k += 16;
-    constants = sigma;
-  } else { /* kbits == 128 */
-    constants = tau;
-  }
-  x->input[8] = read32le((uint32_t *)(k + 0));
-  x->input[9] = read32le((uint32_t *)(k + 4));
-  x->input[10] = read32le((uint32_t *)(k + 8));
-  x->input[11] = read32le((uint32_t *)(k + 12));
-  x->input[0] = read32le((uint32_t *)(constants + 0));
-  x->input[1] = read32le((uint32_t *)(constants + 4));
-  x->input[2] = read32le((uint32_t *)(constants + 8));
-  x->input[3] = read32le((uint32_t *)(constants + 12));
-}
-
-void chacha_ivsetup(chacha_ctx_t *x,const unsigned char *iv)
-{
-  x->input[12] = 0;
-  x->input[13] = read32le((uint32_t *)(iv + 0)); /* Modified for 12-byte iv */
-  x->input[14] = read32le((uint32_t *)(iv + 4));
-  x->input[15] = read32le((uint32_t *)(iv + 8));
-}
-
-void chacha(chacha_ctx_t *x,unsigned char *out,unsigned bytes)
-{
-  unsigned char output[64];
-  unsigned i;
-
-  if (!bytes) return;
-  for (;;) {
-    salsa20_wordtobyte(output,x->input);
-    x->input[12] += 1;
-    if (bytes <= 64) {
-      for (i = 0;i < bytes;++i) out[i] = output[i];
-      return;
-    }
-    for (i = 0;i < 64;++i) out[i] = output[i];
-    bytes -= 64;
-    out += 64;
-  }
-}
-
-void hs1siv_chacha256(void *out, unsigned outbytes,
-                      unsigned char *iv, void *user_key)
-{
-    chacha_ctx_t ctx;
-
-    chacha_keysetup(&ctx, user_key, 256);
-    chacha_ivsetup(&ctx,iv);
-    chacha(&ctx,out,outbytes);
-}
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- *
- *     H S 1 - H a s h   S e c t i o n
- *
- * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-static const uint64_t m60 = ((uint64_t)1 << 60) - 1;
-static const uint64_t m61 = ((uint64_t)1 << 61) - 1;
-
-/* Return 62 bits congruent to ak+m % (2^61-1). Assumes 60-bit k,m; 62-bit a */
-static uint64_t poly_step(uint64_t a, uint64_t b, uint64_t k) {
-    #if (__SIZEOF_INT128__)  /* 128-bit type available */
-        unsigned __int128 tmp = (unsigned __int128)a * (unsigned __int128)k;
-        return ((uint64_t)tmp & m61) + (uint64_t)(tmp >> 61) + b;
-    #else
-        uint64_t m = (uint64_t)(uint32_t)(a>>32) * (uint64_t)(uint32_t)k
-                   + (uint64_t)(uint32_t)(k>>32) * (uint64_t)(uint32_t)a;
-        uint64_t h = (uint64_t)(uint32_t)(a>>32) * (uint64_t)(uint32_t)(k>>32);
-        uint64_t l = (uint64_t)(uint32_t)a * (uint64_t)(uint32_t)k;
-        h += (m >> 32); l += (m << 32);  /* h:l += (m>>32):(m<<32)      */
-        /* CAUTION: Potential timing leak. Good compiler will eliminate */
-        if (l < (m << 32)) h += 1;       /* Check for carry from l to h */
-        return (l & m61) + ((h << 3) | (l >> 61)) + b;
-    #endif
-}
-
-static uint64_t poly_finalize(uint64_t a) {
-    a = (a & m61) + (a >> 61);   /* a may be 62 bits, so one final reduction */
-    if (a == m61) a = 0;
-    return a;
-}
-
-#if (HS1_SIV_HASH_RNDS > 4)
-static uint32_t asu_hash(uint64_t x, uint64_t *k) {
-    uint64_t t = k[0] + k[1] * (uint32_t)x + k[2] * (uint32_t)(x >> 32);
-    return (uint32_t)(t >> 32);
-}
-#endif
-
-// Rewritten from prf_hash2 which does two hashes concurrently and confused me
-// when I was tired
-void prf_hash1(uint64_t *h, uint32_t *in, unsigned inbytes, uint32_t *nhkey,
-               uint64_t polykey, uint64_t *asukey) {
-    uint64_t s0 = 1;
-    unsigned i=0, j;
-
-    /* Hash full blocks of HS1_SIV_NH_LEN bytes */
-    while (inbytes >= HS1_SIV_NH_LEN) {
-        uint64_t a0 = 0;
-        for (i=0;i<HS1_SIV_NH_LEN/4;i+=4) {
-            a0 += (uint64_t)(read32le(in+i+0) + nhkey[i+0]) * (read32le(in+i+2) + nhkey[i+2]);
-            a0 += (uint64_t)(read32le(in+i+1) + nhkey[i+1]) * (read32le(in+i+3) + nhkey[i+3]);
-        }
-        s0 = poly_step(s0, a0&m60, polykey);
-        inbytes -= HS1_SIV_NH_LEN;
-        in += HS1_SIV_NH_LEN/4;
-    }
-    /* If partial block remains, hash it */
-    i=0;
-    if (inbytes != 0) {
-        uint64_t a0 = 0;
-        while (inbytes >= 16) {
-            a0 += (uint64_t)(read32le(in+i+0) + nhkey[i+0]) * (read32le(in+i+2) + nhkey[i+2]);
-            a0 += (uint64_t)(read32le(in+i+1) + nhkey[i+1]) * (read32le(in+i+3) + nhkey[i+3]);
-            i += 4; inbytes -= 16;
-        }
-        if (inbytes) {
-            uint32_t tail[4] = {0,0,0,0};
-            for (j=0;j<inbytes;j++)
-                ((unsigned char *)tail)[j] = ((unsigned char *)(in+i))[j];
-            a0 += (uint64_t)(read32le(tail+0) + nhkey[i+0]) * (read32le(tail+2) + nhkey[i+2]);
-            a0 += (uint64_t)(read32le(tail+1) + nhkey[i+1]) * (read32le(tail+3) + nhkey[i+3]);
-            a0 += inbytes;
-        }
-        s0 = poly_step(s0, a0&m60, polykey);
-    }
-    s0 = poly_finalize(s0);
-    #if (HS1_SIV_HASH_RNDS > 4)
-    write64le(h, asu_hash(s0, asukey));
-    #else
-    (void)asukey;  /* Suppress warning */
-    write64le(h,s0);
-    #endif
-}
-
-#include <stdio.h>
-void prf_hash2(uint64_t *h, uint32_t *in, unsigned inbytes, uint32_t *nhkey,
-               uint64_t *polykey, uint64_t *asukey) {
-    uint64_t s0 = 1, s1 = 1;
-    unsigned i=0, j;
-    printf("- - %llu,%llu,\n", s0, s1);
-
-    /* Hash full blocks of HS1_SIV_NH_LEN bytes */
-    while (inbytes >= HS1_SIV_NH_LEN) {
-        uint64_t a0 = 0, a1 = 0;
-        for (i=0;i<HS1_SIV_NH_LEN/4;i+=8) {
-            a0 += (uint64_t)(read32le(in+i+0) + nhkey[i+0]) *
-                            (read32le(in+i+2) + nhkey[i+2]);
-            a0 += (uint64_t)(read32le(in+i+1) + nhkey[i+1]) *
-                            (read32le(in+i+3) + nhkey[i+3]);
-            a1 += (uint64_t)(read32le(in+i+0) + nhkey[i+4]) *
-                            (read32le(in+i+2) + nhkey[i+6]);
-            a1 += (uint64_t)(read32le(in+i+1) + nhkey[i+5]) *
-                            (read32le(in+i+3) + nhkey[i+7]);
-            a0 += (uint64_t)(read32le(in+i+4) + nhkey[i+4]) *
-                            (read32le(in+i+6) + nhkey[i+6]);
-            a0 += (uint64_t)(read32le(in+i+5) + nhkey[i+5]) *
-                            (read32le(in+i+7) + nhkey[i+7]);
-            a1 += (uint64_t)(read32le(in+i+4) + nhkey[i+8]) *
-                            (read32le(in+i+6) + nhkey[i+10]);
-            a1 += (uint64_t)(read32le(in+i+5) + nhkey[i+9]) *
-                            (read32le(in+i+7) + nhkey[i+11]);
-        }
-        s0 = poly_step(s0, a0&m60, polykey[0]);
-        s1 = poly_step(s1, a1&m60, polykey[1]);
-        inbytes -= HS1_SIV_NH_LEN;
-        in += HS1_SIV_NH_LEN/4;
-        printf("-A- %llu,%llu,\n", s0, s1);
-    }
-    /* If partial block remains, hash it */
-    i=0;
-    if (inbytes != 0) {
-        uint64_t a0 = 0, a1 = 0;
-        while (inbytes >= 16) {
-            a0 += (uint64_t)(read32le(in+i+0) + nhkey[i+0]) *
-                            (read32le(in+i+2) + nhkey[i+2]);
-            a0 += (uint64_t)(read32le(in+i+1) + nhkey[i+1]) *
-                            (read32le(in+i+3) + nhkey[i+3]);
-            a1 += (uint64_t)(read32le(in+i+0) + nhkey[i+4]) *
-                            (read32le(in+i+2) + nhkey[i+6]);
-            a1 += (uint64_t)(read32le(in+i+1) + nhkey[i+5]) *
-                            (read32le(in+i+3) + nhkey[i+7]);
-            i += 4; inbytes -= 16;
-        }
-        if (inbytes) {
-            uint32_t tail[4] = {0,0,0,0};
-            for (j=0;j<inbytes;j++)
-                ((unsigned char *)tail)[j] = ((unsigned char *)(in+i))[j];
-            a0 += (uint64_t)(read32le(tail+0) + nhkey[i+0]) *
-                            (read32le(tail+2) + nhkey[i+2]);
-            a0 += (uint64_t)(read32le(tail+1) + nhkey[i+1]) *
-                            (read32le(tail+3) + nhkey[i+3]);
-            a1 += (uint64_t)(read32le(tail+0) + nhkey[i+4]) *
-                            (read32le(tail+2) + nhkey[i+6]);
-            a1 += (uint64_t)(read32le(tail+1) + nhkey[i+5]) *
-                            (read32le(tail+3) + nhkey[i+7]);
-            a0 += inbytes;
-            a1 += inbytes;
-        }
-        s0 = poly_step(s0, a0&m60, polykey[0]);
-        s1 = poly_step(s1, a1&m60, polykey[1]);
-        printf("-C- %llu,%llu,\n", s0, s1);
-    }
-    s0 = poly_finalize(s0);
-    s1 = poly_finalize(s1);
-    #if (HS1_SIV_HASH_RNDS > 4)
-    write64le(h, (uint64_t)asu_hash(s1, asukey+3) << 32 | asu_hash(s0, asukey));
-    #else
-    (void)asukey;  /* Suppress warning */
-    write64le(h,s0);
-    write64le(h+1,s1);
-    #endif
-}
-
-void hs1_hash(hs1siv_ctx_t *ctx, void *in, unsigned inbytes, void *out) {
-    uint64_t *h = (uint64_t *)out;
-    unsigned k = (HS1_SIV_HASH_RNDS > 4 ? 1 : 2);
-
-    prf_hash2(h, (uint32_t *)in, inbytes, (uint32_t *)ctx->nh_key,
-              (uint64_t *)ctx->poly_key, (uint64_t *)ctx->asu_key);
-    #if HS1_SIV_HASH_RNDS > 2
-    prf_hash2(h+k, (uint32_t *)in, inbytes, (uint32_t *)ctx->nh_key+8,
-              (uint64_t *)ctx->poly_key+2, (uint64_t *)ctx->asu_key+6);
-    #if HS1_SIV_HASH_RNDS > 4
-    prf_hash2(h+2*k, (uint32_t *)in, inbytes, (uint32_t *)ctx->nh_key+16,
-              (uint64_t *)ctx->poly_key+4, (uint64_t *)ctx->asu_key+12);
-    #if HS1_SIV_HASH_RNDS > 6
-    prf_hash2(h+3*k, (uint32_t *)in, inbytes, (uint32_t *)ctx->nh_key+24,
-              (uint64_t *)ctx->poly_key+6, (uint64_t *)ctx->asu_key+18);
-    #endif
-    #endif
-    #endif
-}
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- *
- *     P R F   S e c t i o n
- *
- * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-void hs1siv_subkeygen(hs1siv_ctx_t *ctx, void *user_key, unsigned key_bytes)
-{
-    unsigned char chacha_key[32];
-    unsigned char iv[12] = {0};
-    unsigned i=0;
-
-    /* Copy user_key as many times as needed to fill 32 byte chacha key */
-    while (i < 32) {
-        unsigned nbytes = 32-i;
-        if (nbytes > key_bytes) nbytes = key_bytes;
-        memcpy(chacha_key+i,user_key,nbytes);
-        i += nbytes;
-    }
-
-    /* Build key-derivation nonce and fill context */
-    iv[0] = key_bytes;
-    iv[2] = HS1_SIV_SIV_LEN;
-    iv[4] = HS1_SIV_CHACHA_RNDS;
-    iv[5] = HS1_SIV_HASH_RNDS;
-    iv[6] = HS1_SIV_NH_LEN;
-    hs1siv_chacha256(ctx, sizeof(hs1siv_ctx_t), iv, chacha_key);
-
-    /* Pre-process internal keys: make future reads little-endian, mod poly */
-    for (i=0; i<sizeof(ctx->nh_key)/4; i++)
-        ((uint32_t *)ctx->nh_key)[i] = read32le(((uint32_t *)ctx->nh_key)+i);
-    for (i=0; i<sizeof(ctx->poly_key)/8; i++)
-        ((uint64_t *)ctx->poly_key)[i] = read64le(((uint64_t *)ctx->poly_key)+i)
-                                       & m60;
-    #if (HS1_SIV_HASH_RNDS > 4)
-    for (i=0; i<sizeof(ctx->asu_key)/8; i++)
-        ((uint64_t *)ctx->asu_key)[i] = read64le(((uint64_t *)ctx->asu_key)+i);
-    #endif
-}
-
-void hs1(
-    hs1siv_ctx_t *hs1_ctx,
-    void *in, unsigned inbytes,
-    void *iv,
-    void *out, unsigned outbytes
-)
-{
-    #if (HS1_SIV_HASH_RNDS > 4)
-    uint64_t h[HS1_SIV_HASH_RNDS/2];
-    #else
-    uint64_t h[HS1_SIV_HASH_RNDS];
-    #endif
-
-    unsigned i;
-    unsigned char key[32];
-    chacha_ctx_t chacha_ctx;
-
-    hs1_hash(hs1_ctx, in, inbytes, h);
-    memcpy(key, hs1_ctx->chacha_key, 32);
-    for (i=0; i<sizeof(h)/8;i++) ((uint64_t *)key)[i] ^= h[i];
-    chacha_keysetup(&chacha_ctx, key, 256);
-    chacha_ivsetup(&chacha_ctx,(unsigned char *)iv);
-    chacha(&chacha_ctx, (unsigned char *)out, outbytes);
-}
-
-void hs1siv_encrypt(hs1siv_ctx_t *ctx, void *m, unsigned mbytes,
-                    void *a, unsigned abytes, void *n, void *t, void *c)
-{
-    unsigned i;
-    unsigned abuflen = (abytes+HS1_SIV_NH_LEN-1)/HS1_SIV_NH_LEN*HS1_SIV_NH_LEN;
-    unsigned buflen = abuflen + (mbytes+15)/16*16 + 16;
-    uint32_t tmp_t[HS1_SIV_SIV_LEN/4];
-    unsigned char *buf = (unsigned char *)malloc(buflen);
-    memset(buf, 0, buflen);
-    memcpy(buf, a, abytes);
-    memcpy(buf+abuflen, m, mbytes);
-    write32le((uint32_t *)(buf+buflen-16), abytes);
-    write32le((uint32_t *)(buf+buflen-8), mbytes);
-    hs1(ctx, buf, buflen, n, tmp_t, HS1_SIV_SIV_LEN);
-    free(buf);
-    buf = (unsigned char *)malloc(mbytes+64);
-    hs1(ctx, tmp_t, HS1_SIV_SIV_LEN, n, buf, mbytes+64);
-    for (i=0; i<mbytes; i++)
-        buf[64+i] ^= ((unsigned char *)m)[i];
-    memcpy(c,buf+64,mbytes);
-    memcpy(t,tmp_t,HS1_SIV_SIV_LEN);
-    free(buf);
-}
-
-int hs1siv_decrypt(hs1siv_ctx_t *ctx, void *c, unsigned cbytes,
-                   void *a, unsigned abytes, void *n, void *t, void *m)
-{
-    unsigned i;
-    unsigned abuflen = (abytes+HS1_SIV_NH_LEN-1)/HS1_SIV_NH_LEN*HS1_SIV_NH_LEN;
-    unsigned buflen = abuflen + (cbytes+15)/16*16 + 16;
-    unsigned char *maybe_m = (unsigned char *)malloc(cbytes);
-    uint32_t maybe_t[HS1_SIV_SIV_LEN/4];
-    unsigned char *buf = (unsigned char *)malloc(cbytes+64);
-    memcpy(maybe_t,t,HS1_SIV_SIV_LEN);  /* move to aligned buffer */
-    hs1(ctx, maybe_t, HS1_SIV_SIV_LEN, n, buf, cbytes+64);
-    for (i=0; i<cbytes; i++)
-        ((unsigned char *)maybe_m)[i] = ((unsigned char *)c)[i] ^ buf[64+i];
-    free(buf);
-    buf = (unsigned char *)malloc(buflen);
-    memset(buf, 0, buflen);
-    memcpy(buf, a, abytes);
-    memcpy(buf+abuflen, maybe_m, cbytes);
-    write32le((uint32_t *)(buf+buflen-16), abytes);
-    write32le((uint32_t *)(buf+buflen-8), cbytes);
-    hs1(ctx, buf, buflen, n, maybe_t, HS1_SIV_SIV_LEN);
-    free(buf);
-    if (memcmp(t,maybe_t,HS1_SIV_SIV_LEN) == 0) {
-        memcpy(m,maybe_m,cbytes);
-        free(maybe_m);
-        return 0;
-    } else {
-        free(maybe_m);
-        return -1;
-    }
-}
-
-int crypto_aead_encrypt(
-    unsigned char *c,unsigned long long *clen,
-    const unsigned char *m,unsigned long long mlen,
-    const unsigned char *ad,unsigned long long adlen,
-    const unsigned char *nsec,
-    const unsigned char *npub,
-    const unsigned char *k
-)
-{
-    hs1siv_ctx_t ctx;
-    (void)nsec;
-    hs1siv_subkeygen(&ctx, (void *)k, CRYPTO_KEYBYTES);
-    if (clen) *clen = mlen+CRYPTO_ABYTES;
-    hs1siv_encrypt(&ctx, (void *)m, (unsigned)mlen, (void *)ad,
-            (unsigned)adlen, (void *)npub, c+mlen, c);
-    return 0;
-}
-
-int crypto_aead_decrypt(
-    unsigned char *m,unsigned long long *mlen,
-    unsigned char *nsec,
-    const unsigned char *c,unsigned long long clen,
-    const unsigned char *ad,unsigned long long adlen,
-    const unsigned char *npub,
-    const unsigned char *k
-)
-{
-    hs1siv_ctx_t ctx;
-    (void)nsec;
-    if (mlen) *mlen = clen-CRYPTO_ABYTES;
-    hs1siv_subkeygen(&ctx, (void *)k, CRYPTO_KEYBYTES);
-    return hs1siv_decrypt(&ctx, (void *)c, (unsigned)clen-CRYPTO_ABYTES,
-    	    (void *)ad, (unsigned)adlen, (void *)npub,
-    	    (void *)(c+clen-CRYPTO_ABYTES), m);
-}
diff --git a/hs1-siv/ref/main.c b/hs1-siv/ref/main.c
deleted file mode 100644
index e7a51e9e..00000000
--- a/hs1-siv/ref/main.c
+++ /dev/null
@@ -1,124 +0,0 @@
-#include <stdio.h>
-
-/*
-#define CRYPTO_KEYBYTES 16
-#define CRYPTO_NSECBYTES 0
-#define CRYPTO_NPUBBYTES 12
-#define CRYPTO_ABYTES 16
-*/
-
-/* Exactly one of the following should be set */
-#define HS1_SIV_LO  0
-#define HS1_SIV     1
-#define HS1_SIV_HI  0
-
-#define CRYPTO_KEYBYTES 32
-#define CRYPTO_NSECBYTES 0
-#define CRYPTO_NPUBBYTES 12
-#if HS1_SIV_LO
-# define CRYPTO_ABYTES 8
-#elif HS1_SIV
-# define CRYPTO_ABYTES 16
-#elif HS1_SIV_HI
-# define CRYPTO_ABYTES 32
-#else
-# error "one of HS_SIV, HS_SIV_LO or HS_SIV_HI must be 1"
-#endif
-
-#include "encrypt.c"
-
-#if CRYPTO_ABYTES != HS1_SIV_SIV_LEN
-# error "CRYPTO_ABYTES must equal HS1_SIV_SIV_LEN"
-#endif
-
-#define MSG "Hello to the entire wide, round, global globe!"
-#define MSG64 "Hello to the entire wide, round, global globe!!! okookokokokokok"
-//#define MSG ""
-#define KEY "Short keys? Use long for testing"
-#define NONCE "Quack quack!"
-
-void hs1siv_subkeygen(hs1siv_ctx_t *ctx, void *user_key, unsigned key_bytes);
-
-void print_bytes(const unsigned char *bytes, unsigned long long len, unsigned long long wrap) {
-    for (unsigned long long i = 0; i < len; i++) {
-        printf("0x%02x,", bytes[i]);
-        if (i % wrap == wrap - 1)
-            printf("\n");
-    }
-    if (len % wrap != 0)
-        printf("\n");
-}
-
-void print_words(const uint32_t *words, unsigned long long len, unsigned long long wrap) {
-    for (unsigned long long i = 0; i < len; i++) {
-        printf("0x%08x,", words[i]);
-        if (i % wrap == wrap - 1)
-            printf("\n");
-    }
-    if (len % wrap != 0)
-        printf("\n");
-}
-
-void print_doubles(const uint64_t *doubles, unsigned long long len, unsigned long long wrap) {
-    for (unsigned long long i = 0; i < len; i++) {
-        printf("0x%016llx,", doubles[i]);
-        if (i % wrap == wrap - 1)
-            printf("\n");
-    }
-    if (len % wrap != 0)
-        printf("\n");
-}
-
-void subkeygen(hs1siv_ctx_t *ctx) {
-    hs1siv_subkeygen(ctx, KEY, sizeof(KEY) - 1);
-    printf("chacha_key:\n");
-    print_bytes(ctx->chacha_key, sizeof(ctx->chacha_key), 8);
-    printf("nh_key:\n");
-    print_words((void *)ctx->nh_key, sizeof(ctx->nh_key) / 4, 2);
-    printf("poly_key:\n");
-    print_doubles((void *)ctx->poly_key, sizeof(ctx->poly_key) / 8, 1);
-#if (HS1_SIV_HASH_RNDS > 4)
-    printf("asu_key:\n");
-    print_doubles((void *)ctx->asu_key, sizeof(ctx->asu_key) / 8, 1);
-#endif
-}
-
-void hash(hs1siv_ctx_t *ctx) {
-    #if (HS1_SIV_HASH_RNDS > 4)
-    uint64_t h[HS1_SIV_HASH_RNDS/2];
-    #else
-    uint64_t h[HS1_SIV_HASH_RNDS];
-    #endif
-    hs1_hash(ctx, MSG, sizeof(MSG) - 1, h);
-    printf("h:\n");
-#if (HS1_SIV_HASH_RNDS > 4)
-    print_words((uint32_t *)h, sizeof(h) / 4, 1);
-#else
-    print_doubles(h, sizeof(h) / 8, 1);
-#endif
-}
-
-void ciphertext(void) {
-	unsigned char cbuf[1024];
-	unsigned long long clen;
-
-	crypto_aead_encrypt(
-        cbuf, &clen, 
-        MSG, sizeof(MSG) - 1,
-        "", 0,
-        (void *)0,
-        NONCE,
-        KEY
-    );
-
-    printf("ciphertext length: %llu\n", clen);
-    print_bytes(cbuf, clen, 8);
-}
-
-int main() {
-    hs1siv_ctx_t ctx;
-    subkeygen(&ctx);
-    hash(&ctx);
-    ciphertext();
-    return 0;
-}
diff --git a/hs1-siv/ref/run.sh b/hs1-siv/ref/run.sh
deleted file mode 100755
index 00066dfd..00000000
--- a/hs1-siv/ref/run.sh
+++ /dev/null
@@ -1 +0,0 @@
-cc main.c -o main && ./main

From 09817532710b96d1379da914afcd47c883b458c1 Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Fri, 24 Jan 2025 09:46:37 +0100
Subject: [PATCH 04/11] hs1-siv: Cargo.lock, README table entry

Rust 1.81.0 is unable to compile HS1-SIV, but 1.84.0 is.
---
 Cargo.lock | 2 +-
 README.md  | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index ed0842c3..fc6654a5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -336,7 +336,7 @@ checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46"
 
 [[package]]
 name = "hs1-siv"
-version = "0.1.1"
+version = "0.2.0-pre.1"
 dependencies = [
  "aead",
  "chacha20",
diff --git a/README.md b/README.md
index 78990359..db6f99be 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ crate.
 | [`deoxys`]           | [Deoxys-I/II]                | [![crates.io](https://img.shields.io/crates/v/deoxys.svg)](https://crates.io/crates/deoxys) | [![Documentation](https://docs.rs/deoxys/badge.svg)](https://docs.rs/deoxys) | 1.81 |
 | [`eax`]              | [EAX]                        | [![crates.io](https://img.shields.io/crates/v/eax.svg)](https://crates.io/crates/eax) | [![Documentation](https://docs.rs/eax/badge.svg)](https://docs.rs/eax) | 1.81 |
 | [`mgm`]              | [MGM]                        | [![crates.io](https://img.shields.io/crates/v/mgm.svg)](https://crates.io/crates/mgm) | [![Documentation](https://docs.rs/mgm/badge.svg)](https://docs.rs/mgm) | 1.81 |
+| [`hs1-siv`](./hs1-siv) | [HS1-SIV]                    | [![crates.io](https://img.shields.io/crates/v/hs1-siv.svg)](https://crates.io/crates/hs1-siv) | [![Documentation](https://docs.rs/hs1-siv/badge.svg)](https://docs.rs/hs1-siv) | 1.84 |
 
 ## MSRV Policy
 
@@ -88,3 +89,4 @@ dual licensed as above, without any additional terms or conditions.
 [EAX]: https://en.wikipedia.org/wiki/EAX_mode
 [MGM]: https://eprint.iacr.org/2019/123.pdf
 [(X)ChaCha20Poly1305]: https://tools.ietf.org/html/rfc8439
+[HS1-SIV]: https://krovetz.net/csus/papers/hs1-siv_v2.2.pdf

From 16a883f53e8f138615b1e8ec4db2eb1b5ed1ce9c Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Fri, 24 Jan 2025 09:57:47 +0100
Subject: [PATCH 05/11] hs1-siv: add .github workflow

---
 .github/workflows/benches.yml   |  2 +-
 .github/workflows/hs1-siv.yml   | 68 +++++++++++++++++++++++++++++++++
 .github/workflows/workspace.yml |  2 +-
 3 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/hs1-siv.yml

diff --git a/.github/workflows/benches.yml b/.github/workflows/benches.yml
index c05b57b5..1f753b0e 100644
--- a/.github/workflows/benches.yml
+++ b/.github/workflows/benches.yml
@@ -23,7 +23,7 @@ jobs:
     strategy:
       matrix:
         rust:
-          - 1.81.0 # MSRV
+          - 1.84.0 # MSRV
           - stable
     steps:
       - uses: actions/checkout@v4
diff --git a/.github/workflows/hs1-siv.yml b/.github/workflows/hs1-siv.yml
new file mode 100644
index 00000000..a0fa60b1
--- /dev/null
+++ b/.github/workflows/hs1-siv.yml
@@ -0,0 +1,68 @@
+name: hs1-siv
+
+on:
+  pull_request:
+    paths:
+      - ".github/workflows/hs1-siv.yml"
+      - "hs1-siv/**"
+      - "Cargo.*"
+  push:
+    branches: master
+
+defaults:
+  run:
+    working-directory: hs1-siv
+
+env:
+  CARGO_INCREMENTAL: 0
+  RUSTFLAGS: "-Dwarnings"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        rust:
+          - 1.84.0 # MSRV
+          - stable
+        target:
+          - armv7a-none-eabi
+          - thumbv7em-none-eabi
+          - wasm32-unknown-unknown
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: ${{ matrix.rust }}
+          targets: ${{ matrix.target }}
+      - run: cargo build --no-default-features --release --target ${{ matrix.target }}
+
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          # 32-bit Linux
+          - target: i686-unknown-linux-gnu
+            rust: 1.84.0 # MSRV
+            deps: sudo apt update && sudo apt install gcc-multilib
+          - target: i686-unknown-linux-gnu
+            rust: stable
+            deps: sudo apt update && sudo apt install gcc-multilib
+
+          # 64-bit Linux
+          - target: x86_64-unknown-linux-gnu
+            rust: 1.84.0 # MSRV
+          - target: x86_64-unknown-linux-gnu
+            rust: stable
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: ${{ matrix.rust }}
+          targets: ${{ matrix.target }}
+      - run: ${{ matrix.deps }}
+      - run: cargo test --target ${{ matrix.target }} --release --no-default-features
+      - run: cargo test --target ${{ matrix.target }} --release
+      - run: cargo test --target ${{ matrix.target }} --release --all-features
+      - run: cargo build --target ${{ matrix.target }} --benches
diff --git a/.github/workflows/workspace.yml b/.github/workflows/workspace.yml
index 1cd3f63f..e07cbfc4 100644
--- a/.github/workflows/workspace.yml
+++ b/.github/workflows/workspace.yml
@@ -28,6 +28,6 @@ jobs:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@master
         with:
-          toolchain: 1.81.0
+          toolchain: 1.84.0
           components: clippy
       - run: cargo clippy --all --all-features -- -D warnings

From 1dceeb1396dcc1bf3f19f905526b9a768b23c1c8 Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Fri, 24 Jan 2025 16:28:21 +0100
Subject: [PATCH 06/11] hs1-siv: fix max length check for 32 bit targets, also
 add to decrypt

---
 hs1-siv/src/lib.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hs1-siv/src/lib.rs b/hs1-siv/src/lib.rs
index 086df762..fc0f350c 100644
--- a/hs1-siv/src/lib.rs
+++ b/hs1-siv/src/lib.rs
@@ -283,7 +283,7 @@ fn hs1_siv_encrypt<P: Hs1Params>(
     a: &[u8],
     m: &mut [u8],
 ) -> Result<Array<u8, P::L>, aead::Error> {
-    if m.len() > 1 << 38 {
+    if m.len() as u128 > 1 << 38 {
         return Err(aead::Error);
     }
     let t = hs1_tag::<P>(k, a, n, &*m);
@@ -298,6 +298,9 @@ fn hs1_siv_decrypt<P: Hs1Params>(
     m: &mut [u8],
     t: &Array<u8, P::L>,
 ) -> Result<(), aead::Error> {
+    if m.len() as u128 > 1 << 38 {
+        return Err(aead::Error);
+    }
     hs1::<P>(k, &[t], n, 64, m);
     let t2 = hs1_tag::<P>(k, a, n, m);
     let diff = t.iter().zip(t2.iter()).fold(0, |s, (x, y)| s | (x ^ y));

From fa757d9129e721831d7f791231e3b36f51a177a2 Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Fri, 24 Jan 2025 16:28:51 +0100
Subject: [PATCH 07/11] optimize poly_finalize

LLVM isn't smart enough to turn the array-indexing version into a cmovcc
and spills to memory instead, so select the result with a mask.
---
 hs1-siv/src/hash.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/hs1-siv/src/hash.rs b/hs1-siv/src/hash.rs
index 87108996..b4368e83 100644
--- a/hs1-siv/src/hash.rs
+++ b/hs1-siv/src/hash.rs
@@ -175,7 +175,8 @@ const fn poly_step(a: u64, b: u64, k: u64) -> u64 {
 #[inline(always)]
 const fn poly_finalize(a: u64) -> u64 {
     let a = (a & mask(61)).wrapping_add(a >> 61);
-    [a, 0][(a == mask(61)) as usize]
+    let c = (a != mask(61)) as u64 * u64::MAX;
+    a & c
 }
 
 #[inline(always)]
@@ -189,3 +190,14 @@ where
     v.iter_mut().zip(it).for_each(|(w, r)| *w = r);
     v
 }
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn poly_finalize_mod_2_61() {
+        assert_eq!(super::poly_finalize(0), 0);
+        assert_eq!(super::poly_finalize((1 << 61) - 2), (1 << 61) - 2);
+        assert_eq!(super::poly_finalize((1 << 61) - 1), 0);
+        assert_eq!(super::poly_finalize(1 << 61), 1);
+    }
+}

From 323b7eec263db6ecf32620dd6be5afa6e2f2a104 Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Fri, 24 Jan 2025 17:59:39 +0100
Subject: [PATCH 08/11] hs1-siv: Compute NH for each h in parallel

---
 hs1-siv/src/hash.rs | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/hs1-siv/src/hash.rs b/hs1-siv/src/hash.rs
index b4368e83..ac73e744 100644
--- a/hs1-siv/src/hash.rs
+++ b/hs1-siv/src/hash.rs
@@ -64,31 +64,29 @@ impl<P: Hs1Params> Hasher<P> {
         assert!(usize::from(self.bytes) <= self.block_u8().len());
 
         #[inline(always)]
-        fn nh(v1: &[u32], v2: &[u32]) -> u64 {
-            debug_assert_eq!(v1.len(), v2.len());
-            debug_assert_eq!(v1.len() % 4, 0);
-            // I originally used a fancy, compact iterator chain here but the optimizer is shit
-            // (and honestly, this is pretty compact too)
-            let mut s = 0u64;
-            for (x, y) in v1.chunks_exact(4).zip(v2.chunks_exact(4)) {
-                let d = u64::from(x[3].wrapping_add(y[3]));
-                let c = u64::from(x[2].wrapping_add(y[2]));
-                let b = u64::from(x[1].wrapping_add(y[1]));
-                let a = u64::from(x[0].wrapping_add(y[0]));
-                s = s.wrapping_add(a * c).wrapping_add(b * d);
-            }
-            s
+        fn nh_step(&[ax, bx, cx, dx]: &[u32; 4], &[ay, by, cy, dy]: &[u32; 4]) -> u64 {
+            let d = u64::from(dx.wrapping_add(dy));
+            let c = u64::from(cx.wrapping_add(cy));
+            let b = u64::from(bx.wrapping_add(by));
+            let a = u64::from(ax.wrapping_add(ay));
+            (a * c).wrapping_add(b * d)
         }
 
         let m_ints = &self.block;
 
         let block16_count = usize::from(((self.bytes + 15) / 16).max(1));
 
-        self.k
-            .nh
-            .windows(B16::<P>::to_usize() / 4)
-            .step_by(4)
-            .map(|k_n_i| nh(&k_n_i[..block16_count * 4], &m_ints[..block16_count * 4]))
+        let mut nh = Array::<u64, P::T>::default();
+        for (i0, m_ints_i) in m_ints.chunks_exact(4).enumerate().take(block16_count) {
+            for (nh_i, k_n_i_i) in nh.iter_mut().zip(self.k.nh.chunks_exact(4).skip(i0)) {
+                let k_n_i_i = k_n_i_i.try_into().expect("exactly 4 elements");
+                let m_ints_i = m_ints_i.try_into().expect("exactly 4 elements");
+                let s = nh_step(k_n_i_i, m_ints_i);
+                *nh_i = nh_i.wrapping_add(s);
+            }
+        }
+
+        nh.iter()
             .map(|nh_i| (nh_i + (u64::from(self.bytes) & mask(4))) & mask(60))
             .zip(self.k.poly.iter())
             .zip(self.h.iter_mut())

From 1ff89ad5d3c4d2fcb5f4fe9f9a2a64d0282cb809 Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Fri, 24 Jan 2025 21:35:58 +0100
Subject: [PATCH 09/11] hs1-siv: add SSE2 version of Hasher::update_block

Despite my best efforts I seem unable to get LLVM to emit vectorized
code, even though it should be obviously beneficial.

I suspect LLVM is thrown off by the 64 bit multiply, which is missing in
the SSE2 instruction set. It did take me a while to figure out that
casting an array of __m128i to [u64; 2] would end up the most
performant.

The SSE2 version is about 20% faster for me, so it is a substantial
improvement.

Also, inline(always) on pretty much everything is now beneficial,
whereas before it led to significant regressions. It does create a fair
bit of code bloat though.
---
 hs1-siv/src/hash.rs      | 58 +++++++++++++++++++++-------------------
 hs1-siv/src/hash/sse2.rs | 49 +++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 27 deletions(-)
 create mode 100644 hs1-siv/src/hash/sse2.rs

diff --git a/hs1-siv/src/hash.rs b/hs1-siv/src/hash.rs
index ac73e744..0fb56fa2 100644
--- a/hs1-siv/src/hash.rs
+++ b/hs1-siv/src/hash.rs
@@ -1,9 +1,10 @@
-use super::{
-    mask, Array, ArraySize, False, Gr, Hs1HashKey, Hs1Params, PhantomData, Quot, True, B16, U4,
-};
+use super::{mask, Array, False, Gr, Hs1HashKey, Hs1Params, PhantomData, Quot, True, B16, U4};
 use aead::array::typenum::Unsigned;
 use core::mem;
 
+#[cfg(target_feature = "sse2")]
+mod sse2;
+
 #[derive(Clone)]
 pub struct Hasher<P: Hs1Params> {
     k: Hs1HashKey<P>,
@@ -53,41 +54,52 @@ impl<P: Hs1Params> Hasher<P> {
     pub fn new(k: &Hs1HashKey<P>) -> Self {
         Self {
             k: k.clone(),
-            h: array_from_iter(core::iter::repeat(1)),
-            block: Array::default(),
+            h: Array::from_fn(|_| 1),
+            block: Default::default(),
             bytes: 0,
             _marker: PhantomData,
         }
     }
 
+    #[inline(always)]
     fn update_block(&mut self) -> &mut Self {
         assert!(usize::from(self.bytes) <= self.block_u8().len());
 
+        #[cfg(target_feature = "sse2")]
+        if true {
+            // SAFETY: sse2 is supported
+            unsafe {
+                return self.update_block_sse2();
+            }
+        }
+
         #[inline(always)]
-        fn nh_step(&[ax, bx, cx, dx]: &[u32; 4], &[ay, by, cy, dy]: &[u32; 4]) -> u64 {
-            let d = u64::from(dx.wrapping_add(dy));
-            let c = u64::from(cx.wrapping_add(cy));
-            let b = u64::from(bx.wrapping_add(by));
+        fn nh_step(&[ax, bx, cx, dx]: &[u32; 4], &[ay, by, cy, dy]: &[u32; 4]) -> [u64; 2] {
             let a = u64::from(ax.wrapping_add(ay));
-            (a * c).wrapping_add(b * d)
+            let b = u64::from(bx.wrapping_add(by));
+            let c = u64::from(cx.wrapping_add(cy));
+            let d = u64::from(dx.wrapping_add(dy));
+            [a * c, b * d]
         }
 
         let m_ints = &self.block;
 
         let block16_count = usize::from(((self.bytes + 15) / 16).max(1));
 
-        let mut nh = Array::<u64, P::T>::default();
+        let mut nh = Array::<[u64; 2], P::T>::default();
         for (i0, m_ints_i) in m_ints.chunks_exact(4).enumerate().take(block16_count) {
-            for (nh_i, k_n_i_i) in nh.iter_mut().zip(self.k.nh.chunks_exact(4).skip(i0)) {
+            for ([nh_i0, nh_i1], k_n_i_i) in nh.iter_mut().zip(self.k.nh.chunks_exact(4).skip(i0)) {
                 let k_n_i_i = k_n_i_i.try_into().expect("exactly 4 elements");
                 let m_ints_i = m_ints_i.try_into().expect("exactly 4 elements");
-                let s = nh_step(k_n_i_i, m_ints_i);
-                *nh_i = nh_i.wrapping_add(s);
+                let [s0, s1] = nh_step(k_n_i_i, m_ints_i);
+                *nh_i0 = nh_i0.wrapping_add(s0);
+                *nh_i1 = nh_i1.wrapping_add(s1);
             }
         }
 
         nh.iter()
-            .map(|nh_i| (nh_i + (u64::from(self.bytes) & mask(4))) & mask(60))
+            .map(|&[ac, bd]| ac.wrapping_add(bd))
+            .map(|nh_i| (nh_i.wrapping_add(u64::from(self.bytes) & mask(4))) & mask(60))
             .zip(self.k.poly.iter())
             .zip(self.h.iter_mut())
             .for_each(|((a_i, &k_p_i), h_i)| *h_i = poly_step(*h_i, a_i, k_p_i));
@@ -97,6 +109,7 @@ impl<P: Hs1Params> Hasher<P> {
         self
     }
 
+    #[inline(always)]
     pub fn update<'a>(&'a mut self, bytes: &[u8]) -> &'a mut Self {
         assert!(usize::from(self.bytes) < self.block_u8().len());
         let start = usize::from(self.bytes);
@@ -123,6 +136,7 @@ impl<P: Hs1Params> Hasher<P> {
         self
     }
 
+    #[inline(always)]
     pub(crate) fn pad_to(&mut self, bits: u8) -> &mut Self {
         debug_assert!(1 << bits <= B16::<P>::to_u8());
         let m = mask(bits) as u8;
@@ -131,6 +145,7 @@ impl<P: Hs1Params> Hasher<P> {
     }
 
     // TODO &mut self helps avoid needing to clone(), but might be unintuitive
+    #[inline(always)]
     pub fn finalize(&mut self) -> Array<Output<P>, P::T> {
         // TODO we need to handle empty data properly
         // However, see the note in crate::test::test_vectors::hash_me_empty
@@ -146,6 +161,7 @@ impl<P: Hs1Params> Hasher<P> {
         out
     }
 
+    #[inline(always)]
     fn block_u8(&mut self) -> &mut Array<u8, B16<P>> {
         const {
             assert!(
@@ -177,18 +193,6 @@ const fn poly_finalize(a: u64) -> u64 {
     a & c
 }
 
-#[inline(always)]
-fn array_from_iter<I, L>(it: I) -> Array<I::Item, L>
-where
-    I: IntoIterator,
-    L: ArraySize,
-    I::Item: Default,
-{
-    let mut v = Array::<I::Item, L>::default();
-    v.iter_mut().zip(it).for_each(|(w, r)| *w = r);
-    v
-}
-
 #[cfg(test)]
 mod test {
     #[test]
diff --git a/hs1-siv/src/hash/sse2.rs b/hs1-siv/src/hash/sse2.rs
new file mode 100644
index 00000000..57c1e2c4
--- /dev/null
+++ b/hs1-siv/src/hash/sse2.rs
@@ -0,0 +1,49 @@
+use super::{mask, poly_step, Array, Hasher, Hs1Params};
+use core::arch::x86_64::*;
+
+impl<P: Hs1Params> Hasher<P> {
+    #[inline(always)]
+    #[cfg(target_feature = "sse2")]
+    pub(super) unsafe fn update_block_sse2(&mut self) -> &mut Self {
+        assert!(usize::from(self.bytes) <= self.block_u8().len());
+
+        #[inline(always)]
+        unsafe fn nh_step(x: &[u32; 4], y: &[u32; 4]) -> __m128i {
+            let x = x.as_ptr().cast::<__m128i>().read_unaligned();
+            let y = y.as_ptr().cast::<__m128i>().read_unaligned();
+            let xy = _mm_add_epi32(x, y);
+
+            let a_b = _mm_shuffle_epi32::<0b00_01_00_00>(xy);
+            let c_d = _mm_shuffle_epi32::<0b00_11_00_10>(xy);
+            _mm_mul_epu32(a_b, c_d)
+        }
+
+        let m_ints = &self.block;
+
+        let block16_count = usize::from(((self.bytes + 15) / 16).max(1));
+
+        let mut nh: Array<__m128i, P::T> = Array::from_fn(|_| _mm_setzero_si128());
+        for (i0, m_ints_i) in m_ints.chunks_exact(4).enumerate().take(block16_count) {
+            for (nh_i, k_n_i_i) in nh.iter_mut().zip(self.k.nh.chunks_exact(4).skip(i0)) {
+                let k_n_i_i = k_n_i_i.try_into().expect("exactly 4 elements");
+                let m_ints_i = m_ints_i.try_into().expect("exactly 4 elements");
+                let s = nh_step(k_n_i_i, m_ints_i);
+                *nh_i = _mm_add_epi64(*nh_i, s);
+            }
+        }
+
+        nh.iter()
+            .map(|nh_i| {
+                let &[ac, bd] = &*(nh_i as *const _ as *const [u64; 2]);
+                ac.wrapping_add(bd)
+            })
+            .map(|nh_i| (nh_i.wrapping_add(u64::from(self.bytes) & mask(4))) & mask(60))
+            .zip(self.k.poly.iter())
+            .zip(self.h.iter_mut())
+            .for_each(|((a_i, &k_p_i), h_i)| *h_i = poly_step(*h_i, a_i, k_p_i));
+
+        self.bytes = 0;
+
+        self
+    }
+}

From feb068b3ef2a676b4c6b683d923a7287458bf8cd Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Fri, 24 Jan 2025 21:49:45 +0100
Subject: [PATCH 10/11] hs1-siv: account for i686 in hash/sse2.rs

---
 hs1-siv/src/hash/sse2.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hs1-siv/src/hash/sse2.rs b/hs1-siv/src/hash/sse2.rs
index 57c1e2c4..4f748aee 100644
--- a/hs1-siv/src/hash/sse2.rs
+++ b/hs1-siv/src/hash/sse2.rs
@@ -1,4 +1,7 @@
 use super::{mask, poly_step, Array, Hasher, Hs1Params};
+#[cfg(target_arch = "x86")]
+use core::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
 use core::arch::x86_64::*;
 
 impl<P: Hs1Params> Hasher<P> {

From dd516daeb4da2f12b729793ba06e0d49ee2ce4a3 Mon Sep 17 00:00:00 2001
From: David Hoppenbrouwers <david@salt-inc.org>
Date: Fri, 24 Jan 2025 23:11:42 +0100
Subject: [PATCH 11/11] hs1-siv: fix Hasher using stale (non-zero) bytes for
 last block

oops...
---
 hs1-siv/src/hash.rs |  2 ++
 hs1-siv/src/lib.rs  | 78 +++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/hs1-siv/src/hash.rs b/hs1-siv/src/hash.rs
index 0fb56fa2..11e307b8 100644
--- a/hs1-siv/src/hash.rs
+++ b/hs1-siv/src/hash.rs
@@ -151,6 +151,8 @@ impl<P: Hs1Params> Hasher<P> {
         // However, see the note in crate::test::test_vectors::hash_me_empty
         use sealed::Hs1HashFinal;
         if self.bytes != 0 {
+            let offt = usize::from(self.bytes);
+            self.block_u8()[offt..].fill(0);
             self.update_block();
         }
         let mut out = Array::<Output<P>, P::T>::default();
diff --git a/hs1-siv/src/lib.rs b/hs1-siv/src/lib.rs
index fc0f350c..696df338 100644
--- a/hs1-siv/src/lib.rs
+++ b/hs1-siv/src/lib.rs
@@ -388,6 +388,7 @@ mod test {
     use aead::{Aead, KeyInit};
 
     const MSG: &[u8] = b"Hello to the entire wide, round, global globe!";
+    const MSG_LONG: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
     const KEY: &[u8; 32] = b"Short keys? Use long for testing";
     const NONCE: &[u8; 12] = b"Quack quack!";
 
@@ -494,10 +495,10 @@ mod test {
 
         #[test]
         fn hash_me_64() {
-            const MSG64: &[u8; 64] =
+            const MSG_64: &[u8; 64] =
                 b"Hello to the entire wide, round, global globe!!! okookokokokokok";
             let k = hs1_subkeygen::<params::Hs1SivMe>(KEY);
-            let h = Hasher::new(&k.hash).update(MSG64).finalize();
+            let h = Hasher::new(&k.hash).update(MSG_64).finalize();
             assert_eq!(
                 h,
                 [
@@ -510,6 +511,42 @@ mod test {
             );
         }
 
+        #[test]
+        fn hash_me_65() {
+            const MSG_65: &[u8; 65] =
+                b"Hello to the entire wide, round, global globe!!! okookokokokokok?";
+            let k = hs1_subkeygen::<params::Hs1SivMe>(KEY);
+            let h = Hasher::new(&k.hash).update(MSG_65).finalize();
+            assert_eq!(
+                h,
+                [
+                    0x10619b1a23127759,
+                    0x160f2049c69ee554,
+                    0x1de3d0b0f4d56bec,
+                    0x03e8ec8fdef39c71,
+                ]
+                .map(u64::to_le_bytes)
+            );
+        }
+
+        #[test]
+        fn hash_me_128() {
+            const MSG_128: &[u8; 128] =
+                b"Hello to the entire wide, round, global globe!!! okookokokokokokHello to the entire wide, round, global globe!!! okookokokokokok";
+            let k = hs1_subkeygen::<params::Hs1SivMe>(KEY);
+            let h = Hasher::new(&k.hash).update(MSG_128).finalize();
+            assert_eq!(
+                h,
+                [
+                    0x07d3154786d50a10,
+                    0x145bceb11f846780,
+                    0x0321fdeb01118846,
+                    0x0a0ac6ce29b11e5a,
+                ]
+                .map(u64::to_le_bytes)
+            );
+        }
+
         #[test]
         fn hash_lo() {
             let k = hs1_subkeygen::<params::Hs1SivLo>(KEY);
@@ -531,6 +568,43 @@ mod test {
             );
         }
 
+        #[test]
+        fn hash_lo_long() {
+            let k = hs1_subkeygen::<params::Hs1SivLo>(KEY);
+            let h = Hasher::new(&k.hash).update(MSG_LONG).finalize();
+            assert_eq!(
+                h,
+                [0x0b65743a2f4c73aa, 0x1863d3ec1873cd72,].map(u64::to_le_bytes)
+            );
+        }
+
+        #[test]
+        fn hash_me_long() {
+            let k = hs1_subkeygen::<params::Hs1SivMe>(KEY);
+            let h = Hasher::new(&k.hash).update(MSG_LONG).finalize();
+            assert_eq!(
+                h,
+                [
+                    0x1f8e6282cbc4455f,
+                    0x0e6ade357355de7b,
+                    0x1a5834576032c7b0,
+                    0x1bd063cb8b70044a,
+                ]
+                .map(u64::to_le_bytes)
+            );
+        }
+
+        #[test]
+        fn hash_hi_long() {
+            let k = hs1_subkeygen::<params::Hs1SivHi>(KEY);
+            let h = Hasher::new(&k.hash).update(MSG_LONG).finalize();
+            assert_eq!(
+                h,
+                [0x52645829, 0x8f0c0687, 0x01f33121, 0xc94264e3, 0x85dc8143, 0xc8fd435e,]
+                    .map(u32::to_le_bytes)
+            );
+        }
+
         // TODO I'm 99% sure this is wrong according to the paper,
         // but it shouldn't be an issue as long as we don't expose the hasher
         // to the public...