From 104e82a8b923f02df813733238267f9e82e51aac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Thu, 17 Apr 2025 16:28:04 +0400 Subject: [PATCH 1/2] blobby: add encode and decode bin utils --- Cargo.lock | 51 ++++++++-------- blobby/Cargo.toml | 4 +- blobby/README.md | 119 +++++++++++++++++++++++++++++++++++++ blobby/examples/convert.rs | 64 -------------------- blobby/src/bin/decode.rs | 58 ++++++++++++++++++ blobby/src/bin/encode.rs | 66 ++++++++++++++++++++ blobby/src/lib.rs | 46 +------------- 7 files changed, 269 insertions(+), 139 deletions(-) create mode 100644 blobby/README.md delete mode 100644 blobby/examples/convert.rs create mode 100644 blobby/src/bin/decode.rs create mode 100644 blobby/src/bin/encode.rs diff --git a/Cargo.lock b/Cargo.lock index 451d022f..0a6edf21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,9 +5,6 @@ version = 4 [[package]] name = "blobby" version = "0.4.0-pre.0" -dependencies = [ - "hex", -] [[package]] name = "block-buffer" @@ -50,9 +47,9 @@ version = "0.1.0" [[package]] name = "const-oid" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cb3c4a0d3776f7535c32793be81d6d5fec0d48ac70955d9834e643aa249a52f" +checksum = "0dabb6555f92fb9ee4140454eb5dcd14c7960e1225c6d1a6cc361f032947713e" [[package]] name = "cpufeatures" @@ -151,9 +148,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "keccak" @@ -166,9 +163,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.170" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "memchr" @@ -182,9 +179,9 @@ version = "0.4.0-pre" [[package]] name = "prettyplease" -version = "0.2.29" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" +checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" dependencies = [ "proc-macro2", "syn", @@ -192,42 +189,42 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "ryu" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "serde" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.218" +version = "1.0.219" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", @@ -236,9 +233,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.139" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -269,9 +266,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.98" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -286,9 +283,9 @@ checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] name = "unicode-ident" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "wycheproof2blb" diff --git a/blobby/Cargo.toml b/blobby/Cargo.toml index 1da6b0d3..8df2f91c 100644 --- a/blobby/Cargo.toml +++ b/blobby/Cargo.toml @@ -9,6 +9,4 @@ repository = "https://github.com/RustCrypto/utils" categories = ["no-std"] edition = "2024" rust-version = "1.85" - -[dev-dependencies] -hex = "0.4" +readme = "README.md" diff --git a/blobby/README.md b/blobby/README.md new file mode 100644 index 00000000..b5e1e0cf --- /dev/null +++ b/blobby/README.md @@ -0,0 +1,119 @@ +# [RustCrypto]: Blobby + +[![crate][crate-image]][crate-link] +[![Docs][docs-image]][docs-link] +[![Build 
Status][build-image]][build-link] +![Apache2/MIT licensed][license-image] +![Rust Version][rustc-image] +[![Project Chat][chat-image]][chat-link] + +Iterators over a simple binary blob storage. + +## Examples +``` +let buf = b"\x02\x05hello\x06world!\x01\x02 \x00\x03\x06:::\x03\x01\x00"; +let mut v = blobby::BlobIterator::new(buf).unwrap(); +assert_eq!(v.next(), Some(Ok(&b"hello"[..]))); +assert_eq!(v.next(), Some(Ok(&b" "[..]))); +assert_eq!(v.next(), Some(Ok(&b""[..]))); +assert_eq!(v.next(), Some(Ok(&b"world!"[..]))); +assert_eq!(v.next(), Some(Ok(&b":::"[..]))); +assert_eq!(v.next(), Some(Ok(&b"world!"[..]))); +assert_eq!(v.next(), Some(Ok(&b"hello"[..]))); +assert_eq!(v.next(), Some(Ok(&b""[..]))); +assert_eq!(v.next(), None); + +let mut v = blobby::Blob2Iterator::new(buf).unwrap(); +assert_eq!(v.next(), Some(Ok([&b"hello"[..], b" "]))); +assert_eq!(v.next(), Some(Ok([&b""[..], b"world!"]))); +assert_eq!(v.next(), Some(Ok([&b":::"[..], b"world!"]))); +assert_eq!(v.next(), Some(Ok([&b"hello"[..], b""]))); +assert_eq!(v.next(), None); + +let mut v = blobby::Blob4Iterator::new(buf).unwrap(); +assert_eq!(v.next(), Some(Ok([&b"hello"[..], b" ", b"", b"world!"]))); +assert_eq!(v.next(), Some(Ok([&b":::"[..], b"world!", b"hello", b""]))); +assert_eq!(v.next(), None); +``` + +## Encoding and decoding + +This crate provides encoding and decoding utilities for converting between +the blobby format and text file with hex-encoded strings. + +Let's say we have the following test vectors for a 64-bit hash function: +```text +0123456789ABCDEF0123456789ABCDEF +217777950848CECD + +F7CD1446C9161C0A +FFFEFD +80081C35AA43F640 + +``` +The first, third, and fifth lines are hex-encoded hash inputs, while the second, +fourth, and sixth lines are hex-encoded hash outputs for input on the previous line. +Note that the file should contain a trailing empty line (i.e. every data line should end +with `\n`). 
+ +We can encode this file into the Blobby format by running the following command: +```sh +cargo run --release --bin encode -- /path/to/input.txt /path/to/output.blb +``` + +This will create a file which then can be read using `blobby::Blob2Iterator`. + +To see contents of a Blobby file you can use the following command: +```sh +cargo run --release --bin decode -- /path/to/input.blb /path/to/output.txt +``` +The output file will contain a sequence of hex-encoded byte strings stored +in the input file. + +## Storage format + +Storage format represents a sequence of binary blobs. The format uses +git-flavored [variable-length quantity][0] (VLQ) for encoding unsigned +numbers. + +File starts with a number of de-duplicated blobs `d`. It is followed by `d` +entries. Each entry starts with an integer `m`, immediately followed by `m` +bytes representing de-duplicated binary blob. + +Next follows unspecified number of entries representing sequence of stored +blobs. Each entry starts with an unsigned integer `n`. The least significant +bit of this integer is used as a flag. If the flag is equal to 0, then the +number is followed by `n >> 1` bytes, representing a stored binary blob. +Otherwise the entry references a de-duplicated entry number `n >> 1`. + +[0]: https://en.wikipedia.org/wiki/Variable-length_quantity + +## License + +Licensed under either of: + + * [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + * [MIT license](http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
+ +[//]: # (badges) + +[crate-image]: https://img.shields.io/crates/v/blobby.svg +[crate-link]: https://crates.io/crates/blobby +[docs-image]: https://docs.rs/blobby/badge.svg +[docs-link]: https://docs.rs/blobby/ +[license-image]: https://img.shields.io/badge/license-Apache2.0/MIT-blue.svg +[rustc-image]: https://img.shields.io/badge/rustc-1.85+-blue.svg +[chat-image]: https://img.shields.io/badge/zulip-join_chat-blue.svg +[chat-link]: https://rustcrypto.zulipchat.com/#narrow/stream/260052-utils +[build-image]: https://github.com/RustCrypto/utils/actions/workflows/blobby.yml/badge.svg?branch=master +[build-link]: https://github.com/RustCrypto/utils/actions/workflows/blobby.yml?query=branch:master + +[//]: # (general links) + +[RustCrypto]: https://github.com/rustcrypto diff --git a/blobby/examples/convert.rs b/blobby/examples/convert.rs deleted file mode 100644 index a65bf281..00000000 --- a/blobby/examples/convert.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! Convert utility -use blobby::{BlobIterator, encode_blobs}; -use std::io::{self, BufRead, BufReader, BufWriter, Write}; -use std::{env, error::Error, fs::File}; - -fn encode(reader: impl BufRead, mut writer: impl Write) -> io::Result { - let mut blobs = Vec::new(); - for line in reader.lines() { - let blob = hex::decode(line?.as_str()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - blobs.push(blob); - } - let (data, idx_len) = encode_blobs(&blobs); - let data_len = data.len(); - println!("Index len: {:?}", idx_len); - writer.write_all(&data).map(|_| data_len) -} - -fn decode(mut reader: R, mut writer: W) -> io::Result { - let mut data = Vec::new(); - reader.read_to_end(&mut data)?; - let res: Vec<_> = BlobIterator::new(&data) - .map_err(|e| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("invalid blobby data: {:?}", e), - ) - })? 
- .collect(); - for blob in res.iter() { - let blob = blob.map_err(|e| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("invalid blobby data: {:?}", e), - ) - })?; - writer.write_all(hex::encode(blob).as_bytes())?; - writer.write_all(b"\n")?; - } - Ok(res.len()) -} - -fn main() -> Result<(), Box> { - let args: Vec = env::args().skip(1).collect(); - let is_encode = match args[0].as_str() { - "encode" => true, - "decode" => false, - _ => Err("unknown mode")?, - }; - let in_path = args[1].as_str(); - let out_path = args[2].as_str(); - let in_file = BufReader::new(File::open(in_path)?); - let out_file = BufWriter::new(File::create(out_path)?); - - let n = if is_encode { - encode(in_file, out_file)? - } else { - decode(in_file, out_file)? - }; - - println!("Processed {} record(s)", n); - - Ok(()) -} diff --git a/blobby/src/bin/decode.rs b/blobby/src/bin/decode.rs new file mode 100644 index 00000000..4e6d9c98 --- /dev/null +++ b/blobby/src/bin/decode.rs @@ -0,0 +1,58 @@ +//! Decoding utility +use blobby::BlobIterator; +use std::io::{self, BufRead, BufReader, BufWriter, Write}; +use std::{env, error::Error, fs::File}; + +fn encode_hex(data: &[u8]) -> String { + let mut res = String::with_capacity(2 * data.len()); + for &byte in data { + res.push_str(&format!("{byte:02X}")); + } + res +} + +fn decode(mut reader: R, mut writer: W) -> io::Result { + let mut data = Vec::new(); + reader.read_to_end(&mut data)?; + let res = BlobIterator::new(&data) + .map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid blobby data: {:?}", e), + ) + })? 
+ .collect::>(); + for blob in res.iter() { + let blob = blob.map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid blobby data: {:?}", e), + ) + })?; + writer.write_all(encode_hex(blob).as_bytes())?; + writer.write_all(b"\n")?; + } + Ok(res.len()) +} + +fn main() -> Result<(), Box> { + let args: Vec = env::args().skip(1).collect(); + + if args.is_empty() { + println!( + "Blobby decoding utility.\n\ + Usage: decode " + ); + return Ok(()); + } + + let in_path = args[0].as_str(); + let out_path = args[1].as_str(); + let in_file = BufReader::new(File::open(in_path)?); + let out_file = BufWriter::new(File::create(out_path)?); + + let n = decode(in_file, out_file)?; + println!("Processed {n} record(s)"); + + Ok(()) +} diff --git a/blobby/src/bin/encode.rs b/blobby/src/bin/encode.rs new file mode 100644 index 00000000..ecd423a3 --- /dev/null +++ b/blobby/src/bin/encode.rs @@ -0,0 +1,66 @@ +//! Encoding utility +use blobby::encode_blobs; +use std::io::{self, BufRead, BufReader, BufWriter, Write}; +use std::{env, error::Error, fs::File}; + +fn decode_hex_char(b: u8) -> io::Result { + let res = match b { + b'0'..=b'9' => b - b'0', + b'a'..=b'f' => b - b'a' + 10, + b'A'..=b'F' => b - b'A' + 10, + _ => { + let msg = "Invalid hex string: invalid byte {b}"; + return Err(io::Error::new(io::ErrorKind::InvalidData, msg)); + } + }; + Ok(res) +} + +fn decode_hex(data: &str) -> io::Result> { + if data.len() % 2 != 0 { + let msg = "Invalid hex string: length is not even"; + return Err(io::Error::new(io::ErrorKind::InvalidData, msg)); + } + data.as_bytes() + .chunks_exact(2) + .map(|chunk| { + let a = decode_hex_char(chunk[0])?; + let b = decode_hex_char(chunk[1])?; + Ok((a << 4) | b) + }) + .collect() +} + +fn encode(reader: impl BufRead, mut writer: impl Write) -> io::Result { + let mut blobs = Vec::new(); + for line in reader.lines() { + let blob = decode_hex(&line?)?; + blobs.push(blob); + } + let (data, idx_len) = encode_blobs(&blobs); + println!("Index 
len: {:?}", idx_len); + writer.write_all(&data)?; + Ok(blobs.len()) +} + +fn main() -> Result<(), Box> { + let args: Vec = env::args().skip(1).collect(); + + if args.is_empty() { + println!( + "Blobby encoding utility.\n\ + Usage: encode " + ); + return Ok(()); + } + + let in_path = args[0].as_str(); + let out_path = args[1].as_str(); + let in_file = BufReader::new(File::open(in_path)?); + let out_file = BufWriter::new(File::create(out_path)?); + + let n = encode(in_file, out_file)?; + println!("Processed {n} record(s)"); + + Ok(()) +} diff --git a/blobby/src/lib.rs b/blobby/src/lib.rs index f0538dcb..d95dc42b 100644 --- a/blobby/src/lib.rs +++ b/blobby/src/lib.rs @@ -1,49 +1,5 @@ -//! Iterators over a simple binary blob storage. -//! -//! # Storage format -//! Storage format represents a sequence of binary blobs. The format uses -//! git-flavored [variable-length quantity][0] (VLQ) for encoding unsigned -//! numbers. -//! -//! File starts with a number of de-duplicated blobs `d`. It followed by `d` -//! entries. Each entry starts with an integer `m`, immediately folowed by `m` -//! bytes representing de-duplicated binary blob. -//! -//! Next follows unspecified number of entries representing sequence of stored -//! blobs. Each entry starts with an unsigned integer `n`. The least significant -//! bit of this integer is used as a flag. If the flag is equal to 0, then the -//! number is followed by `n >> 1` bytes, representing a stored binary blob. -//! Otherwise the entry references a de-duplicated entry number `n >> 1`. -//! -//! # Examples -//! ``` -//! let buf = b"\x02\x05hello\x06world!\x01\x02 \x00\x03\x06:::\x03\x01\x00"; -//! let mut v = blobby::BlobIterator::new(buf).unwrap(); -//! assert_eq!(v.next(), Some(Ok(&b"hello"[..]))); -//! assert_eq!(v.next(), Some(Ok(&b" "[..]))); -//! assert_eq!(v.next(), Some(Ok(&b""[..]))); -//! assert_eq!(v.next(), Some(Ok(&b"world!"[..]))); -//! assert_eq!(v.next(), Some(Ok(&b":::"[..]))); -//! 
assert_eq!(v.next(), Some(Ok(&b"world!"[..]))); -//! assert_eq!(v.next(), Some(Ok(&b"hello"[..]))); -//! assert_eq!(v.next(), Some(Ok(&b""[..]))); -//! assert_eq!(v.next(), None); -//! -//! let mut v = blobby::Blob2Iterator::new(buf).unwrap(); -//! assert_eq!(v.next(), Some(Ok([&b"hello"[..], b" "]))); -//! assert_eq!(v.next(), Some(Ok([&b""[..], b"world!"]))); -//! assert_eq!(v.next(), Some(Ok([&b":::"[..], b"world!"]))); -//! assert_eq!(v.next(), Some(Ok([&b"hello"[..], b""]))); -//! assert_eq!(v.next(), None); -//! -//! let mut v = blobby::Blob4Iterator::new(buf).unwrap(); -//! assert_eq!(v.next(), Some(Ok([&b"hello"[..], b" ", b"", b"world!"]))); -//! assert_eq!(v.next(), Some(Ok([&b":::"[..], b"world!", b"hello", b""]))); -//! assert_eq!(v.next(), None); -//! ``` -//! -//! [0]: https://en.wikipedia.org/wiki/Variable-length_quantity #![no_std] +#![doc = include_str!("../README.md")] #![doc( html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg", html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg" From 9b52b7fd8180068aa6130cd9b124662e328dd38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Mon, 21 Apr 2025 08:52:51 +0400 Subject: [PATCH 2/2] Tweak docs --- blobby/README.md | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/blobby/README.md b/blobby/README.md index b5e1e0cf..b6efb3b1 100644 --- a/blobby/README.md +++ b/blobby/README.md @@ -42,6 +42,24 @@ This crate provides encoding and decoding utilities for converting between the blobby format and text file with hex-encoded strings. 
Let's say we have the following test vectors for a 64-bit hash function: +```text +COUNT = 0 +INPUT = 0123456789ABCDEF0123456789ABCDEF +OUTPUT = 217777950848CECD + +COUNT = 1 +INPUT = +OUTPUT = F7CD1446C9161C0A + +COUNT = 2 +INPUT = FFFEFD +OUTPUT = 80081C35AA43F640 + +``` + +To transform it into the Blobby format you first have to modify it +to the following format: + ```text 0123456789ABCDEF0123456789ABCDEF 217777950848CECD @@ -56,14 +74,14 @@ fourth, and sixth lines are hex-encoded hash outputs for input on the previous l Note that the file should contain a trailing empty line (i.e. every data line should end with `\n`). -We can encode this file into the Blobby format by running the following command: +This file can be converted to the Blobby format by running the following command: ```sh cargo run --release --bin encode -- /path/to/input.txt /path/to/output.blb ``` -This will create a file which then can be read using `blobby::Blob2Iterator`. +This will create a file which can be read using `blobby::Blob2Iterator`. -To see contents of a Blobby file you can use the following command: +To see contents of an existing Blobby file you can use the following command: ```sh cargo run --release --bin decode -- /path/to/input.blb /path/to/output.txt ```