diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 2b31d1d5d268..bc37bd554aec 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -432,13 +432,119 @@ int crypto_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 }
 EXPORT_SYMBOL_GPL(crypto_skcipher_setkey);
 
+int crypto_skcipher_set_data_unit_size(struct crypto_skcipher *tfm,
+				       unsigned int data_unit_size)
+{
+	unsigned int blocksize;
+
+	if (!data_unit_size) {
+		tfm->data_unit_size = 0;
+		return 0;
+	}
+
+	if (!crypto_skcipher_supports_multi_data_unit(tfm))
+		return -EOPNOTSUPP;
+
+	blocksize = crypto_skcipher_blocksize(tfm);
+	if (data_unit_size < blocksize || data_unit_size % blocksize)
+		return -EINVAL;
+
+	tfm->data_unit_size = data_unit_size;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_skcipher_set_data_unit_size);
+
+static int crypto_skcipher_check_data_unit_size(struct crypto_skcipher *tfm,
+						struct skcipher_request *req)
+{
+	unsigned int du = tfm->data_unit_size;
+
+	if (likely(!du))
+		return 0;
+	if (req->cryptlen % du)
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Increment a 16-byte little-endian counter held in @iv.  See
+ * crypto_skcipher_set_data_unit_size() for the convention.
+ */
+static inline void skcipher_iv_inc_le128(u8 *iv)
+{
+	__le64 lo_le, hi_le;
+	u64 lo;
+
+	memcpy(&lo_le, iv, 8);
+	memcpy(&hi_le, iv + 8, 8);
+	lo = le64_to_cpu(lo_le) + 1;
+	lo_le = cpu_to_le64(lo);
+	memcpy(iv, &lo_le, 8);
+	if (unlikely(lo == 0)) {
+		hi_le = cpu_to_le64(le64_to_cpu(hi_le) + 1);
+		memcpy(iv + 8, &hi_le, 8);
+	}
+}
+
+int skcipher_walk_data_units(struct skcipher_request *req,
+			     int (*body)(struct skcipher_request *))
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const unsigned int du = tfm->data_unit_size;
+	const unsigned int total = req->cryptlen;
+	struct scatterlist *orig_src = req->src;
+	struct scatterlist *orig_dst = req->dst;
+	struct scatterlist src_sg[2], dst_sg[2];
+	u8 iv_save[16];
+	unsigned int off;
+	int err = 0;
+
+	if (likely(!du))
+		return body(req);
+
+	/*
+	 * Registration of an algorithm advertising
+	 * CRYPTO_ALG_SKCIPHER_MULTI_DATA_UNIT enforces ivsize == 16
+	 * (see skcipher_prepare_alg_common()), so this is purely
+	 * defensive against algorithm-registration bugs.
+	 */
+	if (WARN_ON_ONCE(crypto_skcipher_ivsize(tfm) != 16))
+		return -EINVAL;
+
+	memcpy(iv_save, req->iv, 16);
+
+	for (off = 0; off < total; off += du) {
+		req->cryptlen = du;
+		req->src = scatterwalk_ffwd(src_sg, orig_src, off);
+		req->dst = (orig_src == orig_dst) ? req->src :
+			   scatterwalk_ffwd(dst_sg, orig_dst, off);
+
+		err = body(req);
+		if (err)
+			break;
+
+		skcipher_iv_inc_le128(iv_save);
+		memcpy(req->iv, iv_save, 16);
+	}
+
+	req->src = orig_src;
+	req->dst = orig_dst;
+	req->cryptlen = total;
+	return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_data_units);
+
 int crypto_skcipher_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	int err;
 
 	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		return -ENOKEY;
+	err = crypto_skcipher_check_data_unit_size(tfm, req);
+	if (err)
+		return err;
 	if (alg->co.base.cra_type != &crypto_skcipher_type)
 		return crypto_lskcipher_encrypt_sg(req);
 	return alg->encrypt(req);
@@ -449,9 +555,13 @@ int crypto_skcipher_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	int err;
 
 	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		return -ENOKEY;
+	err = crypto_skcipher_check_data_unit_size(tfm, req);
+	if (err)
+		return err;
 	if (alg->co.base.cra_type != &crypto_skcipher_type)
 		return crypto_lskcipher_decrypt_sg(req);
 	return alg->decrypt(req);
@@ -680,6 +790,16 @@ int skcipher_prepare_alg_common(struct skcipher_alg_common *alg)
 	    (alg->ivsize + alg->statesize) > PAGE_SIZE / 2)
 		return -EINVAL;
 
+	/*
+	 * Algorithms advertising multi-data-unit support must use the
+	 * 16-byte little-endian counter convention documented in
+	 * crypto_skcipher_set_data_unit_size(); see also
+	 * skcipher_walk_data_units().
+	 */
+	if ((base->cra_flags & CRYPTO_ALG_SKCIPHER_MULTI_DATA_UNIT) &&
+	    alg->ivsize != 16)
+		return -EINVAL;
+
 	if (!alg->chunksize)
 		alg->chunksize = base->cra_blocksize;
 
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 4d86efae65b2..8ca92ee6b37c 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3211,6 +3211,123 @@ static int test_skcipher(int enc, const struct cipher_test_suite *suite,
 	return 0;
 }
 
+/*
+ * For algorithms that advertise CRYPTO_ALG_SKCIPHER_MULTI_DATA_UNIT,
+ * verify that one request batching N data units produces the same
+ * ciphertext as N back-to-back single-data-unit requests with IVs
+ * derived from the original IV by adding the data-unit index (treated
+ * as a 128-bit little-endian counter).
+ *
+ * This is a self-comparison: it does not depend on test-vector
+ * authoritativeness, only on the algorithm being internally consistent
+ * between its single-DU and multi-DU paths.
+ */
+#define TEST_MDU_NR_UNITS	4
+static int test_skcipher_multi_du(struct crypto_skcipher *tfm,
+				  unsigned int du_size)
+{
+	const char *driver = crypto_skcipher_driver_name(tfm);
+	const unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+	const unsigned int total = du_size * TEST_MDU_NR_UNITS;
+	struct skcipher_request *req = NULL;
+	struct scatterlist sg_in, sg_out;
+	DECLARE_CRYPTO_WAIT(wait);
+	u8 iv_orig[16] = {0};
+	u8 iv_work[16];
+	u8 *plain = NULL, *batched = NULL, *unit = NULL;
+	unsigned int i;
+	int err;
+
+	if (ivsize != 16)
+		return 0;
+
+	plain = kmalloc(total, GFP_KERNEL);
+	batched = kmalloc(total, GFP_KERNEL);
+	unit = kmalloc(total, GFP_KERNEL);
+	req = skcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!plain || !batched || !unit || !req) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	get_random_bytes(plain, total);
+	get_random_bytes(iv_orig, ivsize);
+
+	/* Pass 1: one batched encrypt with data_unit_size set. */
+	err = crypto_skcipher_set_data_unit_size(tfm, du_size);
+	if (err) {
+		pr_err("alg: skcipher: %s set_data_unit_size(%u) failed: %d\n",
+		       driver, du_size, err);
+		goto out;
+	}
+	memcpy(batched, plain, total);
+	memcpy(iv_work, iv_orig, ivsize);
+	sg_init_one(&sg_in, batched, total);
+	sg_out = sg_in;
+	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+					   CRYPTO_TFM_REQ_MAY_SLEEP,
+				      crypto_req_done, &wait);
+	skcipher_request_set_crypt(req, &sg_in, &sg_out, total, iv_work);
+	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+	if (err) {
+		pr_err("alg: skcipher: %s multi-DU batched encrypt failed: %d\n",
+		       driver, err);
+		goto out_clear_du;
+	}
+
+	/* Pass 2: TEST_MDU_NR_UNITS single-DU encrypts with derived IVs. */
+	err = crypto_skcipher_set_data_unit_size(tfm, 0);
+	if (err)
+		goto out;
+	memcpy(unit, plain, total);
+	memcpy(iv_work, iv_orig, ivsize);
+	for (i = 0; i < TEST_MDU_NR_UNITS; i++) {
+		sg_init_one(&sg_in, unit + i * du_size, du_size);
+		sg_out = sg_in;
+		skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+						   CRYPTO_TFM_REQ_MAY_SLEEP,
+					      crypto_req_done, &wait);
+		skcipher_request_set_crypt(req, &sg_in, &sg_out, du_size,
+					   iv_work);
+		err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+		if (err) {
+			pr_err("alg: skcipher: %s single-DU[%u] encrypt failed: %d\n",
+			       driver, i, err);
+			goto out;
+		}
+		/* Increment iv_work as a 128-bit little-endian counter. */
+		{
+			__le64 lo_le, hi_le;
+			u64 lo;
+
+			memcpy(&lo_le, iv_work, 8);
+			memcpy(&hi_le, iv_work + 8, 8);
+			lo = le64_to_cpu(lo_le) + 1;
+			lo_le = cpu_to_le64(lo);
+			memcpy(iv_work, &lo_le, 8);
+			if (lo == 0) {
+				hi_le = cpu_to_le64(le64_to_cpu(hi_le) + 1);
+				memcpy(iv_work + 8, &hi_le, 8);
+			}
+		}
+	}
+
+	if (memcmp(batched, unit, total) != 0) {
+		pr_err("alg: skcipher: %s multi-DU mismatch (du=%u, n=%u)\n",
+		       driver, du_size, TEST_MDU_NR_UNITS);
+		err = -EINVAL;
+	}
+
+out_clear_du:
+	(void)crypto_skcipher_set_data_unit_size(tfm, 0);
+out:
+	skcipher_request_free(req);
+	kfree(unit);
+	kfree(batched);
+	kfree(plain);
+	return err;
+}
+
 static int alg_test_skcipher(const struct alg_test_desc *desc,
 			     const char *driver, u32 type, u32 mask)
 {
@@ -3259,6 +3376,18 @@ static int alg_test_skcipher(const struct alg_test_desc *desc,
 	if (err)
 		goto out;
 
+	if (crypto_skcipher_supports_multi_data_unit(tfm)) {
+		static const unsigned int du_sizes[] = { 512, 1024, 2048, 4096 };
+		unsigned int j;
+
+		for (j = 0; j < ARRAY_SIZE(du_sizes); j++) {
+			err = test_skcipher_multi_du(tfm, du_sizes[j]);
+			if (err)
+				goto out;
+			cond_resched();
+		}
+	}
+
 	err = test_skcipher_vs_generic_impl(desc->generic_driver, req, tsgls);
 out:
 	free_cipher_test_sglists(tsgls);
diff --git a/crypto/xts.c b/crypto/xts.c
index ad97c8091582..f0585ea9d6d5 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -258,7 +258,7 @@ static int xts_init_crypt(struct skcipher_request *req,
 	return 0;
 }
 
-static int xts_encrypt(struct skcipher_request *req)
+static int xts_encrypt_one(struct skcipher_request *req)
 {
 	struct xts_request_ctx *rctx = skcipher_request_ctx(req);
 	struct skcipher_request *subreq = &rctx->subreq;
@@ -275,7 +275,7 @@ static int xts_encrypt(struct skcipher_request *req)
 	return xts_cts_final(req, crypto_skcipher_encrypt);
 }
 
-static int xts_decrypt(struct skcipher_request *req)
+static int xts_decrypt_one(struct skcipher_request *req)
 {
 	struct xts_request_ctx *rctx = skcipher_request_ctx(req);
 	struct skcipher_request *subreq = &rctx->subreq;
@@ -292,6 +292,16 @@ static int xts_decrypt(struct skcipher_request *req)
 	return xts_cts_final(req, crypto_skcipher_decrypt);
 }
 
+static int xts_encrypt(struct skcipher_request *req)
+{
+	return skcipher_walk_data_units(req, xts_encrypt_one);
+}
+
+static int xts_decrypt(struct skcipher_request *req)
+{
+	return skcipher_walk_data_units(req, xts_decrypt_one);
+}
+
 static int xts_init_tfm(struct crypto_skcipher *tfm)
 {
 	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
@@ -427,6 +437,17 @@ static int xts_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
 				       (__alignof__(u64) - 1);
 
+	/*
+	 * Advertise multi-data-unit support only when the inner cipher is
+	 * synchronous.  The dispatcher in skcipher_walk_data_units() calls
+	 * the single-DU body in a loop and assumes synchronous completion;
+	 * supporting async would require a per-DU callback chain, which
+	 * the slow software template does not need.
+	 */
+	if (!(alg->base.cra_flags & CRYPTO_ALG_ASYNC))
+		inst->alg.base.cra_flags |=
+			CRYPTO_ALG_SKCIPHER_MULTI_DATA_UNIT;
+
 	inst->alg.ivsize = XTS_BLOCK_SIZE;
 	inst->alg.min_keysize = alg->min_keysize * 2;
 	inst->alg.max_keysize = alg->max_keysize * 2;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 608b617fb817..e3cc88cf0095 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -101,6 +101,14 @@ struct dm_crypt_request {
 	struct scatterlist sg_in[4];
 	struct scatterlist sg_out[4];
 	u64 iv_sector;
+	/*
+	 * Heap-allocated scatterlists used by the multi-data-unit path
+	 * when one bio is processed in a single skcipher request.  NULL
+	 * when the inline sg_in[]/sg_out[] arrays above are sufficient
+	 * (single-data-unit path).  Freed in crypt_free_req_skcipher().
+	 */
+	struct scatterlist *sg_in_ext;
+	struct scatterlist *sg_out_ext;
 };
 
 struct crypt_config;
@@ -151,6 +159,7 @@ enum cipher_flags {
 	CRYPT_IV_LARGE_SECTORS,		/* Calculate IV from sector_size, not 512B sectors */
 	CRYPT_ENCRYPT_PREPROCESS,	/* Must preprocess data for encryption (elephant) */
 	CRYPT_KEY_MAC_SIZE_SET,		/* The integrity_key_size option was used */
+	CRYPT_MULTI_DATA_UNIT,		/* Batch all sectors of a bio per crypto request */
 };
 
 /*
@@ -1426,12 +1435,175 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
 	return r;
 }
 
+/*
+ * Multi-data-unit variant of crypt_convert_block_skcipher.  Submits all
+ * remaining sectors of the current bio in one skcipher request whose
+ * data_unit_size is cc->sector_size.  The cipher walks the IV between
+ * data units (see crypto_skcipher_set_data_unit_size()).
+ *
+ * Returns the same set of values as crypt_convert_block_skcipher:
+ *   0 on synchronous success (full chunk processed),
+ *   -EINPROGRESS / -EBUSY on asynchronous dispatch,
+ *   -EAGAIN if the per-bio scatterlist allocation cannot be made.  The
+ *           caller MUST disable multi-data-unit batching for the rest
+ *           of this bio and re-enter the per-sector path, which uses
+ *           only mempool reserves and is therefore safe even on the
+ *           swap-out-to-dm-crypt path under total memory exhaustion.
+ *   negative errno otherwise.
+ *
+ * The bio iterators are not advanced here.  On success and on
+ * asynchronous dispatch *@out_processed is set to the number of bytes
+ * submitted, and the caller advances the iterators by that amount.
+ *
+ * Walks the bio with __bio_for_each_bvec so that multi-page folios
+ * produce one scatterlist entry rather than N (one per PAGE_SIZE).
+ */
+static int crypt_convert_block_skcipher_multi(struct crypt_config *cc,
+					      struct convert_context *ctx,
+					      struct skcipher_request *req,
+					      unsigned int *out_processed)
+{
+	const unsigned int sector_size = cc->sector_size;
+	/*
+	 * crypt_alloc_req_skcipher() picks its gfp mask with in_interrupt()
+	 * on this same path, so do not sleep here either.  The allocation
+	 * is opportunistic in both cases: fail fast and let the caller
+	 * fall back to the per-sector path.
+	 */
+	const gfp_t gfp = (in_interrupt() ? GFP_NOWAIT : GFP_NOIO) |
+			  __GFP_NORETRY | __GFP_NOWARN;
+	unsigned int total_in = ctx->iter_in.bi_size;
+	unsigned int total_out = ctx->iter_out.bi_size;
+	unsigned int total = min(total_in, total_out);
+	unsigned int n_sectors;
+	unsigned int n_sg_in = 0, n_sg_out = 0;
+	struct dm_crypt_request *dmreq = dmreq_of_req(cc, req);
+	struct scatterlist *sg_in = NULL, *sg_out = NULL;
+	struct bvec_iter iter_in, iter_out;
+	struct bio_vec bv;
+	u8 *iv, *org_iv;
+	int r;
+
+	if (unlikely(total < sector_size))
+		return -EIO;
+	n_sectors = total / sector_size;
+	total = n_sectors * sector_size;
+
+	/*
+	 * Walk the bio_vec iterators to count how many SG entries we need
+	 * for exactly @total bytes.  bi_size of the iterators is at least
+	 * @total by construction above.
+	 */
+	iter_in = ctx->iter_in;
+	iter_in.bi_size = total;
+	__bio_for_each_bvec(bv, ctx->bio_in, iter_in, iter_in)
+		n_sg_in++;
+
+	iter_out = ctx->iter_out;
+	iter_out.bi_size = total;
+	__bio_for_each_bvec(bv, ctx->bio_out, iter_out, iter_out)
+		n_sg_out++;
+
+	sg_in = kmalloc_array(n_sg_in, sizeof(*sg_in), gfp);
+	sg_out = (ctx->bio_in == ctx->bio_out) ? sg_in :
+		 kmalloc_array(n_sg_out, sizeof(*sg_out), gfp);
+	if (!sg_in || !sg_out) {
+		/*
+		 * Allocation may legitimately fail under memory pressure on
+		 * the swap-out-to-dm-crypt path.  Return -EAGAIN so the
+		 * caller falls back to the per-sector path for this bio
+		 * rather than looping forever in the allocator or requeueing
+		 * the bio just to fail again.
+		 */
+		kfree(sg_in);
+		if (sg_out != sg_in)
+			kfree(sg_out);
+		return -EAGAIN;
+	}
+
+	sg_init_table(sg_in, n_sg_in);
+	{
+		unsigned int i = 0;
+
+		iter_in = ctx->iter_in;
+		iter_in.bi_size = total;
+		__bio_for_each_bvec(bv, ctx->bio_in, iter_in, iter_in)
+			sg_set_page(&sg_in[i++], bv.bv_page, bv.bv_len,
+				    bv.bv_offset);
+	}
+
+	if (sg_out != sg_in) {
+		unsigned int i = 0;
+
+		sg_init_table(sg_out, n_sg_out);
+		iter_out = ctx->iter_out;
+		iter_out.bi_size = total;
+		__bio_for_each_bvec(bv, ctx->bio_out, iter_out, iter_out)
+			sg_set_page(&sg_out[i++], bv.bv_page, bv.bv_len,
+				    bv.bv_offset);
+	}
+
+	/*
+	 * Compute the IV for the first data unit.  The cipher will derive
+	 * IVs for subsequent data units by treating this one as a 128-bit
+	 * little-endian counter and adding the data-unit index, which
+	 * matches the layout produced by plain64.
+	 */
+	dmreq->iv_sector = ctx->cc_sector;
+	if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags))
+		dmreq->iv_sector >>= cc->sector_shift;
+	dmreq->ctx = ctx;
+
+	iv = iv_of_dmreq(cc, dmreq);
+	org_iv = org_iv_of_dmreq(cc, dmreq);
+	r = cc->iv_gen_ops->generator(cc, org_iv, dmreq);
+	if (r < 0)
+		goto out_free_sg;
+	memcpy(iv, org_iv, cc->iv_size);
+
+	/* Stash the SG arrays for cleanup on completion / free. */
+	dmreq->sg_in_ext = sg_in;
+	dmreq->sg_out_ext = (sg_out == sg_in) ? NULL : sg_out;
+
+	skcipher_request_set_crypt(req, sg_in, sg_out, total, iv);
+
+	if (bio_data_dir(ctx->bio_in) == WRITE)
+		r = crypto_skcipher_encrypt(req);
+	else
+		r = crypto_skcipher_decrypt(req);
+
+	/*
+	 * Unless the request is still in flight, the cipher is finished
+	 * with the scatterlists.  Release them now so that a later reuse
+	 * of this request, which resets the *_ext pointers in
+	 * crypt_alloc_req_skcipher(), cannot leak them.
+	 */
+	if (r != -EINPROGRESS && r != -EBUSY) {
+		kfree(dmreq->sg_in_ext);
+		dmreq->sg_in_ext = NULL;
+		kfree(dmreq->sg_out_ext);
+		dmreq->sg_out_ext = NULL;
+	}
+
+	*out_processed = total;
+	return r;
+
+out_free_sg:
+	kfree(sg_in);
+	if (sg_out != sg_in)
+		kfree(sg_out);
+	dmreq->sg_in_ext = NULL;
+	dmreq->sg_out_ext = NULL;
+	return r;
+}
+
 static void kcryptd_async_done(void *async_req, int error);
 
 static int crypt_alloc_req_skcipher(struct crypt_config *cc,
 				     struct convert_context *ctx)
 {
 	unsigned int key_index = ctx->cc_sector & (cc->tfms_count - 1);
+	struct dm_crypt_request *dmreq;
 
 	if (!ctx->r.req) {
 		ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
@@ -1441,6 +1613,18 @@ static int crypt_alloc_req_skcipher(struct crypt_config *cc,
 
 	skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]);
 
+	/*
+	 * Initialise the heap-allocated scatterlist pointers so that
+	 * crypt_free_req_skcipher() does not read uninitialised memory
+	 * for paths that don't take the multi-data-unit branch.  The
+	 * dmreq trailer lives in the per-bio data area which is not
+	 * zeroed by the dm core, and the request is reused from the
+	 * mempool across many bios.
+	 */
+	dmreq = dmreq_of_req(cc, ctx->r.req);
+	dmreq->sg_in_ext = NULL;
+	dmreq->sg_out_ext = NULL;
+
 	/*
 	 * Use REQ_MAY_BACKLOG so a cipher driver internally backlogs
 	 * requests if driver request queue is full.
@@ -1487,6 +1671,12 @@ static void crypt_free_req_skcipher(struct crypt_config *cc,
 				    struct skcipher_request *req, struct bio *base_bio)
 {
 	struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
+	struct dm_crypt_request *dmreq = dmreq_of_req(cc, req);
+
+	kfree(dmreq->sg_in_ext);
+	dmreq->sg_in_ext = NULL;
+	kfree(dmreq->sg_out_ext);
+	dmreq->sg_out_ext = NULL;
 
 	if ((struct skcipher_request *)(io + 1) != req)
 		mempool_free(req, &cc->req_pool);
@@ -1515,7 +1705,9 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
 static blk_status_t crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx, bool atomic, bool reset_pending)
 {
-	unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
+	const unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
+	bool multi_du = test_bit(CRYPT_MULTI_DATA_UNIT, &cc->cipher_flags);
+	unsigned int processed;
 	int r;
 
 	/*
@@ -1536,8 +1728,13 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 
 		atomic_inc(&ctx->cc_pending);
 
+		processed = cc->sector_size;
 		if (crypt_integrity_aead(cc))
 			r = crypt_convert_block_aead(cc, ctx, ctx->r.req_aead, ctx->tag_offset);
+		else if (multi_du)
+			r = crypt_convert_block_skcipher_multi(cc, ctx,
+							       ctx->r.req,
+							       &processed);
 		else
 			r = crypt_convert_block_skcipher(cc, ctx, ctx->r.req, ctx->tag_offset);
 
@@ -1559,8 +1756,19 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 					 * exit and continue processing in a workqueue
 					 */
 					ctx->r.req = NULL;
-					ctx->tag_offset++;
-					ctx->cc_sector += sector_step;
+					if (!multi_du) {
+						ctx->tag_offset++;
+						ctx->cc_sector += sector_step;
+					} else {
+						bio_advance_iter(ctx->bio_in,
+								 &ctx->iter_in,
+								 processed);
+						bio_advance_iter(ctx->bio_out,
+								 &ctx->iter_out,
+								 processed);
+						ctx->cc_sector +=
+							processed >> SECTOR_SHIFT;
+					}
 					return BLK_STS_DEV_RESOURCE;
 				}
 			} else {
@@ -1574,19 +1782,52 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 		 */
 		case -EINPROGRESS:
 			ctx->r.req = NULL;
-			ctx->tag_offset++;
-			ctx->cc_sector += sector_step;
+			if (!multi_du) {
+				ctx->tag_offset++;
+				ctx->cc_sector += sector_step;
+			} else {
+				bio_advance_iter(ctx->bio_in, &ctx->iter_in,
+						 processed);
+				bio_advance_iter(ctx->bio_out, &ctx->iter_out,
+						 processed);
+				ctx->cc_sector += processed >> SECTOR_SHIFT;
+			}
 			continue;
 		/*
 		 * The request was already processed (synchronously).
 		 */
 		case 0:
 			atomic_dec(&ctx->cc_pending);
-			ctx->cc_sector += sector_step;
-			ctx->tag_offset++;
+			if (!multi_du) {
+				ctx->cc_sector += sector_step;
+				ctx->tag_offset++;
+			} else {
+				bio_advance_iter(ctx->bio_in, &ctx->iter_in,
+						 processed);
+				bio_advance_iter(ctx->bio_out, &ctx->iter_out,
+						 processed);
+				ctx->cc_sector += processed >> SECTOR_SHIFT;
+			}
 			if (!atomic)
 				cond_resched();
 			continue;
+		/*
+		 * Multi-data-unit scatterlist allocation failed.  This can
+		 * happen on the swap-out-to-dm-crypt path under memory
+		 * pressure, where retrying with the same allocation policy
+		 * could loop forever.  Disable multi-data-unit batching for
+		 * the rest of this crypt_convert() invocation and re-enter
+		 * the per-sector path, which uses only mempool reserves and
+		 * is guaranteed to make forward progress even under total
+		 * memory exhaustion.  The per-tfm data_unit_size is left
+		 * unchanged, so subsequent bios (which start a fresh
+		 * crypt_convert() and re-read cipher_flags) will retry the
+		 * multi-data-unit path once memory pressure eases.
+		 */
+		case -EAGAIN:
+			atomic_dec(&ctx->cc_pending);
+			multi_du = false;
+			continue;
 		/*
 		 * There was a data integrity error.
 		 */
@@ -3063,6 +3304,50 @@ static int crypt_ctr_cipher(struct dm_target *ti, char *cipher_in, char *key)
 		}
 	}
 
+	/*
+	 * Enable multi-data-unit batching when the cipher supports it and
+	 * the per-sector IVs can be derived from the first one by adding
+	 * the data-unit index, which is the convention of
+	 * crypto_skcipher_set_data_unit_size().  That holds only for
+	 * plain64, and only when consecutive data units use consecutive
+	 * IVs: either 512-byte sectors, or iv_large_sectors.  Without
+	 * iv_large_sectors the IV is the 512-byte sector number, so it
+	 * advances by sector_size / 512 per data unit.  plain is excluded
+	 * because its IV wraps at 32 bits, whereas the cipher's counter
+	 * would carry into bit 32.  Restrict to the simple case (single
+	 * tfm, no integrity, no per-sector post() callback) to keep the
+	 * consumer path small; modes like essiv, lmk, tcw, eboiv,
+	 * plain64be, random, null, benbi, and elephant are deliberately
+	 * excluded because their generators or post-IV hooks cannot be
+	 * re-derived by the cipher between data units.
+	 */
+	if (!crypt_integrity_aead(cc) && cc->tfms_count == 1 &&
+	    cc->iv_gen_ops == &crypt_iv_plain64_ops &&
+	    !cc->iv_gen_ops->post &&
+	    (cc->sector_size == (1 << SECTOR_SHIFT) ||
+	     test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags)) &&
+	    !cc->integrity_tag_size && !cc->integrity_iv_size &&
+	    crypto_skcipher_supports_multi_data_unit(cc->cipher_tfm.tfms[0])) {
+		ret = crypto_skcipher_set_data_unit_size(cc->cipher_tfm.tfms[0],
+							 cc->sector_size);
+		if (!ret) {
+			set_bit(CRYPT_MULTI_DATA_UNIT, &cc->cipher_flags);
+			DMINFO("Using multi-data-unit crypto offload (du=%u)",
+			       cc->sector_size);
+		} else {
+			/*
+			 * The driver advertised the capability via cra_flags
+			 * but rejected the requested data unit size.  This is
+			 * a driver bug worth seeing in dmesg; fall back to
+			 * the per-sector path so the device still activates.
+			 */
+			DMWARN_LIMIT("multi-DU offload disabled: %s rejected du=%u (%d)",
+				     crypto_skcipher_driver_name(cc->cipher_tfm.tfms[0]),
+				     cc->sector_size, ret);
+			ret = 0;
+		}
+	}
+
 	/* wipe the kernel key payload copy */
 	if (cc->key_string)
 		memset(cc->key, 0, cc->key_size * sizeof(u8));
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index a965b6aabf61..bed1b1f1bbdc 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -21,6 +21,40 @@
  */
 #define CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE CRYPTO_ALG_OPTIONAL_KEY
 
+/**
+ * skcipher_walk_data_units - dispatch a request as one body call per data unit
+ * @req: the caller's skcipher request
+ * @body: the algorithm's single-data-unit encrypt or decrypt function
+ *
+ * When tfm->data_unit_size is zero this is a tail call into @body with
+ * @req unchanged.  Otherwise the request is split into
+ * cryptlen / data_unit_size sub-ranges and @body is called once per
+ * sub-range with req->cryptlen, req->src, req->dst, and req->iv adjusted
+ * for that sub-range.  The IV passed to data unit n is the caller-
+ * supplied IV plus n, where + is a 128-bit little-endian add — this
+ * matches the convention documented in
+ * crypto_skcipher_set_data_unit_size().
+ *
+ * Many single-data-unit XTS bodies modify the IV buffer in place during
+ * processing (the tweak is walked block by block).  This helper keeps a
+ * private copy of the caller's IV, increments that copy after each data
+ * unit, and rewrites req->iv from it, so the body always sees a fresh
+ * per-DU IV regardless of any in-place mutation it performs.
+ *
+ * The body MUST run to completion synchronously.  Drivers that use this
+ * helper therefore advertise CRYPTO_ALG_SKCIPHER_MULTI_DATA_UNIT only
+ * for synchronous configurations.
+ *
+ * After the call returns, the contents of req->iv are unspecified per
+ * the documented contract.  src/dst/cryptlen are restored to the
+ * caller's values to keep skcipher request post-conditions intact.
+ *
+ * Return: 0 on success, or the body's negative errno on the first
+ * data unit that returned non-zero.
+ */
+int skcipher_walk_data_units(struct skcipher_request *req,
+			     int (*body)(struct skcipher_request *));
+
 struct aead_request;
 struct rtattr;
 
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index 4efe2ca8c4d1..5941b6b24b98 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -26,6 +26,15 @@
 /* Set this bit if the skcipher operation is not final. */
 #define CRYPTO_SKCIPHER_REQ_NOTFINAL 0x00000002
 
+/*
+ * Set in cra_flags by an skcipher algorithm that supports processing
+ * multiple data units in a single request.  See
+ * crypto_skcipher_set_data_unit_size().
+ *
+ * Type-specific flag in the 0xff000000 reserved range.
+ */
+#define CRYPTO_ALG_SKCIPHER_MULTI_DATA_UNIT 0x01000000
+
 struct scatterlist;
 
 /**
@@ -53,6 +62,22 @@ struct skcipher_request {
 struct crypto_skcipher {
 	unsigned int reqsize;
 
+	/*
+	 * Number of bytes in one data unit when batching multiple data units
+	 * per request.  0 means "single data unit per request" (legacy
+	 * behaviour).  Set via crypto_skcipher_set_data_unit_size().
+	 *
+	 * When non-zero, cryptlen must be a multiple of data_unit_size.  The
+	 * IV passed in skcipher_request::iv applies to the first data unit;
+	 * each following data unit uses the previous data unit's IV plus
+	 * one, with the 16-byte IV treated as a 128-bit little-endian
+	 * counter (see crypto_skcipher_set_data_unit_size()).
+	 *
+	 * Only algorithms that advertise CRYPTO_ALG_SKCIPHER_MULTI_DATA_UNIT
+	 * in cra_flags accept a non-zero value.
+	 */
+	unsigned int data_unit_size;
+
 	struct crypto_tfm base;
 };
 
@@ -492,6 +517,70 @@ static inline unsigned int crypto_lskcipher_chunksize(
 	return crypto_lskcipher_alg(tfm)->co.chunksize;
 }
 
+/**
+ * crypto_skcipher_supports_multi_data_unit() - test multi-data-unit support
+ * @tfm: cipher handle
+ *
+ * Return: true if the algorithm advertises that it can process multiple
+ * data units in a single skcipher_request.
+ */
+static inline bool
+crypto_skcipher_supports_multi_data_unit(struct crypto_skcipher *tfm)
+{
+	return crypto_skcipher_alg_common(tfm)->base.cra_flags &
+	       CRYPTO_ALG_SKCIPHER_MULTI_DATA_UNIT;
+}
+
+/**
+ * crypto_skcipher_set_data_unit_size() - set data unit size for the tfm
+ * @tfm: cipher handle
+ * @data_unit_size: data unit size in bytes; 0 disables multi-data-unit mode
+ *
+ * Configure the tfm to process multiple data units per request.  When set
+ * to a non-zero value, every subsequent encrypt/decrypt request must have
+ * cryptlen that is a multiple of @data_unit_size.  Each data unit is
+ * processed as if it were a separate request whose IV is derived from the
+ * preceding data unit's IV by the algorithm-specific tweak update rule:
+ * the implementation treats the caller-supplied IV as a 128-bit
+ * little-endian counter and adds the data-unit index for each subsequent
+ * data unit.
+ *
+ * The contents of req->iv after a multi-data-unit request returns are
+ * unspecified — callers MUST NOT rely on it being either the original
+ * value or the final-data-unit value.  Set a fresh IV before every
+ * request.
+ *
+ * The data unit size is per-tfm state that every request on @tfm reads
+ * without locking, so it must not be changed while requests on @tfm are
+ * in flight.
+ *
+ * The algorithm must advertise CRYPTO_ALG_SKCIPHER_MULTI_DATA_UNIT in its
+ * cra_flags.  @data_unit_size must be a positive multiple of the
+ * algorithm's cra_blocksize, otherwise -EINVAL is returned.
+ *
+ * Setting @data_unit_size to 0 reverts the tfm to single-data-unit
+ * behaviour and is always permitted.
+ *
+ * Return: 0 on success; -EOPNOTSUPP if the algorithm does not advertise
+ *	   multi-data-unit support; -EINVAL if @data_unit_size is not a
+ *	   positive multiple of the cipher block size.
+ */
+int crypto_skcipher_set_data_unit_size(struct crypto_skcipher *tfm,
+				       unsigned int data_unit_size);
+
+/**
+ * crypto_skcipher_data_unit_size() - obtain data unit size
+ * @tfm: cipher handle
+ *
+ * Return: configured data unit size in bytes; 0 if multi-data-unit mode
+ *	   is disabled.
+ */
+static inline unsigned int
+crypto_skcipher_data_unit_size(struct crypto_skcipher *tfm)
+{
+	return tfm->data_unit_size;
+}
+
 /**
  * crypto_skcipher_statesize() - obtain state size
  * @tfm: cipher handle