Skip to content

Commit 911e40d

Browse files
committed
fix: Add buffer bounds validation to Cython deserializers
Add bounds checking to prevent buffer overruns and properly handle CQL protocol value semantics in deserializers. Changes: - subelem(): Add bounds validation with protocol-compliant value handling * Happy path: Check elemlen >= 0 and offset + elemlen <= buf.size * Support NULL values (elemlen == -1) per CQL protocol * Support "not set" values (elemlen == -2) per CQL protocol * Reject invalid values (elemlen < -2) with clear error message - _unpack_len(): Add bounds check before reading int32 length field * Validates offset + 4 <= buf.size before pointer dereference * Prevents reading beyond buffer boundaries - DesTupleType: Add defensive bounds checking for tuple deserialization * Check p + 4 <= buf.size before reading item length * Check p + itemlen <= buf.size before reading item data * Explicit NULL value handling (itemlen < 0) * Clear error messages for buffer overruns - DesCompositeType: Add bounds validation for composite type elements * Check 2 + element_length + 1 <= buf.size (length + data + EOC byte) * Prevents buffer overrun when reading composite elements - DesVectorType._deserialize_generic(): Add size validation * Verify buf.size == expected_size before processing * Provides clear error message with expected vs actual sizes Protocol specification reference: [value] = [int] n, followed by n bytes if n >= 0 n == -1: NULL value n == -2: not set value n < -2: invalid (error) Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
1 parent 7451f9f commit 911e40d

1 file changed

Lines changed: 54 additions & 14 deletions

File tree

cassandra/deserializers.pyx

Lines changed: 54 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,13 @@ cdef class DesVectorType(Deserializer):
398398
f"VectorType with variable-size subtype {self.subtype.typename} "
399399
"is not supported in Cython deserializer")
400400

401+
# Validate total size before processing
402+
cdef int expected_size = self.vector_size * serialized_size
403+
if buf.size != expected_size:
404+
raise ValueError(
405+
f"Expected vector of type {self.subtype.typename} and dimension {self.vector_size} "
406+
f"to have serialized size {expected_size}; observed serialized size of {buf.size} instead")
407+
401408
for i in range(self.vector_size):
402409
from_ptr_and_size(buf.ptr + offset, serialized_size, &elem_buf)
403410
result[i] = self.subtype.deserialize(to_bytes(&elem_buf), protocol_version)
@@ -473,18 +480,37 @@ cdef inline int subelem(
473480
Read the next element from the buffer: first read the size (in bytes) of the
474481
element, then fill elem_buf with a newly sliced buffer of this size (and the
475482
right offset).
483+
484+
Protocol: n >= 0: n bytes follow
485+
n == -1: NULL value
486+
n == -2: not set value
487+
n < -2: invalid
476488
"""
477489
cdef int32_t elemlen
478490

479491
_unpack_len(buf, offset[0], &elemlen)
480492
offset[0] += sizeof(int32_t)
481-
from_ptr_and_size(buf.ptr + offset[0], elemlen, elem_buf)
482-
offset[0] += elemlen
483-
return 0
493+
494+
# Happy path: non-negative length element that fits in buffer
495+
if elemlen >= 0:
496+
if offset[0] + elemlen <= buf.size:
497+
from_ptr_and_size(buf.ptr + offset[0], elemlen, elem_buf)
498+
offset[0] += elemlen
499+
return 0
500+
raise IndexError("Element length %d at offset %d exceeds buffer size %d" % (elemlen, offset[0], buf.size))
501+
# NULL value (-1) or not set value (-2)
502+
elif elemlen == -1 or elemlen == -2:
503+
from_ptr_and_size(NULL, elemlen, elem_buf)
504+
return 0
505+
# Invalid value (n < -2)
506+
else:
507+
raise ValueError("Invalid element length %d at offset %d" % (elemlen, offset[0]))
484508

485509

486510
cdef inline int _unpack_len(Buffer *buf, int offset, int32_t *output) except -1:
487511
"""Read a big-endian int32 at the given offset using direct pointer access."""
512+
if offset + sizeof(int32_t) > buf.size:
513+
raise IndexError("Cannot read length field: offset %d + 4 exceeds buffer size %d" % (offset, buf.size))
488514
cdef uint32_t *src = <uint32_t*>(buf.ptr + offset)
489515
output[0] = <int32_t>ntohl(src[0])
490516
return 0
@@ -556,16 +582,24 @@ cdef class DesTupleType(_DesParameterizedType):
556582
values = []
557583
for i in range(self.subtypes_len):
558584
item = None
559-
if p < buf.size:
585+
if p + 4 <= buf.size:
560586
# Read itemlen directly using ntohl instead of slice_buffer
561587
itemlen = <int32_t>ntohl((<uint32_t*>(buf.ptr + p))[0])
562588
p += 4
563-
if itemlen >= 0:
589+
590+
if itemlen >= 0 and p + itemlen <= buf.size:
564591
from_ptr_and_size(buf.ptr + p, itemlen, &item_buf)
565592
p += itemlen
566593

567594
deserializer = self.deserializers[i]
568595
item = from_binary(deserializer, &item_buf, protocol_version)
596+
elif itemlen < 0:
597+
# NULL value, item stays None
598+
pass
599+
else:
600+
raise IndexError("Tuple item length %d at offset %d exceeds buffer size %d" % (itemlen, p, buf.size))
601+
elif p < buf.size:
602+
raise IndexError("Cannot read tuple item length at offset %d: only %d bytes remain" % (p, buf.size - p))
569603

570604
tuple_set(res, i, item)
571605

@@ -607,17 +641,23 @@ cdef class DesCompositeType(_DesParameterizedType):
607641
break
608642

609643
element_length = unpack_num[uint16_t](buf)
610-
from_ptr_and_size(buf.ptr + 2, element_length, &elem_buf)
611644

612-
deserializer = self.deserializers[i]
613-
item = from_binary(deserializer, &elem_buf, protocol_version)
614-
tuple_set(res, i, item)
645+
# Validate that we have enough data for the element and EOC byte (happy path check)
646+
if 2 + element_length + 1 <= buf.size:
647+
from_ptr_and_size(buf.ptr + 2, element_length, &elem_buf)
648+
649+
deserializer = self.deserializers[i]
650+
item = from_binary(deserializer, &elem_buf, protocol_version)
651+
tuple_set(res, i, item)
615652

616-
# skip element length, element, and the EOC (one byte)
617-
# Advance buffer in-place with direct assignment
618-
start = 2 + element_length + 1
619-
buf.ptr = buf.ptr + start
620-
buf.size = buf.size - start
653+
# skip element length, element, and the EOC (one byte)
654+
# Advance buffer in-place with direct assignment
655+
start = 2 + element_length + 1
656+
buf.ptr = buf.ptr + start
657+
buf.size = buf.size - start
658+
else:
659+
raise IndexError("Composite element length %d requires %d bytes but only %d remain" %
660+
(element_length, 2 + element_length + 1, buf.size))
621661

622662
return res
623663

0 commit comments

Comments
 (0)