|
19 | 19 | //! and BinaryRowBuilder for constructing BinaryRow instances. |
20 | 20 |
|
21 | 21 | use crate::spec::murmur_hash::hash_by_words; |
| 22 | +use crate::spec::{DataType, Datum}; |
22 | 23 | use serde::{Deserialize, Serialize}; |
23 | 24 |
|
24 | 25 | pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0); |
@@ -523,6 +524,87 @@ impl BinaryRowBuilder { |
523 | 524 | serialized.extend_from_slice(&self.data); |
524 | 525 | serialized |
525 | 526 | } |
| 527 | + |
| 528 | + /// Write a Datum value at the given position, dispatching by type. |
| 529 | + pub fn write_datum(&mut self, pos: usize, datum: &Datum, data_type: &DataType) { |
| 530 | + match datum { |
| 531 | + Datum::Bool(v) => self.write_boolean(pos, *v), |
| 532 | + Datum::TinyInt(v) => self.write_byte(pos, *v), |
| 533 | + Datum::SmallInt(v) => self.write_short(pos, *v), |
| 534 | + Datum::Int(v) | Datum::Date(v) | Datum::Time(v) => self.write_int(pos, *v), |
| 535 | + Datum::Long(v) => self.write_long(pos, *v), |
| 536 | + Datum::Float(v) => self.write_float(pos, *v), |
| 537 | + Datum::Double(v) => self.write_double(pos, *v), |
| 538 | + Datum::Timestamp { millis, nanos } => { |
| 539 | + let precision = match data_type { |
| 540 | + DataType::Timestamp(ts) => ts.precision(), |
| 541 | + _ => 3, |
| 542 | + }; |
| 543 | + if precision <= 3 { |
| 544 | + self.write_timestamp_compact(pos, *millis); |
| 545 | + } else { |
| 546 | + self.write_timestamp_non_compact(pos, *millis, *nanos); |
| 547 | + } |
| 548 | + } |
| 549 | + Datum::LocalZonedTimestamp { millis, nanos } => { |
| 550 | + let precision = match data_type { |
| 551 | + DataType::LocalZonedTimestamp(ts) => ts.precision(), |
| 552 | + _ => 3, |
| 553 | + }; |
| 554 | + if precision <= 3 { |
| 555 | + self.write_timestamp_compact(pos, *millis); |
| 556 | + } else { |
| 557 | + self.write_timestamp_non_compact(pos, *millis, *nanos); |
| 558 | + } |
| 559 | + } |
| 560 | + Datum::Decimal { |
| 561 | + unscaled, |
| 562 | + precision, |
| 563 | + .. |
| 564 | + } => { |
| 565 | + if *precision <= 18 { |
| 566 | + self.write_decimal_compact(pos, *unscaled as i64); |
| 567 | + } else { |
| 568 | + self.write_decimal_var_len(pos, *unscaled); |
| 569 | + } |
| 570 | + } |
| 571 | + Datum::String(s) => { |
| 572 | + if s.len() <= 7 { |
| 573 | + self.write_string_inline(pos, s); |
| 574 | + } else { |
| 575 | + self.write_string(pos, s); |
| 576 | + } |
| 577 | + } |
| 578 | + Datum::Bytes(b) => { |
| 579 | + if b.len() <= 7 { |
| 580 | + self.write_binary_inline(pos, b); |
| 581 | + } else { |
| 582 | + self.write_binary(pos, b); |
| 583 | + } |
| 584 | + } |
| 585 | + } |
| 586 | + } |
| 587 | +} |
| 588 | + |
| 589 | +/// Build a serialized BinaryRow from optional Datum values. |
| 590 | +/// Returns empty vec if all values are None. |
| 591 | +pub fn datums_to_binary_row(datums: &[(&Option<Datum>, &DataType)]) -> Vec<u8> { |
| 592 | + if datums.iter().all(|(d, _)| d.is_none()) { |
| 593 | + return vec![]; |
| 594 | + } |
| 595 | + let arity = datums.len() as i32; |
| 596 | + let mut builder = BinaryRowBuilder::new(arity); |
| 597 | + for (pos, (datum_opt, data_type)) in datums.iter().enumerate() { |
| 598 | + match datum_opt { |
| 599 | + Some(datum) => { |
| 600 | + builder.write_datum(pos, datum, data_type); |
| 601 | + } |
| 602 | + None => { |
| 603 | + builder.set_null_at(pos); |
| 604 | + } |
| 605 | + } |
| 606 | + } |
| 607 | + builder.build_serialized() |
526 | 608 | } |
527 | 609 |
|
528 | 610 | #[cfg(test)] |
@@ -756,6 +838,73 @@ mod tests { |
756 | 838 | assert_eq!(nano, 0); |
757 | 839 | } |
758 | 840 |
|
/// `write_datum` stores an int and a short string that read back unchanged.
#[test]
fn test_write_datum_int_and_string() {
    let int_type = DataType::Int(crate::spec::IntType::new());
    let string_type = DataType::VarChar(crate::spec::VarCharType::string_type());
    let number = Datum::Int(42);
    let text = Datum::String("hello".to_string());

    let mut row_builder = BinaryRowBuilder::new(2);
    row_builder.write_datum(0, &number, &int_type);
    row_builder.write_datum(1, &text, &string_type);

    let built = row_builder.build();
    assert_eq!(built.get_int(0).unwrap(), 42);
    assert_eq!(built.get_string(1).unwrap(), "hello");
}
| 858 | + |
/// A string longer than 7 bytes takes the non-inline path and still round-trips.
#[test]
fn test_write_datum_long_string() {
    let string_type = DataType::VarChar(crate::spec::VarCharType::string_type());
    let text = Datum::String("long_string_value".to_string());

    let mut row_builder = BinaryRowBuilder::new(1);
    row_builder.write_datum(0, &text, &string_type);

    let built = row_builder.build();
    assert_eq!(built.get_string(0).unwrap(), "long_string_value");
}
| 870 | + |
/// Serialized bytes from `datums_to_binary_row` decode back to the input values.
#[test]
fn test_datums_to_binary_row_roundtrip() {
    let int_type = DataType::Int(crate::spec::IntType::new());
    let string_type = DataType::VarChar(crate::spec::VarCharType::string_type());
    let number = Some(Datum::Int(100));
    let text = Some(Datum::String("abc".to_string()));

    let fields = [(&number, &int_type), (&text, &string_type)];
    let bytes = datums_to_binary_row(&fields);
    assert!(!bytes.is_empty());

    let decoded = BinaryRow::from_serialized_bytes(&bytes).unwrap();
    assert_eq!(decoded.get_int(0).unwrap(), 100);
    assert_eq!(decoded.get_string(1).unwrap(), "abc");
}
| 884 | + |
/// When every value is `None`, no row is built and the result is empty.
#[test]
fn test_datums_to_binary_row_all_none() {
    let int_type = DataType::Int(crate::spec::IntType::new());
    let missing: Option<Datum> = None;

    let fields = [(&missing, &int_type)];
    let bytes = datums_to_binary_row(&fields);
    assert!(bytes.is_empty());
}
| 893 | + |
/// A present value next to a `None` yields a row with one value and one null field.
#[test]
fn test_datums_to_binary_row_mixed_null() {
    let first_type = DataType::Int(crate::spec::IntType::new());
    let second_type = DataType::Int(crate::spec::IntType::new());
    let present = Some(Datum::Int(7));
    let missing: Option<Datum> = None;

    let fields = [(&present, &first_type), (&missing, &second_type)];
    let bytes = datums_to_binary_row(&fields);
    assert!(!bytes.is_empty());

    let decoded = BinaryRow::from_serialized_bytes(&bytes).unwrap();
    assert_eq!(decoded.get_int(0).unwrap(), 7);
    assert!(decoded.is_null_at(1));
}
| 907 | + |
759 | 908 | #[test] |
760 | 909 | fn test_get_timestamp_non_compact() { |
761 | 910 | let epoch_millis: i64 = 1_704_067_200_123; |
|
0 commit comments