diff --git a/Cargo.toml b/Cargo.toml index 4762394..975ed09 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,14 +17,14 @@ resolver = "2" members = ["simdjson-sys"] [workspace.package] -version = "0.3.0-alpha.3" +version = "0.4.0-alpha" [workspace.dependencies] -simdjson-sys = { path = "simdjson-sys", version = "0.1.0-alpha.2" } +simdjson-sys = { path = "simdjson-sys", version = "0.2.0-alpha" } [dependencies] -thiserror = "1.0" +thiserror = "2.0" simdjson-sys = { workspace = true } # serde compatibilty diff --git a/README.md b/README.md index 0c4a78a..cdae367 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![Crates.io](https://img.shields.io/crates/v/simdjson-rust?style=for-the-badge)](https://crates.io/crates/simdjson-rust) [![docs.rs](https://img.shields.io/docsrs/simdjson-rust/latest?style=for-the-badge)](https://docs.rs/simdjson-rust) -This crate currently uses [`simdjson 3.2.3`][simdjson]. You can have a try and give feedback. +This crate currently uses [`simdjson 4.2.4`][simdjson]. You can have a try and give feedback. If you @@ -110,6 +110,59 @@ fn main() -> simdjson_rust::Result<()> { } ``` +### Serde Integration + +This crate provides optional serde compatibility with SIMD-accelerated JSON serialization and deserialization. 
+ +Enable serde support by adding the feature: + +```toml +simdjson-rust = { version = "0.4.0-alpha", features = ["serde_impl"] } +``` + +#### Deserialization (Parsing JSON to Rust structs) + +```rust +use serde::Deserialize; +use simdjson_rust::dom::Parser; +use simdjson_rust::serde::de::from_element; + +#[derive(Deserialize)] +struct User { + name: String, + age: u32, + active: bool, +} + +let mut parser = Parser::default(); +let ps = r#"{"name": "Alice", "age": 30, "active": true}"#.to_padded_string(); +let elm = parser.parse(&ps)?; +let user: User = from_element(&elm)?; +``` + +#### Serialization (Rust structs to JSON) + +```rust +use serde::Serialize; +use simdjson_rust::serde::ser::to_string; + +#[derive(Serialize)] +struct User { + name: String, + age: u32, + active: bool, +} + +let user = User { + name: "Alice".to_string(), + age: 30, + active: true, +}; + +let json = to_string(&user)?; +// Output: {"name":"Alice","age":30,"active":true} +``` + ## Other interesting things There are also pure Rust port of [`simdjson`][simdjson] available here [`simd-json`](https://github.com/simd-lite/simd-json). 
diff --git a/examples/issue_20.rs b/examples/issue_20.rs index 7fc5cee..76adbef 100644 --- a/examples/issue_20.rs +++ b/examples/issue_20.rs @@ -1,4 +1,4 @@ -use simdjson_rust::{ondemand, prelude::*, Result}; +use simdjson_rust::{Result, ondemand, prelude::*}; fn main() -> Result<()> { let data = r#"{"ingredients": [{"rose_wine": {"black_sesame_seed": 3}}, {"myrtleberry": {"black_sesame_seed": 5}}, {"grilled_beef": {"black_sesame_seed": 1}}, {"parmesan_cheese": {"black_sesame_seed": 2}}, {"strawberry_jam": {"black_sesame_seed": 1}}, {"tuna": {"black_sesame_seed": 3}}, {"black_tea": {"black_sesame_seed": 8}}, {"japanese_mint": {"black_sesame_seed": 5}}, {"pork": {"black_sesame_seed": 1}}, {"french_lavender": {"black_sesame_seed": 2}}, {"nutmeg": {"black_sesame_seed": 12}}, {"carob_fruit": {"black_sesame_seed": 2}}, {"mastic_gum_leaf_oil": {"black_sesame_seed": 2}}, {"california_pepper": {"black_sesame_seed": 5}}, {"orange_peel_oil": {"black_sesame_seed": 2}}, {"eucalyptus_oil": {"black_sesame_seed": 4}}, {"monarda_punctata": {"black_sesame_seed": 1}}, {"dried_green_tea": {"black_sesame_seed": 7}}, {"sake": {"black_sesame_seed": 2}}, {"currant": {"black_sesame_seed": 2}}, {"sweet_grass_oil": {"black_sesame_seed": 2}}, {"safflower_seed": {"black_sesame_seed": 17}}, {"sparkling_wine": {"black_sesame_seed": 3}}, {"enokidake": {"black_sesame_seed": 1}}, {"raw_peanut": {"black_sesame_seed": 1}}, {"cucumber": {"black_sesame_seed": 1}}, {"russian_cheese": {"black_sesame_seed": 1}}, {"catfish": {"black_sesame_seed": 3}}, {"origanum": {"black_sesame_seed": 2}}, {"roasted_beef": {"black_sesame_seed": 1}}, {"limburger_cheese": {"black_sesame_seed": 1}}, {"cantaloupe": {"black_sesame_seed": 3}}, {"corn_mint_oil": {"black_sesame_seed": 5}}, {"marjoram": {"black_sesame_seed": 4}}, {"herring": {"black_sesame_seed": 3}}, {"kumquat_peel_oil": {"black_sesame_seed": 1}}, {"cheese": {"black_sesame_seed": 1}}, {"lavender": {"black_sesame_seed": 1}}, {"cayenne": {"black_sesame_seed": 5}}, 
{"red_kidney_bean": {"black_sesame_seed": 4}}, {"mangosteen": {"black_sesame_seed": 2}}, {"lovage_root": {"black_sesame_seed": 1}}, {"mastic_gum": {"black_sesame_seed": 2}}, {"liver": {"black_sesame_seed": 1}}, {"thyme": {"black_sesame_seed": 3}}, {"oregano": {"black_sesame_seed": 1}}, {"lamb_liver": {"black_sesame_seed": 1}}, {"fruit": {"black_sesame_seed": 2}}, {"crab": {"black_sesame_seed": 1}}, {"french_peppermint": {"black_sesame_seed": 7}}, {"jamaican_rum": {"black_sesame_seed": 4}}, {"orange": {"black_sesame_seed": 7}}, {"romano_cheese": {"black_sesame_seed": 2}}, {"keta_salmon": {"black_sesame_seed": 3}}, {"juniper_berry": {"black_sesame_seed": 5}}, {"cheddar_cheese": {"black_sesame_seed": 1}}, {"calytrix_tetragona_oil": {"black_sesame_seed": 1}}, {"sea_bass": {"black_sesame_seed": 3}}, {"raw_pork": {"black_sesame_seed": 1}}, {"pear": {"black_sesame_seed": 2}}, {"pike": {"black_sesame_seed": 3}}, {"peanut": {"black_sesame_seed": 1}}, {"kumquat": {"black_sesame_seed": 2}}, {"european_cranberry": {"black_sesame_seed": 7}}, {"lemongrass": {"black_sesame_seed": 1}}, {"gin": {"black_sesame_seed": 1}}, {"orange_juice": {"black_sesame_seed": 1}}, {"dried_fig": {"black_sesame_seed": 2}}, {"sassafras": {"black_sesame_seed": 1}}, {"geranium": {"black_sesame_seed": 1}}, {"java_citronella": {"black_sesame_seed": 2}}, {"bourbon_whiskey": {"black_sesame_seed": 1}}, {"peanut_butter": {"black_sesame_seed": 1}}, {"smoked_fish": {"black_sesame_seed": 3}}, {"pennyroyal": {"black_sesame_seed": 1}}, {"roasted_shrimp": {"black_sesame_seed": 1}}, {"concord_grape": {"black_sesame_seed": 3}}, {"munster_cheese": {"black_sesame_seed": 1}}, {"sage": {"black_sesame_seed": 3}}, {"cocoa": {"black_sesame_seed": 3}}, {"champagne_wine": {"black_sesame_seed": 3}}, {"chicory_root": {"black_sesame_seed": 2}}, {"chamaecyparis_formosensis_oil": {"black_sesame_seed": 1}}, {"crowberry": {"black_sesame_seed": 5}}, {"water_apple": {"black_sesame_seed": 2}}, {"mint_oil": {"black_sesame_seed": 4}}, 
{"wheaten_bread": {"black_sesame_seed": 1}}, {"durian": {"black_sesame_seed": 2}}, {"cloudberry": {"black_sesame_seed": 7}}, {"japanese_star_anise": {"black_sesame_seed": 1}}, {"fried_cured_pork": {"black_sesame_seed": 1}}, {"satsuma": {"black_sesame_seed": 4}}, {"winter_savory": {"black_sesame_seed": 1}}, {"cowberry": {"black_sesame_seed": 5}}, {"snap_bean": {"black_sesame_seed": 4}}, {"cabernet_sauvignon_grape": {"black_sesame_seed": 3}}, {"port_wine": {"black_sesame_seed": 3}}, {"blackberry": {"black_sesame_seed": 7}}, {"tea_tree_oil": {"black_sesame_seed": 2}}, {"shiitake": {"black_sesame_seed": 1}}, {"cumin": {"black_sesame_seed": 6}}, {"prune": {"black_sesame_seed": 2}}, {"bog_blueberry": {"black_sesame_seed": 5}}, {"celery": {"black_sesame_seed": 4}}, {"boiled_pork": {"black_sesame_seed": 1}}, {"rooibus_tea": {"black_sesame_seed": 7}}, {"eucalyptus_globulus": {"black_sesame_seed": 1}}, {"tabasco_pepper": {"black_sesame_seed": 5}}, {"fermented_shrimp": {"black_sesame_seed": 1}}, {"jackfruit": {"black_sesame_seed": 2}}, {"gruyere_cheese": {"black_sesame_seed": 3}}, {"bread": {"black_sesame_seed": 1}}, {"pineapple": {"black_sesame_seed": 1}}, {"peppermint": {"black_sesame_seed": 7}}, {"cruciferae_seed": {"black_sesame_seed": 17}}, {"israeli_orange": {"black_sesame_seed": 7}}, {"monarda_punctata_oil": {"black_sesame_seed": 1}}, {"california_orange": {"black_sesame_seed": 7}}, {"peppermint_oil": {"black_sesame_seed": 5}}, {"ceylon_tea": {"black_sesame_seed": 7}}, {"cod": {"black_sesame_seed": 3}}, {"roasted_peanut": {"black_sesame_seed": 1}}, {"fish": {"black_sesame_seed": 3}}, {"plum": {"black_sesame_seed": 3}}, {"fennel": {"black_sesame_seed": 3}}, {"toasted_oat": {"black_sesame_seed": 1}}, {"cured_pork": {"black_sesame_seed": 4}}, {"pork_liver": {"black_sesame_seed": 1}}, {"mastic_gum_fruit_oil": {"black_sesame_seed": 1}}, {"passion_fruit": {"black_sesame_seed": 7}}, {"parsley": {"black_sesame_seed": 7}}, {"soybean": {"black_sesame_seed": 4}}, {"strawberry": 
{"black_sesame_seed": 6}}, {"scotch_spearmint_oil": {"black_sesame_seed": 4}}, {"pork_sausage": {"black_sesame_seed": 1}}, {"hernandia_peltata_oil": {"black_sesame_seed": 1}}, {"lemongrass_oil": {"black_sesame_seed": 1}}, {"roasted_filbert": {"black_sesame_seed": 3}}, {"wild_berry": {"black_sesame_seed": 5}}, {"wine": {"black_sesame_seed": 3}}, {"raw_lean_fish": {"black_sesame_seed": 3}}, {"roasted_lamb": {"black_sesame_seed": 2}}, {"pinto_bean": {"black_sesame_seed": 4}}, {"spearmint_oil": {"black_sesame_seed": 1}}, {"chicken": {"black_sesame_seed": 1}}, {"guarana": {"black_sesame_seed": 2}}, {"ocimum_viride": {"black_sesame_seed": 1}}, {"lantana_camara_oil": {"black_sesame_seed": 1}}, {"roman_chamomile": {"black_sesame_seed": 2}}, {"mate": {"black_sesame_seed": 1}}, {"roasted_mate": {"black_sesame_seed": 1}}, {"palmarosa": {"black_sesame_seed": 1}}, {"clary_sage": {"black_sesame_seed": 1}}, {"mint": {"black_sesame_seed": 5}}, {"myrtle": {"black_sesame_seed": 4}}, {"cabernet_sauvignon_wine": {"black_sesame_seed": 3}}, {"fermented_tea": {"black_sesame_seed": 7}}, {"brown_rice": {"black_sesame_seed": 1}}, {"mastic_gum_oil": {"black_sesame_seed": 2}}, {"vanilla": {"black_sesame_seed": 3}}, {"popcorn": {"black_sesame_seed": 2}}, {"eucalyptus_macarthurii": {"black_sesame_seed": 1}}, {"shrimp": {"black_sesame_seed": 1}}, {"malagueta_pepper": {"black_sesame_seed": 5}}, {"milk": {"black_sesame_seed": 4}}, {"muscadine_grape": {"black_sesame_seed": 3}}, {"mung_bean": {"black_sesame_seed": 4}}, {"kaffir_lime": {"black_sesame_seed": 5}}, {"huckleberry": {"black_sesame_seed": 5}}, {"tangerine_juice": {"black_sesame_seed": 1}}, {"muscat_grape": {"black_sesame_seed": 3}}, {"eucalyptus_bakeries_oil": {"black_sesame_seed": 1}}, {"smoked_pork_belly": {"black_sesame_seed": 1}}, {"camembert_cheese": {"black_sesame_seed": 1}}, {"haddock": {"black_sesame_seed": 3}}, {"cymbopogon_sennaarensis": {"black_sesame_seed": 1}}, {"whitefish": {"black_sesame_seed": 3}}, {"yellow_passion_fruit": 
{"black_sesame_seed": 1}}, {"calytrix_tetragona": {"black_sesame_seed": 1}}, {"roasted_malt": {"black_sesame_seed": 3}}, {"thymus": {"black_sesame_seed": 6}}, {"mandarin_peel": {"black_sesame_seed": 3}}, {"loganberry": {"black_sesame_seed": 5}}, {"tangerine_peel_oil": {"black_sesame_seed": 1}}, {"mango": {"black_sesame_seed": 6}}, {"roman_chamomile_oil": {"black_sesame_seed": 1}}, {"muskmelon": {"black_sesame_seed": 3}}, {"roasted_chicory_root": {"black_sesame_seed": 2}}, {"sherry": {"black_sesame_seed": 3}}, {"fatty_fish": {"black_sesame_seed": 3}}, {"lime_juice": {"black_sesame_seed": 1}}, {"dried_black_tea": {"black_sesame_seed": 7}}, {"malay_apple": {"black_sesame_seed": 2}}, {"navy_bean": {"black_sesame_seed": 4}}, {"smoked_pork": {"black_sesame_seed": 1}}, {"mutton_liver": {"black_sesame_seed": 1}}, {"seychelles_tea": {"black_sesame_seed": 7}}, {"lime": {"black_sesame_seed": 5}}, {"raw_fish": {"black_sesame_seed": 3}}, {"papaya": {"black_sesame_seed": 5}}, {"green_tea": {"black_sesame_seed": 7}}, {"citrus_peel_oil": {"black_sesame_seed": 7}}, {"seed": {"black_sesame_seed": 17}}, {"raw_fatty_fish": {"black_sesame_seed": 3}}, {"parsnip_fruit": {"black_sesame_seed": 2}}, {"parsnip": {"black_sesame_seed": 1}}, {"blenheim_apricot": {"black_sesame_seed": 2}}, {"buchu": {"black_sesame_seed": 4}}, {"blueberry": {"black_sesame_seed": 6}}, {"sauvignon_blanc_grape": {"black_sesame_seed": 3}}, {"kiwi": {"black_sesame_seed": 2}}, {"white_wine": {"black_sesame_seed": 4}}, {"long_pepper": {"black_sesame_seed": 5}}, {"fried_pork": {"black_sesame_seed": 1}}, {"kidney_bean": {"black_sesame_seed": 4}}, {"wild_raspberry": {"black_sesame_seed": 5}}, {"licorice": {"black_sesame_seed": 3}}, {"grapefruit": {"black_sesame_seed": 4}}, {"roasted_coconut": {"black_sesame_seed": 1}}, {"buchu_oil": {"black_sesame_seed": 4}}, {"guinea_pepper": {"black_sesame_seed": 5}}, {"burley_tobacco": {"black_sesame_seed": 1}}, {"monkey_orange": {"black_sesame_seed": 4}}, {"cooked_apple": 
{"black_sesame_seed": 2}}, {"roasted_cocoa": {"black_sesame_seed": 3}}, {"cream_cheese": {"black_sesame_seed": 1}}, {"smoked_fatty_fish": {"black_sesame_seed": 3}}, {"oatmeal": {"black_sesame_seed": 2}}, {"ocimum_gratissimum": {"black_sesame_seed": 1}}, {"coconut": {"black_sesame_seed": 1}}, {"roasted_pecan": {"black_sesame_seed": 3}}, {"horse_mackerel": {"black_sesame_seed": 3}}, {"peach": {"black_sesame_seed": 2}}, {"dwarf_quince": {"black_sesame_seed": 2}}, {"seed_oil": {"black_sesame_seed": 3}}, {"lingonberry": {"black_sesame_seed": 2}}, {"capsicum": {"black_sesame_seed": 5}}, {"leaf": {"black_sesame_seed": 6}}, {"jasmine_tea": {"black_sesame_seed": 7}}, {"elderberry": {"black_sesame_seed": 7}}, {"cape_gooseberry": {"black_sesame_seed": 6}}, {"roasted_spanish_peanut": {"black_sesame_seed": 1}}, {"lean_fish": {"black_sesame_seed": 3}}, {"comte_cheese": {"black_sesame_seed": 1}}, {"root": {"black_sesame_seed": 1}}, {"ginger": {"black_sesame_seed": 10}}, {"cherry": {"black_sesame_seed": 2}}, {"eucalyptus_dives": {"black_sesame_seed": 1}}, {"uncured_boiled_pork": {"black_sesame_seed": 1}}, {"raw_lamb": {"black_sesame_seed": 2}}, {"salmon": {"black_sesame_seed": 3}}, {"crownberry": {"black_sesame_seed": 5}}, {"rapeseed": {"black_sesame_seed": 17}}, {"dried_parsley": {"black_sesame_seed": 7}}, {"lemon_balm": {"black_sesame_seed": 3}}, {"roasted_barley": {"black_sesame_seed": 2}}, {"chicken_liver": {"black_sesame_seed": 1}}, {"tangerine": {"black_sesame_seed": 5}}, {"cilantro": {"black_sesame_seed": 1}}, {"fenugreek": {"black_sesame_seed": 1}}, {"swiss_cheese": {"black_sesame_seed": 1}}, {"raw_chicken": {"black_sesame_seed": 1}}, {"sheep_cheese": {"black_sesame_seed": 1}}, {"celery_seed": {"black_sesame_seed": 3}}, {"french_bean": {"black_sesame_seed": 4}}, {"whiskey": {"black_sesame_seed": 3}}, {"tuber": {"black_sesame_seed": 1}}, {"grape": {"black_sesame_seed": 3}}, {"coffee": {"black_sesame_seed": 3}}, {"filbert": {"black_sesame_seed": 2}}, {"peanut_oil": 
{"black_sesame_seed": 1}}, {"quince": {"black_sesame_seed": 2}}, {"spanish_sage": {"black_sesame_seed": 1}}, {"lemon_peel_oil": {"black_sesame_seed": 2}}, {"smoked_herring": {"black_sesame_seed": 3}}, {"coriander": {"black_sesame_seed": 6}}, {"rice": {"black_sesame_seed": 1}}, {"cinnamon": {"black_sesame_seed": 7}}, {"roasted_pork": {"black_sesame_seed": 1}}, {"chinese_quince": {"black_sesame_seed": 3}}, {"chive": {"black_sesame_seed": 3}}, {"grapefruit_juice": {"black_sesame_seed": 4}}, {"fried_chicken": {"black_sesame_seed": 2}}, {"emmental_cheese": {"black_sesame_seed": 1}}, {"melon": {"black_sesame_seed": 3}}, {"laurel": {"black_sesame_seed": 7}}, {"nectarine": {"black_sesame_seed": 4}}, {"wort": {"black_sesame_seed": 3}}, {"rum": {"black_sesame_seed": 4}}, {"caraway_seed": {"black_sesame_seed": 1}}, {"calamus": {"black_sesame_seed": 2}}, {"lemon_peel": {"black_sesame_seed": 2}}, {"watermelon": {"black_sesame_seed": 2}}, {"capsicum_annuum": {"black_sesame_seed": 5}}, {"hop": {"black_sesame_seed": 3}}, {"uncured_pork": {"black_sesame_seed": 1}}, {"provolone_cheese": {"black_sesame_seed": 1}}, {"boiled_beef": {"black_sesame_seed": 1}}, {"mountain_papaya": {"black_sesame_seed": 2}}, {"uncured_smoked_pork": {"black_sesame_seed": 1}}, {"spearmint": {"black_sesame_seed": 1}}, {"raw_beef": {"black_sesame_seed": 1}}, {"chinese_star_anise": {"black_sesame_seed": 1}}, {"boiled_crab": {"black_sesame_seed": 1}}, {"pawpaw": {"black_sesame_seed": 1}}, {"italian_lime": {"black_sesame_seed": 5}}, {"wheat_bread": {"black_sesame_seed": 1}}, {"calabash_nutmeg": {"black_sesame_seed": 3}}, {"yeast": {"black_sesame_seed": 1}}, {"choke_cherry": {"black_sesame_seed": 2}}, {"chokeberry": {"black_sesame_seed": 5}}, {"rice_husk": {"black_sesame_seed": 1}}, {"goat_cheese": {"black_sesame_seed": 1}}, {"finocchoi_fennel_oil": {"black_sesame_seed": 1}}, {"thai_pepper": {"black_sesame_seed": 5}}, {"sauvignon_grape": {"black_sesame_seed": 3}}, {"rose_apple": {"black_sesame_seed": 3}}, 
{"sour_cherry": {"black_sesame_seed": 2}}, {"crisp_bread": {"black_sesame_seed": 3}}, {"pepper": {"black_sesame_seed": 5}}, {"corn_mint": {"black_sesame_seed": 5}}, {"dried_kidney_bean": {"black_sesame_seed": 4}}, {"origanum_floribundum": {"black_sesame_seed": 1}}, {"hinoki_oil": {"black_sesame_seed": 1}}, {"prickly_pear": {"black_sesame_seed": 2}}, {"porcini": {"black_sesame_seed": 1}}, {"palm": {"black_sesame_seed": 1}}, {"cumin_fruit_oil": {"black_sesame_seed": 1}}, {"raspberry": {"black_sesame_seed": 7}}, {"pimento": {"black_sesame_seed": 2}}, {"fermented_russian_black_tea": {"black_sesame_seed": 7}}, {"ceylon_citronella": {"black_sesame_seed": 1}}, {"hinoki": {"black_sesame_seed": 1}}, {"eucalyptus": {"black_sesame_seed": 6}}, {"tarragon": {"black_sesame_seed": 1}}, {"mantis_shrimp": {"black_sesame_seed": 1}}, {"citrus_peel": {"black_sesame_seed": 7}}, {"grilled_pork": {"black_sesame_seed": 1}}, {"green_bell_pepper": {"black_sesame_seed": 5}}, {"peru_balsam": {"black_sesame_seed": 2}}, {"elderberry_fruit": {"black_sesame_seed": 2}}, {"cranberry": {"black_sesame_seed": 7}}, {"red_currant": {"black_sesame_seed": 5}}, {"orange_peel": {"black_sesame_seed": 2}}, {"raw_bean": {"black_sesame_seed": 4}}, {"corn": {"black_sesame_seed": 3}}, {"galanga": {"black_sesame_seed": 1}}, {"lima_bean": {"black_sesame_seed": 4}}, {"brewed_tea": {"black_sesame_seed": 7}}, {"feta_cheese": {"black_sesame_seed": 1}}, {"butter": {"black_sesame_seed": 3}}, {"oregano_oil": {"black_sesame_seed": 1}}, {"orthodon_citraliferum": {"black_sesame_seed": 1}}, {"satureia_thymera": {"black_sesame_seed": 1}}, {"sweetfish": {"black_sesame_seed": 3}}, {"prunus": {"black_sesame_seed": 2}}, {"turmeric": {"black_sesame_seed": 4}}, {"perovski_abrotanoides_oil": {"black_sesame_seed": 1}}, {"clove_oil": {"black_sesame_seed": 2}}, {"litchi": {"black_sesame_seed": 3}}, {"kelp": {"black_sesame_seed": 1}}, {"tahiti_vanilla": {"black_sesame_seed": 2}}, {"feijoa": {"black_sesame_seed": 4}}, {"globefish": 
{"black_sesame_seed": 3}}, {"caraway": {"black_sesame_seed": 2}}, {"japanese_peppermint_oil": {"black_sesame_seed": 1}}, {"lovage": {"black_sesame_seed": 7}}, {"dill": {"black_sesame_seed": 9}}, {"mackerel": {"black_sesame_seed": 3}}, {"mexican_lime": {"black_sesame_seed": 5}}, {"pecan": {"black_sesame_seed": 3}}, {"mushroom": {"black_sesame_seed": 1}}, {"lovage_leaf": {"black_sesame_seed": 1}}, {"eel": {"black_sesame_seed": 3}}, {"cognac": {"black_sesame_seed": 5}}, {"fried_beef": {"black_sesame_seed": 2}}, {"red_bean": {"black_sesame_seed": 4}}, {"star_anise": {"black_sesame_seed": 1}}, {"citrus_juice": {"black_sesame_seed": 2}}, {"neroli_bigarade": {"black_sesame_seed": 1}}, {"hop_oil": {"black_sesame_seed": 3}}, {"roasted_chicken": {"black_sesame_seed": 1}}, {"blue_cheese": {"black_sesame_seed": 1}}, {"pouching_tea": {"black_sesame_seed": 7}}, {"domiati_cheese": {"black_sesame_seed": 1}}, {"callitris": {"black_sesame_seed": 1}}, {"roasted_green_tea": {"black_sesame_seed": 7}}, {"cherimoya": {"black_sesame_seed": 1}}, {"elder_flower": {"black_sesame_seed": 1}}, {"guava": {"black_sesame_seed": 6}}, {"lime_peel_oil": {"black_sesame_seed": 3}}, {"matsutake": {"black_sesame_seed": 1}}, {"olive": {"black_sesame_seed": 2}}, {"clove": {"black_sesame_seed": 5}}, {"ceylon_tea_cinnamon_leaf": {"black_sesame_seed": 1}}, {"sperm_whale_oil": {"black_sesame_seed": 1}}, {"california_orange_peel": {"black_sesame_seed": 2}}, {"rye_bread": {"black_sesame_seed": 3}}, {"citrus": {"black_sesame_seed": 4}}, {"mozzarella_cheese": {"black_sesame_seed": 1}}, {"petitgrain": {"black_sesame_seed": 1}}, {"boiled_chicken": {"black_sesame_seed": 1}}, {"roasted_turkey": {"black_sesame_seed": 1}}, {"dill_seed": {"black_sesame_seed": 17}}, {"mandarin": {"black_sesame_seed": 4}}, {"scallop": {"black_sesame_seed": 1}}, {"corn_oil": {"black_sesame_seed": 2}}, {"carrot": {"black_sesame_seed": 4}}, {"eucalyptus_globulus_oil": {"black_sesame_seed": 1}}, {"white_bread": {"black_sesame_seed": 1}}, 
{"java_citronella_oil": {"black_sesame_seed": 2}}, {"rosemary": {"black_sesame_seed": 5}}, {"tamarind": {"black_sesame_seed": 4}}, {"scotch_spearmint": {"black_sesame_seed": 4}}, {"rabbiteye_blueberry": {"black_sesame_seed": 5}}, {"fennel_oil": {"black_sesame_seed": 1}}, {"tilsit_cheese": {"black_sesame_seed": 1}}, {"squid": {"black_sesame_seed": 1}}, {"cardamom": {"black_sesame_seed": 7}}, {"tea": {"black_sesame_seed": 7}}, {"pilchard": {"black_sesame_seed": 3}}, {"starfruit": {"black_sesame_seed": 4}}, {"wild_strawberry": {"black_sesame_seed": 5}}, {"malt": {"black_sesame_seed": 3}}, {"tomato": {"black_sesame_seed": 3}}, {"lemon": {"black_sesame_seed": 5}}, {"loquat": {"black_sesame_seed": 3}}, {"roquefort_cheese": {"black_sesame_seed": 1}}, {"mentha_silvestris_oil": {"black_sesame_seed": 1}}, {"palm_fruit": {"black_sesame_seed": 2}}, {"mandarin_peel_oil": {"black_sesame_seed": 3}}, {"fig": {"black_sesame_seed": 2}}, {"kola_tea": {"black_sesame_seed": 7}}, {"japanese_peppermint": {"black_sesame_seed": 7}}, {"caja_fruit": {"black_sesame_seed": 2}}, {"watercress": {"black_sesame_seed": 1}}, {"hog_plum": {"black_sesame_seed": 2}}, {"buckwheat": {"black_sesame_seed": 2}}, {"red_wine": {"black_sesame_seed": 3}}, {"botrytized_wine": {"black_sesame_seed": 3}}, {"ethiopian_pepper": {"black_sesame_seed": 5}}, {"smoked_salmon": {"black_sesame_seed": 3}}, {"lamb": {"black_sesame_seed": 2}}, {"mace": {"black_sesame_seed": 6}}, {"echinacea": {"black_sesame_seed": 1}}, {"cottage_cheese": {"black_sesame_seed": 1}}]} diff --git a/examples/quickstart.rs b/examples/quickstart.rs index d82c443..a33efb7 100644 --- a/examples/quickstart.rs +++ b/examples/quickstart.rs @@ -1,4 +1,4 @@ -use simdjson_rust::{ondemand, prelude::*, Result}; +use simdjson_rust::{Result, ondemand, prelude::*}; fn main() -> Result<()> { let ps = load_padded_string("simdjson-sys/simdjson/jsonexamples/twitter.json")?; diff --git a/simdjson-sys/Cargo.toml b/simdjson-sys/Cargo.toml index 63efe30..b83a15f 100644 
--- a/simdjson-sys/Cargo.toml +++ b/simdjson-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "simdjson-sys" -version = "0.1.0-alpha.2" +version = "0.2.0-alpha" edition = "2021" authors = ["SunDoge <384813529@qq.com>"] license = "Apache-2.0" @@ -16,5 +16,5 @@ exclude = ["simdjson/", "!simdjson/singleheader/simdjson.*"] [dependencies] [build-dependencies] -bindgen = "0.66.1" -cc = { version = "1.0.83", features = ["parallel"] } +bindgen = "0.72.1" +cc = { version = "1.2.56", features = ["parallel"] } diff --git a/simdjson-sys/build.rs b/simdjson-sys/build.rs index a9c00cb..399dd6d 100644 --- a/simdjson-sys/build.rs +++ b/simdjson-sys/build.rs @@ -3,7 +3,7 @@ use std::{env, path::PathBuf}; fn main() { cc::Build::new() .cpp(true) - .flag_if_supported("-std=c++17") + .flag_if_supported("-std=c++20") .flag_if_supported("/std:c++20") // error C7555: use of designated initializers requires at least '/std:c++20' .flag_if_supported("-pthread") .flag_if_supported("-O3") @@ -22,7 +22,7 @@ fn main() { .header("src/simdjson_c_api.h") // Tell cargo to invalidate the built crate whenever any of the // included header files changed. - .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) // Finish the builder and generate the bindings. .generate() // Unwrap the Result and panic on failure. 
diff --git a/simdjson-sys/simdjson b/simdjson-sys/simdjson index bf849e3..980f2ad 160000 --- a/simdjson-sys/simdjson +++ b/simdjson-sys/simdjson @@ -1 +1 @@ -Subproject commit bf849e36191d4cf2442f4af57a794103df3e2979 +Subproject commit 980f2ad3afb12729157b44ed33d2bac41b67b54b diff --git a/simdjson-sys/src/simdjson_c_api.cpp b/simdjson-sys/src/simdjson_c_api.cpp index 09c974d..28a5f05 100644 --- a/simdjson-sys/src/simdjson_c_api.cpp +++ b/simdjson-sys/src/simdjson_c_api.cpp @@ -1,5 +1,5 @@ -#include "simdjson_c_api.h" #include "simdjson.h" +#include "simdjson_c_api.h" #include #include #include @@ -517,3 +517,89 @@ bool SJ_DOM_document_stream_iterator_not_equal( SJ_DOM_document_stream_iterator *rhs) { return *cast_to_type(lhs) != *cast_to_type(rhs); } + +// builder::string_builder +IMPL_HANDLE(SJ_string_builder, simdjson::builder::string_builder) + +SJ_string_builder *SJ_string_builder_new(size_t initial_capacity) { + return object_to_pointer( + simdjson::builder::string_builder(initial_capacity)); +} + +void SJ_string_builder_clear(SJ_string_builder *sb) { + cast_to_type(sb)->clear(); +} + +void SJ_string_builder_append_bool(SJ_string_builder *sb, bool v) { + cast_to_type(sb)->append(v); +} + +void SJ_string_builder_append_int64(SJ_string_builder *sb, int64_t v) { + cast_to_type(sb)->append(v); +} + +void SJ_string_builder_append_uint64(SJ_string_builder *sb, uint64_t v) { + cast_to_type(sb)->append(v); +} + +void SJ_string_builder_append_double(SJ_string_builder *sb, double v) { + cast_to_type(sb)->append(v); +} + +void SJ_string_builder_append_null(SJ_string_builder *sb) { + cast_to_type(sb)->append_null(); +} + +void SJ_string_builder_append_char(SJ_string_builder *sb, char c) { + cast_to_type(sb)->append(c); +} + +void SJ_string_builder_escape_and_append_with_quotes(SJ_string_builder *sb, + const char *str, + size_t len) { + cast_to_type(sb)->escape_and_append_with_quotes(std::string_view(str, len)); +} + +void SJ_string_builder_start_object(SJ_string_builder 
*sb) { + cast_to_type(sb)->start_object(); +} + +void SJ_string_builder_end_object(SJ_string_builder *sb) { + cast_to_type(sb)->end_object(); +} + +void SJ_string_builder_start_array(SJ_string_builder *sb) { + cast_to_type(sb)->start_array(); +} + +void SJ_string_builder_end_array(SJ_string_builder *sb) { + cast_to_type(sb)->end_array(); +} + +void SJ_string_builder_append_comma(SJ_string_builder *sb) { + cast_to_type(sb)->append_comma(); +} + +void SJ_string_builder_append_colon(SJ_string_builder *sb) { + cast_to_type(sb)->append_colon(); +} + +void SJ_string_builder_append_raw(SJ_string_builder *sb, const char *str, + size_t len) { + cast_to_type(sb)->append_raw(str, len); +} + +SJ_string_view_result SJ_string_builder_view(const SJ_string_builder *sb) { + std::string_view sv; + const error_code error = + cast_to_type(const_cast(sb))->view().get(sv); + return {static_cast(error), {.data = sv.data(), .len = sv.size()}}; +} + +bool SJ_string_builder_validate_unicode(const SJ_string_builder *sb) { + return cast_to_type(const_cast(sb))->validate_unicode(); +} + +size_t SJ_string_builder_size(const SJ_string_builder *sb) { + return cast_to_type(const_cast(sb))->size(); +} diff --git a/simdjson-sys/src/simdjson_c_api.h b/simdjson-sys/src/simdjson_c_api.h index fb0e1a7..466421f 100644 --- a/simdjson-sys/src/simdjson_c_api.h +++ b/simdjson-sys/src/simdjson_c_api.h @@ -233,9 +233,9 @@ SJ_DOM_parser *SJ_DOM_parser_new(size_t max_capacity); SJ_DOM_element_result SJ_DOM_parser_parse(SJ_DOM_parser *parser, const char *json, size_t len); SJ_DOM_element_result SJ_DOM_parser_parse_into_document(SJ_DOM_parser *parser, - SJ_DOM_document *doc, - const char *json, - size_t len); + SJ_DOM_document *doc, + const char *json, + size_t len); SJ_DOM_document_stream_result SJ_DOM_parser_parse_many(SJ_DOM_parser *parser, const char *json, size_t len, @@ -297,6 +297,32 @@ DEFINE_GET_V2(SJ_DOM_document_stream_iterator, void, step) bool SJ_DOM_document_stream_iterator_not_equal( 
SJ_DOM_document_stream_iterator *lhs, SJ_DOM_document_stream_iterator *rhs); +// builder::string_builder +DEFINE_HANDLE(SJ_string_builder) + +SJ_string_builder *SJ_string_builder_new(size_t initial_capacity); +void SJ_string_builder_clear(SJ_string_builder *sb); +void SJ_string_builder_append_bool(SJ_string_builder *sb, bool v); +void SJ_string_builder_append_int64(SJ_string_builder *sb, int64_t v); +void SJ_string_builder_append_uint64(SJ_string_builder *sb, uint64_t v); +void SJ_string_builder_append_double(SJ_string_builder *sb, double v); +void SJ_string_builder_append_null(SJ_string_builder *sb); +void SJ_string_builder_append_char(SJ_string_builder *sb, char c); +void SJ_string_builder_escape_and_append_with_quotes(SJ_string_builder *sb, + const char *str, + size_t len); +void SJ_string_builder_start_object(SJ_string_builder *sb); +void SJ_string_builder_end_object(SJ_string_builder *sb); +void SJ_string_builder_start_array(SJ_string_builder *sb); +void SJ_string_builder_end_array(SJ_string_builder *sb); +void SJ_string_builder_append_comma(SJ_string_builder *sb); +void SJ_string_builder_append_colon(SJ_string_builder *sb); +void SJ_string_builder_append_raw(SJ_string_builder *sb, const char *str, + size_t len); +SJ_string_view_result SJ_string_builder_view(const SJ_string_builder *sb); +bool SJ_string_builder_validate_unicode(const SJ_string_builder *sb); +size_t SJ_string_builder_size(const SJ_string_builder *sb); + #ifdef __cplusplus } #endif diff --git a/src/builder.rs b/src/builder.rs new file mode 100644 index 0000000..e45ce5b --- /dev/null +++ b/src/builder.rs @@ -0,0 +1,160 @@ +use std::ptr::NonNull; + +use simdjson_sys as ffi; + +use crate::{Result, SimdJsonError}; + +const DEFAULT_INITIAL_CAPACITY: usize = 1024; + +/// A high-performance JSON string builder backed by simdjson's SIMD-accelerated +/// string_builder. Provides efficient JSON serialization with automatic string +/// escaping and structural token management. 
+pub struct StringBuilder { + ptr: NonNull, +} + +impl StringBuilder { + /// Creates a new StringBuilder with the default initial capacity (1KB). + pub fn new() -> Self { + Self::with_capacity(DEFAULT_INITIAL_CAPACITY) + } + + /// Creates a new StringBuilder with the specified initial capacity in bytes. + pub fn with_capacity(capacity: usize) -> Self { + let ptr = unsafe { NonNull::new_unchecked(ffi::SJ_string_builder_new(capacity)) }; + Self { ptr } + } + + /// Clears the contents of the builder, resetting it to empty while retaining + /// the allocated capacity. + pub fn clear(&mut self) { + unsafe { ffi::SJ_string_builder_clear(self.ptr.as_ptr()) } + } + + /// Appends a boolean value (`true` or `false`). + pub fn append_bool(&mut self, v: bool) { + unsafe { ffi::SJ_string_builder_append_bool(self.ptr.as_ptr(), v) } + } + + /// Appends a signed 64-bit integer. + pub fn append_i64(&mut self, v: i64) { + unsafe { ffi::SJ_string_builder_append_int64(self.ptr.as_ptr(), v) } + } + + /// Appends an unsigned 64-bit integer. + pub fn append_u64(&mut self, v: u64) { + unsafe { ffi::SJ_string_builder_append_uint64(self.ptr.as_ptr(), v) } + } + + /// Appends a 64-bit floating-point number. + pub fn append_f64(&mut self, v: f64) { + unsafe { ffi::SJ_string_builder_append_double(self.ptr.as_ptr(), v) } + } + + /// Appends the JSON `null` literal. + pub fn append_null(&mut self) { + unsafe { ffi::SJ_string_builder_append_null(self.ptr.as_ptr()) } + } + + /// Appends a single character (unescaped). + pub fn append_char(&mut self, c: char) { + unsafe { ffi::SJ_string_builder_append_char(self.ptr.as_ptr(), c as i8) } + } + + /// Appends a string with JSON escaping and surrounding double quotes. + /// This uses SIMD-accelerated escaping for performance. 
+ pub fn append_string(&mut self, s: &str) { + unsafe { + ffi::SJ_string_builder_escape_and_append_with_quotes( + self.ptr.as_ptr(), + s.as_ptr().cast(), + s.len(), + ) + } + } + + /// Appends the opening brace `{` for a JSON object. + pub fn start_object(&mut self) { + unsafe { ffi::SJ_string_builder_start_object(self.ptr.as_ptr()) } + } + + /// Appends the closing brace `}` for a JSON object. + pub fn end_object(&mut self) { + unsafe { ffi::SJ_string_builder_end_object(self.ptr.as_ptr()) } + } + + /// Appends the opening bracket `[` for a JSON array. + pub fn start_array(&mut self) { + unsafe { ffi::SJ_string_builder_start_array(self.ptr.as_ptr()) } + } + + /// Appends the closing bracket `]` for a JSON array. + pub fn end_array(&mut self) { + unsafe { ffi::SJ_string_builder_end_array(self.ptr.as_ptr()) } + } + + /// Appends a comma `,` separator. + pub fn append_comma(&mut self) { + unsafe { ffi::SJ_string_builder_append_comma(self.ptr.as_ptr()) } + } + + /// Appends a colon `:` separator (for object key-value pairs). + pub fn append_colon(&mut self) { + unsafe { ffi::SJ_string_builder_append_colon(self.ptr.as_ptr()) } + } + + /// Appends raw bytes without escaping. Use with caution — the caller must + /// ensure the content is valid JSON. + pub fn append_raw(&mut self, s: &str) { + unsafe { ffi::SJ_string_builder_append_raw(self.ptr.as_ptr(), s.as_ptr().cast(), s.len()) } + } + + /// Returns a view of the written JSON buffer as a string slice. + pub fn view(&self) -> Result<&str> { + unsafe { + let result = ffi::SJ_string_builder_view(self.ptr.as_ptr()); + if result.error != 0 { + return Err(SimdJsonError::from(result.error)); + } + let slice = std::slice::from_raw_parts(result.value.data.cast(), result.value.len); + Ok(std::str::from_utf8_unchecked(slice)) + } + } + + /// Validates that the content is valid UTF-8. 
+ pub fn validate_unicode(&self) -> bool { + unsafe { ffi::SJ_string_builder_validate_unicode(self.ptr.as_ptr()) } + } + + /// Returns the current size of the written JSON buffer in bytes. + pub fn size(&self) -> usize { + unsafe { ffi::SJ_string_builder_size(self.ptr.as_ptr()) } + } + + /// Consumes the builder and returns the JSON string. + pub fn into_string(self) -> Result { + let s = self.view()?.to_owned(); + Ok(s) + } +} + +impl Default for StringBuilder { + fn default() -> Self { + Self::new() + } +} + +impl Drop for StringBuilder { + fn drop(&mut self) { + unsafe { ffi::SJ_string_builder_free(self.ptr.as_ptr()) } + } +} + +impl std::fmt::Display for StringBuilder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.view() { + Ok(s) => write!(f, "{s}"), + Err(e) => write!(f, ""), + } + } +} diff --git a/src/dom/array.rs b/src/dom/array.rs index 4cc50b3..46d4f7c 100644 --- a/src/dom/array.rs +++ b/src/dom/array.rs @@ -4,8 +4,8 @@ use simdjson_sys as ffi; use super::{document::Document, element::Element}; use crate::{ - macros::{impl_drop, map_ptr_result}, Result, + macros::{impl_drop, map_ptr_result}, }; pub struct Array<'a> { @@ -21,7 +21,7 @@ impl<'a> Array<'a> { } } - pub fn iter(&self) -> ArrayIter { + pub fn iter(&self) -> ArrayIter<'_> { let begin = unsafe { NonNull::new_unchecked(ffi::SJ_DOM_array_begin(self.ptr.as_ptr())) }; let end = unsafe { NonNull::new_unchecked(ffi::SJ_DOM_array_end(self.ptr.as_ptr())) }; ArrayIter::new(begin, end) diff --git a/src/dom/document_stream.rs b/src/dom/document_stream.rs index b326847..a920e2c 100644 --- a/src/dom/document_stream.rs +++ b/src/dom/document_stream.rs @@ -4,8 +4,8 @@ use simdjson_sys as ffi; use super::Element; use crate::{ - macros::{impl_drop, map_ptr_result}, Result, + macros::{impl_drop, map_ptr_result}, }; pub struct DocumentStream { @@ -17,7 +17,7 @@ impl DocumentStream { Self { ptr } } - pub fn iter(&self) -> DocumentStreamIter { + pub fn iter(&self) -> 
DocumentStreamIter<'_> { let begin = unsafe { NonNull::new_unchecked(ffi::SJ_DOM_document_stream_begin(self.ptr.as_ptr())) }; let end = diff --git a/src/dom/element.rs b/src/dom/element.rs index 15beade..2d8dd47 100644 --- a/src/dom/element.rs +++ b/src/dom/element.rs @@ -4,20 +4,20 @@ use simdjson_sys as ffi; use super::{array::Array, document::Document, object::Object}; use crate::{ + Result, macros::{impl_drop, map_primitive_result, map_ptr_result}, utils::string_view_struct_to_str, - Result, }; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ElementType { - Array = '[' as _, - Object = '{' as _, - Int64 = 'l' as _, - UInt64 = 'u' as _, - Double = 'd' as _, - String = '"' as _, - Bool = 't' as _, + Array = '[' as _, + Object = '{' as _, + Int64 = 'l' as _, + UInt64 = 'u' as _, + Double = 'd' as _, + String = '"' as _, + Bool = 't' as _, NullValue = 'n' as _, } @@ -54,11 +54,11 @@ impl<'a> Element<'a> { unsafe { ElementType::from(ffi::SJ_DOM_element_type(self.ptr.as_ptr())) } } - pub fn get_array(&self) -> Result { + pub fn get_array(&self) -> Result> { map_ptr_result!(ffi::SJ_DOM_element_get_array(self.ptr.as_ptr())).map(Array::new) } - pub fn get_object(&self) -> Result { + pub fn get_object(&self) -> Result> { map_ptr_result!(ffi::SJ_DOM_element_get_object(self.ptr.as_ptr())).map(Object::new) } @@ -83,7 +83,11 @@ impl<'a> Element<'a> { map_primitive_result!(ffi::SJ_DOM_element_get_bool(self.ptr.as_ptr())) } - pub fn at_pointer(&self, json_pointer: &str) -> Result { + pub fn is_null(&self) -> bool { + self.get_type() == ElementType::NullValue + } + + pub fn at_pointer(&self, json_pointer: &str) -> Result> { map_ptr_result!(ffi::SJ_DOM_element_at_pointer( self.ptr.as_ptr(), json_pointer.as_ptr().cast(), @@ -94,3 +98,13 @@ impl<'a> Element<'a> { } impl_drop!(Element<'a>, ffi::SJ_DOM_element_free); + +#[cfg(feature = "serde_impl")] +impl std::fmt::Display for Element<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match 
crate::serde::value::element_to_value(self) { + Ok(value) => write!(f, "{value}"), + Err(e) => write!(f, ""), + } + } +} diff --git a/src/dom/object.rs b/src/dom/object.rs index aa25591..a34d8ef 100644 --- a/src/dom/object.rs +++ b/src/dom/object.rs @@ -2,7 +2,7 @@ use std::{marker::PhantomData, ptr::NonNull}; use simdjson_sys as ffi; -use super::{document::Document, Element}; +use super::{Element, document::Document}; use crate::{macros::impl_drop, utils::string_view_struct_to_str}; pub struct Object<'a> { @@ -18,7 +18,7 @@ impl<'a> Object<'a> { } } - pub fn iter(&self) -> ObjectIter { + pub fn iter(&self) -> ObjectIter<'_> { let begin = unsafe { NonNull::new_unchecked(ffi::SJ_DOM_object_begin(self.ptr.as_ptr())) }; let end = unsafe { NonNull::new_unchecked(ffi::SJ_DOM_object_end(self.ptr.as_ptr())) }; ObjectIter::new(begin, end) diff --git a/src/dom/parser.rs b/src/dom/parser.rs index 8b8baa0..3f07ae5 100644 --- a/src/dom/parser.rs +++ b/src/dom/parser.rs @@ -5,8 +5,8 @@ use simdjson_sys as ffi; use super::{document::Document, document_stream::DocumentStream, element::Element}; use crate::{ - macros::{impl_drop, map_ptr_result}, Result, + macros::{impl_drop, map_ptr_result}, }; pub struct Parser { @@ -25,7 +25,7 @@ impl Parser { Self { ptr } } - pub fn parse(&mut self, padded_string: &String) -> Result { + pub fn parse(&mut self, padded_string: &str) -> Result> { map_ptr_result!(ffi::SJ_DOM_parser_parse( self.ptr.as_ptr(), padded_string.as_ptr().cast(), @@ -37,7 +37,7 @@ impl Parser { pub fn parse_into_document<'d>( &self, doc: &'d mut Document, - padded_string: &String, + padded_string: &str, ) -> Result> { map_ptr_result!(ffi::SJ_DOM_parser_parse_into_document( self.ptr.as_ptr(), @@ -48,13 +48,13 @@ impl Parser { .map(Element::new) } - pub fn parse_many(&mut self, padded_string: &String) -> Result { + pub fn parse_many(&mut self, padded_string: &str) -> Result { self.parse_batch(padded_string, DEFAULT_BATCH_SIZE) } pub fn parse_batch( &mut self, - 
padded_string: &String, + padded_string: &str, batch_size: usize, ) -> Result { map_ptr_result!(ffi::SJ_DOM_parser_parse_many( diff --git a/src/error.rs b/src/error.rs index 3fdba0e..2455ed9 100644 --- a/src/error.rs +++ b/src/error.rs @@ -102,6 +102,10 @@ pub enum SimdJsonError { #[error("todo")] StdIoError(#[from] std::io::Error), + + #[cfg(feature = "serde_impl")] + #[error("serde: {0}")] + Serde(String), } impl From for SimdJsonError { @@ -142,3 +146,17 @@ impl From for SimdJsonError { } } } + +#[cfg(feature = "serde_impl")] +impl serde::de::Error for SimdJsonError { + fn custom(msg: T) -> Self { + SimdJsonError::Serde(msg.to_string()) + } +} + +#[cfg(feature = "serde_impl")] +impl serde::ser::Error for SimdJsonError { + fn custom(msg: T) -> Self { + SimdJsonError::Serde(msg.to_string()) + } +} diff --git a/src/lib.rs b/src/lib.rs index bdf072e..e809adb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ mod macros; +pub mod builder; pub mod dom; mod error; pub mod ondemand; @@ -10,7 +11,8 @@ mod utils; pub use error::{Result, SimdJsonError}; pub use simdjson_sys::{SIMDJSON_MAXSIZE_BYTES, SIMDJSON_PADDING}; -// pub mod serde; +#[cfg(feature = "serde_impl")] +pub mod serde; #[cfg(test)] mod tests {} diff --git a/src/ondemand/array.rs b/src/ondemand/array.rs index 6afafc8..c49c139 100644 --- a/src/ondemand/array.rs +++ b/src/ondemand/array.rs @@ -67,7 +67,7 @@ impl<'a> Array<'a> { ) } - pub fn iter(&mut self) -> Result { + pub fn iter(&mut self) -> Result> { let begin = map_result!( ffi::SJ_OD_array_begin(self.ptr.as_mut()), ffi::SJ_OD_array_iterator_result_error, diff --git a/src/ondemand/document.rs b/src/ondemand/document.rs index 9953d87..55fc74f 100644 --- a/src/ondemand/document.rs +++ b/src/ondemand/document.rs @@ -2,7 +2,7 @@ use std::{marker::PhantomData, ptr::NonNull}; use simdjson_sys as ffi; -use super::{array::Array, number::Number, object::Object, parser::Parser, value::Value, JsonType}; +use super::{JsonType, array::Array, number::Number, 
object::Object, parser::Parser, value::Value}; use crate::{ error::Result, macros::{impl_drop, map_result}, diff --git a/src/ondemand/value.rs b/src/ondemand/value.rs index f9959df..4ed029c 100644 --- a/src/ondemand/value.rs +++ b/src/ondemand/value.rs @@ -2,7 +2,7 @@ use std::{marker::PhantomData, ptr::NonNull}; use simdjson_sys as ffi; -use super::{array::Array, document::Document, number::Number, object::Object, JsonType}; +use super::{JsonType, array::Array, document::Document, number::Number, object::Object}; use crate::{ error::Result, macros::{impl_drop, map_result}, diff --git a/src/prelude.rs b/src/prelude.rs index f519ca8..09325a2 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -1,3 +1,3 @@ pub use crate::padded_string::{ - load_padded_string, make_padded_string, IntoPaddedString, ToPaddedString, + IntoPaddedString, ToPaddedString, load_padded_string, make_padded_string, }; diff --git a/src/serde/de.rs b/src/serde/de.rs index 3f4a130..9920583 100644 --- a/src/serde/de.rs +++ b/src/serde/de.rs @@ -1,25 +1,19 @@ -use crate::dom::array::ArrayIter; -use crate::dom::element::{Element, ElementType}; -use crate::dom::object::ObjectIter; - +use crate::dom::{ArrayIter, Element, ElementType, ObjectIter}; use crate::error::SimdJsonError; -use crate::libsimdjson::ffi; + use serde::de::{ - Deserialize, DeserializeSeed, Deserializer, IntoDeserializer, MapAccess, SeqAccess, Visitor, + Deserialize, DeserializeSeed, Deserializer, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, + VariantAccess, Visitor, }; -// pub struct ElementVisitor; - -// impl<'de> Visitor for ElementVisitor { - -// } +fn de_error(msg: &str) -> SimdJsonError { + SimdJsonError::Serde(msg.to_owned()) +} pub fn from_element<'a, T>(element: &'a Element<'a>) -> Result where T: Deserialize<'a>, { - // let mut parser = Parser::default(); - // let mut doc = parser.parse_str(s)?; let t = T::deserialize(element)?; Ok(t) } @@ -34,8 +28,8 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { match 
self.get_type() { ElementType::NullValue => self.deserialize_unit(visitor), ElementType::Bool => self.deserialize_bool(visitor), - ElementType::String => self.deserialize_str(visitor), - ElementType::Uint64 => self.deserialize_u64(visitor), + ElementType::String => self.deserialize_string(visitor), + ElementType::UInt64 => self.deserialize_u64(visitor), ElementType::Int64 => self.deserialize_i64(visitor), ElementType::Array => self.deserialize_seq(visitor), ElementType::Object => self.deserialize_map(visitor), @@ -61,124 +55,132 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { where V: Visitor<'de>, { - visitor.visit_i8(self.get_i64()? as i8) + let v = self.get_int64()?; + let narrow = + i8::try_from(v).map_err(|_| de_error(&format!("i64 value {v} out of range for i8")))?; + visitor.visit_i8(narrow) } fn deserialize_i16(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_i16(self.get_i64()? as i16) + let v = self.get_int64()?; + let narrow = i16::try_from(v) + .map_err(|_| de_error(&format!("i64 value {v} out of range for i16")))?; + visitor.visit_i16(narrow) } fn deserialize_i32(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_i32(self.get_i64()? as i32) + let v = self.get_int64()?; + let narrow = i32::try_from(v) + .map_err(|_| de_error(&format!("i64 value {v} out of range for i32")))?; + visitor.visit_i32(narrow) } fn deserialize_i64(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_i64(self.get_i64()?) + visitor.visit_i64(self.get_int64()?) } fn deserialize_u8(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_u8(self.get_u64()? as u8) + let v = self.get_uint64()?; + let narrow = + u8::try_from(v).map_err(|_| de_error(&format!("u64 value {v} out of range for u8")))?; + visitor.visit_u8(narrow) } fn deserialize_u16(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_u16(self.get_u64()? 
as u16) + let v = self.get_uint64()?; + let narrow = u16::try_from(v) + .map_err(|_| de_error(&format!("u64 value {v} out of range for u16")))?; + visitor.visit_u16(narrow) } fn deserialize_u32(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_u32(self.get_u64()? as u32) + let v = self.get_uint64()?; + let narrow = u32::try_from(v) + .map_err(|_| de_error(&format!("u64 value {v} out of range for u32")))?; + visitor.visit_u32(narrow) } fn deserialize_u64(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_u64(self.get_u64()?) + visitor.visit_u64(self.get_uint64()?) } - // Float parsing is stupidly hard. fn deserialize_f32(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_f32(self.get_f64()? as f32) + visitor.visit_f32(self.get_double()? as f32) } - // Float parsing is stupidly hard. fn deserialize_f64(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_f64(self.get_f64()?) + visitor.visit_f64(self.get_double()?) } - // The `Serializer` implementation on the previous page serialized chars as - // single-character strings so handle that representation here. - fn deserialize_char(self, _visitor: V) -> Result + fn deserialize_char(self, visitor: V) -> Result where V: Visitor<'de>, { - // Parse a string, check that it is one character, call `visit_char`. - unimplemented!() + let s = self.get_string()?; + let mut chars = s.chars(); + match (chars.next(), chars.next()) { + (Some(c), None) => visitor.visit_char(c), + _ => Err(de_error("expected a single character string")), + } } - // Refer to the "Understanding deserializer lifetimes" page for information - // about the three deserialization flavors of strings in Serde. 
- fn deserialize_str(self, _visitor: V) -> Result + fn deserialize_str(self, visitor: V) -> Result where V: Visitor<'de>, { - unimplemented!() + let s = self.get_string()?; + visitor.visit_str(s) } fn deserialize_string(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_string(self.get_string()?) + let s = self.get_string()?; + visitor.visit_string(s.to_owned()) } - // The `Serializer` implementation on the previous page serialized byte - // arrays as JSON arrays of bytes. Handle that representation here. - fn deserialize_bytes(self, _visitor: V) -> Result + fn deserialize_bytes(self, visitor: V) -> Result where V: Visitor<'de>, { - unimplemented!() + self.deserialize_seq(visitor) } - fn deserialize_byte_buf(self, _visitor: V) -> Result + fn deserialize_byte_buf(self, visitor: V) -> Result where V: Visitor<'de>, { - unimplemented!() + self.deserialize_seq(visitor) } - // An absent optional is represented as the JSON `null` and a present - // optional is represented as just the contained value. - // - // As commented in `Serializer` implementation, this is a lossy - // representation. For example the values `Some(())` and `None` both - // serialize as just `null`. Unfortunately this is typically what people - // expect when working with JSON. Other formats are encouraged to behave - // more intelligently if possible. fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, @@ -190,7 +192,6 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { } } - // Unit struct means a named value containing no data. fn deserialize_unit_struct( self, _name: &'static str, @@ -202,9 +203,6 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { self.deserialize_unit(visitor) } - // As is done here, serializers are encouraged to treat newtype structs as - // insignificant wrappers around the data they contain. That means not - // parsing anything other than the contained value. 
fn deserialize_newtype_struct( self, _name: &'static str, @@ -216,26 +214,15 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { visitor.visit_newtype_struct(self) } - // Deserialization of compound types like sequences and maps happens by - // passing the visitor an "Access" object that gives it the ability to - // iterate through the data contained in the sequence. fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { - // Parse the opening bracket of the sequence. - // de::Deserializer::deserialize_seq(self.into_iter(), visitor) - // unimplemented!() - let value = visitor.visit_seq(ArrayIter::new(&self.get_array()?))?; - Ok(value) + let array = self.get_array()?; + let iter = array.iter(); + visitor.visit_seq(SeqAccessor(iter)) } - // Tuples look just like sequences in JSON. Some formats may be able to - // represent tuples more efficiently. - // - // As indicated by the length parameter, the `Deserialize` implementation - // for a tuple in the Serde data model is required to know the length of the - // tuple before even looking at the input data. fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, @@ -243,7 +230,6 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { self.deserialize_seq(visitor) } - // Tuple structs look just like sequences in JSON. fn deserialize_tuple_struct( self, _name: &'static str, @@ -256,22 +242,15 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { self.deserialize_seq(visitor) } - // Much like `deserialize_seq` but calls the visitors `visit_map` method - // with a `MapAccess` implementation, rather than the visitor's `visit_seq` - // method with a `SeqAccess` implementation. fn deserialize_map(self, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_map(ObjectIter::new(&self.get_object()?)) + let object = self.get_object()?; + let iter = object.iter(); + visitor.visit_map(MapAccessor::new(iter)) } - // Structs look just like maps in JSON. 
- // - // Notice the `fields` parameter - a "struct" in the Serde data model means - // that the `Deserialize` implementation is required to know what the fields - // are before even looking at the input data. Any key-value pairing in which - // the fields cannot be known ahead of time is probably a map. fn deserialize_struct( self, _name: &'static str, @@ -288,18 +267,38 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { self, _name: &'static str, _variants: &'static [&'static str], - _visitor: V, + visitor: V, ) -> Result where V: Visitor<'de>, { - unimplemented!() + match self.get_type() { + ElementType::String => { + let s = self.get_string()?; + let de: serde::de::value::StrDeserializer<'_, SimdJsonError> = + s.into_deserializer(); + visitor.visit_enum(de) + } + ElementType::Object => { + let object = self.get_object()?; + let mut iter = object.iter(); + let pair = iter.next(); + drop(iter); + match pair { + Some((variant, value)) => { + let variant_owned = String::from(variant); + visitor.visit_enum(EnumDeserializer { + variant: variant_owned, + value, + }) + } + None => Err(de_error("expected an object with a single key for enum")), + } + } + _ => Err(de_error("expected a string or object for enum")), + } } - // An identifier in Serde is the type that identifies a field of a struct or - // the variant of an enum. In JSON, struct fields and enum variants are - // represented as strings. In other formats they may be represented as - // numeric indices. fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, @@ -307,17 +306,6 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { self.deserialize_str(visitor) } - // Like `deserialize_any` but indicates to the `Deserializer` that it makes - // no difference which `Visitor` method is called because the data is - // ignored. 
- // - // Some deserializers are able to implement this more efficiently than - // `deserialize_any`, for example by rapidly skipping over matched - // delimiters without paying close attention to the data in between. - // - // Some formats are not able to implement this at all. Formats that can - // implement `deserialize_any` and `deserialize_ignored_any` are known as - // self-describing. fn deserialize_ignored_any(self, visitor: V) -> Result where V: Visitor<'de>, @@ -326,32 +314,51 @@ impl<'de, 'a> Deserializer<'de> for &'a Element<'a> { } } -impl<'de, 'a> SeqAccess<'de> for ArrayIter<'a> { +struct SeqAccessor<'a>(ArrayIter<'a>); + +impl<'de, 'a> SeqAccess<'de> for SeqAccessor<'a> { type Error = SimdJsonError; fn next_element_seed(&mut self, seed: T) -> Result, Self::Error> where T: DeserializeSeed<'de>, { - if let Some(element) = self.next() { - seed.deserialize(&element).map(Some) - } else { - Ok(None) + match self.0.next() { + Some(element) => seed.deserialize(&element).map(Some), + None => Ok(None), + } + } +} + +struct MapAccessor<'a> { + iter: ObjectIter<'a>, + pending_value: Option>, +} + +impl<'a> MapAccessor<'a> { + fn new(iter: ObjectIter<'a>) -> Self { + Self { + iter, + pending_value: None, } } } -impl<'de, 'a> MapAccess<'de> for ObjectIter<'a> { +impl<'de, 'a> MapAccess<'de> for MapAccessor<'a> { type Error = SimdJsonError; fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> where K: DeserializeSeed<'de>, { - if self.has_next() { - seed.deserialize(self.key().into_deserializer()).map(Some) - } else { - Ok(None) + match self.iter.next() { + Some((key, value)) => { + self.pending_value = Some(value); + let de: serde::de::value::StringDeserializer = + String::from(key).into_deserializer(); + seed.deserialize(de).map(Some) + } + None => Ok(None), } } @@ -359,8 +366,66 @@ impl<'de, 'a> MapAccess<'de> for ObjectIter<'a> { where V: DeserializeSeed<'de>, { - let result = seed.deserialize(&self.value()); - 
ffi::object_iterator_next(self.ptr.pin_mut()); - result + match self.pending_value.take() { + Some(value) => seed.deserialize(&value), + None => Err(de_error("next_value_seed called before next_key_seed")), + } + } +} + +struct EnumDeserializer<'a> { + variant: String, + value: Element<'a>, +} + +impl<'de, 'a> EnumAccess<'de> for EnumDeserializer<'a> { + type Error = SimdJsonError; + type Variant = VariantDeserializer<'a>; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> + where + V: DeserializeSeed<'de>, + { + let de: serde::de::value::StringDeserializer = + self.variant.into_deserializer(); + let variant = seed.deserialize(de)?; + Ok((variant, VariantDeserializer(self.value))) + } +} + +struct VariantDeserializer<'a>(Element<'a>); + +impl<'de, 'a> VariantAccess<'de> for VariantDeserializer<'a> { + type Error = SimdJsonError; + + fn unit_variant(self) -> Result<(), Self::Error> { + Err(de_error( + "expected a string for unit variant, got an object key", + )) + } + + fn newtype_variant_seed(self, seed: T) -> Result + where + T: DeserializeSeed<'de>, + { + seed.deserialize(&self.0) + } + + fn tuple_variant(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + serde::Deserializer::deserialize_seq(&self.0, visitor) + } + + fn struct_variant( + self, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + serde::Deserializer::deserialize_map(&self.0, visitor) } } diff --git a/src/serde/mod.rs b/src/serde/mod.rs index dacf8af..2023854 100644 --- a/src/serde/mod.rs +++ b/src/serde/mod.rs @@ -1,29 +1,3 @@ pub mod de; - -#[cfg(test)] -mod tests { - use super::*; - // use super::element::GetValue; - use crate::dom::parser::Parser; - use serde::Deserialize; - - #[test] - fn test_element() -> Result<(), Box> { - let mut parser = Parser::default(); - let elm = parser.parse(r#"[true, false]"#)?; - println!("{}", elm); - let a: Vec = de::from_element(&elm)?; - assert_eq!(vec![true, 
false], a); - - #[derive(Debug, Deserialize)] - struct A { - field1: bool, - } - let elm = parser.parse(r#"{"field1": false}"#)?; - println!("{}", elm); - let a: A = de::from_element(&elm)?; - assert!(!a.field1); - - Ok(()) - } -} +pub mod ser; +pub mod value; diff --git a/src/serde/ser.rs b/src/serde/ser.rs new file mode 100644 index 0000000..65a7ca5 --- /dev/null +++ b/src/serde/ser.rs @@ -0,0 +1,382 @@ +use crate::builder::StringBuilder; +use crate::error::SimdJsonError; + +use serde::ser::{ + Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, SerializeTuple, + SerializeTupleStruct, SerializeTupleVariant, Serializer, +}; + +/// Serialize a value to a JSON string using simdjson's SIMD-accelerated builder. +pub fn to_string(value: &T) -> Result { + let mut builder = StringBuilder::new(); + value.serialize(&mut BuilderSerializer::new(&mut builder))?; + builder.into_string() +} + +/// Serialize a value to a JSON string with a pre-allocated capacity hint. +pub fn to_string_with_capacity( + value: &T, + capacity: usize, +) -> Result { + let mut builder = StringBuilder::with_capacity(capacity); + value.serialize(&mut BuilderSerializer::new(&mut builder))?; + builder.into_string() +} + +pub struct BuilderSerializer<'a> { + builder: &'a mut StringBuilder, +} + +impl<'a> BuilderSerializer<'a> { + pub fn new(builder: &'a mut StringBuilder) -> Self { + Self { builder } + } +} + +impl<'a> Serializer for &'a mut BuilderSerializer<'a> { + type Ok = (); + type Error = SimdJsonError; + + type SerializeSeq = SeqSerializer<'a>; + type SerializeTuple = SeqSerializer<'a>; + type SerializeTupleStruct = SeqSerializer<'a>; + type SerializeTupleVariant = SeqSerializer<'a>; + type SerializeMap = MapSerializer<'a>; + type SerializeStruct = MapSerializer<'a>; + type SerializeStructVariant = MapSerializer<'a>; + + fn serialize_bool(self, v: bool) -> Result { + self.builder.append_bool(v); + Ok(()) + } + + fn serialize_i8(self, v: i8) -> Result { + 
self.builder.append_i64(v as i64); + Ok(()) + } + + fn serialize_i16(self, v: i16) -> Result { + self.builder.append_i64(v as i64); + Ok(()) + } + + fn serialize_i32(self, v: i32) -> Result { + self.builder.append_i64(v as i64); + Ok(()) + } + + fn serialize_i64(self, v: i64) -> Result { + self.builder.append_i64(v); + Ok(()) + } + + fn serialize_u8(self, v: u8) -> Result { + self.builder.append_u64(v as u64); + Ok(()) + } + + fn serialize_u16(self, v: u16) -> Result { + self.builder.append_u64(v as u64); + Ok(()) + } + + fn serialize_u32(self, v: u32) -> Result { + self.builder.append_u64(v as u64); + Ok(()) + } + + fn serialize_u64(self, v: u64) -> Result { + self.builder.append_u64(v); + Ok(()) + } + + fn serialize_f32(self, v: f32) -> Result { + self.builder.append_f64(v as f64); + Ok(()) + } + + fn serialize_f64(self, v: f64) -> Result { + if !v.is_finite() { + return Err(SimdJsonError::Serde(format!( + "cannot serialize non-finite float: {v}" + ))); + } + self.builder.append_f64(v); + Ok(()) + } + + fn serialize_char(self, v: char) -> Result { + self.serialize_str(&v.to_string()) + } + + fn serialize_str(self, v: &str) -> Result { + self.builder.append_string(v); + Ok(()) + } + + fn serialize_bytes(self, v: &[u8]) -> Result { + self.builder.start_array(); + for (i, byte) in v.iter().enumerate() { + if i > 0 { + self.builder.append_comma(); + } + self.builder.append_u64(*byte as u64); + } + self.builder.end_array(); + Ok(()) + } + + fn serialize_none(self) -> Result { + self.serialize_unit() + } + + fn serialize_some(self, value: &T) -> Result { + value.serialize(self) + } + + fn serialize_unit(self) -> Result { + self.builder.append_null(); + Ok(()) + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result { + self.serialize_unit() + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result { + self.serialize_str(variant) + } + + fn serialize_newtype_struct( + self, + _name: 
&'static str, + value: &T, + ) -> Result { + value.serialize(self) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result { + self.builder.start_object(); + self.builder.append_string(variant); + self.builder.append_colon(); + value.serialize(&mut BuilderSerializer::new(self.builder))?; + self.builder.end_object(); + Ok(()) + } + + fn serialize_seq(self, _len: Option) -> Result { + self.builder.start_array(); + Ok(SeqSerializer { + builder: self.builder, + first: true, + }) + } + + fn serialize_tuple(self, len: usize) -> Result { + self.serialize_seq(Some(len)) + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + len: usize, + ) -> Result { + self.serialize_seq(Some(len)) + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result { + self.builder.start_object(); + self.builder.append_string(variant); + self.builder.append_colon(); + self.builder.start_array(); + Ok(SeqSerializer { + builder: self.builder, + first: true, + }) + } + + fn serialize_map(self, _len: Option) -> Result { + self.builder.start_object(); + Ok(MapSerializer { + builder: self.builder, + first: true, + }) + } + + fn serialize_struct( + self, + _name: &'static str, + len: usize, + ) -> Result { + self.serialize_map(Some(len)) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result { + self.builder.start_object(); + self.builder.append_string(variant); + self.builder.append_colon(); + self.builder.start_object(); + Ok(MapSerializer { + builder: self.builder, + first: true, + }) + } +} + +pub struct SeqSerializer<'a> { + builder: &'a mut StringBuilder, + first: bool, +} + +impl<'a> SerializeSeq for SeqSerializer<'a> { + type Ok = (); + type Error = SimdJsonError; + + fn serialize_element(&mut self, value: &T) -> Result<(), 
Self::Error> { + if !self.first { + self.builder.append_comma(); + } + self.first = false; + value.serialize(&mut BuilderSerializer::new(self.builder)) + } + + fn end(self) -> Result { + self.builder.end_array(); + Ok(()) + } +} + +impl<'a> SerializeTuple for SeqSerializer<'a> { + type Ok = (); + type Error = SimdJsonError; + + fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> { + SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result { + SerializeSeq::end(self) + } +} + +impl<'a> SerializeTupleStruct for SeqSerializer<'a> { + type Ok = (); + type Error = SimdJsonError; + + fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> { + SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result { + SerializeSeq::end(self) + } +} + +impl<'a> SerializeTupleVariant for SeqSerializer<'a> { + type Ok = (); + type Error = SimdJsonError; + + fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> { + SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result { + self.builder.end_array(); + self.builder.end_object(); + Ok(()) + } +} + +pub struct MapSerializer<'a> { + builder: &'a mut StringBuilder, + first: bool, +} + +impl<'a> SerializeMap for MapSerializer<'a> { + type Ok = (); + type Error = SimdJsonError; + + fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> { + if !self.first { + self.builder.append_comma(); + } + self.first = false; + key.serialize(&mut BuilderSerializer::new(self.builder)) + } + + fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> { + self.builder.append_colon(); + value.serialize(&mut BuilderSerializer::new(self.builder)) + } + + fn end(self) -> Result { + self.builder.end_object(); + Ok(()) + } +} + +impl<'a> SerializeStruct for MapSerializer<'a> { + type Ok = (); + type Error = SimdJsonError; + + fn serialize_field( + &mut self, + key: &'static str, + value: &T, + ) -> Result<(), Self::Error> { + if !self.first { + 
self.builder.append_comma(); + } + self.first = false; + self.builder.append_string(key); + self.builder.append_colon(); + value.serialize(&mut BuilderSerializer::new(self.builder)) + } + + fn end(self) -> Result { + self.builder.end_object(); + Ok(()) + } +} + +impl<'a> SerializeStructVariant for MapSerializer<'a> { + type Ok = (); + type Error = SimdJsonError; + + fn serialize_field( + &mut self, + key: &'static str, + value: &T, + ) -> Result<(), Self::Error> { + SerializeStruct::serialize_field(self, key, value) + } + + fn end(self) -> Result { + self.builder.end_object(); + self.builder.end_object(); + Ok(()) + } +} diff --git a/src/serde/value.rs b/src/serde/value.rs new file mode 100644 index 0000000..d7db556 --- /dev/null +++ b/src/serde/value.rs @@ -0,0 +1,65 @@ +use serde_json::{Map, Number, Value}; + +use crate::dom::{Element, ElementType}; +use crate::error::SimdJsonError; + +const MAX_NESTING_DEPTH: usize = 128; + +/// Convert a DOM `Element` into a `serde_json::Value`. +/// +/// Enforces a maximum nesting depth to guard against stack overflow from +/// adversarial inputs with extreme nesting (e.g. `[[[[...]]]]` repeated +/// thousands of times). 
+///
+/// # Errors
+///
+/// Returns `SimdJsonError::Serde` when the nesting limit is exceeded or when a
+/// double cannot be represented as a JSON number, and propagates any error
+/// returned by the element accessors.
+pub fn element_to_value(element: &Element<'_>) -> Result<Value, SimdJsonError> {
+    element_to_value_inner(element, 0)
+}
+
+/// Recursive worker behind `element_to_value`.
+///
+/// `depth` is the nesting level of `element`: 0 for the root, and one more for
+/// each array element or object member below it. The conversion fails as soon
+/// as `depth` is greater than `MAX_NESTING_DEPTH`.
+fn element_to_value_inner(element: &Element<'_>, depth: usize) -> Result<Value, SimdJsonError> {
+    // Check the depth before touching the element so that recursion stops at
+    // the limit regardless of what the element contains.
+    if depth > MAX_NESTING_DEPTH {
+        return Err(SimdJsonError::Serde(format!(
+            "nesting depth exceeds maximum of {MAX_NESTING_DEPTH}"
+        )));
+    }
+
+    match element.get_type() {
+        ElementType::NullValue => Ok(Value::Null),
+        ElementType::Bool => Ok(Value::Bool(element.get_bool()?)),
+        ElementType::String => Ok(Value::String(element.get_string()?.to_owned())),
+        ElementType::Int64 => {
+            let v = element.get_int64()?;
+            Ok(Value::Number(Number::from(v)))
+        }
+        ElementType::UInt64 => {
+            let v = element.get_uint64()?;
+            Ok(Value::Number(Number::from(v)))
+        }
+        ElementType::Double => {
+            let v = element.get_double()?;
+            // `Number::from_f64` yields `None` for values JSON cannot express.
+            match Number::from_f64(v) {
+                Some(n) => Ok(Value::Number(n)),
+                None => Err(SimdJsonError::Serde(format!(
+                    "cannot represent {v} as a JSON number (NaN or Infinity)"
+                ))),
+            }
+        }
+        ElementType::Array => {
+            let array = element.get_array()?;
+            let mut vec = Vec::with_capacity(array.size());
+            for child in array.iter() {
+                vec.push(element_to_value_inner(&child, depth + 1)?);
+            }
+            Ok(Value::Array(vec))
+        }
+        ElementType::Object => {
+            let object = element.get_object()?;
+            let mut map = Map::new();
+            // `Map::insert` replaces an existing entry, so if the object
+            // repeats a key the last occurrence is the one that is kept.
+            for (key, child) in object.iter() {
+                map.insert(
+                    String::from(key),
+                    element_to_value_inner(&child, depth + 1)?,
+                );
+            }
+            Ok(Value::Object(map))
+        }
+    }
+}
diff --git a/tests/builder_tests.rs b/tests/builder_tests.rs
new file mode 100644
index 0000000..7c48e12
--- /dev/null
+++ b/tests/builder_tests.rs
@@ -0,0 +1,410 @@
+#![cfg(feature = "serde_impl")]
+
+use serde::{Deserialize, Serialize};
+use simdjson_rust::builder::StringBuilder;
+use simdjson_rust::dom::Parser;
+use simdjson_rust::prelude::*;
+use simdjson_rust::serde::de::from_element;
+use simdjson_rust::serde::ser::{to_string, to_string_with_capacity};
+
+// 
---------------------------------------------------------------------------
+// Basic StringBuilder usage
+// ---------------------------------------------------------------------------
+
+// Appends one value of each scalar kind to an array. The test writes every
+// separating comma itself with `append_comma`.
+#[test]
+fn builder_basic_types() {
+    let mut builder = StringBuilder::new();
+    builder.start_array();
+    builder.append_bool(true);
+    builder.append_comma();
+    builder.append_i64(42);
+    builder.append_comma();
+    builder.append_f64(3.15);
+    builder.append_comma();
+    builder.append_string("hello");
+    builder.append_comma();
+    builder.append_null();
+    builder.end_array();
+
+    let json = builder.view().unwrap();
+    assert_eq!(json, r#"[true,42,3.15,"hello",null]"#);
+}
+
+// Builds a two-member object by hand: key, colon, value, comma, key, colon,
+// value.
+#[test]
+fn builder_object() {
+    let mut builder = StringBuilder::new();
+    builder.start_object();
+    builder.append_string("name");
+    builder.append_colon();
+    builder.append_string("Alice");
+    builder.append_comma();
+    builder.append_string("age");
+    builder.append_colon();
+    builder.append_i64(30);
+    builder.end_object();
+
+    let json = builder.view().unwrap();
+    assert_eq!(json, r#"{"name":"Alice","age":30}"#);
+}
+
+// Nests an array inside an object and checks the combined output.
+#[test]
+fn builder_nested() {
+    let mut builder = StringBuilder::new();
+    builder.start_object();
+    builder.append_string("data");
+    builder.append_colon();
+    builder.start_array();
+    builder.append_i64(1);
+    builder.append_comma();
+    builder.append_i64(2);
+    builder.append_comma();
+    builder.append_i64(3);
+    builder.end_array();
+    builder.end_object();
+
+    let json = builder.view().unwrap();
+    assert_eq!(json, r#"{"data":[1,2,3]}"#);
+}
+
+// After `clear`, the builder's view contains only what was appended afterwards.
+#[test]
+fn builder_clear_reuse() {
+    let mut builder = StringBuilder::new();
+    builder.append_i64(42);
+    assert_eq!(builder.view().unwrap(), "42");
+
+    builder.clear();
+    builder.append_string("reset");
+    assert_eq!(builder.view().unwrap(), r#""reset""#);
+}
+
+// Formatting the builder with `{}` yields the JSON text written so far.
+#[test]
+fn builder_display() {
+    let mut builder = StringBuilder::new();
+    builder.append_bool(true);
+    assert_eq!(format!("{builder}"), "true");
+}
+
+// 
---------------------------------------------------------------------------
+// Serde serialization
+// ---------------------------------------------------------------------------
+
+// Fixture shared by the struct serialization, round-trip and capacity tests
+// in this file.
+#[derive(Serialize, Deserialize, PartialEq, Debug)]
+struct Person {
+    name: String,
+    age: u32,
+    active: bool,
+}
+
+// A struct becomes a JSON object whose members follow field declaration order.
+#[test]
+fn serialize_struct() {
+    let person = Person {
+        name: "Bob".to_string(),
+        age: 25,
+        active: true,
+    };
+    let json = to_string(&person).unwrap();
+    assert_eq!(json, r#"{"name":"Bob","age":25,"active":true}"#);
+}
+
+// A vector of integers becomes a JSON array.
+#[test]
+fn serialize_vec() {
+    let vec = vec![1, 2, 3, 4, 5];
+    let json = to_string(&vec).unwrap();
+    assert_eq!(json, "[1,2,3,4,5]");
+}
+
+// A struct containing a vector: the array is written inline as a member value.
+#[test]
+fn serialize_nested() {
+    #[derive(Serialize)]
+    struct Outer {
+        // NOTE(review): the element type of this `Vec` is not visible in this
+        // view of the file — confirm against the repository.
+        inner: Vec,
+        flag: bool,
+    }
+    let outer = Outer {
+        inner: vec![10, 20],
+        flag: false,
+    };
+    let json = to_string(&outer).unwrap();
+    assert_eq!(json, r#"{"inner":[10,20],"flag":false}"#);
+}
+
+// `Some(v)` is written as the bare inner value.
+#[test]
+fn serialize_option_some() {
+    #[derive(Serialize)]
+    struct WithOption {
+        value: Option,
+    }
+    let obj = WithOption { value: Some(42) };
+    let json = to_string(&obj).unwrap();
+    assert_eq!(json, r#"{"value":42}"#);
+}
+
+// `None` is written as JSON `null`; the member is not omitted.
+#[test]
+fn serialize_option_none() {
+    #[derive(Serialize)]
+    struct WithOption {
+        value: Option,
+    }
+    let obj = WithOption { value: None };
+    let json = to_string(&obj).unwrap();
+    assert_eq!(json, r#"{"value":null}"#);
+}
+
+// A heterogeneous tuple becomes a JSON array; the float keeps its `.0`.
+#[test]
+fn serialize_tuple() {
+    let tuple = (1, "two", 3.0);
+    let json = to_string(&tuple).unwrap();
+    assert_eq!(json, r#"[1,"two",3.0]"#);
+}
+
+// A unit variant is written as its name in a JSON string.
+#[test]
+fn serialize_unit_enum() {
+    #[derive(Serialize)]
+    enum Color {
+        Red,
+        Green,
+        Blue,
+    }
+    let color = Color::Green;
+    let json = to_string(&color).unwrap();
+    assert_eq!(json, r#""Green""#);
+}
+
+// A newtype variant is written as a single-member object keyed by the variant
+// name.
+#[test]
+fn serialize_newtype_enum() {
+    #[derive(Serialize)]
+    enum Value {
+        Number(i32),
+    }
+    let val = Value::Number(42);
+    let json = to_string(&val).unwrap();
+    assert_eq!(json, r#"{"Number":42}"#);
+}
+
+// A struct variant is written as `{"Variant":{...fields...}}`.
+#[test]
+fn 
serialize_struct_enum() {
+    #[derive(Serialize)]
+    enum Shape {
+        Rectangle { width: f64, height: f64 },
+    }
+    let shape = Shape::Rectangle {
+        width: 10.0,
+        height: 5.0,
+    };
+    let json = to_string(&shape).unwrap();
+    assert_eq!(json, r#"{"Rectangle":{"width":10.0,"height":5.0}}"#);
+}
+
+// A map becomes a JSON object; member order follows the map's iteration order.
+#[test]
+fn serialize_hashmap() {
+    use std::collections::HashMap;
+    let mut map = HashMap::new();
+    map.insert("key1", 100);
+    map.insert("key2", 200);
+    let json = to_string(&map).unwrap();
+    // HashMap iteration order is not guaranteed, so check both possibilities
+    assert!(
+        json == r#"{"key1":100,"key2":200}"# || json == r#"{"key2":200,"key1":100}"#,
+        "got: {json}"
+    );
+}
+
+// Quotes and newlines in a string value must appear backslash-escaped in the
+// output. Only the presence of the escape sequences is checked, not the whole
+// document.
+#[test]
+fn serialize_escaped_strings() {
+    #[derive(Serialize)]
+    struct WithString {
+        text: String,
+    }
+    let obj = WithString {
+        text: "hello \"world\"\nline2".to_string(),
+    };
+    let json = to_string(&obj).unwrap();
+    assert!(json.contains(r#"\""#), "should escape quotes");
+    assert!(json.contains(r#"\n"#), "should escape newlines");
+}
+
+// Non-ASCII text is expected to appear literally in the output rather than as
+// a `\u` escape.
+#[test]
+fn serialize_unicode() {
+    #[derive(Serialize)]
+    struct WithEmoji {
+        emoji: String,
+    }
+    let obj = WithEmoji {
+        emoji: "Hello ❤️".to_string(),
+    };
+    let json = to_string(&obj).unwrap();
+    assert!(json.contains("❤"), "should preserve unicode");
+}
+
+// ---------------------------------------------------------------------------
+// Round-trip tests (serialize → parse → deserialize)
+// ---------------------------------------------------------------------------
+
+// Serializes a `Person`, parses the text with the DOM parser and deserializes
+// it again; the result must equal the starting value.
+#[test]
+fn round_trip_struct() {
+    let original = Person {
+        name: "Charlie".to_string(),
+        age: 35,
+        active: false,
+    };
+
+    let json = to_string(&original).unwrap();
+    let mut parser = Parser::default();
+    let ps = json.into_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let deserialized: Person = from_element(&elm).unwrap();
+
+    assert_eq!(original, deserialized);
+}
+
+// Same round trip for a struct that contains a vector and another struct.
+#[test]
+fn round_trip_nested() {
+    #[derive(Serialize, Deserialize, PartialEq, Debug)]
+    struct 
Nested {
+        // NOTE(review): the element type of this `Vec` is not visible in this
+        // view of the file — confirm against the repository.
+        values: Vec,
+        metadata: Metadata,
+    }
+
+    #[derive(Serialize, Deserialize, PartialEq, Debug)]
+    struct Metadata {
+        count: usize,
+        valid: bool,
+    }
+
+    let original = Nested {
+        values: vec![1, 2, 3],
+        metadata: Metadata {
+            count: 3,
+            valid: true,
+        },
+    };
+
+    let json = to_string(&original).unwrap();
+    let mut parser = Parser::default();
+    let ps = json.into_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let deserialized: Nested = from_element(&elm).unwrap();
+
+    assert_eq!(original, deserialized);
+}
+
+// Round trip for a top-level array of integers.
+#[test]
+fn round_trip_vec() {
+    let original = vec![10, 20, 30, 40, 50];
+    let json = to_string(&original).unwrap();
+    let mut parser = Parser::default();
+    let ps = json.into_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let deserialized: Vec = from_element(&elm).unwrap();
+    assert_eq!(original, deserialized);
+}
+
+// ---------------------------------------------------------------------------
+// Security: NaN/Infinity rejection
+// ---------------------------------------------------------------------------
+
+// The next three tests expect `to_string` to return an error for a
+// non-finite float.
+#[test]
+fn serialize_nan_rejected() {
+    let result = to_string(&f64::NAN);
+    assert!(result.is_err(), "NaN should be rejected");
+}
+
+#[test]
+fn serialize_infinity_rejected() {
+    let result = to_string(&f64::INFINITY);
+    assert!(result.is_err(), "Infinity should be rejected");
+}
+
+#[test]
+fn serialize_neg_infinity_rejected() {
+    let result = to_string(&f64::NEG_INFINITY);
+    assert!(result.is_err(), "Negative infinity should be rejected");
+}
+
+// ---------------------------------------------------------------------------
+// Edge cases
+// ---------------------------------------------------------------------------
+
+// An empty vector is written as `[]`.
+#[test]
+fn serialize_empty_vec() {
+    let vec: Vec = vec![];
+    let json = to_string(&vec).unwrap();
+    assert_eq!(json, "[]");
+}
+
+// A struct without fields is written as `{}`.
+#[test]
+fn serialize_empty_struct() {
+    #[derive(Serialize)]
+    struct Empty {}
+    let obj = Empty {};
+    let json = to_string(&obj).unwrap();
+    assert_eq!(json, "{}");
+}
+
+// The unit value `()` is written as `null`.
+#[test]
+fn serialize_unit() {
+    let json = to_string(&()).unwrap();
+    assert_eq!(json, "null");
+}
+
+// A newtype struct is written as its inner value alone, and that text
+// deserializes back into the wrapper.
+#[test]
+fn serialize_newtype_struct() {
+    #[derive(Serialize, Deserialize, PartialEq, Debug)]
+    struct Wrapper(i32);
+    let original = Wrapper(42);
+    let json = to_string(&original).unwrap();
+    assert_eq!(json, "42");
+
+    let mut parser = Parser::default();
+    let ps = json.into_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let deserialized: Wrapper = from_element(&elm).unwrap();
+    assert_eq!(original, deserialized);
+}
+
+// A `char` is written as a one-character JSON string.
+#[test]
+fn serialize_char() {
+    let json = to_string(&'A').unwrap();
+    assert_eq!(json, r#""A""#);
+}
+
+// A `&[u8]` is written as an array of numbers.
+#[test]
+fn serialize_bytes() {
+    let bytes: &[u8] = &[1, 2, 3];
+    let json = to_string(&bytes).unwrap();
+    assert_eq!(json, "[1,2,3]");
+}
+
+// ---------------------------------------------------------------------------
+// Numeric boundaries
+// ---------------------------------------------------------------------------
+
+// Both ends of the `i64` range are written with every digit.
+#[test]
+fn serialize_i64_min_max() {
+    let json = to_string(&i64::MIN).unwrap();
+    assert_eq!(json, "-9223372036854775808");
+
+    let json = to_string(&i64::MAX).unwrap();
+    assert_eq!(json, "9223372036854775807");
+}
+
+// `u64::MAX` is written with every digit.
+#[test]
+fn serialize_u64_max() {
+    let json = to_string(&u64::MAX).unwrap();
+    assert_eq!(json, "18446744073709551615");
+}
+
+// ---------------------------------------------------------------------------
+// Capacity hint
+// ---------------------------------------------------------------------------
+
+// `to_string_with_capacity` must produce the same kind of output as
+// `to_string`; 128 is the capacity argument passed here.
+#[test]
+fn serialize_with_capacity() {
+    let person = Person {
+        name: "Dave".to_string(),
+        age: 40,
+        active: true,
+    };
+    let json = to_string_with_capacity(&person, 128).unwrap();
+    assert_eq!(json, r#"{"name":"Dave","age":40,"active":true}"#);
+}
diff --git a/tests/serde_tests.rs b/tests/serde_tests.rs
new file mode 100644
index 0000000..4336faf
--- /dev/null
+++ b/tests/serde_tests.rs
@@ -0,0 +1,645 @@
+#![cfg(feature = "serde_impl")]
+
+use serde::{Deserialize, 
Serialize};
+use simdjson_rust::dom::Parser;
+use simdjson_rust::prelude::*;
+use simdjson_rust::serde::de::from_element;
+use simdjson_rust::serde::value::element_to_value;
+
+// ---------------------------------------------------------------------------
+// Basic type deserialization
+// ---------------------------------------------------------------------------
+
+// Every test below follows the same steps: pad the JSON text, parse it into a
+// DOM element, then hand that element to `from_element`.
+// NOTE(review): the element types of the `Vec` annotations in this section are
+// not visible in this view of the file — confirm against the repository.
+
+// A JSON array of booleans.
+#[test]
+fn deserialize_bool_array() {
+    let mut parser = Parser::default();
+    let ps = "[true, false, true]".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: Vec = from_element(&elm).unwrap();
+    assert_eq!(v, vec![true, false, true]);
+}
+
+// A JSON array of non-negative integers.
+#[test]
+fn deserialize_integer_array() {
+    let mut parser = Parser::default();
+    let ps = "[1, 2, 3]".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: Vec = from_element(&elm).unwrap();
+    assert_eq!(v, vec![1, 2, 3]);
+}
+
+// A JSON array that includes a negative integer.
+#[test]
+fn deserialize_signed_integer_array() {
+    let mut parser = Parser::default();
+    let ps = "[-1, 0, 42]".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: Vec = from_element(&elm).unwrap();
+    assert_eq!(v, vec![-1, 0, 42]);
+}
+
+// A JSON array of floats; the values are exactly representable, so exact
+// equality is safe.
+#[test]
+fn deserialize_float_array() {
+    let mut parser = Parser::default();
+    let ps = "[1.5, 2.25, 3.125]".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: Vec = from_element(&elm).unwrap();
+    assert_eq!(v, vec![1.5, 2.25, 3.125]);
+}
+
+// A JSON array of strings.
+#[test]
+fn deserialize_string_array() {
+    let mut parser = Parser::default();
+    let ps = r#"["hello", "world"]"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: Vec = from_element(&elm).unwrap();
+    assert_eq!(v, vec!["hello".to_string(), "world".to_string()]);
+}
+
+// ---------------------------------------------------------------------------
+// Struct deserialization
+// ---------------------------------------------------------------------------
+
+// Flat fixture with one string, one integer and one boolean field.
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+struct Simple {
+    name: String,
+    age: u32,
+    active: bool,
+}
+
+#[test]
+fn deserialize_struct() {
+    // A JSON object maps onto `Simple` field by field.
+    let mut parser = Parser::default();
+    let ps = r#"{"name": "Alice", "age": 30, "active": true}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let s: Simple = from_element(&elm).unwrap();
+    assert_eq!(
+        s,
+        Simple {
+            name: "Alice".to_string(),
+            age: 30,
+            active: true,
+        }
+    );
+}
+
+// Fixture with a nested struct and a vector field.
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+struct Nested {
+    inner: Simple,
+    // NOTE(review): the element type of this `Vec` is not visible in this view
+    // of the file — confirm against the repository.
+    tags: Vec,
+}
+
+// An object containing another object and an array of strings.
+#[test]
+fn deserialize_nested_struct() {
+    let mut parser = Parser::default();
+    let ps = r#"{"inner": {"name": "Bob", "age": 25, "active": false}, "tags": ["rust", "json"]}"#
+        .to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let n: Nested = from_element(&elm).unwrap();
+    assert_eq!(
+        n,
+        Nested {
+            inner: Simple {
+                name: "Bob".to_string(),
+                age: 25,
+                active: false,
+            },
+            tags: vec!["rust".to_string(), "json".to_string()],
+        }
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Optional fields
+// ---------------------------------------------------------------------------
+
+// Fixture with one mandatory and one optional field.
+#[derive(Debug, Deserialize, PartialEq)]
+struct WithOptional {
+    required: String,
+    optional: Option,
+}
+
+// A present, non-null member becomes `Some(value)`.
+#[test]
+fn deserialize_option_some() {
+    let mut parser = Parser::default();
+    let ps = r#"{"required": "yes", "optional": 42}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let w: WithOptional = from_element(&elm).unwrap();
+    assert_eq!(
+        w,
+        WithOptional {
+            required: "yes".to_string(),
+            optional: Some(42),
+        }
+    );
+}
+
+// An explicit JSON `null` becomes `None`.
+#[test]
+fn deserialize_option_null() {
+    let mut parser = Parser::default();
+    let ps = r#"{"required": "yes", "optional": null}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let w: WithOptional = from_element(&elm).unwrap();
+    assert_eq!(
+        w,
+        WithOptional {
+            required: "yes".to_string(),
+            optional: None,
+        }
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Enum deserialization
+// 
---------------------------------------------------------------------------
+
+// Enum with unit variants only.
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+enum Color {
+    Red,
+    Green,
+    Blue,
+}
+
+// A JSON string holding the variant name selects a unit variant.
+#[test]
+fn deserialize_unit_enum() {
+    let mut parser = Parser::default();
+    let ps = r#""Red""#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let c: Color = from_element(&elm).unwrap();
+    assert_eq!(c, Color::Red);
+}
+
+// Enum with one newtype variant and one struct variant.
+#[derive(Debug, Deserialize, PartialEq)]
+enum Shape {
+    Circle(f64),
+    Rectangle { width: f64, height: f64 },
+}
+
+// `{"Variant": value}` selects a newtype variant.
+#[test]
+fn deserialize_newtype_enum() {
+    let mut parser = Parser::default();
+    let ps = r#"{"Circle": 3.15}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let s: Shape = from_element(&elm).unwrap();
+    assert_eq!(s, Shape::Circle(3.15));
+}
+
+// `{"Variant": {...fields...}}` selects a struct variant.
+#[test]
+fn deserialize_struct_enum() {
+    let mut parser = Parser::default();
+    let ps = r#"{"Rectangle": {"width": 10.0, "height": 5.0}}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let s: Shape = from_element(&elm).unwrap();
+    assert_eq!(
+        s,
+        Shape::Rectangle {
+            width: 10.0,
+            height: 5.0,
+        }
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Null / unit
+// ---------------------------------------------------------------------------
+
+// JSON `null` deserializes into the unit type.
+#[test]
+fn deserialize_null() {
+    let mut parser = Parser::default();
+    let ps = "null".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: () = from_element(&elm).unwrap();
+    assert_eq!(v, ());
+}
+
+// ---------------------------------------------------------------------------
+// Tuple
+// ---------------------------------------------------------------------------
+
+// A mixed-type JSON array deserializes into a tuple, position by position.
+#[test]
+fn deserialize_tuple() {
+    let mut parser = Parser::default();
+    let ps = r#"[1, "two", 3.0]"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: (u64, String, f64) = from_element(&elm).unwrap();
+    assert_eq!(v, (1, "two".to_string(), 3.0));
+}
+
+// 
---------------------------------------------------------------------------
+// HashMap
+// ---------------------------------------------------------------------------
+
+// A JSON object deserializes into a map with one entry per member.
+#[test]
+fn deserialize_hashmap() {
+    use std::collections::HashMap;
+    let mut parser = Parser::default();
+    let ps = r#"{"a": 1, "b": 2}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    // NOTE(review): the key and value types of this `HashMap` are not visible
+    // in this view of the file — confirm against the repository.
+    let m: HashMap = from_element(&elm).unwrap();
+    assert_eq!(m.get("a"), Some(&1));
+    assert_eq!(m.get("b"), Some(&2));
+    assert_eq!(m.len(), 2);
+}
+
+// ---------------------------------------------------------------------------
+// element_to_value conversion
+// ---------------------------------------------------------------------------
+
+// Converts one integer, boolean, null and string document. Each case sits in
+// its own block with its own parser.
+#[test]
+fn element_to_value_scalars() {
+    {
+        let mut parser = Parser::default();
+        let ps = "42".to_padded_string();
+        let elm = parser.parse(&ps).unwrap();
+        let v = element_to_value(&elm).unwrap();
+        assert_eq!(v, serde_json::json!(42));
+    }
+    {
+        let mut parser = Parser::default();
+        let ps = "true".to_padded_string();
+        let elm = parser.parse(&ps).unwrap();
+        let v = element_to_value(&elm).unwrap();
+        assert_eq!(v, serde_json::json!(true));
+    }
+    {
+        let mut parser = Parser::default();
+        let ps = "null".to_padded_string();
+        let elm = parser.parse(&ps).unwrap();
+        let v = element_to_value(&elm).unwrap();
+        assert_eq!(v, serde_json::json!(null));
+    }
+    {
+        let mut parser = Parser::default();
+        let ps = r#""hello""#.to_padded_string();
+        let elm = parser.parse(&ps).unwrap();
+        let v = element_to_value(&elm).unwrap();
+        assert_eq!(v, serde_json::json!("hello"));
+    }
+}
+
+// Converts an object that holds an array that holds an object, and compares
+// the whole tree against the equivalent `json!` literal.
+#[test]
+fn element_to_value_nested() {
+    let mut parser = Parser::default();
+    let ps = r#"{"key": [1, 2, {"nested": true}]}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v = element_to_value(&elm).unwrap();
+    assert_eq!(v, serde_json::json!({"key": [1, 2, {"nested": true}]}));
+}
+
+// ---------------------------------------------------------------------------
+// 
Round-trip: parse → serde_json::Value → to_string → parse again
+// ---------------------------------------------------------------------------
+
+// Converts a parsed document to `serde_json::Value`, prints it with serde_json
+// and reads that text back with serde_json. The comparison is between the
+// converted value and its own re-parse, not against `input`.
+#[test]
+fn round_trip_via_serde_json() {
+    let input = r#"{"name":"test","values":[1,2,3],"nested":{"flag":true,"nothing":null}}"#;
+    let mut parser = Parser::default();
+    let ps = input.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let value = element_to_value(&elm).unwrap();
+    let serialized = serde_json::to_string(&value).unwrap();
+
+    let reparsed: serde_json::Value = serde_json::from_str(&serialized).unwrap();
+    assert_eq!(value, reparsed);
+}
+
+// ---------------------------------------------------------------------------
+// Display impl
+// ---------------------------------------------------------------------------
+
+// Formatting an element with `{}` must yield text that serde_json can parse
+// back into the same document.
+#[test]
+fn element_display() {
+    let mut parser = Parser::default();
+    let ps = r#"{"a": 1}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let displayed = format!("{elm}");
+    let reparsed: serde_json::Value = serde_json::from_str(&displayed).unwrap();
+    assert_eq!(reparsed, serde_json::json!({"a": 1}));
+}
+
+// ---------------------------------------------------------------------------
+// Security: numeric overflow protection
+// ---------------------------------------------------------------------------
+
+// Each overflow test parses a number one step outside the target type's range
+// and expects `from_element` to return an error.
+// NOTE(review): the type arguments of the `Result` annotations in this section
+// are not visible in this view of the file; the target integer type is taken
+// from each test's name and message — confirm against the repository.
+#[test]
+fn overflow_u8() {
+    let mut parser = Parser::default();
+    let ps = "256".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: Result = from_element(&elm);
+    assert!(result.is_err(), "256 should not fit in u8");
+}
+
+#[test]
+fn overflow_i8() {
+    let mut parser = Parser::default();
+    let ps = "128".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: Result = from_element(&elm);
+    assert!(result.is_err(), "128 should not fit in i8");
+}
+
+#[test]
+fn overflow_i8_negative() {
+    let mut parser = Parser::default();
+    let ps = "-129".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: 
Result = from_element(&elm);
+    assert!(result.is_err(), "-129 should not fit in i8");
+}
+
+// NOTE(review): as in the tests above, the `Result` type arguments are not
+// visible in this view of the file — confirm against the repository.
+#[test]
+fn overflow_u16() {
+    let mut parser = Parser::default();
+    let ps = "65536".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: Result = from_element(&elm);
+    assert!(result.is_err(), "65536 should not fit in u16");
+}
+
+#[test]
+fn overflow_u32() {
+    let mut parser = Parser::default();
+    let ps = "4294967296".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: Result = from_element(&elm);
+    assert!(result.is_err(), "4294967296 should not fit in u32");
+}
+
+#[test]
+fn overflow_i32() {
+    let mut parser = Parser::default();
+    let ps = "2147483648".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: Result = from_element(&elm);
+    assert!(result.is_err(), "2147483648 should not fit in i32");
+}
+
+// ---------------------------------------------------------------------------
+// Security: deep nesting protection (element_to_value)
+// ---------------------------------------------------------------------------
+
+// Builds `{"a":{"a":...1...}}` with 200 nested objects. The parser is expected
+// to accept the document (`parse(..).unwrap()`); `element_to_value` must then
+// refuse to convert it.
+#[test]
+fn deep_nesting_protection() {
+    let depth = 200;
+    let mut json = String::new();
+    for _ in 0..depth {
+        json.push_str(r#"{"a":"#);
+    }
+    json.push('1');
+    for _ in 0..depth {
+        json.push('}');
+    }
+    let mut parser = Parser::default();
+    let ps = json.into_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result = element_to_value(&elm);
+    assert!(
+        result.is_err(),
+        "nesting depth of {depth} should exceed the limit"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Security: escaped strings are handled correctly
+// ---------------------------------------------------------------------------
+
+// The JSON text contains the escapes `\"` and `\n`; after conversion the
+// string must contain the real quote and newline characters.
+#[test]
+fn escaped_strings() {
+    let mut parser = Parser::default();
+    let ps = r#"{"msg": "hello \"world\"\nline2"}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v = element_to_value(&elm).unwrap();
+    let s = 
v["msg"].as_str().unwrap();
+    assert!(s.contains('"'), "should contain literal quote");
+    assert!(s.contains('\n'), "should contain literal newline");
+}
+
+// `\u2764\uFE0F` escapes in the JSON text must be decoded to the actual
+// characters.
+#[test]
+fn unicode_strings() {
+    let mut parser = Parser::default();
+    let ps = r#"{"emoji": "Hello \u2764\uFE0F"}"#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v = element_to_value(&elm).unwrap();
+    let s = v["emoji"].as_str().unwrap();
+    assert!(s.contains('❤'), "should contain heart emoji");
+}
+
+// ---------------------------------------------------------------------------
+// Security: type mismatch produces errors, not panics
+// ---------------------------------------------------------------------------
+
+// Each test asks for a Rust type that does not match the JSON value and
+// expects `from_element` to return `Err`.
+// NOTE(review): the type arguments of the `Result` annotations are not visible
+// in this view of the file; the requested type is taken from each test's name
+// — confirm against the repository.
+#[test]
+fn type_mismatch_string_as_int() {
+    let mut parser = Parser::default();
+    let ps = r#""not_a_number""#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: Result = from_element(&elm);
+    assert!(result.is_err());
+}
+
+#[test]
+fn type_mismatch_int_as_string() {
+    let mut parser = Parser::default();
+    let ps = "42".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: Result = from_element(&elm);
+    assert!(result.is_err());
+}
+
+#[test]
+fn type_mismatch_array_as_struct() {
+    let mut parser = Parser::default();
+    let ps = "[1, 2, 3]".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: Result = from_element(&elm);
+    assert!(result.is_err());
+}
+
+// ---------------------------------------------------------------------------
+// Security: empty inputs
+// ---------------------------------------------------------------------------
+
+// `[]` deserializes into an empty vector.
+#[test]
+fn empty_array() {
+    let mut parser = Parser::default();
+    let ps = "[]".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: Vec = from_element(&elm).unwrap();
+    assert!(v.is_empty());
+}
+
+// `{}` deserializes into an empty map.
+#[test]
+fn empty_object() {
+    use std::collections::HashMap;
+    let mut parser = Parser::default();
+    let ps = "{}".to_padded_string();
+    let elm = 
parser.parse(&ps).unwrap();
+    // NOTE(review): the key and value types of this `HashMap` are not visible
+    // in this view of the file — confirm against the repository.
+    let m: HashMap = from_element(&elm).unwrap();
+    assert!(m.is_empty());
+}
+
+// ---------------------------------------------------------------------------
+// Large integers at boundaries
+// ---------------------------------------------------------------------------
+
+// The largest `u64` must survive parsing and deserialization unchanged.
+#[test]
+fn max_u64() {
+    let mut parser = Parser::default();
+    let ps = "18446744073709551615".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: u64 = from_element(&elm).unwrap();
+    assert_eq!(v, u64::MAX);
+}
+
+// The smallest `i64` must survive parsing and deserialization unchanged.
+#[test]
+fn min_i64() {
+    let mut parser = Parser::default();
+    let ps = "-9223372036854775808".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: i64 = from_element(&elm).unwrap();
+    assert_eq!(v, i64::MIN);
+}
+
+// The largest `i64` must survive parsing and deserialization unchanged.
+#[test]
+fn max_i64() {
+    let mut parser = Parser::default();
+    let ps = "9223372036854775807".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let v: i64 = from_element(&elm).unwrap();
+    assert_eq!(v, i64::MAX);
+}
+
+// ---------------------------------------------------------------------------
+// Char deserialization
+// ---------------------------------------------------------------------------
+
+// A one-character JSON string deserializes into a `char`.
+#[test]
+fn deserialize_char_single() {
+    let mut parser = Parser::default();
+    let ps = r#""A""#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let c: char = from_element(&elm).unwrap();
+    assert_eq!(c, 'A');
+}
+
+// A two-character string is expected to be rejected when a `char` is
+// requested.
+// NOTE(review): the `Result` type arguments are not visible in this view of
+// the file — confirm against the repository.
+#[test]
+fn deserialize_char_multi_rejects() {
+    let mut parser = Parser::default();
+    let ps = r#""AB""#.to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let result: Result = from_element(&elm);
+    assert!(result.is_err(), "multi-char string should fail for char");
+}
+
+// ---------------------------------------------------------------------------
+// Newtype struct
+// ---------------------------------------------------------------------------
+
+// Newtype fixture around a `u64`.
+#[derive(Debug, Deserialize, PartialEq)]
+struct Wrapper(u64);
+
+// A bare JSON number deserializes directly into the newtype.
+#[test]
+fn deserialize_newtype_struct() {
+    let 
mut parser = Parser::default();
+    let ps = "42".to_padded_string();
+    let elm = parser.parse(&ps).unwrap();
+    let w: Wrapper = from_element(&elm).unwrap();
+    assert_eq!(w, Wrapper(42));
+}
+
+// ---------------------------------------------------------------------------
+// Memory leak regression test: at_pointer method
+// ---------------------------------------------------------------------------
+
+// Looks up arrays, objects and scalars by JSON Pointer from the root, and once
+// from an element that was itself obtained through `at_pointer`. The test
+// checks returned values only; it does not itself measure memory use.
+#[test]
+fn at_pointer_functionality_test() {
+    // Test that at_pointer works correctly and doesn't cause issues
+    let json = r#"{
+        "users": [
+            {"name": "Alice", "age": 30, "active": true},
+            {"name": "Bob", "age": 25, "active": false}
+        ],
+        "metadata": {
+            "version": "1.0",
+            "count": 2
+        },
+        "settings": {
+            "theme": "dark",
+            "notifications": {
+                "email": true,
+                "push": false
+            }
+        }
+    }"#;
+
+    let mut parser = Parser::default();
+    let ps = json.to_padded_string();
+    let root = parser.parse(&ps).unwrap();
+
+    // Test basic at_pointer functionality
+    let users = root.at_pointer("/users").unwrap();
+    assert!(users.get_array().is_ok());
+
+    // Array elements are addressed by their zero-based index.
+    let first_user = root.at_pointer("/users/0").unwrap();
+    assert!(first_user.get_object().is_ok());
+
+    let user_name = root.at_pointer("/users/0/name").unwrap();
+    assert_eq!(user_name.get_string().unwrap(), "Alice");
+
+    let user_age = root.at_pointer("/users/0/age").unwrap();
+    assert_eq!(user_age.get_uint64().unwrap(), 30);
+
+    let metadata_version = root.at_pointer("/metadata/version").unwrap();
+    assert_eq!(metadata_version.get_string().unwrap(), "1.0");
+
+    let count = root.at_pointer("/metadata/count").unwrap();
+    assert_eq!(count.get_uint64().unwrap(), 2);
+
+    // Test nested at_pointer calls
+    // The second lookup is relative to `notifications`, not to the root.
+    let notifications = root.at_pointer("/settings/notifications").unwrap();
+    let email_setting = notifications.at_pointer("/email").unwrap();
+    assert_eq!(email_setting.get_bool().unwrap(), true);
+
+    // Test that elements are still accessible after multiple at_pointer calls
+    let push_setting = 
root.at_pointer("/settings/notifications/push").unwrap(); + assert_eq!(push_setting.get_bool().unwrap(), false); + + // Test round-trip through serde to ensure no corruption + let value = element_to_value(&root).unwrap(); + let serialized = serde_json::to_string(&value).unwrap(); + let reparsed: serde_json::Value = serde_json::from_str(&serialized).unwrap(); + assert_eq!(value, reparsed); +} + +#[test] +fn at_pointer_extensive_usage() { + // Create a deeply nested structure and access many elements via at_pointer + // This stress test helps ensure no memory corruption or leaks + let mut json = String::from("{\"root\": "); + for i in 0..10 { + json.push_str(&format!("{{\"level_{}\": ", i)); + } + json.push_str("\"deepest_value\""); + for _ in 0..10 { + json.push_str("}"); + } + json.push('}'); + + let mut parser = Parser::default(); + let ps = json.to_padded_string(); + let root = parser.parse(&ps).unwrap(); + + // Access elements at various depths + let mut current_path = "/root".to_string(); + for i in 0..10 { + let level_path = format!("{}/level_{}", current_path, i); + let element = root.at_pointer(&level_path).unwrap(); + assert!(element.get_object().is_ok()); + current_path = level_path; + } + + // Access the final value + let value_path = format!("{}/level_9", current_path); + let deepest = root.at_pointer(&(value_path + "/level_9")).unwrap(); + assert_eq!(deepest.get_string().unwrap(), "deepest_value"); +}