From 52f022ac5cbf8a8a3a68f8c14d95d82a8036f109 Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Mon, 21 Jul 2025 21:10:25 +0100 Subject: [PATCH 1/7] Implement snomed usage data and add testing --- rust/codelist-builder-rs/Cargo.toml | 8 +- rust/codelist-builder-rs/src/errors.rs | 21 + rust/codelist-builder-rs/src/lib.rs | 3 + .../src/snomed_usage_data.rs | 391 ++++++++++++++++++ rust/codelist-builder-rs/src/usage_year.rs | 100 +++++ .../tests/download_usage.rs | 38 ++ 6 files changed, 560 insertions(+), 1 deletion(-) create mode 100644 rust/codelist-builder-rs/src/errors.rs create mode 100644 rust/codelist-builder-rs/src/snomed_usage_data.rs create mode 100644 rust/codelist-builder-rs/src/usage_year.rs create mode 100644 rust/codelist-builder-rs/tests/download_usage.rs diff --git a/rust/codelist-builder-rs/Cargo.toml b/rust/codelist-builder-rs/Cargo.toml index a8a7e51..c8636f9 100644 --- a/rust/codelist-builder-rs/Cargo.toml +++ b/rust/codelist-builder-rs/Cargo.toml @@ -8,7 +8,13 @@ description = "Builder library for medical codelists" [dependencies] codelist-rs = { path = "../codelist-rs" } serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" thiserror = { version = "2.0.9" } thiserror-ext = { version = "0.2.1" } +reqwest = { version = "0.12.2" } +csv = { version = "1.3.1" } +async-trait = "0.1" - +[dev-dependencies] +tokio = { version = "1.0", features = ["full"] } +wiremock = { version = "0.6.4" } \ No newline at end of file diff --git a/rust/codelist-builder-rs/src/errors.rs b/rust/codelist-builder-rs/src/errors.rs new file mode 100644 index 0000000..c84e45d --- /dev/null +++ b/rust/codelist-builder-rs/src/errors.rs @@ -0,0 +1,21 @@ +//! 
This file contains custom errors for the codelist-builder library + +/// Enum to represent the different types of errors that can occur in the +/// codelist-builder library + +#[derive(Debug, thiserror::Error, thiserror_ext::Construct)] +pub enum CodeListBuilderError { + #[error("Invalid usage year: {name}")] + InvalidUsageYear { name: String }, + + #[error("Invalid usage data: {name}")] + InvalidUsageData { name: String }, + + #[error("HTTP request error: {0}")] + #[construct(skip)] + ReqwestError(#[from] reqwest::Error), + + #[error("CSV error: {0}")] + #[construct(skip)] + CSVError(#[from] csv::Error), +} diff --git a/rust/codelist-builder-rs/src/lib.rs b/rust/codelist-builder-rs/src/lib.rs index e69de29..9da729b 100644 --- a/rust/codelist-builder-rs/src/lib.rs +++ b/rust/codelist-builder-rs/src/lib.rs @@ -0,0 +1,3 @@ +pub mod errors; +pub mod snomed_usage_data; +pub mod usage_year; diff --git a/rust/codelist-builder-rs/src/snomed_usage_data.rs b/rust/codelist-builder-rs/src/snomed_usage_data.rs new file mode 100644 index 0000000..9151eba --- /dev/null +++ b/rust/codelist-builder-rs/src/snomed_usage_data.rs @@ -0,0 +1,391 @@ +//! 
This file contains the snomed usage data struct and its implementation + +// Internal imports +use crate::errors::CodeListBuilderError; +use crate::usage_year::UsageYear; + +// External imports +use csv; +use reqwest; +use serde::{Deserialize, Serialize}; + +/// Struct to represent a snomed usage data entry +/// +/// # Fields +/// * `snomed_concept_id` - The snomed concept id +/// * `description` - The description +/// * `usage` - The usage +/// * `active_at_start` - Whether the concept was active at the sstart of the usage period +/// * `active_at_end` - Whether the concept was active at the end of the usage period +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct SnomedUsageDataEntry { + pub snomed_concept_id: String, + pub description: String, + pub usage: String, // allows for * for count of 1-4 + pub active_at_start: bool, + pub active_at_end: bool, +} + +/// Struct to represent snomed usage data +/// +/// # Fields +/// * `usage_data` - The usage data +/// * `usage_year` - The usage year +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct SnomedUsageData { + pub usage_data: Vec, + pub usage_year: UsageYear, +} + +impl SnomedUsageData { + /// Download snomed usage data from a url + /// + /// # Arguments + /// * `base_url` - The base url + /// * `usage_year` - The usage year + /// + /// # Returns + /// Self or an error if the download fails + pub async fn download_usage( + base_url: &str, + usage_year: UsageYear, + ) -> Result { + let url = format!( + "{}/{}", + base_url.trim_end_matches('/'), + usage_year.path().trim_start_matches('/') + ); + + let body = reqwest::get(&url).await?.text().await?; + + let usage_data = Self::parse_from_string(&body)?; + + Ok(SnomedUsageData { usage_data, usage_year }) + } + + /// Parse snomed usage data from a string + /// + /// # Arguments + /// * `data` - The data to parse + /// + /// # Returns + /// * The parsed usage data or an error + pub fn parse_from_string( + data: &str, + ) -> 
Result, CodeListBuilderError> { + let mut rdr = csv::ReaderBuilder::new() + .has_headers(true) + .delimiter(b'\t') + .from_reader(data.as_bytes()); + + let mut usage_data = Vec::new(); + + for (row_idx, result) in rdr.records().enumerate() { + let record = result?; + + if record.len() != 5 { + return Err(CodeListBuilderError::invalid_usage_data(format!( + "Invalid number of columns in record ({}) at row {}", + record.len(), + row_idx + 1 + ))); + } + + if let Some((col_idx, _)) = + record.iter().enumerate().find(|(_, field)| field.trim().is_empty()) + { + return Err(CodeListBuilderError::invalid_usage_data(format!( + "Empty value found in record at row {}, column {}", + row_idx + 1, + col_idx + ))); + } + + let entry = SnomedUsageDataEntry { + snomed_concept_id: record[0].to_string(), + description: record[1].to_string(), + usage: record[2].to_string(), + active_at_start: record[3] == *"1", + active_at_end: record[4] == *"1", + }; + + usage_data.push(entry); + } + Ok(usage_data) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::errors::CodeListBuilderError; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + const LONG_TEST_DATA: &str = "SNOMED_Concept_ID Description Usage Active_at_Start Active_at_End +279991000000102 Short message service text message sent to patient (procedure) 122292090 1 1 +163030003 On examination - Systolic blood pressure reading (finding) 59227180 1 1 +163031004 On examination - Diastolic blood pressure reading (finding) 59184050 1 1 +163020007 On examination - blood pressure reading (finding) 37837700 1 1 +1000731000000107 Serum creatinine level (observable entity) 33211250 1 1 +1000661000000107 Serum sodium level (observable entity) 31630420 1 1 +1000651000000109 Serum potassium level (observable entity) 31542470 1 1 +162763007 On examination - weight (finding) 30836800 1 1 +1022431000000105 Haemoglobin estimation (observable entity) 29864410 1 1 +4468401000001106 Triptorelin 
3.75mg injection (pdr for recon)+solvent prefilled syringe (product) 80 0 0"; + + const SINGLE_ENTRY_TEST_DATA: &str = + "SNOMED_Concept_ID Description Usage Active_at_Start Active_at_End +279991000000102 Short message service text message sent to patient (procedure) 122292090 1 1"; + + #[test] + fn test_parse_from_string_single_entry() -> Result<(), CodeListBuilderError> { + let test_data = SINGLE_ENTRY_TEST_DATA; + + let entries = SnomedUsageData::parse_from_string(test_data)?; + + assert_eq!(entries.len(), 1); + + let entry = &entries[0]; + assert_eq!(entry.snomed_concept_id, "279991000000102"); + assert_eq!( + entry.description, + "Short message service text message sent to patient (procedure)" + ); + assert_eq!(entry.usage, "122292090"); + assert!(entry.active_at_start); + assert!(entry.active_at_end); + + Ok(()) + } + + #[test] + fn test_parse_from_string_multiple_entries() -> Result<(), CodeListBuilderError> { + let test_data = LONG_TEST_DATA; + + let entries = SnomedUsageData::parse_from_string(test_data)?; + + assert_eq!(entries.len(), 10); + + assert_eq!(entries[0].snomed_concept_id, "279991000000102"); + assert_eq!( + entries[0].description, + "Short message service text message sent to patient (procedure)" + ); + assert_eq!(entries[0].usage, "122292090"); + assert!(entries[0].active_at_start); + assert!(entries[0].active_at_end); + + assert_eq!(entries[1].snomed_concept_id, "163030003"); + assert_eq!( + entries[1].description, + "On examination - Systolic blood pressure reading (finding)" + ); + assert_eq!(entries[1].usage, "59227180"); + assert!(entries[1].active_at_start); + assert!(entries[1].active_at_end); + + assert_eq!(entries[2].snomed_concept_id, "163031004"); + assert_eq!( + entries[2].description, + "On examination - Diastolic blood pressure reading (finding)" + ); + assert_eq!(entries[2].usage, "59184050"); + assert!(entries[2].active_at_start); + assert!(entries[2].active_at_end); + + assert_eq!(entries[3].snomed_concept_id, "163020007"); + 
assert_eq!(entries[3].description, "On examination - blood pressure reading (finding)"); + assert_eq!(entries[3].usage, "37837700"); + assert!(entries[3].active_at_start); + assert!(entries[3].active_at_end); + + assert_eq!(entries[4].snomed_concept_id, "1000731000000107"); + assert_eq!(entries[4].description, "Serum creatinine level (observable entity)"); + assert_eq!(entries[4].usage, "33211250"); + assert!(entries[4].active_at_start); + assert!(entries[4].active_at_end); + + assert_eq!(entries[5].snomed_concept_id, "1000661000000107"); + assert_eq!(entries[5].description, "Serum sodium level (observable entity)"); + assert_eq!(entries[5].usage, "31630420"); + assert!(entries[5].active_at_start); + assert!(entries[5].active_at_end); + + assert_eq!(entries[6].snomed_concept_id, "1000651000000109"); + assert_eq!(entries[6].description, "Serum potassium level (observable entity)"); + assert_eq!(entries[6].usage, "31542470"); + assert!(entries[6].active_at_start); + assert!(entries[6].active_at_end); + + assert_eq!(entries[7].snomed_concept_id, "162763007"); + assert_eq!(entries[7].description, "On examination - weight (finding)"); + assert_eq!(entries[7].usage, "30836800"); + assert!(entries[7].active_at_start); + assert!(entries[7].active_at_end); + + assert_eq!(entries[8].snomed_concept_id, "1022431000000105"); + assert_eq!(entries[8].description, "Haemoglobin estimation (observable entity)"); + assert_eq!(entries[8].usage, "29864410"); + assert!(entries[8].active_at_start); + assert!(entries[8].active_at_end); + + assert_eq!(entries[9].snomed_concept_id, "4468401000001106"); + assert_eq!( + entries[9].description, + "Triptorelin 3.75mg injection (pdr for recon)+solvent prefilled syringe (product)" + ); + assert_eq!(entries[9].usage, "80"); + assert!(!entries[9].active_at_start); + assert!(!entries[9].active_at_end); + + Ok(()) + } + + #[test] + fn test_parse_from_string_empty_data() -> Result<(), CodeListBuilderError> { + let test_data = "SNOMED_Concept_ID 
Description Usage Active_at_Start Active_at_End"; + let entries = SnomedUsageData::parse_from_string(test_data)?; + assert_eq!(entries.len(), 0); + Ok(()) + } + + #[test] + fn test_parse_from_string_column_count_too_small() -> Result<(), CodeListBuilderError> { + let test_data = "SNOMED_Concept_ID Description Usage Active_at_Start +279991000000102 Short message service text message sent to patient (procedure) 122292090 1"; + let error = SnomedUsageData::parse_from_string(test_data).unwrap_err(); + let error_string = error.to_string(); + assert_eq!( + &error_string, + "Invalid usage data: Invalid number of columns in record (4) at row 1" + ); + Ok(()) + } + + #[test] + fn test_parse_from_string_column_count_too_big() -> Result<(), CodeListBuilderError> { + let test_data = + "SNOMED_Concept_ID Description Usage Active_at_Start Active_at_End Active_at_End +279991000000102 Short message service text message sent to patient (procedure) 122292090 1 1 1"; + let error = SnomedUsageData::parse_from_string(test_data).unwrap_err(); + let error_string = error.to_string(); + assert_eq!( + &error_string, + "Invalid usage data: Invalid number of columns in record (6) at row 1" + ); + Ok(()) + } + + #[test] + fn test_parse_from_string_unequal_column_count() -> Result<(), CodeListBuilderError> { + let test_data = "SNOMED_Concept_ID Description Usage Active_at_Start Active_at_End +279991000000102 Short message service text message sent to patient (procedure) 122292090 1 +163030003 On examination - Systolic blood pressure reading (finding) 59227180 1 +163031004 On examination - Diastolic blood pressure reading (finding) 59184050 1 1 +163020007 On examination - blood pressure reading (finding) 37837700 1 1 +1000731000000107 Serum creatinine level (observable entity) 33211250 1 +1000661000000107 Serum sodium level (observable entity) 31630420 1 1"; + let error = SnomedUsageData::parse_from_string(test_data).unwrap_err(); + let error_string = error.to_string(); + 
assert!(error_string.contains("CSV error:")); + Ok(()) + } + + #[tokio::test] + async fn test_download_usage_from_url() -> Result<(), CodeListBuilderError> { + let mock_server = MockServer::start().await; + let usage_year = UsageYear::Y2020_21; + + let test_data = LONG_TEST_DATA; + + Mock::given(method("GET")) + .and(path(usage_year.path())) + .respond_with(ResponseTemplate::new(200).set_body_string(test_data)) + .mount(&mock_server) + .await; + + let result = SnomedUsageData::download_usage(&mock_server.uri(), usage_year).await?; + + let usage_data = result.usage_data; + let usage_year = result.usage_year; + + assert_eq!(usage_data.len(), 10); + + assert_eq!(usage_data[0].snomed_concept_id, "279991000000102"); + assert_eq!( + usage_data[0].description, + "Short message service text message sent to patient (procedure)" + ); + assert_eq!(usage_data[0].usage, "122292090"); + assert!(usage_data[0].active_at_start); + assert!(usage_data[0].active_at_end); + + assert_eq!(usage_data[1].snomed_concept_id, "163030003"); + assert_eq!( + usage_data[1].description, + "On examination - Systolic blood pressure reading (finding)" + ); + assert_eq!(usage_data[1].usage, "59227180"); + assert!(usage_data[1].active_at_start); + assert!(usage_data[1].active_at_end); + + assert_eq!(usage_data[2].snomed_concept_id, "163031004"); + assert_eq!( + usage_data[2].description, + "On examination - Diastolic blood pressure reading (finding)" + ); + assert_eq!(usage_data[2].usage, "59184050"); + assert!(usage_data[2].active_at_start); + assert!(usage_data[2].active_at_end); + + assert_eq!(usage_data[3].snomed_concept_id, "163020007"); + assert_eq!(usage_data[3].description, "On examination - blood pressure reading (finding)"); + assert_eq!(usage_data[3].usage, "37837700"); + assert!(usage_data[3].active_at_start); + assert!(usage_data[3].active_at_end); + + assert_eq!(usage_data[4].snomed_concept_id, "1000731000000107"); + assert_eq!(usage_data[4].description, "Serum creatinine level 
(observable entity)"); + assert_eq!(usage_data[4].usage, "33211250"); + assert!(usage_data[4].active_at_start); + assert!(usage_data[4].active_at_end); + + assert_eq!(usage_data[5].snomed_concept_id, "1000661000000107"); + assert_eq!(usage_data[5].description, "Serum sodium level (observable entity)"); + assert_eq!(usage_data[5].usage, "31630420"); + assert!(usage_data[5].active_at_start); + assert!(usage_data[5].active_at_end); + + assert_eq!(usage_data[6].snomed_concept_id, "1000651000000109"); + assert_eq!(usage_data[6].description, "Serum potassium level (observable entity)"); + assert_eq!(usage_data[6].usage, "31542470"); + assert!(usage_data[6].active_at_start); + assert!(usage_data[6].active_at_end); + + assert_eq!(usage_data[7].snomed_concept_id, "162763007"); + assert_eq!(usage_data[7].description, "On examination - weight (finding)"); + assert_eq!(usage_data[7].usage, "30836800"); + assert!(usage_data[7].active_at_start); + assert!(usage_data[7].active_at_end); + + assert_eq!(usage_data[8].snomed_concept_id, "1022431000000105"); + assert_eq!(usage_data[8].description, "Haemoglobin estimation (observable entity)"); + assert_eq!(usage_data[8].usage, "29864410"); + assert!(usage_data[8].active_at_start); + assert!(usage_data[8].active_at_end); + + assert_eq!(usage_data[9].snomed_concept_id, "4468401000001106"); + assert_eq!( + usage_data[9].description, + "Triptorelin 3.75mg injection (pdr for recon)+solvent prefilled syringe (product)" + ); + assert_eq!(usage_data[9].usage, "80"); + assert!(!usage_data[9].active_at_start); + assert!(!usage_data[9].active_at_end); + + assert_eq!(usage_year, UsageYear::Y2020_21); + + Ok(()) + } +} diff --git a/rust/codelist-builder-rs/src/usage_year.rs b/rust/codelist-builder-rs/src/usage_year.rs new file mode 100644 index 0000000..aed3985 --- /dev/null +++ b/rust/codelist-builder-rs/src/usage_year.rs @@ -0,0 +1,100 @@ +//! 
This file contains the usage year enum and its implementation + +// Internal imports +use crate::errors::CodeListBuilderError; + +// External imports +use serde::{Deserialize, Serialize}; +use std::str::FromStr; + +/// Enum to represent usage year +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum UsageYear { + Y2011_12, + Y2012_13, + Y2013_14, + Y2014_15, + Y2015_16, + Y2016_17, + Y2017_18, + Y2018_19, + Y2019_20, + Y2020_21, + Y2021_22, + Y2022_23, + Y2023_24, +} + +impl FromStr for UsageYear { + type Err = CodeListBuilderError; + + /// Convert a string to a usage year + /// + /// # Arguments + /// * `s` - The string to convert + /// + /// # Returns Self or an error if the string is not a valid usage year + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "2011-12" => Ok(UsageYear::Y2011_12), + "2012-13" => Ok(UsageYear::Y2012_13), + "2013-14" => Ok(UsageYear::Y2013_14), + "2014-15" => Ok(UsageYear::Y2014_15), + "2015-16" => Ok(UsageYear::Y2015_16), + "2016-17" => Ok(UsageYear::Y2016_17), + "2017-18" => Ok(UsageYear::Y2017_18), + "2018-19" => Ok(UsageYear::Y2018_19), + "2019-20" => Ok(UsageYear::Y2019_20), + "2020-21" => Ok(UsageYear::Y2020_21), + "2021-22" => Ok(UsageYear::Y2021_22), + "2022-23" => Ok(UsageYear::Y2022_23), + "2023-24" => Ok(UsageYear::Y2023_24), + invalid_string => Err(CodeListBuilderError::invalid_usage_year(invalid_string)), + } + } +} + +impl UsageYear { + /// Get the URL for the usage year + /// + /// # Returns + /// * `String` - The path for the usage year + pub fn path(&self) -> String { + match self { + UsageYear::Y2011_12 => "/53/C8F877/SNOMED_code_usage_2011-12.txt".to_string(), + UsageYear::Y2012_13 => "/69/866A44/SNOMED_code_usage_2012-13.txt".to_string(), + UsageYear::Y2013_14 => "/82/40F702/SNOMED_code_usage_2013-14.txt".to_string(), + UsageYear::Y2014_15 => "/BB/47E566/SNOMED_code_usage_2014-15.txt".to_string(), + UsageYear::Y2015_16 => "/8B/15EAA1/SNOMED_code_usage_2015-16.txt".to_string(), 
+ UsageYear::Y2016_17 => "/E2/79561E/SNOMED_code_usage_2016-17.txt".to_string(), + UsageYear::Y2017_18 => "/9F/024949/SNOMED_code_usage_2017-18.txt".to_string(), + UsageYear::Y2018_19 => "/13/F2956B/SNOMED_code_usage_2018-19.txt".to_string(), + UsageYear::Y2019_20 => "/8F/882EB3/SNOMED_code_usage_2019-20.txt".to_string(), + UsageYear::Y2020_21 => "/8A/09BBE6/SNOMED_code_usage_2020-21.txt".to_string(), + UsageYear::Y2021_22 => "/71/6C02F5/SNOMED_code_usage_2021-22.txt".to_string(), + UsageYear::Y2022_23 => "/09/E1218D/SNOMED_code_usage_2022-23.txt".to_string(), + UsageYear::Y2023_24 => "/B8/7D8335/SNOMED_code_usage_2023-24.txt".to_string(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::str::FromStr; + + #[test] + fn test_from_str() -> Result<(), CodeListBuilderError> { + let usage_year = UsageYear::from_str("2011-12")?; + assert_eq!(usage_year, UsageYear::Y2011_12); + Ok(()) + } + + #[test] + fn test_path() { + let usage_year = UsageYear::Y2015_16; + let url = usage_year.path(); + let expected_url = "/8B/15EAA1/SNOMED_code_usage_2015-16.txt".to_string(); + assert_eq!(url, expected_url); + } +} diff --git a/rust/codelist-builder-rs/tests/download_usage.rs b/rust/codelist-builder-rs/tests/download_usage.rs new file mode 100644 index 0000000..b178e09 --- /dev/null +++ b/rust/codelist-builder-rs/tests/download_usage.rs @@ -0,0 +1,38 @@ +use codelist_builder_rs::errors::CodeListBuilderError; +use codelist_builder_rs::snomed_usage_data::SnomedUsageData; +use codelist_builder_rs::usage_year::UsageYear; + +#[tokio::test] +async fn test_download_usage() -> Result<(), CodeListBuilderError> { + let base_url = "https://files.digital.nhs.uk"; + + let result_2011_12 = SnomedUsageData::download_usage(base_url, UsageYear::Y2011_12).await?; + let result_2012_13 = SnomedUsageData::download_usage(base_url, UsageYear::Y2012_13).await?; + let result_2013_14 = SnomedUsageData::download_usage(base_url, UsageYear::Y2013_14).await?; + let result_2014_15 = 
SnomedUsageData::download_usage(base_url, UsageYear::Y2014_15).await?; + let result_2015_16 = SnomedUsageData::download_usage(base_url, UsageYear::Y2015_16).await?; + let result_2016_17 = SnomedUsageData::download_usage(base_url, UsageYear::Y2016_17).await?; + let result_2017_18 = SnomedUsageData::download_usage(base_url, UsageYear::Y2017_18).await?; + let result_2018_19 = SnomedUsageData::download_usage(base_url, UsageYear::Y2018_19).await?; + let result_2019_20 = SnomedUsageData::download_usage(base_url, UsageYear::Y2019_20).await?; + let result_2020_21 = SnomedUsageData::download_usage(base_url, UsageYear::Y2020_21).await?; + let result_2021_22 = SnomedUsageData::download_usage(base_url, UsageYear::Y2021_22).await?; + let result_2022_23 = SnomedUsageData::download_usage(base_url, UsageYear::Y2022_23).await?; + let result_2023_24 = SnomedUsageData::download_usage(base_url, UsageYear::Y2023_24).await?; + + assert!(!result_2011_12.usage_data.is_empty()); + assert!(!result_2012_13.usage_data.is_empty()); + assert!(!result_2013_14.usage_data.is_empty()); + assert!(!result_2014_15.usage_data.is_empty()); + assert!(!result_2015_16.usage_data.is_empty()); + assert!(!result_2016_17.usage_data.is_empty()); + assert!(!result_2017_18.usage_data.is_empty()); + assert!(!result_2018_19.usage_data.is_empty()); + assert!(!result_2019_20.usage_data.is_empty()); + assert!(!result_2020_21.usage_data.is_empty()); + assert!(!result_2021_22.usage_data.is_empty()); + assert!(!result_2022_23.usage_data.is_empty()); + assert!(!result_2023_24.usage_data.is_empty()); + + Ok(()) +} From 1c194c428dbdd2882b54972923dbb87eed0aaaa3 Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Mon, 21 Jul 2025 22:17:24 +0100 Subject: [PATCH 2/7] fix clippy --- bindings/python/src/codelist.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/src/codelist.rs b/bindings/python/src/codelist.rs index 6a2041e..758cd01 100755 --- 
a/bindings/python/src/codelist.rs +++ b/bindings/python/src/codelist.rs @@ -458,7 +458,7 @@ impl PyCodeList { match custom_regex { Some(regex_str) => { let regex = Regex::new(&regex_str) - .map_err(|e| PyValueError::new_err(format!("Invalid regex: {}", e)))?; + .map_err(|e| PyValueError::new_err(format!("Invalid regex: {e}")))?; self.inner .validate_codes(Some(&regex)) .map_err(|e| PyValueError::new_err(e.to_string()))? From e0b02ad11c6bc3a8167370a5295682f573307763 Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Mon, 21 Jul 2025 22:22:00 +0100 Subject: [PATCH 3/7] format code --- bindings/python/src/codelist.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/src/codelist.rs b/bindings/python/src/codelist.rs index 758cd01..5be82ac 100755 --- a/bindings/python/src/codelist.rs +++ b/bindings/python/src/codelist.rs @@ -458,7 +458,7 @@ impl PyCodeList { match custom_regex { Some(regex_str) => { let regex = Regex::new(&regex_str) - .map_err(|e| PyValueError::new_err(format!("Invalid regex: {e}")))?; + .map_err(|e| PyValueError::new_err(format!("Invalid regex: {e}")))?; self.inner .validate_codes(Some(&regex)) .map_err(|e| PyValueError::new_err(e.to_string()))?
From 28b0367dc8aeacdbb65578c46d4d8dcf513fb298 Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Sat, 2 Aug 2025 16:39:00 +0100 Subject: [PATCH 4/7] fix typo in snomed_usage_data docs --- rust/codelist-builder-rs/src/snomed_usage_data.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/codelist-builder-rs/src/snomed_usage_data.rs b/rust/codelist-builder-rs/src/snomed_usage_data.rs index 9151eba..4a8f922 100644 --- a/rust/codelist-builder-rs/src/snomed_usage_data.rs +++ b/rust/codelist-builder-rs/src/snomed_usage_data.rs @@ -15,7 +15,7 @@ use serde::{Deserialize, Serialize}; /// * `snomed_concept_id` - The snomed concept id /// * `description` - The description /// * `usage` - The usage -/// * `active_at_start` - Whether the concept was active at the sstart of the usage period +/// * `active_at_start` - Whether the concept was active at the start of the usage period /// * `active_at_end` - Whether the concept was active at the end of the usage period #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct SnomedUsageDataEntry { From 9be19da216f170c0ec7b978e1baa3ce177566106 Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Fri, 8 Aug 2025 15:49:55 +0100 Subject: [PATCH 5/7] Improve docs in snowmed_usage_data --- .../src/snomed_usage_data.rs | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/rust/codelist-builder-rs/src/snomed_usage_data.rs b/rust/codelist-builder-rs/src/snomed_usage_data.rs index 4a8f922..663c6af 100644 --- a/rust/codelist-builder-rs/src/snomed_usage_data.rs +++ b/rust/codelist-builder-rs/src/snomed_usage_data.rs @@ -1,4 +1,30 @@ //! This file contains the snomed usage data struct and its implementation +//! It contains functionality for downloading and parsing SNOMED usage data files from NHS Digital. 
See https://digital.nhs.uk/data-and-information/publications/statistical/mi-snomed-code-usage-in-primary-care. +//! +//! Components of the file: +//! +//! SNOMED_Concept_ID: +//! SNOMED concepts which have been added to a patient record in a general practice system during the reporting period. +//! +//! Description: +//! The fully specified name associated with the SNOMED_Concept_ID on the final day of the reporting period (31 July). +//! +//! Usage: +//! The number of times that the SNOMED_Concept_ID was added into any patient record within the reporting period, rounded to the nearerst 10. Usage of 1 to 4 is displayed as *. SNOMED concepts with no code usage are not included. +//! Important notes: +//! - Data prior to 2019 was originally submitted mostly in READ V2 or CTV3, but in the usage files, these codes have been mapped to corresponding SNOMED codes using final 2020 version of the mapping tables published by NHS England. +//! - The usage does not show how many patients had each code added to their record - each addition regardless of whether it is the same patient increments the count by 1. Therefore it is not possible to infer the number of individual patients with a particular code. +//! - For the 2011-12 to 2017-18 data, it is stated that "Current maximum value is approximately 250,000,000" - no such maximum is stated for the 2018-19 onwards data. +//! +//! Active_at_Start: +//! Active status of the SNOMED_Concept_ID on the first day of the reporting period. This is taken from the most recent UK clinical extension, or associated International extention, which was published up to the start of the reporting year (1 August). +//! 1 = SNOMED concept was published and was active. +//! 0 = SNOMED concept was either not yet available or was inactive. +//! +//! Active_at_End: +//! Active status of the SNOMED_Concept_ID on the last day of the reporting period. 
This is taken from the most recent UK clinical extension, or associated International extention, which was published up to the end of the reporting year (31 July). +//! 1 = SNOMED concept was published and was active. +//! 0 = SNOMED concept was either not yet available or was inactive. // Internal imports use crate::errors::CodeListBuilderError; @@ -14,14 +40,14 @@ use serde::{Deserialize, Serialize}; /// # Fields /// * `snomed_concept_id` - The snomed concept id /// * `description` - The description -/// * `usage` - The usage +/// * `usage` - The usage. A count of 1-4 is denoted by a *. Counts above 4 are denoted by a number rounded to the nearest 10. /// * `active_at_start` - Whether the concept was active at the start of the usage period /// * `active_at_end` - Whether the concept was active at the end of the usage period #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct SnomedUsageDataEntry { pub snomed_concept_id: String, pub description: String, - pub usage: String, // allows for * for count of 1-4 + pub usage: String, pub active_at_start: bool, pub active_at_end: bool, } From 32b6003e4849e2c04707134622cfa401b0529728 Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Fri, 8 Aug 2025 16:35:38 +0100 Subject: [PATCH 6/7] Add codelist-builder to workspace and improve error handling in download_usage --- Cargo.toml | 1 + rust/codelist-builder-rs/src/errors.rs | 3 ++ .../src/snomed_usage_data.rs | 51 ++++++++++++++----- 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 05d8e6a..4aa8e70 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ members = [ "rust/codelist-rs", "rust/codelist-validator-rs", + "rust/codelist-builder-rs", "bindings/python", "bindings/r/src/rust" ] diff --git a/rust/codelist-builder-rs/src/errors.rs b/rust/codelist-builder-rs/src/errors.rs index c84e45d..b5ec0a8 100644 --- a/rust/codelist-builder-rs/src/errors.rs +++ 
b/rust/codelist-builder-rs/src/errors.rs @@ -11,6 +11,9 @@ pub enum CodeListBuilderError { #[error("Invalid usage data: {name}")] InvalidUsageData { name: String }, + #[error("HTTP error code: {code}: {body}")] + HttpErrorCode { code: String, body: String }, + #[error("HTTP request error: {0}")] #[construct(skip)] ReqwestError(#[from] reqwest::Error), diff --git a/rust/codelist-builder-rs/src/snomed_usage_data.rs b/rust/codelist-builder-rs/src/snomed_usage_data.rs index 663c6af..d1d55cc 100644 --- a/rust/codelist-builder-rs/src/snomed_usage_data.rs +++ b/rust/codelist-builder-rs/src/snomed_usage_data.rs @@ -73,21 +73,27 @@ impl SnomedUsageData { /// # Returns /// Self or an error if the download fails pub async fn download_usage( - base_url: &str, - usage_year: UsageYear, - ) -> Result { - let url = format!( - "{}/{}", - base_url.trim_end_matches('/'), - usage_year.path().trim_start_matches('/') - ); - - let body = reqwest::get(&url).await?.text().await?; + base_url: &str, + usage_year: UsageYear, +) -> Result { + let url = format!( + "{}/{}", + base_url.trim_end_matches('/'), + usage_year.path().trim_start_matches('/') + ); + + let response = reqwest::get(&url).await.map_err(CodeListBuilderError::from)?; + if !response.status().is_success() { + let status = response.status().to_string(); + let body = response.text().await.unwrap_or_default(); + return Err(CodeListBuilderError::http_error_code(status, body)); + } + let body = response.text().await.map_err(CodeListBuilderError::from)?; - let usage_data = Self::parse_from_string(&body)?; + let usage_data = Self::parse_from_string(&body)?; - Ok(SnomedUsageData { usage_data, usage_year }) - } + Ok(SnomedUsageData { usage_data, usage_year }) +} /// Parse snomed usage data from a string /// @@ -414,4 +420,23 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_download_usage_from_url_error_response() -> Result<(), CodeListBuilderError> { + let mock_server = MockServer::start().await; + let usage_year = 
UsageYear::Y2020_21; + + Mock::given(method("GET")) + .and(path(usage_year.path())) + .respond_with(ResponseTemplate::new(500)) + .mount(&mock_server) + .await; + + let error = SnomedUsageData::download_usage(&mock_server.uri(), usage_year).await.unwrap_err(); + let error_string = error.to_string(); + + assert_eq!(&error_string, "HTTP error code: 500 Internal Server Error: "); + + Ok(()) + } } From 0a7909bdac00964d8800470c5c8d2816eaa01739 Mon Sep 17 00:00:00 2001 From: Emma <156218556+em-baggie@users.noreply.github.com> Date: Fri, 8 Aug 2025 18:29:55 +0100 Subject: [PATCH 7/7] Format --- .../src/snomed_usage_data.rs | 53 ++++++++++--------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/rust/codelist-builder-rs/src/snomed_usage_data.rs b/rust/codelist-builder-rs/src/snomed_usage_data.rs index d1d55cc..82d30f4 100644 --- a/rust/codelist-builder-rs/src/snomed_usage_data.rs +++ b/rust/codelist-builder-rs/src/snomed_usage_data.rs @@ -1,26 +1,26 @@ //! This file contains the snomed usage data struct and its implementation //! It contains functionality for downloading and parsing SNOMED usage data files from NHS Digital. See https://digital.nhs.uk/data-and-information/publications/statistical/mi-snomed-code-usage-in-primary-care. -//! +//! //! Components of the file: -//! +//! //! SNOMED_Concept_ID: //! SNOMED concepts which have been added to a patient record in a general practice system during the reporting period. -//! +//! //! Description: //! The fully specified name associated with the SNOMED_Concept_ID on the final day of the reporting period (31 July). -//! +//! //! Usage: //! The number of times that the SNOMED_Concept_ID was added into any patient record within the reporting period, rounded to the nearerst 10. Usage of 1 to 4 is displayed as *. SNOMED concepts with no code usage are not included. //! Important notes: //! 
- Data prior to 2019 was originally submitted mostly in READ V2 or CTV3, but in the usage files, these codes have been mapped to corresponding SNOMED codes using final 2020 version of the mapping tables published by NHS England. //! - The usage does not show how many patients had each code added to their record - each addition regardless of whether it is the same patient increments the count by 1. Therefore it is not possible to infer the number of individual patients with a particular code. //! - For the 2011-12 to 2017-18 data, it is stated that "Current maximum value is approximately 250,000,000" - no such maximum is stated for the 2018-19 onwards data. -//! +//! //! Active_at_Start: //! Active status of the SNOMED_Concept_ID on the first day of the reporting period. This is taken from the most recent UK clinical extension, or associated International extention, which was published up to the start of the reporting year (1 August). //! 1 = SNOMED concept was published and was active. //! 0 = SNOMED concept was either not yet available or was inactive. -//! +//! //! Active_at_End: //! Active status of the SNOMED_Concept_ID on the last day of the reporting period. This is taken from the most recent UK clinical extension, or associated International extention, which was published up to the end of the reporting year (31 July). //! 1 = SNOMED concept was published and was active. 
@@ -73,27 +73,27 @@ impl SnomedUsageData { /// # Returns /// Self or an error if the download fails pub async fn download_usage( - base_url: &str, - usage_year: UsageYear, -) -> Result { - let url = format!( - "{}/{}", - base_url.trim_end_matches('/'), - usage_year.path().trim_start_matches('/') - ); - - let response = reqwest::get(&url).await.map_err(CodeListBuilderError::from)?; - if !response.status().is_success() { - let status = response.status().to_string(); - let body = response.text().await.unwrap_or_default(); - return Err(CodeListBuilderError::http_error_code(status, body)); - } - let body = response.text().await.map_err(CodeListBuilderError::from)?; + base_url: &str, + usage_year: UsageYear, + ) -> Result { + let url = format!( + "{}/{}", + base_url.trim_end_matches('/'), + usage_year.path().trim_start_matches('/') + ); + + let response = reqwest::get(&url).await.map_err(CodeListBuilderError::from)?; + if !response.status().is_success() { + let status = response.status().to_string(); + let body = response.text().await.unwrap_or_default(); + return Err(CodeListBuilderError::http_error_code(status, body)); + } + let body = response.text().await.map_err(CodeListBuilderError::from)?; - let usage_data = Self::parse_from_string(&body)?; + let usage_data = Self::parse_from_string(&body)?; - Ok(SnomedUsageData { usage_data, usage_year }) -} + Ok(SnomedUsageData { usage_data, usage_year }) + } /// Parse snomed usage data from a string /// @@ -432,7 +432,8 @@ mod tests { .mount(&mock_server) .await; - let error = SnomedUsageData::download_usage(&mock_server.uri(), usage_year).await.unwrap_err(); + let error = + SnomedUsageData::download_usage(&mock_server.uri(), usage_year).await.unwrap_err(); let error_string = error.to_string(); assert_eq!(&error_string, "HTTP error code: 500 Internal Server Error: ");