From 32994571b3be50fbe854d447b9073fe135001936 Mon Sep 17 00:00:00 2001 From: Sergio Marti <44024844+sermars@users.noreply.github.com> Date: Wed, 14 May 2025 09:00:33 +0200 Subject: [PATCH 01/20] ai catalog template --- docs/ai_assessment_catalog/index.md | 48 ++++++++++++++++++++++++ mkdocs.yml | 58 +++++++++++++++-------------- 2 files changed, 78 insertions(+), 28 deletions(-) create mode 100644 docs/ai_assessment_catalog/index.md diff --git a/docs/ai_assessment_catalog/index.md b/docs/ai_assessment_catalog/index.md new file mode 100644 index 00000000..f7e07014 --- /dev/null +++ b/docs/ai_assessment_catalog/index.md @@ -0,0 +1,48 @@ +--- +icon: material/store-search-outline +title: AI Assessment Catalog +hide: + - toc +--- + + + +
+
+[_PAGE DESCRIPTION_] + +
+
+ + +
+
+ + +
+ +
+ +| Dataset | Super Node | TEF Node | Site | Data Model | Sampling Time | Historical | Owner | Get Access | +| ------- | ---------- | -------- | ---- | ---------- | ------------- | ---------- | ----- | ---------- | +| [Waste Container](./metadata_datasets/south_spain_valencia.md) | South | Spain | Valencia | [gitlab_vlci](https://gitlab.com/vlci-public/models-dades/wastemanagement/-/blob/main/WasteContainer/spec.md?ref_type=heads) | RealTime | From 2000 | València City Council | [Contact](https://valencia.opendatasoft.com/pages/home/) | diff --git a/mkdocs.yml b/mkdocs.yml index 02f61a70..b344389a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -43,15 +43,15 @@ nav: - MV Data Space: - dataspace/index.md - Trust Frameworks: - - dataspace/trust_frameworks/index.md - - Gaia-X: - - dataspace/trust_frameworks/gaiax_clearing_house/index.md - - iShare: - - dataspace/trust_frameworks/ishare/index.md - - Fiware Trust Anchor: - - dataspace/trust_frameworks/fiware_trust_anchor/index.md - - dataspace/trust_frameworks/fiware_trust_anchor/api_trusted_issuers_list.md - - dataspace/trust_frameworks/fiware_trust_anchor/api_trusted_issuers_registry.md + - dataspace/trust_frameworks/index.md + - Gaia-X: + - dataspace/trust_frameworks/gaiax_clearing_house/index.md + - iShare: + - dataspace/trust_frameworks/ishare/index.md + - Fiware Trust Anchor: + - dataspace/trust_frameworks/fiware_trust_anchor/index.md + - dataspace/trust_frameworks/fiware_trust_anchor/api_trusted_issuers_list.md + - dataspace/trust_frameworks/fiware_trust_anchor/api_trusted_issuers_registry.md - dataspace/vc_issuer.md - dataspace/connector.md - Guides: @@ -69,33 +69,35 @@ nav: - Data Space Connectors: - documentation/data_space_connectors/index.md - Eclipse: documentation/data_space_connectors/eclipse/index.md - - Fiware: + - Fiware: - documentation/data_space_connectors/fiware/index.md + - AI Assessment Catalog: + - ai_assessment_catalog/index.md - AI services: - services/index.md - Minimal Interoperable AI Service: 
services/waste_collection.md - Container Location Optimization: services/wastecontainer_location_optimization.md - TEF Nodes: - tef/index.md - - Nordic - POWER: - - tef/nordic_power/doll_living_lab.md - - tef/nordic_power/aarhus_city_lab.md - - tef/nordic_power/net_zero_innovation_hub.md - - tef/nordic_power/dti.md - - tef/nordic_power/center_denmark.md - - tef/nordic_power/gate21.md - - tef/nordic_power/rise.md - - tef/nordic_power/tampere.md + - Nordic - POWER: + - tef/nordic_power/doll_living_lab.md + - tef/nordic_power/aarhus_city_lab.md + - tef/nordic_power/net_zero_innovation_hub.md + - tef/nordic_power/dti.md + - tef/nordic_power/center_denmark.md + - tef/nordic_power/gate21.md + - tef/nordic_power/rise.md + - tef/nordic_power/tampere.md - Central - MOVE: - - tef/central_move/mechelen.md - - tef/central_move/brussels.md - - tef/central_move/eindhoven.md - - tef/central_move/paris.md - - tef/central_move/list.md + - tef/central_move/mechelen.md + - tef/central_move/brussels.md + - tef/central_move/eindhoven.md + - tef/central_move/paris.md + - tef/central_move/list.md - South - CONNECT: - - tef/south_connect/valencia.md - - tef/south_connect/milano.md - - tef/south_connect/warsaw.md + - tef/south_connect/valencia.md + - tef/south_connect/milano.md + - tef/south_connect/warsaw.md - Toolbox: - toolbox/index.md - AI Toolkit: toolbox/ai_toolkit.md @@ -147,7 +149,7 @@ plugins: extra: generator: false labels: - data_brokers: + data_brokers: fiware: 'Fiware' kafka: 'Kafka' iot_ticket: 'IoT-Ticket Azure' From 8fa6eeb60a834ee10fb384178a44bae598e94447 Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 12:06:19 +0100 Subject: [PATCH 02/20] Create citcom_label.md --- docs/toolbox/citcom_label.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 docs/toolbox/citcom_label.md diff --git a/docs/toolbox/citcom_label.md b/docs/toolbox/citcom_label.md new file mode 100644 index 00000000..8077c596 --- /dev/null +++ 
b/docs/toolbox/citcom_label.md @@ -0,0 +1,28 @@ +# Citcom Label + +The Citcom Label is an initiative currently under development within Citcom.ai. Its goal is to create a trusted, recognisable signal that helps AI providers demonstrate responsible practices and gives buyers—especially public-sector actors such as smart cities—a clearer basis for evaluating and procuring AI solutions. + +## What will the Citcom Label be? + +The label is envisioned as a **system of digital badges**, each representing a specific dimension of trustworthiness assessed during the evaluation process. +These badges would include a **watermark**, ensuring authenticity and preventing misuse. Each badge would be **verifiable through the Citcom Hub**, allowing external stakeholders to confirm its origin, evaluation status, and associated criteria. + +The Citcom badges are **not intended to function as legally binding conformity certificates under the AI Act**. Instead, they serve as **smart-city–oriented quality marks**, helping cities and other public authorities gain confidence in the AI solutions they consider adopting. + +For AI innovators, the Citcom badge system provides **independent third-party validation**, helping them promote their solutions and demonstrate that they meet recognised standards of trustworthiness. For cities and public buyers, the badges offer **clear, evidence-based guidance** to support more informed and transparent procurement decisions. + +## On what basis will the Citcom badges be awarded? + +The detailed criteria are still being developed with Citcom partners, but several guiding principles are emerging: + +### Completion of an evaluation +A badge is expected to be awarded only once a solution completes a structured assessment aligned with shared guidelines for the relevant dimension of trustworthiness. 
+ +### Common methodology +Work is ongoing to define a coherent framework that determines how systems are qualified, how requirements translate into test cases, and how results are interpreted across different trust dimensions. + +### Success thresholds +Initial discussions point toward setting minimum quantitative and qualitative thresholds that vary by product type, maturity level, and the specific dimension being assessed. + +### Real-world validation +Evaluations are expected to rely on practical or pilot scenarios using the actual product, ensuring that results reflect real-world behaviour. From 3e3e289cac78aef7f9c08541249802d2b04c76f2 Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 13:08:15 +0100 Subject: [PATCH 03/20] Update citcom_label.md --- docs/toolbox/citcom_label.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/toolbox/citcom_label.md b/docs/toolbox/citcom_label.md index 8077c596..d6c32386 100644 --- a/docs/toolbox/citcom_label.md +++ b/docs/toolbox/citcom_label.md @@ -2,6 +2,7 @@ The Citcom Label is an initiative currently under development within Citcom.ai. Its goal is to create a trusted, recognisable signal that helps AI providers demonstrate responsible practices and gives buyers—especially public-sector actors such as smart cities—a clearer basis for evaluating and procuring AI solutions. + ## What will the Citcom Label be? The label is envisioned as a **system of digital badges**, each representing a specific dimension of trustworthiness assessed during the evaluation process. @@ -26,3 +27,26 @@ Initial discussions point toward setting minimum quantitative and qualitative th ### Real-world validation Evaluations are expected to rely on practical or pilot scenarios using the actual product, ensuring that results reflect real-world behaviour. + + +## Who will conduct the assessment and with which methodologies? 
+ +The assessment behind each Citcom badge will be carried out by the participating TEF sites. Each site brings its own specialised methodologies, tools, and testing infrastructures, reflecting the diversity of technical expertise across the Citcom network. + +These assessment solutions cover different dimensions of trustworthiness and can be consulted through the **AI Assessment Catalogue**, available at the following link: + +**** + +The catalogue provides an overview of the available evaluation tools, test suites, and methodologies, enabling innovators to understand which capabilities are applied to their systems and helping cities see how specific trust dimensions are assessed. + +### Can an AI provider receive assessments across multiple TEF sites? + +Yes. If a solution would benefit from complementary expertise available across several TEF sites, an AI provider can undergo assessments in multiple locations. In such cases, the **first-contact TEF site** will coordinate the overall process. + +The coordinating TEF site will: +- connect with the additional TEF sites that carry out their assessments independently, +- ensure that each participating site manages its own contractual and operational responsibilities, +- consolidate the evaluation results into a unified report, +- and oversee the issuance of the Citcom badges corresponding to the dimensions assessed across all sites. + +This ensures a seamless experience for AI innovators while leveraging the full breadth of expertise across the TEF network. 
From f3387af1ce4648064ea784841bcc8bcae2a0d3d6 Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 13:09:43 +0100 Subject: [PATCH 04/20] Create citcom_label.md --- docs/ai_assessment_catalog/citcom_label.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/ai_assessment_catalog/citcom_label.md diff --git a/docs/ai_assessment_catalog/citcom_label.md b/docs/ai_assessment_catalog/citcom_label.md new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/docs/ai_assessment_catalog/citcom_label.md @@ -0,0 +1 @@ + From 82eb1737a4eaf252484357e05b74b3ea7885311b Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 13:10:00 +0100 Subject: [PATCH 05/20] Update citcom_label.md --- docs/ai_assessment_catalog/citcom_label.md | 51 ++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/docs/ai_assessment_catalog/citcom_label.md b/docs/ai_assessment_catalog/citcom_label.md index 8b137891..d6c32386 100644 --- a/docs/ai_assessment_catalog/citcom_label.md +++ b/docs/ai_assessment_catalog/citcom_label.md @@ -1 +1,52 @@ +# Citcom Label +The Citcom Label is an initiative currently under development within Citcom.ai. Its goal is to create a trusted, recognisable signal that helps AI providers demonstrate responsible practices and gives buyers—especially public-sector actors such as smart cities—a clearer basis for evaluating and procuring AI solutions. + + +## What will the Citcom Label be? + +The label is envisioned as a **system of digital badges**, each representing a specific dimension of trustworthiness assessed during the evaluation process. +These badges would include a **watermark**, ensuring authenticity and preventing misuse. Each badge would be **verifiable through the Citcom Hub**, allowing external stakeholders to confirm its origin, evaluation status, and associated criteria. + +The Citcom badges are **not intended to function as legally binding conformity certificates under the AI Act**. 
Instead, they serve as **smart-city–oriented quality marks**, helping cities and other public authorities gain confidence in the AI solutions they consider adopting. + +For AI innovators, the Citcom badge system provides **independent third-party validation**, helping them promote their solutions and demonstrate that they meet recognised standards of trustworthiness. For cities and public buyers, the badges offer **clear, evidence-based guidance** to support more informed and transparent procurement decisions. + +## On what basis will the Citcom badges be awarded? + +The detailed criteria are still being developed with Citcom partners, but several guiding principles are emerging: + +### Completion of an evaluation +A badge is expected to be awarded only once a solution completes a structured assessment aligned with shared guidelines for the relevant dimension of trustworthiness. + +### Common methodology +Work is ongoing to define a coherent framework that determines how systems are qualified, how requirements translate into test cases, and how results are interpreted across different trust dimensions. + +### Success thresholds +Initial discussions point toward setting minimum quantitative and qualitative thresholds that vary by product type, maturity level, and the specific dimension being assessed. + +### Real-world validation +Evaluations are expected to rely on practical or pilot scenarios using the actual product, ensuring that results reflect real-world behaviour. + + +## Who will conduct the assessment and with which methodologies? + +The assessment behind each Citcom badge will be carried out by the participating TEF sites. Each site brings its own specialised methodologies, tools, and testing infrastructures, reflecting the diversity of technical expertise across the Citcom network. 
+ +These assessment solutions cover different dimensions of trustworthiness and can be consulted through the **AI Assessment Catalogue**, available at the following link: + +**** + +The catalogue provides an overview of the available evaluation tools, test suites, and methodologies, enabling innovators to understand which capabilities are applied to their systems and helping cities see how specific trust dimensions are assessed. + +### Can an AI provider receive assessments across multiple TEF sites? + +Yes. If a solution would benefit from complementary expertise available across several TEF sites, an AI provider can undergo assessments in multiple locations. In such cases, the **first-contact TEF site** will coordinate the overall process. + +The coordinating TEF site will: +- connect with the additional TEF sites that carry out their assessments independently, +- ensure that each participating site manages its own contractual and operational responsibilities, +- consolidate the evaluation results into a unified report, +- and oversee the issuance of the Citcom badges corresponding to the dimensions assessed across all sites. + +This ensures a seamless experience for AI innovators while leveraging the full breadth of expertise across the TEF network. From 85c24da335993b43d10d57f3827ccfc412c89506 Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 13:12:15 +0100 Subject: [PATCH 06/20] Update index.md --- docs/ai_assessment_catalog/index.md | 100 +++++++++++++++------------- 1 file changed, 52 insertions(+), 48 deletions(-) diff --git a/docs/ai_assessment_catalog/index.md b/docs/ai_assessment_catalog/index.md index f7e07014..d6c32386 100644 --- a/docs/ai_assessment_catalog/index.md +++ b/docs/ai_assessment_catalog/index.md @@ -1,48 +1,52 @@ ---- -icon: material/store-search-outline -title: AI Assessment Catalog -hide: - - toc ---- - - - -
-
-[_PAGE DESCRIPTION_] - -
-
- - -
-
- - -
- -
- -| Dataset | Super Node | TEF Node | Site | Data Model | Sampling Time | Historical | Owner | Get Access | -| ------- | ---------- | -------- | ---- | ---------- | ------------- | ---------- | ----- | ---------- | -| [Waste Container](./metadata_datasets/south_spain_valencia.md) | South | Spain | Valencia | [gitlab_vlci](https://gitlab.com/vlci-public/models-dades/wastemanagement/-/blob/main/WasteContainer/spec.md?ref_type=heads) | RealTime | From 2000 | València City Council | [Contact](https://valencia.opendatasoft.com/pages/home/) | +# Citcom Label + +The Citcom Label is an initiative currently under development within Citcom.ai. Its goal is to create a trusted, recognisable signal that helps AI providers demonstrate responsible practices and gives buyers—especially public-sector actors such as smart cities—a clearer basis for evaluating and procuring AI solutions. + + +## What will the Citcom Label be? + +The label is envisioned as a **system of digital badges**, each representing a specific dimension of trustworthiness assessed during the evaluation process. +These badges would include a **watermark**, ensuring authenticity and preventing misuse. Each badge would be **verifiable through the Citcom Hub**, allowing external stakeholders to confirm its origin, evaluation status, and associated criteria. + +The Citcom badges are **not intended to function as legally binding conformity certificates under the AI Act**. Instead, they serve as **smart-city–oriented quality marks**, helping cities and other public authorities gain confidence in the AI solutions they consider adopting. + +For AI innovators, the Citcom badge system provides **independent third-party validation**, helping them promote their solutions and demonstrate that they meet recognised standards of trustworthiness. For cities and public buyers, the badges offer **clear, evidence-based guidance** to support more informed and transparent procurement decisions. 
+ +## On what basis will the Citcom badges be awarded? + +The detailed criteria are still being developed with Citcom partners, but several guiding principles are emerging: + +### Completion of an evaluation +A badge is expected to be awarded only once a solution completes a structured assessment aligned with shared guidelines for the relevant dimension of trustworthiness. + +### Common methodology +Work is ongoing to define a coherent framework that determines how systems are qualified, how requirements translate into test cases, and how results are interpreted across different trust dimensions. + +### Success thresholds +Initial discussions point toward setting minimum quantitative and qualitative thresholds that vary by product type, maturity level, and the specific dimension being assessed. + +### Real-world validation +Evaluations are expected to rely on practical or pilot scenarios using the actual product, ensuring that results reflect real-world behaviour. + + +## Who will conduct the assessment and with which methodologies? + +The assessment behind each Citcom badge will be carried out by the participating TEF sites. Each site brings its own specialised methodologies, tools, and testing infrastructures, reflecting the diversity of technical expertise across the Citcom network. + +These assessment solutions cover different dimensions of trustworthiness and can be consulted through the **AI Assessment Catalogue**, available at the following link: + +**** + +The catalogue provides an overview of the available evaluation tools, test suites, and methodologies, enabling innovators to understand which capabilities are applied to their systems and helping cities see how specific trust dimensions are assessed. + +### Can an AI provider receive assessments across multiple TEF sites? + +Yes. If a solution would benefit from complementary expertise available across several TEF sites, an AI provider can undergo assessments in multiple locations. 
In such cases, the **first-contact TEF site** will coordinate the overall process. + +The coordinating TEF site will: +- connect with the additional TEF sites that carry out their assessments independently, +- ensure that each participating site manages its own contractual and operational responsibilities, +- consolidate the evaluation results into a unified report, +- and oversee the issuance of the Citcom badges corresponding to the dimensions assessed across all sites. + +This ensures a seamless experience for AI innovators while leveraging the full breadth of expertise across the TEF network. From 728326632f17bef74be35f5e6cde1b8304920c3f Mon Sep 17 00:00:00 2001 From: alessio0208 Date: Thu, 27 Nov 2025 13:41:59 +0100 Subject: [PATCH 07/20] added citcom label and ai_assessment_catalogue --- docs/ai_assessment_catalog/citcom_label.md | 52 ------------------- docs/citcom_label/ai_assessment_catalogue.md | 50 ++++++++++++++++++ .../index.md | 2 +- docs/data_catalog/index.md | 36 ++++++------- 4 files changed, 66 insertions(+), 74 deletions(-) delete mode 100644 docs/ai_assessment_catalog/citcom_label.md create mode 100644 docs/citcom_label/ai_assessment_catalogue.md rename docs/{ai_assessment_catalog => citcom_label}/index.md (98%) diff --git a/docs/ai_assessment_catalog/citcom_label.md b/docs/ai_assessment_catalog/citcom_label.md deleted file mode 100644 index d6c32386..00000000 --- a/docs/ai_assessment_catalog/citcom_label.md +++ /dev/null @@ -1,52 +0,0 @@ -# Citcom Label - -The Citcom Label is an initiative currently under development within Citcom.ai. Its goal is to create a trusted, recognisable signal that helps AI providers demonstrate responsible practices and gives buyers—especially public-sector actors such as smart cities—a clearer basis for evaluating and procuring AI solutions. - - -## What will the Citcom Label be? 
- -The label is envisioned as a **system of digital badges**, each representing a specific dimension of trustworthiness assessed during the evaluation process. -These badges would include a **watermark**, ensuring authenticity and preventing misuse. Each badge would be **verifiable through the Citcom Hub**, allowing external stakeholders to confirm its origin, evaluation status, and associated criteria. - -The Citcom badges are **not intended to function as legally binding conformity certificates under the AI Act**. Instead, they serve as **smart-city–oriented quality marks**, helping cities and other public authorities gain confidence in the AI solutions they consider adopting. - -For AI innovators, the Citcom badge system provides **independent third-party validation**, helping them promote their solutions and demonstrate that they meet recognised standards of trustworthiness. For cities and public buyers, the badges offer **clear, evidence-based guidance** to support more informed and transparent procurement decisions. - -## On what basis will the Citcom badges be awarded? - -The detailed criteria are still being developed with Citcom partners, but several guiding principles are emerging: - -### Completion of an evaluation -A badge is expected to be awarded only once a solution completes a structured assessment aligned with shared guidelines for the relevant dimension of trustworthiness. - -### Common methodology -Work is ongoing to define a coherent framework that determines how systems are qualified, how requirements translate into test cases, and how results are interpreted across different trust dimensions. - -### Success thresholds -Initial discussions point toward setting minimum quantitative and qualitative thresholds that vary by product type, maturity level, and the specific dimension being assessed. 
- -### Real-world validation -Evaluations are expected to rely on practical or pilot scenarios using the actual product, ensuring that results reflect real-world behaviour. - - -## Who will conduct the assessment and with which methodologies? - -The assessment behind each Citcom badge will be carried out by the participating TEF sites. Each site brings its own specialised methodologies, tools, and testing infrastructures, reflecting the diversity of technical expertise across the Citcom network. - -These assessment solutions cover different dimensions of trustworthiness and can be consulted through the **AI Assessment Catalogue**, available at the following link: - -**** - -The catalogue provides an overview of the available evaluation tools, test suites, and methodologies, enabling innovators to understand which capabilities are applied to their systems and helping cities see how specific trust dimensions are assessed. - -### Can an AI provider receive assessments across multiple TEF sites? - -Yes. If a solution would benefit from complementary expertise available across several TEF sites, an AI provider can undergo assessments in multiple locations. In such cases, the **first-contact TEF site** will coordinate the overall process. - -The coordinating TEF site will: -- connect with the additional TEF sites that carry out their assessments independently, -- ensure that each participating site manages its own contractual and operational responsibilities, -- consolidate the evaluation results into a unified report, -- and oversee the issuance of the Citcom badges corresponding to the dimensions assessed across all sites. - -This ensures a seamless experience for AI innovators while leveraging the full breadth of expertise across the TEF network. 
diff --git a/docs/citcom_label/ai_assessment_catalogue.md b/docs/citcom_label/ai_assessment_catalogue.md new file mode 100644 index 00000000..37304163 --- /dev/null +++ b/docs/citcom_label/ai_assessment_catalogue.md @@ -0,0 +1,50 @@ +--- +icon: material/robot-search-outline +title: AI Assessment Catalogue +hide: + - toc +--- + + + +
+
+The AI Assessment Catalogue is a centralized hub showcasing the evaluation tools, testing frameworks, and assessment solutions available across the Citcom.ai TEF network. +It is regularly updated as new methodologies and tools become available at each TEF site. +If you would like to request an assessment or learn more about a tool, please contact the relevant TEF sites. + + +
+
+ + +
+ +
+ +| Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Example of Use Case | Resources | +|---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|----------------------|-----------| +| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | GitHub: , Paper: | +| **MLA-BiTe** | LIST | To be open sourced | TRL 6–8 | LLM bias testing | General Purpose AI | Fairness, Robustness | No data privacy requirements | A city plans to evaluate fairness in its citizen-facing chatbot. MLA-BiTe allows non-technical staff to create local scenario-based prompts to uncover discriminatory behaviour across sensitive categories, supporting multiple languages and augmentations. | — | +| **Legal KG-RAG** | LIST | Proprietary | TRL 5–7 | LLM factuality accuracy testing | General Purpose AI | Transparency, Explainability, Robustness | Depends on whether the RAG is performed on sensitive data | A city using a standard RAG pipeline obtains irrelevant results. Legal KG-RAG rebuilds the legal corpus as a Neo4j knowledge graph, enabling direct comparison between traditional and KG-enhanced retrieval. 
| — | +| **MLA-Reject** | LIST | To be open sourced | TRL 6–8 | LLM robustness to jailbreaking | General Purpose AI | Robustness | Depends on whether the system has access to sensitive data | A public administration operates a multilingual assistant for internal queries. They want to test robustness against unsafe or misleading prompts. MLA-Reject generates difficult negative prompts to test refusal behaviour and safety guardrails, revealing weaknesses and improving configurations. | — | diff --git a/docs/ai_assessment_catalog/index.md b/docs/citcom_label/index.md similarity index 98% rename from docs/ai_assessment_catalog/index.md rename to docs/citcom_label/index.md index d6c32386..7f6818b3 100644 --- a/docs/ai_assessment_catalog/index.md +++ b/docs/citcom_label/index.md @@ -35,7 +35,7 @@ The assessment behind each Citcom badge will be carried out by the participating These assessment solutions cover different dimensions of trustworthiness and can be consulted through the **AI Assessment Catalogue**, available at the following link: -**** +[AI Assessment Catalogue](ai_assessment_catalogue.md) The catalogue provides an overview of the available evaluation tools, test suites, and methodologies, enabling innovators to understand which capabilities are applied to their systems and helping cities see how specific trust dimensions are assessed. diff --git a/docs/data_catalog/index.md b/docs/data_catalog/index.md index d6fd4bd3..bc2f2611 100644 --- a/docs/data_catalog/index.md +++ b/docs/data_catalog/index.md @@ -1,12 +1,12 @@ --- -icon: material/store-search-outline -title: Data Catalog +icon: material/robot-search-outline +title: AI Assessment Catalogue hide: - toc --- -
-The data catalog is a centralized hub to keep track of available datasets. It is regularly updated to include new data as it becomes available in any TEF node. If you want access to any dataset, please click "Contact" to reach the owners. +The AI Assessment Catalogue is a centralized hub showcasing the evaluation tools, testing frameworks, and assessment solutions available across the TEF network. +It is regularly updated as new methodologies and tools become available at each TEF site. +If you would like to request an assessment or learn more about a tool, please click **Contact** or open the linked resources. -!!! question "[How to add new datasets?](./instructions.md)" +!!! question "[How to add new assessment solutions?](./instructions.md)"
-[:simple-github: Add New Datasets ](https://github.com/CitComAI-Hub/CitComAI-Hub.github.io/issues/new/choose){:target="_blank" .md-button .md-button--primary-light } +[:simple-github: Add New Assessment Solution](https://github.com/CitComAI-Hub/CitComAI-Hub.github.io/issues/new/choose){:target="_blank" .md-button .md-button--primary-light }
@@ -35,7 +36,7 @@ The data catalog is a centralized hub to keep track of available datasets. It is - +
-| Dataset | Super Node | TEF Node | Site | Data Model | Sampling Time | Historical | Owner | Get Access | -| ------- | ---------- | -------- | ---- | ---------- | ------------- | ---------- | ----- | ---------- | -| [Waste Container](./metadata_datasets/south_spain_valencia.md) | South | Spain | Valencia | [gitlab_vlci](https://gitlab.com/vlci-public/models-dades/wastemanagement/-/blob/main/WasteContainer/spec.md?ref_type=heads) | RealTime | From 2000 | València City Council | [Contact](https://valencia.opendatasoft.com/pages/home/) | -| [Weather Forecast](./metadata_datasets/south_spain_valencia.md) | South | Spain | Valencia | [gitlab_vlci](https://gitlab.com/vlci-public/models-dades/weather/blob/main/WeatherForecast/spec.md) | Daily | From 2010 | València City Council | [Contact](https://valencia.opendatasoft.com/pages/home/) | -| [Bikeparking stands in Aarhus City](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | Ongoing | No | Aarhus Municipality | [City of Aarhus](https://www.opendata.dk/city-of-aarhus/cykelparkering_aarhus) | -| [Bike terminals in Aarhus, air and tools](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | Ongoing | No | Aarhus Municipality | [Cykelterminal - Dataset](https://www.opendata.dk/city-of-aarhus/cykelterminal) | -| [Citybike locations in Aarhus](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | *no specific value* | No | Aarhus Municipality | [Aarhus Bycykel - Dataset](https://www.opendata.dk/city-of-aarhus/aarhus-bycykel) | -| [Fast track bikeroutes in Aarhus](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | Ongoing | No | Aarhus Municipality | [Supercykelsti i Aarhus Kommune - Dataset](https://www.opendata.dk/city-of-aarhus/supercykelsti) | -| [recreative bikeroutes in Aarhus](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | 
Ongoing | No | Aarhus Municipality | [Rekreative cykelruter - Dataset](https://www.opendata.dk/city-of-aarhus/rekreative-cykelruter) | -| [AirQuality](./metadata_datasets/south_italy_uptown.md) | South | Italy | UpTown | *no specific value* | 30s | Yes | Politecnico di Milano | [neslab.it](https://www.neslab.it) | -| [Biodiversity](./metadata_datasets/south_italy_uptown.md) | South | Italy | UpTown | *no specific value* | 15m | Yes | Politecnico di Milano | [neslab.it](https://www.neslab.it) | -| [Energy distribution](./metadata_datasets/south_italy_uptown.md) | South | Italy | UpTown | *no specific value* | Hourly | Yes | A2A | [neslab.it](https://www.neslab.it) | -| [Archaeological Site](./metadata_datasets/south_italy_mithraeum-of-circus-maximus.md) | South | Italy | Mithraeum of Circus Maximus | *no specific value* | Real-time | Yes | Politecnico di Milano | [neslab.it](https://www.neslab.it) | +| Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Example of Use Case | Resources | +|---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|----------------------|-----------| +| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. 
| GitHub: , Paper: | +| **MLA-BiTe** | LIST | To be open sourced | TRL 6–8 | LLM bias testing | General Purpose AI | Fairness, Robustness | No data privacy requirements | A city plans to evaluate fairness in its citizen-facing chatbot. MLA-BiTe allows non-technical staff to create local scenario-based prompts to uncover discriminatory behaviour across sensitive categories, supporting multiple languages and augmentations. | — | +| **Legal KG-RAG** | LIST | Proprietary | TRL 5–7 | LLM factuality accuracy testing | General Purpose AI | Transparency, Explainability, Robustness | Depends on whether the RAG is performed on sensitive data | A city using a standard RAG pipeline obtains irrelevant results. Legal KG-RAG rebuilds the legal corpus as a Neo4j knowledge graph, enabling direct comparison between traditional and KG-enhanced retrieval. | — | +| **MLA-Reject** | LIST | To be open sourced | TRL 6–8 | LLM robustness to jailbreaking | General Purpose AI | Robustness | Depends on whether the system has access to sensitive data | A public administration operates a multilingual assistant for internal queries. They want to test robustness against unsafe or misleading prompts. MLA-Reject generates difficult negative prompts to test refusal behaviour and safety guardrails, revealing weaknesses and improving configurations. | — | From 29e9575e9b8e68c2a1f2ad118333808bff00d980 Mon Sep 17 00:00:00 2001 From: alessio0208 Date: Thu, 27 Nov 2025 13:59:40 +0100 Subject: [PATCH 08/20] added citcom label and ai_assessment_catalogue --- docs/citcom_label/ai_assessment_catalogue.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/citcom_label/ai_assessment_catalogue.md b/docs/citcom_label/ai_assessment_catalogue.md index 37304163..de284aab 100644 --- a/docs/citcom_label/ai_assessment_catalogue.md +++ b/docs/citcom_label/ai_assessment_catalogue.md @@ -20,7 +20,7 @@ hide:
-The AI Assessment Catalogue is a centralized hub showcasing the evaluation tools, testing frameworks, and assessment solutions available across the Citcom.ai TEF network. +The AI Assessment Catalogue showcases the evaluation tools, testing frameworks, and assessment solutions available across the Citcom.ai TEF network. It is regularly updated as new methodologies and tools become available at each TEF site. If you would like to request an assessment or learn more about a tool, please contact the relevant TEF sites. @@ -42,6 +42,20 @@ If you would like to request an assessment or learn more about a tool, please co
+ + | Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Example of Use Case | Resources | |---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|----------------------|-----------| | **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | GitHub: , Paper: | From 11f64d0735a43199ec8cba5e13f57be0d9843b69 Mon Sep 17 00:00:00 2001 From: alessio0208 Date: Thu, 27 Nov 2025 14:01:19 +0100 Subject: [PATCH 09/20] added citcom label and ai_assessment_catalogue --- docs/citcom_label/ai_assessment_catalogue.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/citcom_label/ai_assessment_catalogue.md b/docs/citcom_label/ai_assessment_catalogue.md index de284aab..2c8cd2b1 100644 --- a/docs/citcom_label/ai_assessment_catalogue.md +++ b/docs/citcom_label/ai_assessment_catalogue.md @@ -43,19 +43,28 @@ If you would like to request an assessment or learn more about a tool, please co
+ | Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Example of Use Case | Resources | |---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|----------------------|-----------| | **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | GitHub: , Paper: | From 640f564063d8b08fe17b9334725cb984c56bf7ee Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 14:02:23 +0100 Subject: [PATCH 10/20] Update ai_assessment_catalogue.md --- docs/citcom_label/ai_assessment_catalogue.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/citcom_label/ai_assessment_catalogue.md b/docs/citcom_label/ai_assessment_catalogue.md index 2c8cd2b1..8cf0c44e 100644 --- a/docs/citcom_label/ai_assessment_catalogue.md +++ b/docs/citcom_label/ai_assessment_catalogue.md @@ -65,9 +65,9 @@ If you would like to request an assessment or learn more about a tool, please co -| Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Example of Use Case | Resources | -|---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|----------------------|-----------| -| **FAIRGAME** | LIST | 
Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | GitHub: , Paper: | -| **MLA-BiTe** | LIST | To be open sourced | TRL 6–8 | LLM bias testing | General Purpose AI | Fairness, Robustness | No data privacy requirements | A city plans to evaluate fairness in its citizen-facing chatbot. MLA-BiTe allows non-technical staff to create local scenario-based prompts to uncover discriminatory behaviour across sensitive categories, supporting multiple languages and augmentations. | — | -| **Legal KG-RAG** | LIST | Proprietary | TRL 5–7 | LLM factuality accuracy testing | General Purpose AI | Transparency, Explainability, Robustness | Depends on whether the RAG is performed on sensitive data | A city using a standard RAG pipeline obtains irrelevant results. Legal KG-RAG rebuilds the legal corpus as a Neo4j knowledge graph, enabling direct comparison between traditional and KG-enhanced retrieval. | — | -| **MLA-Reject** | LIST | To be open sourced | TRL 6–8 | LLM robustness to jailbreaking | General Purpose AI | Robustness | Depends on whether the system has access to sensitive data | A public administration operates a multilingual assistant for internal queries. They want to test robustness against unsafe or misleading prompts. MLA-Reject generates difficult negative prompts to test refusal behaviour and safety guardrails, revealing weaknesses and improving configurations. 
| — | +| Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Resources | Example of Use Case | +|---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|-----------|----------------------| +| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | GitHub: , Paper: | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | +| **MLA-BiTe** | LIST | To be open sourced | TRL 6–8 | LLM bias testing | General Purpose AI | Fairness, Robustness | No data privacy requirements | — | A city plans to evaluate fairness in its citizen-facing chatbot. MLA-BiTe allows non-technical staff to create local scenario-based prompts to uncover discriminatory behaviour across sensitive categories, supporting multiple languages and augmentations. | +| **Legal KG-RAG** | LIST | Proprietary | TRL 5–7 | LLM factuality accuracy testing | General Purpose AI | Transparency, Explainability, Robustness | Depends on whether the RAG is performed on sensitive data | — | A city using a standard RAG pipeline obtains irrelevant results. Legal KG-RAG rebuilds the legal corpus as a Neo4j knowledge graph, enabling direct comparison between traditional and KG-enhanced retrieval. 
| +| **MLA-Reject** | LIST | To be open sourced | TRL 6–8 | LLM robustness to jailbreaking | General Purpose AI | Robustness | Depends on whether the system has access to sensitive data | — | A public administration operates a multilingual assistant for internal queries. They want to test robustness against unsafe or misleading prompts. MLA-Reject generates difficult negative prompts to test refusal behaviour and safety guardrails, revealing weaknesses and improving configurations. | From e6410cedbd30622ecfe746bebeeb587f75eadc12 Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 14:02:55 +0100 Subject: [PATCH 11/20] Update ai_assessment_catalogue.md --- docs/citcom_label/ai_assessment_catalogue.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/citcom_label/ai_assessment_catalogue.md b/docs/citcom_label/ai_assessment_catalogue.md index 8cf0c44e..031f5389 100644 --- a/docs/citcom_label/ai_assessment_catalogue.md +++ b/docs/citcom_label/ai_assessment_catalogue.md @@ -67,7 +67,7 @@ If you would like to request an assessment or learn more about a tool, please co | Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Resources | Example of Use Case | |---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|-----------|----------------------| -| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | GitHub: , Paper: | A city aims to test its citizen-facing chatbot before launch. 
FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | +| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | GitHub: , Paper: | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | | **MLA-BiTe** | LIST | To be open sourced | TRL 6–8 | LLM bias testing | General Purpose AI | Fairness, Robustness | No data privacy requirements | — | A city plans to evaluate fairness in its citizen-facing chatbot. MLA-BiTe allows non-technical staff to create local scenario-based prompts to uncover discriminatory behaviour across sensitive categories, supporting multiple languages and augmentations. | | **Legal KG-RAG** | LIST | Proprietary | TRL 5–7 | LLM factuality accuracy testing | General Purpose AI | Transparency, Explainability, Robustness | Depends on whether the RAG is performed on sensitive data | — | A city using a standard RAG pipeline obtains irrelevant results. Legal KG-RAG rebuilds the legal corpus as a Neo4j knowledge graph, enabling direct comparison between traditional and KG-enhanced retrieval. | | **MLA-Reject** | LIST | To be open sourced | TRL 6–8 | LLM robustness to jailbreaking | General Purpose AI | Robustness | Depends on whether the system has access to sensitive data | — | A public administration operates a multilingual assistant for internal queries. They want to test robustness against unsafe or misleading prompts. 
MLA-Reject generates difficult negative prompts to test refusal behaviour and safety guardrails, revealing weaknesses and improving configurations. | From 7ae60420ad4daf08c6c4824b533a08687a8ba6cf Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 14:04:45 +0100 Subject: [PATCH 12/20] Update ai_assessment_catalogue.md --- docs/citcom_label/ai_assessment_catalogue.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/citcom_label/ai_assessment_catalogue.md b/docs/citcom_label/ai_assessment_catalogue.md index 031f5389..71d178de 100644 --- a/docs/citcom_label/ai_assessment_catalogue.md +++ b/docs/citcom_label/ai_assessment_catalogue.md @@ -47,19 +47,19 @@ If you would like to request an assessment or learn more about a tool, please co /* Make ALL columns narrow by default */ .md-typeset table:not(.no-format) th, .md-typeset table:not(.no-format) td { - width: 80px; + width: 60px; } -/* Make the Example of Use Case column (9th) wide */ +/* Make the Resources column (9th) small */ .md-typeset table:not(.no-format) th:nth-child(9), .md-typeset table:not(.no-format) td:nth-child(9) { - width: 450px; /* adjust as you want */ + width: 100px; } -/* Force the last column (10th: Resources) to be small */ +/* Make the last column (10th: Example of Use Case) wide */ .md-typeset table:not(.no-format) th:nth-child(10), .md-typeset table:not(.no-format) td:nth-child(10) { - width: 120px; /* prevents it from expanding */ + width: 500px; } From d4028048cc90e22c909d9614f9d60ba9fabc2e8d Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 14:05:59 +0100 Subject: [PATCH 13/20] Update ai_assessment_catalogue.md --- docs/citcom_label/ai_assessment_catalogue.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/citcom_label/ai_assessment_catalogue.md b/docs/citcom_label/ai_assessment_catalogue.md index 71d178de..27ec93fb 100644 --- a/docs/citcom_label/ai_assessment_catalogue.md +++ 
b/docs/citcom_label/ai_assessment_catalogue.md @@ -67,7 +67,7 @@ If you would like to request an assessment or learn more about a tool, please co | Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Resources | Example of Use Case | |---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|-----------|----------------------| -| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | GitHub: , Paper: | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | +| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | GitHub: , Paper: "FAIRGAME: A Framework for AI Agents Bias Recognition Using Game Theory", Frontiers in Artificial Intelligence and Applications, Vol. 413: ECAI 2025 | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | | **MLA-BiTe** | LIST | To be open sourced | TRL 6–8 | LLM bias testing | General Purpose AI | Fairness, Robustness | No data privacy requirements | — | A city plans to evaluate fairness in its citizen-facing chatbot. 
MLA-BiTe allows non-technical staff to create local scenario-based prompts to uncover discriminatory behaviour across sensitive categories, supporting multiple languages and augmentations. | | **Legal KG-RAG** | LIST | Proprietary | TRL 5–7 | LLM factuality accuracy testing | General Purpose AI | Transparency, Explainability, Robustness | Depends on whether the RAG is performed on sensitive data | — | A city using a standard RAG pipeline obtains irrelevant results. Legal KG-RAG rebuilds the legal corpus as a Neo4j knowledge graph, enabling direct comparison between traditional and KG-enhanced retrieval. | | **MLA-Reject** | LIST | To be open sourced | TRL 6–8 | LLM robustness to jailbreaking | General Purpose AI | Robustness | Depends on whether the system has access to sensitive data | — | A public administration operates a multilingual assistant for internal queries. They want to test robustness against unsafe or misleading prompts. MLA-Reject generates difficult negative prompts to test refusal behaviour and safety guardrails, revealing weaknesses and improving configurations. | From 340252bd165cee132689a8fe1a91428ecb7c2822 Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 14:12:37 +0100 Subject: [PATCH 14/20] Update index.md --- docs/data_catalog/index.md | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/docs/data_catalog/index.md b/docs/data_catalog/index.md index bc2f2611..d6fd4bd3 100644 --- a/docs/data_catalog/index.md +++ b/docs/data_catalog/index.md @@ -1,12 +1,12 @@ --- -icon: material/robot-search-outline -title: AI Assessment Catalogue +icon: material/store-search-outline +title: Data Catalog hide: - toc --- +
-The AI Assessment Catalogue is a centralized hub showcasing the evaluation tools, testing frameworks, and assessment solutions available across the TEF network. -It is regularly updated as new methodologies and tools become available at each TEF site. -If you would like to request an assessment or learn more about a tool, please click **Contact** or open the linked resources. +The data catalog is a centralized hub to keep track of available datasets. It is regularly updated to include new data as it becomes available in any TEF node. If you want access to any dataset, please click "Contact" to reach the owners. -!!! question "[How to add new assessment solutions?](./instructions.md)" +!!! question "[How to add new datasets?](./instructions.md)"
-[:simple-github: Add New Assessment Solution](https://github.com/CitComAI-Hub/CitComAI-Hub.github.io/issues/new/choose){:target="_blank" .md-button .md-button--primary-light } +[:simple-github: Add New Datasets ](https://github.com/CitComAI-Hub/CitComAI-Hub.github.io/issues/new/choose){:target="_blank" .md-button .md-button--primary-light }
@@ -36,7 +35,7 @@ If you would like to request an assessment or learn more about a tool, please cl - + -| Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Example of Use Case | Resources | -|---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|----------------------|-----------| -| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | GitHub: , Paper: | -| **MLA-BiTe** | LIST | To be open sourced | TRL 6–8 | LLM bias testing | General Purpose AI | Fairness, Robustness | No data privacy requirements | A city plans to evaluate fairness in its citizen-facing chatbot. MLA-BiTe allows non-technical staff to create local scenario-based prompts to uncover discriminatory behaviour across sensitive categories, supporting multiple languages and augmentations. | — | -| **Legal KG-RAG** | LIST | Proprietary | TRL 5–7 | LLM factuality accuracy testing | General Purpose AI | Transparency, Explainability, Robustness | Depends on whether the RAG is performed on sensitive data | A city using a standard RAG pipeline obtains irrelevant results. Legal KG-RAG rebuilds the legal corpus as a Neo4j knowledge graph, enabling direct comparison between traditional and KG-enhanced retrieval. 
| — | -| **MLA-Reject** | LIST | To be open sourced | TRL 6–8 | LLM robustness to jailbreaking | General Purpose AI | Robustness | Depends on whether the system has access to sensitive data | A public administration operates a multilingual assistant for internal queries. They want to test robustness against unsafe or misleading prompts. MLA-Reject generates difficult negative prompts to test refusal behaviour and safety guardrails, revealing weaknesses and improving configurations. | — | +| Dataset | Super Node | TEF Node | Site | Data Model | Sampling Time | Historical | Owner | Get Access | +| ------- | ---------- | -------- | ---- | ---------- | ------------- | ---------- | ----- | ---------- | +| [Waste Container](./metadata_datasets/south_spain_valencia.md) | South | Spain | Valencia | [gitlab_vlci](https://gitlab.com/vlci-public/models-dades/wastemanagement/-/blob/main/WasteContainer/spec.md?ref_type=heads) | RealTime | From 2000 | València City Council | [Contact](https://valencia.opendatasoft.com/pages/home/) | +| [Weather Forecast](./metadata_datasets/south_spain_valencia.md) | South | Spain | Valencia | [gitlab_vlci](https://gitlab.com/vlci-public/models-dades/weather/blob/main/WeatherForecast/spec.md) | Daily | From 2010 | València City Council | [Contact](https://valencia.opendatasoft.com/pages/home/) | +| [Bikeparking stands in Aarhus City](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | Ongoing | No | Aarhus Municipality | [City of Aarhus](https://www.opendata.dk/city-of-aarhus/cykelparkering_aarhus) | +| [Bike terminals in Aarhus, air and tools](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | Ongoing | No | Aarhus Municipality | [Cykelterminal - Dataset](https://www.opendata.dk/city-of-aarhus/cykelterminal) | +| [Citybike locations in Aarhus](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | *no specific value* | No | Aarhus 
Municipality | [Aarhus Bycykel - Dataset](https://www.opendata.dk/city-of-aarhus/aarhus-bycykel) | +| [Fast track bikeroutes in Aarhus](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | Ongoing | No | Aarhus Municipality | [Supercykelsti i Aarhus Kommune - Dataset](https://www.opendata.dk/city-of-aarhus/supercykelsti) | +| [recreative bikeroutes in Aarhus](./metadata_datasets/nordic_citcom_gtm.md) | Nordic | Denmark | GTM | *no specific value* | Ongoing | No | Aarhus Municipality | [Rekreative cykelruter - Dataset](https://www.opendata.dk/city-of-aarhus/rekreative-cykelruter) | +| [AirQuality](./metadata_datasets/south_italy_uptown.md) | South | Italy | UpTown | *no specific value* | 30s | Yes | Politecnico di Milano | [neslab.it](https://www.neslab.it) | +| [Biodiversity](./metadata_datasets/south_italy_uptown.md) | South | Italy | UpTown | *no specific value* | 15m | Yes | Politecnico di Milano | [neslab.it](https://www.neslab.it) | +| [Energy distribution](./metadata_datasets/south_italy_uptown.md) | South | Italy | UpTown | *no specific value* | Hourly | Yes | A2A | [neslab.it](https://www.neslab.it) | +| [Archaeological Site](./metadata_datasets/south_italy_mithraeum-of-circus-maximus.md) | South | Italy | Mithraeum of Circus Maximus | *no specific value* | Real-time | Yes | Politecnico di Milano | [neslab.it](https://www.neslab.it) | From 5e28fb5d183b806940677308f0c08425d4a57d66 Mon Sep 17 00:00:00 2001 From: alessiobuscemi Date: Thu, 27 Nov 2025 14:13:29 +0100 Subject: [PATCH 15/20] Update ai_assessment_catalogue.md --- docs/citcom_label/ai_assessment_catalogue.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/citcom_label/ai_assessment_catalogue.md b/docs/citcom_label/ai_assessment_catalogue.md index 27ec93fb..20d2373e 100644 --- a/docs/citcom_label/ai_assessment_catalogue.md +++ b/docs/citcom_label/ai_assessment_catalogue.md @@ -20,6 +20,7 @@ hide:
+ The AI Assessment Catalogue showcases the evaluation tools, testing frameworks, and assessment solutions available across the Citcom.ai TEF network. It is regularly updated as new methodologies and tools become available at each TEF site. If you would like to request an assessment or learn more about a tool, please contact the relevant TEF sites. From 6c99bba9f2b06560dc9b914bec9c3ec726cbea3c Mon Sep 17 00:00:00 2001 From: alessio0208 Date: Thu, 27 Nov 2025 14:26:06 +0100 Subject: [PATCH 16/20] removed citcom_label.md from toolbox --- docs/toolbox/citcom_label.md | 52 ------------------------------------ 1 file changed, 52 deletions(-) delete mode 100644 docs/toolbox/citcom_label.md diff --git a/docs/toolbox/citcom_label.md b/docs/toolbox/citcom_label.md deleted file mode 100644 index d6c32386..00000000 --- a/docs/toolbox/citcom_label.md +++ /dev/null @@ -1,52 +0,0 @@ -# Citcom Label - -The Citcom Label is an initiative currently under development within Citcom.ai. Its goal is to create a trusted, recognisable signal that helps AI providers demonstrate responsible practices and gives buyers—especially public-sector actors such as smart cities—a clearer basis for evaluating and procuring AI solutions. - - -## What will the Citcom Label be? - -The label is envisioned as a **system of digital badges**, each representing a specific dimension of trustworthiness assessed during the evaluation process. -These badges would include a **watermark**, ensuring authenticity and preventing misuse. Each badge would be **verifiable through the Citcom Hub**, allowing external stakeholders to confirm its origin, evaluation status, and associated criteria. - -The Citcom badges are **not intended to function as legally binding conformity certificates under the AI Act**. Instead, they serve as **smart-city–oriented quality marks**, helping cities and other public authorities gain confidence in the AI solutions they consider adopting. 
- -For AI innovators, the Citcom badge system provides **independent third-party validation**, helping them promote their solutions and demonstrate that they meet recognised standards of trustworthiness. For cities and public buyers, the badges offer **clear, evidence-based guidance** to support more informed and transparent procurement decisions. - -## On what basis will the Citcom badges be awarded? - -The detailed criteria are still being developed with Citcom partners, but several guiding principles are emerging: - -### Completion of an evaluation -A badge is expected to be awarded only once a solution completes a structured assessment aligned with shared guidelines for the relevant dimension of trustworthiness. - -### Common methodology -Work is ongoing to define a coherent framework that determines how systems are qualified, how requirements translate into test cases, and how results are interpreted across different trust dimensions. - -### Success thresholds -Initial discussions point toward setting minimum quantitative and qualitative thresholds that vary by product type, maturity level, and the specific dimension being assessed. - -### Real-world validation -Evaluations are expected to rely on practical or pilot scenarios using the actual product, ensuring that results reflect real-world behaviour. - - -## Who will conduct the assessment and with which methodologies? - -The assessment behind each Citcom badge will be carried out by the participating TEF sites. Each site brings its own specialised methodologies, tools, and testing infrastructures, reflecting the diversity of technical expertise across the Citcom network. 
- -These assessment solutions cover different dimensions of trustworthiness and can be consulted through the **AI Assessment Catalogue**, available at the following link: - -**** - -The catalogue provides an overview of the available evaluation tools, test suites, and methodologies, enabling innovators to understand which capabilities are applied to their systems and helping cities see how specific trust dimensions are assessed. - -### Can an AI provider receive assessments across multiple TEF sites? - -Yes. If a solution would benefit from complementary expertise available across several TEF sites, an AI provider can undergo assessments in multiple locations. In such cases, the **first-contact TEF site** will coordinate the overall process. - -The coordinating TEF site will: -- connect with the additional TEF sites that carry out their assessments independently, -- ensure that each participating site manages its own contractual and operational responsibilities, -- consolidate the evaluation results into a unified report, -- and oversee the issuance of the Citcom badges corresponding to the dimensions assessed across all sites. - -This ensures a seamless experience for AI innovators while leveraging the full breadth of expertise across the TEF network. 
From 8b7afd723bdf6200804541414350c2b3c160c699 Mon Sep 17 00:00:00 2001 From: alessio0208 Date: Thu, 27 Nov 2025 15:55:00 +0100 Subject: [PATCH 17/20] renamed folder --- docs/{citcom_label => ai_assessment}/ai_assessment_catalogue.md | 0 docs/{citcom_label => ai_assessment}/index.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename docs/{citcom_label => ai_assessment}/ai_assessment_catalogue.md (100%) rename docs/{citcom_label => ai_assessment}/index.md (100%) diff --git a/docs/citcom_label/ai_assessment_catalogue.md b/docs/ai_assessment/ai_assessment_catalogue.md similarity index 100% rename from docs/citcom_label/ai_assessment_catalogue.md rename to docs/ai_assessment/ai_assessment_catalogue.md diff --git a/docs/citcom_label/index.md b/docs/ai_assessment/index.md similarity index 100% rename from docs/citcom_label/index.md rename to docs/ai_assessment/index.md From e46367b6970cc81169fe0ec42fc6e8b9be134e16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Cipri=C3=A0?= Date: Fri, 28 Nov 2025 12:14:40 +0100 Subject: [PATCH 18/20] Add AI Assessment Catalogue and update navigation structure --- docs/ai_assessment/index.md | 52 ------------------ .../ai_assessment/ai_assessment_catalogue.md | 2 +- docs/documentation/ai_assessment/index.md | 53 +++++++++++++++++++ mkdocs.yml | 1 + 4 files changed, 55 insertions(+), 53 deletions(-) delete mode 100644 docs/ai_assessment/index.md rename docs/{ => documentation}/ai_assessment/ai_assessment_catalogue.md (99%) diff --git a/docs/ai_assessment/index.md b/docs/ai_assessment/index.md deleted file mode 100644 index 7f6818b3..00000000 --- a/docs/ai_assessment/index.md +++ /dev/null @@ -1,52 +0,0 @@ -# Citcom Label - -The Citcom Label is an initiative currently under development within Citcom.ai. 
Its goal is to create a trusted, recognisable signal that helps AI providers demonstrate responsible practices and gives buyers—especially public-sector actors such as smart cities—a clearer basis for evaluating and procuring AI solutions. - - -## What will the Citcom Label be? - -The label is envisioned as a **system of digital badges**, each representing a specific dimension of trustworthiness assessed during the evaluation process. -These badges would include a **watermark**, ensuring authenticity and preventing misuse. Each badge would be **verifiable through the Citcom Hub**, allowing external stakeholders to confirm its origin, evaluation status, and associated criteria. - -The Citcom badges are **not intended to function as legally binding conformity certificates under the AI Act**. Instead, they serve as **smart-city–oriented quality marks**, helping cities and other public authorities gain confidence in the AI solutions they consider adopting. - -For AI innovators, the Citcom badge system provides **independent third-party validation**, helping them promote their solutions and demonstrate that they meet recognised standards of trustworthiness. For cities and public buyers, the badges offer **clear, evidence-based guidance** to support more informed and transparent procurement decisions. - -## On what basis will the Citcom badges be awarded? - -The detailed criteria are still being developed with Citcom partners, but several guiding principles are emerging: - -### Completion of an evaluation -A badge is expected to be awarded only once a solution completes a structured assessment aligned with shared guidelines for the relevant dimension of trustworthiness. - -### Common methodology -Work is ongoing to define a coherent framework that determines how systems are qualified, how requirements translate into test cases, and how results are interpreted across different trust dimensions. 
- -### Success thresholds -Initial discussions point toward setting minimum quantitative and qualitative thresholds that vary by product type, maturity level, and the specific dimension being assessed. - -### Real-world validation -Evaluations are expected to rely on practical or pilot scenarios using the actual product, ensuring that results reflect real-world behaviour. - - -## Who will conduct the assessment and with which methodologies? - -The assessment behind each Citcom badge will be carried out by the participating TEF sites. Each site brings its own specialised methodologies, tools, and testing infrastructures, reflecting the diversity of technical expertise across the Citcom network. - -These assessment solutions cover different dimensions of trustworthiness and can be consulted through the **AI Assessment Catalogue**, available at the following link: - -[AI Assessment Catalogue](ai_assessment_catalogue.md) - -The catalogue provides an overview of the available evaluation tools, test suites, and methodologies, enabling innovators to understand which capabilities are applied to their systems and helping cities see how specific trust dimensions are assessed. - -### Can an AI provider receive assessments across multiple TEF sites? - -Yes. If a solution would benefit from complementary expertise available across several TEF sites, an AI provider can undergo assessments in multiple locations. In such cases, the **first-contact TEF site** will coordinate the overall process. - -The coordinating TEF site will: -- connect with the additional TEF sites that carry out their assessments independently, -- ensure that each participating site manages its own contractual and operational responsibilities, -- consolidate the evaluation results into a unified report, -- and oversee the issuance of the Citcom badges corresponding to the dimensions assessed across all sites. 
- -This ensures a seamless experience for AI innovators while leveraging the full breadth of expertise across the TEF network. diff --git a/docs/ai_assessment/ai_assessment_catalogue.md b/docs/documentation/ai_assessment/ai_assessment_catalogue.md similarity index 99% rename from docs/ai_assessment/ai_assessment_catalogue.md rename to docs/documentation/ai_assessment/ai_assessment_catalogue.md index 20d2373e..d4fa8eba 100644 --- a/docs/ai_assessment/ai_assessment_catalogue.md +++ b/docs/documentation/ai_assessment/ai_assessment_catalogue.md @@ -1,5 +1,5 @@ --- -icon: material/robot-search-outline +# icon: material/robot-search-outline title: AI Assessment Catalogue hide: - toc diff --git a/docs/documentation/ai_assessment/index.md b/docs/documentation/ai_assessment/index.md index 5312b5b9..b6c543f3 100644 --- a/docs/documentation/ai_assessment/index.md +++ b/docs/documentation/ai_assessment/index.md @@ -1,3 +1,56 @@ --- title: AI Assessment --- + +# Citcom Label + +The Citcom Label is an initiative currently under development within Citcom.ai. Its goal is to create a trusted, recognisable signal that helps AI providers demonstrate responsible practices and gives buyers—especially public-sector actors such as smart cities—a clearer basis for evaluating and procuring AI solutions. + + +## What will the Citcom Label be? + +The label is envisioned as a **system of digital badges**, each representing a specific dimension of trustworthiness assessed during the evaluation process. +These badges would include a **watermark**, ensuring authenticity and preventing misuse. Each badge would be **verifiable through the Citcom Hub**, allowing external stakeholders to confirm its origin, evaluation status, and associated criteria. + +The Citcom badges are **not intended to function as legally binding conformity certificates under the AI Act**. 
Instead, they serve as **smart-city–oriented quality marks**, helping cities and other public authorities gain confidence in the AI solutions they consider adopting. + +For AI innovators, the Citcom badge system provides **independent third-party validation**, helping them promote their solutions and demonstrate that they meet recognised standards of trustworthiness. For cities and public buyers, the badges offer **clear, evidence-based guidance** to support more informed and transparent procurement decisions. + +## On what basis will the Citcom badges be awarded? + +The detailed criteria are still being developed with Citcom partners, but several guiding principles are emerging: + +### Completion of an evaluation +A badge is expected to be awarded only once a solution completes a structured assessment aligned with shared guidelines for the relevant dimension of trustworthiness. + +### Common methodology +Work is ongoing to define a coherent framework that determines how systems are qualified, how requirements translate into test cases, and how results are interpreted across different trust dimensions. + +### Success thresholds +Initial discussions point toward setting minimum quantitative and qualitative thresholds that vary by product type, maturity level, and the specific dimension being assessed. + +### Real-world validation +Evaluations are expected to rely on practical or pilot scenarios using the actual product, ensuring that results reflect real-world behaviour. + + +## Who will conduct the assessment and with which methodologies? + +The assessment behind each Citcom badge will be carried out by the participating TEF sites. Each site brings its own specialised methodologies, tools, and testing infrastructures, reflecting the diversity of technical expertise across the Citcom network. 
+ +These assessment solutions cover different dimensions of trustworthiness and can be consulted through the **AI Assessment Catalogue**, available at the following link: + +[AI Assessment Catalogue](ai_assessment_catalogue.md) + +The catalogue provides an overview of the available evaluation tools, test suites, and methodologies, enabling innovators to understand which capabilities are applied to their systems and helping cities see how specific trust dimensions are assessed. + +### Can an AI provider receive assessments across multiple TEF sites? + +Yes. If a solution would benefit from complementary expertise available across several TEF sites, an AI provider can undergo assessments in multiple locations. In such cases, the **first-contact TEF site** will coordinate the overall process. + +The coordinating TEF site will: +- connect with the additional TEF sites that carry out their assessments independently, +- ensure that each participating site manages its own contractual and operational responsibilities, +- consolidate the evaluation results into a unified report, +- and oversee the issuance of the Citcom badges corresponding to the dimensions assessed across all sites. + +This ensures a seamless experience for AI innovators while leveraging the full breadth of expertise across the TEF network. 
diff --git a/mkdocs.yml b/mkdocs.yml index 05e75e59..a639aed0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -66,6 +66,7 @@ nav: - documentation/local_digital_twins/index.md - AI Assessment: - documentation/ai_assessment/index.md + - documentation/ai_assessment/ai_assessment_catalogue.md - AI services: - services/index.md - Minimal Interoperable AI Service: services/waste_collection.md From 751964b9fd2ccdec6b8e8bf715d5bf07cf91a05b Mon Sep 17 00:00:00 2001 From: Sergio <44024844+sermars@users.noreply.github.com> Date: Tue, 2 Dec 2025 10:30:53 +0000 Subject: [PATCH 19/20] fixed ai assessment catalog --- docs/assets/css/ai_assess_catalog.css | 107 ++++++++++++++++++ .../ai_assessment/ai_assessment_catalogue.md | 80 +++++++------ mkdocs.yml | 1 + 3 files changed, 156 insertions(+), 32 deletions(-) create mode 100644 docs/assets/css/ai_assess_catalog.css diff --git a/docs/assets/css/ai_assess_catalog.css b/docs/assets/css/ai_assess_catalog.css new file mode 100644 index 00000000..6e6c96bf --- /dev/null +++ b/docs/assets/css/ai_assess_catalog.css @@ -0,0 +1,107 @@ +/* AI Assessment Catalogue table layout (scoped to .ai-assessment-table) */ +.ai-assessment-table { overflow-x: visible; } + +.ai-assessment-table table { + table-layout: fixed; + width: 100%; + min-width: 1500px; + border-collapse: collapse; +} + +.ai-assessment-table table th, +.ai-assessment-table table td { + white-space: normal; + overflow-wrap: anywhere; + vertical-align: top; +} + +/* Per-column minimum widths */ +.ai-assessment-table table th:nth-child(1), +.ai-assessment-table table td:nth-child(1) { min-width: 160px; } + +.ai-assessment-table table th:nth-child(2), +.ai-assessment-table table td:nth-child(2) { min-width: 110px; } + +.ai-assessment-table table th:nth-child(3), +.ai-assessment-table table td:nth-child(3) { min-width: 140px; } + +.ai-assessment-table table th:nth-child(4), +.ai-assessment-table table td:nth-child(4) { min-width: 140px; } + +.ai-assessment-table table th:nth-child(5), 
+.ai-assessment-table table td:nth-child(5) { min-width: 220px; } + +.ai-assessment-table table th:nth-child(6), +.ai-assessment-table table td:nth-child(6) { min-width: 160px; } + +.ai-assessment-table table th:nth-child(7), +.ai-assessment-table table td:nth-child(7) { min-width: 160px; } + +.ai-assessment-table table th:nth-child(8), +.ai-assessment-table table td:nth-child(8) { min-width: 220px; } + +.ai-assessment-table table th:nth-child(9), +.ai-assessment-table table td:nth-child(9) { min-width: 160px; } + +.ai-assessment-table table th:nth-child(10), +.ai-assessment-table table td:nth-child(10) { min-width: 560px; max-width: 800px; } + +/* --- Page-scoped layout to reclaim sidebar width and show it on hover --- */ +.catalog-page .md-content { grid-column: 1 / -1; } + +.catalog-page .md-sidebar--primary { + position: fixed; + left: 0; + top: 0; + height: 100vh; + transform: translateX(-100%); + transition: transform .18s ease-in-out; + z-index: 1000; + box-shadow: 0 0 0 rgba(0,0,0,0); + background-color: var(--md-default-bg-color); + backdrop-filter: none; + -webkit-backdrop-filter: none; + overflow-y: auto; +} + +.catalog-page.catalog-nav-open .md-sidebar--primary { + transform: translateX(0); + box-shadow: 0 4px 24px rgba(0,0,0,.18); +} + +.catalog-nav-toggle { + position: fixed; + left: 6px; + top: 50%; + transform: translateY(-50%); + width: 36px; + height: 36px; + border-radius: 18px; + border: none; + background: #fff; + color: #111; + box-shadow: 0 2px 10px rgba(0,0,0,.15); + display: flex; + align-items: center; + justify-content: center; + cursor: pointer; + z-index: 1100; +} + +.catalog-nav-toggle:hover { background: #f3f3f3; } + +/* Widen the page content area for this catalogue */ +.catalog-page .md-grid { + max-width: 1600px; + width: min(96vw, 1600px); +} + +.catalog-page .md-content__inner { + margin: 0 0.75rem 2.5rem; +} + +/* Hide the toggle while the sidebar is open */ +.catalog-page.catalog-nav-open .catalog-nav-toggle { + opacity: 0; + 
pointer-events: none; +} diff --git a/docs/documentation/ai_assessment/ai_assessment_catalogue.md b/docs/documentation/ai_assessment/ai_assessment_catalogue.md index d4fa8eba..178cb46f 100644 --- a/docs/documentation/ai_assessment/ai_assessment_catalogue.md +++ b/docs/documentation/ai_assessment/ai_assessment_catalogue.md @@ -1,12 +1,12 @@ --- -# icon: material/robot-search-outline +icon: material/store-search-outline title: AI Assessment Catalogue hide: - toc --- + + + + + +
- -The AI Assessment Catalogue showcases the evaluation tools, testing frameworks, and assessment solutions available across the Citcom.ai TEF network. -It is regularly updated as new methodologies and tools become available at each TEF site. +The AI Assessment Catalogue showcases the evaluation tools, testing frameworks, and assessment solutions available across the Citcom.ai TEF network. +It is regularly updated as new methodologies and tools become available at each TEF site. If you would like to request an assessment or learn more about a tool, please contact the relevant TEF sites. +
+
@@ -31,7 +66,7 @@ If you would like to request an assessment or learn more about a tool, please co - +
-
- - +
+
| Solution Name | Provider | Licensing Type | Project Phase / TRL | Domain of Application | AI Risk Category | Ethical Dimensions | Security & Securitization of Data | Resources | Example of Use Case | -|---------------|----------|----------------|----------------------|------------------------|------------------|--------------------|-----------------------------------|-----------|----------------------| -| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | GitHub: , Paper: "FAIRGAME: A Framework for AI Agents Bias Recognition Using Game Theory", Frontiers in AI and Applications, Vol. 413: ECAI 2025| A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | -| **MLA-BiTe** | LIST | To be open sourced | TRL 6–8 | LLM bias testing | General Purpose AI | Fairness, Robustness | No data privacy requirements | — | A city plans to evaluate fairness in its citizen-facing chatbot. MLA-BiTe allows non-technical staff to create local scenario-based prompts to uncover discriminatory behaviour across sensitive categories, supporting multiple languages and augmentations. 
| +| ------------- | -------- | -------------- | ------------------- | --------------------- | ---------------- | ------------------ | --------------------------------- | --------- | ------------------- | +| **FAIRGAME** | LIST | Open-source | TRL 6–8 | LLM bias testing, AI agents behavioural testing, jailbreaking testing | General Purpose AI | Fairness, Robustness | Depends on the use case (whether the chatbot/AI agent has access to sensitive data) | GitHub: , Paper: "FAIRGAME: A Framework for AI Agents Bias Recognition Using Game Theory", Frontiers in AI and Applications, Vol. 413: ECAI 2025 | A city aims to test its citizen-facing chatbot before launch. FAIRGAME enables the creation of simulated users with diverse identities, personalities, and requests using LLMs, allowing evaluation in dynamic, real-world-like conversations. | +| **MLA-BiTe** | LIST | To be open sourced | TRL 6–8 | LLM bias testing | General Purpose AI | Fairness, Robustness | No data privacy requirements | — | A city plans to evaluate fairness in its citizen-facing chatbot. MLA-BiTe allows non-technical staff to create local scenario-based prompts to uncover discriminatory behaviour across sensitive categories, supporting multiple languages and augmentations. | | **Legal KG-RAG** | LIST | Proprietary | TRL 5–7 | LLM factuality accuracy testing | General Purpose AI | Transparency, Explainability, Robustness | Depends on whether the RAG is performed on sensitive data | — | A city using a standard RAG pipeline obtains irrelevant results. Legal KG-RAG rebuilds the legal corpus as a Neo4j knowledge graph, enabling direct comparison between traditional and KG-enhanced retrieval. | | **MLA-Reject** | LIST | To be open sourced | TRL 6–8 | LLM robustness to jailbreaking | General Purpose AI | Robustness | Depends on whether the system has access to sensitive data | — | A public administration operates a multilingual assistant for internal queries. 
They want to test robustness against unsafe or misleading prompts. MLA-Reject generates difficult negative prompts to test refusal behaviour and safety guardrails, revealing weaknesses and improving configurations. | + +
diff --git a/mkdocs.yml b/mkdocs.yml index a639aed0..47d79952 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -9,6 +9,7 @@ extra_css: - stylesheets/extra.css - stylesheets/neoteroi-mkdocs.css - assets/css/data_catalog.css + - assets/css/ai_assess_catalog.css extra_javascript: - assets/js/data_catalog.js theme: From 127f249391986f7e7ccba517bec6dd3531b3a173 Mon Sep 17 00:00:00 2001 From: Sergio <44024844+sermars@users.noreply.github.com> Date: Tue, 2 Dec 2025 10:32:59 +0000 Subject: [PATCH 20/20] Enable live reload for mkdocs server --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 42c960b6..72ea582d 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ virtenv_create: .PHONY: run_mkdocs ## Run mkdocs run_mkdocs: - source $(VENV_NAME)/bin/activate && mkdocs serve + source $(VENV_NAME)/bin/activate && mkdocs serve --livereload ################################################################################ # Self Documenting Commands #