getValidationTypeSet() {
+ return validatorMap.keySet();
+ }
+
+ /**
+  * Two registries are considered equal when they are instances of the very same class
+  * and their validator maps are equal.
+  */
+ @Override
+ public boolean equals(Object o) {
+     if (o == this) {
+         return true;
+     }
+     if (o == null || o.getClass() != getClass()) {
+         return false;
+     }
+     DefaultValidatorRegistry other = (DefaultValidatorRegistry) o;
+     return Objects.equals(this.validatorMap, other.validatorMap);
+ }
+
+ /**
+  * Hash code derived from the validator map, i.e. the same state that {@code equals} compares.
+  */
+ @Override
+ public int hashCode() {
+     final Object[] significantFields = {validatorMap};
+     return Objects.hash(significantFields);
+ }
+}
diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidator.java b/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidator.java
new file mode 100644
index 0000000..51c13c4
--- /dev/null
+++ b/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidator.java
@@ -0,0 +1,36 @@
+package org.phenopackets.validator.core;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.List;
+
+/**
+ * Phenopacket validator applies rules to check that the provided data meets the requirements for a valid Phenopacket.
+ * <p>
+ * Implementations provide {@link #info()} and {@link #validate(InputStream)}. The remaining {@code validate}
+ * overloads are conveniences that delegate to {@link #validate(InputStream)}.
+ *
+ * @author Daniel Danis
+ * @author Peter N Robinson
+ */
+public interface PhenopacketValidator {
+
+    /**
+     * @return description of this validator.
+     */
+    ValidatorInfo info();
+
+    /**
+     * Validate the content of the {@code inputStream}.
+     *
+     * @param inputStream stream with the data to validate
+     * @return list of {@link ValidationItem}s produced by the validation
+     */
+    List<ValidationItem> validate(InputStream inputStream);
+
+    // -----------------------------------------------------------------------------------------------------------------
+
+    /**
+     * Validate the content of the {@code phenopacket} file.
+     *
+     * @param phenopacket file with the data to validate
+     * @return list of {@link ValidationItem}s produced by the validation
+     * @throws IOException if the file cannot be opened or closed
+     */
+    default List<ValidationItem> validate(File phenopacket) throws IOException {
+        try (InputStream inputStream = Files.newInputStream(phenopacket.toPath())) {
+            // Hand the stream straight to the primary method; the interface has no byte[] overload to delegate to.
+            return validate(inputStream);
+        }
+    }
+
+    /**
+     * Validate the {@code content} string; the string is encoded as UTF-8 before validation.
+     *
+     * @param content string with the data to validate
+     * @return list of {@link ValidationItem}s produced by the validation
+     */
+    default List<ValidationItem> validate(String content) {
+        return validate(new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)));
+    }
+
+}
diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidatorRegistry.java b/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidatorRegistry.java
new file mode 100644
index 0000000..b48b36c
--- /dev/null
+++ b/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidatorRegistry.java
@@ -0,0 +1,23 @@
+package org.phenopackets.validator.core;
+
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * The phenopacket validator registry stores the available validators and allows looking them up
+ * by the {@link ValidatorInfo} that describes them.
+ *
+ * @author Daniel Danis
+ * @author Peter N Robinson
+ */
+public interface PhenopacketValidatorRegistry {
+
+    /**
+     * Create a registry backed by the default implementation.
+     *
+     * @param validMap validators to register, keyed by their {@link ValidatorInfo}
+     * @return registry containing the provided validators
+     */
+    static PhenopacketValidatorRegistry of(Map<ValidatorInfo, PhenopacketValidator> validMap) {
+        return new DefaultValidatorRegistry(validMap);
+    }
+
+    /**
+     * @param type description of the requested validator
+     * @return the validator registered for {@code type}, if any
+     */
+    Optional<? extends PhenopacketValidator> getValidatorForType(ValidatorInfo type);
+
+    /**
+     * @return set with descriptions of the registered validators
+     */
+    Set<ValidatorInfo> getValidationTypeSet();
+}
+
diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItem.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItem.java
new file mode 100644
index 0000000..44efe0f
--- /dev/null
+++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItem.java
@@ -0,0 +1,30 @@
+package org.phenopackets.validator.core;
+
+/**
+ * A single issue found during validation.
+ * <p>
+ * Each item exposes the {@link ValidatorInfo} of the validator that produced it, a {@link ValidationItemType}
+ * that categorizes the issue, and a free-text message.
+ *
+ * @author Daniel Danis
+ * @author Peter N Robinson
+ */
+public interface ValidationItem {
+
+    /**
+     * Create a validation item from its three components.
+     *
+     * @param validatorInfo description of the validator that produced the issue
+     * @param type          category of the issue
+     * @param message       description of the issue
+     * @return new validation item
+     */
+    static ValidationItem of(ValidatorInfo validatorInfo, ValidationItemType type, String message) {
+        return new ValidationItemDefault(validatorInfo, type, message);
+    }
+
+    /**
+     * @return basic description of the validator that produced this issue.
+     */
+    ValidatorInfo validatorInfo();
+
+    /**
+     * @return standardized description of the issue, intended both for grouping of issues and for human consumption.
+     */
+    ValidationItemType type();
+
+    /**
+     * @return description of the issue intended for human consumption.
+     */
+    String message();
+
+}
diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemDefault.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemDefault.java
new file mode 100644
index 0000000..34f8860
--- /dev/null
+++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemDefault.java
@@ -0,0 +1,3 @@
+package org.phenopackets.validator.core;
+
+/**
+ * Record-backed {@link ValidationItem}; this is the type instantiated by
+ * {@link ValidationItem#of(ValidatorInfo, ValidationItemType, String)}.
+ */
+record ValidationItemDefault(ValidatorInfo validatorInfo,
+                             ValidationItemType type,
+                             String message) implements ValidationItem {
+}
diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemType.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemType.java
new file mode 100644
index 0000000..ac8489d
--- /dev/null
+++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemType.java
@@ -0,0 +1,13 @@
+package org.phenopackets.validator.core;
+
+/**
+ * Category of a validation issue, consisting of a type label and a description.
+ */
+public interface ValidationItemType {
+
+    /**
+     * Create an item type from a label and a description.
+     *
+     * @param itemType        label of the item type
+     * @param itemDescription description of the item type
+     * @return new item type
+     */
+    static ValidationItemType of(String itemType, String itemDescription) {
+        return new ValidationItemTypeDefault(itemType, itemDescription);
+    }
+
+    /**
+     * @return label of the item type.
+     */
+    String itemType();
+
+    /**
+     * @return description of the item type.
+     */
+    String itemDescription();
+
+}
diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypeDefault.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypeDefault.java
new file mode 100644
index 0000000..5f30f99
--- /dev/null
+++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypeDefault.java
@@ -0,0 +1,3 @@
+package org.phenopackets.validator.core;
+
+/**
+ * Record-backed {@link ValidationItemType}; this is the type instantiated by
+ * {@link ValidationItemType#of(String, String)}.
+ */
+record ValidationItemTypeDefault(String itemType,
+                                 String itemDescription) implements ValidationItemType {
+}
diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypes.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypes.java
new file mode 100644
index 0000000..2499929
--- /dev/null
+++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypes.java
@@ -0,0 +1,13 @@
+package org.phenopackets.validator.core;
+
+/**
+ * Static accessors for validation item types that are not specific to a particular validator.
+ */
+public final class ValidationItemTypes {
+
+    // The description is user-facing text; the wording was fixed from "does not conform the with phenopacket syntax".
+    private static final ValidationItemType SYNTAX_ERROR = ValidationItemType.of("Syntax error", "The input does not conform with the phenopacket syntax");
+
+    private ValidationItemTypes() {}
+
+    /**
+     * @return shared item type representing a syntax error in the input.
+     */
+    public static ValidationItemType syntaxError() {
+        return SYNTAX_ERROR;
+    }
+
+}
diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidatorInfo.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidatorInfo.java
new file mode 100644
index 0000000..64915d3
--- /dev/null
+++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidatorInfo.java
@@ -0,0 +1,30 @@
+package org.phenopackets.validator.core;
+
+/**
+ * Description of a validator, consisting of a validator ID and a validator name.
+ */
+public interface ValidatorInfo {
+
+    /**
+     * @return info for the generic validator, as provided by the default implementation.
+     */
+    static ValidatorInfo generic() {
+        return DefaultValidationInfo.generic();
+    }
+
+    /**
+     * Info for the validator that performs additional validation of a phenopacket intended to be used
+     * for HPO rare disease phenotyping. By assumption, the phenopacket will have been first
+     * checked against the {@link ValidatorInfo#generic()} specification. The validation is performed with the
+     * file {@code hpo-rare-disease-schema.json}.
+     *
+     * @return info for the rare disease validator.
+     */
+    static ValidatorInfo rareDiseaseValidation() {
+        return DefaultValidationInfo.rareDiseaseValidator();
+    }
+
+    /**
+     * Create a validator info from an ID and a name.
+     *
+     * @param validatorId   ID of the validator
+     * @param validatorName name of the validator
+     * @return new validator info
+     */
+    static ValidatorInfo of(String validatorId, String validatorName) {
+        return DefaultValidationInfo.of(validatorId, validatorName);
+    }
+
+    /**
+     * @return ID of the validator.
+     */
+    String validatorId();
+
+    /**
+     * @return name of the validator.
+     */
+    String validatorName();
+
+    // hashCode and equals are redeclared explicitly on the interface.
+    // NOTE(review): presumably because instances serve as map keys in the validator registry - confirm.
+    int hashCode();
+
+    boolean equals(Object o);
+}
diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/except/PhenopacketValidatorRuntimeException.java b/validator-core/src/main/java/org/phenopackets/validator/core/except/PhenopacketValidatorRuntimeException.java
new file mode 100644
index 0000000..f640744
--- /dev/null
+++ b/validator-core/src/main/java/org/phenopackets/validator/core/except/PhenopacketValidatorRuntimeException.java
@@ -0,0 +1,24 @@
+package org.phenopackets.validator.core.except;
+
+public class PhenopacketValidatorRuntimeException extends RuntimeException {
+
+ public PhenopacketValidatorRuntimeException() {
+ super();
+ }
+
+ public PhenopacketValidatorRuntimeException(String message) {
+ super(message);
+ }
+
+ public PhenopacketValidatorRuntimeException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public PhenopacketValidatorRuntimeException(Throwable cause) {
+ super(cause);
+ }
+
+ protected PhenopacketValidatorRuntimeException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+}
diff --git a/validator-core/src/test/java/org/phenopackets/schema/validator/core/jsonschema/JsonSchemaValidatorTest.java b/validator-core/src/test/java/org/phenopackets/schema/validator/core/jsonschema/JsonSchemaValidatorTest.java
deleted file mode 100644
index 252b38e..0000000
--- a/validator-core/src/test/java/org/phenopackets/schema/validator/core/jsonschema/JsonSchemaValidatorTest.java
+++ /dev/null
@@ -1,73 +0,0 @@
-package org.phenopackets.schema.validator.core.jsonschema;
-
-import org.junit.jupiter.api.Test;
-import org.phenopackets.schema.validator.core.validation.ErrorType;
-import org.phenopackets.schema.validator.core.validation.ValidationItem;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-public class JsonSchemaValidatorTest {
-
- private static File fileFromClasspath(String path) {
- String fname = Thread.currentThread().getContextClassLoader().getResource(path).getPath();
- return new File(fname);
- }
-
- @Test
- public void testValidationOfSimpleValidPhenopacket() throws IOException {
- File validSimplePhenopacket = fileFromClasspath("json/validSimplePhenopacket.json");
- JsonSchemaValidator validator = new JsonSchemaValidator(validSimplePhenopacket);
- List extends ValidationItem> errors = validator.validate();
- assertTrue(errors.isEmpty());
- }
-
- /**
- * This example phenopacket does not contain anything except {"disney" : "donald"}
- * It does not contain an id or a metaData element and thus should fail.
- */
- @Test
- public void testValidationOfSimpleInValidPhenopacket() {
- File invalidSimplePhenopacket = fileFromClasspath("json/invalidSimplePhenopacket.json");
- JsonSchemaValidator validator = new JsonSchemaValidator(invalidSimplePhenopacket);
- List extends ValidationItem> errors = validator.validate();
- assertEquals(3, errors.size());
- ValidationItem error = errors.get(0);
- assertEquals(ErrorType.JSON_REQUIRED, error.errorType());
- assertEquals("$.id: is missing but it is required", error.message());
- error = errors.get(1);
- assertEquals(ErrorType.JSON_REQUIRED, error.errorType());
- assertEquals("$.metaData: is missing but it is required", error.message());
- error = errors.get(2);
- assertEquals(ErrorType.JSON_ADDITIONAL_PROPERTIES, error.errorType());
- assertEquals("$.disney: is not defined in the schema and the schema does not allow additional properties", error.message());
- }
-
- @Test
- public void testRareDiseaseBethlemahmValidPhenopacket() throws IOException {
- File myopathyPhenopacket = fileFromClasspath("json/bethlehamMyopathyExample.json");
- JsonSchemaValidator validator = new JsonSchemaValidator(myopathyPhenopacket);
- List extends ValidationItem> errors = validator.validate();
- assertTrue(errors.isEmpty());
- }
-
- @Test
- public void testRareDiseaseBethlemahmInvalidValidPhenopacket() throws IOException {
- File invalidMyopathyPhenopacket = fileFromClasspath("json/bethlehamMyopathyInvalidExample.json");
- JsonSchemaValidator validator = new JsonSchemaValidator(invalidMyopathyPhenopacket);
- List extends ValidationItem> errors = validator.validate();
-// for (ValidationError ve : errors) {
-// System.out.println(ve.getMessage());
-// }
- assertEquals(1, errors.size());
- ValidationItem error = errors.get(0);
- assertEquals(ErrorType.JSON_ADDITIONAL_PROPERTIES, error.errorType());
- assertEquals("$.phenotypicFeaturesMALFORMED: is not defined in the schema and the schema does not allow additional properties", error.message());
- }
-
-
-}
diff --git a/validator-core/src/test/resources/json/invalidSimplePhenopacket.json b/validator-core/src/test/resources/json/invalidSimplePhenopacket.json
deleted file mode 100644
index 8258e4e..0000000
--- a/validator-core/src/test/resources/json/invalidSimplePhenopacket.json
+++ /dev/null
@@ -1 +0,0 @@
-{"disney" : "donald"}
\ No newline at end of file
diff --git a/validator-core/src/test/resources/json/validSimplePhenopacket.json b/validator-core/src/test/resources/json/validSimplePhenopacket.json
deleted file mode 100644
index eb6c3bb..0000000
--- a/validator-core/src/test/resources/json/validSimplePhenopacket.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{"id" : "world",
- "metaData": {
- "created": "2021-07-01T19:32:35Z",
- "createdBy": "Hpo Case Annotator : 1.0.15-SNAPSHOT",
- "submittedBy": "HPO:probinson",
- "resources": [{
- "id": "hp",
- "name": "human phenotype ontology",
- "url": "http://purl.obolibrary.org/obo/hp.owl",
- "version": "2018-03-08",
- "namespacePrefix": "HP",
- "iriPrefix": "http://purl.obolibrary.org/obo/HP_"
- }],
- "phenopacketSchemaVersion": "2.0"
- }
-}
\ No newline at end of file
diff --git a/validator-jsonschema/pom.xml b/validator-jsonschema/pom.xml
new file mode 100644
index 0000000..8fffb20
--- /dev/null
+++ b/validator-jsonschema/pom.xml
@@ -0,0 +1,46 @@
+
+
+
+ validator
+ org.phenopackets.validator
+ 0.1.1-SNAPSHOT
+
+ 4.0.0
+
+ validator-jsonschema
+
+
+
+ org.phenopackets.validator
+ validator-core
+ ${project.parent.version}
+
+
+
+ com.networknt
+ json-schema-validator
+
+
+
+
+ org.phenopackets
+ phenopacket-schema
+ test
+
+
+ com.google.protobuf
+ protobuf-java-util
+ test
+
+
+
+ org.phenopackets.phenotools
+ phenotools-builder
+ 0.0.1-SNAPSHOT
+ test
+
+
+
+
\ No newline at end of file
diff --git a/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidator.java b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidator.java
new file mode 100644
index 0000000..17565c5
--- /dev/null
+++ b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidator.java
@@ -0,0 +1,113 @@
+package org.phenopackets.validator.jsonschema;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.networknt.schema.JsonSchema;
+import com.networknt.schema.JsonSchemaFactory;
+import com.networknt.schema.SpecVersion;
+import org.phenopackets.validator.core.*;
+import org.phenopackets.validator.core.except.PhenopacketValidatorRuntimeException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+
+public class JsonSchemaValidator implements PhenopacketValidator {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(JsonSchemaValidator.class);
+
+ /**
+ * The latest version of the spec that is supported by our JSON SCHEMA library is 2019/09.
+ */
+ private static final SpecVersion.VersionFlag VERSION_FLAG = SpecVersion.VersionFlag.V201909;
+ private final JsonSchema jsonSchema;
+ private final ObjectMapper objectMapper = new ObjectMapper();
+ private final ValidatorInfo validatorInfo;
+
+
+ private JsonSchemaValidator(JsonSchema jsonSchema, ValidatorInfo validatorInfo) {
+ this.jsonSchema = jsonSchema;
+ this.validatorInfo = validatorInfo;
+ }
+
+ /**
+ * @param jsonSchema path to JSON schema specification file
+ * @throws PhenopacketValidatorRuntimeException if jsonSchema is not a valid file or if the file is
+ * not a valid JSON schema specification
+ */
+ public static JsonSchemaValidator of(File jsonSchema, ValidatorInfo validatorInfo) {
+ if (!jsonSchema.isFile()) {
+ throw new PhenopacketValidatorRuntimeException("Could not open file at \"" + jsonSchema.getAbsolutePath() + "\"");
+ }
+ LOGGER.debug("Reading JSON schema from `{}`", jsonSchema.getAbsolutePath());
+ try (InputStream inputStream = Files.newInputStream(jsonSchema.toPath())) {
+ return of(inputStream, validatorInfo);
+ } catch (Exception e) {
+ throw new PhenopacketValidatorRuntimeException("Invalid JSON schema specification: " + e.getMessage());
+ }
+ }
+
+ public static JsonSchemaValidator of(InputStream inputStream, ValidatorInfo validatorInfo) {
+ JsonSchemaFactory schemaFactory = JsonSchemaFactory.getInstance(VERSION_FLAG);
+ return new JsonSchemaValidator(schemaFactory.getSchema(inputStream), validatorInfo);
+ }
+
+ private static ValidationItemType stringToErrorType(String error) {
+ return switch (error) {
+ case "additionalProperties" -> JsonValidationItemTypes.JSON_ADDITIONAL_PROPERTIES;
+ case "required" -> JsonValidationItemTypes.JSON_REQUIRED;
+ case "type", "enum" -> JsonValidationItemTypes.JSON_TYPE;
+ default -> throw new PhenopacketValidatorRuntimeException("Did not recognize JSON error type: \"" + error + "\"");
+ };
+ }
+
+ @Override
+ public ValidatorInfo info() {
+ return validatorInfo;
+ }
+
+ /**
+ * Validate the {@code inputStream} content (assumed to be a Phenopacket formated in JSON)
+ *
+ * @return List of {@link ValidationItem} objects (empty list if there were no errors)
+ */
+ @Override
+ public List validate(InputStream inputStream) {
+ List errors = new ArrayList<>();
+ try {
+ JsonNode json = objectMapper.readTree(inputStream);
+ jsonSchema.validate(json)
+ .forEach(e -> errors.add(ValidationItem.of(validatorInfo, stringToErrorType(e.getType()), e.getMessage())));
+
+ return errors;
+ } catch (Exception e) {
+ if (e instanceof IOException) {
+ LOGGER.warn("Error while decoding JSON content: {}", e.getMessage(), e);
+ } else {
+ LOGGER.warn("Error while validating: {}", e.getMessage(), e);
+ }
+
+ return List.of(ValidationItem.of(validatorInfo, ValidationItemTypes.syntaxError(), e.getMessage()));
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ JsonSchemaValidator that = (JsonSchemaValidator) o;
+ return Objects.equals(jsonSchema, that.jsonSchema) && Objects.equals(objectMapper, that.objectMapper) && Objects.equals(validatorInfo, that.validatorInfo);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(jsonSchema, objectMapper, validatorInfo);
+ }
+}
diff --git a/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidators.java b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidators.java
new file mode 100644
index 0000000..9c75490
--- /dev/null
+++ b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidators.java
@@ -0,0 +1,40 @@
+package org.phenopackets.validator.jsonschema;
+
+import org.phenopackets.validator.core.PhenopacketValidator;
+import org.phenopackets.validator.core.ValidatorInfo;
+import org.phenopackets.validator.core.except.PhenopacketValidatorRuntimeException;
+
+import java.io.InputStream;
+import java.util.Map;
+
+/**
+ * Utility class that uses JSON schema definitions that are bundled within the application (on classpath).
+ *
+ * @author Daniel Danis
+ * @author Peter N Robinson
+ */
+public class JsonSchemaValidators {
+
+    /**
+     * @return single-entry map with the validator backed by {@code /schema/phenopacket-generic.json},
+     * keyed by {@link ValidatorInfo#generic()}.
+     */
+    public static Map<ValidatorInfo, PhenopacketValidator> genericValidator() {
+        return Map.of(
+                ValidatorInfo.generic(), makeJsonValidator("/schema/phenopacket-generic.json", ValidatorInfo.generic())
+        );
+    }
+
+    /**
+     * @return single-entry map with the validator backed by {@code /schema/hpo-rare-disease-schema.json},
+     * keyed by {@link ValidatorInfo#rareDiseaseValidation()}.
+     */
+    public static Map<ValidatorInfo, PhenopacketValidator> rareHpoValidator() {
+        // The rare disease schema must be registered under its own info. Using ValidatorInfo.generic() here
+        // would mislabel the reported items and collide with the generic validator's map key.
+        return Map.of(
+                ValidatorInfo.rareDiseaseValidation(), makeJsonValidator("/schema/hpo-rare-disease-schema.json", ValidatorInfo.rareDiseaseValidation())
+        );
+    }
+
+    /**
+     * Create a JSON schema validator from a schema located on the classpath.
+     *
+     * @throws PhenopacketValidatorRuntimeException if there is no resource at {@code schemaPath}
+     */
+    private static PhenopacketValidator makeJsonValidator(String schemaPath, ValidatorInfo validationName) {
+        InputStream inputStream = JsonSchemaValidators.class.getResourceAsStream(schemaPath);
+        if (inputStream == null)
+            throw new PhenopacketValidatorRuntimeException("Invalid JSON schema path `" + schemaPath + '`');
+
+        return JsonSchemaValidator.of(inputStream, validationName);
+    }
+
+    private JsonSchemaValidators() {}
+
+}
diff --git a/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonValidationItemTypes.java b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonValidationItemTypes.java
new file mode 100644
index 0000000..5c1d621
--- /dev/null
+++ b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonValidationItemTypes.java
@@ -0,0 +1,26 @@
+package org.phenopackets.validator.jsonschema;
+
+import org.phenopackets.validator.core.ValidationItemType;
+
+/**
+ * Validation item types for the issues reported by JSON schema validation.
+ */
+public final class JsonValidationItemTypes {
+
+    /**
+     * JSON schema error meaning that the JSON code contained a property not present in the schema.
+     */
+    public static final ValidationItemType JSON_ADDITIONAL_PROPERTIES = ValidationItemType.of("additionalProperties", "Use of a property not present in the schema");
+
+    /**
+     * JSON schema error meaning that the JSON code failed to contain a property required by the schema.
+     */
+    public static final ValidationItemType JSON_REQUIRED = ValidationItemType.of("required", "failure to contain a property required by the schema");
+
+    /**
+     * The type of an object is not as required by the schema, e.g., we get a string instead of an array.
+     */
+    public static final ValidationItemType JSON_TYPE = ValidationItemType.of("type", "incorrect data type of object");
+
+    /**
+     * The value is not one of the values enumerated by the schema.
+     */
+    public static final ValidationItemType JSON_ENUM = ValidationItemType.of("enum", "value is not one of the values allowed by the schema");
+
+    private JsonValidationItemTypes() {}
+}
diff --git a/validator-core/src/main/resources/schema/hpo-rare-disease-schema.json b/validator-jsonschema/src/main/resources/schema/hpo-rare-disease-schema.json
similarity index 70%
rename from validator-core/src/main/resources/schema/hpo-rare-disease-schema.json
rename to validator-jsonschema/src/main/resources/schema/hpo-rare-disease-schema.json
index b3c2450..fc7325b 100644
--- a/validator-core/src/main/resources/schema/hpo-rare-disease-schema.json
+++ b/validator-jsonschema/src/main/resources/schema/hpo-rare-disease-schema.json
@@ -3,7 +3,6 @@
"$id": "https://www.ga4gh.org/phenopackets",
"title": "HPO Rare Disease Phenopacket Schema",
"description": "HPO Rare Disease Schema for GGA4GH Phenopacket",
- "_comment": "Here we require the phenopacket to have the following elements that are not required by the default schema 1. subject (proband being investigated) 2. at least one phenotypicFeature element 3. time_at_last encounter (subelement of subject), representing the age of the proband. In addition, we require that Human Phenotype Ontology (HPO) terms are used to represent phenotypicFeature",
"type": "object",
"properties": {
"subject": {
diff --git a/validator-core/src/main/resources/schema/phenopacket-general-schema.json b/validator-jsonschema/src/main/resources/schema/phenopacket-generic.json
similarity index 89%
rename from validator-core/src/main/resources/schema/phenopacket-general-schema.json
rename to validator-jsonschema/src/main/resources/schema/phenopacket-generic.json
index 1d0b4ca..00aa0f5 100644
--- a/validator-core/src/main/resources/schema/phenopacket-general-schema.json
+++ b/validator-jsonschema/src/main/resources/schema/phenopacket-generic.json
@@ -32,10 +32,18 @@
"$ref": "#/definitions/timeElement"
},
"vitalStatus": {
- "enum": ["UNKNOWN_STATUS", "ALIVE", "DECEASED"]
+ "type": "object",
+ "properties": {
+ "status": {"enum": ["UNKNOWN_STATUS", "ALIVE", "DECEASED"]},
+ "timeOfDeath": {"$ref": "#/definitions/timeElement"},
+ "causeOfDeath": {"$ref": "#/definitions/ontologyClass"},
+ "survivalTimeInDays": {"type": "integer"}
+ },
+ "required": [ "status"],
+ "additionalProperties": false
},
"sex": {
- "enum": ["UNKNOWN_SEX", "FEMALE", "MALE", "UNKNOWN_SEX"]
+ "enum": ["UNKNOWN_SEX", "FEMALE", "MALE"]
},
"karyotypicSex": {
"enum": ["UNKNOWN_KARYOTYPE", "XX", "XY", "XO", "XXY","XXX","XXYY", "XXXY", "XXXX", "XYY", "OTHER_KARYOTYPE"]
@@ -176,23 +184,27 @@
"reference": {
"$ref": "#/definitions/externalReference"
}
- }
+ },
+ "required": ["evidenceCode"],
+ "additionalProperties": false
},
"timeElement": {
+ "type": "object",
"properties": {
"gestationalAge": {"$ref": "#/definitions/gestationalAge"},
"age" : {"$ref": "#/definitions/age"},
"ageRange": { "$ref": "#/definitions/ageRange"},
"ontologyClass": { "$ref": "#/definitions/ontologyClass"},
"timestamp": { "type": "string", "format": "date-time"},
- "timeInterval": {"$ref": "#/definitions/timeInterval"}
+ "interval": {"$ref": "#/definitions/timeInterval"}
},
+ "additionalProperties": false,
"oneOf": [ { "required": [ "gestationalAge" ]},
{ "required": [ "age" ]},
{ "required": [ "ageRange"]},
{ "required": [ "ontologyClass" ]},
{ "required": [ "timestamp" ]},
- { "required": [ "timeInterval" ]}
+ { "required": [ "interval" ]}
]
},
"update": {
@@ -291,19 +303,61 @@
"additionalProperties": false
},
"value": {
- "oneOf": [
- {
+ "type": "object",
+ "properties": {
+ "quantity": {
+ "$ref": "#/definitions/quantity"
+ },
+ "ontologyClass": {
+ "$ref": "#/definitions/ontologyClass"
+ }
+ },
+ "additionalProperties": false,
+ "oneOf": [ { "required": [ "quantity" ]},
+ { "required": [ "ontologyClass" ]}
+ ]
+ },
+ "typedQuantity": {
+ "type": "object",
+ "properties": {
+ "type": {
"$ref": "#/definitions/ontologyClass"
},
- {
+ "quantity": {
"$ref": "#/definitions/quantity"
}
- ]
+ },
+ "required": ["type", "quantity"],
+ "additionalProperties": false
},
"complexValue": {
- "type": "null"
+ "type": "object",
+ "properties": {
+ "typedQuantities": {
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/typedQuantity"
+ },
+ "minItems": 1,
+ "uniqueItems": true
+ }
+ },
+ "required": ["typedQuantities"],
+ "additionalProperties": false
},
"measurement": {
+ "type": "object",
+ "properties": {
+ "description": {"type": "string"},
+ "assay": { "$ref": "#/definitions/ontologyClass" },
+ "value": {"$ref": "#/definitions/value" },
+ "complexValue": { "$ref": "#/definitions/complexValue" },
+ "timeObserved": { "$ref": "#/definitions/timeElement"},
+ "procedure": { "$ref": "#/definitions/procedure"}
+ },
+ "required": ["assay"],
+ "additionalProperties": false,
+ "oneOf": [ { "required": [ "value" ]}, { "required": [ "complexValue" ]}]
},
"age": {
"type": "object",
@@ -467,7 +521,10 @@
"$ref": "#/definitions/timeElement"
},
"diseaseStage": {
- "$ref": "#/definitions/ontologyClass"
+ "type": "array",
+ "items": {
+ "$ref": "#/definitions/ontologyClass"
+ }
},
"clinicalTnmFinding": {
"$ref": "#/definitions/ontologyClass"
@@ -478,7 +535,9 @@
"laterality": {
"$ref": "#/definitions/ontologyClass"
}
- }
+ },
+ "required": ["term"],
+ "additionalProperties": false
},
"interpretation": {
"type": "object",
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/BiosampleValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/BiosampleValidatorTest.java
new file mode 100644
index 0000000..963ea74
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/BiosampleValidatorTest.java
@@ -0,0 +1,81 @@
+package org.phenopackets.validator.jsonschema;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.google.protobuf.util.JsonFormat;
+import org.junit.jupiter.api.Test;
+import org.phenopackets.phenotools.builder.PhenopacketBuilder;
+import org.phenopackets.phenotools.builder.builders.*;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.schema.v2.core.Biosample;
+import org.phenopackets.schema.v2.core.MetaData;
+import org.phenopackets.schema.v2.core.OntologyClass;
+import org.phenopackets.validator.core.ValidationItem;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass;
+
+/**
+ * Tests of JSON schema validation of the Biosample element of a phenopacket.
+ */
+public class BiosampleValidatorTest extends JsonSchemaValidatorTestBase {
+    private static final String PHENOPACKET_ID = "arbitrary.id";
+    private static final String PROBAND_ID = "proband A";
+    private static final OntologyClass BIOPSY = ontologyClass("NCIT:C15189", "Biopsy");
+    private static final MetaData metadata = MetaDataBuilder.create("2021-05-14T10:35:00Z", "anonymous biocurator")
+            .resource(Resources.ncitVersion("21.05d"))
+            .resource(Resources.efoVersion("3.34.0"))
+            .resource(Resources.uberonVersion("2021-07-27"))
+            .resource(Resources.ncbiTaxonVersion("2021-06-10"))
+            .build();
+    private static final Biosample lymphNodeBiopsy = BiosampleBuilder.create("biosample 2")
+            .individualId(PROBAND_ID)
+            .timeOfCollection(TimeElements.age("P48Y3M"))
+            .sampledTissue(ontologyClass("NCIT:C139196", "Esophageal Lymph Node"))
+            .tumorProgression(ontologyClass("NCIT:C84509", "Primary Malignant Neoplasm"))
+            .histologicalDiagnosis(ontologyClass("NCIT:C4024", "Esophageal Squamous Cell Carcinoma"))
+            .diagnosticMarker(ontologyClass("NCIT:C131711", "Human Papillomavirus-18 Positive"))
+            .procedure(ProcedureBuilder.create(BIOPSY).build())
+            .build();
+
+    private static final Biosample lungBiopsy = BiosampleBuilder.create("biosample 3")
+            .individualId(PROBAND_ID)
+            .timeOfCollection(TimeElements.age("P50Y7M"))
+            .sampledTissue(ontologyClass("NCIT:C12468", "Lung"))
+            .tumorProgression(ontologyClass("NCIT:C3261", "Metastatic Neoplasm"))
+            .procedure(ProcedureBuilder.create(BIOPSY).build())
+            .build();
+
+    /**
+     * @return phenopacket with the metadata and the lymph node biopsy biosample defined above.
+     */
+    private static Phenopacket phenopacketWithBiosample() {
+        return PhenopacketBuilder.create(PHENOPACKET_ID, metadata)
+                .biosample(lymphNodeBiopsy)
+                .build();
+    }
+
+    private static final Phenopacket phenopacket = phenopacketWithBiosample();
+
+    /**
+     * The unaltered phenopacket is expected to produce no validation items.
+     */
+    @Test
+    public void testValidityOfBiosample() throws InvalidProtocolBufferException {
+        String json = JsonFormat.printer().print(phenopacket);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertTrue(errors.isEmpty());
+    }
+
+    /**
+     * The only required element in a Biosample is the ID
+     */
+    @Test
+    public void testBiosampleLacksId() throws InvalidProtocolBufferException {
+        assertEquals(1, phenopacket.getBiosamplesCount());
+        Biosample biosample = phenopacket.getBiosamples(0);
+        // Rebuild the biosample without its ID and swap it into a copy of the phenopacket.
+        biosample = Biosample.newBuilder(biosample).clearId().build();
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setBiosamples(0, biosample).build();
+        String json = JsonFormat.printer().print(p1);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertEquals(1, errors.size());
+        String expectedErrorMsg = "$.biosamples[0].id: is missing but it is required";
+        assertEquals(expectedErrorMsg, errors.get(0).message());
+    }
+
+}
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ComplexValueValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ComplexValueValidatorTest.java
new file mode 100644
index 0000000..2d88586
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ComplexValueValidatorTest.java
@@ -0,0 +1,99 @@
+package org.phenopackets.validator.jsonschema;
+
+
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.google.protobuf.util.JsonFormat;
+import org.junit.jupiter.api.Test;
+import org.phenopackets.phenotools.builder.PhenopacketBuilder;
+import org.phenopackets.phenotools.builder.builders.*;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.schema.v2.core.*;
+import org.phenopackets.validator.core.ValidationItem;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass;
+
+/**
+ * Test a few errors in the ComplexValue message of a Measurement
+ * @author Peter N Robinson
+ */
+public class ComplexValueValidatorTest extends JsonSchemaValidatorTestBase {
+
+    private static Phenopacket phenopacketWithComplexValue() {
+        // Complex value for blood pressure: one typed quantity each for the
+        // systolic and the diastolic reading, both expressed in mmHg
+        OntologyClass systolic = ontologyClass("NCIT:C25298", "Systolic Blood Pressure");
+        OntologyClass diastolic = ontologyClass("NCIT:C25299", "Diastolic Blood Pressure");
+        OntologyClass mmHg = ontologyClass("NCIT:C49670", "Millimeter of Mercury");
+        ComplexValue complexValue = ComplexValueBuilder.create()
+                .typedQuantity(systolic, QuantityBuilder.create(mmHg, 120).build())
+                .typedQuantity(diastolic, QuantityBuilder.create(mmHg, 70).build())
+                .build();
+        OntologyClass bpAssay = ontologyClass("CMO:0000003", "blood pressure measurement");
+        Measurement bpM = MeasurementBuilder.complexValue(bpAssay, complexValue).build();
+        return PhenopacketBuilder.create("id:A", metadataWithHpo)
+                .measurement(bpM)
+                .build();
+    }
+
+    private static final Phenopacket phenopacket = phenopacketWithComplexValue();
+
+    /**
+     * First test that the unaltered phenopacket is OK
+     */
+    @Test
+    public void validatePhenopacket() throws InvalidProtocolBufferException {
+        String json = JsonFormat.printer().print(phenopacket);
+        // validate, then print any unexpected errors so that a failing run is easy to diagnose
+        List<? extends ValidationItem> errors = validator.validate(json);
+        for (var e : errors) {
+            System.out.println(e.message());
+        }
+        assertTrue(errors.isEmpty());
+    }
+
+    /**
+     * Each measurement requires an ontology term for the assay; clearing it must be reported
+     */
+    @Test
+    public void testMissingAssay() throws InvalidProtocolBufferException {
+        // Alter the first measurement, which is a ComplexValue
+        Measurement m1 = phenopacket.getMeasurements(0);
+        assertTrue(m1.hasComplexValue());
+        m1 = Measurement.newBuilder(m1).clearAssay().build();
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build();
+        String json = JsonFormat.printer().print(p1);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        String expectedErrorMsg = "$.measurements[0].assay: is missing but it is required";
+        boolean foundError = errors.stream().map(ValidationItem::message).anyMatch(expectedErrorMsg::equals);
+        assertTrue(foundError, "was expecting to find \"" + expectedErrorMsg + "\" but did not");
+    }
+
+    /**
+     * A ComplexValue requires the typedQuantities element; clearing it must yield exactly one error
+     */
+    @Test
+    public void testComplexValueHasNoTypedValue() throws InvalidProtocolBufferException {
+        // Alter the first measurement, which is a ComplexValue
+        Measurement m1 = phenopacket.getMeasurements(0);
+        assertTrue(m1.hasComplexValue());
+        ComplexValue cvale = m1.getComplexValue();
+        cvale = ComplexValue.newBuilder(cvale).clearTypedQuantities().build();
+        m1 = Measurement.newBuilder(m1).setComplexValue(cvale).build();
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build();
+        String json = JsonFormat.printer().print(p1);
+        // validate, then print the reported errors to ease diagnosis if the assertions below fail
+        List<? extends ValidationItem> errors = validator.validate(json);
+        for (var e : errors) {
+            System.out.println(e);
+        }
+        assertEquals(1, errors.size());
+        String expectedErrorMsg = "$.measurements[0].complexValue.typedQuantities: is missing but it is required";
+        assertEquals(expectedErrorMsg, errors.get(0).message());
+    }
+
+
+}
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/DiseaseValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/DiseaseValidatorTest.java
new file mode 100644
index 0000000..d48a469
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/DiseaseValidatorTest.java
@@ -0,0 +1,87 @@
+package org.phenopackets.validator.jsonschema;
+
+
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.google.protobuf.util.JsonFormat;
+import org.junit.jupiter.api.Test;
+import org.phenopackets.phenotools.builder.PhenopacketBuilder;
+import org.phenopackets.phenotools.builder.builders.DiseaseBuilder;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.schema.v2.core.Disease;
+import org.phenopackets.schema.v2.core.TimeElement;
+import org.phenopackets.validator.core.ValidationItem;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass;
+import static org.phenopackets.validator.jsonschema.JsonValidationItemTypes.*;
+import static org.phenopackets.validator.testdatagen.DatagenBase.*;
+
+/**
+ * This class creates a simple phenopacket with a Disease object and creates some variations with
+ * validation errors.
+ * @author Peter N Robinson
+ */
+public class DiseaseValidatorTest extends JsonSchemaValidatorTestBase {
+
+    private static Phenopacket phenopacketWithDisease() {
+        var nyha3 = ontologyClass("NCIT:C66907", "New York Heart Association Class III");
+        var childhood = TimeElement.newBuilder().setOntologyClass(CHILDHOOD_ONSET).build();
+        var chagasCardiomyopathy = DiseaseBuilder.create("MONDO:0005491", "Chagas cardiomyopathy")
+                .diseaseStage(nyha3)
+                .onset(childhood)
+                .build();
+        return PhenopacketBuilder.create("id:A", metadataWithHpo)
+                .disease(chagasCardiomyopathy)
+                .build();
+    }
+
+    private static final Phenopacket phenopacket = phenopacketWithDisease();
+
+    /** The unaltered phenopacket must validate without errors. */
+    @Test
+    public void testPhenopacketValidity() throws InvalidProtocolBufferException {
+        String json = JsonFormat.printer().print(phenopacket);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertTrue(errors.isEmpty());
+    }
+
+    /** Clearing the top-level id must yield exactly one JSON_REQUIRED error. */
+    @Test
+    public void testLacksId() throws InvalidProtocolBufferException {
+        // the Phenopacket is not valid if we remove the id
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket).clearId().build();
+        String json = JsonFormat.printer().print(p1);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        for (var e : errors) {
+            System.out.println(e);
+        }
+        assertEquals(1, errors.size());
+        ValidationItem error = errors.get(0);
+        assertEquals(JSON_REQUIRED, error.type());
+        assertEquals("$.id: is missing but it is required", error.message());
+    }
+
+    /** Clearing the term of the only Disease must yield exactly one JSON_REQUIRED error. */
+    @Test
+    public void testDiseaseLacksOntologyTerm() throws InvalidProtocolBufferException {
+        // Disease must have an ontology term
+        // the phenopacket has a single Disease message
+        Disease disease = phenopacket.getDiseases(0);
+        // remove the Ontology Term from the disease
+        disease = Disease.newBuilder(disease).clearTerm().build();
+        // replace the original disease object with the new disease object
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket)
+                .setDiseases(0, disease).build();
+        String json = JsonFormat.printer().print(p1);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertEquals(1, errors.size());
+        ValidationItem error = errors.get(0);
+        assertEquals(JSON_REQUIRED, error.type());
+        // this error means that the first disease lacks a term
+        assertEquals("$.diseases[0].term: is missing but it is required", error.message());
+    }
+
+}
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/JsonSchemaValidatorTestBase.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/JsonSchemaValidatorTestBase.java
new file mode 100644
index 0000000..83216ae
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/JsonSchemaValidatorTestBase.java
@@ -0,0 +1,34 @@
+package org.phenopackets.validator.jsonschema;
+
+import org.phenopackets.phenotools.builder.builders.MetaDataBuilder;
+import org.phenopackets.phenotools.builder.builders.Resources;
+import org.phenopackets.phenotools.builder.exceptions.PhenotoolsRuntimeException;
+import org.phenopackets.schema.v2.core.MetaData;
+import org.phenopackets.validator.core.PhenopacketValidator;
+import org.phenopackets.validator.core.ValidatorInfo;
+
+import java.util.Map;
+
+/** Fixtures shared by the JSON schema validator tests: the validators under test and reusable metadata. */
+public class JsonSchemaValidatorTestBase {
+    private static final Map<ValidatorInfo, PhenopacketValidator> genericValidatorMap = JsonSchemaValidators.genericValidator();
+    private static final Map<ValidatorInfo, PhenopacketValidator> rareHpoValidatorMap = JsonSchemaValidators.rareHpoValidator();
+
+    /** First validator supplied by {@code JsonSchemaValidators.genericValidator()}. */
+    protected final PhenopacketValidator validator = genericValidatorMap.values().stream()
+            .findFirst()
+            .orElseThrow(() -> new PhenotoolsRuntimeException("Could not retrieve generic validator"));
+
+    /** First validator supplied by {@code JsonSchemaValidators.rareHpoValidator()}. */
+    protected final PhenopacketValidator rareDiseaseValidator = rareHpoValidatorMap.values().stream()
+            .findFirst()
+            .orElseThrow(() -> new PhenotoolsRuntimeException("Could not retrieve rare disease validator"));
+
+    protected static final MetaData metadataWithHpo = MetaDataBuilder.create("2021-07-01T19:32:35Z", "anonymous biocurator")
+            .submittedBy("anonymous submitter")
+            .resource(Resources.hpoVersion("2021-08-02"))
+            .resource(Resources.mondoVersion("2021-09-01"))
+            .externalReference("PMID:20842687",
+                    "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter")
+            .build();
+}
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/PhenotypicFeatureValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/PhenotypicFeatureValidatorTest.java
new file mode 100644
index 0000000..a47b909
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/PhenotypicFeatureValidatorTest.java
@@ -0,0 +1,78 @@
+package org.phenopackets.validator.jsonschema;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.google.protobuf.util.JsonFormat;
+import org.junit.jupiter.api.Test;
+import org.phenopackets.phenotools.builder.PhenopacketBuilder;
+import org.phenopackets.phenotools.builder.builders.EvidenceBuilder;
+import org.phenopackets.phenotools.builder.builders.PhenotypicFeatureBuilder;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.schema.v2.core.Evidence;
+import org.phenopackets.schema.v2.core.PhenotypicFeature;
+import org.phenopackets.validator.core.ValidationItem;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.phenopackets.validator.jsonschema.JsonValidationItemTypes.JSON_REQUIRED;
+
+
+/** Checks that the generic JSON schema validator reports missing required fields of a PhenotypicFeature. */
+public class PhenotypicFeatureValidatorTest extends JsonSchemaValidatorTestBase {
+    private static final String PMID = "PMID:30808312";
+    private static final String publication = "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report";
+
+    private static Phenopacket phenopacketWithOnePhenotypicFeature() {
+        var authorAssertion = EvidenceBuilder.authorStatementEvidence(PMID, publication);
+        var vsd = PhenotypicFeatureBuilder.create("HP:0001629", "Ventricular septal defect")
+                .congenitalOnset()
+                .evidence(authorAssertion)
+                .build();
+        return PhenopacketBuilder.create("ID:A", metadataWithHpo)
+                .phenotypicFeature(vsd)
+                .build();
+    }
+
+    private static final Phenopacket phenopacket = phenopacketWithOnePhenotypicFeature();
+
+    /** This Phenopacket is invalid, because the PhenotypicFeature is lacking its type (the ontology term) */
+    @Test
+    public void testPhenotypicFeatureWithoutOntologyTerm() throws InvalidProtocolBufferException {
+        assertTrue(phenopacket.getPhenotypicFeaturesCount() > 0);
+        PhenotypicFeature pf = phenopacket.getPhenotypicFeatures(0);
+        pf = PhenotypicFeature.newBuilder(pf).clearType().build();
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setPhenotypicFeatures(0, pf).build();
+        String json = JsonFormat.printer().print(p1);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertEquals(1, errors.size());
+        ValidationItem error = errors.get(0);
+        assertEquals(JSON_REQUIRED, error.type());
+        String expectedErrorMsg = "$.phenotypicFeatures[0].type: is missing but it is required";
+        assertEquals(expectedErrorMsg, error.message());
+    }
+
+    /**
+     * This Phenopacket is invalid, because the PhenotypicFeature has an Evidence element that is lacking
+     * evidenceCode
+     */
+    @Test
+    public void testPhenotypicFeatureWithInvalidEvidence() throws InvalidProtocolBufferException {
+        assertTrue(phenopacket.getPhenotypicFeaturesCount() > 0);
+        PhenotypicFeature pf = phenopacket.getPhenotypicFeatures(0);
+        assertTrue(pf.getEvidenceCount() > 0);
+        Evidence evi = pf.getEvidence(0);
+        evi = Evidence.newBuilder(evi).clearEvidenceCode().build();
+        pf = PhenotypicFeature.newBuilder(pf).setEvidence(0, evi).build();
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setPhenotypicFeatures(0, pf).build();
+        String json = JsonFormat.printer().print(p1);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertEquals(1, errors.size());
+        ValidationItem error = errors.get(0);
+        assertEquals(JSON_REQUIRED, error.type());
+        String expectedErrorMsg = "$.phenotypicFeatures[0].evidence[0].evidenceCode: is missing but it is required";
+        assertEquals(expectedErrorMsg, error.message());
+    }
+
+
+}
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/SimpleValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/SimpleValidatorTest.java
new file mode 100644
index 0000000..e194245
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/SimpleValidatorTest.java
@@ -0,0 +1,87 @@
+package org.phenopackets.validator.jsonschema;
+
+import org.junit.jupiter.api.Test;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.validator.testdatagen.RareDiseasePhenopacket;
+import org.phenopackets.validator.testdatagen.SimplePhenopacket;
+import org.phenopackets.validator.core.ValidationItem;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+import com.google.protobuf.util.JsonFormat;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.phenopackets.validator.jsonschema.JsonValidationItemTypes.JSON_ADDITIONAL_PROPERTIES;
+import static org.phenopackets.validator.jsonschema.JsonValidationItemTypes.JSON_REQUIRED;
+
+public class SimpleValidatorTest extends JsonSchemaValidatorTestBase {
+
+    private static final SimplePhenopacket simplePhenopacket = new SimplePhenopacket();
+    private static final RareDiseasePhenopacket rareDiseasePhenopacket = new RareDiseasePhenopacket();
+
+    /** Resolves a classpath resource to a File; converting via the URI decodes escapes such as %20. */
+    private static File fileFromClasspath(String path) {
+        java.net.URL url = Objects.requireNonNull(Thread.currentThread().getContextClassLoader().getResource(path), path);
+        return new File(java.net.URI.create(url.toExternalForm()));
+    }
+
+    /** The minimal phenopacket is valid as built and yields one JSON_REQUIRED error once its id is cleared. */
+    @Test
+    public void testValidationOfSimpleValidPhenopacket() throws Exception {
+        Phenopacket phenopacket = simplePhenopacket.getPhenopacket();
+        String json = JsonFormat.printer().print(phenopacket);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertTrue(errors.isEmpty());
+        // the Phenopacket is not valid if we remove the id
+        phenopacket = Phenopacket.newBuilder(phenopacket).clearId().build();
+        json = JsonFormat.printer().print(phenopacket);
+        errors = validator.validate(json);
+        assertEquals(1, errors.size());
+        ValidationItem error = errors.get(0);
+        assertEquals(JSON_REQUIRED, error.type());
+        assertEquals("$.id: is missing but it is required", error.message());
+    }
+
+    /**
+     * This example phenopacket does not contain anything except {"disney" : "donald"}
+     * It does not contain an id or a metaData element and thus should fail.
+     */
+    @Test
+    public void testValidationOfSimpleInValidPhenopacket() {
+        String invalidPhenopacketJson = "{\"disney\" : \"donald\"}";
+        List<? extends ValidationItem> errors = validator.validate(invalidPhenopacketJson);
+        assertEquals(3, errors.size());
+        ValidationItem error = errors.get(0);
+        assertEquals(JSON_REQUIRED, error.type());
+        assertEquals("$.id: is missing but it is required", error.message());
+        error = errors.get(1);
+        assertEquals(JSON_REQUIRED, error.type());
+        assertEquals("$.metaData: is missing but it is required", error.message());
+        error = errors.get(2);
+        assertEquals(JSON_ADDITIONAL_PROPERTIES, error.type());
+        assertEquals("$.disney: is not defined in the schema and the schema does not allow additional properties", error.message());
+    }
+
+    @Test
+    public void testRareDiseaseBethlemahmValidPhenopacket() throws Exception {
+        Phenopacket bethlehamMyopathy = rareDiseasePhenopacket.getPhenopacket();
+        String json = JsonFormat.printer().print(bethlehamMyopathy);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertTrue(errors.isEmpty());
+    }
+
+    @Test
+    public void testRareDiseaseBethlemahmInvalidValidPhenopacket() throws IOException {
+        File invalidMyopathyPhenopacket = fileFromClasspath("json/bethlehamMyopathyInvalidExample.json");
+        List<? extends ValidationItem> errors = validator.validate(invalidMyopathyPhenopacket);
+        assertEquals(1, errors.size());
+        ValidationItem error = errors.get(0);
+        assertEquals(JSON_ADDITIONAL_PROPERTIES, error.type());
+        String expectedErrorMsg = "$.phenotypicFeaturesMALFORMED: is not defined in the schema and the schema does not allow additional properties";
+        assertEquals(expectedErrorMsg, error.message());
+    }
+
+}
\ No newline at end of file
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ValueValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ValueValidatorTest.java
new file mode 100644
index 0000000..7197a10
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ValueValidatorTest.java
@@ -0,0 +1,117 @@
+package org.phenopackets.validator.jsonschema;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.google.protobuf.util.JsonFormat;
+import org.junit.jupiter.api.Test;
+import org.phenopackets.phenotools.builder.PhenopacketBuilder;
+import org.phenopackets.phenotools.builder.builders.*;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.schema.v2.core.*;
+import org.phenopackets.validator.core.ValidationItem;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass;
+
+/**
+ * Test a few errors in the Value messages
+ * @author Peter N Robinson
+ */
+public class ValueValidatorTest extends JsonSchemaValidatorTestBase {
+
+    private static Phenopacket phenopacketWithValue() {
+        MetaData meta = MetaDataBuilder.create("2021-07-01T19:32:35Z", "anonymous biocurator")
+                .submittedBy("anonymous submitter")
+                .resource(Resources.hpoVersion("2021-08-02"))
+                .resource(Resources.mondoVersion("2021-09-01"))
+                .build();
+        // Simple value for blood platelets
+        OntologyClass cellsPerMicroliter = ontologyClass("UO:0000316", "cells per microliter");
+        ReferenceRange rrange = ReferenceRangeCreator.create(cellsPerMicroliter, 150_000, 450_000);
+        Quantity quantity = QuantityBuilder.create(cellsPerMicroliter, 24000)
+                .referenceRange(rrange)
+                .build();
+        Value value = ValueBuilder.create(quantity).build();
+        OntologyClass plateletAssay = ontologyClass("LOINC:26515-7", "Platelets [#/volume] in Blood");
+        Measurement plateletM = MeasurementBuilder.value(plateletAssay, value).build();
+        return PhenopacketBuilder.create("id:A", meta)
+                .measurement(plateletM)
+                .build();
+    }
+
+    private static final Phenopacket phenopacket = phenopacketWithValue();
+
+    /**
+     * First test that the unaltered phenopacket is OK
+     */
+    @Test
+    public void validatePhenopacket() throws InvalidProtocolBufferException {
+        String json = JsonFormat.printer().print(phenopacket);
+        // validate, then print any unexpected errors so that a failing run is easy to diagnose
+        List<? extends ValidationItem> errors = validator.validate(json);
+        for (var e : errors) {
+            System.out.println(e.message());
+        }
+        assertTrue(errors.isEmpty());
+    }
+
+    /**
+     * Test what happens if we remove the value from the Measurement
+     * Note that the validation tool reports two errors
+     * $.measurements[0].value: is missing but it is required
+     * $.measurements[0].complexValue: is missing but it is required
+     * The second of which is incorrect but I think this is more a limitation of JSON Schema validation
+     * of oneOf elements
+     */
+    @Test
+    public void testMissingOntologyTerm() throws InvalidProtocolBufferException {
+        Measurement m1 = phenopacket.getMeasurements(0);
+        m1 = Measurement.newBuilder(m1).clearValue().build();
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build();
+        String json = JsonFormat.printer().print(p1);
+        // more than one error is reported (see above), so only require that the expected one is present
+        List<? extends ValidationItem> errors = validator.validate(json);
+        for (var e : errors) {
+            System.out.println(e.message());
+        }
+        assertFalse(errors.isEmpty());
+        String expectedErrorMsg = "$.measurements[0].value: is missing but it is required";
+        boolean foundError = errors.stream().map(ValidationItem::message).anyMatch(expectedErrorMsg::equals);
+        assertTrue(foundError, "was expecting to find \"" + expectedErrorMsg + "\" but did not");
+    }
+
+    /** A ReferenceRange whose low bound was cleared must yield exactly one "is missing" error. */
+    @Test
+    public void referenceRangeMissingLowValue() throws InvalidProtocolBufferException {
+        Measurement m1 = phenopacket.getMeasurements(0);
+        Value v1 = m1.getValue();
+        ReferenceRange rrange = v1.getQuantity().getReferenceRange();
+        rrange = ReferenceRange.newBuilder(rrange).clearLow().build();
+        Quantity q1 = Quantity.newBuilder(v1.getQuantity()).setReferenceRange(rrange).build();
+        v1 = Value.newBuilder(v1).setQuantity(q1).build();
+        m1 = Measurement.newBuilder(m1).setValue(v1).build();
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build();
+        String json = JsonFormat.printer().print(p1);
+        // only the low bound was cleared, so a single error is expected
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertEquals(1, errors.size());
+        String expectedErrorMsg = "$.measurements[0].value.quantity.referenceRange.low: is missing but it is required";
+        assertEquals(expectedErrorMsg, errors.get(0).message());
+    }
+
+    /** A Measurement whose assay was cleared must yield exactly one "is missing" error. */
+    @Test
+    public void measurementMissingAssay() throws InvalidProtocolBufferException {
+        Measurement m1 = phenopacket.getMeasurements(0);
+        m1 = Measurement.newBuilder(m1).clearAssay().build();
+        Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build();
+        String json = JsonFormat.printer().print(p1);
+        List<? extends ValidationItem> errors = validator.validate(json);
+        assertEquals(1, errors.size());
+        String expectedErrorMsg = "$.measurements[0].assay: is missing but it is required";
+        assertEquals(expectedErrorMsg, errors.get(0).message());
+    }
+
+
+}
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/DatagenBase.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/DatagenBase.java
new file mode 100644
index 0000000..48db3de
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/DatagenBase.java
@@ -0,0 +1,21 @@
+package org.phenopackets.validator.testdatagen;
+
+
+import org.phenopackets.schema.v2.core.OntologyClass;
+
+
+import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass;
+
+
+/** Constants shared by the classes that generate phenopackets for the validator tests. */
+public class DatagenBase {
+ // NOTE(review): presumably the Phenopacket schema version targeted by the test data -- confirm
+ public static final String SCHEMA_VERSION = "2.0";
+ // HPO terms describing the age of onset
+ public static final OntologyClass CONGENITAL_ONSET = ontologyClass("HP:0003577", "Congenital onset");
+ public static final OntologyClass CHILDHOOD_ONSET = ontologyClass("HP:0011463", "Childhood onset");
+ public static final OntologyClass ADULT_ONSET = ontologyClass("HP:0003581", "Adult onset");
+ // HPO term labelled "Severe"
+ public static final OntologyClass SEVERE = ontologyClass("HP:0012828", "Severe");
+
+}
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/RareDiseasePhenopacket.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/RareDiseasePhenopacket.java
new file mode 100644
index 0000000..9f65081
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/RareDiseasePhenopacket.java
@@ -0,0 +1,60 @@
+package org.phenopackets.validator.testdatagen;
+
+import org.phenopackets.phenotools.builder.PhenopacketBuilder;
+import org.phenopackets.phenotools.builder.builders.*;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.schema.v2.core.File;
+import org.phenopackets.schema.v2.core.Individual;
+import org.phenopackets.schema.v2.core.MetaData;
+
+/** Test data: a rare-disease phenopacket with one proband, four phenotypic features and a VCF file. */
+public class RareDiseasePhenopacket extends DatagenBase {
+
+    private final Phenopacket phenopacket;
+
+    public RareDiseasePhenopacket() {
+        // metadata and file reference do not depend on the clinical content, so they are built first
+        MetaData metaData = MetaDataBuilder.create("2021-07-01T19:32:35Z", "anonymous biocurator")
+                .submittedBy("anonymous submitter")
+                .resource(Resources.hpoVersion("2021-08-02"))
+                .resource(Resources.mondoVersion("2021-09-01"))
+                .externalReference("PMID:20842687",
+                        "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter")
+                .build();
+        File vcfFile = FileBuilder.hg38vcf("file://data/file.vcf.gz")
+                .individualToFileIdentifier("kindred 1A", "SAME000234")
+                .build();
+
+        Individual patient = IndividualBuilder.create("patient 1")
+                .dateOfBirth("1998-01-01T00:00:00Z")
+                .ageAtLastEncounter("P3Y")
+                .male()
+                .build();
+
+        // phenotypic features, declared in the order in which they are added to the phenopacket
+        var syndactyly = PhenotypicFeatureBuilder.create("HP:0001159", "Syndactyly")
+                .congenitalOnset().build();
+        var pneumonia = PhenotypicFeatureBuilder.create("HP:0002090", "Pneumonia")
+                .childhoodOnset().build();
+        var cryptorchidism = PhenotypicFeatureBuilder.create("HP:0000028", "Cryptorchidism")
+                .congenitalOnset().build();
+        var sinusitis = PhenotypicFeatureBuilder.create("HP:0011109", "Chronic sinusitis")
+                .severe().adultOnset().build();
+
+        var packetBuilder = PhenopacketBuilder.create("phenopacket-id-1", metaData)
+                .individual(patient)
+                .phenotypicFeature(syndactyly)
+                .phenotypicFeature(pneumonia)
+                .phenotypicFeature(cryptorchidism)
+                .phenotypicFeature(sinusitis)
+                .file(vcfFile);
+        this.phenopacket = packetBuilder.build();
+    }
+
+    /** @return the phenopacket assembled in the constructor */
+    public Phenopacket getPhenopacket() {
+        return this.phenopacket;
+    }
+
+
+}
diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/SimplePhenopacket.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/SimplePhenopacket.java
new file mode 100644
index 0000000..20735de
--- /dev/null
+++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/SimplePhenopacket.java
@@ -0,0 +1,34 @@
+package org.phenopackets.validator.testdatagen;
+
+import org.phenopackets.phenotools.builder.builders.MetaDataBuilder;
+import org.phenopackets.phenotools.builder.builders.Resources;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.schema.v2.core.MetaData;
+
+
+
+/**
+ * Build the simplest possible phenopacket for validation: only an id and metadata are set
+ */
+public class SimplePhenopacket extends DatagenBase {
+
+    private final Phenopacket phenopacket;
+
+    public SimplePhenopacket() {
+        MetaData metaData = MetaDataBuilder.create("2021-07-01T19:32:35Z", "anonymous biocurator")
+                .submittedBy("anonymous submitter")
+                .resource(Resources.hpoVersion("2021-08-02"))
+                .build();
+        Phenopacket.Builder packetBuilder = Phenopacket.newBuilder();
+        packetBuilder.setId("hello world");
+        packetBuilder.setMetaData(metaData);
+        this.phenopacket = packetBuilder.build();
+    }
+
+    /**
+     * @return the phenopacket assembled in the constructor
+     */
+    public Phenopacket getPhenopacket() {
+        return this.phenopacket;
+    }
+}
diff --git a/validator-core/src/test/resources/json/bethlehamMyopathyExample.json b/validator-jsonschema/src/test/resources/json/bethlehamMyopathyExample.json
similarity index 100%
rename from validator-core/src/test/resources/json/bethlehamMyopathyExample.json
rename to validator-jsonschema/src/test/resources/json/bethlehamMyopathyExample.json
diff --git a/validator-core/src/test/resources/json/bethlehamMyopathyInvalidExample.json b/validator-jsonschema/src/test/resources/json/bethlehamMyopathyInvalidExample.json
similarity index 100%
rename from validator-core/src/test/resources/json/bethlehamMyopathyInvalidExample.json
rename to validator-jsonschema/src/test/resources/json/bethlehamMyopathyInvalidExample.json
diff --git a/validator-jsonschema/src/test/resources/logback-test.xml b/validator-jsonschema/src/test/resources/logback-test.xml
new file mode 100644
index 0000000..0f78182
--- /dev/null
+++ b/validator-jsonschema/src/test/resources/logback-test.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+ %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/validator-ontology/pom.xml b/validator-ontology/pom.xml
new file mode 100644
index 0000000..3bd16a5
--- /dev/null
+++ b/validator-ontology/pom.xml
@@ -0,0 +1,36 @@
+
+
+
+ validator
+ org.phenopackets.validator
+ 0.1.1-SNAPSHOT
+
+ 4.0.0
+
+ validator-ontology
+
+
+
+ org.phenopackets.validator
+ validator-core
+ ${project.parent.version}
+
+
+ org.monarchinitiative.phenol
+ phenol-core
+
+
+ org.phenopackets
+ phenopacket-schema
+
+
+
+ com.google.protobuf
+ protobuf-java-util
+
+
+
+
\ No newline at end of file
diff --git a/validator-ontology/src/main/java/org/phenopackets/validator/ontology/HpoValidator.java b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/HpoValidator.java
new file mode 100644
index 0000000..b4d4dd4
--- /dev/null
+++ b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/HpoValidator.java
@@ -0,0 +1,110 @@
+package org.phenopackets.validator.ontology;
+
+import com.google.protobuf.util.JsonFormat;
+import org.monarchinitiative.phenol.base.PhenolRuntimeException;
+import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenol.ontology.data.TermId;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.schema.v2.core.PhenotypicFeature;
+import org.phenopackets.validator.core.ValidationItem;
+import org.phenopackets.validator.core.ValidationItemTypes;
+import org.phenopackets.validator.core.ValidatorInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+
+/**
+ * The validator checks the Phenotypic features for HPO terms. If HPO terms
+ * are used there, then we check that the HPO terms are included in the ontology (thus, we check for
+ * invalid HPO ids, and also check if the id used is the latest or if it is an alternate id).
+ *
+ * The validator performs logical checks too. Due to the annotation propagation rule, where presence of a child term
+ * implies presence of all its ancestors, it is an error to specify both a child term and its parent, and only the
+ * child term must be used. The rule is checked for both observed and excluded terms.
+ *
+ * In addition, if there are both observed and excluded terms, we check that there is no logical inconsistency, e.g.,
+ * NOT Abnormal liver morphology but OBSERVED Hepatic fibrosis. The latter is not possible because
+ * Hepatic fibrosis implies Abnormal liver morphology.
+ */
+public class HpoValidator extends OntologyValidator {
+    private static final Logger LOGGER = LoggerFactory.getLogger(HpoValidator.class);
+
+    /** Prefix of HPO term ids; phenotypic features whose term id has another prefix are not checked. */
+    private static final String HPO_PREFIX = "HP";
+
+    /** Creates a validator that checks HPO terms against {@code hpoOntology}. */
+    public HpoValidator(Ontology hpoOntology) {
+        super(hpoOntology, ValidatorInfo.of(ONTOLOGY_VALIDATOR, "Human Phenotype Ontology"));
+    }
+
+    /** Reads the whole stream as UTF-8 text and parses it as a JSON-encoded {@link Phenopacket}. */
+    private static Phenopacket readPhenopacket(InputStream inputStream) throws IOException {
+        String phenopacketJsonString = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8);
+        Phenopacket.Builder phenoPacketBuilder = Phenopacket.newBuilder();
+        JsonFormat.parser().merge(phenopacketJsonString, phenoPacketBuilder);
+        return phenoPacketBuilder.build();
+    }
+
+    @Override
+    public ValidatorInfo info() {
+        return validatorInfo;
+    }
+
+    /** Validates the HPO terms of the JSON phenopacket in {@code inputStream}; unreadable input yields one syntax error item. */
+    @Override
+    public List<ValidationItem> validate(InputStream inputStream) {
+        Phenopacket phenopacket;
+        try {
+            phenopacket = readPhenopacket(inputStream);
+        } catch (IOException e) {
+            LOGGER.warn("Error while validating: {}", e.getMessage(), e);
+            return List.of(ValidationItem.of(validatorInfo, ValidationItemTypes.syntaxError(), e.getMessage()));
+        }
+
+        List<ValidationItem> errors = new ArrayList<>();
+        Set<TermId> observedFeatures = new HashSet<>();
+        Set<TermId> excludedFeatures = new HashSet<>();
+        partitionTermsToObservedAndExcluded(phenopacket.getPhenotypicFeaturesList(), errors, observedFeatures, excludedFeatures);
+
+        checkTermsArePresentInOntology(observedFeatures, errors);
+        checkTermsUsePrimaryId(observedFeatures, errors);
+        checkTermsArePresentInOntology(excludedFeatures, errors);
+        checkTermsUsePrimaryId(excludedFeatures, errors);
+        checkAncestry(observedFeatures, excludedFeatures, errors);
+        return errors;
+    }
+
+    /**
+     * Sorts the HPO term ids of {@code phenotypicFeatures} into {@code observedFeatures} and {@code excludedFeatures}.
+     * An id that cannot be parsed is reported in {@code errors}; ids with a non-HPO prefix are skipped.
+     */
+    private void partitionTermsToObservedAndExcluded(Iterable<PhenotypicFeature> phenotypicFeatures,
+                                                     List<ValidationItem> errors,
+                                                     Set<TermId> observedFeatures,
+                                                     Set<TermId> excludedFeatures) {
+        for (PhenotypicFeature pf : phenotypicFeatures) {
+            TermId tid;
+            String id = pf.getType().getId();
+            try {
+                tid = TermId.of(id);
+                if (!tid.getPrefix().equals(HPO_PREFIX)) {
+                    continue; // only check HPO terms
+                }
+            } catch (PhenolRuntimeException pe) {
+                String message = String.format("Invalid ontology term id: %s", id);
+                errors.add(ValidationItem.of(validatorInfo, OntologyValidationItemTypes.ONTOLOGY_INVALID_ID, message));
+                continue;
+            }
+
+            (pf.getExcluded() ? excludedFeatures : observedFeatures).add(tid);
+        }
+    }
+}
diff --git a/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidationItemTypes.java b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidationItemTypes.java
new file mode 100644
index 0000000..45cc3b0
--- /dev/null
+++ b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidationItemTypes.java
@@ -0,0 +1,29 @@
+package org.phenopackets.validator.ontology;
+
+import org.phenopackets.validator.core.ValidationItemType;
+
+class OntologyValidationItemTypes {
+
+    /**
+     * A term id, e.g., HP:1234567, that is syntactically correct but does not exist in the ontology.
+     * {@code HpoValidator} also reports term ids that cannot be parsed under this type.
+     */
+    static final ValidationItemType ONTOLOGY_INVALID_ID =
+            ValidationItemType.of("Invalid Ontology Term ID", "Term id does not exist in ontology");
+
+    /** A term id that is an alternate_id, i.e., an outdated id that should be replaced by the term's primary id. */
+    static final ValidationItemType ONTOLOGY_TERM_WITH_ALTERNATE_ID =
+            ValidationItemType.of("Use of outdated Ontology Term ID", "Term id is not the primary id for this term");
+
+    /** A term is listed together with one of its ancestors; only the more specific term should be kept. */
+    static final ValidationItemType TERM_AND_ANCESTOR_ARE_USED =
+            ValidationItemType.of("Term and ancestor are used", "Only the child/specific term should be used");
+
+    /** A term is reported as observed although one of its ancestors is excluded. */
+    static final ValidationItemType TERM_IS_USED_AND_ANCESTOR_IS_EXCLUDED =
+            ValidationItemType.of("Term is used while ancestor is excluded", "The term is used despite the ancestor term has been excluded");
+
+    /** Constants holder, not meant to be instantiated. */
+    private OntologyValidationItemTypes() {}
+
+}
diff --git a/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidator.java b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidator.java
new file mode 100644
index 0000000..28ecf2f
--- /dev/null
+++ b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidator.java
@@ -0,0 +1,98 @@
+package org.phenopackets.validator.ontology;
+
+import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenol.ontology.data.TermId;
+import org.phenopackets.validator.core.PhenopacketValidator;
+
+import org.phenopackets.validator.core.ValidationItem;
+import org.phenopackets.validator.core.ValidatorInfo;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Set;
+
+public abstract class OntologyValidator implements PhenopacketValidator {
+    private static final Logger LOGGER = LoggerFactory.getLogger(OntologyValidator.class);
+
+    protected static final String ONTOLOGY_VALIDATOR = "Ontology validation";
+
+    protected final ValidatorInfo validatorInfo;
+    protected final Ontology ontology;
+
+    protected OntologyValidator(Ontology ontology, ValidatorInfo vinfo) {
+        this.ontology = ontology;
+        this.validatorInfo = vinfo;
+    }
+
+    /** Adds an {@code ONTOLOGY_INVALID_ID} item to {@code errors} for every term id the ontology does not contain. */
+    protected void checkTermsArePresentInOntology(Iterable<TermId> termIds, List<ValidationItem> errors) {
+        for (TermId termId : termIds) {
+            if (!ontology.containsTerm(termId)) {
+                String message = String.format("Could not find term id: %s", termId.getValue());
+                ValidationItem item = ValidationItem.of(validatorInfo, OntologyValidationItemTypes.ONTOLOGY_INVALID_ID, message);
+                errors.add(item);
+            }
+        }
+    }
+
+    /** Adds an {@code ONTOLOGY_TERM_WITH_ALTERNATE_ID} item to {@code errors} for every term id that differs from its primary id. */
+    protected void checkTermsUsePrimaryId(Iterable<TermId> termIds, List<ValidationItem> errors) {
+        for (TermId termId : termIds) {
+            TermId primaryId = ontology.getPrimaryTermId(termId);
+            if (primaryId == null) {
+                // not in ontology, this is checked in `checkTermsArePresentInOntology`
+                continue;
+            }
+            if (!primaryId.equals(termId)) {
+                String message = String.format("Using alternate (obsolete) id (%s) instead of primary id (%s)", termId.getValue(), primaryId.getValue());
+                ValidationItem item = ValidationItem.of(validatorInfo, OntologyValidationItemTypes.ONTOLOGY_TERM_WITH_ALTERNATE_ID, message);
+                errors.add(item);
+            }
+        }
+    }
+
+    /**
+     * Reports terms used together with one of their ancestors, within the observed and within the excluded terms,
+     * and observed terms that have an excluded ancestor.
+     */
+    protected void checkAncestry(Set<TermId> observedTerms, Set<TermId> excludedTerms, List<ValidationItem> errors) {
+        // OBSERVED TERMS: it is an error to provide term and its ancestor,
+        // or to provide term while the ancestor is excluded.
+        for (TermId term : observedTerms) {
+            Set<TermId> ancestors = ontology.getAncestorTermIds(term);
+            for (TermId ancestor : ancestors) {
+                if (ancestor.equals(term)) {
+                    continue; // skip the term itself when it is part of its own ancestor set
+                }
+
+                if (observedTerms.contains(ancestor)) {
+                    String message = String.format("Using both term %s and its ancestor %s", term.getValue(), ancestor.getValue());
+                    errors.add(ValidationItem.of(validatorInfo, OntologyValidationItemTypes.TERM_AND_ANCESTOR_ARE_USED, message));
+                }
+
+                if (excludedTerms.contains(ancestor)) {
+                    String message = String.format("Term %s is present while its ancestor %s is excluded", term.getValue(), ancestor.getValue());
+                    errors.add(ValidationItem.of(validatorInfo, OntologyValidationItemTypes.TERM_IS_USED_AND_ANCESTOR_IS_EXCLUDED, message));
+                }
+            }
+        }
+
+        // EXCLUDED TERMS: it is an error to provide term and its ancestor. It is, however, OK to provide term while
+        // the ancestor is present, i.e. `Arachnodactyly` is excluded, while `Long fingers` are present.
+        for (TermId term : excludedTerms) {
+            Set<TermId> ancestors = ontology.getAncestorTermIds(term);
+            for (TermId ancestor : ancestors) {
+                if (ancestor.equals(term)) {
+                    continue;
+                }
+
+                if (excludedTerms.contains(ancestor)) {
+                    String message = String.format("Using both term %s and its ancestor %s", term.getValue(), ancestor.getValue());
+                    errors.add(ValidationItem.of(validatorInfo, OntologyValidationItemTypes.TERM_AND_ANCESTOR_ARE_USED, message));
+                }
+            }
+        }
+    }
+}
diff --git a/validator-ontology/src/test/java/org/phenopackets/validator/ontology/HpoValidatorTest.java b/validator-ontology/src/test/java/org/phenopackets/validator/ontology/HpoValidatorTest.java
new file mode 100644
index 0000000..3d6cd96
--- /dev/null
+++ b/validator-ontology/src/test/java/org/phenopackets/validator/ontology/HpoValidatorTest.java
@@ -0,0 +1,178 @@
+package org.phenopackets.validator.ontology;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.google.protobuf.util.JsonFormat;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenol.ontology.data.TermId;
+import org.phenopackets.schema.v2.Phenopacket;
+import org.phenopackets.schema.v2.core.OntologyClass;
+import org.phenopackets.schema.v2.core.PhenotypicFeature;
+import org.phenopackets.validator.core.ValidationItem;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Set;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasSize;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class HpoValidatorTest {
+
+    public Ontology ontology;
+
+    public HpoValidator validator;
+
+    // serialize the features as a JSON phenopacket, the input format the validator reads
+    private static byte[] wrapInPhenopacketAndGetJsonBytes(List<PhenotypicFeature> phenotypicFeatures) throws InvalidProtocolBufferException {
+        Phenopacket phenopacket = Phenopacket.newBuilder()
+                .addAllPhenotypicFeatures(phenotypicFeatures)
+                .build();
+        return JsonFormat.printer().print(phenopacket).getBytes(StandardCharsets.UTF_8);
+    }
+
+    // build a feature that only has the term id and the excluded flag set
+    private static PhenotypicFeature makePhenotypicFeature(String termId, boolean excluded) {
+        return PhenotypicFeature.newBuilder()
+                .setType(OntologyClass.newBuilder().setId(termId).build())
+                .setExcluded(excluded)
+                .build();
+    }
+
+    // set up the Ontology mocks. There is no better way to test this unless we stash ~2mb hp.json.gz in the repo
+    private static Ontology prepareMiniOntology() {
+        Ontology ontology = mock(Ontology.class);
+        TermId one = TermId.of("HP:0000001"); // the root
+        TermId two = TermId.of("HP:0000002"); // one's child
+        TermId three = TermId.of("HP:0000003"); // one's child
+        TermId four = TermId.of("HP:0000004"); // three's child
+        TermId five = TermId.of("HP:0000005"); // three's child
+        when(ontology.containsTerm(one)).thenReturn(true);
+        when(ontology.getPrimaryTermId(one)).thenReturn(one);
+        when(ontology.containsTerm(two)).thenReturn(true);
+        when(ontology.getPrimaryTermId(two)).thenReturn(two);
+        when(ontology.containsTerm(three)).thenReturn(true);
+        when(ontology.getPrimaryTermId(three)).thenReturn(three);
+        when(ontology.containsTerm(four)).thenReturn(true);
+        when(ontology.getPrimaryTermId(four)).thenReturn(four);
+        when(ontology.containsTerm(five)).thenReturn(true);
+        when(ontology.getPrimaryTermId(five)).thenReturn(five);
+
+        when(ontology.getAncestorTermIds(one)).thenReturn(Set.of(one));
+        when(ontology.getAncestorTermIds(two)).thenReturn(Set.of(two, one));
+        when(ontology.getAncestorTermIds(three)).thenReturn(Set.of(three, one));
+        when(ontology.getAncestorTermIds(four)).thenReturn(Set.of(four, three, one));
+        when(ontology.getAncestorTermIds(five)).thenReturn(Set.of(five, three, one));
+
+        return ontology;
+    }
+
+    @BeforeEach
+    public void setUp() {
+        ontology = prepareMiniOntology();
+        validator = new HpoValidator(ontology);
+    }
+
+    // inputs for which the validator must return no items
+    @Nested
+    public class NoErrorsInValidation {
+
+        @Test
+        public void disjointTerms() throws Exception {
+            List<PhenotypicFeature> phenotypicFeatures = List.of(
+                    makePhenotypicFeature("HP:0000002", false),
+                    makePhenotypicFeature("HP:0000005", false));
+            byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures);
+
+            List<ValidationItem> items = validator.validate(new ByteArrayInputStream(bytes));
+
+            assertThat(items, hasSize(0));
+        }
+
+        @Test
+        public void parentIsPresentWhileChildrenAreExcluded() throws Exception {
+            List<PhenotypicFeature> phenotypicFeatures = List.of(
+                    makePhenotypicFeature("HP:0000003", false),
+                    makePhenotypicFeature("HP:0000004", true),
+                    makePhenotypicFeature("HP:0000005", true)
+            );
+            byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures);
+
+            List<ValidationItem> items = validator.validate(new ByteArrayInputStream(bytes));
+
+            assertThat(items, hasSize(0));
+        }
+    }
+
+    // inputs for which the validator must return exactly one item of the expected type
+    @Nested
+    public class ValidationFails {
+
+        @Test
+        public void termIdIsAbsentFromOntology() throws Exception {
+            List<PhenotypicFeature> phenotypicFeatures = List.of(makePhenotypicFeature("HP:9999999", false));
+            byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures);
+
+            List<ValidationItem> items = validator.validate(new ByteArrayInputStream(bytes));
+
+            assertThat(items, hasSize(1));
+            ValidationItem expected = ValidationItem.of(validator.info(), OntologyValidationItemTypes.ONTOLOGY_INVALID_ID, "Could not find term id: HP:9999999");
+            assertThat(items.get(0), equalTo(expected));
+        }
+
+        @Test
+        public void nonPrimaryTermIdIsUsed() throws Exception {
+            List<PhenotypicFeature> phenotypicFeatures = List.of(makePhenotypicFeature("HP:9999999", false));
+            byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures);
+
+            when(ontology.containsTerm(TermId.of("HP:9999999"))).thenReturn(true);
+            when(ontology.getPrimaryTermId(TermId.of("HP:9999999"))).thenReturn(TermId.of("HP:0000001"));
+
+            List<ValidationItem> items = validator.validate(new ByteArrayInputStream(bytes));
+
+            assertThat(items, hasSize(1));
+            ValidationItem expected = ValidationItem.of(validator.info(),
+                    OntologyValidationItemTypes.ONTOLOGY_TERM_WITH_ALTERNATE_ID,
+                    "Using alternate (obsolete) id (HP:9999999) instead of primary id (HP:0000001)");
+            assertThat(items.get(0), equalTo(expected));
+        }
+
+        @Test
+        public void usingTermAlongWithItsAncestor() throws Exception {
+            List<PhenotypicFeature> phenotypicFeatures = List.of(
+                    makePhenotypicFeature("HP:0000003", false),
+                    makePhenotypicFeature("HP:0000004", false));
+            byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures);
+
+            List<ValidationItem> items = validator.validate(new ByteArrayInputStream(bytes));
+
+            assertThat(items, hasSize(1));
+            ValidationItem expected = ValidationItem.of(validator.info(),
+                    OntologyValidationItemTypes.TERM_AND_ANCESTOR_ARE_USED,
+                    "Using both term HP:0000004 and its ancestor HP:0000003");
+            assertThat(items.get(0), equalTo(expected));
+        }
+
+        @Test
+        public void usingTermWhileParentTermIsExcluded() throws Exception {
+            List<PhenotypicFeature> phenotypicFeatures = List.of(
+                    makePhenotypicFeature("HP:0000003", true),
+                    makePhenotypicFeature("HP:0000004", false));
+            byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures);
+
+            List<ValidationItem> items = validator.validate(new ByteArrayInputStream(bytes));
+
+            assertThat(items, hasSize(1));
+            ValidationItem expected = ValidationItem.of(validator.info(),
+                    OntologyValidationItemTypes.TERM_IS_USED_AND_ANCESTOR_IS_EXCLUDED,
+                    "Term HP:0000004 is present while its ancestor HP:0000003 is excluded");
+            assertThat(items.get(0), equalTo(expected));
+        }
+    }
+}
\ No newline at end of file