diff --git a/.gitignore b/.gitignore index 28ae4e6..7e1893a 100644 --- a/.gitignore +++ b/.gitignore @@ -125,3 +125,4 @@ nb-configuration.xml .nb-gradle/ # Created by .ignore support plugin (hsz.mobi) +/phenopacket-validation.tsv diff --git a/README.md b/README.md index 7b08824..72b4f52 100644 --- a/README.md +++ b/README.md @@ -9,3 +9,8 @@ Library and tools to help validate phenopackets This app uses version 2.0.0 (branch v2) of [phenopacket-schema](https://github.com/phenopackets/phenopacket-schema). To build this app, clone phenopacket-schema and ``mvn install`` the ``v2`` branch locally. The you should be able to build and run this app with standard maven/java. Note the app requires Java 11 or higher. + +It also uses version 0.0.1-SNAPSHOT of [phenopacket-tools](https://github.com/phenopackets/phenopacket-tools) +to create the test cases. It also needs to be installed locally using Maven. + +We plan to put all requirements into Maven Central shortly to simplify the build. \ No newline at end of file diff --git a/mvnw b/mvnw old mode 100644 new mode 100755 diff --git a/pom.xml b/pom.xml index da1fe05..33dfa05 100644 --- a/pom.xml +++ b/pom.xml @@ -4,15 +4,19 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - org.phenopackets.phenopacket-schema + org.phenopackets.validator validator 0.1.1-SNAPSHOT pom + Phenopacket validator + Library and tools to help validate phenopackets validator-core validator-cli + validator-jsonschema + validator-ontology @@ -60,7 +64,12 @@ UTF-8 - 11 + 16 + + 1.6.3 + 3.14.0 + 2.0.0 + 5.7.1 @@ -83,39 +92,76 @@ + + + + org.monarchinitiative.phenol + phenol-core + ${phenol.version} + + + org.monarchinitiative.phenol + phenol-io + ${phenol.version} + + + org.phenopackets + phenopacket-schema + ${phenopacket-schema.version} + + + com.google.protobuf + protobuf-java-util + ${protobuf.version} + + + com.google.guava + guava + + + + + + com.networknt + json-schema-validator + 1.0.42 + + +
+ + org.mockito + mockito-core + 3.12.4 + + + + org.slf4j slf4j-api + - ch.qos.logback - logback-classic + org.junit.jupiter + junit-jupiter test - - - - - - - org.junit.jupiter - junit-jupiter-api - ${junit.jupiter.version} + org.hamcrest + hamcrest-core + test + + + org.mockito + mockito-core test - org.junit.jupiter - junit-jupiter-engine - ${junit.jupiter.version} - test - - - org.hamcrest - hamcrest-core - test - + org.slf4j + slf4j-simple + test + \ No newline at end of file diff --git a/validator-cli/pom.xml b/validator-cli/pom.xml index 1b1d521..69831cf 100644 --- a/validator-cli/pom.xml +++ b/validator-cli/pom.xml @@ -5,58 +5,36 @@ 4.0.0 - org.phenopackets.phenopacket-schema + org.phenopackets.validator validator 0.1.1-SNAPSHOT validator-cli - 0.1.1-SNAPSHOT - jar validator-cli Command-line utility for validating phenopackets - - UTF-8 - UTF-8 - 1.8 - - - org.phenopackets.phenopacket-schema - validator-core + org.phenopackets.validator + validator-jsonschema ${project.parent.version} - org.springframework.boot - spring-boot-autoconfigure + org.phenopackets.validator + validator-ontology + ${project.parent.version} - ch.qos.logback - logback-classic + org.monarchinitiative.phenol + phenol-io info.picocli - picocli-spring-boot-starter + picocli 4.6.1 - - - - src/main/resources - true - - - - - org.springframework.boot - spring-boot-maven-plugin - - - - diff --git a/validator-cli/src/main/java/org/phenopackets/schema/validator/cli/ValidateCommand.java b/validator-cli/src/main/java/org/phenopackets/schema/validator/cli/ValidateCommand.java deleted file mode 100644 index a1e2e94..0000000 --- a/validator-cli/src/main/java/org/phenopackets/schema/validator/cli/ValidateCommand.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.phenopackets.schema.validator.cli; - - -import org.phenopackets.schema.validator.core.PhenopacketValidator; -import org.phenopackets.schema.validator.core.jsonschema.JsonSchemaValidator; -import 
org.phenopackets.schema.validator.core.validation.ValidationItem; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.stereotype.Component; -import picocli.CommandLine.Command; -import picocli.CommandLine.Option; -import picocli.CommandLine.Parameters; - -import java.util.List; -import java.util.concurrent.Callable; - -@Component -@Command(name = "validate", - mixinStandardHelpOptions = true) -public class ValidateCommand implements Callable { - private static Logger LOG = LoggerFactory - .getLogger(ValidateCommand.class); - @Option(names = {"-p","--phenopacket"}, description = "Path to phenopacket", required = true) - private String phenopacketPath; - - @Option(names = "--rare", description = "apply HPO rare-disease constraints") - private boolean rareHpoConstraints = false; - - @Parameters(description = "one or more phenopacket files") - private List positionals; - - - - - @Override - public Integer call() { - System.out.printf("mycommand was called with --rare=%s and positionals: %s%n", rareHpoConstraints, positionals); - LOG.info("EXECUTING : command line runner"); - PhenopacketValidator validator; - if (rareHpoConstraints) { - validator = new PhenopacketValidator(phenopacketPath, PhenopacketValidator.ValidationType.RARE_DISEASE_VALIDATION); - } else if (positionals.size() > 0) { - String[] itemsArray = new String[positionals.size()]; - itemsArray = positionals.toArray(itemsArray); - validator = new PhenopacketValidator(phenopacketPath, itemsArray); - } else { - validator = new PhenopacketValidator(phenopacketPath); - } - List errors = validator.getValidationErrors(); - if (errors.isEmpty()) { - System.out.println("\t no errors found"); - } else { - for (ValidationItem ve : errors) { - System.out.println("\t(" + ve.errorType() + ") " + ve.message()); - } - } - return 0; - } - -} \ No newline at end of file diff --git a/validator-cli/src/main/java/org/phenopackets/schema/validator/cli/ValidatorApplication.java 
b/validator-cli/src/main/java/org/phenopackets/schema/validator/cli/ValidatorApplication.java deleted file mode 100644 index a4757ff..0000000 --- a/validator-cli/src/main/java/org/phenopackets/schema/validator/cli/ValidatorApplication.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.phenopackets.schema.validator.cli; - -import org.phenopackets.schema.validator.core.jsonschema.JsonSchemaValidator; -import org.phenopackets.schema.validator.core.validation.ValidationItem; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.boot.SpringApplication; -import org.springframework.boot.autoconfigure.SpringBootApplication; - - -/** - * @author Jules Jacobsen - */ -@SpringBootApplication -public class ValidatorApplication { - - private static Logger LOG = LoggerFactory - .getLogger(ValidatorApplication.class); - - public static void main(String[] args) { - LOG.info("STARTING THE APPLICATION"); - SpringApplication.run(ValidatorApplication.class, args); - LOG.info("APPLICATION FINISHED"); - } - - -} diff --git a/validator-cli/src/main/java/org/phenopackets/schema/validator/cli/ValidatorApplicationRunner.java b/validator-cli/src/main/java/org/phenopackets/schema/validator/cli/ValidatorApplicationRunner.java deleted file mode 100644 index 1cecee4..0000000 --- a/validator-cli/src/main/java/org/phenopackets/schema/validator/cli/ValidatorApplicationRunner.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.phenopackets.schema.validator.cli; - -import org.phenopackets.schema.validator.core.jsonschema.JsonSchemaValidator; -import org.phenopackets.schema.validator.core.validation.ValidationItem; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import picocli.CommandLine; -import picocli.CommandLine.IFactory; - -import org.springframework.boot.CommandLineRunner; -import org.springframework.boot.ExitCodeGenerator; -import org.springframework.stereotype.Component; - -@Component -public class ValidatorApplicationRunner implements CommandLineRunner, 
ExitCodeGenerator { - private static Logger LOG = LoggerFactory - .getLogger(ValidatorApplicationRunner.class); - private final ValidateCommand myCommand; - - private final IFactory factory; // auto-configured to inject PicocliSpringFactory - - private int exitCode; - - public ValidatorApplicationRunner(ValidateCommand myCommand, IFactory factory) { - this.myCommand = myCommand; - this.factory = factory; - } - - - @Override - public void run(String... args) { - exitCode = new CommandLine(myCommand, factory).execute(args); - } - - @Override - public int getExitCode() { - return exitCode; - } -} - diff --git a/validator-cli/src/main/java/org/phenopackets/validator/cli/ValidatorApplication.java b/validator-cli/src/main/java/org/phenopackets/validator/cli/ValidatorApplication.java new file mode 100644 index 0000000..b9713c1 --- /dev/null +++ b/validator-cli/src/main/java/org/phenopackets/validator/cli/ValidatorApplication.java @@ -0,0 +1,118 @@ +package org.phenopackets.validator.cli; + +import org.monarchinitiative.phenol.io.OntologyLoader; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.phenopackets.validator.cli.results.ValidationItemTsvVisualizer; +import org.phenopackets.validator.cli.results.ValidationTsvVisualizer; +import org.phenopackets.validator.core.PhenopacketValidator; +import org.phenopackets.validator.core.PhenopacketValidatorRegistry; +import org.phenopackets.validator.core.ValidationItem; +import org.phenopackets.validator.core.ValidatorInfo; +import org.phenopackets.validator.core.except.PhenopacketValidatorRuntimeException; +import org.phenopackets.validator.jsonschema.JsonSchemaValidators; +import org.phenopackets.validator.jsonschema.JsonSchemaValidator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.*; +import java.nio.file.Files; +import java.util.*; +import java.io.IOException; + + +@CommandLine.Command(name = "PhenopacketValidator", version = "0.1.0",
mixinStandardHelpOptions = true) +public class ValidatorApplication implements Runnable { + private static final Logger LOGGER = LoggerFactory.getLogger(ValidatorApplication.class); + + @CommandLine.Option(names = "--rare", description = "apply HPO rare-disease constraints") + public boolean rareHpoConstraints = false; + + @CommandLine.Parameters(arity = "0..*", description = "one or more JSON Schema configuration files") + public List jsonSchemaFiles = List.of(); + + @CommandLine.Option(names = "--hp", required = true, description = "check with HPO (hp.json)") + public File hpoJsonPath; + + @CommandLine.Option(names = {"-p", "--phenopacket"}, required = true, description = "Phenopacket file to be validated") + public String phenopacketFilePath; + + @CommandLine.Option(names = {"-o", "--out"}, description = "name of output file (default ${DEFAULT_VALUE})") + public String outfileName = "phenopacket-validation.tsv"; + + private static final String CUSTOM_JSON_VALIDATOR_TYPE = "Custom JSON Schema validation"; + + + /** + * Parse the command line, run the validation, and terminate the JVM with the exit code returned by picocli. + */ + public static void main(String[] args) { + LOGGER.info("STARTING THE APPLICATION"); + int exitCode = new CommandLine(new ValidatorApplication()).execute(args); + // System.exit never returns, so the final log message must precede it. + LOGGER.info("APPLICATION FINISHED"); + System.exit(exitCode); + } + + + /** + * Run the validators supplied by JsonSchemaValidators.genericValidator(), one validator per custom JSON Schema + * file, and the HPO validator on the phenopacket, and write the results to the TSV output file. + */ + @Override + public void run() { + File phenopacketFile = new File(phenopacketFilePath); + LOGGER.info("Validating {} phenopacket", phenopacketFile); + Map validatorMap = new HashMap<>(JsonSchemaValidators.genericValidator()); + for (File jsonSchema : jsonSchemaFiles) { + // we will create ValidatorInfo objects based on the names and paths of the files.
+ String baseName = jsonSchema.getName(); + ValidatorInfo vinfo = ValidatorInfo.of(CUSTOM_JSON_VALIDATOR_TYPE, baseName); + PhenopacketValidator jvalid = JsonSchemaValidator.of(jsonSchema, vinfo); + validatorMap.put(vinfo, jvalid); + LOGGER.info("Adding configuration file at `{}`", vinfo); + } + Ontology hpoOntology = OntologyLoader.loadOntology(hpoJsonPath); + PhenopacketValidator hpoValidator = new HpoValidator(hpoOntology); + validatorMap.put(hpoValidator.info(), hpoValidator); + PhenopacketValidatorRegistry registry = PhenopacketValidatorRegistry.of(validatorMap); + + // poor man's formatting + LOGGER.info(""); + LOGGER.info("--------------------------------------------------------------------------------"); + LOGGER.info(""); + ValidationTsvVisualizer resultVisualizer = new ValidationTsvVisualizer(); + try { + String phenopacketJson = Files.readString(phenopacketFile.toPath()); + Set validatorInfoSet = registry.getValidationTypeSet(); + for (var vinfo : validatorInfoSet) { + var opt = registry.getValidatorForType(vinfo); + if (opt.isPresent()) { + PhenopacketValidator validator = opt.get(); + List validationItems = validator.validate(phenopacketJson); + if (validationItems.isEmpty()) { + resultVisualizer.errorFree(vinfo); + } else { + for (var item : validationItems) { + resultVisualizer.error(vinfo, new ValidationItemTsvVisualizer(item)); + } + } + } + } + } catch (IOException e) { + LOGGER.warn("Error opening the phenopacket", e); + } + // poor man's formatting + LOGGER.info(""); + LOGGER.info("--------------------------------------------------------------------------------"); + LOGGER.info(""); + + // TSV output + try (BufferedWriter bw = new BufferedWriter(new FileWriter(outfileName))) { + resultVisualizer.write(bw); + } catch (IOException e) { + LOGGER.warn("Error writing the validation results", e); + } + } + +} diff --git a/validator-cli/src/main/java/org/phenopackets/validator/cli/results/ValidationItemTsvVisualizer.java
b/validator-cli/src/main/java/org/phenopackets/validator/cli/results/ValidationItemTsvVisualizer.java new file mode 100644 index 0000000..d72c961 --- /dev/null +++ b/validator-cli/src/main/java/org/phenopackets/validator/cli/results/ValidationItemTsvVisualizer.java @@ -0,0 +1,23 @@ +package org.phenopackets.validator.cli.results; + +import org.phenopackets.validator.core.ValidationItem; + +import java.util.List; + +/** + * Visualizer for validationItems + */ +public class ValidationItemTsvVisualizer { + private final ValidationItem error; + public ValidationItemTsvVisualizer(ValidationItem item) { + this.error = item; + } + + /** + * Get a list of fields for display + * @return A list of Strings that can be used to create a row of a TSV or CSV file + */ + public List getFields() { + return List.of(error.type().itemType(), error.message(), error.validatorInfo().validatorId()); + } +} diff --git a/validator-cli/src/main/java/org/phenopackets/validator/cli/results/ValidationTsvVisualizer.java b/validator-cli/src/main/java/org/phenopackets/validator/cli/results/ValidationTsvVisualizer.java new file mode 100644 index 0000000..2bcf80a --- /dev/null +++ b/validator-cli/src/main/java/org/phenopackets/validator/cli/results/ValidationTsvVisualizer.java @@ -0,0 +1,60 @@ +package org.phenopackets.validator.cli.results; + +import org.phenopackets.validator.core.ValidatorInfo; + +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class ValidationTsvVisualizer { + + private final List errorFreeValidations; + private final Map<ValidatorInfo, List<ValidationItemTsvVisualizer>> errors; + + + public ValidationTsvVisualizer() { + errorFreeValidations = new ArrayList<>(); + errors = new HashMap<>(); + } + + /** + * Add a ValidationInfo (i.e., validation type) for which no errors were found.
+ * @param vinfo validation info for a test that was error free + */ + public void errorFree(ValidatorInfo vinfo) { + errorFreeValidations.add(vinfo); + } + + /** + * Add a validation item reported by a validator; every item of a validator is retained, not only the last one. + * @param vinfo validation info of the validator that reported the item + * @param result the item to be written as one row of the output + */ + public void error(ValidatorInfo vinfo, ValidationItemTsvVisualizer result) { + errors.computeIfAbsent(vinfo, key -> new ArrayList<>()).add(result); + } + + + + private static String tsvHeader() { + return String.join("\t", List.of("Error Type", "Message", "Validator Id")); + } + + + public void write(Writer writer) throws IOException { + writer.write(tsvHeader() + "\n"); + for (var results : this.errors.values()) { + for (var result : results) { + writer.write(String.join("\t",result.getFields()) + "\n"); + } + } + for (var vinfo : this.errorFreeValidations) { + writer.write(String.join("\t",List.of(vinfo.validatorName(), "no errors", vinfo.validatorId())) + "\n"); + } + } + + +} diff --git a/validator-cli/src/main/resources/log4j2.xml b/validator-cli/src/main/resources/log4j2.xml new file mode 100644 index 0000000..2c48de3 --- /dev/null +++ b/validator-cli/src/main/resources/log4j2.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/validator-cli/src/main/resources/logback-spring.xml b/validator-cli/src/main/resources/logback-spring.xml deleted file mode 100644 index 3a6c413..0000000 --- a/validator-cli/src/main/resources/logback-spring.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/validator-core/pom.xml b/validator-core/pom.xml index 0a0999f..e61b377 100644 --- a/validator-core/pom.xml +++ b/validator-core/pom.xml @@ -5,106 +5,15 @@ 4.0.0 - org.phenopackets.phenopacket-schema + org.phenopackets.validator validator 0.1.1-SNAPSHOT validator-core - 0.1.1-SNAPSHOT - jar validator-core Validator utilities for phenopackets - - 3.14.0 - 2.0.0 - - - - org.phenopackets - phenopacket-schema - ${phenopacket-schema.version} - - - - com.networknt - json-schema-validator - 1.0.42 - - - - - org.prefixcommons - curie-util - 0.0.2 - - - com.google.protobuf - protobuf-java -
${protobuf.version} - - - com.google.protobuf - protobuf-java-util - ${protobuf.version} - - - - - - - - maven-resources-plugin - 3.2.0 - - - copy-resources - validate - - copy-resources - - - ${project.build.directory}/resources - - - src/main/resources - - true - - schema - log4j2.xml - - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - 11 - 11 - - - - - - - - \ No newline at end of file diff --git a/validator-core/src/main/java/org/phenopackets/schema/validator/core/PhenopacketValidator.java b/validator-core/src/main/java/org/phenopackets/schema/validator/core/PhenopacketValidator.java deleted file mode 100644 index 335a0d5..0000000 --- a/validator-core/src/main/java/org/phenopackets/schema/validator/core/PhenopacketValidator.java +++ /dev/null @@ -1,103 +0,0 @@ -package org.phenopackets.schema.validator.core; - -import org.phenopackets.schema.validator.core.except.PhenopacketValidatorRuntimeException; -import org.phenopackets.schema.validator.core.jsonschema.AdditionalJsonFileJsonSchemaValidator; -import org.phenopackets.schema.validator.core.jsonschema.HpoRareDiseaseJsonSchemaValidator; -import org.phenopackets.schema.validator.core.jsonschema.JsonSchemaValidator; -import org.phenopackets.schema.validator.core.validation.ValidationItem; - -import java.io.File; -import java.util.List; - -/** - * Entry point for the library. Performs JSON schema validation. If an additional config file is provided, it performs - * additional validation. Several default validation files are provided that can be used with command line flags. - * For instance, adding the --rare flag will additionally apply the {@code hpo-rare-disease-schema.json} specification. 
- * @author Peter N Robinson - */ -public class PhenopacketValidator { - - List validationErrors; - - public enum ValidationType { GENERIC, RARE_DISEASE_VALIDATION} - - private final File phenopacket; - - public PhenopacketValidator(String phenopacketPath) { - phenopacket = initPhenopacketFile(phenopacketPath); - validationErrors = genericValidation(); - - } - - /** Can be used to validate a Phenopacket against the built-in additional JSON-Schema files, - * including HPO-rare disease TODO add another two examples. - * @param phenopacketPath Path to the phenopacket file - * @param validationType One of the built-in types, see {@link ValidationType}. - */ - public PhenopacketValidator(String phenopacketPath, ValidationType validationType) { - phenopacket = initPhenopacketFile(phenopacketPath); - validationErrors = genericValidation(); - - switch (validationType) { - case RARE_DISEASE_VALIDATION: - List specErrors = hpoRareDiseaseValidation(); - validationErrors.addAll(specErrors); - break; - } - } - - /** - * This constructor can be used with one or more user defined JSON spec files that are passed from the command line - * @param phenopacketPath Path to the phenopacket file - * @param jsonSchemPaths Paths to user-defined JSON Schema files - */ - public PhenopacketValidator(String phenopacketPath, String... jsonSchemPaths) { - phenopacket = initPhenopacketFile(phenopacketPath); - validationErrors = genericValidation(); - for (String jsonPath : jsonSchemPaths) { - List specErrors = additionalValidation(jsonPath); - validationErrors.addAll(specErrors); - } - } - - private List additionalValidation(String jsonPath) { - File jsonFile = new File(jsonPath); - if (! 
jsonFile.isFile()) { - throw new PhenopacketValidatorRuntimeException("Could not find input json file \"" + jsonFile.getAbsolutePath() + "\""); - } - JsonSchemaValidator validator = new AdditionalJsonFileJsonSchemaValidator(phenopacket, jsonFile); - return validator.validate(); - } - - /** - * Create a file object and check that the file exists - * @param phenopacketPath String to the input phenopacket file - * @return File object representing the input phenopacket - */ - File initPhenopacketFile(String phenopacketPath) { - File f = new File(phenopacketPath); - if (! f.isFile()) { - throw new PhenopacketValidatorRuntimeException("Could not find file \"" + phenopacketPath + "\""); - } - return f; - } - - /** - * Validate the Phenopacket file against the generic JSON Schema - */ - private List genericValidation() { - JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(phenopacket); - return jsonSchemaValidator.validate(); - } - /** - * Validate the Phenopacket file for HPO-rare disease specific requirements. 
- */ - private List hpoRareDiseaseValidation() { - JsonSchemaValidator hpoRareDiseaseValidator = new HpoRareDiseaseJsonSchemaValidator(phenopacket); - return hpoRareDiseaseValidator.validate(); - } - - public List getValidationErrors() { - return validationErrors; - } -} diff --git a/validator-core/src/main/java/org/phenopackets/schema/validator/core/except/PhenopacketValidatorRuntimeException.java b/validator-core/src/main/java/org/phenopackets/schema/validator/core/except/PhenopacketValidatorRuntimeException.java deleted file mode 100644 index 72093d6..0000000 --- a/validator-core/src/main/java/org/phenopackets/schema/validator/core/except/PhenopacketValidatorRuntimeException.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.phenopackets.schema.validator.core.except; - -public class PhenopacketValidatorRuntimeException extends RuntimeException { - - public PhenopacketValidatorRuntimeException() { super(); } - public PhenopacketValidatorRuntimeException(String msg) { super(msg); } -} diff --git a/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/AdditionalJsonFileJsonSchemaValidator.java b/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/AdditionalJsonFileJsonSchemaValidator.java deleted file mode 100644 index 4ffd0c3..0000000 --- a/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/AdditionalJsonFileJsonSchemaValidator.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.phenopackets.schema.validator.core.jsonschema; - -import com.networknt.schema.JsonSchemaFactory; -import org.phenopackets.schema.validator.core.except.PhenopacketValidatorRuntimeException; - -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; - -public class AdditionalJsonFileJsonSchemaValidator extends JsonSchemaValidator{ - - public AdditionalJsonFileJsonSchemaValidator(File phenopacketFile, File jsonFile) { - this.jsonFile = phenopacketFile; - if (! 
phenopacketFile.isFile()) { - throw new PhenopacketValidatorRuntimeException("Could not open file at \"" + phenopacketFile.getAbsolutePath() + "\""); - } - JsonSchemaFactory schemaFactory = JsonSchemaFactory.getInstance(VERSION_FLAG); - try { - InputStream targetStream = new FileInputStream(jsonFile); - this.jsonSchema = schemaFactory.getSchema(targetStream); - } catch (Exception e) { - e.printStackTrace(); - } - } - - -} diff --git a/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/HpoRareDiseaseJsonSchemaValidator.java b/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/HpoRareDiseaseJsonSchemaValidator.java deleted file mode 100644 index 6d60c68..0000000 --- a/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/HpoRareDiseaseJsonSchemaValidator.java +++ /dev/null @@ -1,38 +0,0 @@ -package org.phenopackets.schema.validator.core.jsonschema; - - -import com.networknt.schema.JsonSchemaFactory; - -import org.phenopackets.schema.validator.core.except.PhenopacketValidatorRuntimeException; - - -import java.io.File; -import java.io.InputStream; - -/** - * This class implements additional validation of a phenopacket that is intended to be used - * for HPO rare disease phenotyping. By assumption, the phenopacket will have been first - * checked agoinst the generic specification. This class performs validation with the - * file {@code hpo-rare-disease-schema.json}. - * @author Peter N Robinson - */ -public class HpoRareDiseaseJsonSchemaValidator extends JsonSchemaValidator { - - - public HpoRareDiseaseJsonSchemaValidator(File f) { - this.jsonFile = f; - if (! 
f.isFile()) { - throw new PhenopacketValidatorRuntimeException("Could not open file at \"" + f.getAbsolutePath() + "\""); - } - JsonSchemaFactory schemaFactory = JsonSchemaFactory.getInstance(VERSION_FLAG); - try { - InputStream baseSchemaStream = inputStreamFromClasspath("schema/hpo-rare-disease-schema.json"); - this.jsonSchema = schemaFactory.getSchema(baseSchemaStream); - } catch (Exception e) { - e.printStackTrace(); - } - } - - - -} diff --git a/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/JsonSchemaValidator.java b/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/JsonSchemaValidator.java deleted file mode 100644 index b75f9c2..0000000 --- a/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/JsonSchemaValidator.java +++ /dev/null @@ -1,83 +0,0 @@ -package org.phenopackets.schema.validator.core.jsonschema; - - - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; - -import com.networknt.schema.JsonSchema; -import com.networknt.schema.JsonSchemaFactory; -import com.networknt.schema.SpecVersion; -import com.networknt.schema.ValidationMessage; -import org.phenopackets.schema.validator.core.except.PhenopacketValidatorRuntimeException; -import org.phenopackets.schema.validator.core.validation.JsonValidator; -import org.phenopackets.schema.validator.core.validation.ValidationItem; - -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - - -public class JsonSchemaValidator implements JsonValidator { - - protected JsonSchema jsonSchema; - protected ObjectMapper objectMapper = new ObjectMapper(); - /** The latest version of the spec that is supported by our JSON SCHEMA library is 2019/09. 
*/ - protected static final SpecVersion.VersionFlag VERSION_FLAG = SpecVersion.VersionFlag.V201909; - protected File jsonFile; - - /** - * Constructor for validation using JSON Schema - * @param f a JSON file. - */ - public JsonSchemaValidator(File f) { - this.jsonFile = f; - if (! f.isFile()) { - throw new PhenopacketValidatorRuntimeException("Could not open file at \"" + f.getAbsolutePath() + "\""); - } - JsonSchemaFactory schemaFactory = JsonSchemaFactory.getInstance(VERSION_FLAG); - try { - InputStream baseSchemaStream = inputStreamFromClasspath("schema/phenopacket-general-schema.json"); - this.jsonSchema = schemaFactory.getSchema(baseSchemaStream); - } catch (Exception e) { - e.printStackTrace(); - } - } - - public JsonSchemaValidator() { - - } - - - /** - * Get a resource from the maven src/main/resources directory - * @param path Path to a file resource - * @return corresponding input stream - */ - protected static InputStream inputStreamFromClasspath(String path) { - return Thread.currentThread().getContextClassLoader().getResourceAsStream(path); - } - - /** Validate the file at fpath (assumed to be a Phenopacket formated in JSON) using the - * base validation schema - * @return List of {@link JsonValidationError} objects (empty list if there were no errors) - */ - @Override - public List validate() { - List errors = new ArrayList<>(); - try { - InputStream jsonStream = new FileInputStream(jsonFile); - JsonNode json = objectMapper.readTree(jsonStream); - Set validationResult = jsonSchema.validate(json); - if (! 
validationResult.isEmpty()) { - validationResult.forEach(e -> errors.add(new JsonValidationError(e))); - } - } catch (Exception e) { - e.printStackTrace(); - } - return errors; - } -} diff --git a/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/JsonValidationError.java b/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/JsonValidationError.java deleted file mode 100644 index 8e4b80c..0000000 --- a/validator-core/src/main/java/org/phenopackets/schema/validator/core/jsonschema/JsonValidationError.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.phenopackets.schema.validator.core.jsonschema; -import com.networknt.schema.ValidationMessage; -import org.phenopackets.schema.validator.core.validation.ErrorType; -import org.phenopackets.schema.validator.core.validation.ValidationItem; - -import java.util.Objects; - -/** - * POJO to represent errors identified by JSON Schema validation. - * @author Peter N Robinson - */ -public final class JsonValidationError implements ValidationItem { - - private final ErrorType errorType; - private final String message; - - public JsonValidationError(ValidationMessage validationMessage) { - this.errorType = ErrorType.stringToErrorType(validationMessage.getType()); - this.message = validationMessage.getMessage(); - } - - - @Override - public String message() { - return message; - } - - @Override - public ErrorType errorType() { - return this.errorType; - } - - @Override - public int hashCode() { - return Objects.hash(message, errorType); - } - - @Override - public boolean equals(Object obj) { - if (! 
(obj instanceof JsonValidationError)) return false; - JsonValidationError that = (JsonValidationError) obj; - return this.message.equals(that.message) && this.errorType.equals(that.errorType); - } - -} diff --git a/validator-core/src/main/java/org/phenopackets/schema/validator/core/validation/ErrorType.java b/validator-core/src/main/java/org/phenopackets/schema/validator/core/validation/ErrorType.java deleted file mode 100644 index 30a469b..0000000 --- a/validator-core/src/main/java/org/phenopackets/schema/validator/core/validation/ErrorType.java +++ /dev/null @@ -1,37 +0,0 @@ -package org.phenopackets.schema.validator.core.validation; - -import org.phenopackets.schema.validator.core.except.PhenopacketValidatorRuntimeException; - -public enum ErrorType { - /** JSON schema error meaning that the JSON code contained a property not present in the schema. */ - JSON_ADDITIONAL_PROPERTIES("additionalProperties"), - /** JSON schema error meaning that the JSON code failed to contain a property required by the schema. 
*/ - JSON_REQUIRED("required"), - PHENOPACKET_SUBJECT_LACKS_AGE("phenopacket subject lacks age"), - PHENOPACKET_LACKS_SUBJECT("phenopacket lacks subject"), - INVALID_ONTOLOGY("invalid ontology"), - PHENOPACKET_LACKS_PHENOTYPIC_FEATURE("phenopacket lacks phenotypic feature"); - - private final String name; - - - ErrorType(String value) { - this.name = value; - } - - @Override - public String toString() { - return this.name; - } - - - public static ErrorType stringToErrorType(String error) { - switch (error) { - case "additionalProperties": return JSON_ADDITIONAL_PROPERTIES; - case "required": return JSON_REQUIRED; - - default: - throw new PhenopacketValidatorRuntimeException("Did not recognize error type: \"" + error + "\""); - } - } -} diff --git a/validator-core/src/main/java/org/phenopackets/schema/validator/core/validation/JsonValidator.java b/validator-core/src/main/java/org/phenopackets/schema/validator/core/validation/JsonValidator.java deleted file mode 100644 index 61c9e9c..0000000 --- a/validator-core/src/main/java/org/phenopackets/schema/validator/core/validation/JsonValidator.java +++ /dev/null @@ -1,13 +0,0 @@ -package org.phenopackets.schema.validator.core.validation; - -import java.util.List; - -/** - * Common interface for validation by JSON schema. 
- * @author Peter N Robinson - */ -public interface JsonValidator { - - List validate(); - -} diff --git a/validator-core/src/main/java/org/phenopackets/schema/validator/core/validation/ValidationItem.java b/validator-core/src/main/java/org/phenopackets/schema/validator/core/validation/ValidationItem.java deleted file mode 100644 index f22109d..0000000 --- a/validator-core/src/main/java/org/phenopackets/schema/validator/core/validation/ValidationItem.java +++ /dev/null @@ -1,8 +0,0 @@ -package org.phenopackets.schema.validator.core.validation; - -public interface ValidationItem { - - ErrorType errorType(); - String message(); - -} diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/DefaultValidationInfo.java b/validator-core/src/main/java/org/phenopackets/validator/core/DefaultValidationInfo.java new file mode 100644 index 0000000..15d8285 --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/DefaultValidationInfo.java @@ -0,0 +1,60 @@ +package org.phenopackets.validator.core; + +import java.util.Objects; + +class DefaultValidationInfo implements ValidatorInfo { + + private static final DefaultValidationInfo GENERIC = of("GENERIC", "validation with generic JSON Schema"); + private static final DefaultValidationInfo RARE_DISEASE_VALIDATOR = of("RARE_DISEASE_VALIDATOR", "Validation of rare disease Phenopacket constraints"); + + static ValidatorInfo generic() { + return GENERIC; + } + + static ValidatorInfo rareDiseaseValidator() { + return RARE_DISEASE_VALIDATOR; + } + + private final String validatorId; + private final String validatorName; + + static DefaultValidationInfo of(String validatorId, String validatorName) { + return new DefaultValidationInfo(validatorId, validatorName); + } + + private DefaultValidationInfo(String validationId, String validatorName) { + this.validatorId = validationId; + this.validatorName = validatorName; + } + + @Override + public String validatorId() { + return validatorId; + } + + 
@Override + public String validatorName() { + return validatorName; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DefaultValidationInfo that = (DefaultValidationInfo) o; + return Objects.equals(validatorId, that.validatorId) && Objects.equals(validatorName, that.validatorName); + } + + @Override + public int hashCode() { + return Objects.hash(validatorId, validatorName); + } + + @Override + public String toString() { + return "DefaultValidationInfo{" + + "validatorId='" + validatorId + '\'' + + ", validatorName='" + validatorName + '\'' + + '}'; + } +} diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/DefaultValidatorRegistry.java b/validator-core/src/main/java/org/phenopackets/validator/core/DefaultValidatorRegistry.java new file mode 100644 index 0000000..b987b75 --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/DefaultValidatorRegistry.java @@ -0,0 +1,39 @@ +package org.phenopackets.validator.core; + + +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; + +class DefaultValidatorRegistry implements PhenopacketValidatorRegistry { + + private final Map validatorMap; + + DefaultValidatorRegistry(Map validMap) { + validatorMap = Map.copyOf(validMap); + } + + @Override + public Optional getValidatorForType(ValidatorInfo type) { + return Optional.ofNullable(validatorMap.get(type)); + } + + @Override + public Set getValidationTypeSet() { + return validatorMap.keySet(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DefaultValidatorRegistry that = (DefaultValidatorRegistry) o; + return Objects.equals(validatorMap, that.validatorMap); + } + + @Override + public int hashCode() { + return Objects.hash(validatorMap); + } +} diff --git 
a/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidator.java b/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidator.java new file mode 100644 index 0000000..51c13c4 --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidator.java @@ -0,0 +1,36 @@ +package org.phenopackets.validator.core; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.List; + +/** + * Phenopacket validator applies rules to check that the provided data meets the requirements for a valid Phenopacket. + *

+ * @author Daniel Danis + * @author Peter N Robinson + */ +public interface PhenopacketValidator { + + ValidatorInfo info(); + + List validate(InputStream inputStream); + + // ----------------------------------------------------------------------------------------------------------------- + + default List validate(File phenopacket) throws IOException { + try (InputStream inputStream = Files.newInputStream(phenopacket.toPath())) { + byte[] bytes = inputStream.readAllBytes(); + return validate(bytes); + } + } + + default List validate(String content) { + return validate(new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8))); + } + +} diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidatorRegistry.java b/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidatorRegistry.java new file mode 100644 index 0000000..b48b36c --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/PhenopacketValidatorRegistry.java @@ -0,0 +1,23 @@ +package org.phenopackets.validator.core; + +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +/** + * The phenopacket validator registry stores available Validators designed to perform specific {@link ValidatorInfo}. + *

+ * @author Daniel Danis + * @author Peter N Robinson + */ +public interface PhenopacketValidatorRegistry { + + static PhenopacketValidatorRegistry of(Map validMap) { + return new DefaultValidatorRegistry(validMap); + } + + Optional getValidatorForType(ValidatorInfo type); + + Set getValidationTypeSet(); +} + diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItem.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItem.java new file mode 100644 index 0000000..44efe0f --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItem.java @@ -0,0 +1,30 @@ +package org.phenopackets.validator.core; + +/** + * The interface represents a single issue found during validation. + *

+ * @author Daniel Danis + * @author Peter N Robinson + */ +public interface ValidationItem { + + static ValidationItem of(ValidatorInfo validatorInfo, ValidationItemType type, String message) { + return new ValidationItemDefault(validatorInfo, type, message); + } + + /** + * @return basic description of the validator that produced this issue. + */ + ValidatorInfo validatorInfo(); + + /** + * @return description of the issue in a standard way intended for both grouping of the issues and human consumption. + */ + ValidationItemType type(); + + /** + * @return string with description of the issue intended for human consumption. + */ + String message(); + +} diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemDefault.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemDefault.java new file mode 100644 index 0000000..34f8860 --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemDefault.java @@ -0,0 +1,3 @@ +package org.phenopackets.validator.core; + +record ValidationItemDefault(ValidatorInfo validatorInfo, ValidationItemType type, String message) implements ValidationItem {} diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemType.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemType.java new file mode 100644 index 0000000..ac8489d --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemType.java @@ -0,0 +1,13 @@ +package org.phenopackets.validator.core; + +public interface ValidationItemType { + + static ValidationItemType of(String itemType, String itemDescription) { + return new ValidationItemTypeDefault(itemType, itemDescription); + } + + String itemType(); + + String itemDescription(); + +} diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypeDefault.java 
b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypeDefault.java new file mode 100644 index 0000000..5f30f99 --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypeDefault.java @@ -0,0 +1,3 @@ +package org.phenopackets.validator.core; + +record ValidationItemTypeDefault(String itemType, String itemDescription) implements ValidationItemType {} diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypes.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypes.java new file mode 100644 index 0000000..2499929 --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidationItemTypes.java @@ -0,0 +1,13 @@ +package org.phenopackets.validator.core; + +public class ValidationItemTypes { + + private static final ValidationItemType SYNTAX_ERROR = ValidationItemType.of("Syntax error", "The input does not conform the with phenopacket syntax"); + + private ValidationItemTypes() {} + + public static ValidationItemType syntaxError() { + return SYNTAX_ERROR; + } + +} diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/ValidatorInfo.java b/validator-core/src/main/java/org/phenopackets/validator/core/ValidatorInfo.java new file mode 100644 index 0000000..64915d3 --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/ValidatorInfo.java @@ -0,0 +1,30 @@ +package org.phenopackets.validator.core; + +public interface ValidatorInfo { + + static ValidatorInfo generic() { + return DefaultValidationInfo.generic(); + } + + /** + * This class implements additional validation of a phenopacket that is intended to be used + * for HPO rare disease phenotyping. By assumption, the phenopacket will have been first + * checked against the {@link ValidatorInfo#generic()} specification. This class performs validation with the + * file {@code hpo-rare-disease-schema.json}. 
+ */ + static ValidatorInfo rareDiseaseValidation() { + return DefaultValidationInfo.rareDiseaseValidator(); + } + + static ValidatorInfo of(String validatorId, String validatorName) { + return DefaultValidationInfo.of(validatorId, validatorName); + } + + String validatorId(); + + String validatorName(); + + int hashCode(); + + boolean equals(Object o); +} diff --git a/validator-core/src/main/java/org/phenopackets/validator/core/except/PhenopacketValidatorRuntimeException.java b/validator-core/src/main/java/org/phenopackets/validator/core/except/PhenopacketValidatorRuntimeException.java new file mode 100644 index 0000000..f640744 --- /dev/null +++ b/validator-core/src/main/java/org/phenopackets/validator/core/except/PhenopacketValidatorRuntimeException.java @@ -0,0 +1,24 @@ +package org.phenopackets.validator.core.except; + +public class PhenopacketValidatorRuntimeException extends RuntimeException { + + public PhenopacketValidatorRuntimeException() { + super(); + } + + public PhenopacketValidatorRuntimeException(String message) { + super(message); + } + + public PhenopacketValidatorRuntimeException(String message, Throwable cause) { + super(message, cause); + } + + public PhenopacketValidatorRuntimeException(Throwable cause) { + super(cause); + } + + protected PhenopacketValidatorRuntimeException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/validator-core/src/test/java/org/phenopackets/schema/validator/core/jsonschema/JsonSchemaValidatorTest.java b/validator-core/src/test/java/org/phenopackets/schema/validator/core/jsonschema/JsonSchemaValidatorTest.java deleted file mode 100644 index 252b38e..0000000 --- a/validator-core/src/test/java/org/phenopackets/schema/validator/core/jsonschema/JsonSchemaValidatorTest.java +++ /dev/null @@ -1,73 +0,0 @@ -package org.phenopackets.schema.validator.core.jsonschema; - -import 
org.junit.jupiter.api.Test; -import org.phenopackets.schema.validator.core.validation.ErrorType; -import org.phenopackets.schema.validator.core.validation.ValidationItem; - -import java.io.File; -import java.io.IOException; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class JsonSchemaValidatorTest { - - private static File fileFromClasspath(String path) { - String fname = Thread.currentThread().getContextClassLoader().getResource(path).getPath(); - return new File(fname); - } - - @Test - public void testValidationOfSimpleValidPhenopacket() throws IOException { - File validSimplePhenopacket = fileFromClasspath("json/validSimplePhenopacket.json"); - JsonSchemaValidator validator = new JsonSchemaValidator(validSimplePhenopacket); - List errors = validator.validate(); - assertTrue(errors.isEmpty()); - } - - /** - * This example phenopacket does not contain anything except {"disney" : "donald"} - * It does not contain an id or a metaData element and thus should fail. 
- */ - @Test - public void testValidationOfSimpleInValidPhenopacket() { - File invalidSimplePhenopacket = fileFromClasspath("json/invalidSimplePhenopacket.json"); - JsonSchemaValidator validator = new JsonSchemaValidator(invalidSimplePhenopacket); - List errors = validator.validate(); - assertEquals(3, errors.size()); - ValidationItem error = errors.get(0); - assertEquals(ErrorType.JSON_REQUIRED, error.errorType()); - assertEquals("$.id: is missing but it is required", error.message()); - error = errors.get(1); - assertEquals(ErrorType.JSON_REQUIRED, error.errorType()); - assertEquals("$.metaData: is missing but it is required", error.message()); - error = errors.get(2); - assertEquals(ErrorType.JSON_ADDITIONAL_PROPERTIES, error.errorType()); - assertEquals("$.disney: is not defined in the schema and the schema does not allow additional properties", error.message()); - } - - @Test - public void testRareDiseaseBethlemahmValidPhenopacket() throws IOException { - File myopathyPhenopacket = fileFromClasspath("json/bethlehamMyopathyExample.json"); - JsonSchemaValidator validator = new JsonSchemaValidator(myopathyPhenopacket); - List errors = validator.validate(); - assertTrue(errors.isEmpty()); - } - - @Test - public void testRareDiseaseBethlemahmInvalidValidPhenopacket() throws IOException { - File invalidMyopathyPhenopacket = fileFromClasspath("json/bethlehamMyopathyInvalidExample.json"); - JsonSchemaValidator validator = new JsonSchemaValidator(invalidMyopathyPhenopacket); - List errors = validator.validate(); -// for (ValidationError ve : errors) { -// System.out.println(ve.getMessage()); -// } - assertEquals(1, errors.size()); - ValidationItem error = errors.get(0); - assertEquals(ErrorType.JSON_ADDITIONAL_PROPERTIES, error.errorType()); - assertEquals("$.phenotypicFeaturesMALFORMED: is not defined in the schema and the schema does not allow additional properties", error.message()); - } - - -} diff --git 
a/validator-core/src/test/resources/json/invalidSimplePhenopacket.json b/validator-core/src/test/resources/json/invalidSimplePhenopacket.json deleted file mode 100644 index 8258e4e..0000000 --- a/validator-core/src/test/resources/json/invalidSimplePhenopacket.json +++ /dev/null @@ -1 +0,0 @@ -{"disney" : "donald"} \ No newline at end of file diff --git a/validator-core/src/test/resources/json/validSimplePhenopacket.json b/validator-core/src/test/resources/json/validSimplePhenopacket.json deleted file mode 100644 index eb6c3bb..0000000 --- a/validator-core/src/test/resources/json/validSimplePhenopacket.json +++ /dev/null @@ -1,16 +0,0 @@ -{"id" : "world", - "metaData": { - "created": "2021-07-01T19:32:35Z", - "createdBy": "Hpo Case Annotator : 1.0.15-SNAPSHOT", - "submittedBy": "HPO:probinson", - "resources": [{ - "id": "hp", - "name": "human phenotype ontology", - "url": "http://purl.obolibrary.org/obo/hp.owl", - "version": "2018-03-08", - "namespacePrefix": "HP", - "iriPrefix": "http://purl.obolibrary.org/obo/HP_" - }], - "phenopacketSchemaVersion": "2.0" - } -} \ No newline at end of file diff --git a/validator-jsonschema/pom.xml b/validator-jsonschema/pom.xml new file mode 100644 index 0000000..8fffb20 --- /dev/null +++ b/validator-jsonschema/pom.xml @@ -0,0 +1,46 @@ + + + + validator + org.phenopackets.validator + 0.1.1-SNAPSHOT + + 4.0.0 + + validator-jsonschema + + + + org.phenopackets.validator + validator-core + ${project.parent.version} + + + + com.networknt + json-schema-validator + + + + + org.phenopackets + phenopacket-schema + test + + + com.google.protobuf + protobuf-java-util + test + + + + org.phenopackets.phenotools + phenotools-builder + 0.0.1-SNAPSHOT + test + + + + \ No newline at end of file diff --git a/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidator.java b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidator.java new file mode 100644 index 0000000..17565c5 
--- /dev/null +++ b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidator.java @@ -0,0 +1,113 @@ +package org.phenopackets.validator.jsonschema; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.networknt.schema.JsonSchema; +import com.networknt.schema.JsonSchemaFactory; +import com.networknt.schema.SpecVersion; +import org.phenopackets.validator.core.*; +import org.phenopackets.validator.core.except.PhenopacketValidatorRuntimeException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + + +public class JsonSchemaValidator implements PhenopacketValidator { + + private static final Logger LOGGER = LoggerFactory.getLogger(JsonSchemaValidator.class); + + /** + * The latest version of the spec that is supported by our JSON SCHEMA library is 2019/09. 
+ */ + private static final SpecVersion.VersionFlag VERSION_FLAG = SpecVersion.VersionFlag.V201909; + private final JsonSchema jsonSchema; + private final ObjectMapper objectMapper = new ObjectMapper(); + private final ValidatorInfo validatorInfo; + + + private JsonSchemaValidator(JsonSchema jsonSchema, ValidatorInfo validatorInfo) { + this.jsonSchema = jsonSchema; + this.validatorInfo = validatorInfo; + } + + /** + * @param jsonSchema path to JSON schema specification file + * @throws PhenopacketValidatorRuntimeException if jsonSchema is not a valid file or if the file is + * not a valid JSON schema specification + */ + public static JsonSchemaValidator of(File jsonSchema, ValidatorInfo validatorInfo) { + if (!jsonSchema.isFile()) { + throw new PhenopacketValidatorRuntimeException("Could not open file at \"" + jsonSchema.getAbsolutePath() + "\""); + } + LOGGER.debug("Reading JSON schema from `{}`", jsonSchema.getAbsolutePath()); + try (InputStream inputStream = Files.newInputStream(jsonSchema.toPath())) { + return of(inputStream, validatorInfo); + } catch (Exception e) { + throw new PhenopacketValidatorRuntimeException("Invalid JSON schema specification: " + e.getMessage()); + } + } + + public static JsonSchemaValidator of(InputStream inputStream, ValidatorInfo validatorInfo) { + JsonSchemaFactory schemaFactory = JsonSchemaFactory.getInstance(VERSION_FLAG); + return new JsonSchemaValidator(schemaFactory.getSchema(inputStream), validatorInfo); + } + + private static ValidationItemType stringToErrorType(String error) { + return switch (error) { + case "additionalProperties" -> JsonValidationItemTypes.JSON_ADDITIONAL_PROPERTIES; + case "required" -> JsonValidationItemTypes.JSON_REQUIRED; + case "type", "enum" -> JsonValidationItemTypes.JSON_TYPE; + default -> throw new PhenopacketValidatorRuntimeException("Did not recognize JSON error type: \"" + error + "\""); + }; + } + + @Override + public ValidatorInfo info() { + return validatorInfo; + } + + /** + * Validate the 
{@code inputStream} content (assumed to be a Phenopacket formated in JSON) + * + * @return List of {@link ValidationItem} objects (empty list if there were no errors) + */ + @Override + public List validate(InputStream inputStream) { + List errors = new ArrayList<>(); + try { + JsonNode json = objectMapper.readTree(inputStream); + jsonSchema.validate(json) + .forEach(e -> errors.add(ValidationItem.of(validatorInfo, stringToErrorType(e.getType()), e.getMessage()))); + + return errors; + } catch (Exception e) { + if (e instanceof IOException) { + LOGGER.warn("Error while decoding JSON content: {}", e.getMessage(), e); + } else { + LOGGER.warn("Error while validating: {}", e.getMessage(), e); + } + + return List.of(ValidationItem.of(validatorInfo, ValidationItemTypes.syntaxError(), e.getMessage())); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + JsonSchemaValidator that = (JsonSchemaValidator) o; + return Objects.equals(jsonSchema, that.jsonSchema) && Objects.equals(objectMapper, that.objectMapper) && Objects.equals(validatorInfo, that.validatorInfo); + } + + @Override + public int hashCode() { + return Objects.hash(jsonSchema, objectMapper, validatorInfo); + } +} diff --git a/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidators.java b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidators.java new file mode 100644 index 0000000..9c75490 --- /dev/null +++ b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonSchemaValidators.java @@ -0,0 +1,40 @@ +package org.phenopackets.validator.jsonschema; + +import org.phenopackets.validator.core.PhenopacketValidator; +import org.phenopackets.validator.core.ValidatorInfo; +import org.phenopackets.validator.core.except.PhenopacketValidatorRuntimeException; + +import java.io.InputStream; +import java.util.Map; + +/** + * 
Utility class that uses JSON schema definitions that are bundled within the application (on classpath). + *

+ * @author Daniel Danis + * @author Peter N Robinson + */ +public class JsonSchemaValidators { + + public static Map genericValidator() { + return Map.of( + ValidatorInfo.generic(), makeJsonValidator("/schema/phenopacket-generic.json", ValidatorInfo.generic()) + ); + } + + public static Map rareHpoValidator() { + return Map.of( + ValidatorInfo.generic(), makeJsonValidator("/schema/hpo-rare-disease-schema.json", ValidatorInfo.generic()) + ); + } + + private static PhenopacketValidator makeJsonValidator(String schemaPath, ValidatorInfo validationName) { + InputStream inputStream = JsonSchemaValidators.class.getResourceAsStream(schemaPath); + if (inputStream == null) + throw new PhenopacketValidatorRuntimeException("Invalid JSON schema path `" + schemaPath + '`'); + + return JsonSchemaValidator.of(inputStream, validationName); + } + + private JsonSchemaValidators() {} + +} diff --git a/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonValidationItemTypes.java b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonValidationItemTypes.java new file mode 100644 index 0000000..5c1d621 --- /dev/null +++ b/validator-jsonschema/src/main/java/org/phenopackets/validator/jsonschema/JsonValidationItemTypes.java @@ -0,0 +1,26 @@ +package org.phenopackets.validator.jsonschema; + +import org.phenopackets.validator.core.ValidationItemType; + +public class JsonValidationItemTypes { + + /** + * JSON schema error meaning that the JSON code contained a property not present in the schema. + */ + public final static ValidationItemType JSON_ADDITIONAL_PROPERTIES = ValidationItemType.of("additionalProperties", "Use of a property not present in the schema"); + + /** + * JSON schema error meaning that the JSON code failed to contain a property required by the schema. 
+ */ + public final static ValidationItemType JSON_REQUIRED = ValidationItemType.of("required", "failure to contain a property required by the schema"); + + /** + * The type of an object is not as required by the schema, e.g., we get a string instead of an array. + */ + public final static ValidationItemType JSON_TYPE = ValidationItemType.of("type", "incorrect data type of object"); + + // TODO - add item description + public final static ValidationItemType JSON_ENUM = ValidationItemType.of("enum", "TODO"); + + private JsonValidationItemTypes() {} +} diff --git a/validator-core/src/main/resources/schema/hpo-rare-disease-schema.json b/validator-jsonschema/src/main/resources/schema/hpo-rare-disease-schema.json similarity index 70% rename from validator-core/src/main/resources/schema/hpo-rare-disease-schema.json rename to validator-jsonschema/src/main/resources/schema/hpo-rare-disease-schema.json index b3c2450..fc7325b 100644 --- a/validator-core/src/main/resources/schema/hpo-rare-disease-schema.json +++ b/validator-jsonschema/src/main/resources/schema/hpo-rare-disease-schema.json @@ -3,7 +3,6 @@ "$id": "https://www.ga4gh.org/phenopackets", "title": "HPO Rare Disease Phenopacket Schema", "description": "HPO Rare Disease Schema for GGA4GH Phenopacket", - "_comment": "Here we require the phenopacket to have the following elements that are not required by the default schema 1. subject (proband being investigated) 2. at least one phenotypicFeature element 3. time_at_last encounter (subelement of subject), representing the age of the proband. 
In addition, we require that Human Phenotype Ontology (HPO) terms are used to represent phenotypicFeature", "type": "object", "properties": { "subject": { diff --git a/validator-core/src/main/resources/schema/phenopacket-general-schema.json b/validator-jsonschema/src/main/resources/schema/phenopacket-generic.json similarity index 89% rename from validator-core/src/main/resources/schema/phenopacket-general-schema.json rename to validator-jsonschema/src/main/resources/schema/phenopacket-generic.json index 1d0b4ca..00aa0f5 100644 --- a/validator-core/src/main/resources/schema/phenopacket-general-schema.json +++ b/validator-jsonschema/src/main/resources/schema/phenopacket-generic.json @@ -32,10 +32,18 @@ "$ref": "#/definitions/timeElement" }, "vitalStatus": { - "enum": ["UNKNOWN_STATUS", "ALIVE", "DECEASED"] + "type": "object", + "properties": { + "status": {"enum": ["UNKNOWN_STATUS", "ALIVE", "DECEASED"]}, + "timeOfDeath": {"$ref": "#/definitions/timeElement"}, + "causeOfDeath": {"$ref": "#/definitions/ontologyClass"}, + "survivalTimeInDays": "integer" + }, + "required": [ "status"], + "additionalProperties": false }, "sex": { - "enum": ["UNKNOWN_SEX", "FEMALE", "MALE", "UNKNOWN_SEX"] + "enum": ["UNKNOWN_SEX", "FEMALE", "MALE"] }, "karyotypicSex": { "enum": ["UNKNOWN_KARYOTYPE", "XX", "XY", "XO", "XXY","XXX","XXYY", "XXXY", "XXXX", "XYY", "OTHER_KARYOTYPE"] @@ -176,23 +184,27 @@ "reference": { "$ref": "#/definitions/externalReference" } - } + }, + "required": ["evidenceCode"], + "additionalProperties": false }, "timeElement": { + "type": "object", "properties": { "gestationalAge": {"$ref": "#/definitions/gestationalAge"}, "age" : {"$ref": "#/definitions/age"}, "ageRange": { "$ref": "#/definitions/ageRange"}, "ontologyClass": { "$ref": "#/definitions/ontologyClass"}, "timestamp": { "type": "string", "format": "date-time"}, - "timeInterval": {"$ref": "#/definitions/timeInterval"} + "interval": {"$ref": "#/definitions/timeInterval"} }, + "additionalProperties": false, 
"oneOf": [ { "required": [ "gestationalAge" ]}, { "required": [ "age" ]}, { "required": [ "ageRange"]}, { "required": [ "ontologyClass" ]}, { "required": [ "timestamp" ]}, - { "required": [ "timeInterval" ]} + { "required": [ "interval" ]} ] }, "update": { @@ -291,19 +303,61 @@ "additionalProperties": false }, "value": { - "oneOf": [ - { + "type": "object", + "properties": { + "quantity": { + "$ref": "#/definitions/quantity" + }, + "ontologyClass": { + "$ref": "#/definitions/ontologyClass" + } + }, + "additionalProperties": false, + "oneOf": [ { "required": [ "quantity" ]}, + { "required": [ "ontologyClass" ]} + ] + }, + "typedQuantity": { + "type": "object", + "properties": { + "type": { "$ref": "#/definitions/ontologyClass" }, - { + "quantity": { "$ref": "#/definitions/quantity" } - ] + }, + "required": ["type", "quantity"], + "additionalProperties": false }, "complexValue": { - "type": "null" + "type": "object", + "properties": { + "typedQuantities": { + "type": "array", + "items": { + "$ref": "#/definitions/typedQuantity" + }, + "minItems": 1, + "uniqueItems": true + } + }, + "required": ["typedQuantities"], + "additionalProperties": false }, "measurement": { + "type": "object", + "properties": { + "description": "string", + "assay": { "$ref": "#/definitions/ontologyClass" }, + "value": {"$ref": "#/definitions/value" }, + "complexValue": { "$ref": "#/definitions/complexValue" }, + "timeObserved": { "$ref": "#/definitions/timeElement"}, + "procedure": { "$ref": "#/definitions/procedure"} + }, + "required": ["assay"], + "additionalProperties": false, + "oneOf": [ { "required": [ "value" ]}, { "required": [ "complexValue" ]}] }, "age": { "type": "object", @@ -467,7 +521,10 @@ "$ref": "#/definitions/timeElement" }, "diseaseStage": { - "$ref": "#/definitions/ontologyClass" + "type": "array", + "items": { + "$ref": "#/definitions/ontologyClass" + } }, "clinicalTnmFinding": { "$ref": "#/definitions/ontologyClass" @@ -478,7 +535,9 @@ "laterality": { "$ref": 
"#/definitions/ontologyClass" } - } + }, + "required": ["term"], + "additionalProperties": false }, "interpretation": { "type": "object", diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/BiosampleValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/BiosampleValidatorTest.java new file mode 100644 index 0000000..963ea74 --- /dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/BiosampleValidatorTest.java @@ -0,0 +1,81 @@ +package org.phenopackets.validator.jsonschema; + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.JsonFormat; +import org.junit.jupiter.api.Test; +import org.phenopackets.phenotools.builder.PhenopacketBuilder; +import org.phenopackets.phenotools.builder.builders.*; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.Biosample; +import org.phenopackets.schema.v2.core.MetaData; +import org.phenopackets.schema.v2.core.OntologyClass; +import org.phenopackets.validator.core.ValidationItem; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass; + +public class BiosampleValidatorTest extends JsonSchemaValidatorTestBase { + private static final String PHENOPACKET_ID = "arbitrary.id"; + private static final String PROBAND_ID = "proband A"; + private static final OntologyClass BIOPSY = ontologyClass("NCIT:C15189", "Biopsy"); + private static final MetaData metadata = MetaDataBuilder.create("2021-05-14T10:35:00Z", "anonymous biocurator") + .resource(Resources.ncitVersion("21.05d")) + .resource(Resources.efoVersion("3.34.0")) + .resource(Resources.uberonVersion("2021-07-27")) + .resource(Resources.ncbiTaxonVersion("2021-06-10")) + .build(); + private static final Biosample 
lymphNodeBiopsy = BiosampleBuilder.create("biosample 2") + .individualId(PROBAND_ID) + .timeOfCollection(TimeElements.age("P48Y3M")) + .sampledTissue(ontologyClass("NCIT:C139196", "Esophageal Lymph Node")) + .tumorProgression(ontologyClass("NCIT:C84509", "Primary Malignant Neoplasm")) + .histologicalDiagnosis(ontologyClass("NCIT:C4024", "Esophageal Squamous Cell Carcinoma")) + .diagnosticMarker(ontologyClass("NCIT:C131711", "Human Papillomavirus-18 Positive")) + .procedure(ProcedureBuilder.create(BIOPSY).build()) + .build(); + + private static final Biosample lungBiopsy = BiosampleBuilder.create("biosample 3") + .individualId(PROBAND_ID) + .timeOfCollection(TimeElements.age("P50Y7M")) + .sampledTissue(ontologyClass("NCIT:C12468", "Lung")) + .tumorProgression(ontologyClass("NCIT:C3261", "Metastatic Neoplasm")) + .procedure(ProcedureBuilder.create(BIOPSY).build()) + .build(); + + private static Phenopacket phenopacketWithBiosample() { + return PhenopacketBuilder.create(PHENOPACKET_ID, metadata) + .biosample(lymphNodeBiopsy) + .build(); + } + + private static final Phenopacket phenopacket = phenopacketWithBiosample(); + + @Test + public void testValidityOfBiosample() throws InvalidProtocolBufferException { + String json = JsonFormat.printer().print(phenopacket); + List errors = validator.validate(json); + assertTrue(errors.isEmpty()); + } + + /** + * The only required element in a Biosample is the ID + */ + @Test + public void testBiosampleLacksId() throws InvalidProtocolBufferException { + assertEquals(1, phenopacket.getBiosamplesCount()); + Biosample biosample = phenopacket.getBiosamples(0); + biosample = Biosample.newBuilder(biosample).clearId().build(); + Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setBiosamples(0, biosample).build(); + String json = JsonFormat.printer().print(p1); + List errors = validator.validate(json); + assertEquals(1, errors.size()); + String expectedErrorMsg = "$.biosamples[0].id: is missing but it is required"; + 
assertEquals(expectedErrorMsg, errors.get(0).message()); + } + + + +} diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ComplexValueValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ComplexValueValidatorTest.java new file mode 100644 index 0000000..2d88586 --- /dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ComplexValueValidatorTest.java @@ -0,0 +1,99 @@ +package org.phenopackets.validator.jsonschema; + + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.JsonFormat; +import org.junit.jupiter.api.Test; +import org.phenopackets.phenotools.builder.PhenopacketBuilder; +import org.phenopackets.phenotools.builder.builders.*; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.*; +import org.phenopackets.validator.core.ValidationItem; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass; + +/** + * Test a few errors in the Value and ComplexValue messages + * @author Peter N Robinson + */ +public class ComplexValueValidatorTest extends JsonSchemaValidatorTestBase{ + + private static Phenopacket phenopacketWithValueAndComplexValue() { + // Simple value for blood platelets + // Complex value for blood pressure + OntologyClass systolic = ontologyClass("NCIT:C25298", "Systolic Blood Pressure"); + OntologyClass diastolic = ontologyClass("NCIT:C25299", "Diastolic Blood Pressure"); + OntologyClass mmHg = ontologyClass("NCIT:C49670", "Millimeter of Mercury"); + ComplexValue complexValue = ComplexValueBuilder.create() + .typedQuantity(systolic, QuantityBuilder.create(mmHg, 120).build()) + .typedQuantity(diastolic, QuantityBuilder.create(mmHg, 70).build()) + .build(); + OntologyClass bpAssay = 
ontologyClass("CMO:0000003", "blood pressure measurement"); + Measurement bpM = MeasurementBuilder.complexValue(bpAssay, complexValue).build(); + return PhenopacketBuilder.create("id:A", metadataWithHpo) + .measurement(bpM) + .build(); + } + + private static final Phenopacket phenopacket = phenopacketWithValueAndComplexValue(); + + /** + * First test that the unaltered phenopacket is OK + */ + @Test + public void validatePhenopacket() throws InvalidProtocolBufferException { + String json = JsonFormat.printer().print(phenopacket); + //System.out.println(json); + List errors = validator.validate(json); + for (var e : errors) { + System.out.println(e.message()); + } + assertTrue(errors.isEmpty()); + } + + /** + * Each measurement requires an ontology term for the assay + */ + @Test + public void testMissingAssay() throws InvalidProtocolBufferException { + // Alter the first measurement, which is a ComplexValue + Measurement m1 = phenopacket.getMeasurements(0); + assertTrue(m1.hasComplexValue()); + m1 = Measurement.newBuilder(m1).clearAssay().build(); + Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build(); + String json = JsonFormat.printer().print(p1); + List errors = validator.validate(json); + for (var e : errors) { + System.out.println(e); + } + } + + /** + * A ComplexValue is invalid if its typedQuantities element is missing + */ + @Test + public void testComplexValueHasNoTypedValue() throws InvalidProtocolBufferException { + // Alter the first measurement, which is a ComplexValue + Measurement m1 = phenopacket.getMeasurements(0); + assertTrue(m1.hasComplexValue()); + ComplexValue cvale = m1.getComplexValue(); + cvale = ComplexValue.newBuilder(cvale).clearTypedQuantities().build(); + m1 = Measurement.newBuilder(m1).setComplexValue(cvale).build(); + Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build(); + String json = JsonFormat.printer().print(p1); + //System.out.println(json); + List errors = 
validator.validate(json); + for (var e : errors) { + System.out.println(e); + } + assertEquals(1, errors.size()); + String expectedErrorMsg = "$.measurements[0].complexValue.typedQuantities: is missing but it is required"; + assertEquals(expectedErrorMsg, errors.get(0).message()); + } + + +} diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/DiseaseValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/DiseaseValidatorTest.java new file mode 100644 index 0000000..d48a469 --- /dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/DiseaseValidatorTest.java @@ -0,0 +1,87 @@ +package org.phenopackets.validator.jsonschema; + + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.JsonFormat; +import org.junit.jupiter.api.Test; +import org.phenopackets.phenotools.builder.PhenopacketBuilder; +import org.phenopackets.phenotools.builder.builders.DiseaseBuilder; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.Disease; +import org.phenopackets.schema.v2.core.TimeElement; +import org.phenopackets.validator.core.ValidationItem; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass; +import static org.phenopackets.validator.jsonschema.JsonValidationItemTypes.*; +import static org.phenopackets.validator.testdatagen.DatagenBase.*; + +/** + * This class creates a simple phenopacket with a Disease object and creates some variations with + * validation errors. 
+ * @author Peter N Robinson + */ +public class DiseaseValidatorTest extends JsonSchemaValidatorTestBase { + + private static Phenopacket phenopacketWithDisease() { + var nyha3 = ontologyClass("NCIT:C66907", "New York Heart Association Class III"); + var childhood = TimeElement.newBuilder().setOntologyClass(CHILDHOOD_ONSET).build(); + var chagasCardiomyopathy = DiseaseBuilder.create("MONDO:0005491", "Chagas cardiomyopathy") + .diseaseStage(nyha3) + .onset(childhood) + .build(); + return PhenopacketBuilder.create("id:A",metadataWithHpo) + .disease(chagasCardiomyopathy) + .build(); + } + + private static final Phenopacket phenopacket = phenopacketWithDisease(); + + @Test + public void testPhenopacketValidity() throws InvalidProtocolBufferException { + String json = JsonFormat.printer().print(phenopacket); + List errors = validator.validate(json); + assertTrue(errors.isEmpty()); + } + + @Test + public void testLacksId() throws InvalidProtocolBufferException { + // the Phenopacket is not valid if we remove the id + Phenopacket p1 = Phenopacket.newBuilder(phenopacket).clearId().build(); + String json = JsonFormat.printer().print(p1); + List errors = validator.validate(json); + for (var e: errors) { + System.out.println(e); + } + assertEquals(1, errors.size()); + ValidationItem error = errors.get(0); + assertEquals(JSON_REQUIRED, error.type()); + assertEquals("$.id: is missing but it is required", error.message()); + } + + @Test + public void testDiseaseLacksOntologyTerm() throws InvalidProtocolBufferException { + // Disease must have an ontology term + // the phenopacket has a single Disease message + Disease disease = phenopacket.getDiseases(0); + // remove the Ontology Term from the disease + disease = Disease.newBuilder(disease).clearTerm().build(); + // replace the original disease object with the new disease object + Phenopacket p1 = Phenopacket.newBuilder(phenopacket) + .setDiseases(0, disease).build(); + String json = JsonFormat.printer().print(p1); + 
//System.out.println(json); + List errors = validator.validate(json); + assertEquals(1, errors.size()); + ValidationItem error = errors.get(0); + assertEquals(JSON_REQUIRED, error.type()); + // this error means that the first disease lacks a term + assertEquals("$.diseases[0].term: is missing but it is required", error.message()); + } + + + +} diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/JsonSchemaValidatorTestBase.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/JsonSchemaValidatorTestBase.java new file mode 100644 index 0000000..83216ae --- /dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/JsonSchemaValidatorTestBase.java @@ -0,0 +1,34 @@ +package org.phenopackets.validator.jsonschema; + +import org.phenopackets.phenotools.builder.builders.MetaDataBuilder; +import org.phenopackets.phenotools.builder.builders.Resources; +import org.phenopackets.phenotools.builder.exceptions.PhenotoolsRuntimeException; +import org.phenopackets.schema.v2.core.MetaData; +import org.phenopackets.validator.core.PhenopacketValidator; +import org.phenopackets.validator.core.ValidatorInfo; + +import java.util.Map; + +public class JsonSchemaValidatorTestBase { + private static final Map genericValidatorMap = JsonSchemaValidators.genericValidator(); + private static final Map rareHpoValidatorMap = JsonSchemaValidators.rareHpoValidator(); + + protected final PhenopacketValidator validator = genericValidatorMap.values().stream() + .findFirst() + .orElseThrow(() -> new PhenotoolsRuntimeException("Could not retrieve generic validator")); + + protected final PhenopacketValidator rareDiseaseValidator = rareHpoValidatorMap.values().stream() + .findFirst() + .orElseThrow(() -> new PhenotoolsRuntimeException("Could not retrieve rare disease validator")); + + protected static final MetaData metadataWithHpo = MetaDataBuilder.create("2021-07-01T19:32:35Z", "anonymous biocurator") + 
.submittedBy("anonymous submitter") + .resource(Resources.hpoVersion("2021-08-02")) + .resource(Resources.mondoVersion("2021-09-01")) + .externalReference("PMID:20842687", + "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter") + .build(); + + + +} diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/PhenotypicFeatureValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/PhenotypicFeatureValidatorTest.java new file mode 100644 index 0000000..a47b909 --- /dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/PhenotypicFeatureValidatorTest.java @@ -0,0 +1,78 @@ +package org.phenopackets.validator.jsonschema; + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.JsonFormat; +import org.junit.jupiter.api.Test; +import org.phenopackets.phenotools.builder.PhenopacketBuilder; +import org.phenopackets.phenotools.builder.builders.EvidenceBuilder; +import org.phenopackets.phenotools.builder.builders.PhenotypicFeatureBuilder; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.Evidence; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.phenopackets.validator.core.ValidationItem; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.phenopackets.validator.jsonschema.JsonValidationItemTypes.JSON_REQUIRED; + + +public class PhenotypicFeatureValidatorTest extends JsonSchemaValidatorTestBase { + private static final String PMID = "PMID:30808312"; + private static final String publication = "COL6A1 mutation leading to Bethlem myopathy with recurrent hematuria: a case report"; + + private static Phenopacket phenopacketWithOnePhenotypicFeature() { + var authorAssertion = 
EvidenceBuilder.authorStatementEvidence(PMID, publication); + var VSD = + PhenotypicFeatureBuilder.create("HP:0001629","Ventricular septal defect") + .congenitalOnset() + .evidence(authorAssertion) + .build(); + return PhenopacketBuilder.create("ID:A", metadataWithHpo) + .phenotypicFeature(VSD) + .build(); + } + + private final Phenopacket phenopacket = phenopacketWithOnePhenotypicFeature(); + + @Test + public void testPhenotypicFeatureWithoutOntologyTerm() throws InvalidProtocolBufferException { + assertTrue(phenopacket.getPhenotypicFeaturesCount() > 0); + PhenotypicFeature pf = phenopacket.getPhenotypicFeatures(0); + pf = PhenotypicFeature.newBuilder(pf).clearType().build(); + Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setPhenotypicFeatures(0, pf).build(); + String json = JsonFormat.printer().print(p1); + List errors = validator.validate(json); + assertEquals(1, errors.size()); + ValidationItem error = errors.get(0); + assertEquals(JSON_REQUIRED, error.type()); + String expectedErrorMsg = "$.phenotypicFeatures[0].type: is missing but it is required"; + assertEquals(expectedErrorMsg, error.message()); + } + + /** + * This Phenopacket is invalid, because the PhenotypicFeature has an Evidence element that is lacking + * evidenceCode + */ + @Test + public void testPhenotypicFeatureWithInvalidEvidence() throws InvalidProtocolBufferException { + assertTrue(phenopacket.getPhenotypicFeaturesCount() > 0); + PhenotypicFeature pf = phenopacket.getPhenotypicFeatures(0); + assertTrue(pf.getEvidenceCount() > 0); + Evidence evi = pf.getEvidence(0); + evi = Evidence.newBuilder(evi).clearEvidenceCode().build(); + pf = PhenotypicFeature.newBuilder(pf).setEvidence(0, evi).build(); + Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setPhenotypicFeatures(0, pf).build(); + String json = JsonFormat.printer().print(p1); + // System.out.println(json); + List errors = validator.validate(json); + assertEquals(1, errors.size()); + ValidationItem error = errors.get(0); + 
assertEquals(JSON_REQUIRED, error.type()); + String expectedErrorMsg = "$.phenotypicFeatures[0].evidence[0].evidenceCode: is missing but it is required"; + assertEquals(expectedErrorMsg, error.message()); + } + + +} diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/SimpleValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/SimpleValidatorTest.java new file mode 100644 index 0000000..e194245 --- /dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/SimpleValidatorTest.java @@ -0,0 +1,87 @@ +package org.phenopackets.validator.jsonschema; + +import org.junit.jupiter.api.Test; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.validator.testdatagen.RareDiseasePhenopacket; +import org.phenopackets.validator.testdatagen.SimplePhenopacket; +import org.phenopackets.validator.core.ValidationItem; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +import com.google.protobuf.util.JsonFormat; + +import static org.junit.jupiter.api.Assertions.*; +import static org.phenopackets.validator.jsonschema.JsonValidationItemTypes.JSON_ADDITIONAL_PROPERTIES; +import static org.phenopackets.validator.jsonschema.JsonValidationItemTypes.JSON_REQUIRED; + +public class SimpleValidatorTest extends JsonSchemaValidatorTestBase { + + private static final SimplePhenopacket simplePhenopacket = new SimplePhenopacket(); + + private static final RareDiseasePhenopacket rareDiseasePhenopacket = new RareDiseasePhenopacket(); + + private static File fileFromClasspath(String path) { + String fname = Objects.requireNonNull(Thread.currentThread().getContextClassLoader().getResource(path)).getPath(); + return new File(fname); + } + + @Test + public void testValidationOfSimpleValidPhenopacket() throws Exception { + Phenopacket phenopacket = simplePhenopacket.getPhenopacket(); + String json = 
JsonFormat.printer().print(phenopacket); + List errors = validator.validate(json); + assertTrue(errors.isEmpty()); + // the Phenopacket is not valid if we remove the id + phenopacket = Phenopacket.newBuilder(phenopacket).clearId().build(); + json = JsonFormat.printer().print(phenopacket); + errors = validator.validate(json); + assertEquals(1, errors.size()); + ValidationItem error = errors.get(0); + assertEquals(JSON_REQUIRED, error.type()); + assertEquals("$.id: is missing but it is required", error.message()); + } + + /** + * This example phenopacket does not contain anything except {"disney" : "donald"} + * It does not contain an id or a metaData element and thus should fail. + */ + @Test + public void testValidationOfSimpleInValidPhenopacket() { + String invalidPhenopacketJson = "{\"disney\" : \"donald\"}"; + List errors = validator.validate(invalidPhenopacketJson); + assertEquals(3, errors.size()); + ValidationItem error = errors.get(0); + assertEquals(JSON_REQUIRED, error.type()); + assertEquals("$.id: is missing but it is required", error.message()); + error = errors.get(1); + assertEquals(JSON_REQUIRED, error.type()); + assertEquals("$.metaData: is missing but it is required", error.message()); + error = errors.get(2); + assertEquals(JSON_ADDITIONAL_PROPERTIES, error.type()); + assertEquals("$.disney: is not defined in the schema and the schema does not allow additional properties", error.message()); + } + + @Test + public void testRareDiseaseBethlemahmValidPhenopacket() throws Exception { + Phenopacket bethlehamMyopathy = rareDiseasePhenopacket.getPhenopacket(); + String json = JsonFormat.printer().print(bethlehamMyopathy); + List errors = validator.validate(json); + assertTrue(errors.isEmpty()); + } + + @Test + public void testRareDiseaseBethlemahmInvalidValidPhenopacket() throws IOException { + File invalidMyopathyPhenopacket = fileFromClasspath("json/bethlehamMyopathyInvalidExample.json"); + List errors = validator.validate(invalidMyopathyPhenopacket); 
+ assertEquals(1, errors.size()); + ValidationItem error = errors.get(0); + assertEquals(JSON_ADDITIONAL_PROPERTIES, error.type()); + String expectedErrorMsg = "$.phenotypicFeaturesMALFORMED: is not defined in the schema and the schema does not allow additional properties"; + assertEquals(expectedErrorMsg, error.message()); + } + + +} \ No newline at end of file diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ValueValidatorTest.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ValueValidatorTest.java new file mode 100644 index 0000000..7197a10 --- /dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/jsonschema/ValueValidatorTest.java @@ -0,0 +1,117 @@ +package org.phenopackets.validator.jsonschema; + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.JsonFormat; +import org.junit.jupiter.api.Test; +import org.phenopackets.phenotools.builder.PhenopacketBuilder; +import org.phenopackets.phenotools.builder.builders.*; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.*; +import org.phenopackets.validator.core.ValidationItem; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; +import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass; + +/** + * + * Test a few errors in the Value messages + * @author Peter N Robinson + */ +public class ValueValidatorTest extends JsonSchemaValidatorTestBase { + + private static Phenopacket phenopacketWithValueAndComplexValue() { + MetaData meta = MetaDataBuilder.create("2021-07-01T19:32:35Z", "anonymous biocurator") + .submittedBy("anonymous submitter") + .resource(Resources.hpoVersion("2021-08-02")) + .resource(Resources.mondoVersion("2021-09-01")) + .build(); + // Simple value for blood platelets + OntologyClass cellsPerMl = ontologyClass("UO:0000316", "cells per microliter"); + ReferenceRange rrange = 
ReferenceRangeCreator.create(cellsPerMl, 150_000, 450_000); + Quantity quantity = QuantityBuilder.create(cellsPerMl,24000) + .referenceRange(rrange) + .build(); + Value value = ValueBuilder.create(quantity).build(); + OntologyClass plateletAssay = ontologyClass( "LOINC:26515-7", "Platelets [#/volume] in Blood"); + Measurement plateletM = MeasurementBuilder.value(plateletAssay, value).build(); + return PhenopacketBuilder.create("id:A", meta) + .measurement(plateletM) + .build(); + } + + private static final Phenopacket phenopacket = phenopacketWithValueAndComplexValue(); + + /** + * First test that the unaltered phenopacket is OK + */ + @Test + public void validatePhenopacket() throws InvalidProtocolBufferException { + String json = JsonFormat.printer().print(phenopacket); + //System.out.println(json); + List errors = validator.validate(json); + for (var e : errors) { + System.out.println(e.message()); + } + assertTrue(errors.isEmpty()); + } + + /** + * Test what happens if we remove the value from the Measurement. + * Note that the validation tool reports two errors: + * $.measurements[0].value: is missing but it is required + * $.measurements[0].complexValue: is missing but it is required + * The second one is misleading, but this appears to be a limitation of JSON Schema validation + * of oneOf elements, so the test only checks that the first message is present. + */ + @Test + public void testMissingOntologyTerm() throws InvalidProtocolBufferException { + Measurement m1 = Phenopacket.newBuilder(phenopacket).getMeasurements(0); + m1 = Measurement.newBuilder(m1).clearValue().build(); + Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build(); + String json = JsonFormat.printer().print(p1); + System.out.println(json); + List errors = validator.validate(json); + for (var e : errors) { + System.out.println(e.message()); + } + assertFalse(errors.isEmpty()); + String expectedErrorMsg = "$.measurements[0].value: is missing but it is required"; + boolean foundError = 
errors.stream().map(ValidationItem::message).anyMatch(m -> m.equals(expectedErrorMsg)); + assertTrue(foundError, "was expecting to find \"$.measurements[0].value: is missing but it is required\" but did not"); + } + + @Test + public void referenceRangeMissingLowValue() throws InvalidProtocolBufferException { + Measurement m1 = Phenopacket.newBuilder(phenopacket).getMeasurements(0); + Value v1 = m1.getValue(); + ReferenceRange rrange = v1.getQuantity().getReferenceRange(); + rrange = ReferenceRange.newBuilder(rrange).clearLow().build(); + Quantity q1= Quantity.newBuilder(v1.getQuantity()).setReferenceRange(rrange).build(); + v1 = Value.newBuilder(v1).setQuantity(q1).build(); + m1 = Measurement.newBuilder(m1).setValue(v1).build(); + Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build(); + String json = JsonFormat.printer().print(p1); + //System.out.println(json); + List errors = validator.validate(json); + assertEquals(1, errors.size()); + String expectedErrorMsg = "$.measurements[0].value.quantity.referenceRange.low: is missing but it is required"; + assertEquals(expectedErrorMsg, errors.get(0).message()); + } + + @Test + public void measurementMissingAssay() throws InvalidProtocolBufferException { + Measurement m1 = Phenopacket.newBuilder(phenopacket).getMeasurements(0); + m1 = Measurement.newBuilder(m1).clearAssay().build(); + Phenopacket p1 = Phenopacket.newBuilder(phenopacket).setMeasurements(0, m1).build(); + String json = JsonFormat.printer().print(p1); + List errors = validator.validate(json); + assertEquals(1, errors.size()); + String expectedErrorMsg = "$.measurements[0].assay: is missing but it is required"; + assertEquals(expectedErrorMsg, errors.get(0).message()); + } + + + +} diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/DatagenBase.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/DatagenBase.java new file mode 100644 index 0000000..48db3de --- 
/dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/DatagenBase.java @@ -0,0 +1,21 @@ +package org.phenopackets.validator.testdatagen; + + +import org.phenopackets.schema.v2.core.OntologyClass; + + +import static org.phenopackets.phenotools.builder.builders.OntologyClassBuilder.ontologyClass; + + +public class DatagenBase { + public static final String SCHEMA_VERSION = "2.0"; + public static final OntologyClass CONGENITAL_ONSET = ontologyClass("HP:0003577", "Congenital onset"); + public static final OntologyClass CHILDHOOD_ONSET = ontologyClass("HP:0011463", "Childhood onset"); + public static final OntologyClass ADULT_ONSET = ontologyClass("HP:0003581", "Adult onset"); + public static final OntologyClass SEVERE = ontologyClass("HP:0012828", "Severe"); + + + + + +} diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/RareDiseasePhenopacket.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/RareDiseasePhenopacket.java new file mode 100644 index 0000000..9f65081 --- /dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/RareDiseasePhenopacket.java @@ -0,0 +1,60 @@ +package org.phenopackets.validator.testdatagen; + +import org.phenopackets.phenotools.builder.PhenopacketBuilder; +import org.phenopackets.phenotools.builder.builders.*; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.File; +import org.phenopackets.schema.v2.core.Individual; +import org.phenopackets.schema.v2.core.MetaData; + +public class RareDiseasePhenopacket extends DatagenBase { + + + private final Phenopacket phenopacket; + + public RareDiseasePhenopacket() { + Individual proband = IndividualBuilder.create("patient 1") + .dateOfBirth("1998-01-01T00:00:00Z") + .ageAtLastEncounter("P3Y") + .male() + .build(); + + + var syndactyly = PhenotypicFeatureBuilder.create("HP:0001159", "Syndactyly") + .congenitalOnset().build(); + var 
pneumonia = PhenotypicFeatureBuilder.create("HP:0002090", "Pneumonia") + .childhoodOnset().build(); + var cryptorchidism = PhenotypicFeatureBuilder.create("HP:0000028", "Cryptorchidism") + .congenitalOnset().build(); + var sinusitis = PhenotypicFeatureBuilder.create("HP:0011109", "Chronic sinusitis") + .severe().adultOnset().build(); + + MetaData meta = MetaDataBuilder.create("2021-07-01T19:32:35Z", "anonymous biocurator") + .submittedBy("anonymous submitter") + .resource(Resources.hpoVersion("2021-08-02")) + .resource(Resources.mondoVersion("2021-09-01")) + .externalReference("PMID:20842687", + "Severe dystonic encephalopathy without hyperphenylalaninemia associated with an 18-bp deletion within the proximal GCH1 promoter") + .build(); + File vcf = FileBuilder.hg38vcf("file://data/file.vcf.gz") + .individualToFileIdentifier("kindred 1A", "SAME000234") + .build(); + + this.phenopacket = PhenopacketBuilder.create("phenopacket-id-1", meta) + .individual(proband) + .phenotypicFeature(syndactyly) + .phenotypicFeature(pneumonia) + .phenotypicFeature(cryptorchidism) + .phenotypicFeature(sinusitis) + .file(vcf) + .build(); + + } + + public Phenopacket getPhenopacket() { + return phenopacket; + } + + + +} diff --git a/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/SimplePhenopacket.java b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/SimplePhenopacket.java new file mode 100644 index 0000000..20735de --- /dev/null +++ b/validator-jsonschema/src/test/java/org/phenopackets/validator/testdatagen/SimplePhenopacket.java @@ -0,0 +1,34 @@ +package org.phenopackets.validator.testdatagen; + +import org.phenopackets.phenotools.builder.builders.MetaDataBuilder; +import org.phenopackets.phenotools.builder.builders.Resources; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.MetaData; + + + +/** + * Build the simplest possible phenopacket for validation + */ +public class SimplePhenopacket 
extends DatagenBase { + + private final Phenopacket phenopacket; + + public SimplePhenopacket() { + MetaData meta = MetaDataBuilder.create("2021-07-01T19:32:35Z", "anonymous biocurator") + .submittedBy("anonymous submitter") + .resource(Resources.hpoVersion("2021-08-02")) + .build(); + phenopacket = Phenopacket.newBuilder() + .setId("hello world") + .setMetaData(meta) + .build(); + } + + + public Phenopacket getPhenopacket() { + return phenopacket; + } + + +} diff --git a/validator-core/src/test/resources/json/bethlehamMyopathyExample.json b/validator-jsonschema/src/test/resources/json/bethlehamMyopathyExample.json similarity index 100% rename from validator-core/src/test/resources/json/bethlehamMyopathyExample.json rename to validator-jsonschema/src/test/resources/json/bethlehamMyopathyExample.json diff --git a/validator-core/src/test/resources/json/bethlehamMyopathyInvalidExample.json b/validator-jsonschema/src/test/resources/json/bethlehamMyopathyInvalidExample.json similarity index 100% rename from validator-core/src/test/resources/json/bethlehamMyopathyInvalidExample.json rename to validator-jsonschema/src/test/resources/json/bethlehamMyopathyInvalidExample.json diff --git a/validator-jsonschema/src/test/resources/logback-test.xml b/validator-jsonschema/src/test/resources/logback-test.xml new file mode 100644 index 0000000..0f78182 --- /dev/null +++ b/validator-jsonschema/src/test/resources/logback-test.xml @@ -0,0 +1,14 @@ + + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + \ No newline at end of file diff --git a/validator-ontology/pom.xml b/validator-ontology/pom.xml new file mode 100644 index 0000000..3bd16a5 --- /dev/null +++ b/validator-ontology/pom.xml @@ -0,0 +1,36 @@ + + + + validator + org.phenopackets.validator + 0.1.1-SNAPSHOT + + 4.0.0 + + validator-ontology + + + + org.phenopackets.validator + validator-core + ${project.parent.version} + + + org.monarchinitiative.phenol + phenol-core + + + org.phenopackets + 
phenopacket-schema + + + + com.google.protobuf + protobuf-java-util + + + + \ No newline at end of file diff --git a/validator-ontology/src/main/java/org/phenopackets/validator/ontology/HpoValidator.java b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/HpoValidator.java new file mode 100644 index 0000000..b4d4dd4 --- /dev/null +++ b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/HpoValidator.java @@ -0,0 +1,110 @@ +package org.phenopackets.validator.ontology; + +import com.google.protobuf.util.JsonFormat; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.phenopackets.validator.core.ValidationItem; +import org.phenopackets.validator.core.ValidationItemTypes; +import org.phenopackets.validator.core.ValidatorInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + + +/** + * The validator checks the Phenotypic features for HPO terms. If HPO terms + * are used there, then we check that the HPO terms are included in the ontology (thus, we check for + * invalid HPO ids, and also check if the id used is the latest or if it is an alternate id). + *

+ * The validator performs logical checks too. Due to the annotation propagation rule, where presence of a child term + * implies presence of all its ancestors, it is an error to specify both a child term and its parent, and only the + * child term must be used. The rule is checked for both observed and excluded terms. + *

+ * In addition, if there are both observed and excluded terms, we check that there is no logical inconsistency, e.g., + * NOT Abnormal liver morphology but OBSERVED Hepatic fibrosis. The latter is not possible because + * Hepatic fibrosis implies Abnormal liver morphology. + */ +public class HpoValidator extends OntologyValidator { + private static final Logger LOGGER = LoggerFactory.getLogger(HpoValidator.class); + + private static final String HPO_PREFIX = "HP"; + + public HpoValidator(Ontology hpoOntology) { + super(hpoOntology, ValidatorInfo.of(ONTOLOGY_VALIDATOR, "Human Phenotype Ontology")); + } + + private static Phenopacket readPhenopacket(InputStream inputStream) throws IOException { + String phenopacketJsonString = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8); + Phenopacket.Builder phenoPacketBuilder = Phenopacket.newBuilder(); + JsonFormat.parser().merge(phenopacketJsonString, phenoPacketBuilder); + return phenoPacketBuilder.build(); + } + + @Override + public ValidatorInfo info() { + return validatorInfo; + } + + @Override + public List validate(InputStream inputStream) { + Phenopacket phenopacket; + try { + phenopacket = readPhenopacket(inputStream); + } catch (IOException e) { + LOGGER.warn("Error while validating: {}", e.getMessage(), e); + return List.of(ValidationItem.of(validatorInfo, ValidationItemTypes.syntaxError(), e.getMessage())); + } + + List errors = new ArrayList<>(); + Set observedFeatures = new HashSet<>(); + Set excludedFeatures = new HashSet<>(); + + partitionTermsToObservedAndExcluded(phenopacket.getPhenotypicFeaturesList(), errors, observedFeatures, excludedFeatures); + + checkTermsArePresentInOntology(observedFeatures, errors); + checkTermsUsePrimaryId(observedFeatures, errors); + + checkTermsArePresentInOntology(excludedFeatures, errors); + checkTermsUsePrimaryId(excludedFeatures, errors); + + checkAncestry(observedFeatures, excludedFeatures, errors); + + return errors; + } + + private void 
partitionTermsToObservedAndExcluded(Iterable phenotypicFeatures, + List errors, + Set observedFeatures, + Set excludedFeatures) { + for (var pf : phenotypicFeatures) { + TermId tid; + String id = pf.getType().getId(); + try { + tid = TermId.of(id); + if (!tid.getPrefix().equals(HPO_PREFIX)) { + continue; // only check HPO terms + } + } catch (PhenolRuntimeException pe) { + String message = String.format("Invalid ontology term id: %s", id); + errors.add(ValidationItem.of(validatorInfo, OntologyValidationItemTypes.ONTOLOGY_INVALID_ID, message)); + continue; + } + + if (pf.getExcluded()) { + excludedFeatures.add(tid); + } else { + observedFeatures.add(tid); + } + } + } +} diff --git a/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidationItemTypes.java b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidationItemTypes.java new file mode 100644 index 0000000..45cc3b0 --- /dev/null +++ b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidationItemTypes.java @@ -0,0 +1,29 @@ +package org.phenopackets.validator.ontology; + +import org.phenopackets.validator.core.ValidationItemType; + +class OntologyValidationItemTypes { + + /** + * This error type refers to the use of a term ID, e.g., HP:1234567, which is syntactically correct but which + * does not exist in the actual ontology. + */ + static final ValidationItemType ONTOLOGY_INVALID_ID = + ValidationItemType.of("Invalid Ontology Term ID", "Term id does not exist in ontology"); + + /** + * This error type refers to the use of a term ID that is an alternate_id, i.e., an outdated ID that should + * be replaced by the primary ID of the corresponding Ontology term.
+ */ + static final ValidationItemType ONTOLOGY_TERM_WITH_ALTERNATE_ID = + ValidationItemType.of("Use of outdated Ontology Term ID", "Term id is not the primary id for this term"); + + static final ValidationItemType TERM_AND_ANCESTOR_ARE_USED = + ValidationItemType.of("Term and ancestor are used", "Only the child/specific term should be used"); + + static final ValidationItemType TERM_IS_USED_AND_ANCESTOR_IS_EXCLUDED = + ValidationItemType.of("Term is used while ancestor is excluded", "The term is used despite the ancestor term has been excluded"); + + private OntologyValidationItemTypes() {} + +} diff --git a/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidator.java b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidator.java new file mode 100644 index 0000000..28ecf2f --- /dev/null +++ b/validator-ontology/src/main/java/org/phenopackets/validator/ontology/OntologyValidator.java @@ -0,0 +1,98 @@ +package org.phenopackets.validator.ontology; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.validator.core.PhenopacketValidator; + +import org.phenopackets.validator.core.ValidationItem; +import org.phenopackets.validator.core.ValidatorInfo; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Set; + +public abstract class OntologyValidator implements PhenopacketValidator { + private static final Logger LOGGER = LoggerFactory.getLogger(OntologyValidator.class); + + protected final static String ONTOLOGY_VALIDATOR = "Ontology validation"; + + protected final ValidatorInfo validatorInfo; + protected final Ontology ontology; + + protected OntologyValidator(Ontology ontology, ValidatorInfo vinfo) { + this.ontology = ontology; + this.validatorInfo = vinfo; + } + + protected void checkTermsArePresentInOntology(Iterable termIds, List errors) { + for (TermId 
termId : termIds) { + if (!ontology.containsTerm(termId)) { + String message = String.format("Could not find term id: %s", termId.getValue()); + ValidationItem item = ValidationItem.of(validatorInfo, OntologyValidationItemTypes.ONTOLOGY_INVALID_ID, message); + errors.add(item); + } + } + } + + protected void checkTermsUsePrimaryId(Iterable termIds, List errors) { + for (TermId termId : termIds) { + TermId primaryId = ontology.getPrimaryTermId(termId); + if (primaryId == null) + // not in ontology, this is checked in `checkTermsArePresentInOntology` + continue; + if (!primaryId.equals(termId)) { + String message = String.format("Using alternate (obsolete) id (%s) instead of primary id (%s)", termId.getValue(), primaryId.getValue()); + ValidationItem item = ValidationItem.of(validatorInfo, OntologyValidationItemTypes.ONTOLOGY_TERM_WITH_ALTERNATE_ID, message); + errors.add(item); + } + } + } + + protected void checkAncestry(Set observedTerms, Set excludedTerms, List errors) { + /* + OBSERVED TERMS: + It is an error to: + - provide term and its ancestor, + - provide term while the ancestor is excluded. + */ + for (TermId term : observedTerms) { + Set ancestors = ontology.getAncestorTermIds(term); + + for (TermId ancestor : ancestors) { + if (ancestor.equals(term)) + continue; + + if (observedTerms.contains(ancestor)) { + String message = String.format("Using both term %s and its ancestor %s", term.getValue(), ancestor.getValue()); + errors.add(ValidationItem.of(validatorInfo, OntologyValidationItemTypes.TERM_AND_ANCESTOR_ARE_USED, message)); + } + + if (excludedTerms.contains(ancestor)) { + String message = String.format("Term %s is present while its ancestor %s is excluded", term.getValue(), ancestor.getValue()); + errors.add(ValidationItem.of(validatorInfo, OntologyValidationItemTypes.TERM_IS_USED_AND_ANCESTOR_IS_EXCLUDED, message)); + } + } + } + + /* + EXCLUDED TERMS: + It is an error to provide term and its ancestor. 
It is, however, OK to provide term while the ancestor is present, + i.e. `Arachnodactyly` is excluded, while `Long fingers` are present. + */ + for (TermId term : excludedTerms) { + Set ancestors = ontology.getAncestorTermIds(term); + + for (TermId ancestor : ancestors) { + if (ancestor.equals(term)) + continue; + + if (excludedTerms.contains(ancestor)) { + String message = String.format("Using both term %s and its ancestor %s", term.getValue(), ancestor.getValue()); + errors.add(ValidationItem.of(validatorInfo, OntologyValidationItemTypes.TERM_AND_ANCESTOR_ARE_USED, message)); + } + } + } + } +} diff --git a/validator-ontology/src/test/java/org/phenopackets/validator/ontology/HpoValidatorTest.java b/validator-ontology/src/test/java/org/phenopackets/validator/ontology/HpoValidatorTest.java new file mode 100644 index 0000000..3d6cd96 --- /dev/null +++ b/validator-ontology/src/test/java/org/phenopackets/validator/ontology/HpoValidatorTest.java @@ -0,0 +1,178 @@ +package org.phenopackets.validator.ontology; + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.util.JsonFormat; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.schema.v2.Phenopacket; +import org.phenopackets.schema.v2.core.OntologyClass; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.phenopackets.validator.core.ValidationItem; + +import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class HpoValidatorTest { + + public Ontology 
ontology; + + public HpoValidator validator; + + private static byte[] wrapInPhenopacketAndGetJsonBytes(List phenotypicFeatures) throws InvalidProtocolBufferException { + Phenopacket phenopacket = Phenopacket.newBuilder() + .addAllPhenotypicFeatures(phenotypicFeatures) + .build(); + return JsonFormat.printer().print(phenopacket).getBytes(StandardCharsets.UTF_8); + } + + private static PhenotypicFeature makePhenotypicFeature(String termId, boolean excluded) { + return PhenotypicFeature.newBuilder() + .setType(OntologyClass.newBuilder().setId(termId).build()) + .setExcluded(excluded) + .build(); + } + + // set up the Ontology mocks. There is no better way to test this unless we stash ~2mb hp.json.gz in the repo + private static Ontology prepareMiniOntology() { + Ontology ontology = mock(Ontology.class); + TermId one = TermId.of("HP:0000001"); // the root + TermId two = TermId.of("HP:0000002"); // one's child + TermId three = TermId.of("HP:0000003"); // one's child + TermId four = TermId.of("HP:0000004"); // three's child + TermId five = TermId.of("HP:0000005"); // three's child + when(ontology.containsTerm(one)).thenReturn(true); + when(ontology.getPrimaryTermId(one)).thenReturn(one); + when(ontology.containsTerm(two)).thenReturn(true); + when(ontology.getPrimaryTermId(two)).thenReturn(two); + when(ontology.containsTerm(three)).thenReturn(true); + when(ontology.getPrimaryTermId(three)).thenReturn(three); + when(ontology.containsTerm(four)).thenReturn(true); + when(ontology.getPrimaryTermId(four)).thenReturn(four); + when(ontology.containsTerm(five)).thenReturn(true); + when(ontology.getPrimaryTermId(five)).thenReturn(five); + + when(ontology.getAncestorTermIds(one)).thenReturn(Set.of(one)); + when(ontology.getAncestorTermIds(two)).thenReturn(Set.of(two, one)); + when(ontology.getAncestorTermIds(three)).thenReturn(Set.of(three, one)); + when(ontology.getAncestorTermIds(four)).thenReturn(Set.of(four, three, one)); + 
when(ontology.getAncestorTermIds(five)).thenReturn(Set.of(five, three, one)); + + return ontology; + } + + @BeforeEach + public void setUp() { + ontology = prepareMiniOntology(); + validator = new HpoValidator(ontology); + } + + @Nested + public class NoErrorsInValidation { + + @Test + public void disjointTerms() throws Exception { + List phenotypicFeatures = List.of( + makePhenotypicFeature("HP:0000002", false), + makePhenotypicFeature("HP:0000005", false)); + byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures); + + List items = validator.validate(new ByteArrayInputStream(bytes)); + + assertThat(items, hasSize(0)); + } + + @Test + public void parentIsPresentWhileChildrenAreExcluded() throws Exception { + List phenotypicFeatures = List.of( + makePhenotypicFeature("HP:0000003", false), + makePhenotypicFeature("HP:0000004", true), + makePhenotypicFeature("HP:0000005", true) + ); + byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures); + + List items = validator.validate(new ByteArrayInputStream(bytes)); + + assertThat(items, hasSize(0)); + } + + } + + @Nested + public class ValidationFails { + + @Test + public void termIdIsAbsentFromOntology() throws Exception { + List phenotypicFeatures = List.of(makePhenotypicFeature("HP:9999999", false)); + byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures); + + List items = validator.validate(new ByteArrayInputStream(bytes)); + + assertThat(items, hasSize(1)); + ValidationItem expected = ValidationItem.of(validator.info(), OntologyValidationItemTypes.ONTOLOGY_INVALID_ID, "Could not find term id: HP:9999999"); + assertThat(items.get(0), equalTo(expected)); + } + + @Test + public void nonPrimaryTermIdIsUsed() throws Exception { + List phenotypicFeatures = List.of(makePhenotypicFeature("HP:9999999", false)); + byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures); + + when(ontology.containsTerm(TermId.of("HP:9999999"))).thenReturn(true); + 
when(ontology.getPrimaryTermId(TermId.of("HP:9999999"))).thenReturn(TermId.of("HP:0000001")); + + List items = validator.validate(new ByteArrayInputStream(bytes)); + + assertThat(items, hasSize(1)); + ValidationItem expected = ValidationItem.of(validator.info(), + OntologyValidationItemTypes.ONTOLOGY_TERM_WITH_ALTERNATE_ID, + "Using alternate (obsolete) id (HP:9999999) instead of primary id (HP:0000001)"); + assertThat(items.get(0), equalTo(expected)); + } + + @Test + public void usingTermAlongWithItsAncestor() throws Exception { + List phenotypicFeatures = List.of( + makePhenotypicFeature("HP:0000003", false), + makePhenotypicFeature("HP:0000004", false)); + byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures); + + + List items = validator.validate(new ByteArrayInputStream(bytes)); + + assertThat(items, hasSize(1)); + ValidationItem expected = ValidationItem.of(validator.info(), + OntologyValidationItemTypes.TERM_AND_ANCESTOR_ARE_USED, + "Using both term HP:0000004 and its ancestor HP:0000003"); + assertThat(items.get(0), equalTo(expected)); + } + + @Test + public void usingTermWhileParentTermIsExcluded() throws Exception { + List phenotypicFeatures = List.of( + makePhenotypicFeature("HP:0000003", true), + makePhenotypicFeature("HP:0000004", false)); + byte[] bytes = wrapInPhenopacketAndGetJsonBytes(phenotypicFeatures); + + + List items = validator.validate(new ByteArrayInputStream(bytes)); + + assertThat(items, hasSize(1)); + ValidationItem expected = ValidationItem.of(validator.info(), + OntologyValidationItemTypes.TERM_IS_USED_AND_ANCESTOR_IS_EXCLUDED, + "Term HP:0000004 is present while its ancestor HP:0000003 is excluded"); + assertThat(items.get(0), equalTo(expected)); + } + } + +} \ No newline at end of file