From 36716cbae5b2da2cf88ff6de98897a5ef0b6f6d6 Mon Sep 17 00:00:00 2001 From: Danny Berger Date: Sat, 14 Oct 2023 14:29:40 -0600 Subject: [PATCH 1/5] Remove path from Origin header to match spec https://www.rfc-editor.org/rfc/rfc6454#section-7 --- iiif_validator/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iiif_validator/validator.py b/iiif_validator/validator.py index 37f7210..6b1d6c3 100644 --- a/iiif_validator/validator.py +++ b/iiif_validator/validator.py @@ -257,7 +257,7 @@ def get_uri_for_rel(self, links, rel): def fetch(self, url): # Make it look like a real browser request - HEADERS = {"Origin": "http://iiif.io/", + HEADERS = {"Origin": "http://iiif.io", "Referer": "http://iiif.io/api/image/validator", "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3pre) Gecko/20081130 Minefield/3.1b3pre"} req = Request(url, headers=HEADERS) From 2e42990bec944596acbcfec4f67b341c566e9110 Mon Sep 17 00:00:00 2001 From: Danny Berger Date: Sat, 14 Oct 2023 14:29:40 -0600 Subject: [PATCH 2/5] Support non-wildcard CORS response headers Wildcards are recommended for broadest access, but request-based origins are still valid. 
https://iiif.io/api/image/3.0/#71-cors https://www.w3.org/TR/cors/ --- iiif_validator/tests/cors.py | 5 +++-- iiif_validator/validator.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/iiif_validator/tests/cors.py b/iiif_validator/tests/cors.py index d8547f4..dd6cada 100644 --- a/iiif_validator/tests/cors.py +++ b/iiif_validator/tests/cors.py @@ -1,4 +1,4 @@ -from .test import BaseTest +from .test import BaseTest, ValidatorError class Test_Cors(BaseTest): label = 'Cross Origin Headers' @@ -14,5 +14,6 @@ def run(self, result): if k.lower() == 'access-control-allow-origin': cors = v break - self.validationInfo.check('CORS', cors, '*', result, 'Failed to get correct CORS header.') + if cors != '*' and cors != result.get_request_origin(): + raise ValidatorError('cors', cors, '*', result, 'Failed to match Access-Control-Allow-Origin response header') return result diff --git a/iiif_validator/validator.py b/iiif_validator/validator.py index 6b1d6c3..4208412 100644 --- a/iiif_validator/validator.py +++ b/iiif_validator/validator.py @@ -255,9 +255,12 @@ def get_uri_for_rel(self, links, rel): return uri return None + def get_request_origin(self): + return "http://iiif.io" + def fetch(self, url): # Make it look like a real browser request - HEADERS = {"Origin": "http://iiif.io", + HEADERS = {"Origin": self.get_request_origin(), "Referer": "http://iiif.io/api/image/validator", "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3pre) Gecko/20081130 Minefield/3.1b3pre"} req = Request(url, headers=HEADERS) From bdbc85bb72831e12b20694dd764c4522dd761743 Mon Sep 17 00:00:00 2001 From: Danny Berger Date: Sat, 14 Oct 2023 14:29:40 -0600 Subject: [PATCH 3/5] Fix Link header parser for URLs with semicolons The `>` character is a more canonical delimiter for the end of a URI, and fixes the original issue of failed parses of URIs containing a valid semicolon. 
The original use of semicolon and backtracking seems intentional, but not sure what the original intent or test case might have been. --- iiif_validator/validator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/iiif_validator/validator.py b/iiif_validator/validator.py index 4208412..9ea07f0 100644 --- a/iiif_validator/validator.py +++ b/iiif_validator/validator.py @@ -178,12 +178,10 @@ def parse_links(self, header): elif state == "uri": uri = [] d = data.pop(0) - while d != ";": + while d != ">": uri.append(d) d = data.pop(0) uri = ''.join(uri) - uri = uri[:-1] - data.insert(0, ';') # Not an error to have the same URI multiple times (I think!) if (uri not in links): links[uri] = {} From 48b9bb5a7628c96f5fd5254687c2df719c98391d Mon Sep 17 00:00:00 2001 From: Danny Berger Date: Sat, 14 Oct 2023 14:29:40 -0600 Subject: [PATCH 4/5] Fix WEBP detection for byte strings Python3 changed behavior of strings vs bytes such that a WEBP string constant is not considered equivalent to the bytes from reading. 
Change summarized in: https://blog.feabhas.com/2019/02/python-3-unicode-and-byte-strings/ --- iiif_validator/tests/format_webp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iiif_validator/tests/format_webp.py b/iiif_validator/tests/format_webp.py index 9950e04..4319ae0 100644 --- a/iiif_validator/tests/format_webp.py +++ b/iiif_validator/tests/format_webp.py @@ -25,7 +25,7 @@ def run(self, result): raise ValidatorError('format', 'http response code: {}'.format(error.code), url, result, 'Failed to retrieve webp, got response code {}'.format(error.code)) img = wh.read() wh.close() - if img[8:12] != "WEBP": + if img[8:12] != b'WEBP': raise ValidatorError('format', 'unknown', 'WEBP', result) else: result.tests.append('format') From 870860464072920092c4e52df5608116d94e744b Mon Sep 17 00:00:00 2001 From: Danny Berger Date: Sat, 14 Oct 2023 14:29:40 -0600 Subject: [PATCH 5/5] Fix JP2 format detection This appears to have been a confusion between filemagic and python-magic, but not sure where the regression would have been introduced. 
It was erroring with: exception: __enter__ Which was a suppression of the original error: AttributeError: 'Magic' object has no attribute 'id_buffer' --- iiif_validator/tests/format_jp2.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/iiif_validator/tests/format_jp2.py b/iiif_validator/tests/format_jp2.py index ba5bc12..4833206 100644 --- a/iiif_validator/tests/format_jp2.py +++ b/iiif_validator/tests/format_jp2.py @@ -30,12 +30,10 @@ def run(self, result): if wh.getcode() != 200: raise ValidatorError('format', 'http response code: {}'.format(wh.getcode()), url, result, 'Failed to retrieve jp2, got response code {}'.format(wh.getcode())) - with magic.Magic() as m: - print ('test') - info = m.id_buffer(img) - if not info.startswith('JPEG 2000'): - # Not JP2 - raise ValidatorError('format', info, 'JPEG 2000', result) - else: - result.tests.append('format') - return result + info = magic.from_buffer(img[:1024]) + if not info.startswith('JPEG 2000'): + # Not JP2 + raise ValidatorError('format', info, 'JPEG 2000', result) + else: + result.tests.append('format') + return result