From 36716cbae5b2da2cf88ff6de98897a5ef0b6f6d6 Mon Sep 17 00:00:00 2001
From: Danny Berger <dpb587@gmail.com>
Date: Sat, 14 Oct 2023 14:29:40 -0600
Subject: [PATCH 1/5] Remove path from Origin header to match spec

https://www.rfc-editor.org/rfc/rfc6454#section-7
---
 iiif_validator/validator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/iiif_validator/validator.py b/iiif_validator/validator.py
index 37f7210..6b1d6c3 100644
--- a/iiif_validator/validator.py
+++ b/iiif_validator/validator.py
@@ -257,7 +257,7 @@ def get_uri_for_rel(self, links, rel):
 
     def fetch(self, url):
         # Make it look like a real browser request
-        HEADERS = {"Origin": "http://iiif.io/", 
+        HEADERS = {"Origin": "http://iiif.io",
             "Referer": "http://iiif.io/api/image/validator",
             "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3pre) Gecko/20081130 Minefield/3.1b3pre"}
         req = Request(url, headers=HEADERS)

From 2e42990bec944596acbcfec4f67b341c566e9110 Mon Sep 17 00:00:00 2001
From: Danny Berger <dpb587@gmail.com>
Date: Sat, 14 Oct 2023 14:29:40 -0600
Subject: [PATCH 2/5] Support non-wildcard CORS response headers

Wildcards are recommended for broadest access, but request-based origins
are still valid.

https://iiif.io/api/image/3.0/#71-cors
https://www.w3.org/TR/cors/
---
 iiif_validator/tests/cors.py | 5 +++--
 iiif_validator/validator.py  | 5 ++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/iiif_validator/tests/cors.py b/iiif_validator/tests/cors.py
index d8547f4..dd6cada 100644
--- a/iiif_validator/tests/cors.py
+++ b/iiif_validator/tests/cors.py
@@ -1,4 +1,4 @@
-from .test import BaseTest
+from .test import BaseTest, ValidatorError
 
 class Test_Cors(BaseTest):
     label = 'Cross Origin Headers'
@@ -14,5 +14,6 @@ def run(self, result):
             if k.lower() == 'access-control-allow-origin':
                 cors = v
                 break
-        self.validationInfo.check('CORS', cors, '*', result, 'Failed to get correct CORS header.')
+        if cors != '*' and cors != result.get_request_origin():
+            raise ValidatorError('cors', cors, '*', result, 'Failed to match Access-Control-Allow-Origin response header')
         return result
diff --git a/iiif_validator/validator.py b/iiif_validator/validator.py
index 6b1d6c3..4208412 100644
--- a/iiif_validator/validator.py
+++ b/iiif_validator/validator.py
@@ -255,9 +255,12 @@ def get_uri_for_rel(self, links, rel):
                 return uri
         return None
 
+    def get_request_origin(self):
+        return "http://iiif.io"
+
     def fetch(self, url):
         # Make it look like a real browser request
-        HEADERS = {"Origin": "http://iiif.io",
+        HEADERS = {"Origin": self.get_request_origin(),
             "Referer": "http://iiif.io/api/image/validator",
             "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3pre) Gecko/20081130 Minefield/3.1b3pre"}
         req = Request(url, headers=HEADERS)

From bdbc85bb72831e12b20694dd764c4522dd761743 Mon Sep 17 00:00:00 2001
From: Danny Berger <dpb587@gmail.com>
Date: Sat, 14 Oct 2023 14:29:40 -0600
Subject: [PATCH 3/5] Fix Link header parser for URLs with semicolons

The `>` character is a more canonical delimiter for the end of a URI, and
fixes the original issue of failed parses of URIs containing a valid
semicolon.

The original use of the semicolon with backtracking seems intentional, but
I am not sure what the original intent or test case might have been.
---
 iiif_validator/validator.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/iiif_validator/validator.py b/iiif_validator/validator.py
index 4208412..9ea07f0 100644
--- a/iiif_validator/validator.py
+++ b/iiif_validator/validator.py
@@ -178,12 +178,10 @@ def parse_links(self, header):
             elif state == "uri":
                 uri = []
                 d = data.pop(0)                
-                while d != ";":
+                while d != ">":
                     uri.append(d)
                     d = data.pop(0)
                 uri = ''.join(uri)
-                uri = uri[:-1]
-                data.insert(0, ';')
                 # Not an error to have the same URI multiple times (I think!)
                 if (uri not in links):
                     links[uri] = {}

From 48b9bb5a7628c96f5fd5254687c2df719c98391d Mon Sep 17 00:00:00 2001
From: Danny Berger <dpb587@gmail.com>
Date: Sat, 14 Oct 2023 14:29:40 -0600
Subject: [PATCH 4/5] Fix WEBP detection for byte strings

Python 3 changed the behavior of strings vs. bytes such that a "WEBP" string
constant is never considered equal to the bytes read from the response.

Change summarized in: https://blog.feabhas.com/2019/02/python-3-unicode-and-byte-strings/
---
 iiif_validator/tests/format_webp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/iiif_validator/tests/format_webp.py b/iiif_validator/tests/format_webp.py
index 9950e04..4319ae0 100644
--- a/iiif_validator/tests/format_webp.py
+++ b/iiif_validator/tests/format_webp.py
@@ -25,7 +25,7 @@ def run(self, result):
             raise ValidatorError('format', 'http response code: {}'.format(error.code), url, result, 'Failed to retrieve webp, got response code {}'.format(error.code))
         img = wh.read()
         wh.close()
-        if img[8:12] != "WEBP":
+        if img[8:12] != b'WEBP':
             raise ValidatorError('format', 'unknown', 'WEBP', result)
         else:
             result.tests.append('format')

From 870860464072920092c4e52df5608116d94e744b Mon Sep 17 00:00:00 2001
From: Danny Berger <dpb587@gmail.com>
Date: Sat, 14 Oct 2023 14:29:40 -0600
Subject: [PATCH 5/5] Fix JP2 format detection

This appears to have been a confusion between filemagic and python-magic,
but I am not sure where the regression was introduced. It was erroring
with:

    exception: __enter__

Which was a suppression of the original error:

    AttributeError: 'Magic' object has no attribute 'id_buffer'
---
 iiif_validator/tests/format_jp2.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/iiif_validator/tests/format_jp2.py b/iiif_validator/tests/format_jp2.py
index ba5bc12..4833206 100644
--- a/iiif_validator/tests/format_jp2.py
+++ b/iiif_validator/tests/format_jp2.py
@@ -30,12 +30,10 @@ def run(self, result):
         if wh.getcode() != 200:
             raise ValidatorError('format', 'http response code: {}'.format(wh.getcode()), url, result, 'Failed to retrieve jp2, got response code {}'.format(wh.getcode()))
 
-        with magic.Magic() as m:
-            print ('test')
-            info = m.id_buffer(img)
-            if not info.startswith('JPEG 2000'):
-                # Not JP2
-                raise ValidatorError('format', info, 'JPEG 2000', result)
-            else:
-                result.tests.append('format')
-                return result
+        info = magic.from_buffer(img[:1024])
+        if not info.startswith('JPEG 2000'):
+            # Not JP2
+            raise ValidatorError('format', info, 'JPEG 2000', result)
+        else:
+            result.tests.append('format')
+            return result