From 80a08185b85b66b37d6c25a65da268851cb5d429 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Thu, 30 Oct 2025 11:05:24 +0100 Subject: [PATCH] Bug 1997236 - Don't try to re-parse JSON in util.taskcluster.get_artifact The previous code was only checking if the returned response from the taskcluster API was a dict to verify if it had been parsed or not. That works well until the JSON file we're getting contains something like a list in which case we skip the first condition, end up with a parsed response in `if path.endswith(".json")` and boom `AttributeError: 'list' object has no attribute 'json'`. Since we already rely on the response type being `requests.Request`, I changed the logic to be: - If we receive a dict, it might be either the parsed artifact content, or taskcluster giving us `{"response": requests.Response}` because it was unable to parse it. - In the case where it's taskcluster's (detected by checking if "response" is in the dict and if it has the right type), peel it off, response is now the `Response` object and we can read from it. - At this point we have 2 choices again, either response is a `request.Response` that we have to read, or anything else. If it is anything else then it's already been parsed by taskcluster so we just return it as is. - Continue with the previous logic, try to parse the file based on the extension. I left the JSON case in there even though we should not be hitting it because it will raise on invalid JSON, which was the case previously too. Taskcluster masks JSON failures by returning the response object instead. --- src/taskgraph/util/taskcluster.py | 20 +++++++++++--------- test/test_util_taskcluster.py | 11 +++++++++++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/taskgraph/util/taskcluster.py b/src/taskgraph/util/taskcluster.py index a8e6e2e6f..cec8a8f0e 100644 --- a/src/taskgraph/util/taskcluster.py +++ b/src/taskgraph/util/taskcluster.py @@ -73,15 +73,17 @@ def get_taskcluster_client(service: str): def _handle_artifact( path: str, response: Union[requests.Response, dict[str, Any]] ) -> Any: - if isinstance(response, dict): - # When taskcluster client returns non-JSON responses, it wraps them in {"response": } - if "response" in response and isinstance( - response["response"], requests.Response - ): - response = response["response"] - else: - # If we already a dict (parsed JSON), return it directly. - return response + # When taskcluster client returns non-JSON responses, it wraps them in {"response": } + if ( + isinstance(response, dict) + and "response" in response + and isinstance(response["response"], requests.Response) + ): + response = response["response"] + + if not isinstance(response, requests.Response): + # At this point, if we don't have a response object, it's already parsed, return it + return response # We have a response object, load the content based on the path extension. if path.endswith(".json"): diff --git a/test/test_util_taskcluster.py b/test/test_util_taskcluster.py index 6f98a0df4..d6e97aaa2 100644 --- a/test/test_util_taskcluster.py +++ b/test/test_util_taskcluster.py @@ -124,6 +124,17 @@ def test_get_artifact(responses, root_url): result = tc.get_artifact(tid, "artifact.json") assert result == {"foo": "bar"} + # Test JSON artifact that isn't a dict (bug 1997236) + responses.get("http://foo.bar/artifact.json", json=[1, 2, 3]) + responses.get( + f"{root_url}/api/queue/v1/task/{tid}/artifacts/artifact.json", + body=b'{"type": "s3", "url": "http://foo.bar/artifact.json"}', + status=303, + headers={"Location": "http://foo.bar/artifact.json"}, + ) + result = tc.get_artifact(tid, "artifact.json") + assert result == [1, 2, 3] + # Test YAML artifact expected_result = {"foo": b"\xe2\x81\x83".decode()} responses.get(