Skip to content

Commit 2c3472d

Browse files
committed
1 parent 9a0fcc1 commit 2c3472d

4 files changed

Lines changed: 86 additions & 6 deletions

File tree

sp_api/api/reports/reports.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,9 @@ def get_report_document(
378378
with client.stream("GET", res.payload.get("url")) as document_response:
379379
if not character_code:
380380
character_code = resolve_character_code(
381-
document_response.encoding, fallback="iso-8859-1"
381+
document_response.encoding,
382+
fallback="iso-8859-1",
383+
response_charset_encoding=getattr(document_response, "charset_encoding", None),
382384
)
383385
stream_to_file_sync(
384386
document_response,
@@ -390,7 +392,9 @@ def get_report_document(
390392
document_response = client.get(res.payload.get("url"))
391393
if not character_code:
392394
character_code = resolve_character_code(
393-
document_response.encoding, fallback="iso-8859-1"
395+
document_response.encoding,
396+
fallback="iso-8859-1",
397+
response_charset_encoding=getattr(document_response, "charset_encoding", None),
394398
)
395399
document = decompress_bytes(
396400
document_response.content, compression_algorithm

sp_api/asyncio/api/reports/reports.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,9 @@ async def get_report_document(
373373
) as document_response:
374374
if not character_code:
375375
character_code = resolve_character_code(
376-
document_response.encoding, fallback="iso-8859-1"
376+
document_response.encoding,
377+
fallback="iso-8859-1",
378+
response_charset_encoding=getattr(document_response, "charset_encoding", None),
377379
)
378380
await stream_to_file_async(
379381
document_response,
@@ -389,7 +391,9 @@ async def get_report_document(
389391
)
390392
if not character_code:
391393
character_code = resolve_character_code(
392-
document_response.encoding, fallback="iso-8859-1"
394+
document_response.encoding,
395+
fallback="iso-8859-1",
396+
response_charset_encoding=getattr(document_response, "charset_encoding", None),
393397
)
394398
document = decompress_bytes(
395399
document_response.content, compression_algorithm

sp_api/util/report_document.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,24 @@
33
from io import BytesIO, StringIO
44

55

6-
def resolve_character_code(response_encoding, fallback="iso-8859-1"):
7-
character_code = response_encoding or fallback
6+
def resolve_character_code(
    response_encoding=None,
    fallback="iso-8859-1",
    response_charset_encoding=None,
):
    """Choose the character encoding used to decode a report document.

    Candidates are considered in this order:

    1. ``response_charset_encoding`` whenever it is truthy.
    2. ``response_encoding`` when it is truthy and is not ``utf-8``
       (compared case-insensitively).
    3. ``fallback`` otherwise.

    A ``utf-8`` ``response_encoding`` without a charset is deliberately
    treated as "missing": requests reported ``None`` for a response that
    declared no charset, whereas httpx reports ``utf-8`` by default in that
    situation, and the historical behavior was to use the fallback.

    Whatever is selected, ``windows-31j`` (any letter case) is returned as
    ``cp932``. A falsy selection (e.g. ``fallback=None``) is returned as-is.
    """
    if response_charset_encoding:
        chosen = response_charset_encoding
    elif not response_encoding or response_encoding.lower() == "utf-8":
        # Nothing reported, or only the implicit utf-8 default.
        chosen = fallback
    else:
        chosen = response_encoding

    if not chosen:
        return chosen
    return "cp932" if chosen.lower() == "windows-31j" else chosen
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from sp_api.util.report_document import resolve_character_code
2+
3+
4+
def test_resolve_character_code_uses_charset_encoding_when_available():
    """A provided charset encoding is returned even when encoding is utf-8."""
    resolved = resolve_character_code(
        response_encoding="utf-8",
        response_charset_encoding="iso-8859-1",
        fallback="iso-8859-1",
    )
    assert resolved == "iso-8859-1"
13+
14+
15+
def test_resolve_character_code_falls_back_from_default_utf8_without_charset():
    """utf-8 with no charset encoding is treated as missing: fallback wins."""
    resolved = resolve_character_code(
        response_encoding="utf-8",
        response_charset_encoding=None,
        fallback="iso-8859-1",
    )
    assert resolved == "iso-8859-1"
24+
25+
26+
def test_resolve_character_code_falls_back_when_encoding_is_missing():
    """With neither encoding nor charset encoding, the fallback is returned."""
    resolved = resolve_character_code(
        response_encoding=None,
        response_charset_encoding=None,
        fallback="iso-8859-1",
    )
    assert resolved == "iso-8859-1"
35+
36+
37+
def test_resolve_character_code_keeps_non_utf8_encoding():
    """A non-utf-8 encoding is kept; windows-31j is reported as cp932."""
    resolved = resolve_character_code(
        response_encoding="windows-31j",
        response_charset_encoding=None,
        fallback="iso-8859-1",
    )
    assert resolved == "cp932"
46+
47+
48+
def test_resolve_character_code_keeps_explicit_utf8_charset():
    """utf-8 given as the charset encoding is honored, not replaced."""
    resolved = resolve_character_code(
        response_encoding="utf-8",
        response_charset_encoding="utf-8",
        fallback="iso-8859-1",
    )
    assert resolved == "utf-8"

0 commit comments

Comments
 (0)