-
Notifications
You must be signed in to change notification settings - Fork 98
Description
- What versions are you using?
Give your Oracle Database version, e.g.:
Oracle Database Version: 19.29.0.0.0
Give your Oracle Client version (if you are using Thick mode):
Also run Python and show the output of:
platform.platform: Linux-6.1.102-111.182.amzn2023.x86_64-x86_64-with-glibc2.34
sys.maxsize > 2**32: True
platform.python_version: 3.13.1
oracledb.version: 3.4.2
- Is it an error or a hang or a crash?
Data corruption - no error is raised, but data is stored incorrectly. - What error(s) or behavior you are seeing?
Issue A: CLOB columns store data as UTF-16 instead of database character set
When inserting the string "Danielle" (8 characters) into a CLOB column:
Expected: LENGTH() = 8, DUMP() = 68,97,110,105,101,108,108,101
Actual: LENGTH() = 16, DUMP() = 0,68,0,97,0,110,0,105,0,101,0,108,0,108,0,101
Null bytes are interleaved, doubling the string length.
Issue B: VARCHAR2 columns store raw UTF-8 bytes instead of converting to database character set
When inserting the string "¿Hola?" into a VARCHAR2 column:
Expected: ¿ stored as single byte 191 (WE8MSWIN1252 encoding)
Actual: ¿ stored as two bytes 194,191 (raw UTF-8 bytes)
- Does your application call init_oracle_client()?
No
- Include a runnable Python script that shows the problem.
import oracledb
import sys
import platform
# Connect to a WE8MSWIN1252 database in thin mode
conn = oracledb.connect(user="", password="", dsn="")
print(f"Oracle Database Version: {conn.version}")
print("platform.platform:", platform.platform())
print("sys.maxsize > 2**32:", sys.maxsize > 2**32)
print("platform.python_version:", platform.python_version())
print("oracledb.__version__:", oracledb.__version__)
oracledb.defaults.fetch_lobs = False
cursor = conn.cursor()
# Verify database character set
cursor.execute("SELECT value FROM NLS_DATABASE_PARAMETERS WHERE parameter = 'NLS_CHARACTERSET'")
print(f"NLS_CHARACTERSET: {cursor.fetchone()[0]}")
# ============================================================
# TEST 1: CLOB UTF-16 encoding bug
# ============================================================
print("\n" + "="*60)
print("TEST 1: CLOB with direct_path_load")
print("="*60)
try:
cursor.execute("DROP TABLE test_clob_bug PURGE")
except:
pass
cursor.execute("CREATE TABLE test_clob_bug (id NUMBER, name CLOB)")
data = [
(1, "Danielle"),
(2, "Angel"),
(3, "Joshua")
]
conn.direct_path_load(
schema_name="ADMIN",
table_name="TEST_CLOB_BUG",
column_names=["ID", "NAME"],
data=data
)
cursor.execute("""
SELECT id, name, LENGTH(name), DUMP(TO_CHAR(SUBSTR(name, 1, 10)))
FROM test_clob_bug
""")
print("\nResults (CLOB + direct_path_load):")
print("Expected: LENGTH=8 for 'Danielle', DUMP without null bytes (0s)")
print("-" * 40)
for row in cursor:
print(row)
# ============================================================
# TEST 2: VARCHAR2 special character encoding bug
# ============================================================
print("\n" + "="*60)
print("TEST 2: VARCHAR2 special characters with direct_path_load")
print("="*60)
try:
cursor.execute("DROP TABLE test_varchar2_bug PURGE")
except:
pass
cursor.execute("CREATE TABLE test_varchar2_bug (id NUMBER, name VARCHAR2(100))")
data_special = [
(1, "¿Hola?"),
(2, "Café"),
(3, "Niño"),
(4, "€100"),
(5, "Ñoño")
]
conn.direct_path_load(
schema_name="ADMIN",
table_name="TEST_VARCHAR2_BUG",
column_names=["ID", "NAME"],
data=data_special
)
cursor.execute("""
SELECT id, name, LENGTH(name), DUMP(name)
FROM test_varchar2_bug
ORDER BY id
""")
print("\nResults (VARCHAR2 + direct_path_load):")
print("Expected: ¿ should be DUMP 191 (single byte), not 194,191 (UTF-8 bytes)")
print("-" * 40)
for row in cursor:
print(row)
cursor.close()
conn.close()
Oracle Database Version: 19.29.0.0.0
platform.platform: Linux-6.1.102-111.182.amzn2023.x86_64-x86_64-with-glibc2.34
sys.maxsize > 2**32: True
platform.python_version: 3.13.1
oracledb.__version__: 3.4.2
NLS_CHARACTERSET: WE8MSWIN1252
============================================================
TEST 1: CLOB with direct_path_load
============================================================
Results (CLOB + direct_path_load):
Expected: LENGTH=8 for 'Danielle', DUMP without null bytes (0s)
----------------------------------------
(1, '\x00D\x00a\x00n\x00i\x00e\x00l\x00l\x00e', 16, 'Typ=1 Len=10: 0,68,0,97,0,110,0,105,0,101')
(2, '\x00A\x00n\x00g\x00e\x00l', 10, 'Typ=1 Len=10: 0,65,0,110,0,103,0,101,0,108')
(3, '\x00J\x00o\x00s\x00h\x00u\x00a', 12, 'Typ=1 Len=10: 0,74,0,111,0,115,0,104,0,117')
============================================================
TEST 2: VARCHAR2 special characters with direct_path_load
============================================================
Results (VARCHAR2 + direct_path_load):
Expected: ¿ should be DUMP 191 (single byte), not 194,191 (UTF-8 bytes)
----------------------------------------
(1, '¿Hola?', 7, 'Typ=1 Len=7: 194,191,72,111,108,97,63')
(2, 'Café', 5, 'Typ=1 Len=5: 67,97,102,195,169')
(3, 'Niño', 5, 'Typ=1 Len=5: 78,105,195,177,111')
(4, '€100', 6, 'Typ=1 Len=6: 226,130,172,49,48,48')
(5, 'Ñoño', 6, 'Typ=1 Len=6: 195,145,111,195,177,111')