Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 80 additions & 58 deletions mssql_python/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,14 @@ def __init__(self, connection: "Connection", timeout: int = 0) -> None:
self._conn_native_uuid = getattr(self.connection, "_native_uuid", None)
self._next_row_index = 0 # internal: index of the next row the driver will return (0-based)
self._has_result_set = False # Track if we have an active result set
# Cache decoding encoding strings — these don't change between fetches,
# so we avoid 2 method calls + 2 dict.get() per fetch call.
self._cached_char_encoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value).get(
"encoding", "utf-8"
)
self._cached_wchar_encoding = self._get_decoding_settings(
ddbc_sql_const.SQL_WCHAR.value
).get("encoding", "utf-16le")
self._skip_increment_for_next_fetch = (
False # Track if we need to skip incrementing the row index
)
Expand All @@ -173,11 +181,7 @@ def _is_unicode_string(self, param: str) -> bool:
Returns:
True if the string contains non-ASCII characters, False otherwise.
"""
try:
param.encode("ascii")
return False # Can be encoded to ASCII, so not Unicode
except UnicodeEncodeError:
return True # Contains non-ASCII characters, so treat as Unicode
return not param.isascii()

def _parse_date(self, param: str) -> Optional[datetime.date]:
"""
Expand Down Expand Up @@ -895,45 +899,51 @@ def _reset_inputsizes(self) -> None:
"""Reset input sizes after execution"""
self._inputsizes = None

# Pre-built constant lookup table — avoids rebuilding ~30 entries on every call.
# Used by setinputsizes fallback path (PR #549 fast path doesn't need this).
_SQL_TO_C_TYPE = None

@classmethod
def _get_sql_to_c_type_map(cls):
    """Return the lazily-built SQL-type -> C-type lookup table.

    The table is constructed on first use and cached on the class, so
    repeated calls (one per setinputsizes fallback) cost only a None check.
    """
    if cls._SQL_TO_C_TYPE is None:
        c = ddbc_sql_const
        # Many SQL types share one C type — group them so each mapping is
        # declared once instead of repeating near-identical dict entries.
        groups = (
            ((c.SQL_CHAR, c.SQL_VARCHAR, c.SQL_LONGVARCHAR), c.SQL_C_CHAR),
            ((c.SQL_WCHAR, c.SQL_WVARCHAR, c.SQL_WLONGVARCHAR, c.SQL_SS_XML),
             c.SQL_C_WCHAR),
            ((c.SQL_DECIMAL, c.SQL_NUMERIC), c.SQL_C_NUMERIC),
            ((c.SQL_BIT,), c.SQL_C_BIT),
            ((c.SQL_TINYINT,), c.SQL_C_TINYINT),
            ((c.SQL_SMALLINT,), c.SQL_C_SHORT),
            ((c.SQL_INTEGER,), c.SQL_C_LONG),
            ((c.SQL_BIGINT,), c.SQL_C_SBIGINT),
            ((c.SQL_REAL,), c.SQL_C_FLOAT),
            ((c.SQL_FLOAT, c.SQL_DOUBLE), c.SQL_C_DOUBLE),
            ((c.SQL_BINARY, c.SQL_VARBINARY, c.SQL_LONGVARBINARY,
              c.SQL_SS_UDT, c.SQL_SS_VARIANT), c.SQL_C_BINARY),
            # ODBC 3.x date/time types plus their 2.x aliases.
            ((c.SQL_TYPE_DATE, c.SQL_DATE), c.SQL_C_TYPE_DATE),
            ((c.SQL_TYPE_TIME, c.SQL_SS_TIME2, c.SQL_TIME), c.SQL_C_TYPE_TIME),
            ((c.SQL_TYPE_TIMESTAMP, c.SQL_TIMESTAMP), c.SQL_C_TYPE_TIMESTAMP),
            ((c.SQL_DATETIMEOFFSET,), c.SQL_C_SS_TIMESTAMPOFFSET),
            ((c.SQL_GUID,), c.SQL_C_GUID),
        )
        cls._SQL_TO_C_TYPE = {
            sql_type.value: c_type.value
            for sql_types, c_type in groups
            for sql_type in sql_types
        }
    return cls._SQL_TO_C_TYPE

def _get_c_type_for_sql_type(self, sql_type: int) -> int:
    """Map an ODBC SQL type code to the C type used for parameter binding.

    Args:
        sql_type: ODBC SQL type constant value (e.g. SQL_VARCHAR, SQL_INTEGER).

    Returns:
        The matching SQL_C_* constant value, or SQL_C_DEFAULT when the SQL
        type is not present in the lookup table.
    """
    # Delegate to the class-level cached table instead of rebuilding a
    # ~30-entry dict on every call (this runs once per parameter binding).
    return self._get_sql_to_c_type_map().get(
        sql_type, ddbc_sql_const.SQL_C_DEFAULT.value
    )

def _create_parameter_types_list( # pylint: disable=too-many-arguments,too-many-positional-arguments
self,
Expand Down Expand Up @@ -2453,26 +2463,27 @@ def fetchone(self) -> Union[None, Row]:
"""
self._check_closed() # Check if the cursor is closed

char_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value)
wchar_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_WCHAR.value)
# Use cached encoding strings — eliminates 2 method calls + 2 dict.get() per fetch
char_enc = self._cached_char_encoding
wchar_enc = self._cached_wchar_encoding

# Fetch raw data
row_data = []
try:
ret = ddbc_bindings.DDBCSQLFetchOne(
self.hstmt,
row_data,
char_decoding.get("encoding", "utf-8"),
wchar_decoding.get("encoding", "utf-16le"),
char_enc,
wchar_enc,
)

if self.hstmt:
# Only retrieve diag records on SQL_SUCCESS_WITH_INFO.
if ret == ddbc_sql_const.SQL_SUCCESS_WITH_INFO.value and self.hstmt:
self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))

if ret == ddbc_sql_const.SQL_NO_DATA.value:
# No more data available
if self._next_row_index == 0 and self.description is not None:
# This is an empty result set, set rowcount to 0
self.rowcount = 0
return None

Expand All @@ -2487,6 +2498,9 @@ def fetchone(self) -> Union[None, Row]:

# Get column and converter maps
column_map, converter_map = self._get_column_and_converter_maps()
# Fast path: skip __init__ overhead when no converters/UUID processing
if not converter_map and not self._uuid_str_indices:
return Row._fast_create(row_data, column_map, self)
return Row(
row_data,
column_map,
Expand Down Expand Up @@ -2518,8 +2532,9 @@ def fetchmany(self, size: Optional[int] = None) -> List[Row]:
if size <= 0:
return []

char_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value)
wchar_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_WCHAR.value)
# Use cached encoding strings
char_enc = self._cached_char_encoding
wchar_enc = self._cached_wchar_encoding

# Fetch raw data
rows_data = []
Expand All @@ -2528,11 +2543,11 @@ def fetchmany(self, size: Optional[int] = None) -> List[Row]:
self.hstmt,
rows_data,
size,
char_decoding.get("encoding", "utf-8"),
wchar_decoding.get("encoding", "utf-16le"),
char_enc,
wchar_enc,
)

if self.hstmt:
if ret == ddbc_sql_const.SQL_SUCCESS_WITH_INFO.value and self.hstmt:
self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))

# Update rownumber for the number of rows actually fetched
Expand All @@ -2552,6 +2567,9 @@ def fetchmany(self, size: Optional[int] = None) -> List[Row]:

# Convert raw data to Row objects
uuid_idx = self._uuid_str_indices
# Fast path: build Row objects in C++ — avoids Python loop overhead
if not converter_map and not uuid_idx:
return ddbc_bindings.construct_rows(rows_data, Row, column_map, self)
return [
Row(
row_data,
Expand All @@ -2577,23 +2595,24 @@ def fetchall(self) -> List[Row]:
if not self._has_result_set and self.description:
self._reset_rownumber()

char_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value)
wchar_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_WCHAR.value)
# Use cached encoding strings
char_enc = self._cached_char_encoding
wchar_enc = self._cached_wchar_encoding

# Fetch raw data
rows_data = []
try:
ret = ddbc_bindings.DDBCSQLFetchAll(
self.hstmt,
rows_data,
char_decoding.get("encoding", "utf-8"),
wchar_decoding.get("encoding", "utf-16le"),
char_enc,
wchar_enc,
)

# Check for errors
check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)

if self.hstmt:
if ret == ddbc_sql_const.SQL_SUCCESS_WITH_INFO.value and self.hstmt:
self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))

# Update rownumber for the number of rows actually fetched
Expand All @@ -2612,6 +2631,9 @@ def fetchall(self) -> List[Row]:

# Convert raw data to Row objects
uuid_idx = self._uuid_str_indices
# Fast path: build Row objects in C++ — avoids Python loop overhead
if not converter_map and not uuid_idx:
return ddbc_bindings.construct_rows(rows_data, Row, column_map, self)
return [
Row(
row_data,
Expand Down
56 changes: 56 additions & 0 deletions mssql_python/pybind/ddbc_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5850,6 +5850,56 @@ void DDBCSetDecimalSeparator(const std::string& separator) {
#endif

// Functions/data to be exposed to Python as a part of ddbc_bindings module
// ---------------------------------------------------------------------------
// construct_rows — Build Row objects entirely in C++.
//
// Replaces the Python list comprehension:
//     [Row._fast_create(rd, column_map, cursor) for rd in rows_data]
//
// By doing tp_alloc + slot assignment in a tight C loop, this avoids:
//   - Python bytecode dispatch (FOR_ITER, LOAD_FAST, CALL_FUNCTION)
//   - Keyword argument processing overhead per Row
//   - Python function call frame setup per iteration
//
// Requires Row to have __slots__ = ('_values', '_column_map', '_cursor').
// Semantically identical to _fast_create — no converter or UUID processing.
// ---------------------------------------------------------------------------
py::list construct_rows(const py::list& rows_data,
                        const py::object& row_class,
                        const py::object& column_map,
                        const py::object& cursor_obj) {
    PyTypeObject* row_type = reinterpret_cast<PyTypeObject*>(row_class.ptr());
    Py_ssize_t n = PyList_GET_SIZE(rows_data.ptr());

    // Pre-intern slot name strings (cached by CPython after first call).
    static PyObject* attr_values = PyUnicode_InternFromString("_values");
    static PyObject* attr_column_map = PyUnicode_InternFromString("_column_map");
    static PyObject* attr_cursor = PyUnicode_InternFromString("_cursor");
    // Interning can fail (e.g. under memory pressure); a NULL attribute name
    // must never reach PyObject_GenericSetAttr — that would crash the
    // interpreter rather than raise. Statics stay NULL on failure, so every
    // subsequent call keeps raising instead of crashing.
    if (!attr_values || !attr_column_map || !attr_cursor) {
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_MemoryError,
                            "construct_rows: failed to intern Row slot names");
        }
        throw py::error_already_set();
    }

    py::list result(n);

    for (Py_ssize_t i = 0; i < n; ++i) {
        // Allocate Row without calling __init__ (slots start out NULL).
        PyObject* row = row_type->tp_alloc(row_type, 0);
        if (!row) throw py::error_already_set();

        PyObject* row_data = PyList_GET_ITEM(rows_data.ptr(), i);  // borrowed

        // Set __slots__ via GenericSetAttr (uses descriptor offsets — fast
        // path; each call increfs the stored value).
        if (PyObject_GenericSetAttr(row, attr_values, row_data) < 0 ||
            PyObject_GenericSetAttr(row, attr_column_map, column_map.ptr()) < 0 ||
            PyObject_GenericSetAttr(row, attr_cursor, cursor_obj.ptr()) < 0) {
            Py_DECREF(row);
            throw py::error_already_set();
        }

        // PyList_SET_ITEM steals the reference — don't Py_DECREF row.
        PyList_SET_ITEM(result.ptr(), i, row);
    }

    return result;
}

PYBIND11_MODULE(ddbc_bindings, m) {
m.doc() = "msodbcsql driver api bindings for Python";

Expand Down Expand Up @@ -6007,6 +6057,12 @@ PYBIND11_MODULE(ddbc_bindings, m) {
// Add a version attribute
m.attr("__version__") = "1.0.0";

// Fast Row construction in C++ — replaces Python list comprehension
m.def("construct_rows", &construct_rows,
"Build Row objects in C++ for fetchall/fetchmany fast path",
py::arg("rows_data"), py::arg("row_class"),
py::arg("column_map"), py::arg("cursor"));

// Expose logger bridge function to Python
m.def("update_log_level", &mssql_python::logging::LoggerBridge::updateLevel,
"Update the cached log level in C++ bridge");
Expand Down
55 changes: 39 additions & 16 deletions mssql_python/row.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,24 @@ class Row:
print(row.column_name) # Access by column name (case sensitivity varies)
"""

# __slots__ eliminates per-instance __dict__ (~232 bytes/row savings),
# and makes attribute access ~30% faster (array index vs dict lookup).
__slots__ = ("_values", "_column_map", "_cursor")

@staticmethod
def _fast_create(values, column_map, cursor):
    """Build a Row without running __init__ — the common fast path.

    Invoked by fetchall/fetchmany when no output converters and no UUID
    stringification apply (the vast majority of queries). Bypasses the
    if/elif/else chain and keyword-argument handling in __init__.
    """
    row = object.__new__(Row)
    row._values = values
    row._column_map = column_map
    row._cursor = cursor
    return row

def __init__(self, values, column_map, cursor=None, converter_map=None, uuid_str_indices=None):
    """
    Initialize a Row object with values and pre-built column map.

    Args:
        values: Raw column values for this row.
        column_map: Pre-built column-name -> index mapping shared by every
            row of the result set.
        cursor: Owning cursor; used only for the output-converter fallback
            path below.
        converter_map: Pre-computed per-column output-converter map, or
            None/empty when no converters apply.
        uuid_str_indices: Indices of UUID columns whose values must be
            converted to str. Pre-computed once per result set when
            native_uuid=False. None means no conversion (native_uuid=True,
            the default).
    """
    # Fast path: no converters and no UUID stringification (common case).
    # Avoids the converter_map iteration and list copy entirely.
    if not converter_map and not uuid_str_indices:
        if (
            cursor
            and hasattr(cursor.connection, "_output_converters")
            and cursor.connection._output_converters
        ):
            # Fallback to original method for backward compatibility
            self._values = self._apply_output_converters(values, cursor)
        else:
            # Zero-copy: just store the reference directly
            self._values = values
    else:
        # Apply output converters if available using pre-computed converter map
        if converter_map:
            self._values = self._apply_output_converters_optimized(values, converter_map)
        else:
            self._values = values

        # Convert UUID columns to str when native_uuid=False.
        if uuid_str_indices:
            self._stringify_uuids(uuid_str_indices)

    self._column_map = column_map
    self._cursor = cursor
Expand Down
Loading