From db0c4d5202ee761cfd83e6027aaafac6e3c27bb2 Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Fri, 8 May 2026 14:49:15 +0530 Subject: [PATCH 1/6] Fix executemany RuntimeError when decimals change signs (GH-557) Root cause: In executemany(), Decimal values in the SMALLMONEY/MONEY range are bound as SQL_VARCHAR with column_size derived from a single sample value's formatted string length. The sample is chosen by _compute_column_type() based on precision/scale, not string length. When the sample is positive (e.g. '1.0' = 3 chars) but the batch contains a negative value (e.g. '-0.1' = 4 chars), the leading '-' makes it exceed the allocated buffer, causing the C++ layer to throw RuntimeError. Fix: After paraminfo is created for auto-detected types, scan all Decimal values in the column to find the true maximum formatted string length and adjust columnSize accordingly. This mirrors the existing pattern used for binary data sizing. Added test_executemany_decimal_sign_change covering: negative-then-positive, positive-then-negative, mixed sign batches, and data correctness verification. Closes #557 --- mssql_python/cursor.py | 14 ++++++++++++ tests/test_004_cursor.py | 48 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 05324875..537ca046 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -2322,6 +2322,20 @@ def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-s paraminfo.paramSQLType = ddbc_sql_const.SQL_VARCHAR.value paraminfo.columnSize = 1 + # Special handling for Decimal columns sent as SQL_VARCHAR (GH-557) + # The column_size was computed from a single sample value, but + # negative signs can make other rows' formatted strings longer. + # Scan all rows to find the true maximum formatted length. + if paraminfo.paramSQLType == ddbc_sql_const.SQL_VARCHAR.value: + max_decimal_size = paraminfo.columnSize + for row in seq_of_parameters: + value = row[col_index] + if value is not None and isinstance(value, decimal.Decimal): + formatted_len = len(format(value, "f")) + if formatted_len > max_decimal_size: + max_decimal_size = formatted_len + paraminfo.columnSize = max_decimal_size + # Special handling for binary data in auto-detected types if paraminfo.paramSQLType in ( ddbc_sql_const.SQL_BINARY.value, diff --git a/tests/test_004_cursor.py b/tests/test_004_cursor.py index 17e06961..214964e6 100644 --- a/tests/test_004_cursor.py +++ b/tests/test_004_cursor.py @@ -2304,6 +2304,54 @@ def test_executemany_Decimal_list(cursor, db_connection): db_connection.commit() +def test_executemany_decimal_sign_change(cursor, db_connection): + """Test executemany with decimals that change signs (GH-557). + + When the sample value chosen for column sizing is shorter than a negative + value in the batch, the formatted string (with a leading '-') can exceed + the allocated column_size, causing a RuntimeError. + """ + try: + cursor.execute("CREATE TABLE #pytest_decimal_sign (col_1 DECIMAL(28, 14))") + + # Case 1: negative first, then positive — previously worked + data1 = [(decimal.Decimal("-0.1"),), (decimal.Decimal("1.0"),)] + cursor.executemany("INSERT INTO #pytest_decimal_sign VALUES (?)", data1) + + # Case 2: positive first, then negative — previously failed + data2 = [(decimal.Decimal("0.1"),), (decimal.Decimal("-0.1"),)] + cursor.executemany("INSERT INTO #pytest_decimal_sign VALUES (?)", data2) + + # Case 3: positive then negative with different integer parts + data3 = [(decimal.Decimal("1.0"),), (decimal.Decimal("-0.1"),)] + cursor.executemany("INSERT INTO #pytest_decimal_sign VALUES (?)", data3) + + # Case 4: multiple sign changes in a single batch + data4 = [ + (decimal.Decimal("100.5"),), + (decimal.Decimal("-0.001"),), + (decimal.Decimal("0.5"),), + (decimal.Decimal("-999.99"),), + ] + cursor.executemany("INSERT INTO #pytest_decimal_sign VALUES (?)", data4) + + db_connection.commit() + + # Verify row count + cursor.execute("SELECT COUNT(*) FROM #pytest_decimal_sign") + count = cursor.fetchone()[0] + assert count == 10 + + # Verify data correctness for the originally-failing case + cursor.execute("SELECT col_1 FROM #pytest_decimal_sign ORDER BY col_1") + rows = [row[0] for row in cursor.fetchall()] + assert decimal.Decimal("-999.99") in [r.quantize(decimal.Decimal("0.01")) for r in rows] + assert decimal.Decimal("0.1") in [r.quantize(decimal.Decimal("0.1")) for r in rows] + finally: + cursor.execute("DROP TABLE IF EXISTS #pytest_decimal_sign") + db_connection.commit() + + def test_executemany_DecimalString_list(cursor, db_connection): """Test executemany with an string of decimal parameter list.""" try: From 813c06d9403283f9695e21f096e4fce6d6f8b044 Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Mon, 11 May 2026 09:57:49 +0530 Subject: [PATCH 2/6] Optimize GH-557 fix: guard decimal scan with column check, reuse materialized column list --- mssql_python/cursor.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 537ca046..27a02f05 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -2322,19 +2322,24 @@ def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-s paraminfo.paramSQLType = ddbc_sql_const.SQL_VARCHAR.value paraminfo.columnSize = 1 - # Special handling for Decimal columns sent as SQL_VARCHAR (GH-557) - # The column_size was computed from a single sample value, but - # negative signs can make other rows' formatted strings longer. - # Scan all rows to find the true maximum formatted length. - if paraminfo.paramSQLType == ddbc_sql_const.SQL_VARCHAR.value: - max_decimal_size = paraminfo.columnSize - for row in seq_of_parameters: - value = row[col_index] - if value is not None and isinstance(value, decimal.Decimal): - formatted_len = len(format(value, "f")) - if formatted_len > max_decimal_size: - max_decimal_size = formatted_len - paraminfo.columnSize = max_decimal_size + # Only scan when the already-materialized column actually contains + # Decimal values inferred as SQL_VARCHAR, and reuse that column data + # to avoid re-iterating the whole batch (GH-557). + if ( + paraminfo.paramSQLType == ddbc_sql_const.SQL_VARCHAR.value + and any( + value is not None and isinstance(value, decimal.Decimal) + for value in column + ) + ): + paraminfo.columnSize = max( + paraminfo.columnSize, + max( + len(format(value, "f")) + for value in column + if value is not None and isinstance(value, decimal.Decimal) + ), + ) # Special handling for binary data in auto-detected types if paraminfo.paramSQLType in ( From 2712bf25b739d87b89cbf767a90a2db148523540 Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Mon, 11 May 2026 09:58:10 +0530 Subject: [PATCH 3/6] Optimize GH-557 fix: guard decimal scan with column check, reuse materialized column list --- mssql_python/cursor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 27a02f05..6d3d6ec6 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -2324,7 +2324,7 @@ def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-s # Only scan when the already-materialized column actually contains # Decimal values inferred as SQL_VARCHAR, and reuse that column data - # to avoid re-iterating the whole batch (GH-557). + # to avoid re-iterating the whole batch. if ( paraminfo.paramSQLType == ddbc_sql_const.SQL_VARCHAR.value and any( From 74feb1cab3007b42409baace393a2c87f5dd2442 Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Mon, 11 May 2026 09:58:43 +0530 Subject: [PATCH 4/6] Optimize GH-557 fix: guard decimal scan with column check, reuse materialized column list --- mssql_python/cursor.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 6d3d6ec6..57c92d0f 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -2325,12 +2325,8 @@ def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-s # Only scan when the already-materialized column actually contains # Decimal values inferred as SQL_VARCHAR, and reuse that column data # to avoid re-iterating the whole batch. - if ( - paraminfo.paramSQLType == ddbc_sql_const.SQL_VARCHAR.value - and any( - value is not None and isinstance(value, decimal.Decimal) - for value in column - ) + if paraminfo.paramSQLType == ddbc_sql_const.SQL_VARCHAR.value and any( + value is not None and isinstance(value, decimal.Decimal) for value in column ): paraminfo.columnSize = max( paraminfo.columnSize, From 77bd7ffd8b50c47c588fc1b9dc62fcf7eb02b15c Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Mon, 11 May 2026 12:10:56 +0530 Subject: [PATCH 5/6] Eliminate double scan: compute max decimal formatted len inside _compute_column_type --- mssql_python/cursor.py | 44 +++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 57c92d0f..4887da22 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -2054,19 +2054,29 @@ def _compute_column_type(self, column): sample_value: Representative value for type inference and modified_row. min_val: Minimum for integers (None otherwise). max_val: Maximum for integers (None otherwise). + max_decimal_formatted_len: Maximum len(format(d, 'f')) across all + Decimal values in the column (0 when no Decimals are present). + Used by executemany to correct the SQL_VARCHAR column size when + the sample value's formatted string is shorter than another + value's (e.g. positive sample vs negative row value) (GH-557). """ non_nulls = [v for v in column if v is not None] if not non_nulls: - return None, None, None + return None, None, None, 0 int_values = [v for v in non_nulls if isinstance(v, int)] if int_values: min_val, max_val = min(int_values), max(int_values) sample_value = max(int_values, key=abs) - return sample_value, min_val, max_val + return sample_value, min_val, max_val, 0 sample_value = None + max_decimal_formatted_len = 0 for v in non_nulls: + if isinstance(v, decimal.Decimal): + max_decimal_formatted_len = max( + max_decimal_formatted_len, len(format(v, "f")) + ) if not sample_value: sample_value = v elif isinstance(v, (str, bytes, bytearray)) and isinstance( @@ -2120,7 +2130,7 @@ def _compute_column_type(self, column): # If comparing Decimal to non-Decimal, prefer Decimal for better type inference sample_value = v - return sample_value, None, None + return sample_value, None, None, max_decimal_formatted_len def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-statements self, operation: str, seq_of_parameters: Union[List[Sequence[Any]], List[Mapping[str, Any]]] @@ -2225,7 +2235,7 @@ def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-s if hasattr(seq_of_parameters, "__getitem__") else [] ) - sample_value, min_val, max_val = self._compute_column_type(column) + sample_value, min_val, max_val, _ = self._compute_column_type(column) if self._inputsizes and col_index < len(self._inputsizes): # Use explicitly set input sizes @@ -2301,7 +2311,9 @@ def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-s if hasattr(seq_of_parameters, "__getitem__") else [] ) - sample_value, min_val, max_val = self._compute_column_type(column) + sample_value, min_val, max_val, max_decimal_len = self._compute_column_type( + column + ) dummy_row = list(sample_row) paraminfo = self._create_parameter_types_list( @@ -2322,20 +2334,16 @@ def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-s paraminfo.paramSQLType = ddbc_sql_const.SQL_VARCHAR.value paraminfo.columnSize = 1 - # Only scan when the already-materialized column actually contains - # Decimal values inferred as SQL_VARCHAR, and reuse that column data - # to avoid re-iterating the whole batch. - if paraminfo.paramSQLType == ddbc_sql_const.SQL_VARCHAR.value and any( - value is not None and isinstance(value, decimal.Decimal) for value in column + # Correct column size for Decimal columns sent as SQL_VARCHAR (GH-557). + # The sample value's formatted string may be shorter than another + # row's (e.g. positive sample "1.0" = 3 chars vs negative "-0.1" = 4). + # max_decimal_len was already computed during _compute_column_type + # so no extra iteration is needed. + if ( + paraminfo.paramSQLType == ddbc_sql_const.SQL_VARCHAR.value + and max_decimal_len > paraminfo.columnSize ): - paraminfo.columnSize = max( - paraminfo.columnSize, - max( - len(format(value, "f")) - for value in column - if value is not None and isinstance(value, decimal.Decimal) - ), - ) + paraminfo.columnSize = max_decimal_len # Special handling for binary data in auto-detected types if paraminfo.paramSQLType in ( From e58e17e1ada2d3042ef782a94ddc4f43f763217a Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Mon, 11 May 2026 13:57:10 +0530 Subject: [PATCH 6/6] Performing linting --- mssql_python/cursor.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 4887da22..5cec03bd 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -2074,9 +2074,7 @@ def _compute_column_type(self, column): max_decimal_formatted_len = 0 for v in non_nulls: if isinstance(v, decimal.Decimal): - max_decimal_formatted_len = max( - max_decimal_formatted_len, len(format(v, "f")) - ) + max_decimal_formatted_len = max(max_decimal_formatted_len, len(format(v, "f"))) if not sample_value: sample_value = v elif isinstance(v, (str, bytes, bytearray)) and isinstance( @@ -2311,9 +2309,7 @@ def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-s if hasattr(seq_of_parameters, "__getitem__") else [] ) - sample_value, min_val, max_val, max_decimal_len = self._compute_column_type( - column - ) + sample_value, min_val, max_val, max_decimal_len = self._compute_column_type(column) dummy_row = list(sample_row) paraminfo = self._create_parameter_types_list(