Skip to content

Commit ef2f3c4

Browse files
Merge branch 'main' into docs/llm-usage-guide
2 parents c8f3c60 + 7fe5431 commit ef2f3c4

12 files changed

Lines changed: 667 additions & 868 deletions

main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@
1515
print(f"Database ID: {row[0]}, Name: {row[1]}")
1616

1717
cursor.close()
18-
conn.close()
18+
conn.close()

mssql_python/cursor.py

Lines changed: 185 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import uuid
1616
import datetime
1717
import warnings
18-
from typing import List, Union, Any, Optional, Tuple, Sequence, TYPE_CHECKING
18+
from typing import List, Union, Any, Optional, Tuple, Sequence, TYPE_CHECKING, Iterable
1919
from mssql_python.constants import ConstantsDDBC as ddbc_sql_const, SQLTypes
2020
from mssql_python.helpers import check_error
2121
from mssql_python.logging import logger
@@ -2451,6 +2451,190 @@ def nextset(self) -> Union[bool, None]:
24512451
)
24522452
return True
24532453

2454+
def _bulkcopy(
    self, table_name: str, data: Iterable[Union[Tuple, List]], **kwargs
):  # pragma: no cover
    """
    Perform bulk copy operation for high-performance data loading.

    The operation does not reuse this cursor's connection: it parses the
    parent connection's connection string, opens a separate
    ``mssql_py_core.PyCoreConnection`` from it, runs ``bulkcopy`` on a cursor
    of that connection, and closes both before returning.

    Args:
        table_name: Target table name (can include schema, e.g., 'dbo.MyTable').
            The table must exist and the user must have INSERT permissions.

        data: Iterable of tuples or lists containing row data to be inserted.

            Data Format Requirements:
            - Each element in the iterable represents one row
            - Each row should be a tuple or list of column values
            - Column order must match the target table's column order (by ordinal
              position), unless column_mappings is specified
            - The number of values in each row must match the number of columns
              in the target table

        **kwargs: Additional bulk copy options. Every keyword argument is
            forwarded unchanged to the underlying ``bulkcopy`` call.

            batch_size (int, optional):
                Validated only when provided. Must be a positive whole number;
                ``bool`` values and non-integral floats are rejected.

            timeout (int | float, optional):
                Must be a positive number; ``bool`` and NaN are rejected.
                When omitted, the value 30 is used for validation only and is
                NOT forwarded, so the underlying library's own default applies.
                NOTE(review): units are presumably seconds - confirm against
                the mssql_py_core documentation.

            column_mappings (List[Tuple[int, str]], optional):
                Maps source data column indices to target table column names.
                Each tuple is (source_index, target_column_name) where:
                - source_index: 0-based index of the column in the source data
                - target_column_name: Name of the target column in the database table

                When omitted: Columns are mapped by ordinal position (first data
                column -> first table column, second -> second, etc.)

                When specified: Only the mapped columns are inserted; unmapped
                source columns are ignored, and unmapped target columns must
                have default values or allow NULL.

    Returns:
        Dictionary with bulk copy results including:
            - rows_copied: Number of rows successfully copied
            - batch_count: Number of batches processed
            - elapsed_time: Time taken for the operation

    Raises:
        ImportError: If mssql_py_core library is not installed
        TypeError: If data is None, not iterable, or is a string/bytes, or if
            batch_size/timeout have an unsupported type
        ValueError: If table_name is empty, batch_size/timeout are not positive,
            or SERVER/DATABASE are missing from the connection string
        RuntimeError: If connection string is not available
    """
    try:
        import mssql_py_core
    except ImportError as exc:
        raise ImportError(
            "Bulk copy requires the mssql_py_core library which is not installed. "
            "To install, run: pip install mssql_py_core "
        ) from exc

    # Validate inputs
    if not table_name or not isinstance(table_name, str):
        raise ValueError("table_name must be a non-empty string")

    # Strings and bytes are iterable, but iterating them yields characters or
    # ints rather than rows, so they are rejected explicitly.
    if data is None:
        raise TypeError("data must be an iterable of tuples or lists, got None")
    if isinstance(data, (str, bytes, bytearray)):
        raise TypeError(
            f"data must be an iterable of tuples or lists, got {type(data).__name__}. "
            "Strings and bytes are not valid row collections."
        )
    if not hasattr(data, "__iter__"):
        raise TypeError(
            f"data must be an iterable of tuples or lists, got non-iterable {type(data).__name__}"
        )

    # Extract kwargs for validation only; kwargs itself is forwarded untouched.
    batch_size = kwargs.get("batch_size", None)
    timeout = kwargs.get("timeout", 30)

    # Validate batch_size type and value (only if explicitly provided).
    # bool is a subclass of int, so it has to be excluded explicitly.
    if batch_size is not None:
        if isinstance(batch_size, bool) or not isinstance(batch_size, (int, float)):
            raise TypeError(
                f"batch_size must be a positive integer, got {type(batch_size).__name__}"
            )
        # Integral floats (e.g. 1000.0) stay accepted for backward compatibility;
        # 1.5, NaN and infinity are not whole numbers and are rejected here.
        if isinstance(batch_size, float) and not batch_size.is_integer():
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        if not batch_size > 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

    # Validate timeout type and value.
    if isinstance(timeout, bool) or not isinstance(timeout, (int, float)):
        raise TypeError(f"timeout must be a positive number, got {type(timeout).__name__}")
    # "not timeout > 0" (rather than "timeout <= 0") also rejects NaN, for
    # which every ordering comparison is False.
    if not timeout > 0:
        raise ValueError(f"timeout must be positive, got {timeout}")

    # Get and parse connection string. A missing attribute and an attribute
    # set to None are both treated as "not available".
    connection_str = getattr(self.connection, "connection_str", None)
    if connection_str is None:
        raise RuntimeError("Connection string not available for bulk copy")

    # Use the proper connection string parser that handles braced values
    from mssql_python.connection_string_parser import _ConnectionStringParser

    parser = _ConnectionStringParser(validate_keywords=False)
    params = parser._parse(connection_str)

    if not params.get("server"):
        raise ValueError("SERVER parameter is required in connection string")

    if not params.get("database"):
        raise ValueError(
            "DATABASE parameter is required in connection string for bulk copy. "
            "Specify the target database explicitly to avoid accidentally writing to system databases."
        )

    # NOTE(review): certificate trust defaults to enabled and encryption to
    # "Optional" when the connection string does not set them. Both defaults
    # are kept as-is for compatibility; confirm they are intended.
    trust_cert = params.get("trustservercertificate", "yes").strip().lower() in (
        "yes",
        "true",
    )

    # Parse encryption setting from connection string
    encrypt_param = params.get("encrypt")
    if encrypt_param is None:
        encryption = "Optional"
    else:
        encrypt_value = encrypt_param.strip().lower()
        if encrypt_value in ("yes", "true", "mandatory", "required"):
            encryption = "Required"
        elif encrypt_value in ("no", "false", "optional"):
            encryption = "Optional"
        else:
            # Pass through unrecognized values (e.g., "Strict") to the underlying driver
            encryption = encrypt_param

    # Connection context handed to the bulk copy library. It contains the
    # password, so it must never be logged or embedded in an error message.
    pycore_context = {
        "server": params.get("server"),
        "database": params.get("database"),
        "user_name": params.get("uid", ""),
        "trust_server_certificate": trust_cert,
        "encryption": encryption,
        "password": params.get("pwd", ""),
    }

    pycore_connection = None
    pycore_cursor = None
    try:
        pycore_connection = mssql_py_core.PyCoreConnection(pycore_context)
        pycore_cursor = pycore_connection.cursor()

        return pycore_cursor.bulkcopy(table_name, iter(data), **kwargs)

    except Exception as e:
        # Log the error for debugging (without exposing credentials)
        logger.debug(
            "Bulk copy operation failed for table '%s': %s: %s",
            table_name,
            type(e).__name__,
            str(e),
        )
        # Re-raise a fresh exception of the same type carrying only the
        # message, so the connection context is not reachable through the
        # error chain.
        try:
            sanitized = type(e)(str(e))
        except Exception:
            # Some exception classes cannot be built from a single string
            # (e.g. UnicodeEncodeError takes five arguments). Re-raise the
            # original object with its traceback dropped instead of letting
            # the constructor's TypeError mask the real failure.
            sanitized = e.with_traceback(None)
        raise sanitized from None

    finally:
        # Drop the credential references held by the context dict. This only
        # removes references; Python strings cannot be wiped in place.
        pycore_context["password"] = ""
        pycore_context["user_name"] = ""
        # Clean up bulk copy resources (cursor first, then its connection)
        for resource in (pycore_cursor, pycore_connection):
            if resource and hasattr(resource, "close"):
                try:
                    resource.close()
                except Exception as cleanup_error:
                    # Log cleanup errors at debug level to aid troubleshooting
                    # without masking the original exception
                    logger.debug(
                        "Failed to close bulk copy resource %s: %s",
                        type(resource).__name__,
                        cleanup_error,
                    )
2637+
24542638
def __enter__(self):
24552639
"""
24562640
Enter the runtime context for the cursor.

mssql_python/logging.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import atexit
1818
from typing import Optional
1919

20-
2120
# Single DEBUG level - all or nothing philosophy
2221
# If you need logging, you need to see everything
2322
DEBUG = logging.DEBUG # 10

mssql_python/pybind/ddbc_bindings.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2892,7 +2892,7 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
28922892
SQLHSTMT hStmt = StatementHandle->get();
28932893

28942894
// Cache decimal separator to avoid repeated system calls
2895-
std::string decimalSeparator = GetDecimalSeparator();
2895+
28962896

28972897
for (SQLSMALLINT i = 1; i <= colCount; ++i) {
28982898
SQLWCHAR columnName[256];
@@ -3615,7 +3615,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
36153615
columnInfos[col].processedColumnSize + 1; // +1 for null terminator
36163616
}
36173617

3618-
std::string decimalSeparator = GetDecimalSeparator(); // Cache decimal separator
3618+
36193619

36203620
// Performance: Build function pointer dispatch table (once per batch)
36213621
// This eliminates the switch statement from the hot loop - 10,000 rows × 10

tests/test_001_globals.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -388,8 +388,7 @@ def test_decimal_separator_with_db_operations(db_connection):
388388
try:
389389
# Create a test table with decimal values
390390
cursor = db_connection.cursor()
391-
cursor.execute(
392-
"""
391+
cursor.execute("""
393392
DROP TABLE IF EXISTS #decimal_separator_test;
394393
CREATE TABLE #decimal_separator_test (
395394
id INT,
@@ -400,8 +399,7 @@ def test_decimal_separator_with_db_operations(db_connection):
400399
(2, 678.90),
401400
(3, 0.01),
402401
(4, 999.99);
403-
"""
404-
)
402+
""")
405403
cursor.close()
406404

407405
# Test 1: Fetch with default separator
@@ -469,8 +467,7 @@ def test_decimal_separator_batch_operations(db_connection):
469467
try:
470468
# Create test data
471469
cursor = db_connection.cursor()
472-
cursor.execute(
473-
"""
470+
cursor.execute("""
474471
DROP TABLE IF EXISTS #decimal_batch_test;
475472
CREATE TABLE #decimal_batch_test (
476473
id INT,
@@ -481,8 +478,7 @@ def test_decimal_separator_batch_operations(db_connection):
481478
(1, 123.456, 12345.67890),
482479
(2, 0.001, 0.00001),
483480
(3, 999.999, 9999.99999);
484-
"""
485-
)
481+
""")
486482
cursor.close()
487483

488484
# Test 1: Fetch results with default separator

tests/test_003_connection.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -992,16 +992,14 @@ def test_execute_with_large_parameters(db_connection, conn_str):
992992
pytest.skip("Skipping for Azure SQL - large parameter tests may cause timeouts")
993993

994994
# Test with a temporary table for large data
995-
cursor = db_connection.execute(
996-
"""
995+
cursor = db_connection.execute("""
997996
DROP TABLE IF EXISTS #large_params_test;
998997
CREATE TABLE #large_params_test (
999998
id INT,
1000999
large_text NVARCHAR(MAX),
10011000
large_binary VARBINARY(MAX)
10021001
)
1003-
"""
1004-
)
1002+
""")
10051003
cursor.close()
10061004

10071005
try:
@@ -2126,12 +2124,10 @@ def test_timeout_long_query(db_connection):
21262124
while retry_count < max_retries:
21272125
start_time = time.perf_counter()
21282126
try:
2129-
cursor.execute(
2130-
"""
2127+
cursor.execute("""
21312128
SELECT COUNT(*) FROM sys.objects a, sys.objects b, sys.objects c
21322129
WHERE a.object_id = b.object_id * c.object_id
2133-
"""
2134-
)
2130+
""")
21352131
cursor.fetchall()
21362132
elapsed_time = time.perf_counter() - start_time
21372133
break # Success, exit retry loop

0 commit comments

Comments
 (0)