Reference-LAPACK
diff --git a/‎BLAS/docs/TOLERANCES.md‎
Lines changed: 26 additions & 60 deletions b/‎BLAS/docs/TOLERANCES.md‎
Lines changed: 26 additions & 60 deletions
diff --git a/‎BLAS/test/test_caxpy.f90‎
Lines changed: 18 additions & 16 deletions b/‎BLAS/test/test_caxpy.f90‎
Lines changed: 18 additions & 16 deletions
diff --git a/‎BLAS/test/test_caxpy_reverse.f90‎
Lines changed: 3 additions & 3 deletions b/‎BLAS/test/test_caxpy_reverse.f90‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎BLAS/test/test_caxpy_vector_forward.f90‎
Lines changed: 3 additions & 3 deletions b/‎BLAS/test/test_caxpy_vector_forward.f90‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎BLAS/test/test_caxpy_vector_reverse.f90‎
Lines changed: 3 additions & 3 deletions b/‎BLAS/test/test_caxpy_vector_reverse.f90‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎BLAS/test/test_ccopy.f90‎
Lines changed: 4 additions & 3 deletions b/‎BLAS/test/test_ccopy.f90‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎BLAS/test/test_ccopy_reverse.f90‎
Lines changed: 3 additions & 3 deletions b/‎BLAS/test/test_ccopy_reverse.f90‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎BLAS/test/test_ccopy_vector_forward.f90‎
Lines changed: 3 additions & 3 deletions b/‎BLAS/test/test_ccopy_vector_forward.f90‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎BLAS/test/test_ccopy_vector_reverse.f90‎
Lines changed: 3 additions & 3 deletions b/‎BLAS/test/test_ccopy_vector_reverse.f90‎
Lines changed: 3 additions & 3 deletions
@@ -1,79 +1,45 @@
 # Differentiation test tolerances
 
-Tolerances and step sizes used for finite-difference checks in BLAS differentiation tests (scalar/vector, forward/reverse). All modes use the same precision-based scheme unless a mixed-precision override applies.
+Tolerances and step sizes for finite-difference derivative checks in the BLAS differentiation test generator.
 
 ---
 
-## Base tolerances by precision
+## Defaults
 
-| Family | Description           | rtol    | atol    |
-|--------|-----------------------|---------|---------|
-| S      | single real (`S*`)    | 2.0e-3  | 2.0e-3  |
-| C      | single complex (`C*`) | 1.0e-3  | 1.0e-3  |
-| D      | double real (`D*`)    | 1.0e-5  | 1.0e-5  |
-| Z      | double complex (`Z*`) | 1.0e-5  | 1.0e-5  |
+### rtol/atol by precision family
 
-These values are used in:
+| Family | Meaning | rtol | atol |
+|--------|---------|------|------|
+| S      | `S*` (single real) | 2.0e-3 | 2.0e-3 |
+| C      | `C*` (single complex) | 1.0e-3 | 1.0e-3 |
+| D      | `D*` (double real) | 1.0e-5 | 1.0e-5 |
+| Z      | `Z*` (double complex) | 1.0e-5 | 1.0e-5 |
 
-- Scalar forward
-- Scalar reverse
-- Vector forward
-- Vector reverse
+### Step size h by precision family
 
----
-
-## Step size (h)
-
-For non–mixed-precision functions:
-
-| Precision   | h        |
-|------------|----------|
-| S*, C*     | 1.0e-3   |
-| D*, Z*     | 1.0e-7   |
-
-(≈ 10·√ε for double precision.)
+| Family | h |
+|--------|---|
+| S, C   | 1.0e-3 |
+| D, Z   | 1.0e-7 |
 
 ---
 
-## Mixed-precision override
-
-For routines whose **output is double precision** but whose **first differentiable input** is **single precision** (e.g. `DSDOT`), the generator uses single-precision–style settings so the finite-difference check matches the conditioning of the inputs:
+## Overrides
 
-- **h** = 1.0e-3  
-- **rtol** = 2.0e-3  
-- **atol** = 2.0e-3  
+### Mixed-precision D* (single-precision first differentiable input)
 
-This override is applied in:
+Applies to `D*` routines whose output is double precision but whose first differentiable input is single precision (e.g. `DSDOT`, whose first input is **SX**; the generator also treats **SY** and **SB** as single-precision inputs for `D*` routines).
 
-- Scalar reverse
-- Vector forward
-- Vector reverse  
+- **Scalar forward**: override **h = 1.0e-3** only (rtol/atol remain at the `D*` base value of 1.0e-5)
+- **Scalar reverse / vector forward / vector reverse**: override **h = 1.0e-3**, **rtol = atol = 2.0e-3**
 
-Detection: `precision_type == real(8)` and the first entry in the `inputs` list has `get_param_precision(first_input, func_name, param_types) == "real(4)"`. In the generator, `get_param_precision` returns `real(4)` for **D\*** functions when the parameter is one of **SX**, **SY**, **SB**.
-
----
-
-## Mixed-precision tests (list)
-
-A test is treated as mixed-precision if it is for a **D\*** (or **Z\***) routine and the **first differentiable input** is single precision. The generator explicitly treats **SX**, **SY**, and **SB** as single precision for **D\*** routines.
-
-**Routines that use the mixed-precision override** (when present in the suite and documented with that input order):
-
-| Routine | First input(s) | Modes using override        |
-|---------|----------------|-----------------------------|
-| **DSDOT** | SX (then SY) | Scalar reverse, vector forward, vector reverse |
-
-**Note:** Any other **D\*** routine whose first `\param[in]` is **SX**, **SY**, or **SB** will also get the override. There is no **Z\*** branch for single-precision inputs in `get_param_precision`, so currently only **D\*** routines can be mixed-precision in this sense. If you add a **D\*** (or in future **Z\***) routine with a single-precision first input, it will automatically receive the same h and tolerances as above.
-
----
+### Relaxed C* tolerance in vector reverse
 
-## Summary table (all modes)
+The following relaxed tolerances apply only to **single-precision complex** (`C*`) **vector reverse** tests:
 
-| Mode             | S* / C* (h) | D* / Z* (h) | Mixed-precision (h, rtol, atol)      |
-|------------------|-------------|-------------|---------------------------------------|
-| Scalar forward   | 1e-3 / 2e-3 or 1e-3 | 1e-7 / 1e-5 | h = 1e-3 only (rtol/atol stay 1e-5)   |
-| Scalar reverse   | 1e-3 / 2e-3 or 1e-3 | 1e-7 / 1e-5 | 1e-3, 2e-3, 2e-3                      |
-| Vector forward   | 1e-3 / 2e-3 or 1e-3 | 1e-7 / 1e-5 | 1e-3, 2e-3, 2e-3                      |
-| Vector reverse   | 1e-3 / 2e-3 or 1e-3 | 1e-7 / 1e-5 | 1e-3, 2e-3, 2e-3                      |
+| Routine family (examples) | rtol/atol |
+|---------------------------|-----------|
+| DOT (e.g. `CDOTC`)        | 2.5e-2 |
+| BLAS3 (e.g. `CGEMM`, `CSYMM`, `CHEMM`) | 1.0e-2 |
 
-(Base tolerances for S/C/D/Z are as in the first table; mixed-precision replaces h and rtol/atol only where indicated. In scalar forward, mixed-precision only changes the step size h to 1e-3; rtol/atol remain 1e-5.)
+All other `C*` modes use the base tolerance (1.0e-3). `Z*` does not use relaxed tolerances.
@@ -11,17 +11,17 @@ program test_caxpy
 
   integer :: n_test
   integer :: seed_array(33)
-  integer :: test_sizes(1)
+  integer :: test_sizes(3)
   integer :: i
   logical :: passed, all_passed
 
   seed_array = 42
   call random_seed(put=seed_array)
 
-  test_sizes = (/ 4 /)
+  test_sizes = (/ 4, 10, 25 /)
   write(*,*) 'Testing CAXPY (multi-size: n = 4)'
   all_passed = .true.
-  do i = 1, 1
+  do i = 1, 3
     n_test = test_sizes(i)
     call run_test_for_size(n_test, passed)
     all_passed = all_passed .and. passed
@@ -48,13 +48,13 @@ subroutine run_test_for_size(n, passed)
 
     ! Derivative variables
     complex(4), dimension(n) :: cx_d
-    complex(4), dimension(n) :: cy_d
     complex(4) :: ca_d
+    complex(4), dimension(n) :: cy_d
 
     ! Array restoration and derivative storage
     complex(4), dimension(n) :: cx_orig, cx_d_orig
-    complex(4), dimension(n) :: cy_orig, cy_d_orig
     complex(4) :: ca_orig, ca_d_orig
+    complex(4), dimension(n) :: cy_orig, cy_d_orig
     real(4) :: temp_re, temp_im  ! For complex random init
     integer :: i, j
 
@@ -82,42 +82,44 @@ subroutine run_test_for_size(n, passed)
       call random_number(temp_im)
       cx_d(i) = cmplx(temp_re * 2.0 - 1.0, temp_im * 2.0 - 1.0, kind=4)
     end do
+    call random_number(temp_re)
+    call random_number(temp_im)
+    ca_d = cmplx(temp_re * 2.0 - 1.0, temp_im * 2.0 - 1.0, kind=4)
     do i = 1, n
       call random_number(temp_re)
       call random_number(temp_im)
       cy_d(i) = cmplx(temp_re * 2.0 - 1.0, temp_im * 2.0 - 1.0, kind=4)
     end do
-    call random_number(temp_re)
-    call random_number(temp_im)
-    ca_d = cmplx(temp_re * 2.0 - 1.0, temp_im * 2.0 - 1.0, kind=4)
 
     ! Store _orig and _d_orig
     cx_d_orig = cx_d
-    cy_d_orig = cy_d
     ca_d_orig = ca_d
+    cy_d_orig = cy_d
     cx_orig = cx
-    cy_orig = cy
     ca_orig = ca
+    cy_orig = cy
 
     write(*,*) 'Testing CAXPY (n =', n, ')'
     cy_orig = cy
 
     ! Call the differentiated function
     call caxpy_d(nsize, ca, ca_d, cx, cx_d, 1, cy, cy_d, 1)
+    cx_d = cx_d_orig
+    ca_d = ca_d_orig
 
     write(*,*) 'Function calls completed successfully'
 
     ! Numerical differentiation check
-    call check_derivatives_numerically(n, nsize, cx_orig, cy_orig, ca_orig, cx_d_orig, cy_d_orig, ca_d_orig, cy_d, passed)
+    call check_derivatives_numerically(n, nsize, cy_orig, cx_orig, ca_orig, cy_d_orig, cx_d_orig, ca_d_orig, cy_d, passed)
 
   end subroutine run_test_for_size
 
-  subroutine check_derivatives_numerically(n, nsize, cx_orig, cy_orig, ca_orig, cx_d_orig, cy_d_orig, ca_d_orig, cy_d, passed)
+  subroutine check_derivatives_numerically(n, nsize, cy_orig, cx_orig, ca_orig, cy_d_orig, cx_d_orig, ca_d_orig, cy_d, passed)
     implicit none
     integer, intent(in) :: n
     integer, intent(in) :: nsize
-    complex(4), intent(in) :: cx_orig(n), cx_d_orig(n)
     complex(4), intent(in) :: cy_orig(n), cy_d_orig(n)
+    complex(4), intent(in) :: cx_orig(n), cx_d_orig(n)
     complex(4), intent(in) :: ca_orig, ca_d_orig
     complex(4), intent(in) :: cy_d(n)
     logical, intent(out) :: passed
@@ -129,8 +131,8 @@ subroutine check_derivatives_numerically(n, nsize, cx_orig, cy_orig, ca_orig, cx
     logical :: has_large_errors
     complex(4), dimension(n) :: cy_forward, cy_backward
     integer :: i, j
-    complex(4), dimension(n) :: cx
     complex(4), dimension(n) :: cy
+    complex(4), dimension(n) :: cx
     complex(4) :: ca
 
     max_error = 0.0e0
@@ -140,15 +142,15 @@ subroutine check_derivatives_numerically(n, nsize, cx_orig, cy_orig, ca_orig, cx
     write(*,*) 'Step size h =', h
 
     ! Forward perturbation: f(x + h)
-    cx = cx_orig + h * cx_d_orig
     cy = cy_orig + h * cy_d_orig
+    cx = cx_orig + h * cx_d_orig
     ca = ca_orig + h * ca_d_orig
     call caxpy(nsize, ca, cx, 1, cy, 1)
     cy_forward = cy
 
     ! Backward perturbation: f(x - h)
-    cx = cx_orig - h * cx_d_orig
     cy = cy_orig - h * cy_d_orig
+    cx = cx_orig - h * cx_d_orig
     ca = ca_orig - h * ca_d_orig
     call caxpy(nsize, ca, cx, 1, cy, 1)
     cy_backward = cy
 
@@ -11,17 +11,17 @@ program test_caxpy_reverse
 
   integer :: n_test
   integer :: seed_array(33)
-  integer :: test_sizes(1)
+  integer :: test_sizes(3)
   integer :: i
   logical :: passed, all_passed
 
   seed_array = 42
   call random_seed(put=seed_array)
 
-  test_sizes = (/ 4 /)
+  test_sizes = (/ 4, 10, 25 /)
   write(*,*) 'Testing CAXPY (multi-size: n = 4)'
   all_passed = .true.
-  do i = 1, 1
+  do i = 1, 3
     n_test = test_sizes(i)
     call run_test_for_size(n_test, passed)
     all_passed = all_passed .and. passed
 
@@ -12,17 +12,17 @@ program test_caxpy_vector_forward
   integer :: nbdirs
   integer :: n_test
   integer :: seed_array(33)
-  integer :: test_sizes(1)
+  integer :: test_sizes(3)
   integer :: i
   logical :: passed, all_passed
 
   seed_array = 42
   call random_seed(put=seed_array)
 
-  test_sizes = (/ 4 /)
+  test_sizes = (/ 4, 10, 25 /)
   write(*,*) 'Testing CAXPY (Vector Forward, multi-size: n = 4)'
   all_passed = .true.
-  do i = 1, 1
+  do i = 1, 3
     n_test = test_sizes(i)
     nbdirs = test_sizes(i)
     call run_test_for_size(n_test, passed, nbdirs)
 
@@ -12,17 +12,17 @@ program test_caxpy_vector_reverse
   integer :: nbdirs
   integer :: n_test
   integer :: seed_array(33)
-  integer :: test_sizes(1)
+  integer :: test_sizes(3)
   integer :: i
   logical :: passed, all_passed
 
   seed_array = 42
   call random_seed(put=seed_array)
 
-  test_sizes = (/ 4 /)
+  test_sizes = (/ 4, 10, 25 /)
   write(*,*) 'Testing CAXPY (Vector Reverse, multi-size: n =', test_sizes(1), ')'
   all_passed = .true.
-  do i = 1, 1
+  do i = 1, 3
     n_test = test_sizes(i)
     nbdirs = test_sizes(i)
     call run_test_for_size(n_test, passed, nbdirs)
 
@@ -11,17 +11,17 @@ program test_ccopy
 
   integer :: n_test
   integer :: seed_array(33)
-  integer :: test_sizes(1)
+  integer :: test_sizes(3)
   integer :: i
   logical :: passed, all_passed
 
   seed_array = 42
   call random_seed(put=seed_array)
 
-  test_sizes = (/ 4 /)
+  test_sizes = (/ 4, 10, 25 /)
   write(*,*) 'Testing CCOPY (multi-size: n = 4)'
   all_passed = .true.
-  do i = 1, 1
+  do i = 1, 3
     n_test = test_sizes(i)
     call run_test_for_size(n_test, passed)
     all_passed = all_passed .and. passed
@@ -96,6 +96,7 @@ subroutine run_test_for_size(n, passed)
 
     ! Call the differentiated function
     call ccopy_d(nsize, cx, cx_d, 1, cy, cy_d, 1)
+    cx_d = cx_d_orig
 
     ! Reset ISIZE globals to uninitialized (-1)
     call set_ISIZE1OFCy(-1)
 
@@ -11,17 +11,17 @@ program test_ccopy_reverse
 
   integer :: n_test
   integer :: seed_array(33)
-  integer :: test_sizes(1)
+  integer :: test_sizes(3)
   integer :: i
   logical :: passed, all_passed
 
   seed_array = 42
   call random_seed(put=seed_array)
 
-  test_sizes = (/ 4 /)
+  test_sizes = (/ 4, 10, 25 /)
   write(*,*) 'Testing CCOPY (multi-size: n = 4)'
   all_passed = .true.
-  do i = 1, 1
+  do i = 1, 3
     n_test = test_sizes(i)
     call run_test_for_size(n_test, passed)
     all_passed = all_passed .and. passed
 
@@ -12,17 +12,17 @@ program test_ccopy_vector_forward
   integer :: nbdirs
   integer :: n_test
   integer :: seed_array(33)
-  integer :: test_sizes(1)
+  integer :: test_sizes(3)
   integer :: i
   logical :: passed, all_passed
 
   seed_array = 42
   call random_seed(put=seed_array)
 
-  test_sizes = (/ 4 /)
+  test_sizes = (/ 4, 10, 25 /)
   write(*,*) 'Testing CCOPY (Vector Forward, multi-size: n = 4)'
   all_passed = .true.
-  do i = 1, 1
+  do i = 1, 3
     n_test = test_sizes(i)
     nbdirs = test_sizes(i)
     call run_test_for_size(n_test, passed, nbdirs)
 
@@ -12,17 +12,17 @@ program test_ccopy_vector_reverse
   integer :: nbdirs
   integer :: n_test
   integer :: seed_array(33)
-  integer :: test_sizes(1)
+  integer :: test_sizes(3)
   integer :: i
   logical :: passed, all_passed
 
   seed_array = 42
   call random_seed(put=seed_array)
 
-  test_sizes = (/ 4 /)
+  test_sizes = (/ 4, 10, 25 /)
   write(*,*) 'Testing CCOPY (Vector Reverse, multi-size: n =', test_sizes(1), ')'
   all_passed = .true.
-  do i = 1, 1
+  do i = 1, 3
     n_test = test_sizes(i)
     nbdirs = test_sizes(i)
     call run_test_for_size(n_test, passed, nbdirs)