3030! > The mathematical formulas used for C and S are
3131! >
3232! > sgn(x) = { x / |x|, x != 0
33- ! > { 1, x = 0
33+ ! > { 1, x = 0
3434! >
3535! > R = sgn(F) * sqrt(|F|**2 + |G|**2)
3636! >
3737! > C = |F| / sqrt(|F|**2 + |G|**2)
3838! >
3939! > S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2)
4040! >
41+ ! > Special conditions:
42+ ! > If G=0, then C=1 and S=0.
43+ ! > If F=0, then C=0 and S is chosen so that R is real.
44+ ! >
4145! > When F and G are real, the formulas simplify to C = F/R and
4246! > S = G/R, and the returned values of C, S, and R should be
43- ! > identical to those returned by CLARTG .
47+ ! > identical to those returned by SLARTG .
4448! >
4549! > The algorithm used to compute these quantities incorporates scaling
4650! > to avoid overflow or underflow in computing the square root of the
4751! > sum of squares.
4852! >
49- ! > This is a faster version of the BLAS1 routine CROTG, except for
50- ! > the following differences:
51- ! > F and G are unchanged on return.
52- ! > If G=0, then C=1 and S=0.
53- ! > If F=0, then C=0 and S is chosen so that R is real.
53+ ! > This is the same routine CROTG fom BLAS1, except that
54+ ! > F and G are unchanged on return.
5455! >
5556! > Below, wp=>sp stands for single precision from LA_CONSTANTS module.
5657! > \endverbatim
9192! Authors:
9293! ========
9394!
94- ! > \author Edward Anderson, Lockheed Martin
95+ ! > \author Weslley Pereira, University of Colorado Denver, USA
9596!
96- ! > \date August 2016
97+ ! > \date December 2021
9798!
9899! > \ingroup OTHERauxiliary
99100!
100- ! > \par Contributors:
101- ! ==================
102- ! >
103- ! > Weslley Pereira, University of Colorado Denver, USA
104- !
105101! > \par Further Details:
106102! =====================
107103! >
108104! > \verbatim
109105! >
106+ ! > Based on the algorithm from
107+ ! >
110108! > Anderson E. (2017)
111109! > Algorithm 978: Safe Scaling in the Level 1 BLAS
112110! > ACM Trans Math Softw 44:1--28
117115subroutine CLARTG ( f , g , c , s , r )
118116 use LA_CONSTANTS, &
119117 only: wp= >sp, zero= >szero, one= >sone, two= >stwo, czero, &
120- rtmin = >srtmin, rtmax = >srtmax, safmin= >ssafmin, safmax= >ssafmax
118+ safmin= >ssafmin, safmax= >ssafmax
121119!
122120! -- LAPACK auxiliary routine --
123121! -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -129,7 +127,7 @@ subroutine CLARTG( f, g, c, s, r )
129127 complex (wp) f, g, r, s
130128! ..
131129! .. Local Scalars ..
132- real (wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
130+ real (wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax
133131 complex (wp) :: fs, gs, t
134132! ..
135133! .. Intrinsic Functions ..
@@ -141,6 +139,9 @@ subroutine CLARTG( f, g, c, s, r )
141139! .. Statement Function definitions ..
142140 ABSSQ( t ) = real ( t )** 2 + aimag ( t )** 2
143141! ..
142+ ! .. Constants ..
143+ rtmin = sqrt ( safmin )
144+ ! ..
144145! .. Executable Statements ..
145146!
146147 if ( g == czero ) then
@@ -149,30 +150,43 @@ subroutine CLARTG( f, g, c, s, r )
149150 r = f
150151 else if ( f == czero ) then
151152 c = zero
152- g1 = max ( abs (real (g)), abs (aimag (g)) )
153- if ( g1 > rtmin .and. g1 < rtmax ) then
153+ if ( real (g) == zero ) then
154+ r = abs (aimag (g))
155+ s = conjg ( g ) / r
156+ elseif ( aimag (g) == zero ) then
157+ r = abs (real (g))
158+ s = conjg ( g ) / r
159+ else
160+ g1 = max ( abs (real (g)), abs (aimag (g)) )
161+ rtmax = sqrt ( safmax/ 2 )
162+ if ( g1 > rtmin .and. g1 < rtmax ) then
154163!
155164! Use unscaled algorithm
156165!
157- g2 = ABSSQ( g )
158- d = sqrt ( g2 )
159- s = conjg ( g ) / d
160- r = d
161- else
166+ ! The following two lines can be replaced by `d = abs( g )`.
167+ ! This algorithm do not use the intrinsic complex abs.
168+ g2 = ABSSQ( g )
169+ d = sqrt ( g2 )
170+ s = conjg ( g ) / d
171+ r = d
172+ else
162173!
163174! Use scaled algorithm
164175!
165- u = min ( safmax, max ( safmin, g1 ) )
166- uu = one / u
167- gs = g* uu
168- g2 = ABSSQ( gs )
169- d = sqrt ( g2 )
170- s = conjg ( gs ) / d
171- r = d* u
176+ u = min ( safmax, max ( safmin, g1 ) )
177+ gs = g / u
178+ ! The following two lines can be replaced by `d = abs( gs )`.
179+ ! This algorithm do not use the intrinsic complex abs.
180+ g2 = ABSSQ( gs )
181+ d = sqrt ( g2 )
182+ s = conjg ( gs ) / d
183+ r = d* u
184+ end if
172185 end if
173186 else
174187 f1 = max ( abs (real (f)), abs (aimag (f)) )
175188 g1 = max ( abs (real (g)), abs (aimag (g)) )
189+ rtmax = sqrt ( safmax/ 4 )
176190 if ( f1 > rtmin .and. f1 < rtmax .and. &
177191 g1 > rtmin .and. g1 < rtmax ) then
178192!
@@ -181,52 +195,95 @@ subroutine CLARTG( f, g, c, s, r )
181195 f2 = ABSSQ( f )
182196 g2 = ABSSQ( g )
183197 h2 = f2 + g2
184- if ( f2 > rtmin .and. h2 < rtmax ) then
185- d = sqrt ( f2* h2 )
198+ ! safmin <= f2 <= h2 <= safmax
199+ if ( f2 >= h2 * safmin ) then
200+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
201+ c = sqrt ( f2 / h2 )
202+ r = f / c
203+ rtmax = rtmax * 2
204+ if ( f2 > rtmin .and. h2 < rtmax ) then
205+ ! safmin <= sqrt( f2*h2 ) <= safmax
206+ s = conjg ( g ) * ( f / sqrt ( f2* h2 ) )
207+ else
208+ s = conjg ( g ) * ( r / h2 )
209+ end if
186210 else
187- d = sqrt ( f2 )* sqrt ( h2 )
211+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
212+ ! Moreover,
213+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
214+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
215+ ! Also,
216+ ! g2 >> f2, which means that h2 = g2.
217+ d = sqrt ( f2 * h2 )
218+ c = f2 / d
219+ if ( c >= safmin ) then
220+ r = f / c
221+ else
222+ ! f2 / sqrt(f2 * h2) < safmin, then
223+ ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
224+ r = f * ( h2 / d )
225+ end if
226+ s = conjg ( g ) * ( f / d )
188227 end if
189- p = 1 / d
190- c = f2* p
191- s = conjg ( g )* ( f* p )
192- r = f* ( h2* p )
193228 else
194229!
195230! Use scaled algorithm
196231!
197232 u = min ( safmax, max ( safmin, f1, g1 ) )
198- uu = one / u
199- gs = g* uu
233+ gs = g / u
200234 g2 = ABSSQ( gs )
201- if ( f1* uu < rtmin ) then
235+ if ( f1 / u < rtmin ) then
202236!
203237! f is not well-scaled when scaled by g1.
204238! Use a different scaling for f.
205239!
206240 v = min ( safmax, max ( safmin, f1 ) )
207- vv = one / v
208- w = v * uu
209- fs = f* vv
241+ w = v / u
242+ fs = f / v
210243 f2 = ABSSQ( fs )
211244 h2 = f2* w** 2 + g2
212245 else
213246!
214247! Otherwise use the same scaling for f and g.
215248!
216249 w = one
217- fs = f* uu
250+ fs = f / u
218251 f2 = ABSSQ( fs )
219252 h2 = f2 + g2
220253 end if
221- if ( f2 > rtmin .and. h2 < rtmax ) then
222- d = sqrt ( f2* h2 )
254+ ! safmin <= f2 <= h2 <= safmax
255+ if ( f2 >= h2 * safmin ) then
256+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
257+ c = sqrt ( f2 / h2 )
258+ r = fs / c
259+ rtmax = rtmax * 2
260+ if ( f2 > rtmin .and. h2 < rtmax ) then
261+ ! safmin <= sqrt( f2*h2 ) <= safmax
262+ s = conjg ( gs ) * ( fs / sqrt ( f2* h2 ) )
263+ else
264+ s = conjg ( gs ) * ( r / h2 )
265+ end if
223266 else
224- d = sqrt ( f2 )* sqrt ( h2 )
267+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
268+ ! Moreover,
269+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
270+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
271+ ! Also,
272+ ! g2 >> f2, which means that h2 = g2.
273+ d = sqrt ( f2 * h2 )
274+ c = f2 / d
275+ if ( c >= safmin ) then
276+ r = fs / c
277+ else
278+ ! f2 / sqrt(f2 * h2) < safmin, then
279+ ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
280+ r = fs * ( h2 / d )
281+ end if
282+ s = conjg ( gs ) * ( fs / d )
225283 end if
226- p = 1 / d
227- c = ( f2* p )* w
228- s = conjg ( gs )* ( fs* p )
229- r = ( fs* ( h2* p ) )* u
284+ ! Rescale c and r
285+ c = c * w
286+ r = r * u
230287 end if
231288 end if
232289 return
0 commit comments