comparison libcruft/arpack/src/snapps.f @ 12274:9f5d2ef078e8 release-3-4-x

import ARPACK sources to libcruft from Debian package libarpack2 2.1+parpack96.dfsg-3+b1
author John W. Eaton <jwe@octave.org>
date Fri, 28 Jan 2011 14:04:33 -0500
parents
children
comparison
equal deleted inserted replaced
12273:83133b5bf392 12274:9f5d2ef078e8
1 c-----------------------------------------------------------------------
2 c\BeginDoc
3 c
4 c\Name: snapps
5 c
6 c\Description:
7 c Given the Arnoldi factorization
8 c
9 c A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
10 c
11 c apply NP implicit shifts resulting in
12 c
13 c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
14 c
15 c where Q is an orthogonal matrix which is the product of rotations
16 c and reflections resulting from the NP bulge chage sweeps.
17 c The updated Arnoldi factorization becomes:
18 c
19 c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
20 c
21 c\Usage:
22 c call snapps
23 c ( N, KEV, NP, SHIFTR, SHIFTI, V, LDV, H, LDH, RESID, Q, LDQ,
24 c WORKL, WORKD )
25 c
26 c\Arguments
27 c N Integer. (INPUT)
28 c Problem size, i.e. size of matrix A.
29 c
30 c KEV Integer. (INPUT/OUTPUT)
31 c KEV+NP is the size of the input matrix H.
32 c KEV is the size of the updated matrix HNEW. KEV is only
33 c updated on ouput when fewer than NP shifts are applied in
34 c order to keep the conjugate pair together.
35 c
36 c NP Integer. (INPUT)
37 c Number of implicit shifts to be applied.
38 c
39 c SHIFTR, Real array of length NP. (INPUT)
40 c SHIFTI Real and imaginary part of the shifts to be applied.
41 c Upon, entry to snapps, the shifts must be sorted so that the
42 c conjugate pairs are in consecutive locations.
43 c
44 c V Real N by (KEV+NP) array. (INPUT/OUTPUT)
45 c On INPUT, V contains the current KEV+NP Arnoldi vectors.
46 c On OUTPUT, V contains the updated KEV Arnoldi vectors
47 c in the first KEV columns of V.
48 c
49 c LDV Integer. (INPUT)
50 c Leading dimension of V exactly as declared in the calling
51 c program.
52 c
53 c H Real (KEV+NP) by (KEV+NP) array. (INPUT/OUTPUT)
54 c On INPUT, H contains the current KEV+NP by KEV+NP upper
55 c Hessenber matrix of the Arnoldi factorization.
56 c On OUTPUT, H contains the updated KEV by KEV upper Hessenberg
57 c matrix in the KEV leading submatrix.
58 c
59 c LDH Integer. (INPUT)
60 c Leading dimension of H exactly as declared in the calling
61 c program.
62 c
63 c RESID Real array of length N. (INPUT/OUTPUT)
64 c On INPUT, RESID contains the the residual vector r_{k+p}.
65 c On OUTPUT, RESID is the update residual vector rnew_{k}
66 c in the first KEV locations.
67 c
68 c Q Real KEV+NP by KEV+NP work array. (WORKSPACE)
69 c Work array used to accumulate the rotations and reflections
70 c during the bulge chase sweep.
71 c
72 c LDQ Integer. (INPUT)
73 c Leading dimension of Q exactly as declared in the calling
74 c program.
75 c
76 c WORKL Real work array of length (KEV+NP). (WORKSPACE)
77 c Private (replicated) array on each PE or array allocated on
78 c the front end.
79 c
80 c WORKD Real work array of length 2*N. (WORKSPACE)
81 c Distributed array used in the application of the accumulated
82 c orthogonal matrix Q.
83 c
84 c\EndDoc
85 c
86 c-----------------------------------------------------------------------
87 c
88 c\BeginLib
89 c
90 c\Local variables:
91 c xxxxxx real
92 c
93 c\References:
94 c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
95 c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
96 c pp 357-385.
97 c
98 c\Routines called:
99 c ivout ARPACK utility routine that prints integers.
100 c arscnd ARPACK utility routine for timing.
101 c smout ARPACK utility routine that prints matrices.
102 c svout ARPACK utility routine that prints vectors.
103 c slabad LAPACK routine that computes machine constants.
104 c slacpy LAPACK matrix copy routine.
105 c slamch LAPACK routine that determines machine constants.
106 c slanhs LAPACK routine that computes various norms of a matrix.
107 c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully.
108 c slarf LAPACK routine that applies Householder reflection to
109 c a matrix.
110 c slarfg LAPACK Householder reflection construction routine.
111 c slartg LAPACK Givens rotation construction routine.
112 c slaset LAPACK matrix initialization routine.
113 c sgemv Level 2 BLAS routine for matrix vector multiplication.
114 c saxpy Level 1 BLAS that computes a vector triad.
115 c scopy Level 1 BLAS that copies one vector to another .
116 c sscal Level 1 BLAS that scales a vector.
117 c
118 c\Author
119 c Danny Sorensen Phuong Vu
120 c Richard Lehoucq CRPC / Rice University
121 c Dept. of Computational & Houston, Texas
122 c Applied Mathematics
123 c Rice University
124 c Houston, Texas
125 c
126 c\Revision history:
127 c xx/xx/92: Version ' 2.4'
128 c
129 c\SCCS Information: @(#)
130 c FILE: napps.F SID: 2.4 DATE OF SID: 3/28/97 RELEASE: 2
131 c
132 c\Remarks
133 c 1. In this version, each shift is applied to all the sublocks of
134 c the Hessenberg matrix H and not just to the submatrix that it
135 c comes from. Deflation as in LAPACK routine slahqr (QR algorithm
136 c for upper Hessenberg matrices ) is used.
137 c The subdiagonals of H are enforced to be non-negative.
138 c
139 c\EndLib
140 c
141 c-----------------------------------------------------------------------
142 c
143 subroutine snapps
144 & ( n, kev, np, shiftr, shifti, v, ldv, h, ldh, resid, q, ldq,
145 & workl, workd )
146 c
147 c %----------------------------------------------------%
148 c | Include files for debugging and timing information |
149 c %----------------------------------------------------%
150 c
151 include 'debug.h'
152 include 'stat.h'
153 c
154 c %------------------%
155 c | Scalar Arguments |
156 c %------------------%
157 c
158 integer kev, ldh, ldq, ldv, n, np
159 c
160 c %-----------------%
161 c | Array Arguments |
162 c %-----------------%
163 c
164 Real
165 & h(ldh,kev+np), resid(n), shifti(np), shiftr(np),
166 & v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np)
167 c
168 c %------------%
169 c | Parameters |
170 c %------------%
171 c
172 Real
173 & one, zero
174 parameter (one = 1.0E+0, zero = 0.0E+0)
175 c
176 c %------------------------%
177 c | Local Scalars & Arrays |
178 c %------------------------%
179 c
180 integer i, iend, ir, istart, j, jj, kplusp, msglvl, nr
181 logical cconj, first
182 Real
183 & c, f, g, h11, h12, h21, h22, h32, ovfl, r, s, sigmai,
184 & sigmar, smlnum, ulp, unfl, u(3), t, tau, tst1
185 save first, ovfl, smlnum, ulp, unfl
186 c
187 c %----------------------%
188 c | External Subroutines |
189 c %----------------------%
190 c
191 external saxpy, scopy, sscal, slacpy, slarfg, slarf,
192 & slaset, slabad, arscnd, slartg
193 c
194 c %--------------------%
195 c | External Functions |
196 c %--------------------%
197 c
198 Real
199 & slamch, slanhs, slapy2
200 external slamch, slanhs, slapy2
201 c
202 c %----------------------%
203 c | Intrinsics Functions |
204 c %----------------------%
205 c
206 intrinsic abs, max, min
207 c
208 c %----------------%
209 c | Data statments |
210 c %----------------%
211 c
212 data first / .true. /
213 c
214 c %-----------------------%
215 c | Executable Statements |
216 c %-----------------------%
217 c
218 if (first) then
219 c
220 c %-----------------------------------------------%
221 c | Set machine-dependent constants for the |
222 c | stopping criterion. If norm(H) <= sqrt(OVFL), |
223 c | overflow should not occur. |
224 c | REFERENCE: LAPACK subroutine slahqr |
225 c %-----------------------------------------------%
226 c
227 unfl = slamch( 'safe minimum' )
228 ovfl = one / unfl
229 call slabad( unfl, ovfl )
230 ulp = slamch( 'precision' )
231 smlnum = unfl*( n / ulp )
232 first = .false.
233 end if
234 c
235 c %-------------------------------%
236 c | Initialize timing statistics |
237 c | & message level for debugging |
238 c %-------------------------------%
239 c
240 call arscnd (t0)
241 msglvl = mnapps
242 kplusp = kev + np
243 c
244 c %--------------------------------------------%
245 c | Initialize Q to the identity to accumulate |
246 c | the rotations and reflections |
247 c %--------------------------------------------%
248 c
249 call slaset ('All', kplusp, kplusp, zero, one, q, ldq)
250 c
251 c %----------------------------------------------%
252 c | Quick return if there are no shifts to apply |
253 c %----------------------------------------------%
254 c
255 if (np .eq. 0) go to 9000
256 c
257 c %----------------------------------------------%
258 c | Chase the bulge with the application of each |
259 c | implicit shift. Each shift is applied to the |
260 c | whole matrix including each block. |
261 c %----------------------------------------------%
262 c
263 cconj = .false.
264 do 110 jj = 1, np
265 sigmar = shiftr(jj)
266 sigmai = shifti(jj)
267 c
268 if (msglvl .gt. 2 ) then
269 call ivout (logfil, 1, jj, ndigit,
270 & '_napps: shift number.')
271 call svout (logfil, 1, sigmar, ndigit,
272 & '_napps: The real part of the shift ')
273 call svout (logfil, 1, sigmai, ndigit,
274 & '_napps: The imaginary part of the shift ')
275 end if
276 c
277 c %-------------------------------------------------%
278 c | The following set of conditionals is necessary |
279 c | in order that complex conjugate pairs of shifts |
280 c | are applied together or not at all. |
281 c %-------------------------------------------------%
282 c
283 if ( cconj ) then
284 c
285 c %-----------------------------------------%
286 c | cconj = .true. means the previous shift |
287 c | had non-zero imaginary part. |
288 c %-----------------------------------------%
289 c
290 cconj = .false.
291 go to 110
292 else if ( jj .lt. np .and. abs( sigmai ) .gt. zero ) then
293 c
294 c %------------------------------------%
295 c | Start of a complex conjugate pair. |
296 c %------------------------------------%
297 c
298 cconj = .true.
299 else if ( jj .eq. np .and. abs( sigmai ) .gt. zero ) then
300 c
301 c %----------------------------------------------%
302 c | The last shift has a nonzero imaginary part. |
303 c | Don't apply it; thus the order of the |
304 c | compressed H is order KEV+1 since only np-1 |
305 c | were applied. |
306 c %----------------------------------------------%
307 c
308 kev = kev + 1
309 go to 110
310 end if
311 istart = 1
312 20 continue
313 c
314 c %--------------------------------------------------%
315 c | if sigmai = 0 then |
316 c | Apply the jj-th shift ... |
317 c | else |
318 c | Apply the jj-th and (jj+1)-th together ... |
319 c | (Note that jj < np at this point in the code) |
320 c | end |
321 c | to the current block of H. The next do loop |
322 c | determines the current block ; |
323 c %--------------------------------------------------%
324 c
325 do 30 i = istart, kplusp-1
326 c
327 c %----------------------------------------%
328 c | Check for splitting and deflation. Use |
329 c | a standard test as in the QR algorithm |
330 c | REFERENCE: LAPACK subroutine slahqr |
331 c %----------------------------------------%
332 c
333 tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) )
334 if( tst1.eq.zero )
335 & tst1 = slanhs( '1', kplusp-jj+1, h, ldh, workl )
336 if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) ) then
337 if (msglvl .gt. 0) then
338 call ivout (logfil, 1, i, ndigit,
339 & '_napps: matrix splitting at row/column no.')
340 call ivout (logfil, 1, jj, ndigit,
341 & '_napps: matrix splitting with shift number.')
342 call svout (logfil, 1, h(i+1,i), ndigit,
343 & '_napps: off diagonal element.')
344 end if
345 iend = i
346 h(i+1,i) = zero
347 go to 40
348 end if
349 30 continue
350 iend = kplusp
351 40 continue
352 c
353 if (msglvl .gt. 2) then
354 call ivout (logfil, 1, istart, ndigit,
355 & '_napps: Start of current block ')
356 call ivout (logfil, 1, iend, ndigit,
357 & '_napps: End of current block ')
358 end if
359 c
360 c %------------------------------------------------%
361 c | No reason to apply a shift to block of order 1 |
362 c %------------------------------------------------%
363 c
364 if ( istart .eq. iend ) go to 100
365 c
366 c %------------------------------------------------------%
367 c | If istart + 1 = iend then no reason to apply a |
368 c | complex conjugate pair of shifts on a 2 by 2 matrix. |
369 c %------------------------------------------------------%
370 c
371 if ( istart + 1 .eq. iend .and. abs( sigmai ) .gt. zero )
372 & go to 100
373 c
374 h11 = h(istart,istart)
375 h21 = h(istart+1,istart)
376 if ( abs( sigmai ) .le. zero ) then
377 c
378 c %---------------------------------------------%
379 c | Real-valued shift ==> apply single shift QR |
380 c %---------------------------------------------%
381 c
382 f = h11 - sigmar
383 g = h21
384 c
385 do 80 i = istart, iend-1
386 c
387 c %-----------------------------------------------------%
388 c | Contruct the plane rotation G to zero out the bulge |
389 c %-----------------------------------------------------%
390 c
391 call slartg (f, g, c, s, r)
392 if (i .gt. istart) then
393 c
394 c %-------------------------------------------%
395 c | The following ensures that h(1:iend-1,1), |
396 c | the first iend-2 off diagonal of elements |
397 c | H, remain non negative. |
398 c %-------------------------------------------%
399 c
400 if (r .lt. zero) then
401 r = -r
402 c = -c
403 s = -s
404 end if
405 h(i,i-1) = r
406 h(i+1,i-1) = zero
407 end if
408 c
409 c %---------------------------------------------%
410 c | Apply rotation to the left of H; H <- G'*H |
411 c %---------------------------------------------%
412 c
413 do 50 j = i, kplusp
414 t = c*h(i,j) + s*h(i+1,j)
415 h(i+1,j) = -s*h(i,j) + c*h(i+1,j)
416 h(i,j) = t
417 50 continue
418 c
419 c %---------------------------------------------%
420 c | Apply rotation to the right of H; H <- H*G |
421 c %---------------------------------------------%
422 c
423 do 60 j = 1, min(i+2,iend)
424 t = c*h(j,i) + s*h(j,i+1)
425 h(j,i+1) = -s*h(j,i) + c*h(j,i+1)
426 h(j,i) = t
427 60 continue
428 c
429 c %----------------------------------------------------%
430 c | Accumulate the rotation in the matrix Q; Q <- Q*G |
431 c %----------------------------------------------------%
432 c
433 do 70 j = 1, min( i+jj, kplusp )
434 t = c*q(j,i) + s*q(j,i+1)
435 q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
436 q(j,i) = t
437 70 continue
438 c
439 c %---------------------------%
440 c | Prepare for next rotation |
441 c %---------------------------%
442 c
443 if (i .lt. iend-1) then
444 f = h(i+1,i)
445 g = h(i+2,i)
446 end if
447 80 continue
448 c
449 c %-----------------------------------%
450 c | Finished applying the real shift. |
451 c %-----------------------------------%
452 c
453 else
454 c
455 c %----------------------------------------------------%
456 c | Complex conjugate shifts ==> apply double shift QR |
457 c %----------------------------------------------------%
458 c
459 h12 = h(istart,istart+1)
460 h22 = h(istart+1,istart+1)
461 h32 = h(istart+2,istart+1)
462 c
463 c %---------------------------------------------------------%
464 c | Compute 1st column of (H - shift*I)*(H - conj(shift)*I) |
465 c %---------------------------------------------------------%
466 c
467 s = 2.0*sigmar
468 t = slapy2 ( sigmar, sigmai )
469 u(1) = ( h11 * (h11 - s) + t * t ) / h21 + h12
470 u(2) = h11 + h22 - s
471 u(3) = h32
472 c
473 do 90 i = istart, iend-1
474 c
475 nr = min ( 3, iend-i+1 )
476 c
477 c %-----------------------------------------------------%
478 c | Construct Householder reflector G to zero out u(1). |
479 c | G is of the form I - tau*( 1 u )' * ( 1 u' ). |
480 c %-----------------------------------------------------%
481 c
482 call slarfg ( nr, u(1), u(2), 1, tau )
483 c
484 if (i .gt. istart) then
485 h(i,i-1) = u(1)
486 h(i+1,i-1) = zero
487 if (i .lt. iend-1) h(i+2,i-1) = zero
488 end if
489 u(1) = one
490 c
491 c %--------------------------------------%
492 c | Apply the reflector to the left of H |
493 c %--------------------------------------%
494 c
495 call slarf ('Left', nr, kplusp-i+1, u, 1, tau,
496 & h(i,i), ldh, workl)
497 c
498 c %---------------------------------------%
499 c | Apply the reflector to the right of H |
500 c %---------------------------------------%
501 c
502 ir = min ( i+3, iend )
503 call slarf ('Right', ir, nr, u, 1, tau,
504 & h(1,i), ldh, workl)
505 c
506 c %-----------------------------------------------------%
507 c | Accumulate the reflector in the matrix Q; Q <- Q*G |
508 c %-----------------------------------------------------%
509 c
510 call slarf ('Right', kplusp, nr, u, 1, tau,
511 & q(1,i), ldq, workl)
512 c
513 c %----------------------------%
514 c | Prepare for next reflector |
515 c %----------------------------%
516 c
517 if (i .lt. iend-1) then
518 u(1) = h(i+1,i)
519 u(2) = h(i+2,i)
520 if (i .lt. iend-2) u(3) = h(i+3,i)
521 end if
522 c
523 90 continue
524 c
525 c %--------------------------------------------%
526 c | Finished applying a complex pair of shifts |
527 c | to the current block |
528 c %--------------------------------------------%
529 c
530 end if
531 c
532 100 continue
533 c
534 c %---------------------------------------------------------%
535 c | Apply the same shift to the next block if there is any. |
536 c %---------------------------------------------------------%
537 c
538 istart = iend + 1
539 if (iend .lt. kplusp) go to 20
540 c
541 c %---------------------------------------------%
542 c | Loop back to the top to get the next shift. |
543 c %---------------------------------------------%
544 c
545 110 continue
546 c
547 c %--------------------------------------------------%
548 c | Perform a similarity transformation that makes |
549 c | sure that H will have non negative sub diagonals |
550 c %--------------------------------------------------%
551 c
552 do 120 j=1,kev
553 if ( h(j+1,j) .lt. zero ) then
554 call sscal( kplusp-j+1, -one, h(j+1,j), ldh )
555 call sscal( min(j+2, kplusp), -one, h(1,j+1), 1 )
556 call sscal( min(j+np+1,kplusp), -one, q(1,j+1), 1 )
557 end if
558 120 continue
559 c
560 do 130 i = 1, kev
561 c
562 c %--------------------------------------------%
563 c | Final check for splitting and deflation. |
564 c | Use a standard test as in the QR algorithm |
565 c | REFERENCE: LAPACK subroutine slahqr |
566 c %--------------------------------------------%
567 c
568 tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) )
569 if( tst1.eq.zero )
570 & tst1 = slanhs( '1', kev, h, ldh, workl )
571 if( h( i+1,i ) .le. max( ulp*tst1, smlnum ) )
572 & h(i+1,i) = zero
573 130 continue
574 c
575 c %-------------------------------------------------%
576 c | Compute the (kev+1)-st column of (V*Q) and |
577 c | temporarily store the result in WORKD(N+1:2*N). |
578 c | This is needed in the residual update since we |
579 c | cannot GUARANTEE that the corresponding entry |
580 c | of H would be zero as in exact arithmetic. |
581 c %-------------------------------------------------%
582 c
583 if (h(kev+1,kev) .gt. zero)
584 & call sgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero,
585 & workd(n+1), 1)
586 c
587 c %----------------------------------------------------------%
588 c | Compute column 1 to kev of (V*Q) in backward order |
589 c | taking advantage of the upper Hessenberg structure of Q. |
590 c %----------------------------------------------------------%
591 c
592 do 140 i = 1, kev
593 call sgemv ('N', n, kplusp-i+1, one, v, ldv,
594 & q(1,kev-i+1), 1, zero, workd, 1)
595 call scopy (n, workd, 1, v(1,kplusp-i+1), 1)
596 140 continue
597 c
598 c %-------------------------------------------------%
599 c | Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
600 c %-------------------------------------------------%
601 c
602 call slacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv)
603 c
604 c %--------------------------------------------------------------%
605 c | Copy the (kev+1)-st column of (V*Q) in the appropriate place |
606 c %--------------------------------------------------------------%
607 c
608 if (h(kev+1,kev) .gt. zero)
609 & call scopy (n, workd(n+1), 1, v(1,kev+1), 1)
610 c
611 c %-------------------------------------%
612 c | Update the residual vector: |
613 c | r <- sigmak*r + betak*v(:,kev+1) |
614 c | where |
615 c | sigmak = (e_{kplusp}'*Q)*e_{kev} |
616 c | betak = e_{kev+1}'*H*e_{kev} |
617 c %-------------------------------------%
618 c
619 call sscal (n, q(kplusp,kev), resid, 1)
620 if (h(kev+1,kev) .gt. zero)
621 & call saxpy (n, h(kev+1,kev), v(1,kev+1), 1, resid, 1)
622 c
623 if (msglvl .gt. 1) then
624 call svout (logfil, 1, q(kplusp,kev), ndigit,
625 & '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}')
626 call svout (logfil, 1, h(kev+1,kev), ndigit,
627 & '_napps: betak = e_{kev+1}^T*H*e_{kev}')
628 call ivout (logfil, 1, kev, ndigit,
629 & '_napps: Order of the final Hessenberg matrix ')
630 if (msglvl .gt. 2) then
631 call smout (logfil, kev, kev, h, ldh, ndigit,
632 & '_napps: updated Hessenberg matrix H for next iteration')
633 end if
634 c
635 end if
636 c
637 9000 continue
638 call arscnd (t1)
639 tnapps = tnapps + (t1 - t0)
640 c
641 return
642 c
643 c %---------------%
644 c | End of snapps |
645 c %---------------%
646 c
647 end