Actual source code: dvd_blas.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7:
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include <private/vecimplslepc.h>
23: #include davidson.h
25: PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
26: PetscLogEvent SLEPC_SlepcDenseNorm = 0;
27: PetscLogEvent SLEPC_SlepcDenseOrth = 0;
28: PetscLogEvent SLEPC_SlepcDenseCopy = 0;
29: PetscLogEvent SLEPC_VecsMult = 0;
31: void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
32: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
33: void *ptr);
37: /*
38: Compute C <- a*A*B + b*C, where
39: ldC, the leading dimension of C,
40: ldA, the leading dimension of A,
41: rA, cA, rows and columns of A,
42: At, if true use the transpose of A instead,
43: ldB, the leading dimension of B,
44: rB, cB, rows and columns of B,
45: Bt, if true use the transpose of B instead
46: */
47: PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
48: PetscScalar a,
49: const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscBool At,
50: const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscBool Bt)
51: {
52: PetscErrorCode ierr;
53: PetscInt tmp;
54: PetscBLASInt m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
55: const char *N = "N", *T = "C", *qA = N, *qB = N;
59: if ((rA == 0) || (cB == 0)) { return(0); }
64: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
66: /* Transpose if needed */
67: if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
68: if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
69:
70: /* Check size */
71: if (cA != rB) {
72: SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
73: }
74:
75: /* Do stub */
76: if ((rA == 1) && (cA == 1) && (cB == 1)) {
77: if (!At && !Bt) *C = *A * *B;
78: else if (At && !Bt) *C = PetscConj(*A) * *B;
79: else if (!At && Bt) *C = *A * PetscConj(*B);
80: else *C = PetscConj(*A) * PetscConj(*B);
81: m = n = k = 1;
82: } else {
83: m = rA; n = cB; k = cA;
84: BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
85: &ldB, &b, C, &ldC);
86: }
88: PetscLogFlops(m*n*2*k);
89: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
91: return(0);
92: }
96: /*
97: Compute C <- A*B, where
98: sC, structure of C,
99: ldC, the leading dimension of C,
100: sA, structure of A,
101: ldA, the leading dimension of A,
102: rA, cA, rows and columns of A,
103: At, if true use the transpose of A instead,
104: sB, structure of B,
105: ldB, the leading dimension of B,
106: rB, cB, rows and columns of B,
107: Bt, if true use the transpose of B instead
108: */
109: PetscErrorCode SlepcDenseMatProdTriang(
110: PetscScalar *C, MatType_t sC, PetscInt ldC,
111: const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
112: PetscBool At,
113: const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
114: PetscBool Bt)
115: {
116: PetscErrorCode ierr;
117: PetscInt tmp;
118: PetscScalar one=1.0, zero=0.0;
119: PetscBLASInt rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
123: if ((rA == 0) || (cB == 0)) { return(0); }
128: /* Transpose if needed */
129: if (At) tmp = rA, rA = cA, cA = tmp;
130: if (Bt) tmp = rB, rB = cB, cB = tmp;
131:
132: /* Check size */
133: if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
134: if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");
136: /* Optimized version: trivial case */
137: if ((rA == 1) && (cA == 1) && (cB == 1)) {
138: if (!At && !Bt) *C = *A * *B;
139: else if (At && !Bt) *C = PetscConj(*A) * *B;
140: else if (!At && Bt) *C = *A * PetscConj(*B);
141: else if (At && Bt) *C = PetscConj(*A) * PetscConj(*B);
142: return(0);
143: }
144:
145: /* Optimized versions: sA == 0 && sB == 0 */
146: if ((sA == 0) && (sB == 0)) {
147: if (At) tmp = rA, rA = cA, cA = tmp;
148: if (Bt) tmp = rB, rB = cB, cB = tmp;
149: SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
150: cB, Bt);
151: PetscFunctionReturn(ierr);
152: }
154: /* Optimized versions: A hermitian && (B not triang) */
155: if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
156: DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
157: DVD_ISNOT(sB,DVD_MAT_LTRIANG) ) {
158: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
159: rC = rA; cC = cB;
160: BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
161: (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
162: PetscLogFlops(rA*cB*cA);
163: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
164: return(0);
165: }
167: /* Optimized versions: B hermitian && (A not triang) */
168: if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
169: DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
170: DVD_ISNOT(sA,DVD_MAT_LTRIANG) ) {
171: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
172: rC = rA; cC = cB;
173: BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
174: (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
175: PetscLogFlops(rA*cB*cA);
176: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
177: return(0);
178: }
179:
180: SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
181: }
185: /*
186: Normalize the columns of the matrix A, where
187: ldA, the leading dimension of A,
188: rA, cA, rows and columns of A.
189: if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
190: are normalized as being one column.
191: */
192: PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
193: PetscInt cA, PetscScalar *eigi)
194: {
195: PetscErrorCode ierr;
196: PetscInt i;
197: PetscScalar norm, norm0;
198: PetscBLASInt rA = _rA, one=1;
204: PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);
206: for(i=0; i<cA; i++) {
207: if(eigi && eigi[i] != 0.0) {
208: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
209: norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
210: norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
211: BLASscal_(&rA, &norm, &A[i*ldA], &one);
212: BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
213: i++;
214: } else {
215: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
216: norm = 1.0 / norm;
217: BLASscal_(&rA, &norm, &A[i*ldA], &one);
218: }
219: }
221: PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);
223: return(0);
224: }
225:
229: /*
230: Compute A <- orth(A), where
231: ldA, the leading dimension of A,
232: rA, cA, rows and columns of A,
233: auxS, auxiliary vector of more size than cA+min(rA,cA),
234: lauxS, size of auxS,
235: ncA, new number of columns of A
236: */
237: PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
238: PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
239: PetscInt *ncA)
240: {
241: PetscErrorCode ierr;
242: PetscBLASInt ldA = _ldA, rA = _rA, cA = _cA,
243: info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
244: PetscScalar *tau = auxS, *w = tau + ltau;
248: /* Quick exit */
249: if ((_rA == 0) || (cA == 0)) { return(0); }
254: /* Memory check */
255: if (lw < cA) SETERRQ(PETSC_COMM_SELF,1, "Insufficient memory for xGEQRF");
256:
257: PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);
258: LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
259: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
260: LAPACKorgqr_(&rA, <au, <au, A, &ldA, tau, w, &lw, &info);
261: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
262: PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);
264: if (ncA) *ncA = ltau;
266: return(0);
267: }
271: /*
272: Y <- X, where
273: ldX, leading dimension of X,
274: rX, cX, rows and columns of X
275: ldY, leading dimension of Y
276: */
277: PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
278: PetscInt ldX, PetscInt rX, PetscInt cX)
279: {
280: PetscErrorCode ierr;
281: PetscInt i;
287: if ((ldX < rX) || (ldY < rX)) {
288: SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
289: }
290:
291: /* Quick exit */
292: if (Y == X) {
293: if (ldX != ldY) {
294: SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
295: }
296: return(0);
297: }
299: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
300: for(i=0; i<cX; i++) {
301: PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
302:
303: }
304: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
306: return(0);
307: }
311: /*
312: Y <- X, where
313: ldX, leading dimension of X,
314: rX, cX, rows and columns of X
315: ldY, leading dimension of Y
316: */
317: PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
318: PetscScalar *X, MatType_t sX, PetscInt ldX,
319: PetscInt rX, PetscInt cX)
320: {
321: PetscErrorCode ierr;
322: PetscInt i,j,c;
328: if ((ldX < rX) || (ldY < rX)) {
329: SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
330: }
332: if (sY == 0 && sX == 0) {
333: SlepcDenseCopy(Y, ldY, X, ldX, rX, cX);
334: return(0);
335: }
337: if (rX != cX) {
338: SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");
339: }
341: if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
342: DVD_ISNOT(sX,DVD_MAT_LTRIANG)) { /* UpTr to ... */
343: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
344: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
345: c = 0; /* so copy */
346: else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
347: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
348: c = 1; /* so transpose */
349: else /* ... Full, */
350: c = 2; /* so reflect from up */
351: } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
352: DVD_IS(sX,DVD_MAT_LTRIANG)) { /* LoTr to ... */
353: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
354: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
355: c = 1; /* so transpose */
356: else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
357: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
358: c = 0; /* so copy */
359: else /* ... Full, */
360: c = 3; /* so reflect fr. down */
361: } else /* Full to any, */
362: c = 0; /* so copy */
363:
364: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
366: switch(c) {
367: case 0: /* copy */
368: for(i=0; i<cX; i++) {
369: PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
370:
371: }
372: break;
374: case 1: /* transpose */
375: for(i=0; i<cX; i++)
376: for(j=0; j<rX; j++)
377: Y[ldY*j+i] = PetscConj(X[ldX*i+j]);
378: break;
380: case 2: /* reflection from up */
381: for(i=0; i<cX; i++)
382: for(j=0; j<PetscMin(i+1,rX); j++)
383: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
384: break;
386: case 3: /* reflection from down */
387: for(i=0; i<cX; i++)
388: for(j=i; j<rX; j++)
389: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
390: break;
391: }
392: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
394: return(0);
395: }
400: /*
401: Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
402: where X and Y are contiguous global vectors.
403: */
404: PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
405: Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
406: PetscInt cM)
407: {
408: PetscErrorCode ierr;
412: SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
413:
415: return(0);
416: }
421: /*
422: Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
423: where X and Y are contiguous global vectors.
424: */
425: PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
426: PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
427: PetscInt ldM, PetscInt rM, PetscInt cM)
428: {
429: PetscErrorCode ierr;
430: const PetscScalar *px;
431: PetscScalar *py;
432: PetscInt rX, rY, ldX, ldY, i, rcX;
435: SlepcValidVecsContiguous(Y,cM*dY,1);
436: SlepcValidVecsContiguous(X,cX,5);
439: /* Compute the real number of columns */
440: rcX = cX/dX;
441: if (rcX != rM) {
442: SETERRQ(((PetscObject)*Y)->comm,1, "Matrix dimensions do not match");
443: }
445: if ((rcX == 0) || (rM == 0) || (cM == 0)) {
446: return(0);
447: } else if ((Y + cM <= X) || (X + cX <= Y) ||
448: ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
449: /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
451: /* Get the dense matrices and dimensions associated to Y and X */
452: VecGetLocalSize(X[0], &rX);
453: VecGetLocalSize(Y[0], &rY);
454: if (rX != rY) {
455: SETERRQ(((PetscObject)*Y)->comm,1, "The multivectors do not have the same dimension");
456: }
457: VecGetArrayRead(X[0], &px);
458: VecGetArray(Y[0], &py);
460: /* Update the strides */
461: ldX = rX*dX; ldY= rY*dY;
463: /* Do operation */
464: SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
465: PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE);
466:
467: VecRestoreArrayRead(X[0], &px);
468: VecRestoreArray(Y[0], &py);
469: for(i=1; i<cM; i++) {
470: PetscObjectStateIncrease((PetscObject)Y[dY*i]);
471: }
473: } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
474: /* If not, call to SlepcUpdateVectors */
475: SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
476: ldM, PETSC_FALSE);
477: if (alpha != 1.0)
478: for (i=0; i<cM; i++) {
479: VecScale(Y[i], alpha);
480: }
481: } else {
482: SETERRQ(((PetscObject)*Y)->comm,1, "Unsupported case");
483: }
485: return(0);
486: }
490: /*
491: Compute X <- alpha * X[0:dX:cX-1] * M
492: where X is a matrix with non-consecutive columns
493: */
494: PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
495: const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
496: PetscScalar *work, PetscInt lwork)
497: {
499: PetscScalar **px, *Y, *Z;
500: PetscInt rX, i, j, rY, rY0, ldY;
503: SlepcValidVecsContiguous(X,cX,1);
507: if (cX != rM) {
508: SETERRQ(((PetscObject)*X)->comm,1, "Matrix dimensions do not match");
509: }
511: rY = (lwork/2)/cX;
512: if (rY <= 0) {
513: SETERRQ(((PetscObject)*X)->comm,1, "Insufficient work space given");
514: }
515: Y = work; Z = &Y[cX*rY]; ldY = rY;
517: if ((cX == 0) || (rM == 0) || (cM == 0)) {
518: return(0);
519: }
521: /* Get the dense vectors associated to the columns of X */
522: PetscMalloc(sizeof(Vec)*cX, &px);
523: for(i=0; i<cX; i++) {
524: VecGetArray(X[i], &px[i]);
525: }
526: VecGetLocalSize(X[0], &rX);
528: for(i=0, rY0=0; i<rX; i+=rY0) {
529: rY0 = PetscMin(rY, rX-i);
531: /* Y <- X[i0:i1,:] */
532: for(j=0; j<cX; j++) {
533: SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
534:
535: }
537: /* Z <- Y * M */
538: SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
539: M, ldM, rM, cM, PETSC_FALSE);
540:
542: /* X <- Z */
543: for(j=0; j<cM; j++) {
544: SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
545:
546: }
547: }
549: for(i=0; i<cX; i++) {
550: VecRestoreArray(X[i], &px[i]);
551: }
552: PetscFree(px);
554: return(0);
555: }
561: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
562: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
563: where W = U' * V.
564: workS0 and workS1 are an auxiliary scalar vector of size
565: (eU-sU)*sV*(sU!=0)+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
566: is needed, and of size eU*eV.
567: */
568: PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
569: Vec *U, PetscInt sU, PetscInt eU,
570: Vec *V, PetscInt sV, PetscInt eV,
571: PetscScalar *workS0, PetscScalar *workS1)
572: {
573: PetscErrorCode ierr;
574: PetscInt ldU, ldV, i, j, k, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
575: const PetscScalar *pu, *pv;
576: PetscScalar *W, *Wr;
580: /* Check if quick exit */
581: if ((eU-sU == 0) || (eV-sV == 0))
582: return(0);
584: SlepcValidVecsContiguous(U,eU,4);
585: SlepcValidVecsContiguous(V,eV,7);
587:
588: /* Get the dense matrices and dimensions associated to U and V */
589: VecGetLocalSize(U[0], &ldU);
590: VecGetLocalSize(V[0], &ldV);
591: if (ldU != ldV) {
592: SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
593: }
594: VecGetArrayRead(U[0], &pu);
595: VecGetArrayRead(V[0], &pv);
597: if (workS0) {
599: W = workS0;
600: } else {
601: PetscMalloc(sizeof(PetscScalar)*ms, &W);
602:
603: }
605: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
607: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
608: /* Use the smart memory usage version */
610: /* W <- U' * V */
611: SlepcDenseMatProdTriang(W, sM, eU,
612: pu, 0, ldU, ldU, eU, PETSC_TRUE,
613: pv, 0, ldV, ldV, eV, PETSC_FALSE);
614:
615:
616: /* ReduceAll(W, SUM) */
617: MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
618: ((PetscObject)U[0])->comm);
619: /* Full M matrix */
620: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
621: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
622: if (workS1) {
624: Wr = workS1;
625: if (PetscAbs(PetscMin(W-workS1, workS1-W)) < ms) {
626: SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
627: }
628: } else {
629: PetscMalloc(sizeof(PetscScalar)*ms, &Wr);
630:
631: }
632:
633: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
634: if (sU > 0) {
635: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
636: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
637: pv , ldV, ldV, sV, PETSC_FALSE);
638:
639: }
640:
641: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
642: SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
643: pu, ldU, ldU, eU, PETSC_TRUE,
644: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
645:
646:
647: /* ReduceAll(W, SUM) */
648: MPI_Allreduce(W, Wr, ms, MPIU_SCALAR,
649: MPIU_SUM, ((PetscObject)U[0])->comm);
650:
651: /* M(...,...) <- W */
652: k = 0;
653: if (sU > 0) for (i=0; i<sV; i++)
654: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
655: for (i=sV; i<eV; i++)
656: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
657:
658: if (!workS1) {
659: PetscFree(Wr);
660: }
662: /* Upper triangular M matrix */
663: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
664: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
665: if (workS1) {
667: Wr = workS1;
668: if (PetscAbs(PetscMin(W-workS1,workS1-W)) < (eV-sV)*eU) {
669: SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
670: }
671: } else {
672: PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
673:
674: }
675:
676: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
677: SlepcDenseMatProd(W, eU, 0.0, 1.0,
678: pu, ldU, ldU, eU, PETSC_TRUE,
679: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
680:
681:
682: /* ReduceAll(W, SUM) */
683: MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
684: ((PetscObject)U[0])->comm);
685:
686: /* M(...,...) <- W */
687: for (i=sV,k=0; i<eV; i++)
688: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
690: if (!workS1) {
691: PetscFree(Wr);
692: }
694: /* Lower triangular M matrix */
695: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
696: DVD_IS(sM,DVD_MAT_LTRIANG)) {
697: if (workS1) {
699: Wr = workS1;
700: if (PetscMin(W - workS1, workS1 - W) < (eU-sU)*eV) {
701: SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
702: }
703: } else {
704: PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
705:
706: }
707:
708: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
709: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
710: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
711: pv , ldV, ldV, eV, PETSC_FALSE);
712:
713:
714: /* ReduceAll(W, SUM) */
715: MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
716: ((PetscObject)U[0])->comm);
717:
718: /* M(...,...) <- W */
719: for (i=0,k=0; i<eV; i++)
720: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
721:
722: if (!workS1) {
723: PetscFree(Wr);
724: }
725: }
727: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
729: if (!workS0) {
730: PetscFree(W);
731: }
733: VecRestoreArrayRead(U[0], &pu);
734: VecRestoreArrayRead(V[0], &pv);
735: return(0);
736: }
741: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
742: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
743: where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
744: workS0 and workS1 are an auxiliary scalar vector of size
745: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
746: is needed, and of size eU*eV.
747: */
748: PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
749: Vec *U, PetscInt sU, PetscInt eU,
750: Vec *V, PetscInt sV, PetscInt eV)
751: {
752: PetscErrorCode ierr;
753: PetscInt ldU, ldV;
754: PetscScalar *pu, *pv;
758: /* Check if quick exit */
759: if ((eU-sU == 0) || (eV-sV == 0))
760: return(0);
761:
762: SlepcValidVecsContiguous(U,eU,4);
763: SlepcValidVecsContiguous(V,eV,7);
766: /* Get the dense matrices and dimensions associated to U and V */
767: VecGetLocalSize(U[0], &ldU);
768: VecGetLocalSize(V[0], &ldV);
769: if (ldU != ldV) {
770: SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
771: }
772: VecGetArray(U[0], &pu);
773: VecGetArray(V[0], &pv);
775: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
776: /* M <- local_U' * local_V */
777: SlepcDenseMatProdTriang(M, sM, eU,
778: pu, 0, ldU, ldU, eU, PETSC_TRUE,
779: pv, 0, ldV, ldV, eV, PETSC_FALSE);
780:
781:
782: /* Full M matrix */
783: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
784: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
785: /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
786: SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
787: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
788: pv , ldV, ldV, sV, PETSC_FALSE);
789:
790:
791: /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
792: SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
793: pu, ldU, ldU, eU, PETSC_TRUE,
794: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
795:
796:
797: /* Other structures */
798: } else SETERRQ(((PetscObject)*U)->comm,1, "Matrix structure not supported");
800: VecRestoreArray(U[0], &pu);
801: PetscObjectStateDecrease((PetscObject)U[0]);
802: VecRestoreArray(V[0], &pv);
803: PetscObjectStateDecrease((PetscObject)V[0]);
805: return(0);
806: }
811: /* Computes M <- nprocs*M
812: where nprocs is the number of processors.
813: */
814: PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
815: PetscInt rM, PetscInt cM, Vec V)
816: {
817: int i,j,n;
821: /* Check if quick exit */
822: if ((rM == 0) || (cM == 0))
823: return(0);
825:
826: if (sM != 0) SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
828: MPI_Comm_size(((PetscObject)V)->comm, &n);
830: for(i=0; i<cM; i++)
831: for(j=0; j<rM; j++)
832: M[ldM*i+j]/= (PetscScalar)n;
834: return(0);
835: }
840: /* Computes N <- Allreduce( [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ] )
841: ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
842: where W = U' * V.
843: workS0 and workS1 are an auxiliary scalar vector of size
844: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
845: is needed, and of size eU*eV.
846: */
847: PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
848: PetscInt rM, PetscInt cM, PetscScalar *auxS,
849: Vec V)
850: {
851: PetscErrorCode ierr;
852: PetscScalar *W, *Wr;
856: /* Check if quick exit */
857: if ((rM == 0) || (cM == 0))
858: return(0);
861:
862: if (auxS)
863: W = auxS;
864: else {
865: PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
866:
867: }
868: Wr = W + rM*cM;
870: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
872: if (sM == 0) {
873: /* W <- M */
874: SlepcDenseCopy(W, rM, M, ldM, rM, cM);
876: /* Wr <- ReduceAll(W, SUM) */
877: MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
878: ((PetscObject)V)->comm);
880: /* M <- Wr */
881: SlepcDenseCopy(M, ldM, Wr, rM, rM, cM);
883: /* Other structures */
884: } else SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
886: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
888: if (!auxS) {
889: PetscFree(W);
890: }
892: return(0);
893: }
898: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
899: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
900: where W = U' * V.
901: r, a DvdReduction structure,
902: sr, an structure DvdMult_copy_func.
903: */
904: PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
905: Vec *U, PetscInt sU, PetscInt eU,
906: Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
907: DvdMult_copy_func *sr)
908: {
909: PetscErrorCode ierr;
910: PetscInt ldU, ldV, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
911: const PetscScalar *pu, *pv;
912: PetscScalar *W;
916: /* Check if quick exit */
917: if ((eU-sU == 0) || (eV-sV == 0))
918: return(0);
919:
920: SlepcValidVecsContiguous(U,eU,4);
921: SlepcValidVecsContiguous(V,eV,7);
924: /* Get the dense matrices and dimensions associated to U and V */
925: VecGetLocalSize(U[0], &ldU);
926: VecGetLocalSize(V[0], &ldV);
927: if (ldU != ldV) {
928: SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
929: }
930: VecGetArrayRead(U[0], &pu);
931: VecGetArrayRead(V[0], &pv);
933: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
935: if ((sU == 0) && (sV == 0)) {
936: /* Use the smart memory usage version */
938: /* Add the reduction to r */
939: SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
940:
942: /* W <- U' * V */
943: SlepcDenseMatProdTriang(W, sM, eU,
944: pu, 0, ldU, ldU, eU, PETSC_TRUE,
945: pv, 0, ldV, ldV, eV, PETSC_FALSE);
946:
947:
948: /* M <- ReduceAll(W, SUM) */
949: sr->M = M; sr->ld = ldM;
950: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
951: sr->i2 = eV;
953: /* Full M matrix */
954: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
955: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
956: /* Add the reduction to r */
957: SlepcAllReduceSum(r, ms, VecsMultS_copy_func, sr, &W);
958:
960: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
961: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
962: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
963: pv , ldV, ldV, sV, PETSC_FALSE);
964:
965:
966: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
967: SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
968: pu, ldU, ldU, eU, PETSC_TRUE,
969: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
970:
971:
972: /* M <- ReduceAll(W, SUM) */
973: sr->M = M; sr->ld = ldM;
974: sr->i0 = sU>0?0:sV; sr->i1 = sV; sr->s0 = sU; sr->e0 = eU;
975: sr->i2 = eV; sr->s1 = 0; sr->e1 = eU;
977: /* Upper triangular M matrix */
978: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
979: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
980: /* Add the reduction to r */
981: SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
982:
983:
984: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
985: SlepcDenseMatProd(W, eU, 0.0, 1.0,
986: pu, ldU, ldU, eU, PETSC_TRUE,
987: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
988:
989:
990: /* M <- ReduceAll(W, SUM) */
991: sr->M = M; sr->ld = ldM;
992: sr->i0 = sV; sr->i1 = eV; sr->s0 = 0; sr->e0 = eU;
993: sr->i2 = eV;
994:
995: /* Lower triangular M matrix */
996: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
997: DVD_IS(sM,DVD_MAT_LTRIANG)) {
998: /* Add the reduction to r */
999: SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
1000:
1001:
1002: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
1003: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
1004: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
1005: pv , ldV, ldV, eV, PETSC_FALSE);
1006:
1007:
1008: /* ReduceAll(W, SUM) */
1009: sr->M = M; sr->ld = ldM;
1010: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
1011: sr->i2 = eV;
1012: }
1014: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
1016: VecRestoreArrayRead(U[0], &pu);
1017: VecRestoreArrayRead(V[0], &pv);
1018: return(0);
1019: }
1023: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
1024: void *ptr)
1025: {
1026: PetscInt i, j, k;
1027: DvdMult_copy_func
1028: *sr = (DvdMult_copy_func*)ptr;
1033: for (i=sr->i0,k=0; i<sr->i1; i++)
1034: for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
1035: for (i=sr->i1; i<sr->i2; i++)
1036: for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
1038: if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
1040: return(0);
1041: }
1045: /* Orthonormalize a chunk of parallel vector.
1046: NOTE: wS0 and wS1 must be of size n*n.
1047: */
1048: PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
1049: PetscScalar *wS1)
1050: {
1051: PetscErrorCode ierr;
1052: PetscBLASInt nn = n, info, ld;
1053: PetscInt ldV, i;
1054: PetscScalar *H, *T, one=1.0, *pv;
1055:
1058: if (!wS0) {
1059: PetscMalloc(sizeof(PetscScalar)*n*n, &H);
1060: } else {
1062: H = wS0;
1063: }
1064: if (!wS1) {
1065: PetscMalloc(sizeof(PetscScalar)*n*n, &T);
1066: } else {
1068: T = wS1;
1069: }
1071: /* H <- V' * V */
1072: VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL);
1074: /* H <- chol(H) */
1075: LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
1076: if (info) SETERRQ1(((PetscObject)*V)->comm,PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
1078: /* V <- V * inv(H) */
1079: VecGetLocalSize(V[0], &ldV);
1080: VecGetArray(V[0], &pv);
1081: ld = ldV;
1082: BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1083: VecRestoreArray(V[0], &pv);
1084: for(i=1; i<n; i++) {
1085: PetscObjectStateIncrease((PetscObject)V[i]);
1086: }
1088: if (!wS0) {
1089: PetscFree(H);
1090: }
1091: if (!wS1) {
1092: PetscFree(T);
1093: }
1094:
1095: return(0);
1096: }
1100: /*
1101: Sum up several arrays with only one call to MPIReduce.
1102: */
1103: PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1104: PetscInt max_size_ops,
1105: PetscScalar *in, PetscScalar *out,
1106: PetscInt max_size_in, DvdReduction *r,
1107: MPI_Comm comm)
1108: {
1113: r->in = in;
1114: r->out = out;
1115: r->size_in = 0;
1116: r->max_size_in = max_size_in;
1117: r->ops = ops;
1118: r->size_ops = 0;
1119: r->max_size_ops = max_size_ops;
1120: r->comm = comm;
1122: return(0);
1123: }
1127: PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1128: DvdReductionPostF f, void *ptr,
1129: PetscScalar **in)
1130: {
1133: *in = r->in + r->size_in;
1134: r->ops[r->size_ops].out = r->out + r->size_in;
1135: r->ops[r->size_ops].size_out = size_in;
1136: r->ops[r->size_ops].f = f;
1137: r->ops[r->size_ops].ptr = ptr;
1138: if (++(r->size_ops) > r->max_size_ops) {
1139: SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
1140: }
1141: if ((r->size_in+= size_in) > r->max_size_in) {
1142: SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
1143: }
1145: return(0);
1146: }
1151: PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1152: {
1153: PetscErrorCode ierr;
1154: PetscInt i;
1158: /* Check if quick exit */
1159: if (r->size_ops == 0)
1160: return(0);
1162: /* Call the MPIAllReduce routine */
1163: MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1164: r->comm);
1166: /* Call the postponed routines */
1167: for(i=0; i<r->size_ops; i++) {
1168: r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1169:
1170: }
1172: /* Tag the operation as done */
1173: r->size_ops = 0;
1175: return(0);
1176: }
1181: /* auxS: size_cX+V_new_e */
1182: PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1183: PetscInt size_cX, Vec *V, PetscInt V_new_s,
1184: PetscInt V_new_e, PetscScalar *auxS,
1185: PetscRandom rand)
1186: {
1187: PetscErrorCode ierr;
1188: PetscInt i, j;
1189: PetscBool lindep;
1190: PetscReal norm;
1191: PetscScalar *auxS0 = auxS;
1192:
1194:
1195: /* Orthonormalize V with IP */
1196: for (i=V_new_s; i<V_new_e; i++) {
1197: for(j=0; j<3; j++) {
1198: if (j>0) { SlepcVecSetRandom(V[i], rand); }
1199: if (cX + size_cX == V) {
1200: /* If cX and V are contiguous, orthogonalize in one step */
1201: IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1202: V[i], auxS0, &norm, &lindep);
1203: } else if (DS) {
1204: /* Else orthogonalize first against DS, and then against cX and V */
1205: IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1206: V[i], auxS0, PETSC_NULL, &lindep);
1207: if(!lindep) {
1208: IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1209: V[i], auxS0, &norm, &lindep);
1210: }
1211: } else {
1212: /* Else orthogonalize first against cX and then against V */
1213: IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1214: V[i], auxS0, &norm, &lindep);
1215: }
1216: if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1217: PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);
1218: }
1219: if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1220: SETERRQ(((PetscObject)ip)->comm,1, "Error during orthonormalization of eigenvectors");
1221: }
1222: VecScale(V[i], 1.0/norm);
1223: }
1224:
1225: return(0);
1226: }
1231: /* auxS: size_cX+V_new_e+1 */
1232: PetscErrorCode dvd_BorthV(IP ip, Vec *DS, Vec *BDS, PetscInt size_DS, Vec *cX,
1233: Vec *BcX, PetscInt size_cX, Vec *V, Vec *BV,
1234: PetscInt V_new_s, PetscInt V_new_e,
1235: PetscScalar *auxS, PetscRandom rand)
1236: {
1237: PetscErrorCode ierr;
1238: PetscInt i, j;
1239: PetscBool lindep;
1240: PetscReal norm;
1241: PetscScalar *auxS0 = auxS;
1242:
1244:
1245: /* Orthonormalize V with IP */
1246: for (i=V_new_s; i<V_new_e; i++) {
1247: for(j=0; j<3; j++) {
1248: if (j>0) { SlepcVecSetRandom(V[i], rand); }
1249: if (cX + size_cX == V) {
1250: /* If cX and V are contiguous, orthogonalize in one step */
1251: IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX+i, PETSC_NULL, cX, BcX,
1252: V[i], BV[i], auxS0, &norm, &lindep);
1253: } else if (DS) {
1254: /* Else orthogonalize first against DS, and then against cX and V */
1255: IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX, PETSC_NULL, cX, BcX,
1256: V[i], BV[i], auxS0, PETSC_NULL, &lindep);
1257: if(!lindep) {
1258: IPBOrthogonalize(ip, 0, PETSC_NULL, PETSC_NULL, i, PETSC_NULL, V, BV,
1259: V[i], BV[i], auxS0, &norm, &lindep);
1260: }
1261: } else {
1262: /* Else orthogonalize first against cX and then against V */
1263: IPBOrthogonalize(ip, size_cX, cX, BcX, i, PETSC_NULL, V, BV,
1264: V[i], BV[i], auxS0, &norm, &lindep);
1265: }
1266: if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1267: PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1268:
1269: }
1270: if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1271: SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors!");
1272: }
1273: VecScale(V[i], 1.0/norm);
1274: VecScale(BV[i], 1.0/norm);
1275: }
1276:
1277: return(0);
1278: }
1279:
1282: /*
1283: Compute eigenvectors associated to the Schur decomposition (S, T) and
1284: save the left vectors in pY and the right vectors in pX, where
1285: n, size of the eigenproblem
1286: ldS, ldT, leading dimension of S and T
1287: ldpX, ldpY, leading dimension of pX and pY
1288: auxS, auxiliar scalar of length:
1289: double standard 3n, double generalized 6n,
1290: complex standard 3n, complex generalized 3n
1291: size_auxS, the length of auxS
1292: doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1293: */
1294: PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
1295: PetscInt ldS_, PetscScalar *T, PetscInt ldT_, PetscScalar *pX,
1296: PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
1297: PetscInt size_auxS, PetscBool doProd)
1298: {
1299: #if defined(SLEPC_MISSING_LAPACK_GGEV)
1301: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GGEV - Lapack routine is unavailable.");
1302: #else
1303: PetscBLASInt n, ldpX, ldpY, nout, info, ldS, ldT;
1304: const char *side, *howmny;
1305: #if defined(PETSC_USE_COMPLEX)
1306: PetscReal *auxR;
1307: #endif
1308:
1315: n = PetscBLASIntCast(n_);
1316: ldpX = PetscBLASIntCast(PetscMax(ldpX_,1));
1317: ldpY = PetscBLASIntCast(PetscMax(ldpY_,1));
1318: ldS = PetscBLASIntCast(PetscMax(ldS_,1));
1319: ldT = PetscBLASIntCast(PetscMax(ldT_,1));
1321: if (pX && pY) side = "B";
1322: else if (pX) side = "R";
1323: else if (pY) side = "L";
1324: else { return(0); }
1326: howmny = doProd?"B":"A";
1328: if (T) {
1329: /* [eigr, pX] = eig(S, T) */
1330: #if defined(PETSC_USE_COMPLEX)
1331: auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1332: if (size_auxS < 2*n)
1333: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
1334: LAPACKtgevc_(side,howmny,PETSC_NULL,&n,S,&ldS,T,&ldT,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1335: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTGEVC %d",info);
1336: #else
1337: if (size_auxS < 6*n)
1338: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
1339: LAPACKtgevc_(side,howmny,PETSC_NULL,&n,S,&ldS,T,&ldT,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1340: #endif
1341: } else {
1342: /* [eigr, pX] = eig(S) */
1343: #if defined(PETSC_USE_COMPLEX)
1344: auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1345: if (size_auxS < 2*n)
1346: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
1347: LAPACKtrevc_(side,howmny,PETSC_NULL,&n,S,&ldS,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1348: #else
1349: if (size_auxS < 3*n)
1350: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
1351: LAPACKtrevc_(side,howmny,PETSC_NULL,&n,S,&ldS,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1352: #endif
1353: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
1354: }
1355: return(0);
1356: #endif
1357: }
1362: PetscErrorCode EPSSortDenseHEP(EPS eps, PetscInt n, PetscInt k, PetscScalar *w, PetscScalar *V, PetscInt ldV)
1363: {
1364: PetscInt i, j, result, pos;
1365: PetscReal re;
1366: PetscScalar t;
1367: PetscBLASInt n_,one=1;
1368: PetscErrorCode ierr;
1373: n_ = PetscBLASIntCast(n);
1375: /* selection sort */
1376: for (i=k;i<n-1;i++) {
1377: re = PetscRealPart(w[i]);
1378: pos = 0;
1379: /* find minimum eigenvalue */
1380: for (j=i+1;j<n;j++) {
1381: EPSCompareEigenvalues(eps,re,0,PetscRealPart(w[j]),0,&result);
1382: if (result > 0) {
1383: re = PetscRealPart(w[j]);
1384: pos = j;
1385: }
1386: }
1387: /* interchange the pairs i and pos */
1388: if (pos) {
1389: BLASswap_(&n_, &V[ldV*pos], &one, &V[ldV*i], &one);
1390: t = w[i]; w[i] = w[pos]; w[pos] = t;
1391: }
1392: }
1394: return(0);
1395: }
1399: /* Write zeros from the column k to n in the lower triangular part of the
1400: matrices S and T, and inside 2-by-2 diagonal blocks of T in order to
1401: make (S,T) a valid Schur decompositon.
1402: */
1403: PetscErrorCode EPSCleanDenseSchur(PetscInt n,PetscInt k,PetscScalar *S,PetscInt ldS,PetscScalar *T,PetscInt ldT,PetscScalar *eigi,PetscScalar *X,PetscInt ldX,PetscBool doProd)
1404: {
1405: PetscInt i, j;
1406: #if defined(PETSC_USE_COMPLEX)
1407: PetscScalar s;
1408: #endif
1414: if (!doProd && X) {
1415: for (i=0; i<n; i++) for (j=0; j<n; j++) X[ldX*i+j] = 0.0;
1416: for (i=0; i<n; i++) X[ldX*i+i] = 1.0;
1417: }
1419: #if defined(PETSC_USE_COMPLEX)
1420: for (i=k; i<n; i++) {
1421: /* Some functions need the diagonal elements in T be real */
1422: if (T && PetscImaginaryPart(T[ldT*i+i]) != 0.0) {
1423: s = PetscConj(T[ldT*i+i])/PetscAbsScalar(T[ldT*i+i]);
1424: for(j=0; j<=i; j++)
1425: T[ldT*i+j]*= s,
1426: S[ldS*i+j]*= s;
1427: T[ldT*i+i] = PetscRealPart(T[ldT*i+i]);
1428: if (X) for(j=0; j<n; j++) X[ldX*i+j]*= s;
1429: }
1430: if ((j=i+1) < n) {
1431: S[ldS*i+j] = 0.0;
1432: if (T) T[ldT*i+j] = 0.0;
1433: }
1434: }
1435: #else
1436: for (i=k; i<n; i++) {
1437: if (S[ldS*i+i+1] != 0.0 && eigi && eigi[i] != 0.0) {
1438: if ((j=i+2) < n) S[ldS*(i+1)+j] = 0.0;
1439: if (T) {
1440: /* T[ldT*(i+1)+i] = 0.0; */
1441: {
1442: /* Check if T(i+1,i) is negligible */
1443: if (PetscAbs(T[ldT*(i+1)+i])+PetscAbs(T[ldT*i+i+1]) > (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1]))*PETSC_MACHINE_EPSILON) {
1444: PetscBLASInt ldS_,ldT_,n_i,n_i_1,one=1,n_,i_1,i_;
1445: PetscScalar b11,b22,sr,cr,sl,cl;
1446: ldS_ = PetscBLASIntCast(ldS);
1447: ldT_ = PetscBLASIntCast(ldT);
1448: n_i = PetscBLASIntCast(n-i);
1449: n_i_1 = n_i - 1;
1450: i_1 = PetscBLASIntCast(i+1);
1451: i_ = PetscBLASIntCast(i);
1452: n_ = PetscBLASIntCast(n);
1453: LAPACKlasv2_(&T[ldT*i+i],&T[ldT*i+i+1],&T[ldT*(i+1)+i+1],&b22,&b11,&sr,&cr,&sl,&cl);
1454: if (b11 < 0.0) { cr=-cr; sr=-sr; b11=-b11; b22=-b22; }
1455: BLASrot_(&n_i,&S[ldS*i+i],&ldS_,&S[ldS*i+i+1],&ldS_,&cl,&sl);
1456: BLASrot_(&i_1,&S[ldS*i],&one,&S[ldS*(i+1)],&one,&cr,&sr);
1457: if (n_i_1>0) BLASrot_(&n_i_1,&T[ldT*(i+2)+i],&ldT_,&T[ldT*(i+2)+i],&ldT_,&cl,&sl);
1458: BLASrot_(&i_,&T[ldT*i],&one,&T[ldT*(i+1)],&one,&cr,&sr);
1459: if (X) BLASrot_(&n_,&X[ldX*i],&one,&X[ldX*(i+1)],&one,&cr,&sr);
1460: T[ldT*i+i] = b11; T[ldT*i+i+1] = T[ldT*(i+1)+i] = 0.0; T[ldT*(i+1)+i+1] = b22;
1461: } else {
1462: T[ldT*(i+1)+i] = T[ldT*i+i+1] = 0.0;
1463: }
1464: }
1465: if ((j=i+1) < n) T[ldT*i+j] = 0.0;
1466: if ((j=i+2) < n) T[ldT*(i+1)+j] = 0.0;
1467: }
1468: i++;
1469: } else {
1470: if ((j=i+1) < n) {
1471: S[ldS*i+j] = 0.0;
1472: if (T) T[ldT*i+j] = 0.0;
1473: }
1474: }
1475: }
1476: #endif
1478: return(0);
1479: }