Actual source code: veccomp0.h

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:       
  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */

 22: #include <private/vecimpl.h>     /*I  "petsvec.h"  I*/

 24: #ifdef __WITH_MPI__
 26: #else
 28: #endif


 36: PetscErrorCode __SUF__(VecDot_Comp)(Vec a,Vec b,PetscScalar *z)
 37: {
 38:   PetscScalar    sum = 0.0,work;
 39:   PetscInt       i;
 41:   Vec_Comp       *as = (Vec_Comp*)a->data,*bs = (Vec_Comp*)b->data;

 46:   if (as->x[0]->ops->dot_local) {
 47:     for (i=0,sum=0.0;i<as->n->n;i++) {
 48:       as->x[i]->ops->dot_local(as->x[i],bs->x[i],&work);
 49:       sum += work;
 50:     }
 51: #ifdef __WITH_MPI__
 52:     work = sum;
 53:     MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);
 54: #endif
 55:   } else {
 56:     for(i=0,sum=0.0; i<as->n->n; i++) {
 57:       VecDot(as->x[i],bs->x[i],&work);
 58:       sum += work;
 59:     }
 60:   }
 61:   *z = sum;
 62:   return(0);
 63: }

 67: PetscErrorCode __SUF__(VecMDot_Comp)(Vec a,PetscInt n,const Vec b[],PetscScalar *z)
 68: {
 69:   PetscScalar    *work,*work0,*r;
 71:   Vec_Comp       *as = (Vec_Comp*)a->data;
 72:   Vec            *bx;
 73:   PetscInt       i,j;


 79:   if (as->n->n == 0) {
 80:     *z = 0;
 81:     return(0);
 82:   }

 84:   PetscMalloc(sizeof(PetscScalar)*n,&work0);
 85:   PetscMalloc(sizeof(Vec)*n,&bx);

 87: #ifdef __WITH_MPI__
 88:   if (as->x[0]->ops->mdot_local) {
 89:     r = work0; work = z;
 90:   } else
 91: #endif
 92:   {
 93:     r = z; work = work0;
 94:   }

 96:   /* z[i] <- a.x' * b[i].x */
 97:   for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[0];
 98:   if (as->x[0]->ops->mdot_local) {
 99:     as->x[0]->ops->mdot_local(as->x[0],n,bx,r);
100:   } else {
101:     VecMDot(as->x[0],n,bx,r);
102:   }
103:   for (j=0;j<as->n->n;j++) {
104:     for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[j];
105:     if (as->x[0]->ops->mdot_local) {
106:       as->x[j]->ops->mdot_local(as->x[j],n,bx,work);
107:     } else {
108:       VecMDot(as->x[j],n,bx,work);
109:     }
110:     for (i=0;i<n;i++) r[i] += work[i];
111:   }

113:   /* If def(__WITH_MPI__) and exists mdot_local */
114: #ifdef __WITH_MPI__
115:   if (as->x[0]->ops->mdot_local) {
116:     /* z[i] <- Allreduce(work[i]) */
117:     MPI_Allreduce(r,z,n,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);
118:   }
119: #endif

121:   PetscFree(work0);
122:   PetscFree(bx);
123:   return(0);
124: }


129: PetscErrorCode __SUF__(VecTDot_Comp)(Vec a,Vec b,PetscScalar *z)
130: {
131:   PetscScalar    sum = 0.0,work;
132:   PetscInt       i;
134:   Vec_Comp       *as = (Vec_Comp*)a->data,*bs = (Vec_Comp*)b->data;

139:   if (as->x[0]->ops->tdot_local) {
140:     for (i=0,sum=0.0;i<as->n->n;i++) {
141:       as->x[i]->ops->tdot_local(as->x[i],bs->x[i],&work);
142:       sum += work;
143:     }
144: #ifdef __WITH_MPI__
145:     work = sum;
146:     MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);
147: #endif
148:   } else {
149:     for(i=0,sum=0.0; i<as->n->n; i++) {
150:       VecTDot(as->x[i],bs->x[i],&work);
151:       sum += work;
152:     }
153:   }
154:   *z = sum;
155:   return(0);
156: }

160: PetscErrorCode __SUF__(VecMTDot_Comp)(Vec a,PetscInt n,const Vec b[],PetscScalar *z)
161: {
162:   PetscScalar    *work,*work0,*r;
164:   Vec_Comp       *as = (Vec_Comp*)a->data;
165:   Vec            *bx;
166:   PetscInt       i,j;


172:   if (as->n->n == 0) {
173:     *z = 0;
174:     return(0);
175:   }

177:   PetscMalloc(sizeof(PetscScalar)*n,&work0);
178:   PetscMalloc(sizeof(Vec)*n,&bx);

180: #ifdef __WITH_MPI__
181:   if (as->x[0]->ops->mtdot_local) {
182:     r = work0; work = z;
183:   } else
184: #endif
185:   {
186:     r = z; work = work0;
187:   }

189:   /* z[i] <- a.x' * b[i].x */
190:   for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[0];
191:   if (as->x[0]->ops->mtdot_local) {
192:     as->x[0]->ops->mtdot_local(as->x[0],n,bx,r);
193:   } else {
194:     VecMTDot(as->x[0],n,bx,r);
195:   }
196:   for (j=0;j<as->n->n;j++) {
197:     for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[j];
198:     if (as->x[0]->ops->mtdot_local) {
199:       as->x[j]->ops->mtdot_local(as->x[j],n,bx,work);
200:     } else {
201:       VecMTDot(as->x[j],n,bx,work);
202:     }
203:     for (i=0;i<n;i++) r[i] += work[i];
204:   }

206:   /* If def(__WITH_MPI__) and exists mtdot_local */
207: #ifdef __WITH_MPI__
208:   if (as->x[0]->ops->mtdot_local) {
209:     /* z[i] <- Allreduce(work[i]) */
210:     MPI_Allreduce(r,z,n,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);
211:   }
212: #endif

214:   PetscFree(work0);
215:   PetscFree(bx);
216:   return(0);
217: }


220: #ifndef __VEC_NORM2_FUNCS_
222: PETSC_STATIC_INLINE void SumNorm2(PetscReal *ssq0,PetscReal *scale0,PetscReal *ssq1,PetscReal *scale1)
223: {
224:   PetscReal q;
225:   if (*scale0 > *scale1) { q = *scale1/(*scale0); *ssq1 = *ssq0 + q*q*(*ssq1); *scale1 = *scale0; }
226:   else                   { q = *scale0/(*scale1); *ssq1+=         q*q*(*ssq0); }
227: }

229: PETSC_STATIC_INLINE PetscReal GetNorm2(PetscReal ssq,PetscReal scale)
230: {
231:   return scale*PetscSqrtReal(ssq);
232: }

234: PETSC_STATIC_INLINE void AddNorm2(PetscReal *ssq,PetscReal *scale,PetscReal x)
235: {
236:   if (x != 0.0) {
237:     PetscReal absx = PetscAbs(x), q;
238:     if (*scale < absx) { q = *scale/absx; *ssq = 1.0 + *ssq*q*q; *scale = absx; }
239:     else               { q = absx/(*scale); *ssq+= q*q; }
240:   }
241: }

243: MPI_Datatype MPIU_NORM2=0, MPIU_NORM1_AND_2=0;
244: MPI_Op MPIU_NORM2_SUM=0;

246: EXTERN_C_BEGIN
249: void SlepcSumNorm2_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
250: {
251:   PetscInt       i,count = *cnt;

254:   if (*datatype == MPIU_NORM2) {
255:     PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;
256:     for (i=0; i<count; i++) {
257:       SumNorm2(&xin[i*2],&xin[i*2+1],&xout[i*2],&xout[i*2+1]);
258:     }
259:   } else if (*datatype == MPIU_NORM1_AND_2) {
260:     PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;
261:     for (i=0; i<count; i++) {
262:       xout[i*3]+= xin[i*3];
263:       SumNorm2(&xin[i*3+1],&xin[i*3+2],&xout[i*3+1],&xout[i*3+2]);
264:     }
265:   } else {
266:     (*PetscErrorPrintf)("Can only handle MPIU_NORM* data types");
267:     MPI_Abort(MPI_COMM_WORLD,1);
268:   }
269:   PetscFunctionReturnVoid();
270: }

274: PetscErrorCode VecNormCompEnd(void)
275: {

279:   MPI_Type_free(&MPIU_NORM2);
280:   MPI_Type_free(&MPIU_NORM1_AND_2);
281:   MPI_Op_free(&MPIU_NORM2_SUM);
282: 
283:   return(0);
284: }
285: EXTERN_C_END

289: PetscErrorCode VecNormCompInit()
290: {

294:   MPI_Type_contiguous(sizeof(PetscReal)*2,MPI_BYTE,&MPIU_NORM2);
295:   MPI_Type_commit(&MPIU_NORM2);
296:   MPI_Type_contiguous(sizeof(PetscReal)*3,MPI_BYTE,&MPIU_NORM1_AND_2);
297:   MPI_Type_commit(&MPIU_NORM1_AND_2);
298:   MPI_Op_create(SlepcSumNorm2_Local,1,&MPIU_NORM2_SUM);
299:   PetscRegisterFinalize(VecNormCompEnd);
300: 
301:   return(0);
302: }
303: #endif

307: PetscErrorCode __SUF__(VecNorm_Comp)(Vec a,NormType t,PetscReal *norm)
308: {
309:   PetscReal      work[3],s=0.0;
311:   Vec_Comp       *as = (Vec_Comp*)a->data;
312:   PetscInt       i;

316:   /* Initialize norm */
317:   switch(t) {
318:   case NORM_1: case NORM_INFINITY: *norm = 0.0; break;
319:   case NORM_2: case NORM_FROBENIUS: *norm = 1.0; s = 0.0; break;
320:   case NORM_1_AND_2: norm[0] = 0.0; norm[1] = 1.0; s = 0.0; break;
321:   }
322:   for (i=0;i<as->n->n;i++) {
323:     if (as->x[0]->ops->norm_local) {
324:       as->x[0]->ops->norm_local(as->x[i],t,work);
325:     } else {
326:       VecNorm(as->x[i],t,work);
327:     }
328:     /* norm+= work */
329:     switch(t) {
330:     case NORM_1: *norm+= *work; break;
331:     case NORM_2: case NORM_FROBENIUS: AddNorm2(norm,&s,*work); break;
332:     case NORM_1_AND_2: norm[0]+= work[0]; AddNorm2(&norm[1],&s,work[1]); break;
333:     case NORM_INFINITY: *norm = PetscMax(*norm,*work); break;
334:     }
335:   }

337:   /* If def(__WITH_MPI__) and exists norm_local */
338: #ifdef __WITH_MPI__
339:   if (as->x[0]->ops->norm_local) {
340:     PetscReal work0[3];
341:     /* norm <- Allreduce(work) */
342:     switch(t) {
343:     case NORM_1:
344:       work[0] = *norm;
345:       MPI_Allreduce(work,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)a)->comm);
346:       break;
347:     case NORM_2: case NORM_FROBENIUS:
348:       work[0] = *norm; work[1] = s;
349:       MPI_Allreduce(work,work0,1,MPIU_NORM2,MPIU_NORM2_SUM,((PetscObject)a)->comm);
350:       *norm = GetNorm2(work0[0],work0[1]);
351:       break;
352:     case NORM_1_AND_2:
353:       work[0] = norm[0]; work[1] = norm[1]; work[2] = s;
354:       MPI_Allreduce(work,work0,1,MPIU_NORM1_AND_2,MPIU_NORM2_SUM,((PetscObject)a)->comm);
355:       norm[0] = work0[0];
356:       norm[1] = GetNorm2(work0[1],work0[2]);
357:       break;
358:     case NORM_INFINITY:
359:       work[0] = *norm;
360:       MPI_Allreduce(work,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)a)->comm);
361:       break;
362:     }
363:   }
364: #else
365:   /* Norm correction */
366:   switch(t) {
367:   case NORM_2: case NORM_FROBENIUS: *norm = GetNorm2(*norm,s); break;
368:   case NORM_1_AND_2: norm[1] = GetNorm2(norm[1],s); break;
369:   default: ;
370:   }
371: #endif

373:   return(0);
374: }

378: PetscErrorCode __SUF__(VecDotNorm2_Comp)(Vec v,Vec w,PetscScalar *dp,PetscScalar *nm)
379: {
380:   PetscScalar    *vx,*wx,dp0,nm0,dp1,nm1;
382:   Vec_Comp       *vs = (Vec_Comp*)v->data,*ws = (Vec_Comp*)w->data;
383:   PetscInt       i,n;
384:   PetscBool      t0,t1;
385: #ifdef __WITH_MPI__
386:   PetscScalar    work[4];
387: #endif

390:   /* Compute recursively the local part */
391:   dp0 = nm0 = 0.0;
392:   PetscTypeCompare((PetscObject)v,VECCOMP,&t0);
393:   PetscTypeCompare((PetscObject)w,VECCOMP,&t1);
394:   if (t0 == PETSC_TRUE && t1 == PETSC_TRUE) {
397:     for (i=0;i<vs->n->n;i++) {
398:       VecDotNorm2_Comp_Seq(vs->x[i],ws->x[i],&dp1,&nm1);
399:       dp0 += dp1;
400:       nm0 += nm1;
401:     }
402:   } else if (t0 == PETSC_FALSE && t1 == PETSC_FALSE) {
403:     VecGetLocalSize(v,&n);
404:     VecGetArray(v,&vx);
405:     VecGetArray(w,&wx);
406:     for (i=0;i<n;i++) {
407:       dp0 += vx[i]*PetscConj(wx[i]);
408:       nm0 += wx[i]*PetscConj(wx[i]);
409:     }
410:     VecRestoreArray(v,&vx);
411:     VecRestoreArray(w,&wx);
412:   } else
413:     SETERRQ(((PetscObject)v)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector types");

415: #ifdef __WITH_MPI__
416:     /* [dp, nm] <- Allreduce([dp0, nm0]) */
417:     work[0] = dp0; work[1] = nm0;
418:     MPI_Allreduce(&work,&work[2],2,MPIU_SCALAR,MPIU_SUM,((PetscObject)v)->comm);
419:     *dp = work[2]; *nm = work[3];
420: #else
421:     *dp = dp0; *nm = nm0;
422: #endif
423:   return(0);
424: }