Actual source code: mpicuda.cu
petsc-3.7.4 2016-10-02
2: /*
3: This file contains routines for Parallel vector operations.
4: */
5: #define PETSC_SKIP_SPINLOCK
7: #include <petscconf.h>
8: #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
9: #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
13: PetscErrorCode VecDestroy_MPICUDA(Vec v)
14: {
16: cudaError_t err;
19: if (v->spptr) {
20: if (((Vec_CUDA*)v->spptr)->GPUarray) {
21: err = cudaFree(((Vec_CUDA*)v->spptr)->GPUarray);CHKERRCUDA(err);
22: ((Vec_CUDA*)v->spptr)->GPUarray = NULL;
23: }
24: err = cudaStreamDestroy(((Vec_CUDA*)v->spptr)->stream);CHKERRCUDA(err);
25: PetscFree(v->spptr);
26: }
27: VecDestroy_MPI(v);
28: return(0);
29: }
33: PetscErrorCode VecNorm_MPICUDA(Vec xin,NormType type,PetscReal *z)
34: {
35: PetscReal sum,work = 0.0;
39: if (type == NORM_2 || type == NORM_FROBENIUS) {
40: VecNorm_SeqCUDA(xin,NORM_2,&work);
41: work *= work;
42: MPIU_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
43: *z = PetscSqrtReal(sum);
44: } else if (type == NORM_1) {
45: /* Find the local part */
46: VecNorm_SeqCUDA(xin,NORM_1,&work);
47: /* Find the global max */
48: MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
49: } else if (type == NORM_INFINITY) {
50: /* Find the local max */
51: VecNorm_SeqCUDA(xin,NORM_INFINITY,&work);
52: /* Find the global max */
53: MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
54: } else if (type == NORM_1_AND_2) {
55: PetscReal temp[2];
56: VecNorm_SeqCUDA(xin,NORM_1,temp);
57: VecNorm_SeqCUDA(xin,NORM_2,temp+1);
58: temp[1] = temp[1]*temp[1];
59: MPIU_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
60: z[1] = PetscSqrtReal(z[1]);
61: }
62: return(0);
63: }
67: PetscErrorCode VecDot_MPICUDA(Vec xin,Vec yin,PetscScalar *z)
68: {
69: PetscScalar sum,work;
73: VecDot_SeqCUDA(xin,yin,&work);
74: MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
75: *z = sum;
76: return(0);
77: }
81: PetscErrorCode VecTDot_MPICUDA(Vec xin,Vec yin,PetscScalar *z)
82: {
83: PetscScalar sum,work;
87: VecTDot_SeqCUDA(xin,yin,&work);
88: MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
89: *z = sum;
90: return(0);
91: }
95: PetscErrorCode VecMDot_MPICUDA(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
96: {
97: PetscScalar awork[128],*work = awork;
101: if (nv > 128) {
102: PetscMalloc1(nv,&work);
103: }
104: VecMDot_SeqCUDA(xin,nv,y,work);
105: MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
106: if (nv > 128) {
107: PetscFree(work);
108: }
109: return(0);
110: }
112: /*MC
113: VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA
115: Options Database Keys:
116: . -vec_type mpicuda - sets the vector type to VECMPICUDA during a call to VecSetFromOptions()
118: Level: beginner
120: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI, VecType, VecCreateMPI()
121: M*/
126: PetscErrorCode VecDuplicate_MPICUDA(Vec win,Vec *v)
127: {
129: Vec_MPI *vw,*w = (Vec_MPI*)win->data;
130: PetscScalar *array;
133: VecCreate(PetscObjectComm((PetscObject)win),v);
134: PetscLayoutReference(win->map,&(*v)->map);
136: VecCreate_MPI_Private(*v,PETSC_FALSE,w->nghost,0);
137: vw = (Vec_MPI*)(*v)->data;
138: PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));
140: /* save local representation of the parallel vector (and scatter) if it exists */
141: if (w->localrep) {
142: VecGetArray(*v,&array);
143: VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);
144: PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));
145: VecRestoreArray(*v,&array);
146: PetscLogObjectParent((PetscObject)*v,(PetscObject)vw->localrep);
147: vw->localupdate = w->localupdate;
148: if (vw->localupdate) {
149: PetscObjectReference((PetscObject)vw->localupdate);
150: }
151: }
153: /* New vector should inherit stashing property of parent */
154: (*v)->stash.donotstash = win->stash.donotstash;
155: (*v)->stash.ignorenegidx = win->stash.ignorenegidx;
157: /* change type_name appropriately */
158: PetscObjectChangeTypeName((PetscObject)(*v),VECMPICUDA);
160: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);
161: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);
162: (*v)->map->bs = PetscAbs(win->map->bs);
163: (*v)->bstash.bs = win->bstash.bs;
164: return(0);
165: }
169: PetscErrorCode VecDotNorm2_MPICUDA(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
170: {
172: PetscScalar work[2],sum[2];
175: VecDotNorm2_SeqCUDA(s,t,work,work+1);
176: MPIU_Allreduce(&work,&sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));
177: *dp = sum[0];
178: *nm = sum[1];
179: return(0);
180: }
184: PETSC_EXTERN PetscErrorCode VecCreate_MPICUDA(Vec vv)
185: {
189: VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);
190: PetscObjectChangeTypeName((PetscObject)vv,VECMPICUDA);
192: vv->ops->dotnorm2 = VecDotNorm2_MPICUDA;
193: vv->ops->waxpy = VecWAXPY_SeqCUDA;
194: vv->ops->duplicate = VecDuplicate_MPICUDA;
195: vv->ops->dot = VecDot_MPICUDA;
196: vv->ops->mdot = VecMDot_MPICUDA;
197: vv->ops->tdot = VecTDot_MPICUDA;
198: vv->ops->norm = VecNorm_MPICUDA;
199: vv->ops->scale = VecScale_SeqCUDA;
200: vv->ops->copy = VecCopy_SeqCUDA;
201: vv->ops->set = VecSet_SeqCUDA;
202: vv->ops->swap = VecSwap_SeqCUDA;
203: vv->ops->axpy = VecAXPY_SeqCUDA;
204: vv->ops->axpby = VecAXPBY_SeqCUDA;
205: vv->ops->maxpy = VecMAXPY_SeqCUDA;
206: vv->ops->aypx = VecAYPX_SeqCUDA;
207: vv->ops->axpbypcz = VecAXPBYPCZ_SeqCUDA;
208: vv->ops->pointwisemult = VecPointwiseMult_SeqCUDA;
209: vv->ops->setrandom = VecSetRandom_SeqCUDA;
210: vv->ops->placearray = VecPlaceArray_SeqCUDA;
211: vv->ops->replacearray = VecReplaceArray_SeqCUDA;
212: vv->ops->resetarray = VecResetArray_SeqCUDA;
213: vv->ops->dot_local = VecDot_SeqCUDA;
214: vv->ops->tdot_local = VecTDot_SeqCUDA;
215: vv->ops->norm_local = VecNorm_SeqCUDA;
216: vv->ops->mdot_local = VecMDot_SeqCUDA;
217: vv->ops->destroy = VecDestroy_MPICUDA;
218: vv->ops->pointwisedivide = VecPointwiseDivide_SeqCUDA;
219: vv->ops->getlocalvector = VecGetLocalVector_SeqCUDA;
220: vv->ops->restorelocalvector = VecRestoreLocalVector_SeqCUDA;
221: vv->ops->getlocalvectorread = VecGetLocalVector_SeqCUDA;
222: vv->ops->restorelocalvectorread = VecRestoreLocalVector_SeqCUDA;
223: VecCUDAAllocateCheck(vv);CHKERRCUDA(ierr);
224: vv->valid_GPU_array = PETSC_CUDA_GPU;
225: VecSet(vv,0.0);
226: return(0);
227: }
231: PETSC_EXTERN PetscErrorCode VecCreate_CUDA(Vec v)
232: {
234: PetscMPIInt size;
237: MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);
238: if (size == 1) {
239: VecSetType(v,VECSEQCUDA);
240: } else {
241: VecSetType(v,VECMPICUDA);
242: }
243: return(0);
244: }