/* vpscat.h (petsc-3.7.4, 2016-10-02) */

/*
   Defines the methods VecScatterBegin/End_1,2,...
   This is included by vpscat.c with different values for BS.

   This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
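
/*
   For example, if the including file has done "#define BS 4", then
   PETSCMAP1(VecScatterBegin) expands to VecScatterBegin_4. The extra
   PETSCMAP1_b indirection is what forces BS to be expanded to its value
   before ## pastes the tokens together.
*/
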
PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;

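  /* For a reverse scatter the two sides trade places: the context's "from" data
     drives the sends and its "to" data the receives, and the reverse request sets
     are used instead of the forward ones. */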
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
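  /* If the source vector's up-to-date copy is on the GPU, move the required values
     to the host before packing: either only the entries this scatter needs (when
     scatter indices are attached to the context) or the whole vector. */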
#if defined(PETSC_HAVE_CUSP)
  VecCUSPAllocateCheckHost(xin);
  if (xin->valid_GPU_array == PETSC_CUSP_GPU) {
    if (xin->spptr && ctx->spptr) {
      VecCUSPCopyFromGPUSome_Public(xin,(PetscCUSPIndices)ctx->spptr);
    } else {
      VecCUSPCopyFromGPU(xin);
    }
  }
  xv = *((PetscScalar**)xin->data);
#elif defined(PETSC_HAVE_VECCUDA)
  VecCUDAAllocateCheckHost(xin);
  if (xin->valid_GPU_array == PETSC_CUDA_GPU) {
    if (xin->spptr && ctx->spptr) {
      VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr);
    } else {
      VecCUDACopyFromGPU(xin);
    }
  }
  xv = *((PetscScalar**)xin->data);
#else
  VecGetArrayRead(xin,(const PetscScalar**)&xv);
#endif

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

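  /* Off-process part of the scatter: post receives if they are not already up,
     pack the outgoing entries, and start the sends with whichever mechanism this
     context was configured to use (persistent point-to-point requests,
     MPI_Alltoallv/MPI_Alltoallw, or an MPI one-sided window). */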
  if (!(mode & SCATTER_LOCAL)) {
    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }

#if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));
    } else
#endif
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together and sends them; used when -vecscatter_packtogether is given */
      PETSCMAP1(Pack)(sstarts[nsends],indices,xv,svalues,bs);
      if (to->use_alltoallv) {
        MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
#if defined(PETSC_HAVE_MPI_WIN_CREATE)
      } else if (to->use_window) {
        PetscInt cnt;

        MPI_Win_fence(0,from->window);
        for (i=0; i<nsends; i++) {
          cnt = bs*(to->starts[i+1]-to->starts[i]);
          MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
        }
#endif
      } else if (nsends) {
        MPI_Startall_isend(to->starts[to->n],nsends,swaits);
      }
    } else {
      /* this version packs and sends one message at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
        MPI_Start_isend(sstarts[i+1]-sstarts[i],swaits+i);
      }
    }

    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
  }

  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      if (yv != xv || from->local.copy_start != to->local.copy_start) {
        PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEnd)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;   /* sstatus and rstatus are always stored in to */
  rstatus = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

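  /* Two ways to finish: wait for all incoming messages and unpack them in a single
     pass (packtogether, alltoall, and window variants), or wait for the receives
     one at a time and unpack each message as it arrives. */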
  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
#if defined(PETSC_HAVE_MPI_WIN_CREATE)
    if (to->use_window) {MPI_Win_fence(0,from->window);}
    else
#endif
    if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
    PETSCMAP1(UnPack)(from->starts[from->n],from->values,indices,yv,addv,bs);
  } else if (!to->use_alltoallw) {
    /* unpack one at a time */
    count = nrecvs;
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        MPI_Wait(rwaits+imdex,&xrstatus);
      } else {
        MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
      count--;
    }
  }
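  /* Ready-receiver mode: repost the receives for the next scatter right away; the
     barrier ensures every process has its receives up before any process begins the
     matching sends (which require the receives to already be posted). */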
  if (from->use_readyreceiver) {
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));
  }

  /* wait on sends */
  if (nsends && !to->use_alltoallv && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS
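
/*
   A minimal sketch (not taken verbatim from PETSc) of how an including source
   file such as vpscat.c can instantiate these routines for several block sizes:
   each instantiation defines BS and includes this header again, producing a
   VecScatterBegin_<BS>/VecScatterEnd_<BS> pair specialized for that block size.

       #define BS 1
       #include "vpscat.h"      ->  VecScatterBegin_1, VecScatterEnd_1

       #define BS 2
       #include "vpscat.h"      ->  VecScatterBegin_2, VecScatterEnd_2

   Because this header ends by undefining BS, each instantiation must define BS
   again before the next #include.
*/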