Actual source code: cuspvecimpl.h

petsc-3.7.4 2016-10-02
Report Typos and Errors
4: #if defined(__CUDACC__) 6: #include <petsccusp.h> 7: #include <petsc/private/vecimpl.h> 9: #include <algorithm> 10: #include <vector> 11: #include <string> 13: #if defined(CUSP_VERSION) && CUSP_VERSION >= 500 14: #include <cusp/blas/blas.h> 15: #else 16: #include <cusp/blas.h> 17: #endif 18: #include <thrust/host_vector.h> 19: #include <thrust/device_vector.h> 20: #include <thrust/iterator/constant_iterator.h> 21: #include <thrust/transform.h> 22: #include <thrust/iterator/permutation_iterator.h> 24: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory> 25: #define CUSPARRAYCPU cusp::array1d<PetscScalar,cusp::host_memory> 26: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory> 27: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory> 28: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err) 29: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0]) 30: #define WaitForGPU() PetscCUSPSynchronize ? cudaThreadSynchronize() : 0 32: struct Vec_CUSP { 33: CUSPARRAY *GPUarray; /* this always holds the GPU data */ 34: cudaStream_t stream; /* A stream for doing asynchronous data transfers */ 35: PetscBool hostDataRegisteredAsPageLocked; 36: }; 38: #endif 41: #include <cuda_runtime.h> 43: PETSC_INTERN PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar*, PetscScalar*); 44: PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec); 45: PETSC_INTERN PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec); 46: PETSC_INTERN PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar*); 47: PETSC_EXTERN PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar); 48: PETSC_INTERN PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar*,Vec*); 49: PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec); 50: PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec); 51: PETSC_INTERN PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar*); 52: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP(Vec); 53: PETSC_INTERN PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar*); 54: PETSC_INTERN PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar*); 55: PETSC_INTERN PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar*); 56: PETSC_INTERN PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar); 57: PETSC_EXTERN PetscErrorCode VecCopy_SeqCUSP(Vec,Vec); 58: PETSC_INTERN PetscErrorCode VecSwap_SeqCUSP(Vec,Vec); 59: PETSC_INTERN PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec); 60: PETSC_INTERN PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec); 61: PETSC_INTERN PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec*); 62: PETSC_INTERN PetscErrorCode VecConjugate_SeqCUSP(Vec xin); 63: PETSC_INTERN PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*); 64: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU(Vec); 65: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck(Vec); 66: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUSP(Vec); 67: PETSC_EXTERN PetscErrorCode VecView_Seq(Vec,PetscViewer); 68: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP(Vec); 69: PETSC_INTERN PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec); 70: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom); 71: PETSC_INTERN PetscErrorCode VecGetLocalVector_SeqCUSP(Vec,Vec); 72: PETSC_INTERN PetscErrorCode VecRestoreLocalVector_SeqCUSP(Vec,Vec); 73: PETSC_INTERN PetscErrorCode VecCopy_SeqCUSP_Private(Vec xin,Vec yin); 74: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP_Private(Vec xin,PetscRandom r); 75: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP_Private(Vec v); 76: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP_Private(Vec vin); 77: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU_Public(Vec); 78: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck_Public(Vec); 79: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPUSome(Vec v, PetscCUSPIndices ci); 80: PETSC_INTERN PetscErrorCode VecCUSPCopyFromGPUSome(Vec v, PetscCUSPIndices ci); 83: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*); 84: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*); 85: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*); 86: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode); 88: typedef enum {VEC_SCATTER_CUSP_STOS, VEC_SCATTER_CUSP_PTOP} VecCUSPScatterType; 89: typedef enum {VEC_SCATTER_CUSP_GENERAL, VEC_SCATTER_CUSP_STRIDED} VecCUSPSequentialScatterMode; 91: struct _p_VecScatterCUSPIndices_PtoP { 92: PetscInt ns; 93: PetscInt sendLowestIndex; 94: PetscInt nr; 95: PetscInt recvLowestIndex; 96: }; 98: struct _p_VecScatterCUSPIndices_StoS { 99: /* from indices data */ 100: PetscInt *fslots; 101: PetscInt fromFirst; 102: PetscInt fromStep; 103: VecCUSPSequentialScatterMode fromMode; 105: /* to indices data */ 106: PetscInt *tslots; 107: PetscInt toFirst; 108: PetscInt toStep; 109: VecCUSPSequentialScatterMode toMode; 111: PetscInt n; 112: PetscInt MAX_BLOCKS; 113: PetscInt MAX_CORESIDENT_THREADS; 114: cudaStream_t stream; 115: }; 117: struct _p_PetscCUSPIndices { 118: void * scatter; 119: VecCUSPScatterType scatterType; 120: }; 122: #endif