Actual source code: vecviennacl.cxx
petsc-3.13.1 2020-05-02
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include <vector>
12: #include "viennacl/linalg/inner_prod.hpp"
13: #include "viennacl/linalg/norm_1.hpp"
14: #include "viennacl/linalg/norm_2.hpp"
15: #include "viennacl/linalg/norm_inf.hpp"
17: #ifdef VIENNACL_WITH_OPENCL
18: #include "viennacl/ocl/backend.hpp"
19: #endif
22: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
23: {
28: *a = 0;
29: VecViennaCLCopyToGPU(v);
30: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
31: ViennaCLWaitForGPU();
32: return(0);
33: }
35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
36: {
41: v->offloadmask = PETSC_OFFLOAD_GPU;
43: PetscObjectStateIncrease((PetscObject)v);
44: return(0);
45: }
47: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
48: {
53: *a = 0;
54: VecViennaCLCopyToGPU(v);
55: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
56: ViennaCLWaitForGPU();
57: return(0);
58: }
60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
61: {
64: return(0);
65: }
67: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
68: {
73: *a = 0;
74: VecViennaCLAllocateCheck(v);
75: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
76: ViennaCLWaitForGPU();
77: return(0);
78: }
80: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
81: {
86: v->offloadmask = PETSC_OFFLOAD_GPU;
88: PetscObjectStateIncrease((PetscObject)v);
89: return(0);
90: }
94: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
95: {
96: PetscErrorCode ierr;
97: char string[20];
98: PetscBool flg,flg_cuda,flg_opencl,flg_openmp;
101: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102: PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,12,&flg);
103: if (flg) {
104: try {
105: PetscStrcasecmp(string,"cuda",&flg_cuda);
106: PetscStrcasecmp(string,"opencl",&flg_opencl);
107: PetscStrcasecmp(string,"openmp",&flg_openmp);
109: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111: #if defined(PETSC_HAVE_CUDA)
112: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113: #endif
114: #if defined(PETSC_HAVE_OPENCL)
115: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116: #endif
117: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118: } catch (std::exception const & ex) {
119: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120: }
121: }
123: #if defined(PETSC_HAVE_OPENCL)
124: /* ViennaCL OpenCL device type configuration */
125: PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,12,&flg);
126: if (flg) {
127: try {
128: PetscStrcasecmp(string,"cpu",&flg);
129: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
131: PetscStrcasecmp(string,"gpu",&flg);
132: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
134: PetscStrcasecmp(string,"accelerator",&flg);
135: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136: } catch (std::exception const & ex) {
137: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138: }
139: }
140: #endif
142: /* Print available backends */
143: PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
144: if (flg) {
145: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
146: #if defined(PETSC_HAVE_CUDA)
147: PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
148: #endif
149: #if defined(PETSC_HAVE_OPENCL)
150: PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
151: #endif
152: #if defined(PETSC_HAVE_OPENMP)
153: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
154: #else
155: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
156: #endif
157: PetscPrintf(PETSC_COMM_WORLD, "\n");
159: /* Print selected backends */
160: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");
161: #if defined(PETSC_HAVE_CUDA)
162: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163: PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
164: }
165: #endif
166: #if defined(PETSC_HAVE_OPENCL)
167: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168: PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
169: }
170: #endif
171: #if defined(PETSC_HAVE_OPENMP)
172: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
174: }
175: #else
176: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
178: }
179: #endif
180: PetscPrintf(PETSC_COMM_WORLD, "\n");
181: }
182: return(0);
183: }
185: /*
186: Allocates space for the vector array on the Host if it does not exist.
187: Does NOT change the PetscViennaCLFlag for the vector
188: Does NOT zero the ViennaCL array
189: */
190: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191: {
193: PetscScalar *array;
194: Vec_Seq *s;
195: PetscInt n = v->map->n;
198: s = (Vec_Seq*)v->data;
199: VecViennaCLAllocateCheck(v);
200: if (s->array == 0) {
201: PetscMalloc1(n,&array);
202: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
203: s->array = array;
204: s->array_allocated = array;
205: }
206: return(0);
207: }
210: /*
211: Allocates space for the vector array on the GPU if it does not exist.
212: Does NOT change the PetscViennaCLFlag for the vector
213: Does NOT zero the ViennaCL array
215: */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219: int rank;
222: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
223: // First allocate memory on the GPU if needed
224: if (!v->spptr) {
225: try {
226: v->spptr = new Vec_ViennaCL;
227: ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);
229: } catch(std::exception const & ex) {
230: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
231: }
232: }
233: return(0);
234: }
237: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
238: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
239: {
244: VecViennaCLAllocateCheck(v);
245: if (v->map->n > 0) {
246: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
247: PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
248: try {
249: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
250: viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
251: ViennaCLWaitForGPU();
252: } catch(std::exception const & ex) {
253: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
254: }
255: PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
256: PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
257: v->offloadmask = PETSC_OFFLOAD_BOTH;
258: }
259: }
260: return(0);
261: }
265: /*
266: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
267: */
268: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
269: {
274: VecViennaCLAllocateCheckHost(v);
275: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
276: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
277: try {
278: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
279: viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
280: ViennaCLWaitForGPU();
281: } catch(std::exception const & ex) {
282: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
283: }
284: PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
285: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
286: v->offloadmask = PETSC_OFFLOAD_BOTH;
287: }
288: return(0);
289: }
292: /* Copy on CPU */
293: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
294: {
295: PetscScalar *ya;
296: const PetscScalar *xa;
297: PetscErrorCode ierr;
300: VecViennaCLAllocateCheckHost(xin);
301: VecViennaCLAllocateCheckHost(yin);
302: if (xin != yin) {
303: VecGetArrayRead(xin,&xa);
304: VecGetArray(yin,&ya);
305: PetscArraycpy(ya,xa,xin->map->n);
306: VecRestoreArrayRead(xin,&xa);
307: VecRestoreArray(yin,&ya);
308: }
309: return(0);
310: }
312: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
313: {
315: PetscInt n = xin->map->n,i;
316: PetscScalar *xx;
319: VecGetArray(xin,&xx);
320: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
321: VecRestoreArray(xin,&xx);
322: return(0);
323: }
325: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
326: {
327: Vec_Seq *vs = (Vec_Seq*)v->data;
331: PetscObjectSAWsViewOff(v);
332: #if defined(PETSC_USE_LOG)
333: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
334: #endif
335: if (vs->array_allocated) { PetscFree(vs->array_allocated); }
336: PetscFree(vs);
337: return(0);
338: }
340: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
341: {
342: Vec_Seq *v = (Vec_Seq*)vin->data;
345: v->array = v->unplacedarray;
346: v->unplacedarray = 0;
347: return(0);
348: }
351: /*MC
352: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
354: Options Database Keys:
355: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
357: Level: beginner
359: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
360: M*/
363: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
364: {
365: const ViennaCLVector *xgpu;
366: ViennaCLVector *ygpu;
367: PetscErrorCode ierr;
370: VecViennaCLGetArrayRead(xin,&xgpu);
371: VecViennaCLGetArray(yin,&ygpu);
372: PetscLogGpuTimeBegin();
373: try {
374: if (alpha != 0.0 && xin->map->n > 0) {
375: *ygpu = *xgpu + alpha * *ygpu;
376: PetscLogGpuFlops(2.0*yin->map->n);
377: } else {
378: *ygpu = *xgpu;
379: }
380: ViennaCLWaitForGPU();
381: } catch(std::exception const & ex) {
382: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
383: }
384: PetscLogGpuTimeEnd();
385: VecViennaCLRestoreArrayRead(xin,&xgpu);
386: VecViennaCLRestoreArray(yin,&ygpu);
387: return(0);
388: }
391: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
392: {
393: const ViennaCLVector *xgpu;
394: ViennaCLVector *ygpu;
395: PetscErrorCode ierr;
398: if (alpha != 0.0 && xin->map->n > 0) {
399: VecViennaCLGetArrayRead(xin,&xgpu);
400: VecViennaCLGetArray(yin,&ygpu);
401: PetscLogGpuTimeBegin();
402: try {
403: *ygpu += alpha * *xgpu;
404: ViennaCLWaitForGPU();
405: } catch(std::exception const & ex) {
406: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
407: }
408: PetscLogGpuTimeEnd();
409: VecViennaCLRestoreArrayRead(xin,&xgpu);
410: VecViennaCLRestoreArray(yin,&ygpu);
411: PetscLogGpuFlops(2.0*yin->map->n);
412: }
413: return(0);
414: }
417: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
418: {
419: const ViennaCLVector *xgpu,*ygpu;
420: ViennaCLVector *wgpu;
421: PetscErrorCode ierr;
424: if (xin->map->n > 0) {
425: VecViennaCLGetArrayRead(xin,&xgpu);
426: VecViennaCLGetArrayRead(yin,&ygpu);
427: VecViennaCLGetArrayWrite(win,&wgpu);
428: PetscLogGpuTimeBegin();
429: try {
430: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
431: ViennaCLWaitForGPU();
432: } catch(std::exception const & ex) {
433: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
434: }
435: PetscLogGpuTimeEnd();
436: PetscLogGpuFlops(win->map->n);
437: VecViennaCLRestoreArrayRead(xin,&xgpu);
438: VecViennaCLRestoreArrayRead(yin,&ygpu);
439: VecViennaCLRestoreArrayWrite(win,&wgpu);
440: }
441: return(0);
442: }
445: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
446: {
447: const ViennaCLVector *xgpu,*ygpu;
448: ViennaCLVector *wgpu;
449: PetscErrorCode ierr;
452: if (alpha == 0.0 && xin->map->n > 0) {
453: VecCopy_SeqViennaCL(yin,win);
454: } else {
455: VecViennaCLGetArrayRead(xin,&xgpu);
456: VecViennaCLGetArrayRead(yin,&ygpu);
457: VecViennaCLGetArrayWrite(win,&wgpu);
458: PetscLogGpuTimeBegin();
459: if (alpha == 1.0) {
460: try {
461: *wgpu = *ygpu + *xgpu;
462: } catch(std::exception const & ex) {
463: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
464: }
465: PetscLogGpuFlops(win->map->n);
466: } else if (alpha == -1.0) {
467: try {
468: *wgpu = *ygpu - *xgpu;
469: } catch(std::exception const & ex) {
470: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
471: }
472: PetscLogGpuFlops(win->map->n);
473: } else {
474: try {
475: *wgpu = *ygpu + alpha * *xgpu;
476: } catch(std::exception const & ex) {
477: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
478: }
479: PetscLogGpuFlops(2*win->map->n);
480: }
481: ViennaCLWaitForGPU();
482: PetscLogGpuTimeEnd();
483: VecViennaCLRestoreArrayRead(xin,&xgpu);
484: VecViennaCLRestoreArrayRead(yin,&ygpu);
485: VecViennaCLRestoreArrayWrite(win,&wgpu);
486: }
487: return(0);
488: }
491: /*
492: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
493: *
494: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
495: * hence there is an iterated application of these until the final result is obtained
496: */
497: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
498: {
500: PetscInt j;
503: for (j = 0; j < nv; ++j) {
504: if (j+1 < nv) {
505: VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
506: ++j;
507: } else {
508: VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
509: }
510: }
511: ViennaCLWaitForGPU();
512: return(0);
513: }
516: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
517: {
518: const ViennaCLVector *xgpu,*ygpu;
519: PetscErrorCode ierr;
522: if (xin->map->n > 0) {
523: VecViennaCLGetArrayRead(xin,&xgpu);
524: VecViennaCLGetArrayRead(yin,&ygpu);
525: PetscLogGpuTimeBegin();
526: try {
527: *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
528: } catch(std::exception const & ex) {
529: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
530: }
531: ViennaCLWaitForGPU();
532: PetscLogGpuTimeEnd();
533: if (xin->map->n >0) {
534: PetscLogGpuFlops(2.0*xin->map->n-1);
535: }
536: VecViennaCLRestoreArrayRead(xin,&xgpu);
537: VecViennaCLRestoreArrayRead(yin,&ygpu);
538: } else *z = 0.0;
539: return(0);
540: }
544: /*
545: * Operation z[j] = dot(x, y[j])
546: *
547: * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
548: */
549: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
550: {
551: PetscErrorCode ierr;
552: PetscInt n = xin->map->n,i;
553: const ViennaCLVector *xgpu,*ygpu;
554: Vec *yyin = (Vec*)yin;
555: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
558: if (xin->map->n > 0) {
559: VecViennaCLGetArrayRead(xin,&xgpu);
560: for (i=0; i<nv; i++) {
561: VecViennaCLGetArrayRead(yyin[i],&ygpu);
562: ygpu_array[i] = ygpu;
563: }
564: PetscLogGpuTimeBegin();
565: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
566: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
567: viennacl::copy(result.begin(), result.end(), z);
568: for (i=0; i<nv; i++) {
569: VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
570: }
571: ViennaCLWaitForGPU();
572: PetscLogGpuTimeEnd();
573: VecViennaCLRestoreArrayRead(xin,&xgpu);
574: PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
575: } else {
576: for (i=0; i<nv; i++) z[i] = 0.0;
577: }
578: return(0);
579: }
581: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
582: {
586: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
587: VecMDot_SeqViennaCL(xin,nv,yin,z);
588: ViennaCLWaitForGPU();
589: return(0);
590: }
593: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
594: {
595: ViennaCLVector *xgpu;
599: if (xin->map->n > 0) {
600: VecViennaCLGetArrayWrite(xin,&xgpu);
601: PetscLogGpuTimeBegin();
602: try {
603: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
604: ViennaCLWaitForGPU();
605: } catch(std::exception const & ex) {
606: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
607: }
608: PetscLogGpuTimeEnd();
609: VecViennaCLRestoreArrayWrite(xin,&xgpu);
610: }
611: return(0);
612: }
614: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
615: {
616: ViennaCLVector *xgpu;
620: if (alpha == 0.0 && xin->map->n > 0) {
621: VecSet_SeqViennaCL(xin,alpha);
622: PetscLogGpuFlops(xin->map->n);
623: } else if (alpha != 1.0 && xin->map->n > 0) {
624: VecViennaCLGetArray(xin,&xgpu);
625: PetscLogGpuTimeBegin();
626: try {
627: *xgpu *= alpha;
628: ViennaCLWaitForGPU();
629: } catch(std::exception const & ex) {
630: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
631: }
632: PetscLogGpuTimeEnd();
633: VecViennaCLRestoreArray(xin,&xgpu);
634: PetscLogGpuFlops(xin->map->n);
635: }
636: return(0);
637: }
640: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
641: {
645: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
646: VecDot_SeqViennaCL(xin, yin, z);
647: ViennaCLWaitForGPU();
648: return(0);
649: }
652: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
653: {
654: const ViennaCLVector *xgpu;
655: ViennaCLVector *ygpu;
656: PetscErrorCode ierr;
659: if (xin != yin && xin->map->n > 0) {
660: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
661: VecViennaCLGetArrayRead(xin,&xgpu);
662: VecViennaCLGetArrayWrite(yin,&ygpu);
663: PetscLogGpuTimeBegin();
664: try {
665: *ygpu = *xgpu;
666: ViennaCLWaitForGPU();
667: } catch(std::exception const & ex) {
668: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
669: }
670: PetscLogGpuTimeEnd();
671: VecViennaCLRestoreArrayRead(xin,&xgpu);
672: VecViennaCLRestoreArrayWrite(yin,&ygpu);
674: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
675: /* copy in CPU if we are on the CPU*/
676: VecCopy_SeqViennaCL_Private(xin,yin);
677: ViennaCLWaitForGPU();
678: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
679: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
680: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
681: /* copy in CPU */
682: VecCopy_SeqViennaCL_Private(xin,yin);
683: ViennaCLWaitForGPU();
684: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
685: /* copy in GPU */
686: VecViennaCLGetArrayRead(xin,&xgpu);
687: VecViennaCLGetArrayWrite(yin,&ygpu);
688: PetscLogGpuTimeBegin();
689: try {
690: *ygpu = *xgpu;
691: ViennaCLWaitForGPU();
692: } catch(std::exception const & ex) {
693: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
694: }
695: PetscLogGpuTimeEnd();
696: VecViennaCLRestoreArrayRead(xin,&xgpu);
697: VecViennaCLRestoreArrayWrite(yin,&ygpu);
698: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
699: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
700: default to copy in GPU (this is an arbitrary choice) */
701: VecViennaCLGetArrayRead(xin,&xgpu);
702: VecViennaCLGetArrayWrite(yin,&ygpu);
703: PetscLogGpuTimeBegin();
704: try {
705: *ygpu = *xgpu;
706: ViennaCLWaitForGPU();
707: } catch(std::exception const & ex) {
708: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
709: }
710: PetscLogGpuTimeEnd();
711: VecViennaCLRestoreArrayRead(xin,&xgpu);
712: VecViennaCLRestoreArrayWrite(yin,&ygpu);
713: } else {
714: VecCopy_SeqViennaCL_Private(xin,yin);
715: ViennaCLWaitForGPU();
716: }
717: }
718: }
719: return(0);
720: }
723: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
724: {
726: ViennaCLVector *xgpu,*ygpu;
729: if (xin != yin && xin->map->n > 0) {
730: VecViennaCLGetArray(xin,&xgpu);
731: VecViennaCLGetArray(yin,&ygpu);
732: PetscLogGpuTimeBegin();
733: try {
734: viennacl::swap(*xgpu, *ygpu);
735: ViennaCLWaitForGPU();
736: } catch(std::exception const & ex) {
737: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
738: }
739: PetscLogGpuTimeEnd();
740: VecViennaCLRestoreArray(xin,&xgpu);
741: VecViennaCLRestoreArray(yin,&ygpu);
742: }
743: return(0);
744: }
747: // y = alpha * x + beta * y
748: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
749: {
750: PetscErrorCode ierr;
751: PetscScalar a = alpha,b = beta;
752: const ViennaCLVector *xgpu;
753: ViennaCLVector *ygpu;
756: if (a == 0.0 && xin->map->n > 0) {
757: VecScale_SeqViennaCL(yin,beta);
758: } else if (b == 1.0 && xin->map->n > 0) {
759: VecAXPY_SeqViennaCL(yin,alpha,xin);
760: } else if (a == 1.0 && xin->map->n > 0) {
761: VecAYPX_SeqViennaCL(yin,beta,xin);
762: } else if (b == 0.0 && xin->map->n > 0) {
763: VecViennaCLGetArrayRead(xin,&xgpu);
764: VecViennaCLGetArray(yin,&ygpu);
765: PetscLogGpuTimeBegin();
766: try {
767: *ygpu = *xgpu * alpha;
768: ViennaCLWaitForGPU();
769: } catch(std::exception const & ex) {
770: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
771: }
772: PetscLogGpuTimeEnd();
773: PetscLogGpuFlops(xin->map->n);
774: VecViennaCLRestoreArrayRead(xin,&xgpu);
775: VecViennaCLRestoreArray(yin,&ygpu);
776: } else if (xin->map->n > 0) {
777: VecViennaCLGetArrayRead(xin,&xgpu);
778: VecViennaCLGetArray(yin,&ygpu);
779: PetscLogGpuTimeBegin();
780: try {
781: *ygpu = *xgpu * alpha + *ygpu * beta;
782: ViennaCLWaitForGPU();
783: } catch(std::exception const & ex) {
784: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
785: }
786: PetscLogGpuTimeEnd();
787: VecViennaCLRestoreArrayRead(xin,&xgpu);
788: VecViennaCLRestoreArray(yin,&ygpu);
789: PetscLogGpuFlops(3.0*xin->map->n);
790: }
791: return(0);
792: }
795: /* operation z = alpha * x + beta *y + gamma *z*/
796: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
797: {
798: PetscErrorCode ierr;
799: PetscInt n = zin->map->n;
800: const ViennaCLVector *xgpu,*ygpu;
801: ViennaCLVector *zgpu;
804: VecViennaCLGetArrayRead(xin,&xgpu);
805: VecViennaCLGetArrayRead(yin,&ygpu);
806: VecViennaCLGetArray(zin,&zgpu);
807: if (alpha == 0.0 && xin->map->n > 0) {
808: PetscLogGpuTimeBegin();
809: try {
810: if (beta == 0.0) {
811: *zgpu = gamma * *zgpu;
812: ViennaCLWaitForGPU();
813: PetscLogGpuFlops(1.0*n);
814: } else if (gamma == 0.0) {
815: *zgpu = beta * *ygpu;
816: ViennaCLWaitForGPU();
817: PetscLogGpuFlops(1.0*n);
818: } else {
819: *zgpu = beta * *ygpu + gamma * *zgpu;
820: ViennaCLWaitForGPU();
821: PetscLogGpuFlops(3.0*n);
822: }
823: } catch(std::exception const & ex) {
824: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
825: }
826: PetscLogGpuTimeEnd();
827: PetscLogGpuFlops(3.0*n);
828: } else if (beta == 0.0 && xin->map->n > 0) {
829: PetscLogGpuTimeBegin();
830: try {
831: if (gamma == 0.0) {
832: *zgpu = alpha * *xgpu;
833: ViennaCLWaitForGPU();
834: PetscLogGpuFlops(1.0*n);
835: } else {
836: *zgpu = alpha * *xgpu + gamma * *zgpu;
837: ViennaCLWaitForGPU();
838: PetscLogGpuFlops(3.0*n);
839: }
840: } catch(std::exception const & ex) {
841: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
842: }
843: PetscLogGpuTimeEnd();
844: } else if (gamma == 0.0 && xin->map->n > 0) {
845: PetscLogGpuTimeBegin();
846: try {
847: *zgpu = alpha * *xgpu + beta * *ygpu;
848: ViennaCLWaitForGPU();
849: } catch(std::exception const & ex) {
850: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
851: }
852: PetscLogGpuTimeEnd();
853: PetscLogGpuFlops(3.0*n);
854: } else if (xin->map->n > 0) {
855: PetscLogGpuTimeBegin();
856: try {
857: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
858: if (gamma != 1.0)
859: *zgpu *= gamma;
860: *zgpu += alpha * *xgpu + beta * *ygpu;
861: ViennaCLWaitForGPU();
862: } catch(std::exception const & ex) {
863: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
864: }
865: PetscLogGpuTimeEnd();
866: VecViennaCLRestoreArray(zin,&zgpu);
867: VecViennaCLRestoreArrayRead(xin,&xgpu);
868: VecViennaCLRestoreArrayRead(yin,&ygpu);
869: PetscLogGpuFlops(5.0*n);
870: }
871: return(0);
872: }
874: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
875: {
876: PetscErrorCode ierr;
877: PetscInt n = win->map->n;
878: const ViennaCLVector *xgpu,*ygpu;
879: ViennaCLVector *wgpu;
882: if (xin->map->n > 0) {
883: VecViennaCLGetArrayRead(xin,&xgpu);
884: VecViennaCLGetArrayRead(yin,&ygpu);
885: VecViennaCLGetArray(win,&wgpu);
886: PetscLogGpuTimeBegin();
887: try {
888: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
889: ViennaCLWaitForGPU();
890: } catch(std::exception const & ex) {
891: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
892: }
893: PetscLogGpuTimeEnd();
894: VecViennaCLRestoreArrayRead(xin,&xgpu);
895: VecViennaCLRestoreArrayRead(yin,&ygpu);
896: VecViennaCLRestoreArray(win,&wgpu);
897: PetscLogGpuFlops(n);
898: }
899: return(0);
900: }
903: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
904: {
905: PetscErrorCode ierr;
906: PetscInt n = xin->map->n;
907: PetscBLASInt bn;
908: const ViennaCLVector *xgpu;
911: if (xin->map->n > 0) {
912: PetscBLASIntCast(n,&bn);
913: VecViennaCLGetArrayRead(xin,&xgpu);
914: if (type == NORM_2 || type == NORM_FROBENIUS) {
915: PetscLogGpuTimeBegin();
916: try {
917: *z = viennacl::linalg::norm_2(*xgpu);
918: ViennaCLWaitForGPU();
919: } catch(std::exception const & ex) {
920: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
921: }
922: PetscLogGpuTimeEnd();
923: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
924: } else if (type == NORM_INFINITY) {
925: VecViennaCLGetArrayRead(xin,&xgpu);
926: PetscLogGpuTimeBegin();
927: try {
928: *z = viennacl::linalg::norm_inf(*xgpu);
929: ViennaCLWaitForGPU();
930: } catch(std::exception const & ex) {
931: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
932: }
933: PetscLogGpuTimeEnd();
934: VecViennaCLRestoreArrayRead(xin,&xgpu);
935: } else if (type == NORM_1) {
936: PetscLogGpuTimeBegin();
937: try {
938: *z = viennacl::linalg::norm_1(*xgpu);
939: ViennaCLWaitForGPU();
940: } catch(std::exception const & ex) {
941: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
942: }
943: PetscLogGpuTimeEnd();
944: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
945: } else if (type == NORM_1_AND_2) {
946: PetscLogGpuTimeBegin();
947: try {
948: *z = viennacl::linalg::norm_1(*xgpu);
949: *(z+1) = viennacl::linalg::norm_2(*xgpu);
950: ViennaCLWaitForGPU();
951: } catch(std::exception const & ex) {
952: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
953: }
954: PetscLogGpuTimeEnd();
955: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
956: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
957: }
958: VecViennaCLRestoreArrayRead(xin,&xgpu);
959: } else if (type == NORM_1_AND_2) {
960: *z = 0.0;
961: *(z+1) = 0.0;
962: } else *z = 0.0;
963: return(0);
964: }
967: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
968: {
972: VecSetRandom_SeqViennaCL_Private(xin,r);
973: xin->offloadmask = PETSC_OFFLOAD_CPU;
974: return(0);
975: }
977: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
978: {
983: VecViennaCLCopyFromGPU(vin);
984: VecResetArray_SeqViennaCL_Private(vin);
985: vin->offloadmask = PETSC_OFFLOAD_CPU;
986: return(0);
987: }
989: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
990: {
995: VecViennaCLCopyFromGPU(vin);
996: VecPlaceArray_Seq(vin,a);
997: vin->offloadmask = PETSC_OFFLOAD_CPU;
998: return(0);
999: }
1001: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
1002: {
1007: VecViennaCLCopyFromGPU(vin);
1008: VecReplaceArray_Seq(vin,a);
1009: vin->offloadmask = PETSC_OFFLOAD_CPU;
1010: return(0);
1011: }
1014: /*@C
1015: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
1017: Collective
1019: Input Parameter:
1020: + comm - the communicator, should be PETSC_COMM_SELF
1021: - n - the vector length
1023: Output Parameter:
1024: . V - the vector
1026: Notes:
1027: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1028: same type as an existing vector.
1030: Level: intermediate
1032: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1033: @*/
1034: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1035: {
1039: VecCreate(comm,v);
1040: VecSetSizes(*v,n,n);
1041: VecSetType(*v,VECSEQVIENNACL);
1042: return(0);
1043: }
1046: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1047: *
1048: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1049: */
1050: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1051: {
1052: PetscErrorCode ierr;
1055: VecDot_SeqViennaCL(s,t,dp);
1056: VecNorm_SeqViennaCL(t,NORM_2,nm);
1057: *nm *= *nm; //squared norm required
1058: return(0);
1059: }
1061: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1062: {
1066: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1067: PetscLayoutReference(win->map,&(*V)->map);
1068: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1069: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1070: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1071: return(0);
1072: }
1074: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1075: {
1079: try {
1080: if (v->spptr) {
1081: delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1082: delete (Vec_ViennaCL*) v->spptr;
1083: }
1084: } catch(char *ex) {
1085: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1086: }
1087: VecDestroy_SeqViennaCL_Private(v);
1088: return(0);
1089: }
1091: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1092: {
1096: V->boundtocpu = flg;
1097: if (flg) {
1098: VecViennaCLCopyFromGPU(V);
1099: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1100: V->ops->dot = VecDot_Seq;
1101: V->ops->norm = VecNorm_Seq;
1102: V->ops->tdot = VecTDot_Seq;
1103: V->ops->scale = VecScale_Seq;
1104: V->ops->copy = VecCopy_Seq;
1105: V->ops->set = VecSet_Seq;
1106: V->ops->swap = VecSwap_Seq;
1107: V->ops->axpy = VecAXPY_Seq;
1108: V->ops->axpby = VecAXPBY_Seq;
1109: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1110: V->ops->pointwisemult = VecPointwiseMult_Seq;
1111: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1112: V->ops->setrandom = VecSetRandom_Seq;
1113: V->ops->dot_local = VecDot_Seq;
1114: V->ops->tdot_local = VecTDot_Seq;
1115: V->ops->norm_local = VecNorm_Seq;
1116: V->ops->mdot_local = VecMDot_Seq;
1117: V->ops->mtdot_local = VecMTDot_Seq;
1118: V->ops->maxpy = VecMAXPY_Seq;
1119: V->ops->mdot = VecMDot_Seq;
1120: V->ops->mtdot = VecMTDot_Seq;
1121: V->ops->aypx = VecAYPX_Seq;
1122: V->ops->waxpy = VecWAXPY_Seq;
1123: V->ops->dotnorm2 = NULL;
1124: V->ops->placearray = VecPlaceArray_Seq;
1125: V->ops->replacearray = VecReplaceArray_Seq;
1126: V->ops->resetarray = VecResetArray_Seq;
1127: V->ops->duplicate = VecDuplicate_Seq;
1128: } else {
1129: V->ops->dot = VecDot_SeqViennaCL;
1130: V->ops->norm = VecNorm_SeqViennaCL;
1131: V->ops->tdot = VecTDot_SeqViennaCL;
1132: V->ops->scale = VecScale_SeqViennaCL;
1133: V->ops->copy = VecCopy_SeqViennaCL;
1134: V->ops->set = VecSet_SeqViennaCL;
1135: V->ops->swap = VecSwap_SeqViennaCL;
1136: V->ops->axpy = VecAXPY_SeqViennaCL;
1137: V->ops->axpby = VecAXPBY_SeqViennaCL;
1138: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1139: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1140: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1141: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1142: V->ops->dot_local = VecDot_SeqViennaCL;
1143: V->ops->tdot_local = VecTDot_SeqViennaCL;
1144: V->ops->norm_local = VecNorm_SeqViennaCL;
1145: V->ops->mdot_local = VecMDot_SeqViennaCL;
1146: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1147: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1148: V->ops->mdot = VecMDot_SeqViennaCL;
1149: V->ops->mtdot = VecMTDot_SeqViennaCL;
1150: V->ops->aypx = VecAYPX_SeqViennaCL;
1151: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1152: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1153: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1154: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1155: V->ops->resetarray = VecResetArray_SeqViennaCL;
1156: V->ops->destroy = VecDestroy_SeqViennaCL;
1157: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1158: }
1159: return(0);
1160: }
1162: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1163: {
1165: PetscMPIInt size;
1168: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1169: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1170: VecCreate_Seq_Private(V,0);
1171: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1173: VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1174: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1176: VecViennaCLAllocateCheck(V);
1177: VecViennaCLAllocateCheckHost(V);
1178: VecSet(V,0.0);
1179: VecSet_Seq(V,0.0);
1180: V->offloadmask = PETSC_OFFLOAD_BOTH;
1181: return(0);
1182: }