-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathLinearAlgebraOperation.cpp
More file actions
105 lines (86 loc) · 2.69 KB
/
LinearAlgebraOperation.cpp
File metadata and controls
105 lines (86 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#include "LinearAlgebraOperation.h"
using namespace std;
/// Builds the linear-algebra operation set on top of a compute platform.
///
/// The platform is used three times: to initialise the ThePlatform member, to
/// construct the ReduceOperation member, and to create the program whose
/// kernels the other methods of this class launch.
///
/// @param Platform  Compute platform that supplies the OpenCL program.
///                  NOTE(review): ThePlatform is presumably a reference member,
///                  in which case Platform must outlive this object - confirm
///                  against the class declaration.
LinearAlgebraOperation::LinearAlgebraOperation(ComputePlatform& Platform) :
ThePlatform(Platform),
ReduceOperation(Platform)
{
// Assigned in the body rather than the initialiser list, so ThePlatform is
// guaranteed to be initialised before it is used here.
// NOTE(review): the kernel path is relative; presumably resolved against the
// process working directory - confirm in ComputePlatform::CreateProgram.
LinearAlgebraProgram = ThePlatform.CreateProgram("kernels/LinearAlgebra.cl");
}
/// Enqueues a matrix-vector multiplication on the device that owns Queue.
///
/// The device type of the queue selects the implementation: MatVecMulCPU for
/// CPU devices, MatVecMulGPU for GPU devices.
///
/// @param Queue         Command queue the kernel is enqueued on.
/// @param MatrixBuffer  Device buffer forwarded as the matrix operand.
/// @param VectorBuffer  Device buffer forwarded as the vector operand.
/// @param ResultBuffer  Device buffer forwarded as the output.
/// @param Count         Problem size forwarded to the selected implementation.
/// @return The event of the enqueued kernel, or a default-constructed
///         cl::Event when the device is neither a CPU nor a GPU; in that case
///         nothing has been enqueued and ResultBuffer is left untouched.
cl::Event LinearAlgebraOperation::MatVecMul(cl::CommandQueue Queue, cl::Buffer MatrixBuffer, cl::Buffer VectorBuffer, cl::Buffer ResultBuffer, int Count)
{
    auto Device = Queue.getInfo<CL_QUEUE_DEVICE>();
    auto DeviceType = Device.getInfo<CL_DEVICE_TYPE>();
    DEBUG_OPERATION;

    // CL_DEVICE_TYPE is a bitfield and a device may report a combination such
    // as GPU | DEFAULT. Comparing for exact equality would miss those devices
    // and silently enqueue nothing, so the individual bits are tested instead.
    if ((DeviceType & CL_DEVICE_TYPE_CPU) != 0)
    {
        return MatVecMulCPU(Queue, MatrixBuffer, VectorBuffer, ResultBuffer, Count);
    }
    if ((DeviceType & CL_DEVICE_TYPE_GPU) != 0)
    {
        return MatVecMulGPU(Queue, MatrixBuffer, VectorBuffer, ResultBuffer, Count);
    }

    // Accelerators and custom devices have no implementation here.
    return cl::Event();
}
/// Computes a dot product of two device buffers and returns it to the host.
///
/// The "VecMul" kernel is launched with one work-item per element and writes
/// into CacheBuffer; the contents of CacheBuffer are then summed with
/// ReduceOperation.ReduceDouble.
/// NOTE(review): "VecMul" presumably stores the element-wise products
/// A[i] * B[i] - confirm against kernels/LinearAlgebra.cl.
///
/// @param Queue        Command queue used for the kernel and the reduction.
/// @param A            First operand buffer.
/// @param B            Second operand buffer.
/// @param Count        Number of elements to process; must be positive.
/// @param CacheBuffer  Scratch buffer of at least Count * sizeof(double) bytes.
///                     Its contents are overwritten.
/// @return The value produced by the sum reduction over CacheBuffer.
/// @throws std::runtime_error if Count is not positive or CacheBuffer is too
///         small.
double LinearAlgebraOperation::DotProduct(cl::CommandQueue Queue, cl::Buffer A, cl::Buffer B, int Count, cl::Buffer CacheBuffer)
{
    DEBUG_OPERATION;

    // Reject non-positive sizes up front: a negative Count would wrap to a
    // huge unsigned byte count below, and a zero-sized launch is not valid.
    if (Count <= 0)
    {
        throw std::runtime_error("Count must be positive, got " + std::to_string(Count));
    }

    // The scratch buffer only has to be large enough; a bigger buffer is fine
    // because the kernel and the reduction both touch just Count elements.
    const std::size_t RequiredBytes = static_cast<std::size_t>(Count) * sizeof(double);
    if (CacheBuffer.getInfo<CL_MEM_SIZE>() < RequiredBytes)
    {
        throw std::runtime_error("CacheBuffer must have at least " + std::to_string(RequiredBytes) + " bytes");
    }

    auto VecMulKernel = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int>(LinearAlgebraProgram, "VecMul");

    // NOTE(review): the reduction is not explicitly chained on the kernel's
    // event; this presumably relies on Queue being an in-order queue - confirm.
    VecMulKernel(cl::EnqueueArgs(Queue, cl::NDRange(Count)), A, B, CacheBuffer, Count);

    return ReduceOperation.ReduceDouble(Queue, CacheBuffer, Count, ReductionOp::Sum);
}
/// Enqueues the CPU flavour of the matrix-vector multiplication.
///
/// Launches "MatVecMulCPUKernel" over a one-dimensional global range of Count
/// work-items; no local size is specified, so the runtime picks one.
/// NOTE(review): presumably one work-item per matrix row - confirm against the
/// kernel source.
///
/// @param Queue         Command queue the kernel is enqueued on.
/// @param MatrixBuffer  First kernel argument.
/// @param VectorBuffer  Second kernel argument.
/// @param ResultBuffer  Fourth kernel argument (passed after Count).
/// @param Count         Global work size, also passed as the third kernel
///                      argument.
/// @return The event associated with the enqueued kernel.
cl::Event LinearAlgebraOperation::MatVecMulCPU(cl::CommandQueue Queue, cl::Buffer MatrixBuffer, cl::Buffer VectorBuffer, cl::Buffer ResultBuffer, int Count)
{
    // Argument order of the kernel: matrix, vector, size, result.
    using CpuKernelFunctor = cl::make_kernel<cl::Buffer, cl::Buffer, int, cl::Buffer>;

    CpuKernelFunctor Launch(LinearAlgebraProgram, "MatVecMulCPUKernel");
    cl::EnqueueArgs LaunchConfig(Queue, cl::NDRange(Count));

    return Launch(LaunchConfig, MatrixBuffer, VectorBuffer, Count, ResultBuffer);
}
/// Enqueues the GPU flavour of the matrix-vector multiplication.
///
/// Launches "MatVecMulGPUKernel" on a two-dimensional range. Dimension 0 is
/// Count rounded up to a multiple of 8, dimension 1 is fixed at 8. The local
/// size in dimension 0 is the largest power of two up to 64 that divides the
/// padded global size and fits within the device's maximum work-group size.
/// NOTE(review): the kernel is presumably written for exactly 8 work-items in
/// dimension 1 and handles the padded rows itself - confirm against the kernel
/// source.
///
/// @param Queue         Command queue the kernel is enqueued on.
/// @param MatrixBuffer  First kernel argument.
/// @param VectorBuffer  Second kernel argument.
/// @param ResultBuffer  Third kernel argument.
/// @param Count         Problem size; passed to the kernel twice
///                      (NOTE(review): presumably rows and columns of a square
///                      matrix - confirm). Expected to be positive; a
///                      non-positive value yields an invalid global size.
/// @return The event associated with the enqueued kernel.
cl::Event LinearAlgebraOperation::MatVecMulGPU(cl::CommandQueue Queue, cl::Buffer MatrixBuffer, cl::Buffer VectorBuffer, cl::Buffer ResultBuffer, int Count)
{
    auto MatVecMulKernel = cl::make_kernel<
        cl::Buffer,
        cl::Buffer,
        cl::Buffer,
        cl::LocalSpaceArg,
        int,
        int
    >(LinearAlgebraProgram, "MatVecMulGPUKernel");

    const int PThreads = 8;           // extent of dimension 1 (global and local)
    const int MaxLocalSize0 = 64;     // largest local size tried in dimension 0
    const int MinLocalSize0 = 8;      // smallest local size the search can yield

    // Round Count up to the next multiple of PThreads. The double modulo keeps
    // the result identical to stepping upwards one by one, for any sign.
    const int GlobalSize0 = Count + (PThreads - Count % PThreads) % PThreads;

    // Largest power of two <= MaxLocalSize0 that divides the global size.
    // Because GlobalSize0 is a multiple of 8 this never drops below 8.
    int LocalSize0 = MaxLocalSize0;
    while (GlobalSize0 % LocalSize0 != 0)
    {
        LocalSize0 >>= 1;
    }

    // A work-group holds LocalSize0 * PThreads work-items (up to 512), which
    // exceeds the limit of some devices and makes the enqueue fail. Halving a
    // power-of-two divisor keeps it a divisor, so shrink until it fits. The
    // search never goes below MinLocalSize0, so only group shapes that could
    // already occur before are ever used.
    const std::size_t MaxGroupSize =
        Queue.getInfo<CL_QUEUE_DEVICE>().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
    while (LocalSize0 > MinLocalSize0 &&
           static_cast<std::size_t>(LocalSize0) * PThreads > MaxGroupSize)
    {
        LocalSize0 >>= 1;
    }

    return MatVecMulKernel(
        cl::EnqueueArgs(
            Queue,
            cl::NDRange(GlobalSize0, PThreads),
            cl::NDRange(LocalSize0, PThreads)
        ),
        MatrixBuffer,
        VectorBuffer,
        ResultBuffer,
        // One double of local memory per work-item in the group.
        cl::Local(LocalSize0 * PThreads * sizeof(double)),
        Count,
        Count
    );
}