-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathOpenCV_gpu_ocl_test.cpp
More file actions
144 lines (119 loc) · 4.7 KB
/
OpenCV_gpu_ocl_test.cpp
File metadata and controls
144 lines (119 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#include <chrono>
#include <iostream>
#include <string>
#include <sstream>
#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ocl/ocl.hpp>
#pragma comment( lib, "opencv_core249.lib" )
#pragma comment( lib, "opencv_gpu249.lib" )
#pragma comment( lib, "opencv_highgui249.lib" )
#pragma comment( lib, "opencv_imgproc249.lib" )
#pragma comment( lib, "opencv_ocl249.lib" )
/// Invokes a callable a fixed number of times and measures the total elapsed time.
///
/// @tparam Func  Any type that can be called with no arguments.
/// @param func   Callable to time; it is invoked exactly uTimes times.
/// @param uTimes Number of invocations (defaults to 1; 0 means func is never called).
/// @return Total elapsed time over all invocations, truncated to whole milliseconds.
template<class Func>
size_t benchmark(Func func, unsigned int uTimes = 1)
{
    // steady_clock is monotonic, so the measurement cannot be distorted by
    // wall-clock adjustments made while the benchmark is running.
    const auto tpStart = std::chrono::steady_clock::now();

    // The counter has the same type as uTimes, which avoids a signed/unsigned
    // comparison and signed overflow for very large repeat counts.
    for (unsigned int i = 0; i < uTimes; ++i)
        func();

    const auto elapsed = std::chrono::steady_clock::now() - tpStart;
    return static_cast<size_t>(
        std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count());
}
/// Saves an image as "<rFile>.<N>.tif" and prints how long the write took.
///
/// N is a counter shared by every call to this function (1 on the first call),
/// so each call within one run produces a different file name.
///
/// @param rFile   Output path prefix; the counter and ".tif" are appended to it.
/// @param mResult Image passed to cv::imwrite.
void WriteFile(const std::string& rFile, const cv::Mat& mResult)
{
    static int iCounter = 0;

    std::ostringstream ssName;
    ssName << rFile << "." << ++iCounter << ".tif";
    const std::string sOutFile = ssName.str();

    // Keep the result of imwrite so a failed save is not reported as a success.
    bool bSaved = false;
    const auto uElapsed = benchmark([&mResult, &sOutFile, &bSaved]() {
        bSaved = cv::imwrite(sOutFile, mResult);
    });

    std::cout << "Save File " << uElapsed << " milliseconds" << std::endl;
    if (!bSaved)
        std::cerr << "Failed to write " << sOutFile << std::endl;
}
int main(int argc, char** argv)
{
cv::Mat mSource, mResult;
std::string sFile = argv[1], sOutFile;
unsigned int uTimes = std::atoi( argv[2] );
unsigned int uTimes2 = std::atoi(argv[3]);
std::cout << "Load File " << benchmark([&mSource, &sFile](){
mSource = cv::imread(sFile, CV_LOAD_IMAGE_UNCHANGED);
} ) << " milliseconds" << std::endl;
std::cout << "\nBuild rotate matrix" << std::endl;
cv::Point2f ptCenter(mSource.cols / 2,mSource.rows / 2);
double dAngle = 30;
cv::Mat mRotate = cv::getRotationMatrix2D(ptCenter, dAngle, 1.0);
std::cout << "\n============\nCPU Test" << std::endl;
cv::Mat cSource = mSource.clone();
std::cout << "Rotate in " << benchmark([&cSource, &mResult, &mRotate, &uTimes2](){
for (int i = 0; i < uTimes2; ++i)
{
cv::warpAffine(cSource, mResult, mRotate, cSource.size(), CV_INTER_CUBIC);
std::swap(cSource, mResult);
}
}, uTimes) << " milliseconds" << std::endl;
WriteFile( sFile + ".cpu",mResult );
cv::ocl::PlatformsInfo vPInfo;
cv::ocl::getOpenCLPlatforms(vPInfo);
std::cout << "\n============\nOpenCL Test : " << vPInfo.size() << " platforms" << std::endl;
for (const cv::ocl::PlatformInfo* pInfo : vPInfo)
{
std::cout << "\n>>>>>>>>>\n" << pInfo->platformName << " : " << pInfo->devices.size() << " devices" << std::endl;
for (const cv::ocl::DeviceInfo* pDevice : pInfo->devices)
{
std::cout << "\n-----------\n" << pDevice->deviceName << std::endl;
std::cout << " > Initial in " << benchmark([&pDevice](){
cv::ocl::setDevice(pDevice);
},1) << " milliseconds" << std::endl;
std::cout << " > dummy upload " << benchmark([](){
cv::Mat mM1(16, 16, CV_8UC1);
cv::ocl::oclMat mMat;
mMat.upload(mM1);
}, 1) << " milliseconds" << std::endl;
cv::ocl::oclMat mGpuSource, mGpuResult;
std::cout << " > Upload to GPU in " << benchmark([&mSource, &mGpuSource](){
mGpuSource.upload(mSource);
}, uTimes) << " milliseconds" << std::endl;
std::cout << " > Rotate in " << benchmark([&mGpuSource, &mGpuResult, &mRotate, &uTimes2](){
for (int i = 0; i < uTimes2; ++i)
{
cv::ocl::warpAffine(mGpuSource, mGpuResult, mRotate, mGpuSource.size(), CV_INTER_CUBIC);
mGpuResult.swap(mGpuSource);
}
}, uTimes) << " milliseconds" << std::endl;
std::cout << " > Download from GPU in " << benchmark([&mResult, &mGpuResult](){
mGpuResult.download(mResult);
}, uTimes) << " milliseconds" << std::endl;
WriteFile(sFile + ".ocl", mResult);
}
}
int iNum = cv::gpu::getCudaEnabledDeviceCount();
std::cout << "\n============\nCUDA Test : " << iNum << " devices" << std::endl;
for (int i = 0; i < iNum; ++i)
{
std::cout << " > Initial in " << benchmark([&i](){
cv::gpu::setDevice(i);
}, 1) << " milliseconds" << std::endl;
std::cout << " > dummy upload " << benchmark([](){
cv::Mat mM1(16, 16, CV_8UC1);
cv::gpu::GpuMat mMat;
mMat.upload(mM1);
}, 1) << " milliseconds" << std::endl;
cv::gpu::printCudaDeviceInfo(i);
cv::gpu::GpuMat mGpuSource, mGpuResult;
std::cout << " > Upload to GPU in " << benchmark([&mSource, &mGpuSource](){
mGpuSource.upload(mSource);
}, uTimes) << " milliseconds" << std::endl;
std::cout << " > Rotate in " << benchmark([&mGpuSource, &mGpuResult, &mRotate, &uTimes2](){
for (int i = 0; i < uTimes2; ++i)
{
cv::gpu::warpAffine(mGpuSource, mGpuResult, mRotate, mGpuSource.size(), CV_INTER_CUBIC);
mGpuResult.swap(mGpuSource);
}
}, uTimes) << " milliseconds" << std::endl;
std::cout << " > Download from GPU in " << benchmark([&mResult, &mGpuResult](){
mGpuResult.download(mResult);
}, uTimes) << " milliseconds" << std::endl;
WriteFile(sFile + ".cuda", mResult);
}
}