OpenCV環境下CUDA編程示例_第1頁
OpenCV環境下CUDA編程示例_第2頁
OpenCV環境下CUDA編程示例_第3頁
OpenCV環境下CUDA編程示例_第4頁
OpenCV環境下CUDA編程示例_第5頁
已閱讀5頁,還剩16頁未讀,繼續免費閱讀

下載本文檔

版權說明:本文檔由用戶提供并上傳,收益歸屬內容提供方,若內容存在侵權,請進行舉報或認領

文檔簡介

1、OpenCV 環境下 CUDA 編程示例 在 CUDA 平臺上對圖像算法進行并行加速是目前并行計算方面比較簡單易行的一種方式,而同時利用 OpenCV 提供的一些庫函數的話,那么事情將會變得更加 easy。以下是我個人采用的一種模板,這個模板是從 OpenCV 里的算法 CUDA 源碼挖掘出來的,我感覺這個用起來比較方便,所以經常采用。首先大牛們寫的源碼都很魯棒,考慮的比較全面(如大部分算法將 1,3,4 通道的圖像同時搞定),感覺還有個比較神奇的地方在于 CPU 端 GpuMat 和 GPU 端 PtrStepSzb 的轉換,讓我欲罷不能,一個不太理想的地方在于第一幀的初始化時間比較長,應

2、該是 CPU 到 GPU 的數據傳輸。代碼中有考慮流,但貌似沒有使用。我使用的是趙開勇的 CUDA_VS_Wizard,主函數還是用的 cu 文件。以下代碼是對 Vibe 背景建模算法的并行,背景建模算法是目前接觸到易于并行的一類,如 GMM 等,而且加速效果不錯,因為一個線程執行的數據就是對應一個像素點。代碼如下:sample.cucpp view plaincopy<spanstyle="font-size:14px;">/*sample.cu* * This is a example of the CUDA program.*#in

3、clude <stdio.h>#include#include<stdlib.h> #include <cutil_inline.h><iostream> #include <string> #include "opencv2/core/core.hpp" #include "opencv2/gpu/gpu.hpp" #include "opencv2/highgui/highgui.hpp" #

4、include "Vibe_M_kernel.cu" #include "Vibe_M.h" using namespacestd; using namespace cv; using namespace cv:gpu;enum Method FGD_STAT,MOG,MOG2,VIBE,GMG ; int main(int argc,const char* argv) cv:CommandLineParser cmd(argc,argv," c | camera | flase| use camera "" m |gmg)

5、 "" h | help | false| print help" f | file | 768x576.avi | input video file "method | vibe| method (fgd, mog, mog2, vibe,message ");<< endl;if (cmd.get<bool>("help")cout << "Usage : bgfg_segm options"cout <

6、< "Avaible options:"0;bool useCamera =cmd.get<bool>("camera");string file =cmd.get<string>("file");string method =cmd.get<string>("method");if (method != "fgd"&& method != "mog&quo

7、t; && method !="mog2" && method != "vibe" &&method != "gmg")cerr << "Incorrectmethod" << endl;return -1;Method m = method = "fgd" ? FGD_STAT : method ="mog" ? MOG :

8、 method = "mog2" ? MOG2 : method ="vibe" ? VIBE : GMG;VideoCapture cap;if(useCamera)cap.open(0);elsecap.open(file);if (!cap.isOpened()cerr << "can not open camera or video file"<< endl;return -1;Mat origin,frame;cap >> origin;cv

9、tColor(origin,frame,CV_BGR2GRAY);GpuMatd_frame(frame);Vibe_M vibe;GpuMat d_fgmask;Mat fgmask;Mat fgimg;Mat bgimg;case VIBE:break;switch (m) vibe.initialize(d_frame);namedWindow("image", WINDOW_NORMAL);namedWindow("foreground mask", WINDOW_NORMAL);for(;)cap >> origin

10、;ifbreak;(origin.empty()cvtColor(origin,frame,CV_BGR2GRAY);d_frame.upload(frame);/update the modelswitch (m)case VIBE:vibe(d_frame, d_fgmask);break;d_fgmask.download(fgmask);imshow("image",frame);imshow("foreground mask", fgmask);int key = waitKey(30);if (key = 27)break;else if(k

11、ey = ' ')cvWaitKey(0);exit(0);</span> Vibe_M.cppcpp view plaincopy<span style="font-size:14px;">#include "Vibe_M.h"voidnamespace cv namespace gpu namespace devicenamespace vibe_mloadConstants(int nbSamples, int reqMatches, int radius, intsubsam

12、plingFactor);void init_gpu(PtrStepSzbframe, int cn, PtrStepSzb samples, PtrStepSz<unsignedvoidint> randStates, cudaStream_t stream);update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask,PtrStepSzb samples, PtrStepSz<unsigned int> randStates,cudaStream_t stream); namespac

13、econst int defaultNbSamples = 20;const intdefaultReqMatches = 2;const int defaultRadius = 20;const int defaultSubsamplingFactor = 16; Vibe_M:Vibe_M(unsigned long rngSeed) : frameSize_(0, 0),rngSeed_(rngSeed) nbSamples = defaultNbSamples;reqMatches = defaultReqMatches;radius = defaultRadius;subsampli

14、ngFactor = defaultSubsamplingFactor; voidVibe_M:initialize(const GpuMat& firstFrame,Stream& s) using namespacecv:gpu:device:vibe_m;CV_Assert(firstFrame.type()/cudaStream_t= CV_8UC1 | firstFrame.type() = CV_8UC3 | firstFrame.type() = CV_8UC4);stream = StreamAccessor:getStream(s);loadC

15、onstants(nbSamples, reqMatches, radius,subsamplingFactor);frameSize_ = firstFrame.size();if (randStates_.size() != frameSize_)cv:RNG rng(rngSeed_);cv:Math_randStates(frameSize_, CV_8UC4);int ch =rng.fill(h_randStates, cv:RNG:UNIFORM, 0, 255);randStates_.upload(h_randStates);firstFrame.channels();int

16、 sample_ch = ch = 1 ? 1 : 4;samples_.create(nbSamples * frameSize_.height, frameSize_.width, CV_8UC(sample_ch);init_gpu(firstFrame, ch, samples_, randStates_, 0); voidVibe_M:operator()(const GpuMat& frame, GpuMat&fgmask, Stream& s) using namespacecv:gpu:device:vibe_m;CV_Asser

17、t(frame.depth() =CV_8U);int ch = frame.channels();intsample_ch = ch = 1 ? 1 : 4;if (frame.size() !=frameSize_ | sample_ch != samples_.channels()initialize(frame);fgmask.create(frameSize_,CV_8UC1);update_gpu(frame, ch, fgmask, samples_,randStates_, StreamAccessor:getStream(s); voidVibe_M:release() fr

18、ameSize_ = Size(0, 0);randStates_.release();samples_.release(); </span> Vibe_M.hcpp view plaincopy<span style="font-size:14px;">#ifndef _VIBE_M_H_ #defineVIBE_M_H#ifndef SKIP_INCLUDES #include<vector>#include <memory> #include<

19、iosfwd>#endif #include "opencv2/core/core.hpp"#include "opencv2/core/gpumat.hpp" #include"opencv2/gpu/gpu.hpp" #include"opencv2/imgproc/imgproc.hpp" #include"opencv2/objdetect/objdetect.hpp" #include"opencv2/features2d/features2d.hpp"

20、; using namespace std;using namespace cv; using namespace cv:gpu; classVibe_M public:/! the default constructorexplicit Vibe_M(unsigned long rngSeed = 1234567);/!re-initiaization methodvoid initialize(constGpuMat& firstFrame, Stream& stream =Stream:Null();/! the update operatorvoidGp

21、uMat randStates_;GpuMat samples_; ;/! releases alloperator()(const GpuMat& frame, GpuMat& fgmask,Stream& stream = Stream:Null();inner buffersvoid release();int nbSamples;/ number of samples per pixelint reqMatches;/ #_minint radius;/ RintsubsamplingFactor; / amount of random

22、subsamplingprivate:Size frameSize_;unsigned long rngSeed_;Stream& s) using namespace#endif</span> Vibe_M.cuhtml view plaincopy<span style="font-size:14px;">#include "Vibe_M.h" #include "opencv2/gpu/stream_accessor.hpp" namespace cvnam

23、espacevibe_m namespace gpu namespace device void loadConstants(int nbSamples,int reqMatches, int radius, int subsamplingFactor);void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples,PtrStepSz<unsigned int> randStates, cudaStream_t stream);void update_gpu(PtrStepSzb frame, int cn,

24、PtrStepSzb fgmask,PtrStepSzb samples, PtrStepSz<unsigned int> randStates,cudaStream_t stream); namespaceconst int defaultNbSamples = 20;const intdefaultReqMatches = 2;const int defaultRadius = 20;const int defaultSubsamplingFactor = 16; Vibe_M:Vibe_M(unsigned long rngSeed) : frameSize_

25、(0, 0),rngSeed_(rngSeed) nbSamples = defaultNbSamples;reqMatches = defaultReqMatches;radius = defaultRadius;subsamplingFactor = defaultSubsamplingFactor; voidVibe_M:initialize(const GpuMat& firstFrame,cv:gpu:device:vibe_m;CV_Assert(firstFrame.type()cudaStream_t stream= CV_8UC1 | firstFrame.t

26、ype() = CV_8UC3 | firstFrame.type() = CV_8UC4);= cv:gpu:StreamAccessor:getStream(s);loadConstants(nbSamples, reqMatches, radius,subsamplingFactor);frameSize_ = firstFrame.size();if (randStates_.size() != frameSize_)cv:RNG rng(rngSeed_);cv:Math_randStates(frameSize_, CV_8UC4);rng.fill(h_randStates, c

27、v:RNG:UNIFORM, 0, 255);randStates_.upload(h_randStates);int ch =firstFrame.channels();int sample_ch = ch = 1 ? 1 : 4;samples_.create(nbSamples * frameSize_.height,frameSize_.width, CV_8UC(sample_ch);init_gpu(firstFrame, ch, samples_, randStates_, stream); void Vibe_M:operator()(const GpuMat&

28、 frame,GpuMat& fgmask, Stream& s) usingnamespace cv:gpu:device:vibe_m;CV_Assert(frame.depth() = CV_8U);int ch =frame.channels();int sample_ch = ch = 1 ? 1 : 4;if (frame.size() != frameSize_ | sample_ch !=samples_.channels()initialize(frame);fgmask.create(frameSize_, CV_8UC1);update_g

29、pu(frame, ch, fgmask, samples_, randStates_,cv:gpu:StreamAccessor:getStream(s); voidVibe_M:release() frameSize_ = Size(0, 0);randStates_.release();samples_.release(); </span> Vibe_M_kernel.cucpp view plaincopy<spanstyle="font-size:14px;">#include"opencv2/gp

30、u/device/common.hpp" #include"opencv2/gpu/device/vec_math.hpp" namespace cv namespace gpu namespace device namespacevibe_mconstant_ int c_nbSamples;constant_ int c_reqMatches;constant_ intc_radius;constant_ int c_subsamplingFactor;void loadConstants(int nbSamples, int reqMatches, int

31、radius,int subsamplingFactor)cudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int) );cudaSafeCall( cudaMemcpyToSymbol(c_reqMatches,&reqMatches, sizeof(int) );cudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int) );cudaSafeCall( cudaMemcpyToSymbol(c_s

32、ubsamplingFactor, &subsamplingFactor, sizeof(int) );device_ _forceinline_ uint nextRand(uint& state)/const unsigned int CV_RNG_COEFF =4164903690U;/ 已經(jīng)定義state = state *CV_RNG_COEFF + (state >> 16);return state;constant_ intc_xoff9 = -1, 0, 1, -1, 1, -1, 0, 1, 0;constant_

33、 int c_yoff9 = -1, -1, -1,0, 0, 1, 1, 1, 0;device_ _forceinline_ int2 chooseRandomNeighbor(int x, int y, uint& randState, int count = 8)int idx = nextRand(randState) % count;return make_int2(x + c_xoffidx, y + c_yoffidx);device_ _forceinline_ uchar cvt(uchar val)return val;device_ _forceinli

34、ne_ uchar4 cvt(const uchar3& val)return make_uchar4(val.x, val.y, val.z,0);device_ _forceinline_ uchar4cvt(const uchar4& val)return val;template <typename SrcT, typename SampleT>global_ void init(const PtrStepSz<SrcT> frame,PtrStep<SampleT> s

35、amples, PtrStep<uint>randStates)const int x =blockIdx.x * blockDim.x + threadIdx.x;constint y = blockIdx.y * blockDim.y + threadIdx.y;if (x >= frame.cols | y >= frame.rows)return;uint localState = randStates(y, x);for (int k = 0; k < c_nbSamples; +k)int2 np = choos

36、eRandomNeighbor(x, y,localState, 9);np.x= :max(0, :min(np.x, frame.cols - 1);np.y = :max(0, :min(np.y, frame.rows - 1);SrcT pix = frame(np.y, np.x);samples(k* frame.rows + y, x) = cvt(pix);randStates(y, x) = localState;template <typename SrcT, typename SampleT>void init_caller(PtrStepS

37、zb frame, PtrStepSzb samples,PtrStepSz<uint> randStates, cudaStream_t stream)dim3 block(32, 8);dim3grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y);cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT,SampleT>, cudaFuncCachePreferL1) );init<SrcT, SampleT&g

38、t;<<<grid, block, 0, stream>>>(PtrStepSz<SrcT>) frame,if(PtrStepSz<SampleT>) samples, randStates);cudaSafeCall( cudaGetLastError() );(stream = 0) cudaSafeCall( cudaDeviceSynchronize() );void init_gpu(PtrStepSzb frame, int cn, PtrS

39、tepSzb samples,PtrStepSz<uint> randStates, cudaStream_t stream)typedef void (*func_t)(PtrStepSzb frame,PtrStepSzb samples, PtrStepSz<uint> randStates,cudaStream_t stream);static const func_tfuncs =0,init_caller<uchar, uchar>, 0, init_caller<uchar3,uch

40、ar4>, init_caller<uchar4,uchar4>funcscn(frame, samples, randStates, stream);device_ _forceinline_ int calcDist(uchar a, uchar b)return :abs(a - b);device_ _forceinline_ int calcDist(const uchar3& a,const uchar4& b)return(:abs(a.x - b.x) + :abs(a.y - b.y) + :abs(a

41、.z - b.z) /3;device_ _forceinline_ intcalcDist(const uchar4& a, const uchar4& b)return (:abs(a.x - b.x) + :abs(a.y - b.y)+ :abs(a.z - b.z) / 3;template<typename SrcT, typename SampleT>global_ void update(const PtrStepSz<SrcT> frame,PtrStepb fgmask, PtrStep

42、<SampleT> samples,PtrStep<uint> randStates)const int x = blockIdx.x * blockDim.x +threadIdx.x;const int y = blockIdx.y *blockDim.y + threadIdx.y;if (x >=frame.cols | y >= frame.rows)return;uint localState = randStates(y, x);SrcTimgPix = frame(y, x);/ compari

43、son with themodelint count = 0;for (intk = 0; (count < c_reqMatches) && (k <c_nbSamples); +k)SampleT samplePix = samples(k *frame.rows + y, x);int distance =calcDist(imgPix, samplePix);if(distance < c_radius)+count;/ pixelclassification according to reqMatches fg

44、mask(y, x) = (uchar) (-(count < c_reqMatches);/ 當(dāng)count<2 時(shí),為前景 當(dāng)計(jì)數(shù)器 count>=2 時(shí),為背景if (count >= c_reqMatches)/ the pixel belongs to the background/ gets a random number between 0 and subsamplingFactor-1 int randomNumber = nextRand(localState) %/ update of thec_subsamplingFactor;current pixel model if (randomNumber =0)/ randomsubsamplingint k =nextRand(localState) % c_nbSamples;samples(k * frame.rows + y, x) =cvt(imgPix);/update of a neighboring pixel modelrandomNumber = nextRand(localState) % c_subsamplingFactor;if (randomNumber = 0)/ random subsamp

溫馨提示

  • 1. 本站所有資源如無特殊說明,都需要本地電腦安裝OFFICE2007和PDF閱讀器。圖紙軟件為CAD,CAXA,PROE,UG,SolidWorks等.壓縮文件請下載最新的WinRAR軟件解壓。
  • 2. 本站的文檔不包含任何第三方提供的附件圖紙等,如果需要附件,請聯系上傳者。文件的所有權益歸上傳用戶所有。
  • 3. 本站RAR壓縮包中若帶圖紙,網頁內容里面會有圖紙預覽,若沒有圖紙預覽就沒有圖紙。
  • 4. 未經權益所有人同意不得將文件中的內容挪作商業或盈利用途。
  • 5. 人人文庫網僅提供信息存儲空間,僅對用戶上傳內容的表現方式做保護處理,對用戶上傳分享的文檔內容本身不做任何修改或編輯,并不能對任何下載內容負責。
  • 6. 下載文件中如有侵權或不適當內容,請與我們聯系,我們立即糾正。
  • 7. 本站不保證下載資源的準確性、安全性和完整性,同時也不承擔用戶因使用這些下載資源對自己和他人造成任何形式的傷害或損失。

評論

0/150

提交評論