版權說明:本文檔由用戶提供并上傳,收益歸屬內容提供方,若內容存在侵權,請進行舉報或認領
文檔簡介
用CilkPlus實現并行性#include<stdio.h>#include<windows.h>#include<mmsystem.h>#include<math.h>#include<cilk/cilk.h>#include<cilk/reducer_opadd.h>constlongintVERYBIG=100000;//*******************************************************************************************intmain(void){ inti; DWORDstarttime,elapsedtime; //---------------------------------------------------------------------- //Outputastartmessage //printf("NoneParallelTimingsfor%diterations\n\n",VERYBIG); printf("CilkPlusParallelTimingsfor%diterations\n\n",VERYBIG); //repeatexperimentseveraltimes for(i=0;i<6;i++) { //getstartingtime starttime=timeGetTime(); //resetchecksum&runningtotal cilk::reducer_opadd<longint>sum(0); cilk::reducer_opadd<double>total(0.0); //WorkLoop,dosomeworkbyloopingVERYBIGtimes cilk_for(intj=0;j<VERYBIG;j++) { longintk; doublesumx,sumy; //incrementchecksum sum+=1; //Calculatefirstarithmeticseries sumx=0.0; for(k=0;k<j;k++) sumx=sumx+(double)k; //Calculatesecondarithmeticseries sumy=0.0; for(k=j;k>0;k--) sumy=sumy+(double)k; if(sumx>0.0)total=total+1.0/sqrt(sumx); if(sumy>0.0)total=total+1.0/sqrt(sumy); } //getendingtimeanduseittodetermineelapsedtime elapsedtime=timeGetTime()-starttime; //reportelapsedtime printf("TimeElapsed%10dmSecsTotal=%lfCheckSum=%ld\n",(int)elapsedtime,total.get_value(),sum.get_value()); } //returnintegerasrequiredbyfunctionheader return0;}Windows多線程SemaphoreOpenMP錯誤檢測#include<stdio.h>#include<omp.h>staticlongnum_steps=10000*4;doublestep,gsum1,gsum2;voidSafeAdd(doublesum1,doublesum2,omp_lock_t&lock1,omp_lock_t&lock2){ //lockgsum1andupdate omp_set_lock(&lock1); gsum1+=sum1; //lockgsum2andupdate omp_set_lock(&lock2); gsum2+=sum2; omp_unset_lock(&lock2); omp_unset_lock(&lock1);}intmain(){ inti; doublex1,x2; omp_lock_tlock1,lock2; gsum1=0.0; gsum2=0.0; omp_init_lock(&lock1); omp_init_lock(&lock2); printf("CalculatingPi...\n"); step=1.0/(double)num_steps; for(i=0;i<num_steps;i+=4) { doublesum1,sum2; 
#pragmaompparallelsections//#pragmaompparallelsectionsprivate(x1,x2,sum1,sum2)數據競爭 { #pragmaompsection { //calculatefirstbar x1=(i+0.5)*step; x1*=x1; sum1=4.0/(1.0+x1); //calculatesecondbar x2=(i+1.5)*step; x2*=x2; sum2=4.0/(1.0+x2); SafeAdd(sum1,sum2,lock1,lock2); } #pragmaompsection { //calculatethirdbar x1=(i+2.5)*step; x1*=x1; sum1=4.0/(1.0+x1); //calculatefourthbar x2=(i+3.5)*step; x2*=x2; sum2=4.0/(1.0+x2); SafeAdd(sum1,sum2,lock2,lock1);//死鎖 } } } //calacvalueofpi doublepi=step*(gsum1+gsum2); printf("pi:%2.21f\n",pi); omp_destroy_lock(&lock1); omp_destroy_lock(&lock2);}內存錯誤MPI#include"mpi.h"#include<stdio.h>#include<string.h>intmain(intargc,char*argv[]){inti,rank,size,namelen;charname[MPI_MAX_PROCESSOR_NAME];MPI_Statusstat;MPI_Init(&argc,&argv);MPI_Comm_size(MPI_COMM_WORLD,&size);MPI_Comm_rank(MPI_COMM_WORLD,&rank);MPI_Get_processor_name(name,&namelen);if(rank==0){ printf("Helloworld:rank%dof%drunningon%s\n",rank,size,name); for(i=1;i<size;i++){ MPI_Recv(&rank,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(&size,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(&namelen,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(name,namelen+1,MPI_CHAR,i,1,MPI_COMM_WORLD,&stat); printf("Helloworld:rank%dof%drunningon%s\n",rank,size,name); }}else{ MPI_Send(&rank,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(&size,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(&namelen,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(name,namelen+1,MPI_CHAR,0,1,MPI_COMM_WORLD);}MPI_Finalize();return(0);}CUDA#include<stdio.h>#include<cuda_runtime.h>#defineNUM_THREADS256#defineN1000boolInitCUDA();voidmatgen(float*a,intlda,intn);clock_tmatmult(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn);voidcompare_mat(constfloat*a,intlda,constfloat*b,intldb,intn);clock_tmatmultCUDA(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn);__global__staticvoidmatMultCUDA(constfloat*a,size_tlda,constfloat*b,size_tldb,float*c,size_tldc,intn);intmain(){ float*a,*b,*c,*d; if(!InitCUDA())return0; 
a=(float*)malloc(sizeof(float)*N*N); b=(float*)malloc(sizeof(float)*N*N); c=(float*)malloc(sizeof(float)*N*N); d=(float*)malloc(sizeof(float)*N*N); srand(0); matgen(a,N,N); matgen(b,N,N); clock_ttime1=matmultCUDA(a,N,b,N,c,N,N); clock_ttime2=matmult(a,N,b,N,d,N,N); compare_mat(c,N,d,N,N); doublesec1=(double)time1/CLOCKS_PER_SEC; doublesec2=(double)time2/CLOCKS_PER_SEC; printf("Timeused:%.2fseconds(%.2lfGFLOPS)inCUDA,Timeused:%.2fseconds(%.2lfGFLOPS)inCPU\n",sec1,2.0*N*N*N/(sec1*1E9),sec2,2.0*N*N*N/(sec2*1E9)); return0;}boolInitCUDA(){ intcount; cudaGetDeviceCount(&count); if(count==0){ fprintf(stderr,"Thereisnodevice.\n"); returnfalse; } inti; for(i=0;i<count;i++){ cudaDevicePropprop; if(cudaGetDeviceProperties(&prop,i)==cudaSuccess){ if(prop.major>=1){ break; } } } if(i==count){ fprintf(stderr,"ThereisnodevicesupportingCUDA1.x.\n"); returnfalse; } cudaSetDevice(i); returntrue;}voidmatgen(float*a,intlda,intn){ inti,j; for(i=0;i<n;i++){ for(j=0;j<n;j++){ a[i*lda+j]=(float)rand()/RAND_MAX+ (float)rand()/(RAND_MAX*RAND_MAX); } }}clock_tmatmult(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn){ clock_tstart,end; inti,j,k; start=clock(); for(i=0;i<n;i++){ for(j=0;j<n;j++){ doublet=0; for(k=0;k<n;k++){ t+=a[i*lda+k]*b[k*ldb+j]; } c[i*ldc+j]=t; } } end=clock(); returnend-start;}voidcompare_mat(constfloat*a,intlda,constfloat*b,intldb,intn){ floatmax_err=0; floataverage_err=0; inti,j; for(i=0;i<n;i++){ for(j=0;j<n;j++){ if(b[i*ldb+j]!=0){ floaterr=fabs((a[i*lda+j]- b[i*ldb+j])/b[i*ldb+j]); if(max_err<err)max_err=err; average_err+=err; } } } printf("Maxerror:%gAverageerror:%g\n",max_err,average_err/(n*n));}clock_tmatmultCUDA(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn){ float*ac,*bc,*cc; clock_tstart,end; start=clock(); size_tpitch_a,pitch_b,pitch_c; cudaMallocPitch((void**)&ac,&pitch_a,sizeof(float)*n,n); cudaMallocPitch((void**)&bc,&pitch_b,sizeof(float)*n,n); cudaMallocPitch((void**)&cc,&pitch_c,sizeof(float)*n,n); 
cudaMemcpy2D(ac,pitch_a,a,sizeof(float)*lda,sizeof(float)*n,n,cudaMemcpyHostToDevice); cudaMemcpy2D(bc,pitch_b,b,sizeof(float)*ldb,sizeof(float)*n,n,cudaMemcpyHostToDevice); //intblocks=(n+NUM_THREADS-1)/NUM_THREADS; matMultCUDA<<<n,NUM_THREADS,sizeof(float)*n>>>(ac,pitch_a/sizeof(float),bc,pitch_b/sizeof(float),cc,pitch_c/sizeof(float),n); cudaMemcpy2D(c,sizeof(float)*ldc,cc,pitch_c,sizeof(float)*n,n,cudaMemcpyDeviceToHost); cudaFree(ac); cudaFree(bc); cudaFree(cc); end=clock(); returnend-start;}__global__staticvoidmatMultCUDA(constfloat*a,size_tlda,constfloat*b,size_tldb,float*c,size_tldc,intn){ extern__shared__floatdata[]; constinttid=threadIdx.x; constintrow=blockIdx.x; inti,j; for(i=tid;i<n;i+=blockDim.x){ data[i]=a[row*lda+i]; } __syncthreads(); for(j=tid;j<n;j+=blockDim.x){ floatt=0; floaty=0; for(i=0;i<n;i++){ floatr; y-=data[i]*b[i*ldb+j]; r=t-y; y=(r-t)+y; t=r; } c[row*ldc+j]=t; }}Win32全局變量Win32事件Win32CriticalSectionWin32MutexesLinux#include<pthread.h>#include<stdlib.h>#defineMAX_THREADS512void*compute_pi(void*);....main(){...pthread_tp_threads[MAX_THREADS];pthread_attr_tattr;pthread_attr_init(&attr);for(i=0;i<num_threads;i++){hits[i]=i;pthread_create(&p_threads[i],&attr,compute_pi,(void*)&hits[i]);}for(i=0;i<num_threads;i++){pthread_join(p_threads[i],NULL);total_hits+=hits[i];}void*compute_pi(void*s){……pthread_exit(0);}Linux#include "unpthread.h"void *copyto(void*);staticint sockfd; /*globalforboththreadstoaccess*/staticFILE *fp;voidstr_cli(FILE*fp_arg,intsockfd_arg){ char recvline[MAXLINE]; pthread_t tid; sockfd=sockfd_arg; /*copyargumentstoexternals*/ fp=fp_arg; Pthread_create(&tid,NULL,copyto,NULL); while(Readline(sockfd,recvline,MAXLINE)>0) Fputs(recvline,stdout);}void*copyto(void*arg){ char sendline[MAXLINE]; while(Fgets(sendline,MAXLINE,fp)!=NULL) Writen(sockfd,sendline,strlen(sendline)); Shutdown(sockfd,SHUT_WR); /*EOFonstdin,sendFIN*/ return(NULL); /*return(i.e.,threadterminates)whenend-of-fileonstdin*/}#include "unpthread.h"staticvoid 
*doit(void*); /*eachthreadexecutesthisfunction*/intmain(intargc,char**argv){ int listenfd,connfd; socklen_t addrlen,len; structsockaddr *cliaddr; if(argc==2) listenfd=Tcp_listen(NULL,argv[1],&addrlen); elseif(argc==3) listenfd=Tcp_listen(argv[1],argv[2],&addrlen); else err_quit("usage:tcpserv01[<host>]<serviceorport>"); cliaddr=Malloc(addrlen); for(;;){ len=addrlen; connfd=Accept(listenfd,cliaddr,&len); Pthread_create(NULL,NULL,&doit,(void*)connfd); }}staticvoid*doit(void*arg){ Pthread_detach(pthread_self()); str_echo((int)arg); /*samefunctionasbefore*/ Close((int)arg); /*wearedonewithconnectedsocket*/ return(NULL);}Linuxmutexmain(){....pthread_mutex_init(&minimum_value_lock,NULL);....}void*find_min(void*list_ptr){....pthread_mutex_lock(&minimum_value_lock);if(my_min<minimum_value)minimum_value=my_min;/*andunlockthemutex*/pthread_mutex_unlock(&minimum_value_lock);#include<stdio.h>#include<pthread.h>#defineTHREAD_NUMBER10pthread_mutex_tmutex=PTHREAD_MUTEX_INITIALIZER;pthread_cond_tcond=PTHREAD_COND_INITIALIZER;intsum=0;void*th_counter(void*argc){inti;i=*(int*)argc;sleep(1);pthread_mutex_lock(&mutex);sum=sum+i;if(sum>10)pthread_cond_signal(&cond);pthread_mutex_unlock(&mutex);printf("count%disover\n",i);return;}void*waitsum(void*argc){pthread_mutex_lock(&mutex);while(sum<=10)pthread_cond_wait(&cond,&mutex);printf("Getasignalthatthesumhasbeenupto10!\n");pthread_mutex_unlock(&mutex);}intmain(void){pthread_tpt[THREAD_NUMBER];inti;intarg[THREAD_NUMBER];pthread_create(&pt[THREAD_NUMBER-1],NULL,waitsum,NULL);for(i=0;i<THREAD_NUMBER-1;i++){arg[i]=i;pthread_create(&pt[i],NULL,th_counter,(void*)&arg[i]);}for(i=0;i<THREAD_NUMBER;i++)pthread_detach(pt[i]);//pthread_join(pt[i],NULL);printf("Themainthreadiswaitingforallthethreadsfinishing...\n");sleep(5);printf("sumis%d\n",sum);pthread_mutex_destroy(&mutex);pthread_cond_destroy(&cond);return0;}Linux生產者消費者pthread_cond_tcond_queue_empty,cond_queue_full;pthread_mutex_ttask_queue_cond_lock;inttask_available;/*otherdatastructureshere*/m
ain(){/*declarationsandinitializations*/task_available=0;pthread_init();pthread_cond_init(&cond_queue_empty,NULL);pthread_cond_init(&cond_queue_full,NULL);pthread_mutex_init(&task_queue_cond_lock,NULL);/*createandjoinproducerandconsumerthreads*/}void*producer(void*producer_thread_data){intinserted;while(!done()){create_task();pthread_mutex_lock(&task_queue_cond_lock);while(task_available==1)pthread_cond_wait(&cond_queue_empty,task_queue_cond_lock);insert_into_queue();task_available=1;pthread_cond_signal(&cond_queue_full);pthread_mutex_unlock(&task_queue_cond_lock);}}void*consumer(void*consumer_thread_data){while(!done()){pthread_mutex_lock(&task_queue_cond_lock);while(task_available==0)pthread_cond_wait(&cond_queue_full,&task_queue_cond_lock);my_task=extract_from_queue();task_available=0;pthread_cond_signal(&cond_queue_empty);pthread_mutex_unlock(&task_queue_cond_lock);process_task(my_task);}}Linux讀寫鎖typedefstruct{intreaders;intwriter;pthread_cond_treaders_proceed;pthread_cond_twriter_proceed;intpending_writers;pthread_mutex_tread_write_lock;}mylib_rwlock_t;voidmylib_rwlock_init(mylib_rwlock_t*l){l->readers=l->writer=l->pending_writers=0;pthread_mutex_init(&(l->read_write_lock),NULL);pthread_cond_init(&(l->readers_proceed),NULL);pthread_cond_init(&(l->writer_proceed),NULL);}voidmylib_rwlock_rlock(mylib_rwlock_t*l){/*ifthereisawritelockorpendingwriters,performconditionwait..elseincrementcountofreadersandgrantreadlock*/pthread_mutex_lock(&(l->read_write_lock));while((l->pending_writers>0)||(l->writer>0))pthread_cond_wait(&(l->readers_proceed),&(l->read_write_lock));l->readers++;pthread_mutex_unlock(&(l->read_write_lock));}voidmylib_rwlock_wlock(mylib_rwlock_t*l){/*iftherearereadersorwriters,incrementpendingwriterscountandwait.Onbeingwoken,decrementpendingwriterscountandincrementwritercount*/pthread_mutex_lock(&(l->read_write_lock));while((l->writer>0)||(l->readers>0)){l->pending_writers++;pthread_cond_wait(&(l->writer_proceed),&(l->read_write_lock));}l->pending_writer
s--;l->writer++;pthread_mutex_unlock(&(l->read_write_lock));}voidmylib_rwlock_unlock(mylib_rwlock_t*l){/*ifthereisawritelockthenunlock,elseiftherearereadlocks,decrementcountofreadlocks.Ifthecountis0andthereisapendingwriter,letitthrough,elseiftherearependingreaders,letthemallgothrough*/pthread_mutex_lock(&(l->read_write_lock));if(l->writer>0)l->writer=0;elseif(l->readers>0)l->readers--;pthread_mutex_unlock(&(l->read_write_lock
溫馨提示
- 1. 本站所有資源如無特殊說明,都需要本地電腦安裝OFFICE2007和PDF閱讀器。圖紙軟件為CAD,CAXA,PROE,UG,SolidWorks等.壓縮文件請下載最新的WinRAR軟件解壓。
- 2. 本站的文檔不包含任何第三方提供的附件圖紙等,如果需要附件,請聯繫上傳者。文件的所有權益歸上傳用戶所有。
- 3. 本站RAR壓縮包中若帶圖紙,網頁內容里面會有圖紙預覽,若沒有圖紙預覽就沒有圖紙。
- 4. 未經權益所有人同意不得將文件中的內容挪作商業或盈利用途。
- 5. 人人文庫網僅提供信息存儲空間,僅對用戶上傳內容的表現方式做保護處理,對用戶上傳分享的文檔內容本身不做任何修改或編輯,并不能對任何下載內容負責。
- 6. 下載文件中如有侵權或不適當內容,請與我們聯繫,我們立即糾正。
- 7. 本站不保證下載資源的準確性、安全性和完整性, 同時也不承擔用戶因使用這些下載資源對自己和他人造成任何形式的傷害或損失。
最新文檔
- 永州職業技術學院《康樂服務與管理》2023-2024學年第一學期期末試卷
- 永州師范高等專科學校《設計基礎造型》2023-2024學年第一學期期末試卷
- 營口理工學院《男生極限飛盤》2023-2024學年第一學期期末試卷
- 鷹潭職業技術學院《物理化學(2-2)》2023-2024學年第一學期期末試卷
- 銀川能源學院《化工熱力學A》2023-2024學年第一學期期末試卷
- 2024印刷廠印刷材料采購與供應鏈優化承包合同3篇
- 益陽職業技術學院《環境設計與溝通》2023-2024學年第一學期期末試卷
- 銀川科技學院《AM與嵌入式系統》2023-2024學年第一學期期末試卷
- 2024年度擔保公司能源行業擔保合同詳細范本3篇
- 長期出租車合同3篇
- 裝修工作的進度報告
- 《食品包裝與安全》課件
- 普外科護士長述職報告
- 混凝土組織供應運輸售后服務方案
- +山東省泰安市肥城市2023-2024學年七年級上學期期末考試地理試題+
- 內蒙古自治區(qū)呼和浩特市部分學校2023-2024學年九年級上學期期末數學試卷
- 文物保護工作的調研報告(16篇)
- 成長計劃300字初中綜合素質評價初三
- 口腔科會員制度
- 2023新能源場站一次調頻控制系統(tǒng)技術規(guī)范
- 胸痛中心培訓急性胸痛患者的早期快速甄別
評論
0/150
提交評論