Research Article

Optimized Data Transfers Based on the OpenCL Event Management Mechanism

Listing 4

A Himeno benchmark code with the proposed OpenCL extension.
/*
 * Iteration loop of the listing. Each iteration swaps the p_new/p_old handles,
 * issues two jacobi_kernel_* calls and two clEnqueueSendrecvBuffer calls spread
 * over the two command queues (the queue assignment is mirrored between even
 * and odd ranks), and then waits for both queues to finish.
 *
 * NOTE(review): N, rank, p_new0, p_new1 and the jacobi_kernel_* helpers are not
 * defined in this listing. The last two arguments of each helper are presumably
 * (event to wait on, event to signal), and the last three arguments of
 * clEnqueueSendrecvBuffer presumably follow the usual OpenCL
 * (num_events_in_wait_list, event_wait_list, event) convention; confirm
 * against their definitions.
 * NOTE(review): no release of the events stored in e[] is visible in this
 * listing; verify whether that is handled elsewhere.
 */
() cl_command_queue cmd1, cmd2; /* the two command queues used below */
() cl_mem p_new, p_old, p_tmp; /* buffer handles; p_tmp is only used for the swap */
() cl_event e[2]; /* e[0] and e[1] are passed between the calls in each branch */
()
() for(int i(0);i<N;++i){ /* note: int i(0) is C++ initialization syntax */
()  p_tmp = p_new; p_new = p_old; p_old = p_tmp; /* exchange the p_new and p_old handles */
()  if( rank%2 == 0){ /* even ranks */
()    jacobi_kernel_even_A(cmd1,p_new0,NULL,&e[0]); /* on cmd1; presumably signals e[0] */
()    clEnqueueSendrecvBuffer(cmd2,p_old,0,NULL,&e[1]); /* on cmd2, buffer p_old; presumably signals e[1] */
()  jacobi_kernel_even_B(cmd2,p_new1,&e[1],NULL); /* on cmd2; presumably waits on e[1] */
()  clEnqueueSendrecvBuffer(cmd1,p_new,1,&e[0],NULL); /* on cmd1, buffer p_new; presumably waits on e[0] */
() }
() else{ /* odd ranks: same sequence with cmd1 and cmd2 exchanged */
()  jacobi_kernel_odd_B(cmd2,p_new0,NULL,&e[0]); /* on cmd2; presumably signals e[0] */
()  clEnqueueSendrecvBuffer(cmd1,p_old,0,NULL,&e[1]); /* on cmd1, buffer p_old; presumably signals e[1] */
()  jacobi_kernel_odd_A(cmd1,p_new1,&e[1],NULL); /* on cmd1; presumably waits on e[1] */
()  clEnqueueSendrecvBuffer(cmd2,p_new,1,&e[0],NULL); /* on cmd2, buffer p_new; presumably waits on e[0] */
() }
() clFinish(cmd1);clFinish(cmd2); /* block until the commands queued on both queues have completed */
() /* error calculation */
() }