Research Article

Multi-GPU Support on Single Node Using Directive-Based Programming Model

Algorithm 4

Single-GPU implementation of the 2D heat equation.
// Performs one explicit update step of the 2D heat equation on the device.
//
// Parameters (order taken from the call step_kernel(ni, nj, tfac, temp1, temp2)):
//   ni, nj   - grid extents; only interior points 1..ni-2 x 1..nj-2 are updated
//   tfac     - factor applied to the sum of the two second differences
//   temp_in  - input temperatures, ni*nj elements, read only
//   temp_out - output temperatures, ni*nj elements; boundary entries are
//              not written by this function
//
// Both arrays must already be on the device: the `present` clause performs no
// transfer of its own and relies on an enclosing data region.
//
// I2D(ni, i, j) maps a 2D grid coordinate to an index into linear memory and
// is expected to be defined elsewhere in the file.
//
// NOTE(review): the extracted listing lost the parameter list; the element
// type `float` is an assumption - confirm against the original source.
void step_kernel(int ni, int nj, float tfac, const float *temp_in, float *temp_out)
{
// The directive spans two lines, so it needs an explicit line continuation.
#pragma acc parallel present(temp_in[0:ni*nj], temp_out[0:ni*nj]) \
    num_gangs(32) vector_length(32)
    {
        // loop over all points in domain (except boundary)
#pragma acc loop gang
        for (int j = 1; j < nj - 1; j++) {
#pragma acc loop vector
            for (int i = 1; i < ni - 1; i++) {
                // find indices into linear memory
                // for central point and neighbours
                // (declared inside the loop so every iteration has its own copy)
                const int i00 = I2D(ni, i, j);
                const int im10 = I2D(ni, i - 1, j);
                const int ip10 = I2D(ni, i + 1, j);
                const int i0m1 = I2D(ni, i, j - 1);
                const int i0p1 = I2D(ni, i, j + 1);

                // evaluate derivatives (second differences in x and y)
                const float d2tdx2 = temp_in[im10] - 2 * temp_in[i00] + temp_in[ip10];
                const float d2tdy2 = temp_in[i0m1] - 2 * temp_in[i00] + temp_in[i0p1];

                // update temperatures
                temp_out[i00] = temp_in[i00] + tfac * (d2tdx2 + d2tdy2);
            }
        }
    }
}
#pragma acc data copy(temp1[0:ninj])  
copyin(temp2[0:ninj])
{
for (istep=0; istep < nstep; istep++) {
step_kernel(ni, nj, tfac, temp1, temp2);
// swap the temp pointers
temp = temp1;
temp1 = temp2;
temp2 = temp;
}
}