Research Article

Effective SIMD Vectorization for Intel Xeon Phi Coprocessors

Algorithm 5

An example of the combined use of the OpenMP `parallel for` and `simd` constructs.
/*
 * Escape-time kernel for one point of the Mandelbrot set.
 *
 * Starting from z = c, repeatedly applies z = z * z + c and counts how
 * many consecutive iterations keep |z| below 2.
 *
 * c        - point of the complex plane being tested.
 * max_iter - upper bound on the number of iterations; declared uniform,
 *            i.e. the same value in every SIMD lane of a vectorized call.
 *
 * Returns 1 plus the number of iterations for which |z| stayed below 2,
 * so the result lies in [1, max_iter + 1].
 *
 * OpenMP directive and clause names are case-sensitive in C, hence the
 * lowercase `simd` / `simdlen`.
 */
#pragma omp declare simd uniform(max_iter) simdlen(32)
uint32_t mandel(fcomplex c, uint32_t max_iter)
{
    uint32_t count = 1;
    fcomplex z = c;

    /* Unsigned index: matches the type of max_iter, so the comparison
     * involves no signed/unsigned conversion. */
    for (uint32_t i = 0; i < max_iter; i += 1) {
        z = z * z + c;
        /* t is 1 while the orbit is still inside the radius-2 disc. */
        int t = (cabsf(z) < 2.0f);
        count += t;
        if (t == 0) {
            break;
        }
    }
    return count;
}
Call-site code:
int main() {
    
/* Rows of the image are distributed across threads with guided
 * scheduling; within each row the pixel loop is marked for SIMD
 * execution and calls mandel() once per pixel. */
#pragma omp parallel for schedule(guided)
for (int32_t y = 0; y < ImageHeight; ++y) {
    /* Imaginary coordinate shared by every pixel of row y. */
    float c_im = max_imag - y * imag_factor;
    #pragma omp simd safelen(32)
    for (int32_t x = 0; x < ImageWidth; ++x) {
        /* Complex coordinate of pixel (x, y). The `1.0iF` imaginary
         * constant is a compiler extension, not ISO C. */
        fcomplex in_val = (min_real + x * real_factor) + (c_im * 1.0iF);
        /* NOTE(review): `count` is presumably a 2-D array indexed
         * [row][column], declared in the elided part of main - confirm. */
        count[y][x] = mandel(in_val, max_iter);
    }
}