9.10. Doacross Loop Nest

An ordered clause can be used on a loop construct with an integer parameter argument to define the number of associated loops within a doacross loop nest where cross-iteration dependences exist. A doacross clause on an ordered construct within an ordered loop describes the dependences of the doacross loops.

In the code below, the doacross(sink:i-1) clause defines an i-1 to i cross-iteration dependence that specifies a wait point for the completion of computation from iteration i-1 before proceeding to the subsequent statements. The doacross(source:omp_cur_iteration) or doacross(source:) clause indicates the completion of computation from the current iteration (i) to satisfy the cross-iteration dependence that arises from that iteration. The omp_cur_iteration keyword is optional for the source dependence type. For this example, the same sequential ordering could have been achieved with an ordered clause without a parameter on the loop directive and a single ordered directive, without the doacross clause, applied to the statement that calls the bar function.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: doacross.1
* type: C
* version: omp_5.2
*/

/* Prototypes only; the definitions are not part of this example. */
float foo(int i);
float bar(float a, float b);
float baz(float b);

/*
 * One-dimensional doacross loop.
 *
 * For every i in [1, N):
 *   A[i] = foo(i)
 *   B[i] = bar(A[i], B[i-1])   -- depends on the previous iteration
 *   C[i] = baz(B[i])
 *
 * B[0] is read as the seed of the recurrence and is never written here.
 * ordered(1) declares that the single associated loop carries doacross
 * dependences; the two stand-alone ordered directives below describe them.
 */
void work( int N, float *A, float *B, float *C )
{
  int i;

  #pragma omp for ordered(1)
  for (i = 1; i < N; ++i) {
    A[i] = foo(i);

    /* Wait point: iteration i-1 must have reached its source directive. */
    #pragma omp ordered doacross(sink: i-1)
    B[i] = bar(A[i], B[i-1]);

    /* B[i] is final; release the iteration that sinks on this one. */
    #pragma omp ordered doacross(source: omp_cur_iteration)

    C[i] = baz(B[i]);
  }
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: doacross.1
! type: F-free
! version:    omp_5.2

! One-dimensional doacross loop.
!
! For every i in 2..N:
!   A(i) = foo(i)
!   B(i) = bar(A(i), B(i-1))   -- depends on the previous iteration
!   C(i) = baz(B(i))
!
! Arguments:
!   N : extent of A, B and C.
!   A : elements 2..N are overwritten; A(1) is left untouched.
!   B : B(1) is read as the seed of the recurrence; 2..N are overwritten.
!   C : elements 2..N are overwritten; C(1) is left untouched.
!
! The arrays are intent(inout) rather than intent(out) because element 1
! of each is never assigned here and must keep the caller's value.
subroutine work( N, A, B, C )
  implicit none
  integer, intent(in) :: N
  real, dimension(N), intent(inout) :: A, B, C
  real, external :: foo, bar, baz
  integer :: i

  ! ordered(1): the single associated loop carries doacross dependences.
  !$omp do ordered(1)
  do i=2, N
    A(i) = foo(i)

  ! Wait point: iteration i-1 must have reached its source directive.
  !$omp ordered doacross(sink: i-1)
    B(i) = bar(A(i), B(i-1))
  ! B(i) is final; release the iteration that sinks on this one.
  !$omp ordered doacross(source: omp_cur_iteration)

    C(i) = baz(B(i))
  end do
end subroutine work

The following code is similar to the previous example but with the doacross loop nest extended to two nested loops, i and j, as specified by the ordered(2) clause on the loop directive. In the C/C++ code, the i and j loops are the first and second associated loops, respectively, whereas in the Fortran code, the j and i loops are the first and second associated loops, respectively. The doacross(sink:i-1,j) and doacross(sink:i,j-1) clauses in the C/C++ code define cross-iteration dependences in two dimensions from iterations (i-1, j) and (i, j-1) to iteration (i, j). Likewise, the doacross(sink:j-1,i) and doacross(sink:j,i-1) clauses in the Fortran code define cross-iteration dependences from iterations (j-1, i) and (j, i-1) to iteration (j, i).

//%compiler: clang
//%cflags: -fopenmp

/*
* name: doacross.2
* type: C
* version: omp_5.2
*/

/* Prototypes only; the definitions are not part of this example. */
float foo(int i, int j);
float bar(float a, float b, float c);
float baz(float b);

/*
 * Two-dimensional doacross loop nest.
 *
 * For every (i, j) in [1, N) x [1, M):
 *   A[i][j] = foo(i, j)
 *   B[i][j] = bar(A[i][j], B[i-1][j], B[i][j-1])
 *   C[i][j] = baz(B[i][j])
 *
 * Row 0 and column 0 of B are read as boundary values and never written
 * here. ordered(2) makes i the first and j the second associated loop,
 * which is the order in which the sink vectors below are written.
 */
void work( int N, int M, float **A, float **B, float **C )
{
  int i, j;

  #pragma omp for ordered(2)
  for (i = 1; i < N; ++i) {
    for (j = 1; j < M; ++j) {
      A[i][j] = foo(i, j);

      /* Wait for iterations (i-1, j) and (i, j-1), whose B values are read. */
      #pragma omp ordered doacross(sink: i-1,j) doacross(sink: i,j-1)
      B[i][j] = bar(A[i][j], B[i-1][j], B[i][j-1]);

      /* B[i][j] is final; release iterations that sink on (i, j). */
      #pragma omp ordered doacross(source:)

      C[i][j] = baz(B[i][j]);
    }
  }
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: doacross.2
! type: F-free
! version:    omp_5.2

! Two-dimensional doacross loop nest.
!
! For every (i, j) with i in 2..M and j in 2..N:
!   A(i,j) = foo(i, j)
!   B(i,j) = bar(A(i,j), B(i-1,j), B(i,j-1))
!   C(i,j) = baz(B(i,j))
!
! Arguments:
!   N, M : extents of the second and first array dimension, respectively.
!   A, C : interior elements (2..M, 2..N) are overwritten.
!   B    : row 1 and column 1 are read as boundary values; the interior
!          is overwritten.
!
! The arrays are intent(inout) rather than intent(out) because their first
! row and column are never assigned here and must keep the caller's values.
subroutine work( N, M, A, B, C )
  implicit none
  integer, intent(in) :: N, M
  real, dimension(M,N), intent(inout) :: A, B, C
  real, external :: foo, bar, baz
  integer :: i, j

  ! ordered(2): j is the first and i the second associated loop, so every
  ! sink vector below is written in (j, i) order.
  !$omp do ordered(2)
  do j=2, N
    do i=2, M
      A(i,j) = foo(i, j)

    ! (j-1, i) is the iteration that produced B(i,j-1);
    ! (j, i-1) is the iteration that produced B(i-1,j).
    !$omp ordered doacross(sink: j-1,i) doacross(sink: j,i-1)
      B(i,j) = bar(A(i,j), B(i-1,j), B(i,j-1))
    ! B(i,j) is final; release iterations that sink on (j, i).
    !$omp ordered doacross(source:)

      C(i,j) = baz(B(i,j))
    end do
  end do
end subroutine work

The following example shows the incorrect use of the ordered directive with a doacross clause. There are two issues with the code. The first issue is a missing ordered doacross(source:) directive, which could cause a deadlock. The second issue is that the doacross(sink:i+1,j) and doacross(sink:i,j+1) clauses define dependences on the lexicographically later source iterations (i+1, j) and (i, j+1), which could also cause a deadlock since those iterations may not start to execute until the current iteration completes.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: doacross.3
* type: C
* version: omp_5.2
*/

/* Extent of each of the two trailing dimensions of p. */
#define N 100

/*
 * INTENTIONALLY INCORRECT example -- do not copy.
 *
 * Updates each interior element p[i][j][k] in place with the average of
 * its six face neighbours, using i and j as the two doacross loops
 * (ordered(2)) and k as an ordinary inner loop.
 *
 * The doacross usage is wrong in two ways, each of which could cause a
 * deadlock:
 *   1. No "ordered doacross(source:)" directive is ever executed, so the
 *      sink dependences are never satisfied.
 *   2. Two of the sink vectors, (i+1, j) and (i, j+1), name
 *      lexicographically later iterations, which may not start to execute
 *      until the current iteration completes.
 */
void work_wrong(double p[][N][N])
{
  int i, j, k;

  #pragma omp parallel for ordered(2) private(i,j,k)
  for (i=1; i<N-1; i++)
  {
    for (j=1; j<N-1; j++)
    {
      /* Issue 2: sink: i+1,j and sink: i,j+1 refer to later iterations. */
  #pragma omp ordered doacross(sink: i-1,j) doacross(sink: i+1,j) \
                      doacross(sink: i,j-1) doacross(sink: i,j+1)
      for (k=1; k<N-1; k++)
      {
        double tmp1 = p[i-1][j][k] + p[i+1][j][k];
        double tmp2 = p[i][j-1][k] + p[i][j+1][k];
        double tmp3 = p[i][j][k-1] + p[i][j][k+1];
        p[i][j][k] = (tmp1 + tmp2 + tmp3) / 6.0;
      }
/* Issue 1: missing #pragma omp ordered doacross(source:) */
    }
  }
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: doacross.3
! type: F-free
! version:    omp_5.2

! INTENTIONALLY INCORRECT example -- do not copy.
!
! Updates each interior element p(k,j,i) in place with the average of its
! six face neighbours, using i and j as the two doacross loops (ordered(2))
! and k as an ordinary inner loop.
!
! The doacross usage is wrong in two ways, each of which could cause a
! deadlock:
!   1. No "ordered doacross(source:)" directive is ever executed, so the
!      sink dependences are never satisfied.
!   2. Two of the sink vectors, (i+1, j) and (i, j+1), name
!      lexicographically later iterations, which may not start to execute
!      until the current iteration completes.
subroutine work_wrong(N, p)
  integer :: N
  real(8), dimension(N,N,N) :: p
  integer :: i, j, k
  real(8) :: tmp1, tmp2, tmp3

!$omp parallel do ordered(2) private(i,j,k,tmp1,tmp2,tmp3)
  do i=2, N-1
    do j=2, N-1
    ! Issue 2: sink: i+1,j and sink: i,j+1 refer to later iterations.
    !$omp ordered doacross(sink: i-1,j) doacross(sink: i+1,j) &
    !$omp&        doacross(sink: i,j-1) doacross(sink: i,j+1)
      do k=2, N-1
        tmp1 = p(k-1,j,i) + p(k+1,j,i)
        tmp2 = p(k,j-1,i) + p(k,j+1,i)
        tmp3 = p(k,j,i-1) + p(k,j,i+1)
        p(k,j,i) = (tmp1 + tmp2 + tmp3) / 6.0
      end do
! Issue 1: missing !$omp ordered doacross(source:)
    end do
  end do
end subroutine

The following example illustrates the use of the collapse clause for a doacross loop nest. The i and j loops are the associated loops for the collapsed loop as well as for the doacross loop nest. The example also shows a compliant usage of the dependence source directive placed before the corresponding sink directive. Checking the completion of computation from previous iterations at the sink point can occur after the source statement.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: doacross.4
* type: C
* version: omp_5.2
*/

/* Prototype only; the definition is not part of this example. */
double foo(int i, int j);

/*
 * Collapsed two-dimensional doacross loop nest in which the source
 * directive precedes the sink directive.
 *
 * For every (i, j) in [1, N-1) x [1, M-1):
 *   A[i][j] = foo(i, j)
 *   B[i][j] = alpha * A[i][j]
 *   C[i][j] = 0.2 * (five-point sum of A centred on (i, j))
 *
 * collapse(2) and ordered(2) both name the i and j loops as the
 * associated loops.
 */
void work( int N, int M, double **A, double **B, double **C )
{
  int i, j;
  double alpha = 1.2;

  #pragma omp for collapse(2) ordered(2)
  for (i = 1; i < N-1; ++i) {
    for (j = 1; j < M-1; ++j) {
      A[i][j] = foo(i, j);

      /* A[i][j] is final: signal completion before doing the local work. */
      #pragma omp ordered doacross(source:)

      /* Uses only this iteration's data, so it needs no wait point. */
      B[i][j] = alpha * A[i][j];

      /*
       * Wait for iterations (i-1, j) and (i, j-1) before reading their A
       * values.
       * NOTE(review): A[i+1][j] and A[i][j+1] are written by later
       * iterations that these sink clauses do not order against -- confirm
       * that reading them here is intended.
       */
      #pragma omp ordered doacross(sink: i-1,j) doacross(sink: i,j-1)
      C[i][j] = 0.2 * (A[i-1][j] + A[i+1][j] +
                A[i][j-1] + A[i][j+1] + A[i][j]);
    }
  }
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: doacross.4
! type: F-free
! version:    omp_5.2

! Collapsed two-dimensional doacross loop nest in which the source
! directive precedes the sink directive.
!
! For every (i, j) with i in 2..M-1 and j in 2..N-1:
!   A(i,j) = foo(i, j)
!   B(i,j) = alpha * A(i,j)
!   C(i,j) = 0.2 * (five-point sum of A centred on (i, j))
!
! Arguments:
!   N, M    : extents of the second and first array dimension, respectively.
!   A, B, C : interior elements (2..M-1, 2..N-1) are overwritten; the
!             border of A is read by the stencil.
!
! The arrays are intent(inout) rather than intent(out) because their
! border elements are never assigned here and must keep the caller's values.
subroutine work( N, M, A, B, C )
  implicit none
  integer, intent(in) :: N, M
  real(8), dimension(M, N), intent(inout) :: A, B, C
  real(8), external :: foo
  ! A named constant avoids the implicit SAVE that an initialised local
  ! would get, and the _8 suffix keeps the value in the kind of the arrays
  ! instead of rounding it through default real first.
  real(8), parameter :: alpha = 1.2_8
  integer :: i, j

  ! collapse(2) and ordered(2) both name the j and i loops as the
  ! associated loops, so every sink vector below is written in (j, i) order.
  !$omp do collapse(2) ordered(2)
  do j=2, N-1
    do i=2, M-1
      A(i,j) = foo(i, j)
    ! A(i,j) is final: signal completion before doing the local work.
    !$omp ordered doacross(source:)

      ! Uses only this iteration's data, so it needs no wait point.
      B(i,j) = alpha * A(i,j)

    ! (j, i-1) produced A(i-1,j); (j-1, i) produced A(i,j-1).
    ! NOTE(review): A(i+1,j) and A(i,j+1) are written by later iterations
    ! that these sink clauses do not order against -- confirm that reading
    ! them here is intended.
    !$omp ordered doacross(sink: j,i-1) doacross(sink: j-1,i)
      C(i,j) = 0.2_8 * (A(i-1,j) + A(i+1,j) +  &
               A(i,j-1) + A(i,j+1) + A(i,j))
    end do
  end do
end subroutine work