5.3. Task Dependences#

5.3.1. Flow Dependence#

This example shows a simple flow dependence using a depend clause on the task construct.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.1
* type: C
* version: omp_4.0
*/
#include <stdio.h>
int main() {
   /* x is shared by the two tasks below; their depend clauses order the
      accesses to it. */
   int x;

   x = 1;
   #pragma omp parallel
   {
      #pragma omp single
      {
         /* Writer task: out dependence on x. */
         #pragma omp task shared(x) depend(out: x)
         {
            x = 2;
         }

         /* Reader task: the in dependence on x orders it after the writer. */
         #pragma omp task shared(x) depend(in: x)
         {
            printf("x = %d\n", x);
         }
      }
   }
   return 0;
}

!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.1
! type: F-free
! version:    omp_4.0
program example
   implicit none
   integer :: x

   ! Initial value; the writer task replaces it before the reader prints.
   x = 1

   !$omp parallel
   !$omp single

      ! Writer task: out dependence on x.
      !$omp task shared(x) depend(out: x)
         x = 2
      !$omp end task

      ! Reader task: the in dependence on x orders it after the writer.
      !$omp task shared(x) depend(in: x)
         write (*, *) "x = ", x
      !$omp end task

   !$omp end single
   !$omp end parallel
end program example

The program will always print x = 2, because the depend clauses enforce the ordering of the tasks. If the depend clauses had been omitted, then the tasks could execute in any order and the program would have a race condition.

5.3.2. Anti-dependence#

This example shows an anti-dependence using the depend clause on the task construct.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.2
* type: C
* version: omp_4.0
*/
#include <stdio.h>
int main()
{
   int x;

   x = 1;
   #pragma omp parallel
   {
      #pragma omp single
      {
         /* Reader task: in dependence on x, generated first. */
         #pragma omp task shared(x) depend(in: x)
         {
            printf("x = %d\n", x);
         }

         /* Writer task: its out dependence on x orders it after the
            previously generated reader (anti-dependence). */
         #pragma omp task shared(x) depend(out: x)
         {
            x = 2;
         }
      }
   }
   return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.2
! type: F-free
! version:    omp_4.0
program example
   implicit none
   integer :: x

   x = 1

   !$omp parallel
   !$omp single

      ! Reader task: in dependence on x, generated first.
      !$omp task shared(x) depend(in: x)
         write (*, *) "x = ", x
      !$omp end task

      ! Writer task: its out dependence on x orders it after the
      ! previously generated reader (anti-dependence).
      !$omp task shared(x) depend(out: x)
         x = 2
      !$omp end task

   !$omp end single
   !$omp end parallel
end program example

The program will always print x = 1 , because the depend clauses enforce the ordering of the tasks. If the depend clauses had been omitted, then the tasks could execute in any order and the program would have a race condition.

5.3.3. Output Dependence#

task dependences!output dependence

This example shows an output dependence using the depend clause on the task construct.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.3
* type: C
* version: omp_4.0
*/
#include <stdio.h>
int main() {
   int x;   /* assigned only inside the tasks below */

   #pragma omp parallel
   {
      #pragma omp single
      {
         /* First writer of x. */
         #pragma omp task shared(x) depend(out: x)
         {
            x = 1;
         }

         /* Second writer: its out dependence on x orders it after the
            first writer (output dependence). */
         #pragma omp task shared(x) depend(out: x)
         {
            x = 2;
         }

         /* Wait for both child tasks before the generating task reads x. */
         #pragma omp taskwait
         printf("x = %d\n", x);
      }
   }
   return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.3
! type: F-free
! version:    omp_4.0
program example
   implicit none
   integer :: x      ! assigned only inside the tasks below

   !$omp parallel
   !$omp single

      ! First writer of x.
      !$omp task shared(x) depend(out: x)
         x = 1
      !$omp end task

      ! Second writer: its out dependence on x orders it after the
      ! first writer (output dependence).
      !$omp task shared(x) depend(out: x)
         x = 2
      !$omp end task

      ! Wait for both child tasks before the generating task reads x.
      !$omp taskwait
      write (*, *) "x = ", x

   !$omp end single
   !$omp end parallel
end program example

The program will always print x = 2 , because the depend clauses enforce the ordering of the tasks. If the depend clauses had been omitted, then the tasks could execute in any order and the program would have a race condition.

5.3.4. Concurrent Execution with Dependences#

In this example we show potentially concurrent execution of tasks using multiple flow dependences expressed using the depend clause on the task construct.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.4
* type: C
* version: omp_4.0
*/
#include <stdio.h>
int main() {
   int x;

   x = 1;
   #pragma omp parallel
   {
      #pragma omp single
      {
         /* Writer task: out dependence on x. */
         #pragma omp task shared(x) depend(out: x)
         {
            x = 2;
         }

         /* Two reader tasks: each depends on the writer through its in
            dependence on x, but there is no dependence between them. */
         #pragma omp task shared(x) depend(in: x)
         {
            printf("x + 1 = %d. ", x+1);
         }
         #pragma omp task shared(x) depend(in: x)
         {
            printf("x + 2 = %d\n", x+2);
         }
      }
   }
   return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.4
! type: F-free
! version:    omp_4.0

program example
   implicit none
   integer :: x

   x = 1

   !$omp parallel
   !$omp single
      ! Writer task: out dependence on x.
      !$omp task shared(x) depend(out: x)
         x = 2
      !$omp end task

      ! Two reader tasks: each depends on the writer through its in
      ! dependence on x, but there is no dependence between them.
      !$omp task shared(x) depend(in: x)
         write (*, *) "x + 1 = ", x+1, "."
      !$omp end task
      !$omp task shared(x) depend(in: x)
         write (*, *) "x + 2 = ", x+2, "."
      !$omp end task
   !$omp end single
   !$omp end parallel
end program example

The last two tasks are dependent on the first task. However, there is no dependence between the last two tasks, which may execute in any order (or concurrently if more than one thread is available). Thus, the possible outputs are x + 1 = 3. x + 2 = 4. and x + 2 = 4. x + 1 = 3. . If the depend clauses had been omitted, then all of the tasks could execute in any order and the program would have a race condition.

5.3.5. Matrix multiplication#

This example shows a task-based blocked matrix multiplication. Matrices are of NxN elements, and the multiplication is implemented using blocks of BSxBS elements.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.5
* type: C
* version: omp_4.0
*/
// Assume BS divides N perfectly
void matmul_depend(int N, int BS, float A[N][N], float B[N][N], float
C[N][N] )
{
   int i, j, k;      /* origin of the current BSxBS block in each dimension */
   int ii, jj, kk;   /* element indices inside the current block */

   /* Accumulates A*B into C, generating one task per BSxBS block product.
      No task synchronization is performed here before returning. */
   for (i = 0; i < N; i += BS) {
      for (j = 0; j < N; j += BS) {
         for (k = 0; k < N; k += BS) {
/* Note 1: i, j, k, A, B, C are firstprivate by default */
/* Note 2: A, B and C are just pointers */
/* Tasks sharing the same (i, j) block of C are ordered by the inout
   dependence on that block. */
#pragma omp task private(ii, jj, kk) \
            depend ( in: A[i:BS][k:BS], B[k:BS][j:BS] ) \
            depend ( inout: C[i:BS][j:BS] )
            {
               for (ii = i; ii < i+BS; ii++ ) {
                  for (jj = j; jj < j+BS; jj++ ) {
                     for (kk = k; kk < k+BS; kk++ ) {
                        C[ii][jj] += A[ii][kk] * B[kk][jj];
                     }
                  }
               }
            }
         }
      }
   }
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.5
! type: F-free
! version:    omp_4.0
! Assume BS divides N perfectly
subroutine matmul_depend (N, BS, A, B, C)
   ! Task-based blocked matrix multiply-accumulate: C = C + A*B.
   !
   !   N   order of the square matrices A, B and C
   !   BS  block size (assumed to divide N)
   !   A,B input matrices
   !   C   accumulator, updated in place
   !
   ! One task is generated per BSxBS block product.  Tasks that update the
   ! same block of C are ordered by the inout dependence on that block.
   ! No task synchronization is performed here before returning.
   implicit none
   integer, intent(in) :: N, BS
   real, dimension(N, N), intent(in) :: A, B
   real, dimension(N, N), intent(inout) :: C
   integer :: BM
   integer :: i, j, k, ii, jj, kk
   BM = BS - 1
   do i = 1, N, BS
      do j = 1, N, BS
         do k = 1, N, BS
!$omp task shared(A,B,C) private(ii,jj,kk) &
!$omp depend ( in: A(i:i+BM, k:k+BM), B(k:k+BM, j:j+BM) ) &
!$omp depend ( inout: C(i:i+BM, j:j+BM) )
!  I,J,K are firstprivate by default
!  The element indices below address exactly the array sections named in
!  the depend clauses: rows i..i+BM / columns j..j+BM of C, and so on.
!  The innermost loop runs over the first (fastest-varying) index.
            do jj = j, j+BM
               do kk = k, k+BM
                  do ii = i, i+BM
                     C(ii,jj) = C(ii,jj) + A(ii,kk) * B(kk,jj)
                  end do
               end do
            end do
!$omp end task
         end do
      end do
   end do
end subroutine matmul_depend

5.3.6. taskwait with Dependences#

In this subsection three examples illustrate how the depend clause can be applied to a taskwait construct to make the generating task wait for specific child tasks to complete. This is an OpenMP 5.0 feature. In the same manner that dependences can order executions among child tasks with depend clauses on task constructs, the generating task can be scheduled to wait on child tasks at a taskwait before it can proceed.

Note: Since the depend clause on a taskwait construct relaxes the default synchronization behavior (waiting for all children to finish), it is important to realize that child tasks that are not predecessor tasks, as determined by the depend clause of the taskwait construct, may be running concurrently while the generating task is executing after the taskwait.

In the first example the generating task waits at the taskwait construct for the completion of the first child task because a dependence on the first task is produced by x with an in dependence type within the depend clause of the taskwait construct. Immediately after the first taskwait construct it is safe to access the x variable by the generating task, as shown in the print statement. There is no completion constraint on the second child task. Hence, immediately after the first taskwait it is unsafe to access the y variable since the second child task may still be executing. The second taskwait ensures that the second child task has completed; hence it is safe to access the y variable in the following print statement.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.6
* type: C
* version: omp_5.0
*/
#include<stdio.h>

void foo()
{
    int x = 0;
    int y = 2;

    // 1st child task: updates x, with an inout dependence on x.
    #pragma omp task depend(inout: x) shared(x)
    {
        x++;
    }

    // 2nd child task: updates y, with no depend clause.
    #pragma omp task shared(y)
    {
        y--;
    }

    // 1st taskwait: waits only for the child task with a dependence on x.
    #pragma omp taskwait depend(in: x)

    printf("x=%d\n", x);

    // Second task may not be finished.
    // Accessing y here will create a race condition.

    // 2nd taskwait: waits for all child tasks.
    #pragma omp taskwait

    printf("y=%d\n", y);
}

int main()
{
    /* A single thread of the parallel team calls foo(). */
    #pragma omp parallel
    {
        #pragma omp single
        {
            foo();
        }
    }

    return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.6
! type: F-free
! version: omp_5.0
subroutine foo()
    implicit none
    integer :: x
    integer :: y

    x = 0
    y = 2

    ! 1st child task: updates x, with an inout dependence on x.
    !$omp task depend(inout: x) shared(x)
        x = x + 1
    !$omp end task

    ! 2nd child task: updates y, with no depend clause.
    !$omp task shared(y)
        y = y - 1
    !$omp end task

    ! 1st taskwait: waits only for the child task with a dependence on x.
    !$omp taskwait depend(in: x)

    write (*, *) "x=", x

    ! Second task may not be finished.
    ! Accessing y here will create a race condition.

    ! 2nd taskwait: waits for all child tasks.
    !$omp taskwait

    write (*, *) "y=", y

end subroutine foo

program p
    implicit none

    ! A single thread of the parallel team calls foo.
    !$omp parallel
        !$omp single
            call foo()
        !$omp end single
    !$omp end parallel

end program p

In this example the first two tasks are serialized, because a dependence on the first child is produced by x with the in dependence type in the depend clause of the second task. However, the generating task at the first taskwait waits only on the first child task to complete, because a dependence on only the first child task is produced by x with an in dependence type within the depend clause of the taskwait construct. The second taskwait (without a depend clause) is included to guarantee completion of the second task before y is accessed. (While unnecessary, the depend(inout: y) clause on the 2nd child task is included to illustrate how the child task dependences can be completely annotated in a data-flow model.)

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.7
* type: C
* version: omp_5.0
*/
#include<stdio.h>

void foo()
{
    int x = 0;
    int y = 2;

    // 1st child task: updates x, with an inout dependence on x.
    #pragma omp task depend(inout: x) shared(x)
    {
        x++;
    }

    // 2nd child task: reads x (in) and updates y (inout); the in
    // dependence on x orders it after the 1st child task.
    #pragma omp task depend(in: x) depend(inout: y) shared(x, y)
    {
        y -= x;
    }

    // 1st taskwait: waits only for the child task that has an
    // inout dependence on x.
    #pragma omp taskwait depend(in: x)

    printf("x=%d\n", x);

    // Second task may not be finished.
    // Accessing y here would create a race condition.

    // 2nd taskwait: waits for all child tasks.
    #pragma omp taskwait

    printf("y=%d\n", y);

}

int main()
{
    /* A single thread of the parallel team calls foo(). */
    #pragma omp parallel
    {
        #pragma omp single
        {
            foo();
        }
    }

    return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.7
! type: F-free
! version: omp_5.0
subroutine foo()
    implicit none
    integer :: x
    integer :: y

    x = 0
    y = 2

    ! 1st child task: updates x, with an inout dependence on x.
    !$omp task depend(inout: x) shared(x)
        x = x + 1
    !$omp end task

    ! 2nd child task: reads x (in) and updates y (inout); the in
    ! dependence on x orders it after the 1st child task.
    !$omp task depend(in: x) depend(inout: y) shared(x, y)
        y = y - x
    !$omp end task

    ! 1st taskwait: waits only for the child task that has an
    ! inout dependence on x.
    !$omp taskwait depend(in: x)

    write (*, *) "x=", x

    ! Second task may not be finished.
    ! Accessing y here would create a race condition.

    ! 2nd taskwait: waits for all child tasks.
    !$omp taskwait

    write (*, *) "y=", y

end subroutine foo

program p
    implicit none

    ! A single thread of the parallel team calls foo.
    !$omp parallel
        !$omp single
            call foo()
        !$omp end single
    !$omp end parallel

end program p

This example is similar to the previous one, except the generating task is directed to also wait for completion of the second task.

The depend clause of the taskwait construct now includes an in dependence type for y. Hence the generating task must now wait on completion of any child task having y with an out (here inout) dependence type in its depend clause. So, the depend clause of the taskwait construct now constrains the second task to complete at the taskwait, too. (This change makes the second taskwait of the previous example unnecessary; it has been removed in this example.)

Note: While a taskwait construct without a depend clause ensures that all child tasks have completed, a depend clause on a taskwait construct only waits for specific child tasks (prescribed by the dependence type and list items in the taskwait’s depend clause). This and the previous example illustrate the need to carefully determine the dependence type of variables in the taskwait depend clause when selecting child tasks that the generating task must wait on, so that its execution after the taskwait does not produce race conditions on variables accessed by non-completed child tasks.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.8
* type: C
* version: omp_5.0
*/
#include<stdio.h>

void foo()
{
    int x = 0;
    int y = 2;

    // 1st child task: updates x, with an inout dependence on x.
    #pragma omp task depend(inout: x) shared(x)
    {
        x++;
    }

    // 2nd child task: reads x (in) and updates y (inout); the in
    // dependence on x orders it after the 1st child task.
    #pragma omp task depend(in: x) depend(inout: y) shared(x, y)
    {
        y -= x;
    }

    // The in dependences on both x and y make the generating task wait
    // for both child tasks, so x and y are safe to read afterwards.
    #pragma omp taskwait depend(in: x,y)

    printf("x=%d\n", x);
    printf("y=%d\n", y);

}

int main()
{
    /* A single thread of the parallel team calls foo(). */
    #pragma omp parallel
    {
        #pragma omp single
        {
            foo();
        }
    }

    return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.8
! type: F-free
! version: omp_5.0
subroutine foo()
    implicit none
    integer :: x
    integer :: y

    x = 0
    y = 2

    ! 1st child task: updates x, with an inout dependence on x.
    !$omp task depend(inout: x) shared(x)
        x = x + 1
    !$omp end task

    ! 2nd child task: reads x (in) and updates y (inout); the in
    ! dependence on x orders it after the 1st child task.
    !$omp task depend(in: x) depend(inout: y) shared(x, y)
        y = y - x
    !$omp end task

    ! The in dependences on both x and y make the generating task wait
    ! for both child tasks, so x and y are safe to read afterwards.
    !$omp taskwait depend(in: x,y)

    write (*, *) "x=", x
    write (*, *) "y=", y

end subroutine foo

program p
    implicit none

    ! A single thread of the parallel team calls foo.
    !$omp parallel
        !$omp single
            call foo()
        !$omp end single
    !$omp end parallel

end program p

5.3.7. Mutually Exclusive Execution with Dependences#

In this example we show a series of tasks, including mutually exclusive tasks, expressing dependences using the depend clause on the task construct.

The program will always print 6. Tasks T1, T2 and T3 will be scheduled first, in any order. Task T4 will be scheduled after tasks T1 and T2 are completed. T5 will be scheduled after tasks T1 and T3 are completed. Due to the mutexinoutset dependence type on c, T4 and T5 may be scheduled in any order with respect to each other, but not at the same time. Task T6 will be scheduled after both T4 and T5 are completed.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.9
* type: C
* version: omp_5.0
*/
#include <stdio.h>
int main()
{
   int a, b, c, d;   /* all assigned inside the tasks below */

   #pragma omp parallel
   {
      #pragma omp single
      {
         /* T1, T2, T3: independent writers of c, a and b. */
         #pragma omp task depend(out: c)
         {
            c = 1;   /* Task T1 */
         }
         #pragma omp task depend(out: a)
         {
            a = 2;   /* Task T2 */
         }
         #pragma omp task depend(out: b)
         {
            b = 3;   /* Task T3 */
         }

         /* T4, T5: both update c under a mutexinoutset dependence, so they
            may run in either order but not at the same time. */
         #pragma omp task depend(in: a) depend(mutexinoutset: c)
         {
            c += a;  /* Task T4 */
         }
         #pragma omp task depend(in: b) depend(mutexinoutset: c)
         {
            c += b;  /* Task T5 */
         }

         /* T6: reads c after both updates have completed. */
         #pragma omp task depend(in: c)
         {
            d = c;   /* Task T6 */
         }
      }
   }
   printf("%d\n", d);
   return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.9
! type: F-free
! version:    omp_5.0
program example
   implicit none
   integer :: a, b, c, d      ! all assigned inside the tasks below

   !$omp parallel
   !$omp single

      ! T1, T2, T3: independent writers of c, a and b.
      !$omp task depend(out: c)
         c = 1      ! Task T1
      !$omp end task
      !$omp task depend(out: a)
         a = 2      ! Task T2
      !$omp end task
      !$omp task depend(out: b)
         b = 3      ! Task T3
      !$omp end task

      ! T4, T5: both update c under a mutexinoutset dependence, so they
      ! may run in either order but not at the same time.
      !$omp task depend(in: a) depend(mutexinoutset: c)
         c = c + a  ! Task T4
      !$omp end task
      !$omp task depend(in: b) depend(mutexinoutset: c)
         c = c + b  ! Task T5
      !$omp end task

      ! T6: reads c after both updates have completed.
      !$omp task depend(in: c)
         d = c      ! Task T6
      !$omp end task

   !$omp end single
   !$omp end parallel

   write (*, *) d
end program example

The following example demonstrates a situation where the mutexinoutset dependence type is advantageous. If shortTaskB completes before longTaskA, the runtime can take advantage of this by scheduling longTaskBC before shortTaskAC.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.10
* type: C
* version: omp_5.0
*/
extern int longTaskA(), shortTaskB();
extern int shortTaskAC(int,int), longTaskBC(int,int);
void foo (void)
{
  int a, b;
  int c = 0;

  #pragma omp parallel
  {
    #pragma omp single
    {
      /* Producers of a and b; they have no dependence on each other. */
      #pragma omp task depend(out: a)
      {
        a = longTaskA();
      }
      #pragma omp task depend(out: b)
      {
        b = shortTaskB();
      }

      /* Both consumers update c under a mutexinoutset dependence: they
         exclude each other but are not ordered with respect to each other. */
      #pragma omp task depend(in: a) depend(mutexinoutset: c)
      {
        c = shortTaskAC(a,c);
      }
      #pragma omp task depend(in: b) depend(mutexinoutset: c)
      {
        c = longTaskBC(b,c);
      }
    }
  }
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.10
! type: F-free
! version:    omp_5.0
subroutine foo
   ! Generates two producer tasks (for a and b) and two consumer tasks that
   ! update c under a mutexinoutset dependence, so the consumers exclude
   ! each other but are not ordered with respect to each other.
   implicit none
   integer :: a,b,c
   ! The task functions are declared as external INTEGER functions, matching
   ! the int return type used by the C version of this example.  Without
   ! these declarations implicit typing would treat shortTaskB and
   ! shortTaskAC as default REAL functions.
   integer, external :: longTaskA, shortTaskB
   integer, external :: shortTaskAC, longTaskBC

   c = 0
   !$omp parallel
   !$omp single
      ! Producers of a and b; they have no dependence on each other.
      !$omp task depend(out: a)
         a = longTaskA()
      !$omp end task
      !$omp task depend(out: b)
         b = shortTaskB()
      !$omp end task
      ! Consumers: each waits for its own input and updates c exclusively.
      !$omp task depend(in: a) depend(mutexinoutset: c)
         c = shortTaskAC(a,c)
      !$omp end task
      !$omp task depend(in: b) depend(mutexinoutset: c)
         c = longTaskBC(b,c)
      !$omp end task
   !$omp end single
   !$omp end parallel
end subroutine foo

5.3.8. Multidependences Using Iterators#

The following example uses an iterator to define a dynamic number of dependences.

In the single construct of a parallel region a loop generates n tasks and each task has an out dependence specified through an element of the v array. This is followed by a single task that defines an in dependence on each element of the array. This is accomplished by using the iterator modifier in the depend clause, supporting a dynamic number of dependences ( n here).

The task for the print_all_elements function is not executed until all dependences prescribed (or registered) by the iterator are fulfilled; that is, after all the tasks generated by the loop have completed.

Note, one cannot simply use an array section in the depend clause of the second task construct because this would violate the depend clause restriction:

“List items used in depend clauses of the same task or sibling tasks must indicate identical storage locations or disjoint storage locations”.

In this case each of the loop tasks uses a single disjoint (different storage) element in its depend clause; however, the array-section storage area prescribed in the commented directive is neither identical nor disjoint to the storage prescribed by the elements of the loop tasks. The iterator overcomes this restriction by effectively creating n disjoint storage areas.

//%compiler: clang
//%cflags: -fopenmp

/*
* name:       task_dep.11
* type:       C
* version: omp_5.0
*/
#include<stdio.h>

/* Store val in the int that p points to. */
void set_an_element(int *p, int val) {
    p[0] = val;
}

/* Print the first n elements of v on one line, each followed by ", ",
   and end the line with a newline. */
void print_all_elements(int *v, int n) {
    int idx = 0;

    while (idx < n) {
        printf("%d, ", v[idx]);
        idx++;
    }
    printf("\n");
}

void parallel_computation(int n) {
    int v[n];

    #pragma omp parallel
    {
        #pragma omp single
        {
            int i;

            /* One task per element, each with an out dependence on its
               own element of v. */
            for (i = 0; i < n; i++) {
                #pragma omp task depend(out: v[i])
                {
                    set_an_element(&v[i], i);
                }
            }

            /* The iterator expands to an in dependence on every v[it],
               it = 0..n-1, so this task runs after all the loop tasks.
               The following violates array-section restriction:
               #pragma omp task depend(in: v[0:n]) */
            #pragma omp task depend(iterator(it = 0:n), in: v[it])
            {
                print_all_elements(v, n);
            }
        }
    }
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.11
! type: F-free
! version:    omp_5.0
subroutine set_an_element(e, val)
    ! Assign val to e.
    implicit none
    integer :: e
    integer :: val

    e = val
end subroutine set_an_element

subroutine print_all_elements(v, n)
    ! Print the n elements of v using list-directed output.
    implicit none
    integer :: n
    integer :: v(n)

    write (*, *) v
end subroutine print_all_elements

subroutine parallel_computation(n)
    implicit none
    integer :: n
    integer :: i
    integer :: v(n)

    !$omp parallel
    !$omp single

        ! One task per element, each with an out dependence on its own
        ! element of v.
        do i = 1, n
            !$omp task depend(out: v(i))
                call set_an_element(v(i), i)
            !$omp end task
        end do

        ! The iterator expands to an in dependence on every v(it),
        ! it = 1..n, so this task runs after all the loop tasks.
        ! Using "task depend(in: v(1:n))" instead violates the
        ! array section restriction.
        !$omp task depend(iterator(it = 1:n), in: v(it))
            call print_all_elements(v, n)
        !$omp end task

    !$omp end single
    !$omp end parallel
end subroutine parallel_computation

5.3.9. Dependence for Undeferred Tasks#

In the following example, we show that even if a task is undeferred as specified by an if clause that evaluates to false , task dependences are still honored.

The depend clauses of the first and second explicit tasks specify that the first task is completed before the second task.

The second explicit task has an if clause that evaluates to false. This means that the execution of the generating task (the implicit task of the single region) must be suspended until the second explicit task is completed. But, because of the dependence, the first explicit task must complete first, then the second explicit task can execute and complete, and only then the generating task can resume to the print statement. Thus, the program will always print "x = 2".

//%compiler: clang
//%cflags: -fopenmp

/*
* name: task_dep.12
* type: C
* version: omp_4.0
*/
#include <stdio.h>
int main (int argc, char *argv[])
{
  int x;

  x = 0;
  #pragma omp parallel
  {
    #pragma omp single
    {
      /* First explicit task: writes x, with an out dependence on x. */
      #pragma omp task shared(x) depend(out: x)
      {
        x = 1;
      }

      /* Second explicit task: undeferred because its if clause is false,
         but its inout dependence on x still orders it after the first
         task. */
      #pragma omp task shared(x) depend(inout: x) if(0)
      {
        x = 2;
      }

      /* Statement executed by the parent implicit task.
         prints: x = 2 */
      printf("x = %d\n", x);
    }
  }
  return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.12
! type: F-free
! version: omp_4.0
program example
   implicit none
   integer :: x

   x = 0

   !$omp parallel
   !$omp single

      ! First explicit task: writes x, with an out dependence on x.
      !$omp task shared(x) depend(out: x)
         x = 1
      !$omp end task

      ! Second explicit task: undeferred because its if clause is false,
      ! but its inout dependence on x still orders it after the first task.
      !$omp task shared(x) depend(inout: x) if(.false.)
         x = 2
      !$omp end task

      ! Statement executed by the parent implicit task.
      ! prints: x = 2
      write (*, *) "x = ", x

   !$omp end single
   !$omp end parallel
end program example

In OpenMP 5.1 the omp_all_memory reserved locator was introduced to specify storage of all objects in memory. In the following example, it is used in Task 4 as a convenient way to specify that the locator (list item) denotes the storage of all objects (locations) in memory, and will therefore match the a and d locators of Task 2, Task 3 and Task 6. The dependences guarantee the ordered execution of Tasks 2 and 3 before 4, and Task 4 before Task 6. Since there are no dependences imposed on Task 1 and Task 5, they can be scheduled to execute at any time, with no ordering.

//%compiler: clang
//%cflags: -fopenmp

/*
* name:       task_dep.13
* type:       C
* version:    omp_5.1
*/
#include <stdio.h>

int main(){
   int a = 1;
   int d = 1;

   #pragma omp parallel masked num_threads(5)
   {
      /* Task 1: no depend clause. */
      #pragma omp task
      {
         printf("T1\n");
      }

      /* Task 2: out dependence on a. */
      #pragma omp task depend(out: a)
      {
         a++;
         printf("T2 a=%i\n", a);
      }

      /* Task 3: out dependence on d. */
      #pragma omp task depend(out: d)
      {
         d++;
         printf("T3 d=%i\n", d);
      }

      /* Task 4: the omp_all_memory locator matches the a and d locators
         of Tasks 2, 3 and 6. */
      #pragma omp task depend(inout: omp_all_memory)
      {
         a++;
         d++;
         printf("T4 a=%i d=%i\n",   a,d);
      }

      /* Task 5: no depend clause. */
      #pragma omp task
      {
         printf("T5\n");
      }

      /* Task 6: in dependences on a and d order it after Task 4. */
      #pragma omp task depend(in: a,d)
      {
         a++;
         d++;
         printf("T6 a=%i d=%i\n", a,d);
      }
   }
}

/* OUTPUT: ordered {T2,T3 any order}, {T4}, {T6}
    T2 a=2
    T3 d=2
    T4 a=3 d=3
    T6 a=4 d=4

  OUTPUT: unordered (can appear interspersed in ordered output)
    T1
    T5
*/
!!%compiler: gfortran
!!%cflags: -fopenmp

! name: task_dep.13
! type: F-free
! version: omp_5.1
program main
  implicit none
  integer :: a, d

  a = 1
  d = 1

  !$omp parallel masked  num_threads(5)

    ! Task 1: no depend clause.
    !$omp task
       write(*,'("T1")')
    !$omp end task

    ! Task 2: out dependence on a.
    !$omp task depend(out: a)
       a = a + 1
       write(*,'("T2 a=",i1)') a
    !$omp end task

    ! Task 3: out dependence on d.
    !$omp task depend(out: d)
       d = d + 1
       write(*,'("T3 d=",i1)') d
    !$omp end task

    ! Task 4: the omp_all_memory locator matches the a and d locators
    ! of Tasks 2, 3 and 6.
    !$omp task depend(inout: omp_all_memory)
       a = a + 1
       d = d + 1
       write(*,'("T4 a=",i1," d=",i1)') a, d
    !$omp end task

    ! Task 5: no depend clause.
    !$omp task
       write(*,'("T5")')
    !$omp end task

    ! Task 6: in dependences on a and d order it after Task 4.
    !$omp task depend(in: a,d)
       a = a + 1
       d = d + 1
       write(*,'("T6 a=",i1," d=",i1)') a, d
    !$omp end task

  !$omp end parallel masked

end program main

! OUTPUT: ordered  {T2,T3 any order}, {T4}, {T6}
! T2 a=2
! T3 d=2
! T4 a=3 d=3
! T6 a=4 d=4
! OUTPUT: unordered (can appear interspersed in ordered output)
! T1
! T5