3.3. teams Construct on Host#

constructs!teams teams teams construct teams construct

Originally, the teams construct was created for devices (such as GPUs) so that the teams within a league could execute a structured block independently (for example, on streaming multiprocessors, SMs). It was available only through offloading with the target construct, and a teams region could be directed to execute on the host only by indirect means, such as the if and device clauses or the OMP_TARGET_OFFLOAD environment variable.

In OpenMP 5.0, the teams construct was extended to enable the host to execute a teams region (without an associated target construct), in anticipation of further affinity and threading controls in future OpenMP releases.

In the example below the teams construct is used to create two teams, one to execute single precision code, and the other to execute double precision code. Two teams are required, and the thread limit for each team is set to 1/2 of the number of available processors.

//%compiler: clang
//%cflags: -fopenmp

/*
* name: host_teams.1
* type: C
* version: omp_5.0
*/
#include <stdio.h>
#include <stdlib.h>
#include  <math.h>
#include   <omp.h>
#define    N 1000

/*
 * Runs a teams region directly on the host (no target construct).
 *
 * Two teams are requested: team 0 performs a single precision AXPY
 * (sp_x = sp_a*sp_x + sp_y) and team 1 performs the same operation in
 * double precision. Each team opens its own parallel region, limited to
 * half of the available processors. Two sample elements of each result
 * are printed at the end.
 *
 * Returns 0 on success; terminates with EXIT_FAILURE if the runtime does
 * not provide the two teams the example depends on.
 */
int main(){
   int     nteams_required=2, max_thrds, tm_id;
   float   sp_x[N], sp_y[N], sp_a=0.0001e0;
   double  dp_x[N], dp_y[N], dp_a=0.0001e0;

   // Split the processors evenly between the teams. The thread_limit
   // clause needs a positive value, so never go below 1 (the integer
   // division yields 0 when there are fewer processors than teams).
   max_thrds = omp_get_num_procs()/nteams_required;
   if( max_thrds < 1 ) max_thrds = 1;

   // Create 2 teams, each team works in a different precision
   #pragma omp teams num_teams(nteams_required) \
                     thread_limit(max_thrds)  private(tm_id)
   {
      tm_id = omp_get_team_num();

      // num_teams is only an upper bound on what the runtime grants;
      // the work split below needs team numbers 0 and 1 to both exist.
      if( omp_get_num_teams() != nteams_required )
      { fprintf(stderr, "error: Insufficient teams on host, 2 required\n");
        exit(EXIT_FAILURE);   // report the failure to the caller
      }

      if(tm_id == 0)  // Do Single Precision Work (SAXPY) with this team
      {
         #pragma omp parallel
         {
            #pragma omp for                             //init
            for(int i=0; i<N; i++){sp_x[i] = i*0.0001;  sp_y[i]=i; }

            #pragma omp for simd simdlen(8)
            for(int i=0; i<N; i++){sp_x[i] = sp_a*sp_x[i] + sp_y[i];}
         }
      }

      if(tm_id == 1)  // Do Double Precision Work (DAXPY) with this team
      {
         #pragma omp parallel
         {
            #pragma omp for                             //init
            for(int i=0; i<N; i++){dp_x[i] = i*0.0001;  dp_y[i]=i; }

            #pragma omp for simd simdlen(4)
            for(int i=0; i<N; i++){dp_x[i] = dp_a*dp_x[i] + dp_y[i];}
         }
      }
   }

   // Print the last and the middle element of both results.
   printf("i=%d  sp|dp  %f %f \n",N-1, sp_x[N-1], dp_x[N-1]);
   printf("i=%d  sp|dp  %f %f \n",N/2, sp_x[N/2], dp_x[N/2]);
 //OUTPUT1:i=999  sp|dp  999.000000 999.000010
 //OUTPUT2:i=500  sp|dp  500.000000 500.000005

   return 0;
}

!!%compiler: gfortran
!!%cflags: -fopenmp

! name: host_teams.1
! type: F-free
! version: omp_5.0

!! Runs a teams region directly on the host (no target construct).
!!
!! Two teams are requested: team 0 performs a single precision AXPY
!! (sp_x = sp_a*sp_x + sp_y) and team 1 performs the same operation in
!! double precision. Each team opens its own parallel region, limited to
!! half of the available processors. Two sample elements of each result
!! are written at the end.
program main
   use omp_lib
   implicit none
   integer           :: nteams_required=2, max_thrds, tm_id
   integer           :: i                 !! loop index of the worksharing loops
   integer,parameter ::  N=1000
   real              :: sp_x(N), sp_y(N), sp_a=0.0001e0
   double precision  :: dp_x(N), dp_y(N), dp_a=0.0001d0

   !! Split the processors evenly between the teams. The thread_limit
   !! clause needs a positive value, so never go below 1 (the integer
   !! division yields 0 when there are fewer processors than teams).
   max_thrds = max(1, omp_get_num_procs()/nteams_required)

   !! Create 2 teams, each team works in a different precision
   !$omp teams num_teams(nteams_required) thread_limit(max_thrds) &
   !$omp&      private(tm_id)

      tm_id = omp_get_team_num()

      !! num_teams is only an upper bound on what the runtime grants;
      !! the work split below needs team numbers 0 and 1 to both exist.
      if( omp_get_num_teams() /= nteams_required ) then
         stop "error: Insufficient teams on host, 2 required."
      endif

      !! Do Single Precision Work (SAXPY) with this team
      if(tm_id == 0) then

         !$omp parallel
            !$omp do         !! init
            do i = 1,N
               sp_x(i) = i*0.0001e0
               sp_y(i) = i
            end do

            !$omp do simd simdlen(8)
            do i = 1,N
               sp_x(i) = sp_a*sp_x(i) + sp_y(i)
            end do
         !$omp end parallel

      endif

      !! Do Double Precision Work (DAXPY) with this team
      if(tm_id == 1) then

         !$omp parallel
            !$omp do         !! init
            do i = 1,N
               dp_x(i) = i*0.0001d0
               dp_y(i) = i
            end do

            !$omp do simd simdlen(4)
            do i = 1,N
               dp_x(i) = dp_a*dp_x(i) + dp_y(i)
            end do
         !$omp end parallel

      endif
   !$omp end teams

   !! Write the last and the middle element of both results.
   write(*,'( "i=",i4," sp|dp= ", e15.7, d25.16  )') &
            N, sp_x(N), dp_x(N)
   write(*,'( "i=",i4," sp|dp= ", e15.7, d25.16  )') &
            N/2, sp_x(N/2), dp_x(N/2)
            !! i=1000 sp|dp=   0.1000000E+04   0.1000000010000000D+04
            !! i= 500 sp|dp=   0.5000000E+03   0.5000000050000000D+03
end program main