teams Construct on Host
3.3. teams Construct on Host#
constructs!teams teams teams construct teams construct
Originally the teams construct was created for devices (such as GPUs), so that the teams within a league could each execute a structured block independently (on a GPU's streaming multiprocessors, or SMs). It was available only through offloading with the target construct, and execution of a teams region could be directed to the host only by indirect means, such as the if and device clauses or the OMP_TARGET_OFFLOAD environment variable.
In OpenMP 5.0 the teams construct was extended to enable the host to execute a teams region (without an associated target construct), with anticipation of further affinity and threading controls in future OpenMP releases.
In the example below the teams construct is used to create two teams, one to execute single precision code, and the other to execute double precision code. Two teams are required, and the thread limit for each team is set to 1/2 of the number of available processors.
//%compiler: clang
//%cflags: -fopenmp
/*
* name: host_teams.1
* type: C
* version: omp_5.0
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>
#define N 1000
/*
 * Host-side teams example: a league of two teams is created directly on the
 * host (no enclosing target construct).  Team 0 runs a single-precision
 * a*x+y update (SAXPY) and team 1 runs the double-precision one (DAXPY).
 * Each team opens its own parallel region, bounded by thread_limit.
 *
 * Returns 0 on success; terminates with EXIT_FAILURE if the runtime does not
 * provide the two teams the example needs.
 */
int main(){
   int    nteams_required=2, max_thrds, tm_id;
   float  sp_x[N], sp_y[N], sp_a=0.0001e0;
   double dp_x[N], dp_y[N], dp_a=0.0001e0;

   // Split the available processors evenly between the teams.  The integer
   // division yields 0 on a single-processor host, and thread_limit requires
   // a positive value, so clamp to at least one thread per team.
   max_thrds = omp_get_num_procs()/nteams_required;
   if( max_thrds < 1 ) max_thrds = 1;

   // Create 2 teams, each team works in a different precision
   #pragma omp teams num_teams(nteams_required) \
                     thread_limit(max_thrds) private(tm_id)
   {
      tm_id = omp_get_team_num();

      // num_teams is only a request: the implementation may create fewer.
      if( omp_get_num_teams() != 2 )   //if only getting 1, quit
      {
         // Report on stderr and use a failing status so callers and scripts
         // can detect that the example did not run.
         fprintf(stderr, "error: Insufficient teams on host, 2 required\n");
         exit(EXIT_FAILURE);
      }

      if(tm_id == 0)  // Do Single Precision Work (SAXPY) with this team
      {
         #pragma omp parallel
         {
            #pragma omp for                            //init
            for(int i=0; i<N; i++){sp_x[i] = i*0.0001; sp_y[i]=i; }

            #pragma omp for simd simdlen(8)
            for(int i=0; i<N; i++){sp_x[i] = sp_a*sp_x[i] + sp_y[i];}
         }
      }

      if(tm_id == 1)  // Do Double Precision Work (DAXPY) with this team
      {
         #pragma omp parallel
         {
            #pragma omp for                            //init
            for(int i=0; i<N; i++){dp_x[i] = i*0.0001; dp_y[i]=i; }

            #pragma omp for simd simdlen(4)
            for(int i=0; i<N; i++){dp_x[i] = dp_a*dp_x[i] + dp_y[i];}
         }
      }
   }

   // The teams region has ended, so both result arrays are complete here.
   printf("i=%d sp|dp %f %f \n",N-1, sp_x[N-1], dp_x[N-1]);
   printf("i=%d sp|dp %f %f \n",N/2, sp_x[N/2], dp_x[N/2]);
   //OUTPUT1:i=999 sp|dp 999.000000 999.000010
   //OUTPUT2:i=500 sp|dp 500.000000 500.000005

   return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp
! name: host_teams.1
! type: F-free
! version: omp_5.0
!! Host-side teams example: a league of two teams is created directly on the
!! host (no enclosing target construct).  Team 0 runs a single-precision
!! a*x+y update (SAXPY) and team 1 runs the double-precision one (DAXPY).
!! Each team opens its own parallel region, bounded by thread_limit.
program main
   use omp_lib
   implicit none
   integer           :: nteams_required=2, max_thrds, tm_id
   integer           :: i
   integer,parameter :: N=1000
   real              :: sp_x(N), sp_y(N), sp_a=0.0001e0
   double precision  :: dp_x(N), dp_y(N), dp_a=0.0001d0

   !! Split the available processors evenly between the teams.  The integer
   !! division yields 0 on a single-processor host, and thread_limit requires
   !! a positive value, so clamp to at least one thread per team.
   max_thrds = max(1, omp_get_num_procs()/nteams_required)

   !! Create 2 teams, each team works in a different precision
   !! (free-form continuation: trailing "&" plus a sentinel on the next line)
   !$omp teams num_teams(nteams_required) thread_limit(max_thrds) &
   !$omp&      private(tm_id)

      tm_id = omp_get_team_num()

      !! num_teams is only a request: the implementation may create fewer.
      if( omp_get_num_teams() /= 2 ) then   !! if only getting 1, quit
         !! error stop signals abnormal termination to the caller.
         error stop "error: Insufficient teams on host, 2 required."
      endif

      !! Do Single Precision Work (SAXPY) with this team
      if(tm_id == 0) then

         !$omp parallel
            !$omp do         !! init
            do i = 1,N
               sp_x(i) = i*0.0001e0
               sp_y(i) = i
            end do

            !$omp do simd simdlen(8)
            do i = 1,N
               sp_x(i) = sp_a*sp_x(i) + sp_y(i)
            end do
         !$omp end parallel

      endif

      !! Do Double Precision Work (DAXPY) with this team
      if(tm_id == 1) then

         !$omp parallel
            !$omp do         !! init
            do i = 1,N
               dp_x(i) = i*0.0001d0
               dp_y(i) = i
            end do

            !$omp do simd simdlen(4)
            do i = 1,N
               dp_x(i) = dp_a*dp_x(i) + dp_y(i)
            end do
         !$omp end parallel

      endif
   !$omp end teams

   !! The teams region has ended, so both result arrays are complete here.
   write(*,'( "i=",i4," sp|dp= ", e15.7, d25.16 )') &
           N, sp_x(N), dp_x(N)
   write(*,'( "i=",i4," sp|dp= ", e15.7, d25.16 )') &
           N/2, sp_x(N/2), dp_x(N/2)
   !! i=1000 sp|dp=   0.1000000E+04   0.1000000010000000D+04
   !! i= 500 sp|dp=   0.5000000E+03   0.5000000050000000D+03
end program main