OpenMP Memory Model
11.1. OpenMP Memory Model
The following examples illustrate two major concerns for concurrent thread execution: ordering of thread execution and memory accesses that may or may not lead to race conditions.
In the following example, at Print 1, the value of xval could be either 2 or 5, depending on the timing of the threads. The atomic directives are necessary for the accesses to x by threads 0 and 1 to avoid a data race. If the atomic write on thread 0 completes before the atomic read on thread 1, thread 1 is guaranteed to see 5 in xval. Otherwise, thread 1 is guaranteed to see 2 in xval.
The barrier after Print 1 contains implicit flushes on all threads, as well as a thread synchronization, so the programmer is guaranteed that the value 5 will be printed by both Print 2 and Print 3. Since neither Print 2 nor Print 3 modifies x, they may concurrently access x without requiring atomic directives to avoid a data race.
//%compiler: clang
//%cflags: -fopenmp
/*
* name: mem_model.1
* type: C
* version: omp_3.1
*/
#include <stdio.h>
#include <omp.h>
/*
 * mem_model.1: two threads share x. Thread 0 atomically stores 5 while the
 * other thread atomically loads x, so the first print may show 2 or 5.
 * After the barrier both threads print x with plain reads.
 */
int main(){
    int x = 2;

    #pragma omp parallel num_threads(2) shared(x)
    {
        /* Cache the thread number once; it is constant within the region. */
        const int tid = omp_get_thread_num();

        if (tid != 0) {
            int xval;
            #pragma omp atomic read
            xval = x;
            /* Print 1: xval can be 2 or 5 */
            printf("1: Thread# %d: x = %d\n", tid, xval);
        } else {
            #pragma omp atomic write
            x = 5;
        }

        /* The barrier synchronizes the threads and implies a flush. */
        #pragma omp barrier

        if (tid != 0) {
            /* Print 3 */
            printf("3: Thread# %d: x = %d\n", tid, x);
        } else {
            /* Print 2 */
            printf("2: Thread# %d: x = %d\n", tid, x);
        }
    }

    return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp
! name: mem_model.1
! type: F-free
! version: omp_3.1
! MEMMODEL: two threads share X. Thread 0 atomically stores 5 into X while
! the other thread atomically loads X into XVAL, so the first print may show
! 2 or 5. After the barrier both threads print X with plain reads.
PROGRAM MEMMODEL
INCLUDE "omp_lib.h" ! or USE OMP_LIB
! X is the variable under test; XVAL holds the value loaded by the
! non-zero thread (it is written only in the ELSE branch below).
INTEGER X, XVAL
X = 2
! Request a team of two threads with X explicitly shared.
!$OMP PARALLEL NUM_THREADS(2) SHARED(X)
IF (OMP_GET_THREAD_NUM() .EQ. 0) THEN
! Writer: the store is atomic so it cannot race with the atomic read below.
!$OMP ATOMIC WRITE
X = 5
ELSE
! Reader: atomic load of X; unordered with respect to the write above.
!$OMP ATOMIC READ
XVAL = X
! PRINT 1: XVAL can be 2 or 5
PRINT *,"1: THREAD# ", OMP_GET_THREAD_NUM(), "X = ", XVAL
ENDIF
! The barrier synchronizes both threads before the plain reads of X below.
!$OMP BARRIER
IF (OMP_GET_THREAD_NUM() .EQ. 0) THEN
! PRINT 2
PRINT *,"2: THREAD# ", OMP_GET_THREAD_NUM(), "X = ", X
ELSE
! PRINT 3
PRINT *,"3: THREAD# ", OMP_GET_THREAD_NUM(), "X = ", X
ENDIF
!$OMP END PARALLEL
END PROGRAM MEMMODEL
The following example demonstrates why synchronization is difficult to perform correctly through variables. The write to flag on thread 0 and the read from flag in the loop on thread 1 must be atomic to avoid a data race. When thread 1 breaks out of the loop, flag will have the value of 1. However, data will still be undefined at the first print statement. Only after the flush of both flag and data after the first print statement will data have the well-defined value of 42.
//%compiler: clang
//%cflags: -fopenmp
/*
* name: mem_model.2
* type: C
* version: omp_3.1
*/
#include <omp.h>
#include <stdio.h>
/*
 * mem_model.2: hand-rolled flag synchronization between two threads.
 * Thread 0 writes data, flushes, then atomically sets flag. Thread 1 spins
 * on an atomic read of flag and prints data twice: once before and once
 * after its own flush of (flag, data).
 */
int main()
{
/* Both variables are declared outside the parallel region and appear in no
 * data-sharing clause on the parallel directive. */
int data;
int flag=0;
#pragma omp parallel num_threads(2)
{
if (omp_get_thread_num()==0)
{
/* Write to the data buffer that will be
* read by thread 1 */
data = 42;
/* Flush data to thread 1 and strictly order
* the write to data relative to the write to the flag */
#pragma omp flush(flag, data)
/* Set flag to release thread 1 */
#pragma omp atomic write
flag = 1;
}
else if(omp_get_thread_num()==1)
{
/* Loop until we see the update to the flag */
#pragma omp flush(flag, data)
/* flag_val is a per-thread copy of flag, refreshed by the atomic read. */
int flag_val = 0;
while (flag_val < 1)
{
#pragma omp atomic read
flag_val = flag;
}
/* Value of flag is 1; value of data is undefined */
printf("flag=%d data=%d\n", flag, data);
/* This flush is what makes thread 0's write to data visible here. */
#pragma omp flush(flag, data)
/* Value of flag is 1; value of data is 42 */
printf("flag=%d data=%d\n", flag, data);
}
}
return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp
! name: mem_model.2
! type: F-fixed
! version: omp_3.1
! mem_model.2: hand-rolled flag synchronization between two threads.
! Thread 0 writes DATA, flushes, then atomically sets FLAG. Thread 1 spins
! on an atomic read of FLAG and prints DATA twice: once before and once
! after its own flush of (FLAG, DATA).
PROGRAM EXAMPLE
INCLUDE "omp_lib.h" ! or USE OMP_LIB
INTEGER DATA
! FLAG_VAL receives each atomic read of FLAG; only thread 1 assigns it.
INTEGER FLAG, FLAG_VAL
FLAG = 0
!$OMP PARALLEL NUM_THREADS(2)
IF(OMP_GET_THREAD_NUM() .EQ. 0) THEN
! Write to the data buffer that will be read by thread 1
DATA = 42
! Flush DATA to thread 1 and strictly order the write to DATA
! relative to the write to the FLAG
!$OMP FLUSH(FLAG, DATA)
! Set FLAG to release thread 1
!$OMP ATOMIC WRITE
FLAG = 1
ELSE IF(OMP_GET_THREAD_NUM() .EQ. 1) THEN
! Loop until we see the update to the FLAG
!$OMP FLUSH(FLAG, DATA)
FLAG_VAL = 0
DO WHILE(FLAG_VAL .LT. 1)
!$OMP ATOMIC READ
FLAG_VAL = FLAG
ENDDO
! Value of FLAG is 1; value of DATA is undefined
PRINT *, 'FLAG=', FLAG, ' DATA=', DATA
! This flush is what makes thread 0's write to DATA visible here.
!$OMP FLUSH(FLAG, DATA)
! Value of FLAG is 1; value of DATA is 42
PRINT *, 'FLAG=', FLAG, ' DATA=', DATA
ENDIF
!$OMP END PARALLEL
END
The next example demonstrates why synchronization is difficult to perform correctly through variables. As in the preceding example, the updates to flag and the reading of flag in the loops on threads 1 and 2 are performed atomically to avoid data races on flag. However, the code still contains a data race due to the incorrect use of "flush with a list" after the assignment to data1 on thread 1. By not including flag in the flush-set of that flush directive, the assignment can be reordered with respect to the subsequent atomic update to flag. Consequently, data1 is undefined at the print statement on thread 2.
//%compiler: clang
//%cflags: -fopenmp
/*
* name: mem_model.3
* type: C
* version: omp_3.1
*/
#include <omp.h>
#include <stdio.h>
/* Payloads handed between threads: data0 is written by thread 0 and read by
 * threads 1 and 2; data1 is written by thread 1 and read by thread 2. */
int data0 = 0, data1 = 0;

/*
 * mem_model.3: three-thread hand-off through an atomically updated flag.
 *
 * Thread 0 publishes data0 and bumps flag to 1. Thread 1 waits for
 * flag >= 1, reads data0, publishes data1 and bumps flag to 2. Thread 2
 * waits for flag >= 2 and reads both.
 *
 * The example is intentionally racy in exactly one place: thread 1 uses
 * flush(data1), a flush with a list that omits flag, so the store to data1
 * may be reordered after the atomic update of flag. That flush must stay
 * as written; it is the point of the example.
 */
int main()
{
   int flag=0;

   #pragma omp parallel num_threads(3)
   {
      if(omp_get_thread_num()==0)
      {
         data0 = 17;
         /* Flush without a list: orders the store to data0 before the
          * atomic update of flag below. */
         #pragma omp flush
         /* Set flag to release thread 1 */
         #pragma omp atomic update
         flag++;
         /* Flush of flag is implied by the atomic directive */
      }
      else if(omp_get_thread_num()==1)
      {
         int flag_val = 0;
         /* Loop until we see that flag reaches 1.
          * The bound must be 1: flag_val starts at 0, so a bound of 0
          * would skip the wait entirely and thread 1 could read data0
          * before thread 0 has published it. */
         while(flag_val < 1)
         {
            #pragma omp atomic read
            flag_val = flag;
         }
         #pragma omp flush(data0)
         /* data0 is 17 here */
         printf("Thread 1 awoken (data0 = %d)\n", data0);
         data1 = 42;
         /* Intentionally incorrect: flag is not in the flush-set, so this
          * flush is not ordered with the atomic update of flag below. */
         #pragma omp flush(data1)
         /* Set flag to release thread 2 */
         #pragma omp atomic update
         flag++;
         /* Flush of flag is implied by the atomic directive */
      }
      else if(omp_get_thread_num()==2)
      {
         int flag_val = 0;
         /* Loop until we see that flag reaches 2 */
         while(flag_val < 2)
         {
            #pragma omp atomic read
            flag_val = flag;
         }
         #pragma omp flush(data0,data1)
         /* there is a data race here;
            data0 is 17 and data1 is undefined */
         printf("Thread 2 awoken (data0 = %d, data1 = %d)\n",
                data0, data1);
      }
   }
   return 0;
}
!!%compiler: gfortran
!!%cflags: -fopenmp
! name: mem_model.3
! type: F-fixed
! version: omp_3.1
! mem_model.3 (fixed source form): three-thread hand-off through an
! atomically updated FLAG.
!
! Thread 0 publishes DATA0 and bumps FLAG to 1. Thread 1 waits for
! FLAG >= 1, reads DATA0, publishes DATA1 and bumps FLAG to 2.
! Thread 2 waits for FLAG >= 2 and reads both.
!
! The example is intentionally racy in exactly one place: thread 1
! uses FLUSH(DATA1), a flush with a list that omits FLAG, so the
! store to DATA1 may be reordered after the atomic update of FLAG.
!
! Layout: OpenMP sentinels start in column 1, statements in column 7
! or later, and the continuation mark sits in column 6.
      PROGRAM EXAMPLE
      INCLUDE "omp_lib.h" ! or USE OMP_LIB
      INTEGER FLAG, FLAG_VAL
      INTEGER DATA0, DATA1

      FLAG = 0
! FLAG_VAL is written by both thread 1 and thread 2, so it must be
! private; sharing it would add a data race the example does not
! intend to show.
!$OMP PARALLEL NUM_THREADS(3) PRIVATE(FLAG_VAL)
      IF(OMP_GET_THREAD_NUM() .EQ. 0) THEN
         DATA0 = 17
! Flush without a list: orders the store to DATA0 before the
! atomic update of FLAG below.
!$OMP FLUSH
! Set flag to release thread 1
!$OMP ATOMIC UPDATE
         FLAG = FLAG + 1
! Flush of FLAG is implied by the atomic directive
      ELSE IF(OMP_GET_THREAD_NUM() .EQ. 1) THEN
! Loop until we see that FLAG reaches 1
! The flush list names DATA0, a declared variable of this program.
!$OMP FLUSH(FLAG, DATA0)
         FLAG_VAL = 0
         DO WHILE(FLAG_VAL .LT. 1)
!$OMP ATOMIC READ
            FLAG_VAL = FLAG
         ENDDO
!$OMP FLUSH
! DATA0 is 17 here
         PRINT *, 'Thread 1 awoken. DATA0 = ', DATA0
         DATA1 = 42
! Intentionally incorrect: FLAG is not in the flush-set, so this
! flush is not ordered with the atomic update of FLAG below.
!$OMP FLUSH(DATA1)
! Set FLAG to release thread 2
!$OMP ATOMIC UPDATE
         FLAG = FLAG + 1
! Flush of FLAG is implied by the atomic directive
      ELSE IF(OMP_GET_THREAD_NUM() .EQ. 2) THEN
! Loop until we see that FLAG reaches 2
         FLAG_VAL = 0
         DO WHILE(FLAG_VAL .LT. 2)
!$OMP ATOMIC READ
            FLAG_VAL = FLAG
         ENDDO
!$OMP FLUSH(DATA0, DATA1)
! There is a data race here; data0 is 17 and data1 is undefined
         PRINT *, 'Thread 2 awoken. DATA0 = ', DATA0,
     &            ' and DATA1 = ', DATA1
      ENDIF
!$OMP END PARALLEL
      END
The following two examples illustrate the ordering properties of the flush operation. The flush operations are strong flushes that are applied to the specified flush lists. However, use of a flush construct with a list is extremely error prone and users are strongly discouraged from attempting it. In the codes the programmer intends to prevent simultaneous execution of the protected section by the two threads. The atomic directives in the codes ensure that the accesses to shared variables a and b are atomic write and atomic read operations. Otherwise both examples would contain data races and automatically result in unspecified behavior.
In the following incorrect code example, operations on variables a and b are not ordered with respect to each other. For instance, nothing prevents the compiler from moving the flush of b on thread 0 or the flush of a on thread 1 to a position completely after the protected section (assuming that the protected section on thread 0 does not reference b and the protected section on thread 1 does not reference a). If either re-ordering happens, both threads can simultaneously execute the protected section. Any shared data accessed in the protected section is not guaranteed to be current or consistent during or after the protected section.
//%compiler: clang
//%cflags: -fopenmp
/*
* name: mem_model.4a
* type: C
* version: omp_3.1
*/
#include <omp.h>
/*
 * flush_incorrect: deliberately INCORRECT attempt at mutual exclusion
 * between two threads using variables a and b.
 *
 * Each thread atomically sets its own variable to 1, issues two separate
 * single-variable flushes, then atomically reads the other thread's
 * variable into tmp and enters the protected section when tmp == 0.
 * Because flush(a) and flush(b) are separate directives with different
 * lists, they are not ordered with respect to each other, so both threads
 * may observe tmp == 0.
 */
void flush_incorrect()
{
/* a and b are declared outside the parallel region and appear in no
 * data-sharing clause. */
int a, b;
a = b = 0;
#pragma omp parallel num_threads(2)
{
/* myid and tmp are declared inside the region, one instance per thread. */
int myid = omp_get_thread_num();
int tmp;
if ( myid == 0 ) { // thread 0
/* Announce intent via b, then look at a. */
#pragma omp atomic write
b = 1;
#pragma omp flush(b) // flushes are not ordered
#pragma omp flush(a) // compiler may move them around
#pragma omp atomic read
tmp = a;
}
else { // thread 1
/* Mirror image: announce intent via a, then look at b. */
#pragma omp atomic write
a = 1;
#pragma omp flush(a) // flushes are not ordered
#pragma omp flush(b) // compiler may move them around
#pragma omp atomic read
tmp = b;
}
if ( tmp == 0 ) { // exclusive access not guaranteed
/* protected section */
}
}
}
!!%compiler: gfortran
!!%cflags: -fopenmp
! name: mem_model.4a
! type: F-free
! version: omp_3.1
! flush_incorrect: deliberately INCORRECT attempt at mutual exclusion
! between two threads using variables a and b.
!
! Each thread atomically sets its own variable to 1, issues two separate
! single-variable flushes, then atomically reads the other thread's
! variable into tmp and enters the protected section when tmp == 0.
! Because flush(a) and flush(b) are separate directives with different
! lists, they are not ordered with respect to each other, so both threads
! may observe tmp == 0.
subroutine flush_incorrect
use omp_lib
implicit none
integer a, b, tmp
integer myid
a = 0; b = 0
! myid and tmp are per-thread; a and b appear in no data-sharing clause.
!$omp parallel private(myid,tmp) num_threads(2)
myid = omp_get_thread_num()
if ( myid == 0 ) then ! thread 0
! Announce intent via b, then look at a.
!$omp atomic write
b = 1
!$omp flush(b) ! flushes are not ordered
!$omp flush(a) ! compiler may move them around
!$omp atomic read
tmp = a
else ! thread 1
! Mirror image: announce intent via a, then look at b.
!$omp atomic write
a = 1
!$omp flush(a) ! flushes are not ordered
!$omp flush(b) ! compiler may move them around
!$omp atomic read
tmp = b
endif
if ( tmp == 0 ) then ! exclusive access not guaranteed
!! protected section
endif
!$omp end parallel
end subroutine
The following code example correctly ensures that the protected section is executed by only one thread at a time. Execution of the protected section by neither thread is considered correct in this example. This occurs if both flushes complete prior to either thread executing its if statement for the protected section. The compiler is prohibited from moving the flush at all for either thread, ensuring that the respective assignment is complete and the data is flushed before the if statement is executed.
//%compiler: clang
//%cflags: -fopenmp
/*
* name: mem_model.4b
* type: C
* version: omp_3.1
*/
#include <omp.h>
/*
 * flush_correct: mutual exclusion between two threads using variables a
 * and b, with correctly ordered flushes.
 *
 * Each thread atomically sets its own variable to 1, issues ONE flush that
 * names both a and b, then atomically reads the other thread's variable
 * into tmp and enters the protected section when tmp == 0. At most one
 * thread enters; it is also possible that neither thread enters.
 */
void flush_correct()
{
/* a and b are declared outside the parallel region and appear in no
 * data-sharing clause. */
int a, b;
a = b = 0;
#pragma omp parallel num_threads(2)
{
/* myid and tmp are declared inside the region, one instance per thread. */
int myid = omp_get_thread_num();
int tmp;
if ( myid == 0 ) { // thread 0
/* Announce intent via b, then look at a. */
#pragma omp atomic write
b = 1;
#pragma omp flush(a,b) // flushes are ordered
#pragma omp atomic read
tmp = a;
}
else { // thread 1
/* Mirror image: announce intent via a, then look at b. */
#pragma omp atomic write
a = 1;
#pragma omp flush(a,b) // flushes are ordered
#pragma omp atomic read
tmp = b;
}
if ( tmp == 0 ) { // access by single thread
/* protected section */
}
}
}
!!%compiler: gfortran
!!%cflags: -fopenmp
! name: mem_model.4b
! type: F-free
! version: omp_3.1
! flush_correct: mutual exclusion between two threads using variables a
! and b, with correctly ordered flushes.
!
! Each thread atomically sets its own variable to 1, issues ONE flush that
! names both a and b, then atomically reads the other thread's variable
! into tmp and enters the protected section when tmp == 0. At most one
! thread enters; it is also possible that neither thread enters.
subroutine flush_correct
use omp_lib
implicit none
integer a, b, tmp
integer myid
a = 0; b = 0
! myid and tmp are per-thread; a and b appear in no data-sharing clause.
!$omp parallel private(myid,tmp) num_threads(2)
myid = omp_get_thread_num()
if ( myid == 0 ) then ! thread 0
! Announce intent via b, then look at a.
!$omp atomic write
b = 1
!$omp flush(a,b) ! flushes are ordered
!$omp atomic read
tmp = a
else ! thread 1
! Mirror image: announce intent via a, then look at b.
!$omp atomic write
a = 1
!$omp flush(a,b) ! flushes are ordered
!$omp atomic read
tmp = b
endif
if ( tmp == 0 ) then ! access by single thread
!! protected section
endif
!$omp end parallel
end subroutine