8.1. tile Construct#

In the following example a tile construct transforms two nested loops within the func1 function into four nested loops. The tile sizes in the sizes clause are applied from outermost to innermost loops (left-to-right). The effective tiling operation is illustrated in the func2 function. (For easier illustration, tile sizes for all examples in this section evenly divide the iteration counts so that there are no remainders.)

In the following C/C++ code the inner loop traverses the columns and the outer loop traverses the rows of a 100x128 (row x column) matrix. The sizes(5,16) clause of the tile construct specifies a 5x16 blocking, applied to the outer (row) and inner (column) loops, respectively. The parallel worksharing-loop construct that precedes the tile construct is applied to the loop nest generated by the tile transformation, as shown in func2.

//%compiler: clang
//%cflags: -fopenmp

/*
* name:       tile.1
* type:       C
* version:    omp_5.1
*/
/*
 * Fill the 100x128 matrix A so that every element encodes its own
 * position: A[row][col] = row*1000 + col.
 *
 * The tile construct blocks the two-deep loop nest into 5x16 tiles
 * (5 rows by 16 columns); the parallel worksharing-loop construct
 * written above it is applied to the loop nest produced by the tiling.
 */
void func1(int A[100][128])
{
   #pragma omp parallel for
   #pragma omp tile sizes(5,16)
   for (int row = 0; row < 100; row++)
       for (int col = 0; col < 128; col++)
           A[row][col] = 1000*row + col;
}

/*
 * Hand-written form of the loop nest that tile sizes(5,16) produces for
 * func1: the two outer loops step from tile to tile (5 rows, 16 columns
 * at a time) and the two inner loops visit the elements of one tile.
 * Every element receives A[row][col] = row*1000 + col.
 */
void func2(int A[100][128])
{
   #pragma omp parallel for
   for (int rowTile = 0; rowTile < 100; rowTile += 5)
       for (int colTile = 0; colTile < 128; colTile += 16)
           for (int row = rowTile; row < rowTile + 5; row++)
               for (int col = colTile; col < colTile + 16; col++)
                  A[row][col] = 1000*row + col;
}

In the following Fortran code the inner loop traverses the rows and the outer loop traverses the columns of a 128x100 (row x column) matrix. The sizes(5,16) clause of the tile construct specifies a 5x16 blocking, applied to the outer (column) and inner (row) loops, respectively. The parallel worksharing-loop construct that precedes the tile construct is applied to the loop nest generated by the tile transformation, as shown in func2.

!!%compiler: gfortran
!!%cflags: -fopenmp

! name:       tile.1
! type:       F-free
! version:    omp_5.1

! Fill the 128x100 array A so that A(j,i) = j*1000 + i.
!
! The tile construct blocks the two-deep loop nest into 5x16 tiles:
! 5 iterations of the outer (i, column) loop by 16 iterations of the
! inner (j, row) loop. The parallel worksharing-loop construct written
! above it is applied to the loop nest produced by the tiling.
!
! A : every element is assigned; no element is read.
subroutine func1(A)
    implicit none
    integer, intent(out) :: A(128,100)
    integer :: i, j

    !$omp parallel do
    !$omp tile sizes(5,16)
    do i = 1, 100
        do j = 1, 128
            A(j,i) = j*1000 + i
        end do
    end do
end subroutine func1

! Hand-written form of the loop nest that tile sizes(5,16) produces for
! func1. The two outer loops (i1, j1) step from tile to tile; the two
! inner loops (i2, j2) visit the 5x16 elements of one tile.
! Every element receives A(j2,i2) = j2*1000 + i2.
!
! A : every element is assigned; no element is read.
subroutine func2(A)
    implicit none
    integer, intent(out) :: A(128,100)
    integer :: i1, j1, i2, j2

    !$omp parallel do
    do i1 = 1, 100, 5
        do j1 = 1, 128, 16
            ! Upper bounds are inclusive, hence the "-1".
            do i2 = i1, i1 + (5-1)
                do j2 = j1, j1 + (16-1)
                    A(j2,i2) = j2*1000 + i2
                end do
            end do
        end do
    end do
end subroutine func2

This example illustrates transformation nesting. Here, a 4x4 “outer” tile construct is applied to the “inner” tile transform shown in the example above. The effect of the inner tile construct is shown in func2 (cf. func2 in the tile.1 example). The outer tile construct’s sizes(4,4) clause applies a 4x4 tile upon the resulting blocks of the inner transform. The effective looping is shown in func3.

//%compiler: clang
//%cflags: -fopenmp

/*
* name:       tile.2
* type:       C
* version:    omp_5.1
*/
/*
 * Fill the 100x128 matrix A with A[row][col] = row*1000 + col using two
 * nested tile constructs: the inner one (sizes(5,16)) is written closest
 * to the loops, and the outer one (sizes(4, 4)) is applied to the loop
 * nest that the inner one generates.
 */
void func1(int A[100][128])
{
   #pragma omp tile sizes(4, 4)
   #pragma omp tile sizes(5,16)
   for (int row = 0; row < 100; row++)
       for (int col = 0; col < 128; col++)
           A[row][col] = 1000*row + col;
}

/*
 * Intermediate form: the 5x16 tiling is written out by hand as four
 * loops, while the sizes(4,4) tile construct is still a directive on
 * the two tile-stepping loops.
 * Every element receives A[row][col] = row*1000 + col.
 */
void func2(int A[100][128])
{
   #pragma omp tile sizes(4,4)
   for (int rowTile = 0; rowTile < 100; rowTile += 5)
       for (int colTile = 0; colTile < 128; colTile += 16)
           for (int row = rowTile; row < rowTile + 5; row++)
               for (int col = colTile; col < colTile + 16; col++)
                  A[row][col] = 1000*row + col;
}

/*
 * Both tilings written out by hand as six loops, in three pairs:
 *   - group loops step over 4x4 groups of tiles (20 rows, 64 columns),
 *   - tile loops step over the 5x16 tiles inside one group,
 *   - element loops visit the elements of one tile.
 * Every element receives A[row][col] = row*1000 + col.
 */
void func3(int A[100][128])
{
   /* group loops */
   for (int rowGroup = 0; rowGroup < 100; rowGroup += 4*5)
      for (int colGroup = 0; colGroup < 128; colGroup += 4*16)
         /* tile loops */
         for (int rowTile = rowGroup; rowTile < rowGroup + 4*5; rowTile += 5)
            for (int colTile = colGroup; colTile < colGroup + 4*16; colTile += 16)
               /* element loops */
               for (int row = rowTile; row < rowTile + 5; row++)
                  for (int col = colTile; col < colTile + 16; col++)
                     A[row][col] = 1000*row + col;
}
!!%compiler: gfortran
!!%cflags: -fopenmp

! name:       tile.2
! type:       F-free
! version:    omp_5.1

! Fill the 128x100 array A with A(j,i) = j*1000 + i using two nested
! tile constructs: the inner one (sizes(5,16)) is written closest to the
! loops, and the outer one (sizes(4, 4)) is applied to the loop nest that
! the inner one generates.
!
! A : every element is assigned; no element is read.
subroutine func1(A)
    implicit none
    integer, intent(out) :: A(128,100)
    integer :: i, j

    !$omp tile sizes(4, 4)
    !$omp tile sizes(5,16)
    do i = 1, 100
        do j = 1, 128
            A(j,i) = j*1000 + i
        end do
    end do
end subroutine func1

! Intermediate form: the 5x16 tiling is written out by hand as four
! loops, while the sizes(4,4) tile construct is still a directive on the
! two tile-stepping loops (i1, j1).
! Every element receives A(j2,i2) = j2*1000 + i2.
!
! A : every element is assigned; no element is read.
subroutine func2(A)
    implicit none
    integer, intent(out) :: A(128,100)
    integer :: i1, j1, i2, j2

    !$omp tile sizes(4,4)
    do i1 = 1, 100, 5
        do j1 = 1, 128, 16
            ! Upper bounds are inclusive, hence the "-1".
            do i2 = i1, i1 + (5-1)
                do j2 = j1, j1 + (16-1)
                    A(j2,i2) = j2*1000 + i2
                end do
            end do
        end do
    end do
end subroutine func2

! Both tilings written out by hand as six loops, in three pairs:
!   - i11, j11 step over 4x4 groups of tiles (20 columns, 64 rows),
!   - i12, j12 step over the 5x16 tiles inside one group,
!   - i2,  j2  visit the elements of one tile.
! Every element receives A(j2,i2) = j2*1000 + i2.
!
! A : every element is assigned; no element is read.
subroutine func3(A)
    implicit none
    integer, intent(out) :: A(128,100)
    integer :: i11, j11, i12, j12, i2, j2

    do i11 = 1, 100, 5*4
        do j11 = 1, 128, 16*4
            ! Upper bounds are inclusive, hence the "-1" at each level.
            do i12 = i11, i11 + (5*4-1), 5
                do j12 = j11, j11 + (16*4-1), 16
                    do i2 = i12, i12 + 5-1
                        do j2 = j12, j12 + 16-1
                            A(j2,i2) = j2*1000 + i2
                        end do
                    end do
                end do
            end do
        end do
    end do
end subroutine func3