Suppose we have a buffer named mtx
stored in a shared buffer.
Its size is 10x6, and 4 kernels want to access it: 2 write to it and 2 read from it:
// The four kernels that access the shared buffer: k1 and k2 write to it,
// k3 and k4 read from it.
kernel k1, k2, k3, k4;
// The shared buffer of int samples accessed by all four kernels.
shared_buffer<int> mtx;
// Graph constructor (abbreviated): creates the kernels and the shared buffer.
mygraph()
{
// Bind each kernel to its kernel function (func1..func4 are defined elsewhere).
k1 = kernel::create(func1); k2 = kernel::create(func2);
k3 = kernel::create(func3); k4 = kernel::create(func4);
// First argument is the buffer dimensions; the next two are the number of
// write-input ports and read-output ports.
mtx = shared_buffer<int>::create({10, 6}, 2, 2); // Size:10x6, 2 write-inputs, 2 read-outputs
// Remaining graph setup (connections and access patterns) is elided here.
…
}
Kernel k1
writes to the buffer tile by tile. Each tile has a size of 3x2, and the origin of the write access is (0,0).
The access scheme is as follows:
Dimension 0: 2 blocks, 3 samples apart
Dimension 1: 3 blocks, 2 samples apart.
// Write pattern for input port 0 of mtx (used by k1): 3x2 tiles in the 10x6
// buffer, starting at offset (0,0). The traversal lists dimension 0 first
// (2 tiles, 3 samples apart), then dimension 1 (3 tiles, 2 samples apart).
write_access(mtx.in[0]) = tiling({
.buffer_dimension={10,6}, .tiling_dimension={3,2}, .offset={0,0}, .tile_traversal = {{.dimension=0, .stride=3, .wrap=2}, {.dimension=1, .stride=2, .wrap=3}}});
Kernel k2
also writes to the buffer, but with a different tile size and traversal order.
The access scheme is as follows:
Dimension 1: 2 blocks, 3 samples apart
Dimension 0: 2 blocks, 2 samples apart
The origin of the subset is the sample at position (6,0):
// Write pattern for input port 1 of mtx (used by k2): 2x3 tiles in the 10x6
// buffer, starting at offset (6,0). The traversal lists dimension 1 first
// (2 tiles, 3 samples apart), then dimension 0 (2 tiles, 2 samples apart).
write_access(mtx.in[1]) = tiling({
.buffer_dimension={10,6}, .tiling_dimension={2,3}, .offset={6,0},
.tile_traversal = {{.dimension=1, .stride=3, .wrap=2},{.dimension=0, .stride=2, .wrap=2}}});
The 2 other kernels, k3 and k4,
read from the buffer with access patterns that are completely different from the ones used to write it.
These access schemes are defined in the graph with:
// Read pattern for output port 0 of mtx: 2x6 tiles in the 10x6 buffer,
// starting at offset (0,0), traversed along dimension 0 only
// (2 tiles, 2 samples apart).
read_access(mtx.out[0]) = tiling({
.buffer_dimension={10,6}, .tiling_dimension={2,6}, .offset={0,0},
.tile_traversal = {{.dimension=0, .stride=2, .wrap=2}}});
// Read pattern for output port 1 of mtx: 3x6 tiles in the 10x6 buffer,
// starting at offset (4,0), traversed along dimension 0 only
// (2 tiles, 3 samples apart).
// Fix: the designated initializers need a comma between .offset and
// .tile_traversal; without it the statement does not compile.
read_access(mtx.out[1]) = tiling({
.buffer_dimension={10,6}, .tiling_dimension={3,6}, .offset={4,0},
.tile_traversal = {{.dimension=0, .stride=3, .wrap=2}}});
The overall C++ code, including the connections of the kernels to the shared_buffer, looks like this:
/// Example graph in which four kernels share one 10x6 buffer of int samples.
///
/// k1 and k2 write to the buffer tile by tile through its two input ports;
/// k3 and k4 read from it through its two output ports, each with its own
/// tiling. Every tile_traversal entry names the buffer dimension it walks,
/// the distance in samples between consecutive tiles (stride) and the number
/// of tiles along that dimension (wrap).
class mygraph : public graph
{
public:  // Members and constructor must be accessible to instantiate the graph.
    kernel k1, k2, k3, k4;
    shared_buffer<int> mtx;

    mygraph()
    {
        // Bind each kernel to its kernel function (defined elsewhere).
        k1 = kernel::create(func1);
        k2 = kernel::create(func2);
        k3 = kernel::create(func3);
        k4 = kernel::create(func4);

        mtx = shared_buffer<int>::create({10, 6}, 2, 2); // 10x6, 2 write-inputs, 2 read-outputs

        // k1 writes 3x2 tiles from origin (0,0): dimension 0 first
        // (2 tiles, 3 samples apart), then dimension 1 (3 tiles, 2 samples apart).
        connect<> n1(k1.out[0], mtx.in[0]);
        write_access(mtx.in[0]) = tiling({
            .buffer_dimension={10,6}, .tiling_dimension={3,2}, .offset={0,0},
            .tile_traversal = {{.dimension=0, .stride=3, .wrap=2}, {.dimension=1, .stride=2, .wrap=3}}});

        // k2 writes 2x3 tiles from origin (6,0), in a different order than k1:
        // dimension 1 first (2 tiles, 3 samples apart), then dimension 0
        // (2 tiles, 2 samples apart).
        connect<> n2(k2.out[0], mtx.in[1]);
        write_access(mtx.in[1]) = tiling({
            .buffer_dimension={10,6}, .tiling_dimension={2,3}, .offset={6,0},
            .tile_traversal = {{.dimension=1, .stride=3, .wrap=2}, {.dimension=0, .stride=2, .wrap=2}}});

        // k3 reads 2x6 tiles from origin (0,0) along dimension 0
        // (2 tiles, 2 samples apart).
        connect<> n3(mtx.out[0], k3.in[0]);
        read_access(mtx.out[0]) = tiling({
            .buffer_dimension={10,6}, .tiling_dimension={2,6}, .offset={0,0},
            .tile_traversal = {{.dimension=0, .stride=2, .wrap=2}}});

        // k4 reads 3x6 tiles from origin (4,0) along dimension 0
        // (2 tiles, 3 samples apart).
        connect<> n4(mtx.out[1], k4.in[0]);
        read_access(mtx.out[1]) = tiling({
            .buffer_dimension={10,6}, .tiling_dimension={3,6}, .offset={4,0},
            .tile_traversal = {{.dimension=0, .stride=3, .wrap=2}}});
    }
};