Access to external memory must be declared and managed in the graph. The DMA related to the external memory is located in the interface tile.
External Memory Related API
A buffer located in external memory must be declared in the graph class and parameterized in the constructor of the class. This type of buffer is called external_buffer. The following code snippet shows the APIs related to external buffers:
using namespace adf;
class ExtBufGraph : public graph {
public:
adf::kernel mk;
adf::external_buffer<uint32> ddrin,ddrout;
ExtBufGraph() {
mk = adf::kernel::create(maker_buf1d);
adf::source(mk) = "maker.cpp";
adf::runtime<ratio>(mk) = 0.9;
// Declare 2 buffers residing in the external memory
// both are 2-dimensional and the first one has an input port
// and the second one has an output port
ddrin = adf::external_buffer<uint32>::create({FRAME_LENGTH, NFRAMES}, 0, 1);
ddrout = adf::external_buffer<uint32>::create({FRAME_LENGTH, NFRAMES}, 1, 0);
// Specify the read and write access scheme for each iteration
adf::read_access(ddrin.out[0]) = adf::tiling(/* Tiling Parameters */);
adf::write_access(ddrout.in[0]) = adf::tiling(/* Tiling Parameters */);
adf::connect(ddrin.out[0],mk.in[0]);
adf::connect(mk.out[0], ddrout.in[0]);
location<dma>(ddrin.out[0]) = dma_channel(shim_tile, <column>, <row>, <channel_number>);
location<dma>(ddrout.in[0]) = dma_channel(shim_tile, <column>, <row>, <channel_number>);
};
};
The line adf::external_buffer<uint32> ddrin,ddrout; declares two buffers located in external memory, called ddrin and ddrout, that will contain uint32 data.
The data type can be any type supported by the AI Engine-ML.
Later, in the graph class constructor, the buffers are parameterized with:
ddrin = adf::external_buffer<uint32>::create({FRAME_LENGTH, NFRAMES}, 0, 1);
ddrout = adf::external_buffer<uint32>::create({FRAME_LENGTH, NFRAMES}, 1, 0);
The external buffers are parameterized with three attributes:
- Size
- A vector of 1, 2 or 3 values that defines the size of the buffer along each of its dimensions. These sizes are expressed as a number of data samples, regardless of the size of a sample in bytes.
- Number of input ports
- The number of physical input ports that are used to write to the external buffer.
- Number of output ports
- The number of physical output ports that are used to read from this buffer.
Note: External buffers cannot be used as intermediate data nodes in a data flow graph. Each external buffer must be either an input or an output to the graph.
The write order to the buffer as well as the read order from the buffer must be defined for each port. This is explained in "Tiling Parameters and Buffer Descriptors" and "Tiling Parameters Specification". There is no restriction on the pattern of reads and writes. Overlapping data (reads and writes) is allowed, but the reads and writes cannot access data outside the buffer dimensions. These access schemes are specified using the read_access and write_access APIs:
adf::write_access(ddrout.in[0]) = adf::tiling(/* Tiling Parameters */);
adf::read_access(ddrin.out[0]) = adf::tiling(/* Tiling Parameters */);
The dma_channel constraints can optionally be used to constrain the location of the external buffers:
location<dma>(ddrin.out[0]) = dma_channel(shim_tile, <column>, <row>, <channel_number>);
location<dma>(ddrout.in[0]) = dma_channel(shim_tile, <column>, <row>, <channel_number>);
Host Code API
The host code contains memory allocation and buffer management
instructions for the external buffers. The API list
follows:
// Set the address of the external_buffer at runtime.
// The host example passes a pointer into memory obtained from GMIO::malloc.
void
external_buffer::setAddress(const void* ptr);
// Initiate an external buffer to AIE transfer on an external_buffer output port.
// Non-blocking API: use wait() on the same output port to wait for completion.
return_code external_buffer::gm2aie_nb(adf::port<adf::output>& out);
// Initiate an AIE to external buffer transfer on an external_buffer input port.
// Non-blocking API: use wait() on the same input port to wait for completion.
return_code external_buffer::aie2gm_nb(adf::port<adf::input>& in);
// Wait for the transactions on the external buffer input port
// to complete (Blocking API)
return_code external_buffer::wait(adf::port<adf::input>& in);
// Wait for the transactions on the external buffer output port
// to complete (Blocking API)
return_code external_buffer::wait(adf::port<adf::output>& out);
In addition to the API list above, you should use GMIO::malloc and GMIO::free to allocate and free physical memory in the DDR. Here is an example code, which is followed by an example code using the XRT API in the host code for Linux:
using namespace std;
using namespace adf;

// Graph instance driven by this host program.
FullControl EG;

// Total number of uint32_t samples transferred in each direction over all iterations.
const int InSize = NITERATIONS*InSizePerIteration;
const int OutSize = NITERATIONS*OutSizePerIteration;

/**
 * Host program for graph EG.
 *
 * Allocates the input and output areas in external memory, then, for each of
 * the NITERATIONS iterations, points ddrin/ddrout at that iteration's slice,
 * starts the two non-blocking transfers, runs the graph for one iteration and
 * waits for the transfers and the graph to complete.
 *
 * @return 0 on success, 1 if an external-memory allocation failed.
 */
int main(int argc, char ** argv) {
    // Prepare External Memory content for EG graph
    uint32_t* input_data = static_cast<uint32_t*>(GMIO::malloc(InSize*sizeof(uint32_t)));
    uint32_t* output_data = static_cast<uint32_t*>(GMIO::malloc(OutSize*sizeof(uint32_t)));

    // Do not touch the buffers if an allocation failed.
    // NOTE(review): assumes GMIO::malloc reports failure by returning nullptr - confirm.
    if (input_data == nullptr || output_data == nullptr) {
        std::cerr << "ERROR: GMIO::malloc failed" << std::endl;
        if (input_data != nullptr) GMIO::free(input_data);
        if (output_data != nullptr) GMIO::free(output_data);
        return 1;
    }

    // Loop counters use the same (signed) type as InSize/OutSize to avoid a
    // signed/unsigned comparison.
    for (int i = 0; i < InSize; i++) // memset could be used
        input_data[i] = static_cast<uint32_t>(i);
    for (int i = 0; i < OutSize; i++) // memset could be used
        output_data[i] = 99999;

    EG.init();

    for (int kiter = 0; kiter < NITERATIONS; kiter++)
    {
        std::cout << "Iteration: " << kiter << std::endl;

        // Each iteration works on its own slice of the allocated areas.
        EG.ddrin.setAddress(input_data + kiter*InSizePerIteration);
        EG.ddrout.setAddress(output_data + kiter*OutSizePerIteration);
        std::cout << "\tAddresses set. " << std::endl;

        // Start both transfers (non-blocking) before running the graph.
        EG.ddrin.gm2aie_nb(EG.ddrin.out[0]);
        EG.ddrout.aie2gm_nb(EG.ddrout.in[0]);
        std::cout << "\tTransactions sent. " << std::endl;

        EG.run(1);

        // Block until both transfers and the graph iteration have completed.
        EG.ddrin.wait(EG.ddrin.out[0]);
        EG.ddrout.wait(EG.ddrout.in[0]);
        EG.wait();
    }

    std::cout << "OFM Interface DMA transfer done, ready for file I/O!!" << std::endl;

    EG.end();

    GMIO::free(input_data);
    GMIO::free(output_data);

    return 0;
}
#include <iostream>

#include "xrt/xrt_kernel.h"
#include "xrt/xrt_graph.h"
#include "xrt/xrt_aie.h"
const int NITERATIONS=16;
// Sizes handed to XRT for the buffer objects and for the transfers.
// NOTE(review): these are used as byte counts. If InSizePerIteration and
// OutSizePerIteration count samples rather than bytes, they must be multiplied
// by the sample size - confirm against their definition.
const int BLOCK_SIZE_in_Bytes=NITERATIONS*InSizePerIteration;
const int BLOCK_SIZE_out_Bytes=NITERATIONS*OutSizePerIteration;

/**
 * XRT host program for graph instance "EG".
 *
 * Loads the xclbin given as first command-line argument, creates and fills the
 * input buffer object, runs the graph for NITERATIONS iterations, starts the
 * asynchronous transfers on the "EG.ddrout" and "EG.ddrin" ports and waits for
 * the output transfer before ending the graph.
 *
 * @param argc number of command-line arguments; must be at least 2.
 * @param argv argv[1] is the path of the xclbin file to load.
 * @return 0 when the (placeholder) post-processing reports no error, 1 when the
 *         xclbin argument is missing, otherwise the post-processing error value.
 */
int main(int argc, char ** argv) {
    // The xclbin path is mandatory: refuse to read argv[1] when it is absent.
    if (argc < 2) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "host") << " <xclbin file>" << std::endl;
        return 1;
    }

    // Create XRT device handle for ADF API
    const char* xclbinFilename = argv[1];

    // Open xclbin
    auto device = xrt::device(0); //device index=0
    auto uuid = device.load_xclbin(xclbinFilename);

    // Input buffer object. Only non-cacheable buffer is supported
    auto din_buffer = xrt::aie::bo (device, BLOCK_SIZE_in_Bytes, xrt::bo::flags::normal, /*memory group*/0);
    int* dinArray= din_buffer.map<int*>();
    // Initialize input data: one int per sizeof(int) bytes of the buffer.
    const int dinCount = BLOCK_SIZE_in_Bytes / static_cast<int>(sizeof(int));
    for(int i=0;i<dinCount;i++){
        dinArray[i]=i;
    }

    // Output buffer object. Only non-cacheable buffer is supported
    auto dout_buffer = xrt::aie::bo (device, BLOCK_SIZE_out_Bytes, xrt::bo::flags::normal, /*memory group*/0);
    int* doutArray= dout_buffer.map<int*>();
    (void)doutArray; // mapped for the post-processing step below

    auto ghdl=xrt::graph(device,uuid,"EG"); //Suppose graph instance name is EG
    // Run as many iterations as the block sizes above were computed for.
    ghdl.run(NITERATIONS);

    // Start both transfers asynchronously on the external buffer ports.
    auto dout_buffer_run=dout_buffer.async("EG.ddrout",XCL_BO_SYNC_BO_AIE_TO_GMIO,BLOCK_SIZE_out_Bytes,/*offset*/0);
    auto din_buffer_run=din_buffer.async("EG.ddrin",XCL_BO_SYNC_BO_GMIO_TO_AIE,BLOCK_SIZE_in_Bytes,/*offset*/0);

    std::cout<<"Wait for external buffer"<<std::endl;
    dout_buffer_run.wait();//Wait for the transfer to EG.ddrout to complete

    std::cout<<"Wait for graph"<<std::endl;
    ghdl.end();

    int error=0;
    //post processing ......

    if(error==0){
        std::cout<<"TEST PASSED!"<<std::endl;
    }else{
        std::cout<<"ERROR!"<<std::endl;
    }
    return error;
}