Access to external memory must be declared and managed in the graph. The DMA related to the external memory is located in the interface tile.
External Memory Related API
Buffers located in external memory are declared in the graph class and parameterized in the constructor of the class.
This type of buffer is called external_buffer. The
following code snippet shows the APIs related to external
buffers:
// Example graph with one kernel placed between two buffers that reside in
// external memory: ddrin is read by the kernel and ddrout is written by it.
class ExtBufGraph : public adf::graph {
public:
// Kernel connected between the two external buffers.
adf::kernel mk;
// Buffers located in external memory, holding uint32 samples.
adf::external_buffer<uint32> ddrin,ddrout;
// Creates the kernel, parameterizes both external buffers, sets their access
// schemes, and connects them to the kernel.
ExtBufGraph() {
mk = adf::kernel::create(maker_buf1d);
adf::source(mk) = "maker.cpp";
adf::runtime<ratio>(mk) = 0.9;
// Declare 2 buffers residing in the external memory.
// Both are 2-dimensional ({FRAME_LENGTH, NFRAMES}); the arguments after the
// size are the number of input ports and the number of output ports.
// ddrin has no input port and one output port (out[0], read by the kernel);
// ddrout has one input port (in[0], written by the kernel) and no output port.
ddrin = adf::external_buffer<uint32>::create({FRAME_LENGTH, NFRAMES}, 0, 1);
ddrout = adf::external_buffer<uint32>::create({FRAME_LENGTH, NFRAMES}, 1, 0);
// Specify the read and write access scheme for each iteration
adf::read_access(ddrin.out[0]) = adf::tiling(/* Tiling Parameters */);
adf::write_access(ddrout.in[0]) = adf::tiling(/* Tiling Parameters */);
// Data path: ddrin -> mk -> ddrout.
adf::connect(ddrin.out[0],mk.in[0]);
adf::connect(mk.out[0], ddrout.in[0]);
// Optional location constraints for the DMA channels of the external buffers.
// <column>, <row> and <channel_number> are placeholders to be replaced with
// actual values.
adf::location<dma>(ddrin.out[0]) = adf::dma_channel(adf::shim_tile, <column>, <row>, <channel_number>);
adf::location<dma>(ddrout.in[0]) = adf::dma_channel(adf::shim_tile, <column>, <row>, <channel_number>);
};
};
The line adf::external_buffer<uint32>
ddrin,ddrout; declares two buffers located in external memory called ddrin and ddrout that contain
uint32 data. The data type can be any type supported by
the AI Engine-ML.
In the graph class constructor,
the buffers are parameterized
with:
ddrin = adf::external_buffer<uint32>::create({FRAME_LENGTH, NFRAMES}, 0, 1);
ddrout = adf::external_buffer<uint32>::create({FRAME_LENGTH, NFRAMES}, 1, 0);
The external buffers are parameterized with three attributes:
- Size
- A vector of 1, 2 or 3 attributes that defines the sizes over the various dimensions of the buffer. These sizes are expressed as a number of data samples, regardless of the size of a sample in bytes.
- Number of input ports
- The number of physical input ports that are used to write to the external buffer.
- Number of output ports
- The number of physical output ports that are used to read from this buffer.
The read and write access schemes are specified with the read_access and write_access
API:
adf::write_access(ddrout.in[0]) = adf::tiling(/* Tiling Parameters */);
adf::read_access(ddrin.out[0]) = adf::tiling(/* Tiling Parameters */);
The dma_channel constraints can be
optionally used to constrain the location of the external
buffers:
adf::location<dma>(ddrin.out[0]) = adf::dma_channel(adf::shim_tile, <column>, <row>, <channel_number>);
adf::location<dma>(ddrout.in[0]) = adf::dma_channel(adf::shim_tile, <column>, <row>, <channel_number>);
Host Code API
// Host-side member functions of adf::external_buffer.

// Set the address of the external_buffer at runtime.
// ptr: address of the memory that backs the buffer (the host examples in this
// document pass pointers obtained from GMIO::malloc).
void
adf::external_buffer::setAddress(const void* ptr);
// Initiate external buffer to AIE transfer on external_buffer output port.
// (Non-blocking API)
// out: output port of the external buffer, e.g. buf.out[0].
return_code adf::external_buffer::gm2aie_nb(adf::port<adf::output>& out);
// Initiate AIE to external transfer on external_buffer input port.
// (Non-blocking API)
// in: input port of the external buffer, e.g. buf.in[0].
return_code adf::external_buffer::aie2gm_nb(adf::port<adf::input>& in);
// Wait for the transactions on the external buffer input port
// to complete (Blocking API)
// in: input port whose pending transactions are awaited.
return_code adf::external_buffer::wait(adf::port<adf::input>& in);
// Wait for the transactions on the external buffer output port
// to complete (Blocking API)
// out: output port whose pending transactions are awaited.
return_code adf::external_buffer::wait(adf::port<adf::output>& out);
Use GMIO::malloc and GMIO::free to allocate and
free physical memory in the DDR. Here is an example
code:
using namespace std;
// Graph instance driven by this host program.
FullControl EG;

// Total number of uint32 samples transferred in and out over all iterations.
const int InSize = NITERATIONS*InSizePerIteration;
const int OutSize = NITERATIONS*OutSizePerIteration;

// Runs the EG graph NITERATIONS times, one graph iteration per loop pass.
// On each pass the external buffers are pointed at the slice of the
// input/output arrays belonging to that iteration, the transfers are
// started, and the host waits for both transfers and for the graph.
// Returns 0.
int main(int argc, char ** argv) {
    // Prepare External Memory content for EG graph
    uint32_t* input_data = static_cast<uint32_t*>(GMIO::malloc(InSize*sizeof(uint32_t)));
    uint32_t* output_data = static_cast<uint32_t*>(GMIO::malloc(OutSize*sizeof(uint32_t)));

    // The index is a signed int so that it matches the type of the bounds
    // (InSize/OutSize); an unsigned index would force an implicit conversion
    // of the bound in the comparison.
    // Input is a ramp 0, 1, 2, ...; each element differs, so memset does not apply.
    for(int i = 0;i<InSize;i++)
        input_data[i] = i;
    // Output is pre-filled with 99999 (presumably a marker for entries the
    // graph has not written). memset writes a single byte value, so it cannot
    // produce this pattern either.
    for(int i = 0;i<OutSize;i++)
        output_data[i] = 99999;

    EG.init();

    for(int kiter=0;kiter<NITERATIONS;kiter++)
    {
        cout << "Iteration: " << kiter << endl;

        // Point each external buffer at the slice used by this iteration.
        EG.ddrin.setAddress(input_data + kiter*InSizePerIteration);
        EG.ddrout.setAddress(output_data + kiter*OutSizePerIteration);
        cout << "\tAddresses set. " << endl;

        // Start both transfers (non-blocking) before running the graph.
        EG.ddrin.gm2aie_nb(EG.ddrin.out[0]);
        EG.ddrout.aie2gm_nb(EG.ddrout.in[0]);
        cout << "\tTransactions sent. " << endl;

        EG.run(1);

        // Block until both transfers and the graph iteration have finished.
        EG.ddrin.wait(EG.ddrin.out[0]);
        EG.ddrout.wait(EG.ddrout.in[0]);
        EG.wait();
    }
    cout << "OFM Interface DMA transfer done, ready for file I/O!!" << endl;

    EG.end();

    // Release the memory obtained from GMIO::malloc.
    GMIO::free(input_data);
    GMIO::free(output_data);

    return 0;
}
#include "xrt/xrt_kernel.h"
#include "xrt/xrt_graph.h"
#include "xrt/xrt_aie.h"
// Number of graph iterations; also scales the sizes of the transfer buffers.
const int NITERATIONS=16;
// Sizes passed to XRT as byte counts.
// NOTE(review): InSizePerIteration/OutSizePerIteration are defined outside this
// snippet; confirm they are expressed in bytes here (not in samples).
const int BLOCK_SIZE_in_Bytes=NITERATIONS*InSizePerIteration;
const int BLOCK_SIZE_out_Bytes=NITERATIONS*OutSizePerIteration;

// XRT host program: loads the xclbin given as the first command-line argument,
// allocates the input and output buffer objects, runs graph "EG" for
// NITERATIONS iterations and transfers data through EG.ddrin / EG.ddrout.
// Returns 0 on success, 1 when the xclbin argument is missing, otherwise the
// error count produced by post processing.
int main(int argc, char ** argv) {
    // The xclbin path is mandatory; argv[1] must not be read without it.
    if(argc<2){
        std::cout<<"ERROR: missing xclbin file argument"<<std::endl;
        return 1;
    }

    // Create XRT device handle for ADF API
    char* xclbinFilename = argv[1];

    // Open xclbin
    auto device = xrt::device(0); //device index=0
    auto uuid = device.load_xclbin(xclbinFilename);

    auto din_buffer = xrt::aie::bo (device, BLOCK_SIZE_in_Bytes, xrt::bo::flags::normal, /*memory group*/0); //Only non-cacheable buffer is supported
    int* dinArray= din_buffer.map<int*>();
    // The buffer size is in bytes; /4 converts it to a count of 4-byte ints.
    for(int i=0;i<BLOCK_SIZE_in_Bytes/4;i++){//Initialize input data
        dinArray[i]=i;
    }

    auto dout_buffer = xrt::aie::bo (device, BLOCK_SIZE_out_Bytes, xrt::bo::flags::normal, /*memory group*/0); //Only non-cacheable buffer is supported
    int* doutArray= dout_buffer.map<int*>(); // host view of the output, for post processing

    auto ghdl=xrt::graph(device,uuid,"EG"); //Suppose graph instance name is EG
    // Run as many iterations as the buffers were sized for (NITERATIONS).
    ghdl.run(NITERATIONS);

    // Start both transfers asynchronously.
    auto dout_buffer_run=dout_buffer.async("EG.ddrout",XCL_BO_SYNC_BO_AIE_TO_GMIO,BLOCK_SIZE_out_Bytes,/*offset*/0);
    auto din_buffer_run=din_buffer.async("EG.ddrin",XCL_BO_SYNC_BO_GMIO_TO_AIE,BLOCK_SIZE_in_Bytes,/*offset*/0);

    std::cout<<"Wait for external buffer"<<std::endl;
    dout_buffer_run.wait();//Wait for the EG.ddrout transfer to complete

    std::cout<<"Wait for graph"<<std::endl;
    ghdl.end();

    int error=0;
    //post processing ......

    if(error==0){
        std::cout<<"TEST PASSED!"<<std::endl;
    }else{
        std::cout<<"ERROR!"<<std::endl;
    }
    return error;
}
External Buffers as Ping Pong Buffers
The buffers declared in external memory can also be exercised as ping pong buffers in DDR/LPDDR memory. Using external buffers as ping pong buffers helps pipeline the data flow.
External buffers can be used as either inputs or outputs to a graph. By default, these are configured as linear buffers, but they can also be configured as ping pong buffers.
The following is an example code to configure external buffers as ping pong buffers.
#include "add.h"
#include <adf.h>
using namespace adf;
// Example graph in which the external buffer `ext` is configured as a
// ping pong buffer. This excerpt is abbreviated: the rest of the constructor
// is elided.
class mygraph : public graph
{
public:
kernel k1;
kernel k2;
// Buffers located in external memory.
external_buffer<int32> input;
external_buffer<int32> output;
external_buffer<int32> ext;
shared_buffer<int32> ifm;
shared_buffer<int32> ofm;
mygraph()
{
// external_buffer create arguments: dimensions, number of input ports,
// number of output ports.
// input: 4096 samples, output port only.
input = external_buffer<int32>::create({4096},0,1);
ifm = shared_buffer<int32>::create({4096},1,1);
ofm = shared_buffer<int32>::create({4096},1,1);
// output: 4096 samples, input port only.
output = external_buffer<int32>::create({4096},1,0);
// ext: 1024 samples, one input port and one output port.
ext = external_buffer<int32>::create({1024},1,1);
//Configuring the external buffer as a ping pong buffer
num_buffers(ext) = 2;
k1 = kernel::create(add);
k2 = kernel::create(add);
// Remainder of the constructor is not shown in this excerpt.
....
}
In the graph above, the buffer ext is
configured to be a ping pong buffer between kernel k1 and k2. The setupPingPongBuffers() API is used to set up the ping pong buffers in graph.cpp:
#include <fstream>
#include <iostream>
#include "graph.h"
mygraph g;
#if defined(__AIESIM__) || defined(__ADF_FRONTEND__)
// Host program for the ping pong example: allocates the input/output arrays
// and the ping and pong buffers, attaches them to graph g, runs one graph
// iteration, writes the 4096 output values to data/output.txt (one per line)
// and releases the allocated memory. Returns 0.
int main()
{
    uint32_t* inputArray = (uint32_t*)GMIO::malloc(4096 * sizeof(uint32_t));
    uint32_t* outputArray = (uint32_t*)GMIO::malloc(4096 * sizeof(uint32_t));

    // Input holds 1..4096; output starts zeroed.
    for (int i = 0; i < 4096; i++) {
        inputArray[i] = i+1;
        outputArray[i] = 0;
    }

    g.init();

    g.input.setAddress(inputArray);
    g.output.setAddress(outputArray);

    // Start the input and output transfers (non-blocking).
    g.input.gm2aie_nb(g.input.out[0]);
    g.output.aie2gm_nb(g.output.in[0]);

    //Allocating ping and pong buffers
    uint32_t* ping = (uint32_t*)GMIO::malloc(1024 * sizeof(uint32_t));
    uint32_t* pong = (uint32_t*)GMIO::malloc(1024 * sizeof(uint32_t));
    for (int i = 0; i < 1024; i++) {
        ping[i] = 0;
        pong[i] = 0;
    }

    //Connecting the allocated buffers to the external memory
    g.ext.setupPingPongBuffers(ping, pong);

    g.run(1);

    // Block until the output transfer has completed before reading outputArray.
    g.output.wait(g.output.in[0]);

    g.end();

    std::ofstream ofs;
    ofs.open("data/output.txt", std::ofstream::out | std::ofstream::trunc);
    // '\n' avoids a flush per line; close() flushes the stream once at the end.
    for (int j=0; j< 4096; j++)
        ofs << outputArray[j] << '\n';
    ofs.close();

    // Release everything obtained from GMIO::malloc; the graph has ended and
    // the output has been written, so the buffers are no longer needed.
    GMIO::free(ping);
    GMIO::free(pong);
    GMIO::free(inputArray);
    GMIO::free(outputArray);

    return 0;
}
#endif