Hardware Emulation and Hardware Flows - 2024.2 English - UG1603

AI Engine-ML Kernel and Graph Programming Guide (UG1603)

Document ID
UG1603
Release Date
2024-11-28
Version
2024.2 English

input_gmio and output_gmio can be used not only with the aiesimulator, but also in hardware emulation and hardware flows. To enable them in hardware emulation and hardware flows, add the following code to graph.cpp.

// Compiled only when neither __AIESIM__ nor __X86SIM__ is defined, i.e. not
// for aiesimulator or x86simulator builds.
// NOTE(review): the statements below read argv, so they presumably belong
// inside main(); the #include lines would normally sit at file scope — confirm
// placement when pasting into graph.cpp.
#if !defined(__AIESIM__) && !defined(__X86SIM__)
    #include "adf/adf_api/XRTConfig.h"
    #include "experimental/xrt_kernel.h"
    // Create XRT device handle for ADF API
    
    // The xclbin path is taken from the first command-line argument.
    char* xclbinFilename = argv[1];
    auto dhdl = xrtDeviceOpen(0);//device index=0
    // Load the xclbin onto the opened device, then query its UUID.
    xrtDeviceLoadXclbinFile(dhdl,xclbinFilename);
    xuid_t uuid;
    xrtDeviceGetXclbinUUID(dhdl, uuid);
       
    // Hand the device handle and xclbin UUID to the ADF API.
    adf::registerXRT(dhdl, uuid);
#endif

Using the guard macros __AIESIM__ and __X86SIM__, the same version of graph.cpp can work for the aiesimulator, x86simulator, hardware emulation, and hardware flows. Note that the preceding code must be placed before any call to the graph or GMIO ADF APIs. At the end of the program, close the device using the xrtDeviceClose() API.

// Close the device only in the flows where it was opened. The handle dhdl is
// declared under this same guard, so it does not exist in aiesimulator or
// x86simulator builds and must not be referenced there.
#if !defined(__AIESIM__) && !defined(__X86SIM__)
    xrtDeviceClose(dhdl);
#endif

To compile the code for hardware flow, see Programming the PS Host Application in the AI Engine Tools and Flows User Guide (UG1076).

You can use the Xilinx Runtime (XRT) API and the ADF API to control GMIO operations. The XRT APIs that perform GMIO operations are declared in xrt/xrt_aie.h.

The following is example host code that uses the C++ XRT APIs for GMIO data transfers:

#include <unistd.h>
#include <fstream>
#include <iostream>
#include <vector>
#include <adf.h>
#include "xrt/xrt_kernel.h"
#include "xrt/xrt_aie.h"
#include <graph.cpp>
// Reference implementation used by main() to produce the expected ("gold")
// output that the graph result is compared against.
//   din  - input samples
//   dout - buffer that receives the reference results
//   size - element count (main() passes ITERATION*1024)
// The body is elided in this example.
void ref_func(int32* din,int32* dout,int size){
  ......
}
// Number of graph iterations requested via ghdl.run() in main().
const int ITERATION=4;
// Size in bytes of each GMIO buffer: ITERATION*1024 int elements
// (presumably 1024 ints are processed per graph iteration — confirm against the graph).
const int BLOCK_SIZE_in_Bytes=ITERATION*1024*sizeof(int);
// Host application: loads the xclbin given as argv[1], runs graph "gr" for
// ITERATION iterations, streams ITERATION*1024 ints through gr.gmioIn /
// gr.gmioOut using XRT buffer objects, and compares the result with ref_func.
//
// Returns 0 when the output matches the reference, 1 when the first mismatch
// is found or when the xclbin argument is missing.
int main(int argc, char ** argv) {
  // The xclbin path is required; reading argv[1] without it is undefined.
  if(argc<2){
    std::cerr<<"Usage: "<<(argc>0?argv[0]:"host")<<" <xclbin>"<<std::endl;
    return 1;
  }
  char* xclbinFilename = argv[1];

  // Element count of each buffer; BLOCK_SIZE_in_Bytes is the same amount in bytes.
  const int numElements=ITERATION*1024;

  // Open xclbin
  auto device = xrt::device(0); //device index=0
  auto uuid = device.load_xclbin(xclbinFilename);

  // Allocate the input/output buffer objects and map them into host memory.
  auto din_buffer = xrt::aie::bo (device, BLOCK_SIZE_in_Bytes, xrt::bo::flags::normal, /*memory group*/0);
  int* dinArray= din_buffer.map<int*>();
  auto dout_buffer = xrt::aie::bo (device, BLOCK_SIZE_in_Bytes, xrt::bo::flags::normal, /*memory group*/0);
  int* doutArray= dout_buffer.map<int*>();

  // Reference output, sized in elements (not bytes) and released automatically.
  std::vector<int> doutRef(numElements);

  int error=0;
  for(int i=0;i<numElements;i++){
    dinArray[i]=i;
  }
  ref_func(dinArray,doutRef.data(),numElements);

  auto ghdl=xrt::graph(device,uuid,"gr");
  ghdl.run(ITERATION);

  // Start both GMIO transfers; only the output transfer's handle is waited on below.
  din_buffer.async("gr.gmioIn",XCL_BO_SYNC_BO_GMIO_TO_AIE,BLOCK_SIZE_in_Bytes,/*offset*/0);
  auto dout_buffer_run=dout_buffer.async("gr.gmioOut",XCL_BO_SYNC_BO_AIE_TO_GMIO,BLOCK_SIZE_in_Bytes,/*offset*/0);

  ghdl.wait();
  dout_buffer_run.wait();

  // Report only the first mismatch.
  for(int i=0;i<numElements;i++){
    if(doutArray[i]!=doutRef[i]){
      std::cout<<"ERROR:dout["<<i<<"]="<<doutArray[i]<<",gold="<<doutRef[i]<<std::endl;
      error++;
      break;
    }
  }

  if(error==0){
    std::cout<<"PASS!"<<std::endl;
  }else{
    std::cout<<"ERROR!"<<std::endl;
  }

  return error;
}