On data center platforms, it is more efficient to allocate memory aligned
on 4K page boundaries. On embedded platforms, it is more efficient to perform contiguous
memory allocation. In either case, you can let XRT allocate the host memory when
creating the buffers. This is done by using the CL_MEM_ALLOC_HOST_PTR
flag when creating the buffers, and then mapping the
allocated memory to user-space pointers using clEnqueueMapBuffer.
With this approach, it is not necessary to
create a host-space pointer aligned to the 4K boundary.
The clEnqueueMapBuffer API maps the
specified buffer and returns a pointer created by XRT to this mapped region. Then, fill
the host-side pointer with your data, and call clEnqueueMigrateMemObjects
to transfer the data to and from the device. The
following code example uses this style:
// Two cl_mem buffer, for read and write by kernel
cl_mem dev_mem_read_ptr = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(int) * number_of_words, NULL, NULL);
cl_mem dev_mem_write_ptr = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(int) * number_of_words, NULL, NULL);
cl::Buffer in1_buf(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, sizeof(int) * DATA_SIZE, NULL, &err);
// Setting arguments
clSetKernelArg(kernel, 0, sizeof(cl_mem), &dev_mem_read_ptr);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_mem_write_ptr);
// Get Host side pointer of the cl_mem buffer object
auto host_write_ptr = clEnqueueMapBuffer(queue,dev_mem_read_ptr,true,CL_MAP_WRITE,0,bytes,0,nullptr,nullptr,&err);
auto host_read_ptr = clEnqueueMapBuffer(queue,dev_mem_write_ptr,true,CL_MAP_READ,0,bytes,0,nullptr,nullptr,&err);
// Fill up the host_write_ptr to send the data to the FPGA
for(int i=0; i< MAX; i++) {
host_write_ptr[i] = <.... >
}
// Migrate
cl_mem mems[2] = {host_write_ptr,host_read_ptr};
clEnqueueMigrateMemObjects(queue,2,mems,0,0,nullptr,&migrate_event));
// Schedule the kernel
clEnqueueTask(queue,kernel,1,&migrate_event,&enqueue_event);
// Migrate data back to host
clEnqueueMigrateMemObjects(queue, 1, &dev_mem_write_ptr,
CL_MIGRATE_MEM_OBJECT_HOST,1,&enqueue_event, &data_read_event);
clWaitForEvents(1,&data_read_event);
// Now use the data from the host_read_ptr
To work with an example using clEnqueueMapBuffer, refer to Data Transfer (C) in the Vitis Examples GitHub repository.