The histogram function needs two frames to populate the histogram and produce correct auto white balance results. GTM and the other tone-mapping functions need three frames to populate their parameters and apply those parameters to produce a corrected image. In the specific example below, three iterations are needed because the GTM function is selected.
// Create the kernel object for the ISP pipeline accelerator.
OCL_CHECK(err, cl::Kernel kernel(program, "ISPPipeline_accel", &err));

// Run the accelerator three times: every iteration re-sends all inputs, launches
// the kernel once, reports the kernel time and reads the result back.
//
// NOTE(review): the status of each enqueue is assigned to `err` inside OCL_CHECK,
// matching the enqueueTask call below. This assumes OCL_CHECK(err, call) evaluates
// `call` and then tests `err` (as the usual Xilinx helper macro does) - confirm
// against the macro definition. Without the assignment the macro would test the
// stale status of an earlier call.
for (int i = 0; i < 3; i++) {
    // ---- Host -> device transfers (all blocking) ----
    OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_inVec,      // buffer on the FPGA
                                              CL_TRUE,           // blocking call
                                              0,                 // buffer offset in bytes
                                              vec_in_size_bytes, // Size in bytes
                                              gamma_lut));
    OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_R_IR_C1,        // buffer on the FPGA
                                              CL_TRUE,               // blocking call
                                              0,                     // buffer offset in bytes
                                              filter1_in_size_bytes, // Size in bytes
                                              R_IR_C1_wgts));
    OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_R_IR_C2,        // buffer on the FPGA
                                              CL_TRUE,               // blocking call
                                              0,                     // buffer offset in bytes
                                              filter1_in_size_bytes, // Size in bytes
                                              R_IR_C2_wgts));
    OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_B_at_R,         // buffer on the FPGA
                                              CL_TRUE,               // blocking call
                                              0,                     // buffer offset in bytes
                                              filter1_in_size_bytes, // Size in bytes
                                              B_at_R_wgts));
    OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_IR_at_R,        // buffer on the FPGA
                                              CL_TRUE,               // blocking call
                                              0,                     // buffer offset in bytes
                                              filter2_in_size_bytes, // Size in bytes
                                              IR_at_R_wgts));
    OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_IR_at_B,        // buffer on the FPGA
                                              CL_TRUE,               // blocking call
                                              0,                     // buffer offset in bytes
                                              filter2_in_size_bytes, // Size in bytes
                                              IR_at_B_wgts));
    OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_sub_wgts,        // buffer on the FPGA
                                              CL_TRUE,                // blocking call
                                              0,                      // buffer offset in bytes
                                              sub_wgts_in_size_bytes, // Size in bytes
                                              sub_wgts));

    if (hdr_en) {
        // With hdr_en set, the weights buffer is sent as well and the interleaved
        // image is used as the input frame.
        OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_inVec_Weights,  // buffer on the FPGA
                                                  CL_TRUE,               // blocking call
                                                  0,                     // buffer offset in bytes
                                                  vec_weight_size_bytes, // Size in bytes
                                                  wr_hls));
        OCL_CHECK(err, err = q.enqueueWriteBuffer(imageToDevice, CL_TRUE, 0, image_in_size_bytes,
                                                  interleaved_img.data));
    } else {
        // Otherwise in_img1 is sent as the input frame.
        OCL_CHECK(err, err = q.enqueueWriteBuffer(imageToDevice, CL_TRUE, 0, image_in_size_bytes,
                                                  in_img1.data));
    }

    OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_inLut,      // buffer on the FPGA
                                              CL_TRUE,           // blocking call
                                              0,                 // buffer offset in bytes
                                              lut_in_size_bytes, // Size in bytes
                                              casted_lut,        // Pointer to the data to copy
                                              nullptr));

    // ---- Kernel launch and profiling ----
    cl_ulong start = 0;
    cl_ulong end = 0;
    double diff_prof = 0.0;
    cl::Event event_sp;

    // Launch the kernel and block until it has completed.
    OCL_CHECK(err, err = q.enqueueTask(kernel, nullptr, &event_sp));
    OCL_CHECK(err, err = event_sp.wait());

    // The profiling queries are intentionally left unchecked: if profiling data is
    // unavailable, start/end keep their zero initial values and 0 ms is printed
    // instead of aborting the run.
    event_sp.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
    event_sp.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);

    // OpenCL profiling counters are in nanoseconds; dividing by 1e6 gives milliseconds.
    diff_prof = static_cast<double>(end - start);
    std::cout << (diff_prof / 1000000) << "ms" << std::endl;

    // ---- Device -> host transfers (blocking) ----
    OCL_CHECK(err, err = q.enqueueReadBuffer(imageFromDevice, CL_TRUE, 0, image_out_size_bytes, out_img.data));
    if (rgbir_en) {
        // The IR output image is read back only when rgbir_en is set.
        OCL_CHECK(err, err = q.enqueueReadBuffer(imageFromDevice_ir, CL_TRUE, 0, image_out_ir_size_bytes,
                                                 out_img_ir.data));
    }
}
Resource Utilization
The following table summarizes the resource utilization of the ISP all_in_one_adas pipeline, generated using the Vitis HLS 2022.2 tool for the ZCU102 board.
Operating Mode | Operating Frequency (MHz) | Utilization Estimate: BRAM | Utilization Estimate: DSP | Utilization Estimate: CLB Registers | Utilization Estimate: CLB LUT |
---|---|---|---|---|---|
1 Pixel | 150 | 178 | 305 | 61210 | 63566 |
Performance Estimate
The following table summarizes the performance of the ISP all_in_one_adas pipeline in 1-pixel mode, as generated using the Vitis HLS 2022.2 tool for the ZCU102 board.
The estimated average latency is obtained by running the accelerator for three iterations. The input to the accelerator is an interleaved image containing one long-exposure frame and one short-exposure frame, both of which are full-HD (1920x1080) images.
Operating Mode | Latency Estimate: Average latency (ms) |
---|---|
1 pixel operation (150 MHz) | 29.509 |