Create and Launch Kernel in the Testbench: - 2024.1 English

Vitis Libraries

Release Date
2024-08-06
Version
2024.1 English

The histogram function needs two frames: one to populate the histogram and one to produce correct auto white balance results. GTM and the other tone-mapping functions need three frames to populate their parameters and then apply those parameters to produce a corrected image. In the specific example below, three iterations are needed because the GTM function is selected.

    // Create the kernel object for the ISP pipeline accelerator.
    // NOTE(review): this snippet assumes OCL_CHECK(err, call) evaluates `call` and then
    // tests `err` (as the Xilinx xcl2 helper macro does) -- confirm. Under that contract a
    // checked call must store its status in `err`, hence the `err = ...` form used below.
    OCL_CHECK(err, cl::Kernel kernel(program, "ISPPipeline_accel", &err));

    // Run the pipeline on three consecutive frames: the tone-mapping stage needs three
    // frames before its parameters are populated and applied to the output image.
    for (int i = 0; i < 3; i++) {
        // ---- Host -> device transfers (all blocking) ----
        OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_inVec,          // buffer on the FPGA
                                                  CL_TRUE,               // blocking call
                                                  0,                     // buffer offset in bytes
                                                  vec_in_size_bytes,     // size in bytes
                                                  gamma_lut));           // host data to copy

        OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_R_IR_C1,        // buffer on the FPGA
                                                  CL_TRUE,               // blocking call
                                                  0,                     // buffer offset in bytes
                                                  filter1_in_size_bytes, // size in bytes
                                                  R_IR_C1_wgts));        // host data to copy

        OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_R_IR_C2,        // buffer on the FPGA
                                                  CL_TRUE,               // blocking call
                                                  0,                     // buffer offset in bytes
                                                  filter1_in_size_bytes, // size in bytes
                                                  R_IR_C2_wgts));        // host data to copy

        OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_B_at_R,         // buffer on the FPGA
                                                  CL_TRUE,               // blocking call
                                                  0,                     // buffer offset in bytes
                                                  filter1_in_size_bytes, // size in bytes
                                                  B_at_R_wgts));         // host data to copy

        OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_IR_at_R,        // buffer on the FPGA
                                                  CL_TRUE,               // blocking call
                                                  0,                     // buffer offset in bytes
                                                  filter2_in_size_bytes, // size in bytes
                                                  IR_at_R_wgts));        // host data to copy

        OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_IR_at_B,        // buffer on the FPGA
                                                  CL_TRUE,               // blocking call
                                                  0,                     // buffer offset in bytes
                                                  filter2_in_size_bytes, // size in bytes
                                                  IR_at_B_wgts));        // host data to copy

        OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_sub_wgts,        // buffer on the FPGA
                                                  CL_TRUE,                // blocking call
                                                  0,                      // buffer offset in bytes
                                                  sub_wgts_in_size_bytes, // size in bytes
                                                  sub_wgts));             // host data to copy

        if (hdr_en) {
            // HDR path: upload the weight table and the interleaved input image.
            OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_inVec_Weights,  // buffer on the FPGA
                                                      CL_TRUE,               // blocking call
                                                      0,                     // buffer offset in bytes
                                                      vec_weight_size_bytes, // size in bytes
                                                      wr_hls));              // host data to copy

            OCL_CHECK(err, err = q.enqueueWriteBuffer(imageToDevice,
                                                      CL_TRUE, 0,
                                                      image_in_size_bytes,
                                                      interleaved_img.data));
        } else {
            // Non-HDR path: upload the single input image.
            OCL_CHECK(err, err = q.enqueueWriteBuffer(imageToDevice,
                                                      CL_TRUE, 0,
                                                      image_in_size_bytes,
                                                      in_img1.data));
        }

        OCL_CHECK(err, err = q.enqueueWriteBuffer(buffer_inLut,          // buffer on the FPGA
                                                  CL_TRUE,               // blocking call
                                                  0,                     // buffer offset in bytes
                                                  lut_in_size_bytes,     // size in bytes
                                                  casted_lut,            // pointer to the data to copy
                                                  nullptr));             // no events to wait on

        // ---- Launch the kernel and wait for it to finish ----
        cl::Event event_sp;
        OCL_CHECK(err, err = q.enqueueTask(kernel, nullptr, &event_sp));

        // Wait through the C++ wrapper rather than casting cl::Event* to cl_event*.
        OCL_CHECK(err, err = event_sp.wait());

        // ---- Report kernel execution time ----
        cl_ulong start = 0;
        cl_ulong end = 0;
        const cl_int start_status = event_sp.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
        const cl_int end_status = event_sp.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
        if (start_status == CL_SUCCESS && end_status == CL_SUCCESS) {
            // OpenCL profiling counters are in nanoseconds; 1e6 ns == 1 ms.
            const double diff_prof = static_cast<double>(end - start);
            std::cout << (diff_prof / 1000000) << "ms" << std::endl;
        } else {
            // Profiling is informational only, so report the problem and keep going
            // instead of printing a meaningless 0 ms figure.
            std::cerr << "Kernel profiling info unavailable (error codes " << start_status << ", "
                      << end_status << ")" << std::endl;
        }

        // ---- Device -> host transfers (blocking) ----
        OCL_CHECK(err, err = q.enqueueReadBuffer(imageFromDevice, CL_TRUE, 0, image_out_size_bytes, out_img.data));
        if (rgbir_en) {
            OCL_CHECK(err, err = q.enqueueReadBuffer(imageFromDevice_ir, CL_TRUE, 0, image_out_ir_size_bytes,
                                                     out_img_ir.data));
        }
    }

Resource Utilization

The following table summarizes the resource utilization of the ISP all_in_one_adas pipeline, generated using the Vitis HLS 2022.2 tool for the ZCU102 board.

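Table 256: ISP all_in_one_adas Resource Utilization Summary

| Operating Mode | Operating Frequency (MHz) | BRAM | DSP | CLB Registers | CLB LUT |
|----------------|---------------------------|------|-----|---------------|---------|
| 1 Pixel        | 150                       | 178  | 305 | 61210         | 63566   |

(The BRAM, DSP, CLB Registers, and CLB LUT columns are utilization estimates.)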

Performance Estimate

The following table summarizes the performance of the ISP all_in_one_adas pipeline in 1-pixel mode, as generated using the Vitis HLS 2022.2 tool for the ZCU102 board.

The estimated average latency is obtained by running the accel for three iterations. The input to the accel is an interleaved image containing one long-exposure frame and one short-exposure frame, both of which are full-HD (1920x1080) images.

Table 257: ISP all_in_one_adas Performance Estimate Summary

| Operating Mode              | Latency Estimate: Average latency (ms) |
|-----------------------------|----------------------------------------|
| 1 pixel operation (150 MHz) | 29.509                                 |