Kernel Code (AI Engine API) - 2022.2 English

Vitis Tutorials: AI Engine (XD100)

Document ID
XD100
Release Date
2022-12-01
Version
2022.2 English
#include <aie_api/aie_adf.hpp>

#include "kernel.hpp"

/*
 * SecondOrderSection<id> -- second-order-section kernel written with the AI Engine API.
 *
 * Each call runs `burst_cnt` iterations. One iteration reads a vector of 8 floats
 * from `idata`, evaluates a matrix-vector product as 12 scalar-times-vector MACs
 * (6 against C_e, 6 against C_o), and writes the 8 results to `odata`.
 *
 * idata : input window of float samples
 * odata : output window of float samples
 * C_e   : run-time parameter, 48 coefficients read as six 8-lane columns (even columns)
 * C_o   : run-time parameter, 48 coefficients read as six 8-lane columns (odd columns)
 *
 * The filter state lives in a function-local static, so it persists across calls.
 * The template parameter `id` is not referenced in the body; because a static local
 * is per-specialization, each distinct `id` gets its own state register.
 *
 * NOTE(review): Vector8f, Vector16f, VAcc8f and burst_cnt are not declared in this
 * listing -- presumably they come from kernel.hpp; confirm there.
 */
template<unsigned id>
void SecondOrderSection(
	input_window<float> *idata,
	output_window<float> *odata,
	const float (&C_e)[48],			// run-time parameter: SIMD matrix of coefficients (even columns)
	const float (&C_o)[48]			// run-time parameter: SIMD matrix of coefficients (odd columns)
	) {
	// State carried from one iteration (and one call) to the next; starts cleared.
	static Vector8f state_reg = aie::zeros<float, 8>();

	// Outer loop: process more samples per call to reduce overhead.
	//chess_prepare_for_pipelining
	for (int burst = 0; burst < burst_cnt; ++burst) {
		// Fetch 8 new input samples and place them above the saved state:
		// lanes 0..7 hold the state, lanes 8..15 hold the new samples.
		Vector8f fresh = window_readincr_v<8>(idata);
		Vector16f taps = aie::concat(state_reg, fresh);

		VAcc8f even_acc = aie::zeros<accfloat, 8>();	// accumulates the even columns
		VAcc8f odd_acc = aie::zeros<accfloat, 8>();	// accumulates the odd columns

		// Inner loop: matrix-vector multiplication. Only lanes 4..15 of `taps`
		// are read (base runs 4, 6, ..., 14); lanes 0..3 are never used.
		for (int col = 0; col < 6; ++col) {
			const int base = 2 * col + 4;

			Vector8f even_coeff = aie::load_v<8>(&C_e[8 * col]);
			Vector8f odd_coeff = aie::load_v<8>(&C_o[8 * col]);

			float even_x = taps[base];
			float odd_x = taps[base + 1];

			even_acc = aie::mac(even_acc, even_x, even_coeff);
			odd_acc = aie::mac(odd_acc, odd_x, odd_coeff);
		}

		// Combine the two partial sums (odd + even) and convert to an output vector.
		VAcc8f total = aie::add(odd_acc, even_acc.to_vector());
		Vector8f result = total.to_vector();

		window_writeincr(odata, result);

		// Next state: start from the samples just consumed, then overwrite lanes 4
		// and 5 with the last two outputs. Lanes 6 and 7 keep the last two inputs.
		// (Presumably the output lanes are the recursive feedback terms -- confirm
		// against the coefficient layout.)
		state_reg = fresh;
		state_reg[4] = result[6];
		state_reg[5] = result[7];
	}
} // end SecondOrderSection()

Note the two nested loops in the function:

for (auto i = 0; i < burst_cnt; i++) {	// process more samples to reduce overhead
	...
	for (auto j = 0; j < 6; j++) {	// matrix-vector multiplication
		...
	}
}

The outer for loop is added so that more samples are processed during each function call, which reduces the per-call overhead and improves throughput.