Kernel Code - 2023.2 English

Vitis Tutorials: AI Engine (XD100)

Document ID
XD100
Release Date
2024-03-05
Version
2023.2 English

As a first step, we use the following kernel code:

/**
 * Processes one vector of 8 input samples through a second-order section
 * and writes one vector of 8 output samples.
 *
 * The outputs are formed as a matrix-vector product: for each of 12 operands
 * (4 carried-over state values followed by the 8 new inputs), the matching
 * group of 8 coefficients in C is scaled by that operand and accumulated.
 *
 * The state register is a function-local static, so every distinct value of
 * `id` (i.e. every template instantiation) keeps its own independent state.
 *
 * @tparam id     Not referenced in the body; it only distinguishes instantiations.
 * @param  idata  Input buffer; one vector of 8 floats is read per call.
 * @param  odata  Output buffer; one vector of 8 floats is written per call.
 * @param  C      96 coefficients, consumed as 12 consecutive groups of 8 floats.
 */
template<unsigned id>
void SecondOrderSection(
	adf::input_buffer<float> & __restrict idata,	// 8 input samples per iteration
	adf::output_buffer<float> & __restrict odata,	// 8 output samples per iteration
	const float (&C)[96]	// RTP port for coefficient matrix
) {
	constexpr int kLanes = 8;			// floats per coefficient group / output vector
	constexpr int kNumColumns = 12;		// coefficient groups held in C (12 * 8 = 96)
	constexpr int kFirstOperand = 4;	// first element of the operand vector that is used

	// History carried over from the previous call; all zeros on the first call.
	static Vector8f state_reg = aie::zeros<float, 8>();

	// Vector-wide (8 floats) views of the input and output buffers.
	auto src = aie::begin_vector<8>(idata);
	auto dst = aie::begin_vector<8>(odata);

	// Pull in the 8 new input samples.
	const Vector8f samples = *src++;

	// Operand layout: [4]: ym2; [5]: ym1; [6]: xm2; [7]: xm1; [8:15]: x0:x7.
	// Elements [0:3] are never read by the loop below.
	Vector16f operands = aie::concat(state_reg, samples);

	// Accumulate (coefficient group) * (operand) over all 12 groups.
	VAcc8f sum = aie::zeros<accfloat, 8>();
	for (int col = 0; col < kNumColumns; ++col) {
		const Vector8f column = aie::load_v<8>(&C[kLanes * col]);
		const float scale = operands[kFirstOperand + col];
		sum = aie::mac(sum, column, scale);
	}

	// Move the accumulator into a vector register so individual lanes can be read.
	Vector8f yout = sum;

	// Next call's state: lanes 6 and 7 keep the two newest inputs (x6, x7),
	// lanes 4 and 5 take the two newest outputs (y6, y7).
	state_reg = samples;
	state_reg[4] = yout[6];
	state_reg[5] = yout[7];

	// Emit the 8 output samples.
	*dst++ = yout;

} // end SecondOrderSection()

The for loop scales each column of the coefficient matrix by the corresponding element of xreg (xreg[4] through xreg[15]) and accumulates the results. This performs the matrix-vector multiplication in eqn. (4).