Code Example - 2024.2 English

Vitis Libraries

Release Date
2025-05-14
Version
2024.2 English

The following code example shows how the matrix_multiply_graph class can be used within a user super-graph, including how to set the runtime<ratio> of the internal kernels. In this example, the matrix multiplier is configured to multiply a 32x16 matrix by a 16x32 matrix, giving a 32x32 matrix.

#include <adf.h>
#include "matrix_mult_graph.hpp"

using namespace adf;
namespace mm_example {

// Configuration for the matrix_mult_graph instance used by this example.
// These macros are passed, in the order listed here, as the template arguments
// of the matrix multiply graph.
// NOTE: preprocessor macros are not scoped by the enclosing namespace; they
// remain visible to everything that follows in the translation unit.
// NOTE(review): the per-parameter meanings below are inferred from the macro
// names and the example's description - confirm against the matrix_mult_graph
// API documentation.

// Element types of the A and B input matrices.
#define T_DATA_A cint16
#define T_DATA_B cint16
// Matrix dimensions: presumably A is P_DIM_A x P_DIM_AB (32x16) and B is
// P_DIM_AB x P_DIM_B (16x32), giving a P_DIM_A x P_DIM_B (32x32) result.
#define P_DIM_A 32
#define P_DIM_AB 16
#define P_DIM_B 32
// Presumably the shift applied to results before output, and the rounding
// mode selector used with it - TODO confirm.
#define P_SHIFT 16
#define P_ROUND_MODE 0
// Leading-dimension (storage order) selectors for A, B and the output -
// TODO confirm which value means row-major vs. column-major.
#define P_DIM_A_LEADING 0
#define P_DIM_B_LEADING 1
#define P_DIM_OUT_LEADING 0
// Flags that presumably enable extra tiling/detiling stages on the inputs and
// output; all are disabled (0) here.
#define P_ADD_TILING_A 0
#define P_ADD_TILING_B 0
#define P_ADD_DETILING_OUT 0
// Input window sizes. With the values above, 512 equals both
// P_DIM_A * P_DIM_AB and P_DIM_AB * P_DIM_B, i.e. presumably one whole matrix
// per window, counted in samples - TODO confirm units.
#define P_INPUT_WINDOW_VSIZE_A 512
#define P_INPUT_WINDOW_VSIZE_B 512
// Cascade length and SSR factor. Their product sizes the input port arrays
// and bounds the kernel index used by the example graph; P_SSR alone sizes
// the output port array.
#define P_CASC_LEN 1
#define P_SSR 1

class test_mm : public adf::graph {
   public:
    xf::dsp::aie::port_array<input, P_SSR * P_CASC_LEN> inA;
    xf::dsp::aie::port_array<input, P_SSR * P_CASC_LEN> inB;
    xf::dsp::aie::port_array<output, P_SSR> out;
    xf::dsp::aie::blas::matrix_mult::matrix_mult_graph<T_DATA_A,
                                                       T_DATA_B,
                                                       P_DIM_A,
                                                       P_DIM_AB,
                                                       P_DIM_B,
                                                       P_SHIFT,
                                                       P_ROUND_MODE,
                                                       P_DIM_A_LEADING,
                                                       P_DIM_B_LEADING,
                                                       P_DIM_OUT_LEADING,
                                                       P_ADD_TILING_A,
                                                       P_ADD_TILING_B,
                                                       P_ADD_DETILING_OUT,
                                                       P_INPUT_WINDOW_VSIZE_A,
                                                       P_INPUT_WINDOW_VSIZE_B,
                                                       P_CASC_LEN,
                                                       P_SSR>
        matrixMult;
    test_mm() {
        kernel* kernels = matrixMult.getKernels();
        for (int ssrIdx = 0; ssrIdx < P_SSR; ssrIdx++) {
            for (int cascIdx = 0; cascIdx < P_CASC_LEN; cascIdx++) {
                // Set runtime ratio for each kernel
                runtime<ratio>(kernels[ssrIdx * P_CASC_LEN + cascIdx]) = 0.7;

                // Connect input A and B data
                connect<>(inA[ssrIdx * P_CASC_LEN + cascIdx], matrixMult.inA[ssrIdx * P_CASC_LEN + cascIdx]);
                connect<>(inB[ssrIdx * P_CASC_LEN + cascIdx], matrixMult.inB[ssrIdx * P_CASC_LEN + cascIdx]);
            }
            // Connect output data
            connect<>(matrixMult.out[ssrIdx], out[ssrIdx]);
        }
    };
};
};