The implementation is configured through a configuration class that derives from the base configuration class xf::solver::choleskyTraits and redefines the appropriate class members. For example:
// Example user configuration class: derives from xf::solver::choleskyTraits and
// redefines only ARCH; every other member comes from the base traits class.
// NOTE(review): LOWER_TRIANGULAR, DIM, MATRIX_IN_T, MATRIX_OUT_T and SEL_ARCH are
// not defined in this snippet — presumably supplied by the user's build; confirm.
struct my_cholesky_traits : xf::solver::choleskyTraits<LOWER_TRIANGULAR, DIM, MATRIX_IN_T, MATRIX_OUT_T> {
// Implementation select: 0=Basic, 1=Lower latency architecture, 2=Further improved latency architecture.
static const int ARCH = SEL_ARCH;
};
The configuration class is supplied to the xf::solver::cholesky function as a template parameter as follows:
// Pass the configuration class as the last template argument. The dimension and
// data-type arguments are the same ones that parameterize my_cholesky_traits, so
// the traits describe the matrices actually being processed.
xf::solver::cholesky<LOWER_TRIANGULAR, DIM, MATRIX_IN_T, MATRIX_OUT_T, my_cholesky_traits>(A, L);
The definition of xf::solver::choleskyTraits is in the file L1/include/hw/cholesky.hpp.
The default xf::solver::choleskyTraits struct, which defines the internal variable types for the cholesky function, is shown below:
// Default traits for the cholesky function: defines its internal variable types
// and implementation options. Every internal type defaults to InputType; only
// L_OUTPUT_T follows OutputType. RowsColsA is not used by this primary template.
template <bool LowerTriangularL, int RowsColsA, typename InputType, typename OutputType>
struct choleskyTraits {
typedef InputType PROD_T;
typedef InputType ACCUM_T;
typedef InputType ADD_T;
typedef InputType DIAG_T;
typedef InputType RECIP_DIAG_T;
typedef InputType OFF_DIAG_T;
typedef OutputType L_OUTPUT_T;
// Select implementation: 0=Basic, 1=Lower latency architecture, 2=Further improved latency architecture.
static const int ARCH = 1;
// Pipelining target for the inner loop.
static const int INNER_II = 1;
// Inner loop unrolling factor for the choleskyAlt2 architecture (2) to increase throughput.
static const int UNROLL_FACTOR = 1;
// Dimension to unroll matrix: 1 when LowerTriangularL is true, otherwise 2.
static const int UNROLL_DIM = (LowerTriangularL == true ? 1 : 2);
// Additional implementation "switch" for the choleskyAlt2 architecture (2).
// Declared as int, so the initializer true is stored as 1.
static const int ARCH2_ZERO_LOOP = true;
};
If the input data type is hls::x_complex, choleskyTraits is defined as below:
// Partial specialization selected when both input and output are hls::x_complex.
// Internal types are complex over InputBaseType, except RECIP_DIAG_T, which is
// the real base type InputBaseType. L_OUTPUT_T is complex over OutputBaseType.
template <bool LowerTriangularL, int RowsColsA, typename InputBaseType, typename OutputBaseType>
struct choleskyTraits<LowerTriangularL, RowsColsA, hls::x_complex<InputBaseType>, hls::x_complex<OutputBaseType> > {
typedef hls::x_complex<InputBaseType> PROD_T;
typedef hls::x_complex<InputBaseType> ACCUM_T;
typedef hls::x_complex<InputBaseType> ADD_T;
typedef hls::x_complex<InputBaseType> DIAG_T;
// Real-valued (not complex) type.
typedef InputBaseType RECIP_DIAG_T;
typedef hls::x_complex<InputBaseType> OFF_DIAG_T;
typedef hls::x_complex<OutputBaseType> L_OUTPUT_T;
// Select implementation: 0=Basic, 1=Lower latency architecture, 2=Further improved latency architecture.
static const int ARCH = 1;
// Pipelining target for the inner loop.
static const int INNER_II = 1;
// Inner loop unrolling factor for the choleskyAlt2 architecture (2) to increase throughput.
static const int UNROLL_FACTOR = 1;
// Dimension to unroll matrix: 1 when LowerTriangularL is true, otherwise 2.
static const int UNROLL_DIM = (LowerTriangularL == true ? 1 : 2);
// Additional implementation "switch" for the choleskyAlt2 architecture (2).
static const int ARCH2_ZERO_LOOP = true;
};
If the input data type is std::complex, choleskyTraits is defined as below:
// Partial specialization selected when both input and output are std::complex.
// Internal types are complex over InputBaseType, except RECIP_DIAG_T, which is
// the real base type InputBaseType. L_OUTPUT_T is complex over OutputBaseType.
template <bool LowerTriangularL, int RowsColsA, typename InputBaseType, typename OutputBaseType>
struct choleskyTraits<LowerTriangularL, RowsColsA, std::complex<InputBaseType>, std::complex<OutputBaseType> > {
typedef std::complex<InputBaseType> PROD_T;
typedef std::complex<InputBaseType> ACCUM_T;
typedef std::complex<InputBaseType> ADD_T;
typedef std::complex<InputBaseType> DIAG_T;
// Real-valued (not complex) type.
typedef InputBaseType RECIP_DIAG_T;
typedef std::complex<InputBaseType> OFF_DIAG_T;
typedef std::complex<OutputBaseType> L_OUTPUT_T;
// Select implementation: 0=Basic, 1=Lower latency architecture, 2=Further improved latency architecture.
static const int ARCH = 1;
// Pipelining target for the inner loop.
static const int INNER_II = 1;
// Inner loop unrolling factor for the choleskyAlt2 architecture (2) to increase throughput.
static const int UNROLL_FACTOR = 1;
// Dimension to unroll matrix: 1 when LowerTriangularL is true, otherwise 2.
static const int UNROLL_DIM = (LowerTriangularL == true ? 1 : 2);
// Additional implementation "switch" for the choleskyAlt2 architecture (2).
static const int ARCH2_ZERO_LOOP = true;
};
If the input data type is ap_fixed, choleskyTraits is defined as below:
// Partial specialization selected when input and output are both ap_fixed.
// W1/I1 are the input word length and integer bits; W2/I2 are those of the output.
// The quantization/overflow/saturation parameters (Q1, O1, N1, Q2, O2, N2) only
// take part in matching: every internal type uses AP_RND_CONV, AP_SAT, 0 instead.
template <bool LowerTriangularL,
int RowsColsA,
int W1,
int I1,
ap_q_mode Q1,
ap_o_mode O1,
int N1,
int W2,
int I2,
ap_q_mode Q2,
ap_o_mode O2,
int N2>
struct choleskyTraits<LowerTriangularL, RowsColsA, ap_fixed<W1, I1, Q1, O1, N1>, ap_fixed<W2, I2, Q2, O2, N2> > {
// Twice the input word length and twice the input integer bits.
typedef ap_fixed<W1 + W1, I1 + I1, AP_RND_CONV, AP_SAT, 0> PROD_T;
// PROD_T widened by BitWidth<RowsColsA>::Value bits in both word length and integer bits.
// NOTE(review): BitWidth is defined elsewhere — presumably the number of bits
// needed to represent RowsColsA; confirm.
typedef ap_fixed<(W1 + W1) + BitWidth<RowsColsA>::Value,
(I1 + I1) + BitWidth<RowsColsA>::Value,
AP_RND_CONV,
AP_SAT,
0>
ACCUM_T;
// Input format with one extra integer bit.
typedef ap_fixed<W1 + 1, I1 + 1, AP_RND_CONV, AP_SAT, 0> ADD_T;
// Word length is twice that of ADD_T; integer bits are the same as ADD_T.
typedef ap_fixed<(W1 + 1) * 2, I1 + 1, AP_RND_CONV, AP_SAT, 0> DIAG_T; // Takes result of sqrt
typedef ap_fixed<(W1 + 1) * 2, I1 + 1, AP_RND_CONV, AP_SAT, 0> OFF_DIAG_T; // Takes result of /
// Sized from the output format: integer bits = 2 + output fractional bits (W2 - I2),
// followed by W2 fractional bits.
typedef ap_fixed<2 + (W2 - I2) + W2, 2 + (W2 - I2), AP_RND_CONV, AP_SAT, 0> RECIP_DIAG_T;
typedef ap_fixed<W2, I2, AP_RND_CONV, AP_SAT, 0>
L_OUTPUT_T; // Takes new L value. Same as L output but saturation set
// Select implementation: 0=Basic, 1=Lower latency architecture, 2=Further improved latency architecture.
static const int ARCH = 1;
// Pipelining target for the inner loop.
static const int INNER_II = 1;
// Inner loop unrolling factor for the choleskyAlt2 architecture (2) to increase throughput.
static const int UNROLL_FACTOR = 1;
// Dimension to unroll matrix: 1 when LowerTriangularL is true, otherwise 2.
static const int UNROLL_DIM = (LowerTriangularL == true ? 1 : 2);
// Additional implementation "switch" for the choleskyAlt2 architecture (2).
static const int ARCH2_ZERO_LOOP = true;
};
If the input data type is hls::x_complex<ap_fixed>, choleskyTraits is defined as below:
// Partial specialization selected when input and output are both
// hls::x_complex<ap_fixed>. W1/I1 are the input word length and integer bits;
// W2/I2 are those of the output. Q1, O1, N1, Q2, O2, N2 only take part in
// matching: every internal type uses AP_RND_CONV, AP_SAT, 0 instead.
template <bool LowerTriangularL,
int RowsColsA,
int W1,
int I1,
ap_q_mode Q1,
ap_o_mode O1,
int N1,
int W2,
int I2,
ap_q_mode Q2,
ap_o_mode O2,
int N2>
struct choleskyTraits<LowerTriangularL,
RowsColsA,
hls::x_complex<ap_fixed<W1, I1, Q1, O1, N1> >,
hls::x_complex<ap_fixed<W2, I2, Q2, O2, N2> > > {
// Complex; components have twice the input word length and integer bits.
typedef hls::x_complex<ap_fixed<W1 + W1, I1 + I1, AP_RND_CONV, AP_SAT, 0> > PROD_T;
// PROD_T components widened by BitWidth<RowsColsA>::Value bits.
// NOTE(review): BitWidth is defined elsewhere — presumably the number of bits
// needed to represent RowsColsA; confirm.
typedef hls::x_complex<ap_fixed<(W1 + W1) + BitWidth<RowsColsA>::Value,
(I1 + I1) + BitWidth<RowsColsA>::Value,
AP_RND_CONV,
AP_SAT,
0> >
ACCUM_T;
// Complex; components are the input format with one extra integer bit.
typedef hls::x_complex<ap_fixed<W1 + 1, I1 + 1, AP_RND_CONV, AP_SAT, 0> > ADD_T;
typedef hls::x_complex<ap_fixed<(W1 + 1) * 2, I1 + 1, AP_RND_CONV, AP_SAT, 0> > DIAG_T; // Takes result of sqrt
typedef hls::x_complex<ap_fixed<(W1 + 1) * 2, I1 + 1, AP_RND_CONV, AP_SAT, 0> > OFF_DIAG_T; // Takes result of /
// Real-valued (not complex). Sized from the output format: integer bits =
// 2 + output fractional bits (W2 - I2), followed by W2 fractional bits.
typedef ap_fixed<2 + (W2 - I2) + W2, 2 + (W2 - I2), AP_RND_CONV, AP_SAT, 0> RECIP_DIAG_T;
typedef hls::x_complex<ap_fixed<W2, I2, AP_RND_CONV, AP_SAT, 0> >
L_OUTPUT_T; // Takes new L value. Same as L output but saturation set
// Select implementation: 0=Basic, 1=Lower latency architecture, 2=Further improved latency architecture.
static const int ARCH = 1;
// Pipelining target for the inner loop.
static const int INNER_II = 1;
// Inner loop unrolling factor for the choleskyAlt2 architecture (2) to increase throughput.
static const int UNROLL_FACTOR = 1;
// Dimension to unroll matrix: 1 when LowerTriangularL is true, otherwise 2.
static const int UNROLL_DIM = (LowerTriangularL == true ? 1 : 2);
// Additional implementation "switch" for the choleskyAlt2 architecture (2).
static const int ARCH2_ZERO_LOOP = true;
};
If the input data type is std::complex<ap_fixed>, choleskyTraits is defined as below:
// Partial specialization selected when input and output are both
// std::complex<ap_fixed>. W1/I1 are the input word length and integer bits;
// W2/I2 are those of the output. Q1, O1, N1, Q2, O2, N2 only take part in
// matching: every internal type uses AP_RND_CONV, AP_SAT, 0 instead.
template <bool LowerTriangularL,
int RowsColsA,
int W1,
int I1,
ap_q_mode Q1,
ap_o_mode O1,
int N1,
int W2,
int I2,
ap_q_mode Q2,
ap_o_mode O2,
int N2>
struct choleskyTraits<LowerTriangularL,
RowsColsA,
std::complex<ap_fixed<W1, I1, Q1, O1, N1> >,
std::complex<ap_fixed<W2, I2, Q2, O2, N2> > > {
// Complex; components have twice the input word length and integer bits.
typedef std::complex<ap_fixed<W1 + W1, I1 + I1, AP_RND_CONV, AP_SAT, 0> > PROD_T;
// PROD_T components widened by BitWidth<RowsColsA>::Value bits.
// NOTE(review): BitWidth is defined elsewhere — presumably the number of bits
// needed to represent RowsColsA; confirm.
typedef std::complex<ap_fixed<(W1 + W1) + BitWidth<RowsColsA>::Value,
(I1 + I1) + BitWidth<RowsColsA>::Value,
AP_RND_CONV,
AP_SAT,
0> >
ACCUM_T;
// Complex; components are the input format with one extra integer bit.
typedef std::complex<ap_fixed<W1 + 1, I1 + 1, AP_RND_CONV, AP_SAT, 0> > ADD_T;
typedef std::complex<ap_fixed<(W1 + 1) * 2, I1 + 1, AP_RND_CONV, AP_SAT, 0> > DIAG_T; // Takes result of sqrt
typedef std::complex<ap_fixed<(W1 + 1) * 2, I1 + 1, AP_RND_CONV, AP_SAT, 0> > OFF_DIAG_T; // Takes result of /
// Real-valued (not complex). Sized from the output format: integer bits =
// 2 + output fractional bits (W2 - I2), followed by W2 fractional bits.
typedef ap_fixed<2 + (W2 - I2) + W2, 2 + (W2 - I2), AP_RND_CONV, AP_SAT, 0> RECIP_DIAG_T;
typedef std::complex<ap_fixed<W2, I2, AP_RND_CONV, AP_SAT, 0> >
L_OUTPUT_T; // Takes new L value. Same as L output but saturation set
// Select implementation: 0=Basic, 1=Lower latency architecture, 2=Further improved latency architecture.
static const int ARCH = 1;
// Pipelining target for the inner loop.
static const int INNER_II = 1;
// Inner loop unrolling factor for the choleskyAlt2 architecture (2) to increase throughput.
static const int UNROLL_FACTOR = 1;
// Dimension to unroll matrix: 1 when LowerTriangularL is true, otherwise 2.
static const int UNROLL_DIM = (LowerTriangularL == true ? 1 : 2);
// Additional implementation "switch" for the choleskyAlt2 architecture (2).
static const int ARCH2_ZERO_LOOP = true;
};
Note
- ARCH: Select implementation: 0=Basic, 1=Lower latency architecture, 2=Further improved latency architecture
- INNER_II: Specify the pipelining target for the inner loop
- UNROLL_FACTOR: The inner loop unrolling factor for the choleskyAlt2 architecture(2) to increase throughput
- UNROLL_DIM: Dimension to unroll matrix
- ARCH2_ZERO_LOOP: Additional implementation “switch” for the choleskyAlt2 architecture (2).
Warning
The function assumes that the input matrix is symmetric positive definite (Hermitian positive definite for complex-valued inputs).
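If the input matrix data type is ap_fixed or complex<ap_fixed>, choose the fixed-point parameters (word length, integer bits, quantization and overflow modes) so that the quantized input matrix is still symmetric positive definite (Hermitian positive definite for complex-valued inputs).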
The definition of ap_[u]fixed<W,I,Q,O,N>
- W: the Word length in bits.
- I: the number of integer bits, i.e. the bits above the binary point.
- Q: Quantization mode.
- O: Overflow mode.
- N: This defines the number of saturation bits in overflow wrap modes.