Post-operations allow you to fuse additional computations with GEMM operations, reducing memory traffic and improving performance. Operations are configured using the dlp_metadata_t structure and are applied sequentially, in the order listed in seq_vector, after the GEMM computation.
Metadata Structure:
The dlp_metadata_t structure is the core mechanism for configuring post-operations. For complete API reference, see dlp_metadata_t and the Post-Operations Guide Wiki.
Common Post-Operation Patterns:
Bias + ReLU Activation:
// Allocate and initialize metadata structure
dlp_metadata_t* metadata = (dlp_metadata_t*)malloc(sizeof(dlp_metadata_t));
memset(metadata, 0, sizeof(dlp_metadata_t));
// Set up sequence of operations
metadata->seq_length = 2; // Two operations: bias + ReLU
metadata->seq_vector = (DLP_POST_OP_TYPE*)malloc(2 * sizeof(DLP_POST_OP_TYPE));
metadata->seq_vector[0] = BIAS; // First operation
metadata->seq_vector[1] = ELTWISE; // Second operation
// Configure bias addition
metadata->bias = (dlp_post_op_bias*)malloc(sizeof(dlp_post_op_bias));
metadata->bias->bias = bias; // Pointer to bias vector
metadata->bias->stor_type = DLP_F32; // Bias data type
// Configure ReLU activation
metadata->eltwise = (dlp_post_op_eltwise*)malloc(sizeof(dlp_post_op_eltwise));
metadata->eltwise->sf = NULL;
metadata->eltwise->algo.alpha = NULL;
metadata->eltwise->algo.beta = NULL;
metadata->eltwise->algo.algo_type = RELU;
// Use in GEMM call
aocl_gemm_f32f32f32of32(
'R', 'N', 'N', m, n, k,
1.0f, a, lda, 'N',
b, ldb, 'N',
0.0f, c, ldc,
metadata // Pass metadata for post-operations
);
// Clean up
free(metadata->seq_vector);
free(metadata->bias);
free(metadata->eltwise);
free(metadata);
GELU Activation + Scaling:
// Allocate and initialize metadata structure
dlp_metadata_t* metadata = (dlp_metadata_t*)malloc(sizeof(dlp_metadata_t));
memset(metadata, 0, sizeof(dlp_metadata_t));
// Set up sequence of operations
metadata->seq_length = 2; // Two operations: GELU + scale
metadata->seq_vector = (DLP_POST_OP_TYPE*)malloc(2 * sizeof(DLP_POST_OP_TYPE));
metadata->seq_vector[0] = ELTWISE; // First operation
metadata->seq_vector[1] = SCALE; // Second operation
// Configure GELU activation (tanh approximation)
metadata->eltwise = (dlp_post_op_eltwise*)malloc(sizeof(dlp_post_op_eltwise));
metadata->eltwise->sf = NULL;
metadata->eltwise->algo.alpha = NULL;
metadata->eltwise->algo.beta = NULL;
metadata->eltwise->algo.algo_type = GELU_TANH;
// Configure scaling
metadata->scale = (dlp_scale_t*)malloc(sizeof(dlp_scale_t));
metadata->scale->sf = malloc(sizeof(dlp_sf_t));
metadata->scale->sf->scale_factor = malloc(sizeof(float));
*((float*)metadata->scale->sf->scale_factor) = 2.0f; // Scale factor
metadata->scale->sf->scale_factor_len = 1;
metadata->scale->sf->scale_factor_type = DLP_F32;
// Set up zero point (required for scale operation)
metadata->scale->zp = malloc(sizeof(dlp_zp_t));
metadata->scale->zp->zero_point = malloc(sizeof(int8_t));
*((int8_t*)metadata->scale->zp->zero_point) = 0;
metadata->scale->zp->zero_point_len = 1;
metadata->scale->zp->zero_point_type = DLP_S8;
// Use in GEMM call
aocl_gemm_f32f32f32of32(
'R', 'N', 'N', m, n, k,
1.0f, a, lda, 'N',
b, ldb, 'N',
0.0f, c, ldc,
metadata // Pass metadata for post-operations
);
// Clean up
free(metadata->seq_vector);
free(metadata->eltwise);
free(metadata->scale->sf->scale_factor);
free(metadata->scale->sf);
free(metadata->scale->zp->zero_point);
free(metadata->scale->zp);
free(metadata->scale);
free(metadata);