begin 迭代器示例
#include "aie_api/aie.hpp"
/**
 * Copies the input buffer to the output buffer one sample at a time,
 * walking both buffers with scalar iterators.
 *
 * @param in   Source buffer of BUFFER_SIZE cint16 samples.
 * @param out  Destination buffer of BUFFER_SIZE cint16 samples.
 */
void simple(input_buffer<cint16, adf::extents<BUFFER_SIZE>> & in, output_buffer<cint16, adf::extents<BUFFER_SIZE>> & out)
{
    // Scalar iterators positioned on the first sample of each buffer.
    auto src = aie::begin(in);
    auto dst = aie::begin(out);

    // Transfer one sample per pass, advancing both iterators each time.
    for (unsigned sample_idx = 0; sample_idx < (BUFFER_SIZE); ++sample_idx)
    {
        const auto sample = *src++;
        *dst++ = sample;
    }
}
begin_vector 迭代器示例
#define VECTOR_SIZE 8
/**
 * Copies the input buffer to the output buffer VECTOR_SIZE samples at a
 * time, walking both buffers with vector iterators.
 *
 * @param in   Source buffer of BUFFER_SIZE cint16 samples.
 * @param out  Destination buffer of BUFFER_SIZE cint16 samples.
 */
void simple(input_buffer<cint16, adf::extents<BUFFER_SIZE>> & in, output_buffer<cint16, adf::extents<BUFFER_SIZE>> & out)
{
    // Vector iterators: each dereference covers VECTOR_SIZE samples.
    auto src = aie::begin_vector<VECTOR_SIZE>(in);
    auto dst = aie::begin_vector<VECTOR_SIZE>(out);

    // The loop runs once per vector, i.e. BUFFER_SIZE/VECTOR_SIZE times.
    for (unsigned vec_idx = 0; vec_idx < (BUFFER_SIZE/VECTOR_SIZE); ++vec_idx)
    {
        const auto chunk = *src++;
        *dst++ = chunk;
    }
}
cbegin 迭代器示例
/**
 * Copies the input buffer to the output buffer one sample at a time.
 * The input is read through a read-only (cbegin) scalar iterator, the
 * output is written through a regular scalar iterator.
 *
 * @param in   Source buffer of BUFFER_SIZE cint16 samples.
 * @param out  Destination buffer of BUFFER_SIZE cint16 samples.
 */
void simple(input_buffer<cint16, adf::extents<BUFFER_SIZE>> & in, output_buffer<cint16, adf::extents<BUFFER_SIZE>> & out)
{
    // Read-only iterator for the source; writable iterator for the destination.
    auto src = aie::cbegin(in);
    auto dst = aie::begin(out);

    // Transfer one sample per pass, advancing both iterators each time.
    for (unsigned sample_idx = 0; sample_idx < (BUFFER_SIZE); ++sample_idx)
    {
        const auto sample = *src++;
        *dst++ = sample;
    }
}
注释:cbegin 迭代器是只读迭代器。如果 pOut 迭代器声明为 cbegin 迭代器,aiecompiler 将出错退出。
cbegin_vector 迭代器示例
#define VECTOR_SIZE 8
/**
 * Copies the input buffer to the output buffer VECTOR_SIZE samples at a
 * time. The input is read through a cbegin_vector iterator, the output is
 * written through a begin_vector iterator.
 *
 * @param in   Source buffer of BUFFER_SIZE cint16 samples.
 * @param out  Destination buffer of BUFFER_SIZE cint16 samples.
 */
void simple(input_buffer<cint16, adf::extents<BUFFER_SIZE>> & in, output_buffer<cint16, adf::extents<BUFFER_SIZE>> & out)
{
    // Vector iterators: each dereference covers VECTOR_SIZE samples.
    auto src = aie::cbegin_vector<VECTOR_SIZE>(in);
    auto dst = aie::begin_vector<VECTOR_SIZE>(out);

    // The buffer contains (BUFFER_SIZE/VECTOR_SIZE) vectors, so the loop
    // runs once per vector.
    for (unsigned vec_idx = 0; vec_idx < (BUFFER_SIZE/VECTOR_SIZE); ++vec_idx)
    {
        const auto chunk = *src++;
        *dst++ = chunk;
    }
}
begin_random_circular 迭代器示例
/**
 * Copies a circular input buffer to a circular output buffer, starting the
 * read at the middle of the input so that the second half is emitted first,
 * followed by the first half.
 *
 * @param in   Circular source buffer of BUFFER_SIZE cint16 samples.
 * @param out  Circular destination buffer of BUFFER_SIZE cint16 samples.
 */
void simple(input_circular_buffer<cint16, adf::extents<BUFFER_SIZE>> & in, output_circular_buffer<cint16, adf::extents<BUFFER_SIZE>> & out)
{
    // Random-access circular scalar iterators on both buffers.
    auto src = aie::begin_random_circular(in);
    auto dst = aie::begin_random_circular(out);

    // Jump the read position to the midpoint of the input buffer.
    src += BUFFER_SIZE/2;

    // BUFFER_SIZE samples are transferred: second half of the input first,
    // then the first half.
    for (unsigned sample_idx = 0; sample_idx < (BUFFER_SIZE); ++sample_idx)
    {
        const auto sample = *src++;
        *dst++ = sample;
    }
}
begin_vector_random_circular 迭代器示例
#define VECTOR_SIZE 8
/**
 * Copies a circular input buffer to a circular output buffer VECTOR_SIZE
 * samples at a time, after first advancing the read iterator by one full
 * buffer length.
 *
 * @param in   Circular source buffer of BUFFER_SIZE cint16 samples.
 * @param out  Circular destination buffer of BUFFER_SIZE cint16 samples.
 */
void simple(input_circular_buffer<cint16, adf::extents<BUFFER_SIZE>> & in, output_circular_buffer<cint16, adf::extents<BUFFER_SIZE>> & out)
{
    // Random-access circular vector iterators on both buffers.
    auto src = aie::begin_vector_random_circular<VECTOR_SIZE>(in);
    auto dst = aie::begin_vector_random_circular<VECTOR_SIZE>(out);

    // Move the read position forward by BUFFER_SIZE/VECTOR_SIZE vectors,
    // i.e. to the end of the buffer.
    // NOTE(review): on a circular iterator this presumably wraps back to the
    // first vector, which is why the loop below copies the whole input - confirm.
    src += BUFFER_SIZE/VECTOR_SIZE;

    // Copy the input buffer onto the output buffer, one vector per pass.
    for (unsigned vec_idx = 0; vec_idx < (BUFFER_SIZE/VECTOR_SIZE); ++vec_idx)
    {
        const auto chunk = *src++;
        *dst++ = chunk;
    }
}
注释:
- 循环缓冲器端口必须使用循环迭代器。线性缓冲器端口可使用线性迭代器或循环迭代器。
- 每次迭代都使用矢量迭代器访问 VECTOR_SIZE 个样本。其中,对于这个 data-type 为 cint16(32 位)的特定示例,VECTOR_SIZE 可以为 4(128 位)、8(256 位)、16(512 位)或 32(1024 位)。
- 如果迭代器每次在任一方向都需移动多步,请使用随机迭代器。
读取和写入数据
有多种方法可用于在一维缓冲器端口上读取和写入数据。
- 使用原始指针。请注意,此机制不得用于循环缓冲器端口。
// Copies BUFFER_SIZE int32 samples from `in` to `out` through raw pointers
// obtained from each buffer's data() accessor.
void simple(input_buffer<int32> & in, output_buffer<int32> & out)
{
    int32 * src = in.data();
    int32 * dst = out.data();
    for (int n = 0; n < BUFFER_SIZE; ++n)
    {
        const int32 sample = *src++;
        *dst++ = sample;
    }
    // The rest of the kernel body is elided in this excerpt.
    ...
}
- 使用标量迭代器。
// Copies BUFFER_SIZE int32 samples from `in` to `out` through scalar
// iterators obtained with aie::begin.
void simple(input_buffer<int32> & in, output_buffer<int32> & out)
{
    // Scope resolution needs "::"; the single-colon "aie:begin" does not compile.
    auto pin = aie::begin(in);
    auto pout = aie::begin(out);
    for (int i = 0; i < BUFFER_SIZE; i++)
    {
        *pout++ = *pin++;
    }
    // The rest of the kernel body is elided in this excerpt.
    ...
}
- 使用矢量迭代器。
// Copies the int32 samples of `in` to `out` VECTOR_SIZE samples at a time
// through vector iterators obtained with aie::begin_vector.
void simple(input_buffer<int32> & in, output_buffer<int32> & out)
{
    auto src = aie::begin_vector<VECTOR_SIZE>(in);
    auto dst = aie::begin_vector<VECTOR_SIZE>(out);
    // One vector is transferred per pass: BUFFER_SIZE/VECTOR_SIZE passes in total.
    for (int v = 0; v < BUFFER_SIZE/VECTOR_SIZE; ++v)
    {
        const auto chunk = *src++;
        *dst++ = chunk;
    }
    // The rest of the kernel body is elided in this excerpt.
    ...
}
使用输入和输出缓冲器作为中间存储器
获取输入或输出缓冲器后,在将其释放前,该缓冲器归内核所有。在此期间,内核可通过指针或迭代器读取或写入该缓冲器,而不会造成数据冲突。以下代码展示了将一个异步输出缓冲器用作多次迭代之间的临时存储的示例:
#include <aie_api/aie.hpp>
#include <aie_api/aie_adf.hpp>
#include <aie_api/utils.hpp>
using namespace adf;
// Number of int32 samples in each input/output buffer of the accumulation kernel.
const int BUFFER_SIZE=1024;
// Number of samples covered by one vector-iterator step.
const int VECTOR_SIZE=16;
// Number of kernel invocations accumulated into the output buffer before it is released.
const int TOTAL_N=2;
// Counts kernel invocations within the current accumulation cycle; reset to 0
// when the output buffer is released.
static int iteration=0;
/**
 * Accumulates the element-wise sum of two input buffers into an asynchronous
 * output buffer across TOTAL_N consecutive invocations, using the output
 * buffer itself as the running-sum storage.
 *
 * On the first invocation of a cycle (iteration == 0) the output buffer is
 * acquired and filled with in1 + in2. On later invocations the buffer is
 * still held, and in1 + in2 is added to what it already contains. Once
 * TOTAL_N invocations have run, the counter is reset and the buffer released.
 *
 * @param in1  First input buffer of BUFFER_SIZE int32 samples.
 * @param in2  Second input buffer of BUFFER_SIZE int32 samples.
 * @param out  Asynchronous output buffer of BUFFER_SIZE int32 samples, kept
 *             acquired between invocations of the same cycle.
 */
__attribute__ ((noinline)) void accumulation(input_buffer<int32,extents<BUFFER_SIZE>> & __restrict in1,
                                             input_buffer<int32,extents<BUFFER_SIZE>> & __restrict in2,
                                             output_async_buffer<int32,extents<BUFFER_SIZE>> & __restrict out)
{
    auto lhs = aie::begin_vector<VECTOR_SIZE>(in1);
    auto rhs = aie::begin_vector<VECTOR_SIZE>(in2);

    if (iteration == 0) {
        // First pass of the cycle: take the lock, then initialise the buffer.
        out.acquire();
        // The output iterator must only be created after the lock is acquired.
        auto acc = aie::begin_vector<VECTOR_SIZE>(out);
        for (int v = 0; v < BUFFER_SIZE/VECTOR_SIZE; ++v) {
            const auto sum = aie::add(*lhs++, *rhs++);
            *acc++ = sum;
        }
    } else {
        // Later passes: the lock is still held from the first pass.
        auto acc = aie::begin_vector<VECTOR_SIZE>(out);
        for (int v = 0; v < BUFFER_SIZE/VECTOR_SIZE; ++v) {
            const auto sum = aie::add(*lhs++, *rhs++);
            const auto running = *acc;
            *acc++ = aie::add(running, sum);
        }
    }

    // Both paths count this invocation.
    ++iteration;

    // End of the cycle: restart the counter and hand the buffer downstream.
    if (iteration == TOTAL_N) {
        iteration = 0;
        out.release();
    }
}