// typedefs typedef int idct25_t typedef ap_int <24> idctm_t // enums enum { w1 = 2841 w2 = 2676 w3 = 2408 w5 = 1609 w6 = 1108 w7 = 565 w1pw7 = w1 + w7 w1mw7 = w1 - w7 w2pw6 = w2 + w6 w2mw6 = w2 - w6 w3pw5 = w3 + w5 w3mw5 = w3 - w5 r2 = 181 } // structs struct hls_huff_DHT struct hls_huff_segment struct sos_data // classes template < int _W, int _I, int _WBIT, int _NPPC > class BicubicInterpolator // global variables static const uint8_t hls_jpeg_zigzag_to_raster[64] const short hls_icos_base_8192_scaled[64]
decoder_jpg_full_top
#include "jpegDec/XAcc_idct.hpp"
template <int _WAxi> void decoder_jpg_full_top ( ap_uint <_WAxi>* ptr, const int sz, const int c, const uint16_t dht_tbl1 [2][2][1<< (9)], uint8_t ac_value_buckets [2][165], HCODE_T ac_huff_start_code [2][6], int16_t ac_huff_start_addr [2][16], uint8_t dc_value_buckets [2][12], HCODE_T dc_huff_start_code [2][3], int16_t dc_huff_start_addr [2][16], ap_uint <12> hls_cmp, const uint8_t hls_mbs [(3)], const uint8_t q_tables [2][8][8], const img_info img_info, const bas_info bas_info, int& rtn2, uint32_t& rst_cnt, ap_uint <64>* yuv_mcu_pointer )
Level 1 : decode all mcu with burst read data from DDR.
Parameters:
_WAxi | size of data path in dataflow region, in bit. when _WAxi is 16, the decoder could decode one symbol per cycle in about 99% cases. when _WAxi is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource. |
ptr | the pointer to DDR. |
sz | the total bytes to be read from DDR. |
c | the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char) |
dht_tbl1/dht_tbl2 | the segment data of Define huffman table marker. |
hls_cmp | the shift register organized by the index of each color component. |
hls_mbs | the number of blocks in mcu for each component. |
q_tables | the quantization tables. |
img_info | includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests. |
bas_info | the basic information for the image. |
yuv_mcu_pointer | pointer to the hls_mcuc*{hls_mbs[0~2]*{Y/U/V}} |
burstWrite
#include "jpegDec/XAcc_idct.hpp"
template <int _WAxi> void burstWrite ( ap_uint <_WAxi>* yuv_mcu_pointer, hls::stream <idct_out_t> strm_iDCT_x8 [8], const uint32_t all_blocks )
Template function that bursts the iDCT output streams out through an AXI port whose width is _WAxi.
Parameters:
_WAxi | width of axi port. |
yuv_mcu_pointer | AXI master port to write to, e.g. 64 bits wide. |
strm_iDCT_x8 | stream width is 8 bits |
parser_jpg_top
#include "jpegDec/XAcc_jfifparser.hpp"
void parser_jpg_top ( ap_uint < (16)>* datatoDDR, const int size, int& r, int& c, uint16_t dht_tbl1 [2][2][1<< (9)], uint8_t ac_value_buckets [2][165], HCODE_T ac_huff_start_code [2][6], int16_t ac_huff_start_addr [2][16], uint8_t dc_value_buckets [2][12], HCODE_T dc_huff_start_code [2][3], int16_t dc_huff_start_addr [2][16], ap_uint <12>& hls_cmp, int& left, uint8_t hls_mbs [(3)], uint8_t q_tables [2][8][8], int& rtn, img_info& img_info, cmp_info cmp_info [(3)], bas_info& bas_info )
Level 1 : parse the jfif register for the jpeg decoder.
Parameters:
CH_W | size of data path in dataflow region, in bit. when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases. when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource. |
datatoDDR | the pointer to DDR. |
size | the total bytes to be read from DDR. |
r | the index of vector to be read from AXI in all cases |
c | the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char) |
dht_tbl1/dht_tbl2 | the segment data of Define huffman table marker. |
hls_cmp | the shift register organized by the index of each color component. |
left | the number of bytes to be read from DDR after parser. |
hls_mbs | the number of blocks in mcu for each component. |
q_tables | is quantization tables. |
rtn | return flag. |
img_info | includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests. |
cmp_info | image information may be used to generate the bas_info . |
bas_info | information used by next module. |
decoder_jpg_top
#include "jpegDec/XAcc_jfifparser.hpp"
void decoder_jpg_top ( ap_uint < (16)>* ptr, const int sz, const int c, const uint16_t dht_tbl1 [2][2][1<< (9)], uint8_t ac_value_buckets [2][165], HCODE_T ac_huff_start_code [2][6], int16_t ac_huff_start_addr [2][16], uint8_t dc_value_buckets [2][12], HCODE_T dc_huff_start_code [2][3], int16_t dc_huff_start_addr [2][16], ap_uint <12> hls_cmp, const uint8_t hls_mbs [(3)], const img_info img_info, int& rtn2, uint32_t& rst_cnt, hls::stream <ap_uint <24>>& block_strm )
Level 1 : decode all mcu with burst read data from DDR.
Parameters:
CH_W | size of data path in dataflow region, in bit. when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases. when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource. |
ptr | the pointer to DDR. |
sz | the total bytes to be read from DDR. |
c | the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char) |
dht_tbl1/dht_tbl2 | the segment data of Define huffman table marker. |
hls_cmp | the shift register organized by the index of each color component. |
hls_mbs | the number of blocks in mcu for each component. |
img_info | includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests. |
rtn2 | return flag. |
block_strm | the stream of coefficients in block,23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val |
mcu_decoder
#include "jpegDec/XAcc_jpegdecoder.hpp"
template <typename CHTYPE> void mcu_decoder ( hls::stream <CHTYPE>& image_strm, hls::stream <bool>& eof_strm, const uint16_t dht_tbl1 [2][2][1<< (9)], const uint8_t ac_value_buckets [2][165], const HCODE_T ac_huff_start_code [2][6], const int16_t ac_huff_start_addr [2][16], const uint8_t dc_value_buckets [2][12], const HCODE_T dc_huff_start_code [2][3], const int16_t dc_huff_start_addr [2][16], ap_uint <12> hls_cmp, const uint8_t hls_cs_cmpc, const uint8_t hls_mbs [(3)], const uint16_t hls_mcuh, const uint32_t hls_mcuc, int& rtn2, uint32_t& rst_cnt, hls::stream <ap_uint <24>>& block_strm )
Level 1 : decode all mcu.
Parameters:
CH_W | size of data path in dataflow region, in bit. when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases. when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource. |
image_strm | the stream of compressed data after SOS marker. |
eof_strm | the stream of end flags for image_strm; a synchronous signal using false for each data word and an additional true at the end. |
dht_tbl1/dht_tbl2 | the segment data of Define huffman table marker. |
hls_cmp | the shift register organized by the index of each color component. |
image | info include hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc is just for csim tests. |
block_strm | the stream of coefficients in block,23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val |
hls_next_mcupos2
#include "jpegDec/XAcc_jpegdecoder.hpp"
void hls_next_mcupos2 ( hls::stream <ap_uint <24>>& block_strm, int16_t hls_block [(3)*(1036800)*64], int hls_sfv [4], int hls_sfh [4], const uint8_t hls_mbs [4], int hls_bch, int hls_bc, int32_t hls_mcuc, uint8_t hls_cs_cmpc, bool rtn2, int& sta )
Convert the stream to Aligned_block, for other integrations, not just JPEG.
Parameters:
block_strm | the stream of coefficients in block,23:is_rst, 22:is_endblock,21~16:bpos,15~0:block val |
hls_block | the maxsize block, which will be copied to the aligned_block |
hls_sfv | the sample factor vertical for each component |
hls_sfh | the sample factor horizontal for each component |
hls_mbs | the blocks in mcu for each component. |
hls_bch | the max block count horizontal (interleaved) |
hls_bc | the max block count (all) (interleaved) |
sta | the status of the process, 0: keep doing, 1: reset decoder(todo), 2: decode done |
loadToStrm
#include "resize/bicubicinterpolator.hpp"
void loadToStrm ( ap_uint <32> width, ap_uint <32> height, ap_uint <WDATA>* axi_src, hls::stream <ap_uint <WDATA>>& src_strm )
The function loads the pixels of the image into a stream.
Parameters:
width | the number of pixels in each row of the input image |
height | the number of pixels in each column of the input image |
axi_src | the hbm port for input |
src_strm | the input stream of bicubic interpolator |
pickOutStrm
#include "resize/bicubicinterpolator.hpp"
void pickOutStrm ( hls::stream <ap_uint <72>>& dst_strm, hls::stream <bool>& e_dst, hls::stream <ap_uint <WDATA>>& pixel_strm )
The function picks out the valid interpolation values from each 72-bit word: bits (0, 63) hold the valid values and bits (64, 71) give the number of valid interpolation values.
Parameters:
dst_strm | the output of bicubic interpolator |
e_dst | the flag of output |
pixel_strm | the compacted 64-bit words, each representing 8 pixels |
loadToImage
#include "resize/bicubicinterpolator.hpp"
void loadToImage ( ap_uint <32> width, ap_uint <32> height, hls::stream <ap_uint <WDATA>>& pixel_strm, ap_uint <WDATA>* axi_dst )
The function writes the result of the interpolation into HBM memory.
Parameters:
width | the number of pixels in each row of the output image |
height | the number of pixels in each column of the output image |
pixel_strm | the output stream of the bicubic interpolator |
axi_dst | the hbm port for output |