/*******************************************************************************
 * This file is part of the PB2 library developed
 * within the EU Artemis project SMECY (Smart Multicore Embedded Systems)
 * Artemis JU 100230 and MSMT 7H10001,       http://www.smecy.eu
 * Copyright (C) 2011 UTIA AV CR, v.v.i.     http://sp.utia.cz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even 
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 *  PURPOSE.
 *
 * This file has been released within the SMECY project
 * consortium for the requirements of the SMECY project.
 * Any use outside the SMECY consortium and/or for any
 * developments outside the scope of the SMECY project is prohibited.
 *
 * For more details contact Roman Bartosinski <bartosr@utia.cas.cz>.
 *******************************************************************************
 * Filename  : pbbcelib.h
 * Authors   : Jaroslav Sykora <sykora@utia.cas.cz>
 * Project   : SMECY
 * Purpose   : Header file - API for firmware of BCE hardware accelerator
 * Release   : 
 * Version   : 0.1
 * Date      : 2012/01/18
 *
 * Long Description:
 *  Functions, constants and definitions specific for building firmware
 *  for MCU in Application Specific Vector Processor (ASVP).
 * 
 *******************************************************************************
 * Modifications:
 *  Author: Roman Bartosinski
 *  Date  : 2012/05/01
 *  Description: Updated comments
 *  --
 *  
 ******************************************************************************/

#ifndef PBBCELIB_H_V3
#define PBBCELIB_H_V3

/**********************************************************************/
/*** Definitions ***/

/**
 * enum dfu_operations - IDs of operations supported in the accelerator
 * @DFU_VCOPY:    vector copying M0[i] = M1[j]
 * @DFU_VADD:     vector addition M0[i] = M1[j] + M2[k]
 * @DFU_VMUL:     vector multiplication M0[i] = M1[j] * M2[k]
 * @DFU_VMAC:     vector multiply-accumulation M0[i] = M3[l] + (M1[j] * M2[k])
 * @DFU_DPROD:    vector dot product M0[i] = sum(M1[j] * M2[k])
 * @DFU_VSUB:     vector subtraction M0[i] = M1[j] - M2[k]
 * @DFU_VSUM:     summation of vector elements M0[0] <= SUM(M1[i])
 * @DFU_VCMPLT:   vector less-than comparison M0[i] <= (M1[i] < M2[i]) ? 0xffff_ffff : 0x0000_0000
 * @DFU_VSAD:     vector square-add M0[i] <= M2[j] + ( M1[k] * M1[k] )
 * @DFU_VMAX:     maximum value in a vector M0[0] <= MAX(M1[i])
 * @DFU_VMIN:     minimum value in a vector M0[0] <= MIN(M1[i])
 * @DFU_INDEXMAX: index of the maximal value in a vector M0[0] <= INDEXMAX(M1[i])
 * @DFU_INDEXMIN: index of the minimal value in a vector M0[0] <= INDEXMIN(M1[i])
 * @DFU_VSQR:     vector square M0[i] <= M1[j] * M1[j]
 * @DFU_VSELECT:  selection (like ternary operator in C) M0[i] <= (M1[j] != 0x0000_0000) ? M2[k] : M3[l]
 * @DFU_VGTE:     vector element-wise greater or equal M0[i] <= (M1[j] < M2[k]) ? M2[k] : M1[j]
 * @DFU_VLTE:     vector element-wise lesser or equal M0[i] <= (M1[j] < M2[k]) ? M1[j] : M2[k]
 * @DFU_VBAND:    vector element-wise binary AND  M0[i] <= BITWISE_AND(M1[j], M2[k])
 * @DFU_VBOR:     vector element-wise binary OR   M0[i] <= BITWISE_OR(M1[j], M2[k])
 * @DFU_VBNOT:    vector element-wise binary NOT  M0[i] <= BITWISE_NOT(M1[j])
 * @DFU_VCONVR:   convert RED color to float M0[i] <= color2float(M1[j], 3)
 * @DFU_VCONVG:   convert GREEN color to float M0[i] <= color2float(M1[j], 2)
 * @DFU_VCONVB:   convert BLUE color to float M0[i] <= color2float(M1[j], 1)
 * @DFU_VMSUBAC:  vector multiply-accumulation M0[i] = M3[l] - (M1[j] * M2[k])
 * @DFU_VAND3E:   logical AND between three consecutive elements M0[i] = M1[j-1] && M1[j] && M1[j+1]
 * @DFU_VAND3V:   logical AND between three elements with the same index M0[i] = M1[j] && M2[k] && M3[l]
 * @DFU_VOR3E:    logical OR between three consecutive elements M0[i] = M1[j-1] || M1[j] || M1[j+1]
 * @DFU_VOR3V:    logical OR between three elements with the same index M0[i] = M1[j] || M2[k] || M3[l]
 *
 * ID of operations performed in the accelerator.
 * They can be used in functions %pb2dfu_start_op and %pb2dfu_restart_op.
 * 
 */
/*enum dfu_operations {
	DFU_VCOPY    =  1,
	DFU_VADD     =  2,
	DFU_VMUL     =  3,
	DFU_VMAC     =  4,
	DFU_DPROD    =  5,
	DFU_VSUB     =  6,
  DFU_VSUM     =  7,
  DFU_VCMPLT   =  8,
  DFU_VSAD     =  9,
  DFU_VMAX     = 10,
  DFU_VMIN     = 11,
  DFU_INDEXMAX = 12,
  DFU_INDEXMIN = 13,
  DFU_VSQR     = 14,
  DFU_VSELECT  = 15,
  DFU_VGTE     = 16,
  DFU_VLTE     = 17,
  DFU_VBAND    = 18,
  DFU_VBOR     = 19,
  DFU_VBNOT    = 20,
  DFU_VCONVR   = 21,
  DFU_VCONVG   = 22,
  DFU_VCONVB   = 23,
	DFU_VMSUBAC  = 24,
  DFU_VAND3E   = 25,
  DFU_VAND3V   = 26,
  DFU_VOR3E    = 27,
  DFU_VOR3V    = 28
};
*/

/*
 * DFU operation codes, passed as the 'op' argument of pb2dfu_start_op()
 * and pb2dfu_restart_op(). mi(n) denotes the vector addressed by
 * address generator n (mi(0) is the result, mi(1)..mi(3) are arguments).
 */
#define DFU_VCOPY     1 /* Vector copy                            mi(0)[i] <= mi(1)[j] */
#define DFU_VADD      2 /* Vector addition                        mi(0)[i] <= mi(1)[j] + mi(2)[k] */
#define DFU_VMUL      3 /* Vector multiplication                  mi(0)[i] <= mi(1)[j] * mi(2)[k] */
#define DFU_VMAC      4 /* Vector multiply-accumulate             mi(0)[i] <= (mi(1)[j] * mi(2)[k]) + mi(3)[l] */
#define DFU_DPROD     5 /* Dot product                            mi(0)[0] <= Sum(mi(1)[j] * mi(2)[k]) */
#define DFU_VSUB      6 /* Vector subtraction                     mi(0)[i] <= mi(1)[j] - mi(2)[k] */
/* NOTE(review): the enum documentation in this file gives DFU_VSUM as
 * SUM(M1[i]) (one operand); the formula below subtracts mi(2). Confirm
 * against the hardware which of the two is correct. */
#define DFU_VSUM      7 /* Summation                              mi(0)[0] <= Sum(mi(1)[i] - mi(2)[i]) */
#define DFU_VCMPLT    8 /* Vector less-than comparison            mi(0)[i] <= (mi(1)[i] < mi(2)[i]) ? 0xffff_ffff : 0x0000_0000 */
#define DFU_VSAD      9 /* Vector square-add                      mi(0)[i] <= mi(2)[j] + ( mi(1)[k] * mi(1)[k] ) */
#define DFU_VMAX     10 /* Maximum value in a vector              mi(0)[0] <= MAX(mi(1)[i]) */
#define DFU_VMIN     11 /* Minimum value in a vector              mi(0)[0] <= MIN(mi(1)[i]) */
#define DFU_INDEXMAX 12 /* Index of the maximal value in a vector mi(0)[0] <= INDEXMAX(mi(1)[i]) */
#define DFU_INDEXMIN 13 /* Index of the minimal value in a vector mi(0)[0] <= INDEXMIN(mi(1)[i]) */
#define DFU_VSQR     14 /* Vector square                          mi(0)[i] <= mi(1)[j] * mi(1)[j] */
#define DFU_VSELECT  15 /* Selection (like C ternary operator)    mi(0)[i] <= (mi(1)[j] != 0x0000_0000) ? mi(2)[k] : mi(3)[l] */
#define DFU_VGTE     16 /* Vector element-wise greater or equal   mi(0)[i] <= (mi(1)[j] < mi(2)[k]) ? mi(2)[k] : mi(1)[j] */
#define DFU_VLTE     17 /* Vector element-wise lesser or equal    mi(0)[i] <= (mi(1)[j] < mi(2)[k]) ? mi(1)[j] : mi(2)[k] */
#define DFU_VBAND    18 /* Vector element-wise binary AND         mi(0)[i] <= BITWISE_AND(mi(1)[j], mi(2)[k]) */
#define DFU_VBOR     19 /* Vector element-wise binary OR          mi(0)[i] <= BITWISE_OR(mi(1)[j], mi(2)[k]) */
#define DFU_VBNOT    20 /* Vector element-wise binary NOT         mi(0)[i] <= BITWISE_NOT(mi(1)[j]) */
#define DFU_VCONVR   21 /* Convert RED color to float             mi(0)[i] <= color2float(mi(1)[j], 3) */
#define DFU_VCONVG   22 /* Convert GREEN color to float           mi(0)[i] <= color2float(mi(1)[j], 2) */
#define DFU_VCONVB   23 /* Convert BLUE color to float            mi(0)[i] <= color2float(mi(1)[j], 1) */
#define DFU_VMSUBAC  24 /* Vector multiply-subtract               mi(0)[i] <= mi(3)[l] - (mi(1)[j] * mi(2)[k]) */
#define DFU_VAND3E   25 /* Logical AND, three consecutive elements mi(0)[i] <= mi(1)[j-1] && mi(1)[j] && mi(1)[j+1] */
#define DFU_VAND3V   26 /* Logical AND, three vectors, same index  mi(0)[i] <= mi(1)[j] && mi(2)[k] && mi(3)[l] */
#define DFU_VOR3E    27 /* Logical OR, three consecutive elements  mi(0)[i] <= mi(1)[j-1] || mi(1)[j] || mi(1)[j+1] */
#define DFU_VOR3V    28 /* Logical OR, three vectors, same index   mi(0)[i] <= mi(1)[j] || mi(2)[k] || mi(3)[l] */


/**
 * enum dfu_ag_indices - IDs of DFU Arguments / Address Generators.
 * @DFUAG_0: address generator 0 - virtual vector M0 - result of an operation
 * @DFUAG_1: address generator 1 - virtual vector M1 - the first argument of an operation
 * @DFUAG_2: address generator 2 - virtual vector M2 - the second argument of an operation
 * @DFUAG_3: address generator 3 - virtual vector M3 - the third argument of an operation
 * @DFUAG_IDX_0: slave (index) address generator 0 
 * @DFUAG_IDX_1: slave (index) address generator 1
 * @DFUAG_IDX_2: slave (index) address generator 2
 * @DFUAG_IDX_3: slave (index) address generator 3
 * 
 * ID of address generators in BCE accelerator. 
 * They can be used in functions %pb2dfu_set_... 
 * (_addr, _bank, _fulladdr, _inc, _bound_addr, _agflags).
 */
/*enum dfu_ag_indices {
	DFUAG_0 = 0x30,
	DFUAG_1 = 0x40,
	DFUAG_2 = 0x50,
	DFUAG_3 = 0x60,
	DFUAG_IDX_0 = 0x70,
	DFUAG_IDX_1 = 0x80,
	DFUAG_IDX_2 = 0x90,
	DFUAG_IDX_3 = 0xA0,
};
*/
/* Address generator IDs, passed as the 'dfuag' argument of the pb2dfu_set_*() functions. */
#define DFUAG_0         0x30    /* main AG 0 - vector M0, result of an operation */
#define DFUAG_1         0x40    /* main AG 1 - vector M1, first argument */
#define DFUAG_2         0x50    /* main AG 2 - vector M2, second argument */
#define DFUAG_3         0x60    /* main AG 3 - vector M3, third argument */
#define DFUAG_IDX_0     0x70    /* slave (index) AG 0 */
#define DFUAG_IDX_1     0x80    /* slave (index) AG 1 */
#define DFUAG_IDX_2     0x90    /* slave (index) AG 2 */
#define DFUAG_IDX_3     0xA0    /* slave (index) AG 3 */

/**
 * enum dfu_data_memories - IDs of local memory banks
 * @MBANK_A: Use data memory A
 * @MBANK_B: Use data memory B
 * @MBANK_C: Use data memory C
 * @MBANK_D: Use data memory D
 * 
 * IDs can be used in function %pb2dfu_set_bank to assign
 * a local memory bank to an address generator.
 */
/*enum dfu_data_memories {
	MBANK_A = 0,
	MBANK_B = 1,
	MBANK_C = 2,
	MBANK_D = 3
};
*/
/* Local memory bank IDs, passed as the 'mbank' argument of pb2dfu_set_bank() / pb2dfu_set_fulladdr(). */
#define MBANK_A         0x00    /* data memory A */
#define MBANK_B         0x01    /* data memory B */
#define MBANK_C         0x02    /* data memory C */
#define MBANK_D         0x03    /* data memory D */

/**
 * enum dfu_ag_flags - Address generator flags/modes.
 * @AGFL_USE_IDX: Offset each address produced in the main AG by an index delivered from the slave AG.
 * @AGFL_STEP_IDXBND: Increment address of the main AG only when the slave AG is reaching boundary.
 * @AGFL_NUMBGEN: Number-generator mode (bypass BRAM)
 * 
 * Flags are used in pb2dfu_set_agflags().
 * NOTE: The AGFL_USE_IDX and AGFL_STEP_IDXBND flags can be used independently.
 *       That is, it is possible to step the main AG by the boundary condition of the slave AG,
 *       while not using the indices received from it. However, the slave AG will still read data from
 *       the BRAM even if they are not used in the main AG.
 */
/*enum dfu_ag_flags {
	AGFL_USE_IDX     = 0x01,
	AGFL_STEP_IDXBND = 0x02,
	AGFL_NUMBGEN     = 0x04
};
*/
/* Address generator mode flags (bit values, may be OR-ed) for pb2dfu_set_agflags(). */
#define AGFL_USE_IDX            0x01    ///< Offset each address produced in the main AG by an index delivered from the slave AG.
#define AGFL_STEP_IDXBND        0x02    ///< Increment the address of the main AG only when the slave AG is reaching the boundary.
#define AGFL_NUMBGEN            0x04    ///< Number-generator mode (bypass BRAM)



/**
 * enum mcu_fw_opcodes - External opcodes incoming from MB, as given in WAL
 * @WAL_OP_GETID: firmware should return its ID (accelerator family, kind, etc.)
 * @WAL_OP_GETCAP: firmware should return a bitmap or other information about supported operations
 * @WAL_OP_GETLIC: firmware should return information from license manager/counter
 * @WAL_OP_NONE: firmware shouldn't use this code for any useful operation
 */
/*enum pbfw_gen_opcodes {
  WAL_OP_GETID  = 0,
  WAL_OP_GETCAP = 1,
  WAL_OP_GETLIC = 2,
  WAL_OP_NONE   = 0xFF   // special
};
*/
/* Generic opcodes the firmware receives from the host (MB), as given in WAL. */
#define WAL_OP_GETID    0       /* firmware should return its ID (accelerator family, kind, etc.) */
#define WAL_OP_GETCAP   1       /* firmware should return information about supported operations */
#define WAL_OP_GETLIC   2       /* firmware should return information from the license manager/counter */
#define WAL_OP_NONE     0xFF    /* special: must not be used for any useful operation */




/* ****************************************************************** */
/*** DMA interface ***/

/**
 * enum dma_flags - flags for starting DMA channel; used in the function %dma_start_channel
 * @DMAFL_RNW: Read not write. Read data from the off-chip memory and write them to a local memory. Same as %DMAFL_READMEM.
 * @DMAFL_READMEM: Read data from the off-chip memory and write them to a local memory. Same as %DMAFL_RNW.
 * @DMAFL_WRITEMEM: Write data to the off-chip memory - read from a local memory.
 * @DMAFL_IRQ: Raise interrupt when transfer has finished. Interrupt is a single pulse [NOT IMPLEMENTED!]
 * @DMAFL_SUP: Auto-incrementation of address of the off-chip memory when transfer has finished (sram_addr += length).
 * @DMAFL_BUP: Auto-incrementation of address of the local memory when transfer has finished (bram_addr += length).
 */
/*enum dma_flags {
  DMAFL_RNW       = 0x01,
  DMAFL_READMEM   = 0x01,
  DMAFL_WRITEMEM  = 0x00,
  DMAFL_IRQ       = 0x02,
  DMAFL_SUP       = 0x04,
  DMAFL_BUP       = 0x08
};
*/
/*
 * DMA command flags for the 'cmdfl' argument of dma_start_channel().
 * NOTE: DMAFL_WRITEMEM is 0, i.e. "write" is the absence of the
 * DMAFL_RNW/DMAFL_READMEM bit and cannot be tested with a bit mask.
 */
#define DMAFL_RNW       0x01    ///< read not write: off-chip memory -> local memory (same value as DMAFL_READMEM)
#define DMAFL_READMEM   0x01    ///< read: off-chip memory -> local memory (same value as DMAFL_RNW)
#define DMAFL_WRITEMEM  0x00    ///< write: local memory -> off-chip memory
#define DMAFL_IRQ       0x02    ///< raise interrupt when transfer has finished. Interrupt is a single pulse [NOT IMPLEMENTED!!]
#define DMAFL_SUP       0x04    ///< sram_addr += length when transfer has finished
#define DMAFL_BUP       0x08    ///< bram_addr += length when transfer has finished

/**
 * enum dma_channels - indices of DMA channels, for use in the 'dmachan' parameter
 * @DMACHAN_0: index of DMA channel 0
 * @DMACHAN_1: index of DMA channel 1
 * @DMACHAN_2: index of DMA channel 2
 * @DMACHAN_3: index of DMA channel 3
 * @DMACHAN_4: index of DMA channel 4
 * @DMACHAN_5: index of DMA channel 5
 * @DMACHAN_6: index of DMA channel 6
 * @DMACHAN_7: index of DMA channel 7
 * These indices should be used to select the DMA channel in functions
 * %dma_set_extaddr, %dma_set_locaddr, %dma_set_length, %dma_set_all, %dma_start_channel.
 */
/*enum dma_channels {
  DMACHAN_0 = 0x00,
  DMACHAN_1 = 0x08,
  DMACHAN_2 = 0x10,
  DMACHAN_3 = 0x18,
  DMACHAN_4 = 0x20,
  DMACHAN_5 = 0x28,
  DMACHAN_6 = 0x30,
  DMACHAN_7 = 0x38,
};
*/
/*
 * DMA channel selectors for the 'dmachan' argument of the dma_*() functions.
 * Note that the values advance in steps of 0x08; they are not plain
 * channel numbers 0..7, so always use these constants.
 */
#define DMACHAN_0       0x00    /* DMA channel 0 */
#define DMACHAN_1       0x08    /* DMA channel 1 */
#define DMACHAN_2       0x10    /* DMA channel 2 */
#define DMACHAN_3       0x18    /* DMA channel 3 */
#define DMACHAN_4       0x20    /* DMA channel 4 */
#define DMACHAN_5       0x28    /* DMA channel 5 */
#define DMACHAN_6       0x30    /* DMA channel 6 */
#define DMACHAN_7       0x38    /* DMA channel 7 */

/**
 * enum dma_channel_masks - bit masks for all DMA channels
 * @DMACHMASK_0: bit mask for DMA channel 0
 * @DMACHMASK_1: bit mask for DMA channel 1
 * @DMACHMASK_2: bit mask for DMA channel 2
 * @DMACHMASK_3: bit mask for DMA channel 3
 * @DMACHMASK_4: bit mask for DMA channel 4
 * @DMACHMASK_5: bit mask for DMA channel 5
 * @DMACHMASK_6: bit mask for DMA channel 6
 * @DMACHMASK_7: bit mask for DMA channel 7
 * These masks should be used for checking a return value from the function %dma_get_status.
 * If result of (dma_get_status() & DMACHMASK_x) is zero then DMA channel 'x' is ready else the channel 'x' is busy.
 */
/*enum dma_channel_masks {
	 DMACHMASK_0 = 0x01,
	 DMACHMASK_1 = 0x02,
	 DMACHMASK_2 = 0x04,
	 DMACHMASK_3 = 0x08,
	 DMACHMASK_4 = 0x10,
	 DMACHMASK_5 = 0x20,
	 DMACHMASK_6 = 0x40,
	 DMACHMASK_7 = 0x80,
};
*/
/*
 * Per-channel bit masks for the value returned by dma_get_status():
 * (dma_get_status() & DMACHMASK_x) == 0 means channel x is ready,
 * non-zero means channel x is busy.
 */
#define DMACHMASK_0     0x01    /* status bit of DMA channel 0 */
#define DMACHMASK_1     0x02    /* status bit of DMA channel 1 */
#define DMACHMASK_2     0x04    /* status bit of DMA channel 2 */
#define DMACHMASK_3     0x08    /* status bit of DMA channel 3 */
#define DMACHMASK_4     0x10    /* status bit of DMA channel 4 */
#define DMACHMASK_5     0x20    /* status bit of DMA channel 5 */
#define DMACHMASK_6     0x40    /* status bit of DMA channel 6 */
#define DMACHMASK_7     0x80    /* status bit of DMA channel 7 */



/**********************************************************************/
/* Functions for interfacing the Host CPU and MCU in BCE */

/**
 * mbpb_exchange_data - Exchange one byte with the host CPU.
 * @data: 8 bit data to send to the host CPU
 *
 * Exchanges a byte with the host CPU using the barrier synchronization:
 * @data is sent and one byte is received in return.
 * This function blocks.
 *
 * Return Value: The function returns the 8 bit data received from the host CPU.
 */
unsigned char mbpb_exchange_data(unsigned char data);

/**
 * pb2mb_report_running - Inform the host CPU that firmware is running
 *
 * Sets the R and B bits in the CFG Status register
 * to report that the firmware has successfully started and is busy.
 * This function does NOT block.
 * It should be called immediately when picoblaze starts up.
 * NOTE: This could be integrated into picoblaze C library
 * to be called automatically upon startup.
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2mb_report_running(void);

/**
 * write_bce_id_to_cmem - Write the BCE ID into the cfg output memory.
 * @fam1: BCE ID; the host CPU application can read it with the function %wal_get_id.
 *
 * This function should be used as the reaction to the %WAL_OP_GETID operation.
 *
 * Return Value: The function doesn't return any value.
 */
void write_bce_id_to_cmem(unsigned char fam1);

/**
 * write_dfu_caps_to_cmem - copy the DFU bitmap of capabilities from DFU to control memory
 *
 * Writes the whole DFU capabilities bitmap (256 bits = 32 B = 8 words) into the cfg output memory,
 * starting at address 0x81 of the ctrl/status memory.
 * This function should be used as the reaction to the %WAL_OP_GETCAP operation.
 *
 * Return Value: The function doesn't return any value.
 */
void write_dfu_caps_to_cmem(void);

/**
 * read_bce_cmem_u8 - Read u8 value from the BCE input cfg region.
 * @cfgaddr: word address in the config mem.
 * @byteidx: byte index within the word, range [0; 3], 0 = LL, 3 = HH
 *
 * Return Value: The function returns the u8 value read.
 */
unsigned char read_bce_cmem_u8(unsigned char cfgaddr, unsigned char byteidx);

/**
 * read_bce_cmem_u16 - Read u16 value from the BCE input cfg region.
 * @cfgaddr: word address in the config mem.
 * @widx: word index, one of {0; 2}, 0 = Lo, 2 = Hi
 *
 * Return Value: The function returns the u16 value read.
 */
unsigned int read_bce_cmem_u16(unsigned char cfgaddr, unsigned char widx);

/**
 * write_bce_cmem_u16 - Write u16 value into BCE cfg memory.
 * @cfgaddr: word address in the config mem; shall be in the output status region, ie. 0x80 - 0xff.
 * @widx: word index, one of {0; 2}, 0 = Lo, 2 = Hi
 * @dt: u16 value to write
 *
 * Return Value: The function doesn't return any value.
 */
void write_bce_cmem_u16(unsigned char cfgaddr, unsigned char widx, unsigned int dt);

/**
 * pcnt_get_dfutime - Get time of the last DFU operation.
 *
 * Gets the running time, in clock-cycles, of the last DFU operation executed.
 *
 * Return Value: The function returns the value of the 16 bit DFU operation-time counter.
 */
unsigned int pcnt_get_dfutime(void);

/**
 * pcnt_get_prgtime_lo - Get value of the program running-time counter (lower 16 bit).
 *
 * Gets the program running time in clock-cycles.
 * The counter is 32 bit in total, thus two functions are provided to access
 * its lo/hi parts; see also %pcnt_get_prgtime_hi.
 *
 * Return Value: The function returns the lower 16 bit of the program running-time counter.
 */
unsigned int pcnt_get_prgtime_lo(void);

/**
 * pcnt_get_prgtime_hi - Get value of the program running-time counter (higher 16 bit).
 *
 * Gets the program running time in clock-cycles.
 * The counter is 32 bit in total, thus two functions are provided to access
 * its lo/hi parts; see also %pcnt_get_prgtime_lo.
 *
 * Return Value: The function returns the higher 16 bit of the program running-time counter.
 */
unsigned int pcnt_get_prgtime_hi(void);

/**
 * pcnt_reset_prgtime - Reset the program running-time counter.
 *
 * Resets the counter read by %pcnt_get_prgtime_lo and %pcnt_get_prgtime_hi.
 *
 * Return Value: The function doesn't return any value.
 */
void pcnt_reset_prgtime(void);

/**
 * get_dfulic - Read licence flag from DFU.
 *
 * The function reads the DFU license counter.
 * It is a natural reaction to the %WAL_OP_GETLIC operation.
 *
 * Return Value: The function returns a flag which indicates that the DFU licence has run out.
 * NOTE(review): the exact encoding of the flag is not specified here - confirm against the implementation.
 */
unsigned char get_dfulic(void);

/************************************/
/* functions for interfacing PB-DFU */

/**
 * pb2dfu_wait4hw - PB will wait for the end of computation
 *
 * The function waits until the computation in the accelerator has finished.
 * It should be called before the next operation is started.
 * The parameters of the next operation can be prepared before the waiting
 * to speed up the entire computation.
 *
 * Return Value: Zero if ok, Non-zero on DFU error
 */
unsigned char pb2dfu_wait4hw(void);

/**
 * pb2dfu_start_op - start operation in DFU with specified length of data vectors
 * @op: DFU operation (constants %DFU_xxx, e.g. %DFU_VADD)
 * @cnt: length of input data vectors
 *
 * The function covers two functions (pb2dfu_set_cnt and pb2dfu_restart_op).
 * NOTE: When MCU_KIND=PB3A the function will stall the CPU until DFU is ready
 *       to accept the operation.
 *
 * Return Value: The function doesn't return any value.
 *
 */
static inline void pb2dfu_start_op(unsigned char op, unsigned int cnt);

/**
 * pb2dfu_restart_op - start operation in DFU
 * @op: DFU operation (constants %DFU_xxx, e.g. %DFU_VADD)
 *
 * All parameters of the operation must be set before this function is called.
 * All parameters are registered, so only the parameters that differ from
 * the previous operation have to be set again. The operation itself, however,
 * must always be given because this function starts the required operation
 * in the DFU.
 * NOTE: When MCU_KIND=PB3A the function will stall the CPU until DFU is ready
 *       to accept the operation.
 *
 * Return Value: The function doesn't return any value.
 *
 */
static inline void pb2dfu_restart_op(unsigned char op);

/**
 * pb2dfu_set_cnt - set length of input data vectors for the next operation
 * @cnt: length of input data vectors
 *
 * The function sets the length of the input data vectors.
 * The simple operations (such as VADD, VMUL) will be performed @cnt-times
 * as one pipelined operation.
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_cnt(unsigned int cnt);

/**
 * pb2dfu_set_addr - set the base address of the vector for the given DFU argument
 * @dfuag: select the DFU argument / address generator (constant %DFUAG_x)
 * @addr: the initial address of the vector
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_addr(unsigned char dfuag, unsigned int addr);

/**
 * pb2dfu_set_bank - select the memory bank for the given DFU argument
 * @dfuag: select the DFU argument / address generator (constant %DFUAG_x)
 * @mbank: the memory bank which will be used for the next operation (constant %MBANK_x)
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_bank(unsigned char dfuag, unsigned char mbank);

/**
 * pb2dfu_set_fulladdr - set full address (bank and offset) of the first element in the vector
 * @dfuag: select the DFU argument / address generator (constant %DFUAG_x)
 * @mbank: the memory bank which will be used for the next operation (constant %MBANK_x)
 * @addr: the initial address of the vector within the bank
 *
 * Sets in one call what %pb2dfu_set_bank and %pb2dfu_set_addr set separately.
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_fulladdr(unsigned char dfuag, unsigned char mbank, unsigned int addr);

/**
 * pb2dfu_set_inc - set the stride of the vector for the DFU argument
 * @dfuag: select the DFU argument / address generator (constant %DFUAG_x)
 * @inc: increment between two consecutive elements of the vector (signed)
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_inc(unsigned char dfuag, int inc);

/**
 * pb2dfu_set_bound_addr - set boundary addresses for vector accesses
 * @dfuag: select the DFU argument / address generator (constant %DFUAG_x)
 * @lo_bound: lower address boundary
 * @hi_bound: higher address boundary
 *
 * Return Value: The function doesn't return any value.
 */
void pb2dfu_set_bound_addr(unsigned char dfuag, unsigned int lo_bound, unsigned int hi_bound);

/**
 * pb2dfu_set_agflags - set operation flags/mode of the specified address generator (DFU argument)
 * @dfuag: select the DFU argument / address generator (constant %DFUAG_x)
 * @agflags: bitmap of flags to set (constants %AGFL_x, may be OR-ed together)
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_agflags(unsigned char dfuag, unsigned char agflags);

/**
 * pb2dfu_set_repetitions - set the number of repetitions of a DFU operation.
 * @nrep: the number of times the following DFU operation will be restarted
 *        (8 bit value)
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_repetitions(unsigned char nrep);


/**********************************************************************/
/* Functions for scalar network access */

/**
 * rh_get_node_id - get address of the local node
 *
 * Other nodes can use the returned value as the destination address
 * in the sending functions.
 *
 * Return Value: The function returns the address of the node.
 */
unsigned char rh_get_node_id(void);

/**
 * rh_set_target - set the destination address: target
 * @target: 8 bit address of the destination. Target is fixed in hardware.
 *
 * All sending functions will use this destination address until it is set again.
 *
 * Return Value: The function doesn't return any value.
 */
void rh_set_target(unsigned char target);

/**
 * rh_set_endp - set the destination address: endpoint
 * @endp: 8 bit address of the destination endpoint.
 *
 * All sending functions will use this destination endpoint until it is set again.
 * Two endpoints are currently supported.
 * Endpoint 0 is reserved for the user peer-to-peer communication among workers
 * (i.e. messages sent to this endpoint will be handled in PicoBlaze firmware).
 * Endpoint 1 is for the remote control of workers (i.e. as a substitution of
 * the Control/Status memory blocks).
 *
 * Return Value: The function doesn't return any value.
 */
void rh_set_endp(unsigned char endp);

/**
 * rh_set_flkind - set the flit kind
 * @flkind: kind of flit which will be sent
 *
 * All subsequent flits will be sent with this kind.
 * The kind of a received flit can be read with the function %rh_get_rcv_flkind.
 *
 * Return Value: The function doesn't return any value.
 */
void rh_set_flkind(unsigned char flkind);

/**
 * rh_set_target_endp_flkind - set all the target, endpoint, and flkind fields at once
 * @target: destination address - target
 * @endp: destination address - endpoint
 * @flkind: flit kind
 *
 * Sets all three sending parameters at once. The meaning of target, endpoint
 * and flkind is described at the functions %rh_set_target, %rh_set_endp and
 * %rh_set_flkind respectively.
 *
 * Return Value: The function doesn't return any value.
 */
void rh_set_target_endp_flkind(unsigned char target, unsigned char endp, unsigned char flkind);

/**
 * rh_send_data_u8 - send 8 bit data to the destination
 * @dt: 8 bit data
 *
 * The function waits until sending is allowed and then sends the 8 bit data
 * with the preset flkind to the preset destination (target, endpoint).
 * This function blocks.
 *
 * Return Value: The function doesn't return any value.
 */
void rh_send_data_u8(unsigned char dt);

/**
 * rh_send_data_u16 - send 16 bit data to the destination
 * @dt: 16 bit data
 *
 * The function waits until sending is allowed and then sends the 16 bit data
 * with the preset flkind to the preset destination (target, endpoint).
 * This function blocks.
 *
 * Return Value: The function doesn't return any value.
 */
void rh_send_data_u16(unsigned int dt);

/**
 * rh_is_rcv - test if a message is available in the receive buffer
 *
 * The function tests if a message is available in the receive buffer.
 * This function does not block.
 *
 * Return Value: It returns a non-zero value if any message is in the buffer.
 * It returns zero if there is no message in the receive buffer.
 */
unsigned char rh_is_rcv(void);

/**
 * rh_wait_for_recv - wait until a message is available in the input buffer
 *
 * The function waits until a message is available in the input buffer.
 * Then it reads and returns 8 bit data from the input buffer.
 * This function blocks.
 *
 * Return Value: The function returns the u8 data received (ie. the same as rh_get_rcv_data_u8()).
 */
unsigned char rh_wait_for_recv(void);

/**
 * rh_wait_for_recv_u16 - wait until a message is available in the input buffer
 *
 * The function waits until a message is available in the input buffer.
 * Then it reads and returns 16 bit data from the input buffer.
 * This function blocks.
 *
 * Return Value: The function returns the u16 data received (ie. the same as rh_get_rcv_data_u16()).
 */
unsigned int rh_wait_for_recv_u16(void);

/**
 * rh_get_rcv_flkind - read the flit kind of the message in the receive buffer
 *
 * The function returns the flit kind of the received data.
 * If the receive buffer is empty, the returned value is unspecified.
 * Precondition: rh_is_rcv() is true
 *
 * Return Value: The function returns the flit kind of the received data.
 */
unsigned char rh_get_rcv_flkind(void);

/**
 * rh_get_rcv_data_u8 - read 8 bit data in the receive buffer.
 *
 * The function reads and returns 8 bit data from the receive buffer.
 * If the receive buffer is empty, the returned value is unspecified.
 * Precondition: rh_is_rcv() is true
 *
 * Return Value: The function returns the 8 bit received message data.
 */
unsigned char rh_get_rcv_data_u8(void);

/**
 * rh_get_rcv_data_u16 - read 16 bit data in the receive buffer.
 *
 * The function reads and returns 16 bit data from the receive buffer.
 * If the receive buffer is empty, the returned value is unspecified.
 * Precondition: rh_is_rcv() is true
 *
 * Return Value: The function returns the 16 bit received message data.
 */
unsigned int rh_get_rcv_data_u16(void);

/**
 * rh_remove_rcv - remove a message from the receive buffer.
 *
 * The function removes a message from the receive buffer.
 * Precondition: rh_is_rcv() is true
 *
 * Return Value: The function doesn't return any value.
 */
void rh_remove_rcv(void);


/* Functions for remote control of workers over the network */

/**
 * rh_remote_reset - reset the remote worker.
 * @target: address of the remote worker
 *
 * The function sends a control message to reset the remote worker.
 * The remote target is reset by writing 0x00 into its Control Word, i.e.
 * all the fields (ITAG, PM, G) in the Control Word of the remote BCE are set to zero.
 * The function returns immediately.
 *
 * Return Value: The function does not return any value.
 */
void rh_remote_reset(unsigned char target);

/**
 * rh_remote_start - start the remote worker.
 * @target: address of the remote worker
 * @pm: index of the program memory used in the remote worker
 *
 * Starts the remote worker, using the program memory @pm.
 * In the remote Control Word the fields are set as follows:
 *   ITAG = 0, PM = pm, G = 1.
 * The function returns immediately.
 *
 * Return Value: The function does not return any value.
 */
void rh_remote_start(unsigned char target, unsigned char pm);




/**********************************************************************/
/* Functions for accessing the DMA configuration registers */

/**
 * dma_set_extaddr - set 32 bit address of off-chip memory
 * @dmachan: select DMA channel %DMACHAN_x
 * @addr_hi: higher 16 bit of address
 * @addr_lo: lower 16 bit of address
 *
 * Sets up the external DDR memory address for the given DMA channel.
 * The 32 bit external address [hi:lo] has to be aligned on an 8-byte boundary.
 * The address is set directly in bytes.
 *
 * Return Value: The function does not return any value.
 */
void dma_set_extaddr(unsigned char dmachan, unsigned int addr_hi, unsigned int addr_lo);

/**
 * dma_set_locaddr - set 16 bit address of local memory
 * @dmachan: select DMA channel %DMACHAN_x
 * @addr: 16 bit address of local memory (in bytes, not FP words)
 *
 * Sets up the local BRAM address for the given DMA channel.
 * The 16 bit local address has to be aligned on a 4-byte boundary.
 * The DMA accesses all local memory banks as one linear memory space:
 *   addr = (MBANK_x * MBANK_SIZE_IN_FP + OFFSET_IN_FP) * 4
 * where MBANK_x is a bank index, MBANK_SIZE_IN_FP is the size of a memory bank
 * in FP words (default 1024), and OFFSET_IN_FP is the address of the variable
 * in the bank, in FP words.
 *
 * Return Value: The function does not return any value.
 */
void dma_set_locaddr(unsigned char dmachan, unsigned int addr);

/**
 * dma_set_length - set length of the transferred block
 * @dmachan: select DMA channel %DMACHAN_x
 * @length: length of the transfer block in bytes
 *
 * Sets up the transfer length for the given DMA channel.
 * The length has to be aligned on an 8-byte boundary.
 * The minimal transfer length is 8 Bytes (2 words).
 *
 * Return Value: The function does not return any value.
 */
void dma_set_length(unsigned char dmachan, unsigned int length);

/**
 * dma_set_all - set all the parameters of DMA transfer for the given channel
 * @dmachan: select DMA channel %DMACHAN_x
 * @extaddr_hi: higher 16 bit of off-chip address
 * @extaddr_lo: lower 16 bit of off-chip address
 * @locaddr: 16 bit address of local memory (in bytes, not FP words)
 * @length: length of the transfer block in bytes
 *
 * Sets up all the parameters of the DMA channel, i.e. what %dma_set_extaddr,
 * %dma_set_locaddr and %dma_set_length set separately.
 * The 32 bit external address [hi:lo] has to be aligned on an 8-byte boundary.
 * The 16 bit local address has to be aligned on a 4-byte boundary.
 * The length has to be aligned on an 8-byte boundary.
 *
 * Return Value: The function does not return any value.
 */
void dma_set_all(unsigned char dmachan,
                 unsigned int extaddr_hi, unsigned int extaddr_lo,
                 unsigned int locaddr, unsigned int length);

/**
 * dma_start_channel - start DMA transfer on the given channel
 * @dmachan: select DMA channel %DMACHAN_x
 * @cmdfl: flags of the transfer (bitmap of %DMAFL_x constants)
 *
 * Starts the DMA transfer on the given channel.
 * The flags specify the transfer direction (%DMAFL_RNW, %DMAFL_READMEM, %DMAFL_WRITEMEM),
 * interrupt generation (%DMAFL_IRQ) - NOT IMPLEMENTED!,
 * and address incrementation (%DMAFL_SUP, %DMAFL_BUP).
 * The function does not block; use %dma_get_status to find out
 * whether the channel is still busy.
 *
 * Return Value: The function does not return any value.
 */
void dma_start_channel(unsigned char dmachan, unsigned char cmdfl);

/**
 * dma_get_status - read DMA status register
 *
 * The status bits correspond to the eight DMA channels.
 * The return value should be tested against the %DMACHMASK_x constants.
 * A value of 1 in a bit indicates that the corresponding channel is busy,
 * a value of 0 indicates that its operation has completed.
 * The function does not block.
 *
 * Return Value: The function returns the bitmap of busy DMA channels.
 */
static inline unsigned char dma_get_status(void);


#include <pbbcelib-impl.h>

#ifdef PBBCELIB_WITH_INTL
  #include <pbbcelib-intl.h>
#endif


#endif /* PBBCELIB_H_V3 */
