/* Copyright (C) 1998, Cygnus Solutions */
+/* Debugguing PKE? */
+#define PKE_DEBUG
+
#include <stdlib.h>
#include "sky-pke.h"
#include "sky-dma.h"
#include "sky-vu1.h"
#include "sky-gpuif.h"
+
/* Imported functions */
void device_error (device *me, char* message); /* device.c */
unsigned, sim_cpu*, sim_cia);
static void pke_issue(struct pke_device*);
static void pke_pc_advance(struct pke_device*, int num_words);
-static unsigned_4* pke_pc_operand(struct pke_device*, int word_num);
-static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int word_num);
+static unsigned_4* pke_pc_operand(struct pke_device*, int operand_num);
+static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int operand_num,
+ unsigned_4** operand);
static int pke_track_write(struct pke_device*, const void* src, int len,
address_word dest, unsigned_4 sourceaddr);
static void pke_attach(SIM_DESC sd, struct pke_device* me);
+enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 };
+static int pke_check_stall(struct pke_device* me, enum pke_check_target what);
+static void pke_flip_dbf(struct pke_device* me);
+/* PKEcode handlers */
+static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_base(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_error(struct pke_device* me, unsigned_4 pkecode);
switch(reg_num)
{
case PKE_REG_FBRST:
- /* XXX: order of evaluation? STP && STC ?? */
- if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */
- {
- /* clear FIFO: also prevents re-execution attempt of
- possible stalled instruction */
- me->fifo_num_elements = me->fifo_pc;
- /* clear registers */
- memset(me->regs, 0, sizeof(me->regs));
- me->flags = 0;
- me->qw_pc = 0;
- }
- if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */
- {
- PKE_REG_MASK_SET(me, STAT, PFS, 1);
- }
- if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */
- {
- /* XXX: how to safely abort "currently executing" (=> stalled) instruction? */
- PKE_REG_MASK_SET(me, STAT, PSS, 1);
- }
+ /* Order these tests from least to most overriding, in case
+ multiple bits are set. */
if(BIT_MASK_GET(input[0], 2, 2)) /* STC bit */
{
/* clear a bunch of status bits */
PKE_REG_MASK_SET(me, STAT, ER1, 0);
/* will allow resumption of possible stalled instruction */
}
+ if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */
+ {
+ /* XXX: how to safely abort "currently executing" (=> stalled) instruction? */
+ PKE_REG_MASK_SET(me, STAT, PSS, 1);
+ }
+ if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */
+ {
+ PKE_REG_MASK_SET(me, STAT, PFS, 1);
+ }
+ if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */
+ {
+ /* clear FIFO by skipping to word after PC: also
+ prevents re-execution attempt of possible stalled
+ instruction */
+ me->fifo_num_elements = me->fifo_pc;
+ /* clear registers */
+ memset(me->regs, 0, sizeof(me->regs));
+ me->flags = 0;
+ me->qw_pc = 0;
+ }
break;
case PKE_REG_ERR:
(SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_PKTFLAG : DMA_CHANNEL1_PKTFLAG),
(void*) & fqw->dma_tag_present,
sizeof(unsigned_4));
- /* XXX: check RC */
me->fifo_num_elements++;
unsigned_4 fw;
unsigned_4 cmd, intr, num;
unsigned_4 imm;
- int next_pps_state; /* PPS after this instruction issue attempt */
/* 1 -- test go / no-go for PKE execution */
/* check for stall/halt control bits */
- /* XXX: What is the PEW bit for? */
- if(PKE_REG_MASK_GET(me, STAT, PSS) ||
+ if(PKE_REG_MASK_GET(me, STAT, PSS) || /* XXX: PSS may be a special case */
PKE_REG_MASK_GET(me, STAT, PFS) ||
+ /* PEW bit not a reason to keep stalling - it's re-checked below */
+ /* PGW bit not a reason to keep stalling - it's re-checked below */
/* maskable stall controls: ER0, ER1, PIS */
(PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) ||
(PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) ||
(PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII)))
{
- /* XXX */
+ /* try again next cycle; no state change */
+ return;
}
/* XXX: handle PSS by *skipping* instruction? */
/* 2 -- fetch PKE instruction */
- /* "fetch" instruction quadword */
- fqw = & me->fifo[me->fifo_pc];
+ /* skip over DMA tag, if present */
+ pke_pc_advance(me, 0);
- /* skip over DMA tags, if present */
- if((fqw->dma_tag_present != 0) && (me->qw_pc < 2))
- {
- ASSERT(me->qw_pc == 0);
- /* XXX: check validity of DMA tag; if bad, set ER0 flag */
- me->qw_pc = 2;
- }
-
- /* "fetch" instruction word */
+ /* "fetch" instruction quadword and word */
+ fqw = & me->fifo[me->fifo_pc];
fw = fqw->data[me->qw_pc];
- /* store it in PKECODE register */
+ /* store word in PKECODE register */
me->regs[PKE_REG_CODE][0] = fw;
/* decoding */
PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE);
- next_pps_state = PKE_REG_STAT_PPS_IDLE; /* assume instruction completes */
- /* decode */
+ /* decode & execute */
if(IS_PKE_CMD(cmd, PKENOP))
- {
- /* no work required, yey */
- pke_pc_advance(me, 1);
- }
+ pke_code_nop(me, fw);
else if(IS_PKE_CMD(cmd, STCYCL))
- {
- /* copy immediate value into CYCLE reg */
- me->regs[PKE_REG_CYCLE][0] = imm;
- pke_pc_advance(me, 1);
- }
+ pke_code_stcycl(me, fw);
else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET))
- {
- /* copy 10 bits to OFFSET field */
- PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9));
- /* clear DBF bit */
- PKE_REG_MASK_SET(me, DBF, DF, 0);
- /* clear other DBF bit */
- PKE_REG_MASK_SET(me, STAT, DBF, 0);
- /* set TOPS = BASE */
- PKE_REG_MASK_SET(me, TOPS, TOPS,
- PKE_REG_MASK_GET(me, BASE, BASE));
- pke_pc_advance(me, 1);
- }
+ pke_code_offset(me, fw);
else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE))
- {
- /* copy 10 bits to BASE field */
- PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9));
- /* clear DBF bit */
- PKE_REG_MASK_SET(me, DBF, DF, 0);
- /* clear other DBF bit */
- PKE_REG_MASK_SET(me, STAT, DBF, 0);
- /* set TOPS = BASE */
- PKE_REG_MASK_SET(me, TOPS, TOPS,
- PKE_REG_MASK_GET(me, BASE, BASE));
- pke_pc_advance(me, 1);
- }
+ pke_code_base(me, fw);
else if(IS_PKE_CMD(cmd, ITOP))
- {
- /* copy 10 bits to ITOPS field */
- PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9));
- pke_pc_advance(me, 1);
- }
+ pke_code_itop(me, fw);
else if(IS_PKE_CMD(cmd, STMOD))
- {
- /* copy 2 bits to MODE register */
- PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2));
- pke_pc_advance(me, 1);
- }
- else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) /* MSKPATH3 */
- {
- /* XXX: what to do with this? DMA control register? */
- pke_pc_advance(me, 1);
- }
+ pke_code_stmod(me, fw);
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3))
+ pke_code_mskpath3(me, fw);
else if(IS_PKE_CMD(cmd, PKEMARK))
- {
- /* copy 16 bits to MARK register */
- PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15));
- /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
- PKE_REG_MASK_SET(me, STAT, MRK, 1);
- pke_pc_advance(me, 1);
- }
+ pke_code_pkemark(me, fw);
else if(IS_PKE_CMD(cmd, FLUSHE))
- {
- /* read VU status word */
- unsigned_4 vu_stat;
- sim_read(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
- (void*) & vu_stat,
- sizeof(unsigned_4));
- /* XXX: check RC */
-
- /* check if VBS bit is clear, i.e., VU is idle */
- if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
- {
- /* VU idle */
- /* advance PC */
- pke_pc_advance(me, 1);
- }
- else
- {
- /* VU busy */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
- }
- }
+ pke_code_flushe(me, fw);
else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH))
+ pke_code_flush(me, fw);
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
+ pke_code_flusha(me, fw);
+ else if(IS_PKE_CMD(cmd, PKEMSCAL))
+ pke_code_pkemscal(me, fw);
+ else if(IS_PKE_CMD(cmd, PKEMSCNT))
+ pke_code_pkemscnt(me, fw);
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
+ pke_code_pkemscalf(me, fw);
+ else if(IS_PKE_CMD(cmd, STMASK))
+ pke_code_stmask(me, fw);
+ else if(IS_PKE_CMD(cmd, STROW))
+ pke_code_strow(me, fw);
+ else if(IS_PKE_CMD(cmd, STCOL))
+ pke_code_stcol(me, fw);
+ else if(IS_PKE_CMD(cmd, MPG))
+ pke_code_mpg(me, fw);
+ else if(IS_PKE_CMD(cmd, DIRECT))
+ pke_code_direct(me, fw);
+ else if(IS_PKE_CMD(cmd, DIRECTHL))
+ pke_code_directhl(me, fw);
+ else if(IS_PKE_CMD(cmd, UNPACK))
+ pke_code_unpack(me, fw);
+ /* ... other commands ... */
+ else
+ pke_code_error(me, fw);
+}
+
+
+
+/* advance the PC by given number of data words; update STAT/FQC
+ field; assume FIFO is filled enough */
+
+void
+pke_pc_advance(struct pke_device* me, int num_words)
+{
+ int num = num_words;
+ ASSERT(num_words > 0);
+
+ while(num > 0)
{
- /* read VU status word */
- unsigned_4 vu_stat;
- sim_read(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
- (void*) & vu_stat,
- sizeof(unsigned_4));
- /* XXX: check RC */
+ struct fifo_quadword* fq;
+
+ /* one word skipped */
+ num --;
- /* check if VGW bit is clear, i.e., PATH1 is idle */
- /* simulator design implies PATH2 is always "idle" */
- if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 &&
- BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 &&
- 1 /* PATH2 always idle */)
+ /* point to next word */
+ me->qw_pc ++;
+ if(me->qw_pc == 4)
{
- /* VU idle */
- /* PATH1 idle */
- /* PATH2 idle */
- /* advance PC */
- pke_pc_advance(me, 1);
+ me->qw_pc = 0;
+ me->fifo_pc ++;
}
- else
+
+ /* skip over DMA tag words if present in word 0 or 1 */
+ fq = & me->fifo[me->fifo_pc];
+ if(fq->dma_tag_present && (me->qw_pc < 2))
{
- /* GPUIF busy */
- /* retry this instruction next clock */
+ /* skip by going around loop an extra time */
+ num ++;
}
}
- else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
+
+ /* clear FQC if FIFO is now empty */
+ if(me->fifo_num_elements == me->fifo_pc)
{
- /* read VU status word */
- unsigned_4 vu_stat;
- sim_read(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
- (void*) & vu_stat,
- sizeof(unsigned_4));
- /* XXX: check RC */
-
- /* check if VGW bit is clear, i.e., PATH1 is idle */
- /* simulator design implies PATH2 is always "idle" */
- /* XXX: simulator design implies PATH3 is always "idle" */
- if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 &&
- BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 &&
- 1 /* PATH2 always idle */ &&
- 1 /* PATH3 always idle */)
- {
- /* VU idle */
- /* PATH1 idle */
- /* PATH2 idle */
- /* PATH3 idle */
- /* advance PC */
- pke_pc_advance(me, 1);
- }
- else
- {
- /* GPUIF busy */
- /* retry this instruction next clock */
- }
+ PKE_REG_MASK_SET(me, STAT, FQC, 0);
}
- else if(IS_PKE_CMD(cmd, PKEMSCAL))
- {
- /* read VU status word */
- unsigned_4 vu_stat;
- sim_read(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
- (void*) & vu_stat,
- sizeof(unsigned_4));
- /* XXX: check RC */
-
- /* check if VBS bit is clear, i.e., VU is idle */
- if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
- {
- /* VU idle */
- unsigned_4 vu_pc;
+}
- /* perform PKE1-unique processing for microprogram calls */
- if(me->pke_number == 1)
- {
- /* flip DBF */
- PKE_REG_MASK_SET(me, DBF, DF,
- PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
- PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
- /* compute new TOPS */
- PKE_REG_MASK_SET(me, TOPS, TOPS,
- (PKE_REG_MASK_GET(me, BASE, BASE) +
- (PKE_REG_MASK_GET(me, DBF, DF) *
- PKE_REG_MASK_GET(me, OFST, OFFSET))));
- /* compute new ITOP and TOP */
- PKE_REG_MASK_SET(me, ITOP, ITOP,
- PKE_REG_MASK_GET(me, ITOPS, ITOPS));
- PKE_REG_MASK_SET(me, TOP, TOP,
- PKE_REG_MASK_GET(me, TOPS, TOPS));
- }
- /* compute new PC */
- vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */
- /* write new PC; callback function gets VU running */
- sim_write(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
- (void*) & vu_pc,
- sizeof(unsigned_4));
- /* advance PC */
- pke_pc_advance(me, 1);
- }
- else
- {
- /* VU busy */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
- }
- }
- else if(IS_PKE_CMD(cmd, PKEMSCNT))
- {
- /* read VU status word */
- unsigned_4 vu_stat;
- sim_read(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
- (void*) & vu_stat,
- sizeof(unsigned_4));
- /* XXX: check RC */
- /* check if VBS bit is clear, i.e., VU is idle */
- if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
- {
- /* VU idle */
- unsigned_4 vu_pc;
+/* Return pointer to FIFO quadword containing given operand# in FIFO.
+ `operand_num' starts at 1. Return pointer to operand word in last
+ argument, if non-NULL. If FIFO is not full enough, return 0.
+ Signal an ER0 indication upon skipping a DMA tag. */
- /* flip DBF etc. for PKE1 */
- if(me->pke_number == 1)
- {
- PKE_REG_MASK_SET(me, DBF, DF,
- PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
- PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
- PKE_REG_MASK_SET(me, TOPS, TOPS,
- (PKE_REG_MASK_GET(me, BASE, BASE) +
- (PKE_REG_MASK_GET(me, DBF, DF) *
- PKE_REG_MASK_GET(me, OFST, OFFSET))));
- PKE_REG_MASK_SET(me, ITOP, ITOP,
- PKE_REG_MASK_GET(me, ITOPS, ITOPS));
- PKE_REG_MASK_SET(me, TOP, TOP,
- PKE_REG_MASK_GET(me, TOPS, TOPS));
- }
+struct fifo_quadword*
+pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand)
+{
+ int num = operand_num;
+ int new_qw_pc, new_fifo_pc;
+ struct fifo_quadword* operand_fifo;
- /* read old PC */
- sim_read(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
- (void*) & vu_pc,
- sizeof(unsigned_4));
- /* rewrite its PC; callback function gets VU running */
- sim_write(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
- (void*) & vu_pc,
- sizeof(unsigned_4));
- /* advance PC */
- pke_pc_advance(me, 1);
- }
- else
- {
- /* VU busy */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
- }
- }
- else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
- {
- /* read VU status word */
- unsigned_4 vu_stat;
- sim_read(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
- (void*) & vu_stat,
- sizeof(unsigned_4));
- /* XXX: check RC */
+ ASSERT(num > 0);
- /* check if VGW bit is clear, i.e., PATH1 is idle */
- /* simulator design implies PATH2 is always "idle" */
- if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 &&
- BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 &&
- 1 /* PATH2 always idle */)
- {
- /* VU idle */
- /* PATH1 idle */
- /* PATH2 idle */
- unsigned_4 vu_pc;
+ /* snapshot current pointers */
+ new_fifo_pc = me->fifo_pc;
+ new_qw_pc = me->qw_pc;
- /* flip DBF etc. for PKE1 */
- if(me->pke_number == 1)
- {
- PKE_REG_MASK_SET(me, DBF, DF,
- PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
- PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
- PKE_REG_MASK_SET(me, TOPS, TOPS,
- (PKE_REG_MASK_GET(me, BASE, BASE) +
- (PKE_REG_MASK_GET(me, DBF, DF) *
- PKE_REG_MASK_GET(me, OFST, OFFSET))));
- PKE_REG_MASK_SET(me, ITOP, ITOP,
- PKE_REG_MASK_GET(me, ITOPS, ITOPS));
- PKE_REG_MASK_SET(me, TOP, TOP,
- PKE_REG_MASK_GET(me, TOPS, TOPS));
- }
+ while(num > 0)
+ {
+ /* one word skipped */
+ num --;
- /* compute new PC */
- vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */
- /* write new PC; callback function gets VU running */
- sim_write(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
- (void*) & vu_pc,
- sizeof(unsigned_4));
- /* advance PC */
- pke_pc_advance(me, 1);
- }
- else
+ /* point to next word */
+ new_qw_pc ++;
+ if(new_qw_pc == 4)
{
- /* VU busy */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
+ new_qw_pc = 0;
+ new_fifo_pc ++;
}
- }
- else if(IS_PKE_CMD(cmd, STMASK))
- {
- /* check that FIFO has one more word for STMASK operand */
- unsigned_4* mask;
- mask = pke_pc_operand(me, 1);
- if(mask != NULL)
+ /* check for FIFO underflow */
+ if(me->fifo_num_elements == new_fifo_pc)
{
- /* "transferring" operand */
- PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
- /* fill the register */
- PKE_REG_MASK_SET(me, MASK, MASK, *mask);
- /* advance PC */
- pke_pc_advance(me, 2);
+ operand_fifo = NULL;
+ break;
}
- else
+
+ /* skip over DMA tag words if present in word 0 or 1 */
+ operand_fifo = & me->fifo[new_fifo_pc];
+ if(operand_fifo->dma_tag_present && (new_qw_pc < 2))
{
- /* need to wait for another word */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
+ /* mismatch error! */
+ PKE_REG_MASK_SET(me, STAT, ER0, 1);
+ /* skip by going around loop an extra time */
+ num ++;
}
}
- else if(IS_PKE_CMD(cmd, STROW))
- {
- /* check that FIFO has four more words for STROW operand */
- unsigned_4* last_op;
- last_op = pke_pc_operand(me, 4);
- if(last_op != NULL)
- {
- /* "transferring" operand */
- PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+ /* return pointer to operand word itself */
+ if(operand_fifo != NULL)
+ *operand = & operand_fifo->data[new_qw_pc];
- /* copy ROW registers: must all exist if 4th operand exists */
- me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1);
- me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2);
- me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3);
- me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4);
+ return operand_fifo;
+}
- /* advance PC */
- pke_pc_advance(me, 5);
- }
- else
- {
- /* need to wait for another word */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
- }
- }
- else if(IS_PKE_CMD(cmd, STCOL))
- {
- /* check that FIFO has four more words for STCOL operand */
- unsigned_4* last_op;
- last_op = pke_pc_operand(me, 4);
- if(last_op != NULL)
- {
- /* "transferring" operand */
- PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+/* Return pointer to given operand# in FIFO. `operand_num' starts at 1.
+ If FIFO is not full enough, return 0. Skip over DMA tags, but mark
+ them as an error (ER0). */
- /* copy COL registers: must all exist if 4th operand exists */
- me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1);
- me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2);
- me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3);
- me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4);
+unsigned_4*
+pke_pc_operand(struct pke_device* me, int operand_num)
+{
+ unsigned_4* operand = NULL;
+ struct fifo_quadword* fifo_operand;
- /* advance PC */
- pke_pc_advance(me, 5);
- }
- else
- {
- /* need to wait for another word */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
- }
- }
- else if(IS_PKE_CMD(cmd, MPG))
- {
- unsigned_4* last_mpg_word;
+ fifo_operand = pke_pc_fifo(me, operand_num, & operand);
- /* map zero to max+1 */
- if(num==0) num=0x100;
+ if(fifo_operand == NULL)
+ ASSERT(operand == NULL); /* pke_pc_fifo() ought leave it untouched */
- /* check that FIFO has a few more words for MPG operand */
- last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */
- if(last_mpg_word != NULL)
- {
- /* perform implied FLUSHE */
- /* read VU status word */
- unsigned_4 vu_stat;
- sim_read(NULL,
- (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
- (void*) & vu_stat,
- sizeof(unsigned_4));
- /* XXX: check RC */
-
- /* check if VBS bit is clear, i.e., VU is idle */
- if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
- {
- /* VU idle */
- int i;
+ return operand;
+}
- /* "transferring" operand */
- PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
- /* transfer VU instructions, one word per iteration */
- for(i=0; i<num*2; i++)
- {
- address_word vu_addr_base, vu_addr;
- address_word vutrack_addr_base, vutrack_addr;
- struct fifo_quadword* fq = pke_pc_fifo(me, num);
- unsigned_4* operand = pke_pc_operand(me, num);
-
- /* imm: in 64-bit units for MPG instruction */
-
- /* XXX: set NUM */
-
- /* VU*_MEM0 : instruction memory */
- vu_addr_base = (me->pke_number == 0) ?
- VU0_MEM0_WINDOW_START : VU0_MEM0_WINDOW_START;
- vu_addr = vu_addr_base + (imm*2) + i;
-
- /* VU*_MEM0_TRACK : source-addr tracking table */
- vutrack_addr_base = (me->pke_number == 0) ?
- VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;
- vutrack_addr = vu_addr_base + (imm*2) + i;
-
- /* write data into VU memory */
- pke_track_write(me, operand, sizeof(unsigned_4),
- vu_addr, fq->source_address);
-
- /* write srcaddr into VU srcaddr tracking table */
- sim_write(NULL,
- (SIM_ADDR) vutrack_addr,
- (void*) & fq->source_address,
- sizeof(unsigned_4));
- /* XXX: check RC */
- } /* VU xfer loop */
-
- /* advance PC */
- pke_pc_advance(me, 1 + num*2);
- }
- else
- {
- /* VU busy */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
- }
- } /* if FIFO full enough */
- else
- {
- /* need to wait for another word */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
- }
- }
- else if(IS_PKE_CMD(cmd, DIRECT) || IS_PKE_CMD(cmd, DIRECTHL)) /* treat identically */
- {
- /* check that FIFO has a few more words for DIRECT operand */
- unsigned_4* last_direct_word;
- /* map zero to max+1 */
- if(imm==0) imm=0x10000;
- last_direct_word = pke_pc_operand(me, imm*4); /* num: number of 128-bit words */
- if(last_direct_word != NULL)
- {
- /* VU idle */
- int i;
- quadword fifo_data;
- /* "transferring" operand */
- PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
- /* transfer GPUIF quadwords, one word per iteration */
- for(i=0; i<imm*4; i++)
- {
- struct fifo_quadword* fq = pke_pc_fifo(me, num);
- unsigned_4* operand = pke_pc_operand(me, num);
-
- /* collect word into quadword */
- fifo_data[i%4] = *operand;
+/* Write a bunch of bytes into simulator memory. Store the given source address into the
+ PKE sourceaddr tracking word. */
+int
+pke_track_write(struct pke_device* me, const void* src, int len,
+ address_word dest, unsigned_4 sourceaddr)
+{
+ int rc;
+ unsigned_4 no_sourceaddr = 0;
- /* write to GPUIF FIFO only with full word */
- if(i%4 == 3)
- {
- address_word gpuif_fifo = GPUIF_PATH2_FIFO_ADDR+(i/4);
- pke_track_write(me, fifo_data, sizeof(quadword),
- (SIM_ADDR) gpuif_fifo, fq->source_address);
- /* XXX: check RC */
- } /* write collected quadword */
+ /* write srcaddr into PKE srcaddr tracking */
+ sim_write(NULL,
+ (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
+ (void*) & sourceaddr,
+ sizeof(unsigned_4));
+
+ /* write bytes into simulator */
+ rc = sim_write(NULL,
+ (SIM_ADDR) dest,
+ (void*) src,
+ len);
+
+ /* clear srcaddr from PKE srcaddr tracking */
+ sim_write(NULL,
+ (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
+ (void*) & no_sourceaddr,
+ sizeof(unsigned_4));
- } /* GPUIF xfer loop */
-
- /* advance PC */
- pke_pc_advance(me, 1 + imm*4);
- } /* if FIFO full enough */
- else
- {
- /* need to wait for another word */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
- }
+ return rc;
+}
+
+
+/* check for stall conditions on indicated devices (path* only on PKE1), do not change status
+ return 0 iff no stall */
+int
+pke_check_stall(struct pke_device* me, enum pke_check_target what)
+{
+ int any_stall = 0;
+
+ /* read VU status word - commonly used */
+ unsigned_4 vu_stat;
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
+ (void*) & vu_stat,
+ sizeof(unsigned_4));
+
+ /* perform checks */
+ if(what == chk_vu)
+ {
+ /* check if VBS bit is set, i.e., VU is busy */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 1)
+ any_stall = 1;
}
- else if(IS_PKE_CMD(cmd, UNPACK)) /* warning: monster complexity */
+ else if(what == chk_path1)
{
- short vn = BIT_MASK_GET(cmd, 2, 3);
- short vl = BIT_MASK_GET(cmd, 0, 1);
- short vnvl = BIT_MASK_GET(cmd, 0, 3);
- int m = BIT_MASK_GET(cmd, 4, 4);
- short cl = PKE_REG_MASK_GET(me, CYCLE, CL);
- short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
- int n, num_operands;
- unsigned_4* last_operand_word;
-
- /* map zero to max+1 */
- if(num==0) num=0x100;
-
- /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
- if(wl <= cl)
- n = num;
- else
- n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
- num_operands = (((sizeof(unsigned_4) >> vl) * (vn+1) * n)/sizeof(unsigned_4));
+ /* only valid on PKE1 */
+ /* check if VGW bit is set, i.e., PATH1 is busy */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 1)
+ any_stall = 1;
+ }
+ else
+ {
+ ASSERT(0); /* XXX: not done yet */
+ }
- /* confirm that FIFO has enough words in it */
- last_operand_word = pke_pc_operand(me, num_operands);
- if(last_operand_word != NULL)
- {
- address_word vu_addr_base;
- int operand_num, vector_num;
+ /* any stall reasons? */
+ return any_stall;
+}
- /* "transferring" operand */
- PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
- /* XXX: don't check whether VU is idle?? */
+/* flip the DBF bit; recompute TOPS, ITOP & TOP */
+void
+pke_flip_dbf(struct pke_device* me)
+{
+ /* flip DBF */
+ PKE_REG_MASK_SET(me, DBF, DF,
+ PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
+ PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
+ /* compute new TOPS */
+ PKE_REG_MASK_SET(me, TOPS, TOPS,
+ (PKE_REG_MASK_GET(me, BASE, BASE) +
+ (PKE_REG_MASK_GET(me, DBF, DF) *
+ PKE_REG_MASK_GET(me, OFST, OFFSET))));
+ /* compute new ITOP and TOP */
+ PKE_REG_MASK_SET(me, ITOP, ITOP,
+ PKE_REG_MASK_GET(me, ITOPS, ITOPS));
+ PKE_REG_MASK_SET(me, TOP, TOP,
+ PKE_REG_MASK_GET(me, TOPS, TOPS));
+}
- if(me->pke_number == 0)
- vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
- else
- {
- vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
- if(BIT_MASK_GET(imm, 15, 15)) /* fetch R flag from imm word */
- vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS);
- }
- /* XXX: vu_addr overflow check */
- /* transfer given number of vectors */
- operand_num = 1; /* word index into instruction stream: 1..num_operands */
- vector_num = 0; /* vector number being processed: 0..num-1 */
- while(operand_num <= num_operands)
- {
- quadword vu_old_data;
- quadword vu_new_data;
- quadword unpacked_data;
- address_word vu_addr;
- struct fifo_quadword* fq;
- int i;
+/* PKEcode handler functions -- responsible for checking and
+ confirming old stall conditions, executing pkecode, updating PC and
+ status registers -- may assume being run on correct PKE unit */
+
+void
+pke_code_nop(struct pke_device* me, unsigned_4 pkecode)
+{
+ /* done */
+ pke_pc_advance(me, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
- /* XXX: set NUM */
- /* compute VU destination address, as bytes in R5900 memory */
- if(cl >= wl)
- {
- /* map zero to max+1 */
- if(wl == 0) wl = 0x0100;
- vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl));
- }
- else
- vu_addr = vu_addr_base + 16*vector_num;
+void
+pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode)
+{
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+ /* copy immediate value into CYCLE reg */
+ me->regs[PKE_REG_CYCLE][0] = imm;
+ /* done */
+ pke_pc_advance(me, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
- /* read old VU data word at address */
- sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data));
- /* Let sourceaddr track the first operand */
- fq = pke_pc_fifo(me, operand_num);
+void
+pke_code_offset(struct pke_device* me, unsigned_4 pkecode)
+{
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+ /* copy 10 bits to OFFSET field */
+ PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9));
+ /* clear DBF bit */
+ PKE_REG_MASK_SET(me, DBF, DF, 0);
+ /* clear other DBF bit */
+ PKE_REG_MASK_SET(me, STAT, DBF, 0);
+ /* set TOPS = BASE */
+ PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
+ /* done */
+ pke_pc_advance(me, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
- /* For cyclic unpack, next operand quadword may come from instruction stream
- or be zero. */
- if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */
- {
- /* clear operand - used only in a "indeterminate" state */
- for(i = 0; i < 4; i++)
- unpacked_data[i] = 0;
- }
- else
- {
- /* compute unpacked words from instruction stream */
- switch(vnvl)
- {
- case PKE_UNPACK_S_32:
- case PKE_UNPACK_V2_32:
- case PKE_UNPACK_V3_32:
- case PKE_UNPACK_V4_32:
- /* copy (vn+1) 32-bit values */
- for(i = 0; i < vn+1; i++)
- {
- unsigned_4* operand = pke_pc_operand(me, operand_num);
- unpacked_data[i] = *operand;
- operand_num ++;
- }
- break;
-
- case PKE_UNPACK_S_16:
- case PKE_UNPACK_V2_16:
- case PKE_UNPACK_V3_16:
- case PKE_UNPACK_V4_16:
- /* copy (vn+1) 16-bit values, packed two-per-word */
- for(i=0; i<vn+1; i+=2)
- {
- unsigned_4* operand = pke_pc_operand(me, operand_num);
- unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 15, 31);
- unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 16, 31, 31);
- operand_num ++;
- }
- break;
-
- case PKE_UNPACK_S_8:
- case PKE_UNPACK_V2_8:
- case PKE_UNPACK_V3_8:
- case PKE_UNPACK_V4_8:
- /* copy (vn+1) 8-bit values, packed four-per-word */
- for(i=0; i<vn+1; i+=4)
- {
- unsigned_4* operand = pke_pc_operand(me, operand_num);
- unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 7, 31);
- unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 8, 15, 31);
- unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 16, 23, 31);
- unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 24, 31, 31);
- operand_num ++;
- }
- break;
-
- case PKE_UNPACK_V4_5:
- /* copy four 1/5/5/5-bit values, packed into a sixteen-bit */
- for(i=0; i<vn+1; i+=4)
- {
- unsigned_4* operand = pke_pc_operand(me, operand_num);
- unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 4, 31);
- unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 5, 9, 31);
- unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 10, 14, 31);
- unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 15, 15, 31);
- /* ignore other 16 bits in operand */
- operand_num ++;
- }
- break;
-
- default: /* bad UNPACK code */
- {
- /* XXX: how to handle? */
- /* set ER1 flag in STAT register */
- PKE_REG_MASK_SET(me, STAT, ER1, 1);
- }
- }
- }
-
- /* compute replacement word - function of vn, vl, mask */
- if(m) /* use mask register? */
- {
- /* compute index into mask register for this word */
- int mask_index = PKE_LIMIT(vector_num % wl, 3); /* wl != 0, set above */
- for(i=0; i<3; i++) /* loop over columns */
- {
- int mask_op = PKE_MASKREG_GET(me, mask_index, i);
- unsigned_4* masked_value = NULL;
- unsigned_4 zero = 0;
-
- switch(mask_op)
- {
- case PKE_MASKREG_INPUT:
- /* for vn == 0, all columns are copied from column 0 */
- if(vn == 0)
- masked_value = & unpacked_data[0];
- else if(i > vn)
- masked_value = & zero; /* XXX: what to put here? */
- else
- masked_value = & unpacked_data[i];
- break;
-
- case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
- masked_value = & me->regs[PKE_REG_R0 + i][0];
- break;
-
- case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
- masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0];
- break;
-
- case PKE_MASKREG_NOTHING:
- /* "write inhibit" by re-copying old data */
- masked_value = & vu_old_data[i];
- break;
-
- default:
- ASSERT(0);
- /* no other cases possible */
- }
-
- /* copy masked value for column */
- memcpy(& vu_new_data[i], masked_value, sizeof(unsigned_4));
- } /* loop over columns */
- }
- else
- {
- /* no mask - just copy over entire unpacked quadword */
- memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
- }
+void
+pke_code_base(struct pke_device* me, unsigned_4 pkecode)
+{
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+ /* copy 10 bits to BASE field */
+ PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9));
+ /* clear DBF bit */
+ PKE_REG_MASK_SET(me, DBF, DF, 0);
+ /* clear other DBF bit */
+ PKE_REG_MASK_SET(me, STAT, DBF, 0);
+ /* set TOPS = BASE */
+ PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
+ /* done */
+ pke_pc_advance(me, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
- /* process STMOD register for accumulation operations */
- switch(PKE_REG_MASK_GET(me, MODE, MDE))
- {
- case PKE_MODE_ADDROW: /* add row registers to output data */
- for(i=0; i<4; i++)
- /* exploit R0..R3 contiguity */
- vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
- break;
- case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
- for(i=0; i<4; i++)
- {
- /* exploit R0..R3 contiguity */
- vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
- me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];
- }
- break;
+void
+pke_code_itop(struct pke_device* me, unsigned_4 pkecode)
+{
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+ /* copy 10 bits to ITOPS field */
+ PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9));
+ /* done */
+ pke_pc_advance(me, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
- case PKE_MODE_INPUT: /* pass data through */
- default:
- ;
- }
- /* write replacement word */
- pke_track_write(me, vu_new_data, sizeof(vu_new_data),
- (SIM_ADDR) vu_addr, fq->source_address);
+void
+pke_code_stmod(struct pke_device* me, unsigned_4 pkecode)
+{
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+ /* copy 2 bits to MODE register */
+ PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2));
+ /* done */
+ pke_pc_advance(me, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
- /* next vector please */
- vector_num ++;
- } /* vector transfer loop */
- } /* PKE FIFO full enough */
- else
- {
- /* need to wait for another word */
- next_pps_state = PKE_REG_STAT_PPS_WAIT;
- /* retry this instruction next clock */
- }
+
+void
+pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode)
+{
+ ASSERT(0);
+ /* XXX: cannot handle this one yet */
+}
+
+
+void
+pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode)
+{
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+ /* copy 16 bits to MARK register */
+ PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15));
+ /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
+ PKE_REG_MASK_SET(me, STAT, MRK, 1);
+ /* done */
+ pke_pc_advance(me, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
+
+
+void
+pke_code_flushe(struct pke_device* me, unsigned_4 pkecode)
+{
+ /* compute next PEW bit */
+ if(pke_check_stall(me, chk_vu))
+ {
+ /* VU busy */
+ PKE_REG_MASK_SET(me, STAT, PEW, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+ /* try again next cycle */
}
- /* ... */
else
{
- /* set ER1 flag in STAT register */
- PKE_REG_MASK_SET(me, STAT, ER1, 1);
- /* advance over faulty word */
+ /* VU idle */
+ PKE_REG_MASK_SET(me, STAT, PEW, 0);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
pke_pc_advance(me, 1);
}
-
- /* PKE is now idle or waiting */
- PKE_REG_MASK_SET(me, STAT, PPS, next_pps_state);
}
+void
+pke_code_flush(struct pke_device* me, unsigned_4 pkecode)
+{
+ int something_busy = 0;
+
+ /* compute next PEW, PGW bits */
+ if(pke_check_stall(me, chk_vu))
+ {
+ something_busy = 1;
+ PKE_REG_MASK_SET(me, STAT, PEW, 1);
+ }
+ else
+ PKE_REG_MASK_SET(me, STAT, PEW, 0);
+ if(pke_check_stall(me, chk_path1) ||
+ pke_check_stall(me, chk_path2))
+ {
+ something_busy = 1;
+ PKE_REG_MASK_SET(me, STAT, PGW, 1);
+ }
+ else
+ PKE_REG_MASK_SET(me, STAT, PGW, 0);
+ /* go or no go */
+ if(something_busy)
+ {
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* try again next cycle */
+ }
+ else
+ {
+ /* all idle */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1);
+ }
+}
-/* advance the PC by given number of words; update STAT/FQC field */
void
-pke_pc_advance(struct pke_device* me, int num_words)
+pke_code_flusha(struct pke_device* me, unsigned_4 pkecode)
{
- ASSERT(num_words > 0);
+ int something_busy = 0;
- me->qw_pc += num_words;
- /* handle overflow */
- while(me->qw_pc >= 4)
+ /* compute next PEW, PGW bits */
+ if(pke_check_stall(me, chk_vu))
{
- me->qw_pc -= 4;
- me->fifo_pc ++;
+ something_busy = 1;
+ PKE_REG_MASK_SET(me, STAT, PEW, 1);
}
+ else
+ PKE_REG_MASK_SET(me, STAT, PEW, 0);
- /* clear FQC if FIFO is now empty */
- if(me->fifo_num_elements == me->fifo_pc)
+
+ if(pke_check_stall(me, chk_path1) ||
+ pke_check_stall(me, chk_path2) ||
+ pke_check_stall(me, chk_path3))
{
- PKE_REG_MASK_SET(me, STAT, FQC, 0);
+ something_busy = 1;
+ PKE_REG_MASK_SET(me, STAT, PGW, 1);
}
+ else
+ PKE_REG_MASK_SET(me, STAT, PGW, 0);
+ if(something_busy)
+ {
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* try again next cycle */
+ }
+ else
+ {
+ /* all idle */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1);
+ }
}
+void
+pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode)
+{
+ /* compute next PEW bit */
+ if(pke_check_stall(me, chk_vu))
+ {
+ /* VU busy */
+ PKE_REG_MASK_SET(me, STAT, PEW, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+ /* try again next cycle */
+ }
+ else
+ {
+ unsigned_4 vu_pc;
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+ /* VU idle */
+ PKE_REG_MASK_SET(me, STAT, PEW, 0);
+
+ /* flip DBF on PKE1 */
+ if(me->pke_number == 1)
+ pke_flip_dbf(me);
+
+ /* compute new PC for VU */
+ vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */
+ /* write new PC; callback function gets VU running */
+ sim_write(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
+ (void*) & vu_pc,
+ sizeof(unsigned_4));
+
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1);
+ }
+}
-/* Return pointer to given operand# in FIFO. `word_num' starts at 1.
- If FIFO is not full enough, return 0. */
-unsigned_4*
-pke_pc_operand(struct pke_device* me, int word_num)
+
+void
+pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode)
{
- int new_qw_pc = 0;
- int new_fifo_pc;
- unsigned_4* operand;
+ /* compute next PEW bit */
+ if(pke_check_stall(me, chk_vu))
+ {
+ /* VU busy */
+ PKE_REG_MASK_SET(me, STAT, PEW, 1);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+ /* try again next cycle */
+ }
+ else
+ {
+ unsigned_4 vu_pc;
- ASSERT(word_num > 0);
+ /* VU idle */
+ PKE_REG_MASK_SET(me, STAT, PEW, 0);
- new_fifo_pc = me->fifo_pc;
- new_qw_pc += me->qw_pc + word_num;
+ /* flip DBF on PKE1 */
+ if(me->pke_number == 1)
+ pke_flip_dbf(me);
- /* handle overflow */
- while(new_qw_pc >= 4)
+ /* read old PC */
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
+ (void*) & vu_pc,
+ sizeof(unsigned_4));
+
+ /* rewrite new PC; callback function gets VU running */
+ sim_write(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
+ (void*) & vu_pc,
+ sizeof(unsigned_4));
+
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1);
+ }
+}
+
+
+void
+pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode)
+{
+ int something_busy = 0;
+
+ /* compute next PEW, PGW bits */
+ if(pke_check_stall(me, chk_vu))
{
- new_qw_pc -= 4;
- new_fifo_pc ++;
+ something_busy = 1;
+ PKE_REG_MASK_SET(me, STAT, PEW, 1);
}
+ else
+ PKE_REG_MASK_SET(me, STAT, PEW, 0);
- /* not enough elements */
- if(me->fifo_num_elements == me->fifo_pc)
- operand = NULL;
+
+ if(pke_check_stall(me, chk_path1) ||
+ pke_check_stall(me, chk_path2) ||
+ pke_check_stall(me, chk_path3))
+ {
+ something_busy = 1;
+ PKE_REG_MASK_SET(me, STAT, PGW, 1);
+ }
else
- operand = & me->fifo[new_fifo_pc].data[new_qw_pc];
+ PKE_REG_MASK_SET(me, STAT, PGW, 0);
- return operand;
+ /* go or no go */
+ if(something_busy)
+ {
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* try again next cycle */
+ }
+ else
+ {
+ unsigned_4 vu_pc;
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+ /* flip DBF on PKE1 */
+ if(me->pke_number == 1)
+ pke_flip_dbf(me);
+
+ /* compute new PC for VU */
+ vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */
+ /* write new PC; callback function gets VU running */
+ sim_write(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
+ (void*) & vu_pc,
+ sizeof(unsigned_4));
+
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1);
+ }
}
+void
+pke_code_stmask(struct pke_device* me, unsigned_4 pkecode)
+{
+ /* check that FIFO has one more word for STMASK operand */
+ unsigned_4* mask;
+
+ mask = pke_pc_operand(me, 1);
+ if(mask != NULL)
+ {
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+ /* fill the register */
+ PKE_REG_MASK_SET(me, MASK, MASK, *mask);
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1);
+ }
+ else
+ {
+ /* need to wait for another word */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* try again next cycle */
+ }
+}
-/* Return pointer to FIFO quadword containing given operand# in FIFO.
- `word_num' starts at 1. If FIFO is not full enough, return 0. */
-struct fifo_quadword*
-pke_pc_fifo(struct pke_device* me, int word_num)
+void
+pke_code_strow(struct pke_device* me, unsigned_4 pkecode)
{
- int new_qw_pc = 0;
- int new_fifo_pc;
- struct fifo_quadword* operand;
-
- ASSERT(word_num > 0);
+ /* check that FIFO has four more words for STROW operand */
+ unsigned_4* last_op;
+
+ last_op = pke_pc_operand(me, 4);
+ if(last_op != NULL)
+ {
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* copy ROW registers: must all exist if 4th operand exists */
+ me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1);
+ me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2);
+ me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3);
+ me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4);
+
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 5);
+ }
+ else
+ {
+ /* need to wait for another word */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* try again next cycle */
+ }
+}
- new_fifo_pc = me->fifo_pc;
- new_qw_pc += me->qw_pc + word_num;
- /* handle overflow */
- while(new_qw_pc >= 4)
+void
+pke_code_stcol(struct pke_device* me, unsigned_4 pkecode)
+{
+ /* check that FIFO has four more words for STCOL operand */
+ unsigned_4* last_op;
+
+ last_op = pke_pc_operand(me, 4);
+ if(last_op != NULL)
{
- new_qw_pc -= 4;
- new_fifo_pc ++;
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* copy COL registers: must all exist if 4th operand exists */
+ me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1);
+ me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2);
+ me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3);
+ me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4);
+
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 5);
}
-
- /* not enough elements */
- if(me->fifo_num_elements == me->fifo_pc)
- operand = NULL;
else
- operand = & me->fifo[new_fifo_pc];
+ {
+ /* need to wait for another word */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* try again next cycle */
+ }
+}
- return operand;
+
+void
+pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
+{
+ unsigned_4* last_mpg_word;
+ int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+ /* map zero to max+1 */
+ if(num==0) num=0x100;
+
+ /* check that FIFO has a few more words for MPG operand */
+ last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */
+ if(last_mpg_word != NULL)
+ {
+ /* perform implied FLUSHE */
+ /* read VU status word */
+ unsigned_4 vu_stat;
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
+ (void*) & vu_stat,
+ sizeof(unsigned_4));
+
+ /* check if VBS bit is clear, i.e., VU is idle */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
+ {
+ /* VU idle */
+ int i;
+
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* transfer VU instructions, one word per iteration */
+ for(i=0; i<num*2; i++)
+ {
+ address_word vu_addr_base, vu_addr;
+ address_word vutrack_addr_base, vutrack_addr;
+ unsigned_4* operand;
+ struct fifo_quadword* fq = pke_pc_fifo(me, num, & operand);
+
+ /* imm: in 64-bit units for MPG instruction */
+
+ /* XXX: set NUM */
+
+ /* VU*_MEM0 : instruction memory */
+ vu_addr_base = (me->pke_number == 0) ?
+ VU0_MEM0_WINDOW_START : VU0_MEM0_WINDOW_START;
+ vu_addr = vu_addr_base + (imm*2) + i;
+
+ /* VU*_MEM0_TRACK : source-addr tracking table */
+ vutrack_addr_base = (me->pke_number == 0) ?
+ VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;
+ vutrack_addr = vu_addr_base + (imm*2) + i;
+
+ /* write data into VU memory */
+ pke_track_write(me, operand, sizeof(unsigned_4),
+ vu_addr, fq->source_address);
+
+ /* write srcaddr into VU srcaddr tracking table */
+ sim_write(NULL,
+ (SIM_ADDR) vutrack_addr,
+ (void*) & fq->source_address,
+ sizeof(unsigned_4));
+ } /* VU xfer loop */
+
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1 + num*2);
+ }
+ else
+ {
+ /* VU busy */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+ /* retry this instruction next clock */
+ }
+ } /* if FIFO full enough */
+ else
+ {
+ /* need to wait for another word */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* retry this instruction next clock */
+ }
}
+void
+pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
+{
+ /* check that FIFO has a few more words for DIRECT operand */
+ unsigned_4* last_direct_word;
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+ int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
+
+ /* map zero to max+1 */
+ if(imm==0) imm=0x10000;
+
+ last_direct_word = pke_pc_operand(me, imm*4); /* num: number of 128-bit words */
+ if(last_direct_word != NULL)
+ {
+ /* VU idle */
+ int i;
+ quadword fifo_data;
+
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* transfer GPUIF quadwords, one word per iteration */
+ for(i=0; i<imm*4; i++)
+ {
+ unsigned_4* operand;
+ struct fifo_quadword* fq = pke_pc_fifo(me, num, &operand);
+
+ /* collect word into quadword */
+ fifo_data[i%4] = *operand;
+
+ /* write to GPUIF FIFO only with full word */
+ if(i%4 == 3)
+ {
+ address_word gpuif_fifo = GPUIF_PATH2_FIFO_ADDR+(i/4);
+ pke_track_write(me, fifo_data, sizeof(quadword),
+ (SIM_ADDR) gpuif_fifo, fq->source_address);
+ } /* write collected quadword */
+
+ } /* GPUIF xfer loop */
+
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1 + imm*4);
+ } /* if FIFO full enough */
+ else
+ {
+ /* need to wait for another word */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* retry this instruction next clock */
+ }
+}
-/* Write a bunch of bytes into simulator memory. Store the given source address into the
- PKE sourceaddr tracking word. */
-int
-pke_track_write(struct pke_device* me, const void* src, int len,
- address_word dest, unsigned_4 sourceaddr)
+
+void
+pke_code_directhl(struct pke_device* me, unsigned_4 pkecode)
{
- int rc;
- unsigned_4 no_sourceaddr = 0;
+ /* treat the same as DIRECTH */
+ pke_code_direct(me, pkecode);
+}
- /* write srcaddr into PKE srcaddr tracking */
- sim_write(NULL,
- (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
- (void*) & sourceaddr,
- sizeof(unsigned_4));
+
+void
+pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
+{
+ int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+ int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
+ int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
+
+ short vn = BIT_MASK_GET(cmd, 2, 3);
+ short vl = BIT_MASK_GET(cmd, 0, 1);
+ short vnvl = BIT_MASK_GET(cmd, 0, 3);
+ int m = BIT_MASK_GET(cmd, 4, 4);
+ short cl = PKE_REG_MASK_GET(me, CYCLE, CL);
+ short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
+ int n, num_operands;
+ unsigned_4* last_operand_word;
- /* write bytes into simulator */
- rc = sim_write(NULL,
- (SIM_ADDR) dest,
- (void*) src,
- len);
+ /* map zero to max+1 */
+ if(num==0) num=0x100;
- /* clear srcaddr from PKE srcaddr tracking */
- sim_write(NULL,
- (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
- (void*) & no_sourceaddr,
- sizeof(unsigned_4));
+ /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
+ if(wl <= cl)
+ n = num;
+ else
+ n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
+ num_operands = (((sizeof(unsigned_4) >> vl) * (vn+1) * n)/sizeof(unsigned_4));
+
+ /* confirm that FIFO has enough words in it */
+ last_operand_word = pke_pc_operand(me, num_operands);
+ if(last_operand_word != NULL)
+ {
+ address_word vu_addr_base;
+ int operand_num, vector_num;
+
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* don't check whether VU is idle */
+
+ if(me->pke_number == 0)
+ vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
+ else
+ {
+ vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
+ if(BIT_MASK_GET(imm, 15, 15)) /* fetch R flag from imm word */
+ vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS);
+ }
+
+ /* XXX: vu_addr overflow check */
+
+ /* transfer given number of vectors */
+ operand_num = 1; /* word index into instruction stream: 1..num_operands */
+ vector_num = 0; /* vector number being processed: 0..num-1 */
+ while(operand_num <= num_operands)
+ {
+ quadword vu_old_data;
+ quadword vu_new_data;
+ quadword unpacked_data;
+ address_word vu_addr;
+ struct fifo_quadword* fq;
+ int i;
+
+ /* XXX: set NUM */
+
+ /* compute VU destination address, as bytes in R5900 memory */
+ if(cl >= wl)
+ {
+ /* map zero to max+1 */
+ if(wl == 0) wl = 0x0100;
+ vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl));
+ }
+ else
+ vu_addr = vu_addr_base + 16*vector_num;
+
+ /* read old VU data word at address */
+ sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data));
+
+ /* Let sourceaddr track the first operand */
+ fq = pke_pc_fifo(me, operand_num, NULL);
+
+ /* For cyclic unpack, next operand quadword may come from instruction stream
+ or be zero. */
+ if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */
+ {
+ /* clear operand - used only in a "indeterminate" state */
+ for(i = 0; i < 4; i++)
+ unpacked_data[i] = 0;
+ }
+ else
+ {
+ /* compute unpacked words from instruction stream */
+ switch(vnvl)
+ {
+ case PKE_UNPACK_S_32:
+ case PKE_UNPACK_V2_32:
+ case PKE_UNPACK_V3_32:
+ case PKE_UNPACK_V4_32:
+ /* copy (vn+1) 32-bit values */
+ for(i = 0; i < vn+1; i++)
+ {
+ unsigned_4* operand = pke_pc_operand(me, operand_num);
+ unpacked_data[i] = *operand;
+ operand_num ++;
+ }
+ break;
+
+ case PKE_UNPACK_S_16:
+ case PKE_UNPACK_V2_16:
+ case PKE_UNPACK_V3_16:
+ case PKE_UNPACK_V4_16:
+ /* copy (vn+1) 16-bit values, packed two-per-word */
+ for(i=0; i<vn+1; i+=2)
+ {
+ unsigned_4* operand = pke_pc_operand(me, operand_num);
+ unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 15, 31);
+ unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 16, 31, 31);
+ operand_num ++;
+ }
+ break;
+
+ case PKE_UNPACK_S_8:
+ case PKE_UNPACK_V2_8:
+ case PKE_UNPACK_V3_8:
+ case PKE_UNPACK_V4_8:
+ /* copy (vn+1) 8-bit values, packed four-per-word */
+ for(i=0; i<vn+1; i+=4)
+ {
+ unsigned_4* operand = pke_pc_operand(me, operand_num);
+ unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 7, 31);
+ unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 8, 15, 31);
+ unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 16, 23, 31);
+ unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 24, 31, 31);
+ operand_num ++;
+ }
+ break;
+
+ case PKE_UNPACK_V4_5:
+ /* copy four 1/5/5/5-bit values, packed into a sixteen-bit */
+ for(i=0; i<vn+1; i+=4)
+ {
+ unsigned_4* operand = pke_pc_operand(me, operand_num);
+ unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 4, 31);
+ unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 5, 9, 31);
+ unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 10, 14, 31);
+ unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 15, 15, 31);
+ operand_num ++;
+ }
+ break;
- return rc;
+ /* XXX: handle multiple rows of data in same word */
+ /* clue: increment operand_num less frequently */
+
+ default: /* bad UNPACK code */
+ {
+ /* treat as illegal instruction */
+ pke_code_error(me, pkecode);
+ return;
+ }
+ }
+ }
+
+ /* compute replacement word - function of vn, vl, mask */
+ if(m) /* use mask register? */
+ {
+ /* compute index into mask register for this word */
+ int mask_index = PKE_LIMIT(vector_num % wl, 3); /* wl != 0, set above */
+
+ for(i=0; i<3; i++) /* loop over columns */
+ {
+ int mask_op = PKE_MASKREG_GET(me, mask_index, i);
+ unsigned_4* masked_value = NULL;
+ unsigned_4 zero = 0;
+
+ switch(mask_op)
+ {
+ case PKE_MASKREG_INPUT:
+ /* for vn == 0, all columns are copied from column 0 */
+ if(vn == 0)
+ masked_value = & unpacked_data[0];
+ else if(i > vn)
+ masked_value = & zero; /* arbitrary data: undefined in spec */
+ else
+ masked_value = & unpacked_data[i];
+ break;
+
+ case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
+ masked_value = & me->regs[PKE_REG_R0 + i][0];
+ break;
+
+ case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
+ masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0];
+ break;
+
+ case PKE_MASKREG_NOTHING:
+ /* "write inhibit" by re-copying old data */
+ masked_value = & vu_old_data[i];
+ break;
+
+ default:
+ ASSERT(0);
+ /* no other cases possible */
+ }
+
+ /* copy masked value for column */
+ memcpy(& vu_new_data[i], masked_value, sizeof(unsigned_4));
+ } /* loop over columns */
+ }
+ else
+ {
+ /* no mask - just copy over entire unpacked quadword */
+ memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
+ }
+
+ /* process STMOD register for accumulation operations */
+ switch(PKE_REG_MASK_GET(me, MODE, MDE))
+ {
+ case PKE_MODE_ADDROW: /* add row registers to output data */
+ for(i=0; i<4; i++)
+ /* exploit R0..R3 contiguity */
+ vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
+ break;
+
+ case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
+ for(i=0; i<4; i++)
+ {
+ /* exploit R0..R3 contiguity */
+ vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
+ me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];
+ }
+ break;
+
+ case PKE_MODE_INPUT: /* pass data through */
+ default:
+ ;
+ }
+
+ /* write replacement word */
+ pke_track_write(me, vu_new_data, sizeof(vu_new_data),
+ (SIM_ADDR) vu_addr, fq->source_address);
+
+ /* next vector please */
+ vector_num ++;
+ } /* vector transfer loop */
+
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, num_operands);
+ } /* PKE FIFO full enough */
+ else
+ {
+ /* need to wait for another word */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* retry this instruction next clock */
+ }
+}
+
+
+void
+pke_code_error(struct pke_device* me, unsigned_4 pkecode)
+{
+ /* set ER1 flag in STAT register */
+ PKE_REG_MASK_SET(me, STAT, ER1, 1);
+ /* advance over faulty word */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1);
}