From: Frank Ch. Eigler
Date: Wed, 11 Feb 1998 19:42:15 +0000 (+0000)
Subject: - PKE simulation code almost complete. Still missing:
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=43a6998b412a4f30974081b19771da32c4375ea1;p=binutils-gdb.git

- PKE simulation code almost complete. Still missing:
* handling of super duper packed UNPACK arguments
* skipping of in-progress instruction on break/stop
* interrupt generation to 5900
* PATH2/PATH3 status checking & masking
* ability to write to FIFO one word (instead of quadword) at a time
---

diff --git a/sim/mips/sky-pke.c b/sim/mips/sky-pke.c
index f46873bfc7a..eb64cd32c7b 100644
--- a/sim/mips/sky-pke.c
+++ b/sim/mips/sky-pke.c
@@ -1,5 +1,8 @@
 /* Copyright (C) 1998, Cygnus Solutions */

+/* Debugging PKE? */
+#define PKE_DEBUG
+
 #include
 #include "sky-pke.h"
 #include "sky-dma.h"
@@ -8,6 +11,7 @@
 #include "sky-vu1.h"
 #include "sky-gpuif.h"

+
 /* Imported functions */

 void device_error (device *me, char* message); /* device.c */
@@ -21,11 +25,38 @@ static int pke_io_write_buffer(device*, const void*, int, address_word,
                                unsigned, sim_cpu*, sim_cia);
 static void pke_issue(struct pke_device*);
 static void pke_pc_advance(struct pke_device*, int num_words);
-static unsigned_4* pke_pc_operand(struct pke_device*, int word_num);
-static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int word_num);
+static unsigned_4* pke_pc_operand(struct pke_device*, int operand_num);
+static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int operand_num,
+                                         unsigned_4** operand);
 static int pke_track_write(struct pke_device*, const void* src, int len,
                            address_word dest, unsigned_4 sourceaddr);
 static void pke_attach(SIM_DESC sd, struct pke_device* me);
+enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 };
+static int pke_check_stall(struct pke_device* me, enum pke_check_target what);
+static void pke_flip_dbf(struct pke_device* me);

/* PKEcode handlers */

+static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_base(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_error(struct pke_device* me, unsigned_4 pkecode);
@@
-292,26 +323,8 @@ pke_io_write_buffer(device *me_, switch(reg_num) { case PKE_REG_FBRST: - /* XXX: order of evaluation? STP && STC ?? */ - if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */ - { - /* clear FIFO: also prevents re-execution attempt of - possible stalled instruction */ - me->fifo_num_elements = me->fifo_pc; - /* clear registers */ - memset(me->regs, 0, sizeof(me->regs)); - me->flags = 0; - me->qw_pc = 0; - } - if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */ - { - PKE_REG_MASK_SET(me, STAT, PFS, 1); - } - if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */ - { - /* XXX: how to safely abort "currently executing" (=> stalled) instruction? */ - PKE_REG_MASK_SET(me, STAT, PSS, 1); - } + /* Order these tests from least to most overriding, in case + multiple bits are set. */ if(BIT_MASK_GET(input[0], 2, 2)) /* STC bit */ { /* clear a bunch of status bits */ @@ -323,6 +336,26 @@ pke_io_write_buffer(device *me_, PKE_REG_MASK_SET(me, STAT, ER1, 0); /* will allow resumption of possible stalled instruction */ } + if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */ + { + /* XXX: how to safely abort "currently executing" (=> stalled) instruction? */ + PKE_REG_MASK_SET(me, STAT, PSS, 1); + } + if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */ + { + PKE_REG_MASK_SET(me, STAT, PFS, 1); + } + if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */ + { + /* clear FIFO by skipping to word after PC: also + prevents re-execution attempt of possible stalled + instruction */ + me->fifo_num_elements = me->fifo_pc; + /* clear registers */ + memset(me->regs, 0, sizeof(me->regs)); + me->flags = 0; + me->qw_pc = 0; + } break; case PKE_REG_ERR: @@ -424,7 +457,6 @@ pke_io_write_buffer(device *me_, (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_PKTFLAG : DMA_CHANNEL1_PKTFLAG), (void*) & fqw->dma_tag_present, sizeof(unsigned_4)); - /* XXX: check RC */ me->fifo_num_elements++; @@ -450,20 +482,21 @@ pke_issue(struct pke_device* me) unsigned_4 fw; unsigned_4 cmd, intr, num; unsigned_4 imm; - int next_pps_state; /* PPS after this instruction issue attempt */ /* 1 -- test go / no-go for PKE execution */ /* check for stall/halt control bits */ - /* XXX: What is the PEW bit for? */ - if(PKE_REG_MASK_GET(me, STAT, PSS) || + if(PKE_REG_MASK_GET(me, STAT, PSS) || /* XXX: PSS may be a special case */ PKE_REG_MASK_GET(me, STAT, PFS) || + /* PEW bit not a reason to keep stalling - it's re-checked below */ + /* PGW bit not a reason to keep stalling - it's re-checked below */ /* maskable stall controls: ER0, ER1, PIS */ (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) || (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) || (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII))) { - /* XXX */ + /* try again next cycle; no state change */ + return; } /* XXX: handle PSS by *skipping* instruction? 
*/ @@ -474,21 +507,14 @@ pke_issue(struct pke_device* me) /* 2 -- fetch PKE instruction */ - /* "fetch" instruction quadword */ - fqw = & me->fifo[me->fifo_pc]; + /* skip over DMA tag, if present */ + pke_pc_advance(me, 0); - /* skip over DMA tags, if present */ - if((fqw->dma_tag_present != 0) && (me->qw_pc < 2)) - { - ASSERT(me->qw_pc == 0); - /* XXX: check validity of DMA tag; if bad, set ER0 flag */ - me->qw_pc = 2; - } - - /* "fetch" instruction word */ + /* "fetch" instruction quadword and word */ + fqw = & me->fifo[me->fifo_pc]; fw = fqw->data[me->qw_pc]; - /* store it in PKECODE register */ + /* store word in PKECODE register */ me->regs[PKE_REG_CODE][0] = fw; @@ -510,894 +536,1106 @@ pke_issue(struct pke_device* me) /* decoding */ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE); - next_pps_state = PKE_REG_STAT_PPS_IDLE; /* assume instruction completes */ - /* decode */ + /* decode & execute */ if(IS_PKE_CMD(cmd, PKENOP)) - { - /* no work required, yey */ - pke_pc_advance(me, 1); - } + pke_code_nop(me, fw); else if(IS_PKE_CMD(cmd, STCYCL)) - { - /* copy immediate value into CYCLE reg */ - me->regs[PKE_REG_CYCLE][0] = imm; - pke_pc_advance(me, 1); - } + pke_code_stcycl(me, fw); else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET)) - { - /* copy 10 bits to OFFSET field */ - PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9)); - /* clear DBF bit */ - PKE_REG_MASK_SET(me, DBF, DF, 0); - /* clear other DBF bit */ - PKE_REG_MASK_SET(me, STAT, DBF, 0); - /* set TOPS = BASE */ - PKE_REG_MASK_SET(me, TOPS, TOPS, - PKE_REG_MASK_GET(me, BASE, BASE)); - pke_pc_advance(me, 1); - } + pke_code_offset(me, fw); else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE)) - { - /* copy 10 bits to BASE field */ - PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9)); - /* clear DBF bit */ - PKE_REG_MASK_SET(me, DBF, DF, 0); - /* clear other DBF bit */ - PKE_REG_MASK_SET(me, STAT, DBF, 0); - /* set TOPS = BASE */ - PKE_REG_MASK_SET(me, TOPS, TOPS, - PKE_REG_MASK_GET(me, BASE, BASE)); - pke_pc_advance(me, 1); - } + pke_code_base(me, fw); else if(IS_PKE_CMD(cmd, ITOP)) - { - /* copy 10 bits to ITOPS field */ - PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9)); - pke_pc_advance(me, 1); - } + pke_code_itop(me, fw); else if(IS_PKE_CMD(cmd, STMOD)) - { - /* copy 2 bits to MODE register */ - PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2)); - pke_pc_advance(me, 1); - } - else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) /* MSKPATH3 */ - { - /* XXX: what to do with this? DMA control register? */ - pke_pc_advance(me, 1); - } + pke_code_stmod(me, fw); + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) + pke_code_mskpath3(me, fw); else if(IS_PKE_CMD(cmd, PKEMARK)) - { - /* copy 16 bits to MARK register */ - PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15)); - /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */ - PKE_REG_MASK_SET(me, STAT, MRK, 1); - pke_pc_advance(me, 1); - } + pke_code_pkemark(me, fw); else if(IS_PKE_CMD(cmd, FLUSHE)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? 
VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VBS bit is clear, i.e., VU is idle */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) - { - /* VU idle */ - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } + pke_code_flushe(me, fw); else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH)) + pke_code_flush(me, fw); + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA)) + pke_code_flusha(me, fw); + else if(IS_PKE_CMD(cmd, PKEMSCAL)) + pke_code_pkemscal(me, fw); + else if(IS_PKE_CMD(cmd, PKEMSCNT)) + pke_code_pkemscnt(me, fw); + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF)) + pke_code_pkemscalf(me, fw); + else if(IS_PKE_CMD(cmd, STMASK)) + pke_code_stmask(me, fw); + else if(IS_PKE_CMD(cmd, STROW)) + pke_code_strow(me, fw); + else if(IS_PKE_CMD(cmd, STCOL)) + pke_code_stcol(me, fw); + else if(IS_PKE_CMD(cmd, MPG)) + pke_code_mpg(me, fw); + else if(IS_PKE_CMD(cmd, DIRECT)) + pke_code_direct(me, fw); + else if(IS_PKE_CMD(cmd, DIRECTHL)) + pke_code_directhl(me, fw); + else if(IS_PKE_CMD(cmd, UNPACK)) + pke_code_unpack(me, fw); + /* ... other commands ... */ + else + pke_code_error(me, fw); +} + + + +/* advance the PC by given number of data words; update STAT/FQC + field; assume FIFO is filled enough */ + +void +pke_pc_advance(struct pke_device* me, int num_words) +{ + int num = num_words; + ASSERT(num_words > 0); + + while(num > 0) { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ + struct fifo_quadword* fq; + + /* one word skipped */ + num --; - /* check if VGW bit is clear, i.e., PATH1 is idle */ - /* simulator design implies PATH2 is always "idle" */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && - BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && - 1 /* PATH2 always idle */) + /* point to next word */ + me->qw_pc ++; + if(me->qw_pc == 4) { - /* VU idle */ - /* PATH1 idle */ - /* PATH2 idle */ - /* advance PC */ - pke_pc_advance(me, 1); + me->qw_pc = 0; + me->fifo_pc ++; } - else + + /* skip over DMA tag words if present in word 0 or 1 */ + fq = & me->fifo[me->fifo_pc]; + if(fq->dma_tag_present && (me->qw_pc < 2)) { - /* GPUIF busy */ - /* retry this instruction next clock */ + /* skip by going around loop an extra time */ + num ++; } } - else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA)) + + /* clear FQC if FIFO is now empty */ + if(me->fifo_num_elements == me->fifo_pc) { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? 
VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VGW bit is clear, i.e., PATH1 is idle */ - /* simulator design implies PATH2 is always "idle" */ - /* XXX: simulator design implies PATH3 is always "idle" */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && - BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && - 1 /* PATH2 always idle */ && - 1 /* PATH3 always idle */) - { - /* VU idle */ - /* PATH1 idle */ - /* PATH2 idle */ - /* PATH3 idle */ - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* GPUIF busy */ - /* retry this instruction next clock */ - } + PKE_REG_MASK_SET(me, STAT, FQC, 0); } - else if(IS_PKE_CMD(cmd, PKEMSCAL)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VBS bit is clear, i.e., VU is idle */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) - { - /* VU idle */ - unsigned_4 vu_pc; +} - /* perform PKE1-unique processing for microprogram calls */ - if(me->pke_number == 1) - { - /* flip DBF */ - PKE_REG_MASK_SET(me, DBF, DF, - PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); - PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); - /* compute new TOPS */ - PKE_REG_MASK_SET(me, TOPS, TOPS, - (PKE_REG_MASK_GET(me, BASE, BASE) + - (PKE_REG_MASK_GET(me, DBF, DF) * - PKE_REG_MASK_GET(me, OFST, OFFSET)))); - /* compute new ITOP and TOP */ - PKE_REG_MASK_SET(me, ITOP, ITOP, - PKE_REG_MASK_GET(me, ITOPS, ITOPS)); - PKE_REG_MASK_SET(me, TOP, TOP, - PKE_REG_MASK_GET(me, TOPS, TOPS)); - } - /* compute new PC */ - vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ - /* write new PC; callback function gets VU running */ - sim_write(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), - (void*) & vu_pc, - sizeof(unsigned_4)); - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } - else if(IS_PKE_CMD(cmd, PKEMSCNT)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - /* check if VBS bit is clear, i.e., VU is idle */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) - { - /* VU idle */ - unsigned_4 vu_pc; +/* Return pointer to FIFO quadword containing given operand# in FIFO. + `operand_num' starts at 1. Return pointer to operand word in last + argument, if non-NULL. If FIFO is not full enough, return 0. + Signal an ER0 indication upon skipping a DMA tag. */ - /* flip DBF etc. for PKE1 */ - if(me->pke_number == 1) - { - PKE_REG_MASK_SET(me, DBF, DF, - PKE_REG_MASK_GET(me, DBF, DF) ? 
0 : 1); - PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); - PKE_REG_MASK_SET(me, TOPS, TOPS, - (PKE_REG_MASK_GET(me, BASE, BASE) + - (PKE_REG_MASK_GET(me, DBF, DF) * - PKE_REG_MASK_GET(me, OFST, OFFSET)))); - PKE_REG_MASK_SET(me, ITOP, ITOP, - PKE_REG_MASK_GET(me, ITOPS, ITOPS)); - PKE_REG_MASK_SET(me, TOP, TOP, - PKE_REG_MASK_GET(me, TOPS, TOPS)); - } +struct fifo_quadword* +pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand) +{ + int num = operand_num; + int new_qw_pc, new_fifo_pc; + struct fifo_quadword* operand_fifo; - /* read old PC */ - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), - (void*) & vu_pc, - sizeof(unsigned_4)); - /* rewrite its PC; callback function gets VU running */ - sim_write(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), - (void*) & vu_pc, - sizeof(unsigned_4)); - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } - else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ + ASSERT(num > 0); - /* check if VGW bit is clear, i.e., PATH1 is idle */ - /* simulator design implies PATH2 is always "idle" */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && - BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && - 1 /* PATH2 always idle */) - { - /* VU idle */ - /* PATH1 idle */ - /* PATH2 idle */ - unsigned_4 vu_pc; + /* snapshot current pointers */ + new_fifo_pc = me->fifo_pc; + new_qw_pc = me->qw_pc; - /* flip DBF etc. for PKE1 */ - if(me->pke_number == 1) - { - PKE_REG_MASK_SET(me, DBF, DF, - PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); - PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); - PKE_REG_MASK_SET(me, TOPS, TOPS, - (PKE_REG_MASK_GET(me, BASE, BASE) + - (PKE_REG_MASK_GET(me, DBF, DF) * - PKE_REG_MASK_GET(me, OFST, OFFSET)))); - PKE_REG_MASK_SET(me, ITOP, ITOP, - PKE_REG_MASK_GET(me, ITOPS, ITOPS)); - PKE_REG_MASK_SET(me, TOP, TOP, - PKE_REG_MASK_GET(me, TOPS, TOPS)); - } + while(num > 0) + { + /* one word skipped */ + num --; - /* compute new PC */ - vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ - /* write new PC; callback function gets VU running */ - sim_write(NULL, - (SIM_ADDR) (me->pke_number == 0 ? 
VU0_PC_START : VU1_PC_START), - (void*) & vu_pc, - sizeof(unsigned_4)); - /* advance PC */ - pke_pc_advance(me, 1); - } - else + /* point to next word */ + new_qw_pc ++; + if(new_qw_pc == 4) { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ + new_qw_pc = 0; + new_fifo_pc ++; } - } - else if(IS_PKE_CMD(cmd, STMASK)) - { - /* check that FIFO has one more word for STMASK operand */ - unsigned_4* mask; - mask = pke_pc_operand(me, 1); - if(mask != NULL) + /* check for FIFO underflow */ + if(me->fifo_num_elements == new_fifo_pc) { - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); - /* fill the register */ - PKE_REG_MASK_SET(me, MASK, MASK, *mask); - /* advance PC */ - pke_pc_advance(me, 2); + operand_fifo = NULL; + break; } - else + + /* skip over DMA tag words if present in word 0 or 1 */ + operand_fifo = & me->fifo[new_fifo_pc]; + if(operand_fifo->dma_tag_present && (new_qw_pc < 2)) { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ + /* mismatch error! */ + PKE_REG_MASK_SET(me, STAT, ER0, 1); + /* skip by going around loop an extra time */ + num ++; } } - else if(IS_PKE_CMD(cmd, STROW)) - { - /* check that FIFO has four more words for STROW operand */ - unsigned_4* last_op; - last_op = pke_pc_operand(me, 4); - if(last_op != NULL) - { - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + /* return pointer to operand word itself */ + if(operand_fifo != NULL) + *operand = & operand_fifo->data[new_qw_pc]; - /* copy ROW registers: must all exist if 4th operand exists */ - me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1); - me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2); - me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3); - me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4); + return operand_fifo; +} - /* advance PC */ - pke_pc_advance(me, 5); - } - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } - else if(IS_PKE_CMD(cmd, STCOL)) - { - /* check that FIFO has four more words for STCOL operand */ - unsigned_4* last_op; - last_op = pke_pc_operand(me, 4); - if(last_op != NULL) - { - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); +/* Return pointer to given operand# in FIFO. `operand_num' starts at 1. + If FIFO is not full enough, return 0. Skip over DMA tags, but mark + them as an error (ER0). 
*/ - /* copy COL registers: must all exist if 4th operand exists */ - me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1); - me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2); - me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3); - me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4); +unsigned_4* +pke_pc_operand(struct pke_device* me, int operand_num) +{ + unsigned_4* operand = NULL; + struct fifo_quadword* fifo_operand; - /* advance PC */ - pke_pc_advance(me, 5); - } - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } - else if(IS_PKE_CMD(cmd, MPG)) - { - unsigned_4* last_mpg_word; + fifo_operand = pke_pc_fifo(me, operand_num, & operand); - /* map zero to max+1 */ - if(num==0) num=0x100; + if(fifo_operand == NULL) + ASSERT(operand == NULL); /* pke_pc_fifo() ought leave it untouched */ - /* check that FIFO has a few more words for MPG operand */ - last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */ - if(last_mpg_word != NULL) - { - /* perform implied FLUSHE */ - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VBS bit is clear, i.e., VU is idle */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) - { - /* VU idle */ - int i; + return operand; +} - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); - /* transfer VU instructions, one word per iteration */ - for(i=0; ipke_number == 0) ? - VU0_MEM0_WINDOW_START : VU0_MEM0_WINDOW_START; - vu_addr = vu_addr_base + (imm*2) + i; - - /* VU*_MEM0_TRACK : source-addr tracking table */ - vutrack_addr_base = (me->pke_number == 0) ? - VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START; - vutrack_addr = vu_addr_base + (imm*2) + i; - - /* write data into VU memory */ - pke_track_write(me, operand, sizeof(unsigned_4), - vu_addr, fq->source_address); - - /* write srcaddr into VU srcaddr tracking table */ - sim_write(NULL, - (SIM_ADDR) vutrack_addr, - (void*) & fq->source_address, - sizeof(unsigned_4)); - /* XXX: check RC */ - } /* VU xfer loop */ - - /* advance PC */ - pke_pc_advance(me, 1 + num*2); - } - else - { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } /* if FIFO full enough */ - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } - else if(IS_PKE_CMD(cmd, DIRECT) || IS_PKE_CMD(cmd, DIRECTHL)) /* treat identically */ - { - /* check that FIFO has a few more words for DIRECT operand */ - unsigned_4* last_direct_word; - /* map zero to max+1 */ - if(imm==0) imm=0x10000; - last_direct_word = pke_pc_operand(me, imm*4); /* num: number of 128-bit words */ - if(last_direct_word != NULL) - { - /* VU idle */ - int i; - quadword fifo_data; - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); - /* transfer GPUIF quadwords, one word per iteration */ - for(i=0; isource_address); - /* XXX: check RC */ - } /* write collected quadword */ + /* write srcaddr into PKE srcaddr tracking */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0) ? 
PKE0_SRCADDR : PKE1_SRCADDR, + (void*) & sourceaddr, + sizeof(unsigned_4)); + + /* write bytes into simulator */ + rc = sim_write(NULL, + (SIM_ADDR) dest, + (void*) src, + len); + + /* clear srcaddr from PKE srcaddr tracking */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, + (void*) & no_sourceaddr, + sizeof(unsigned_4)); - } /* GPUIF xfer loop */ - - /* advance PC */ - pke_pc_advance(me, 1 + imm*4); - } /* if FIFO full enough */ - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } + return rc; +} + + +/* check for stall conditions on indicated devices (path* only on PKE1), do not change status + return 0 iff no stall */ +int +pke_check_stall(struct pke_device* me, enum pke_check_target what) +{ + int any_stall = 0; + + /* read VU status word - commonly used */ + unsigned_4 vu_stat; + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), + (void*) & vu_stat, + sizeof(unsigned_4)); + + /* perform checks */ + if(what == chk_vu) + { + /* check if VBS bit is set, i.e., VU is busy */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 1) + any_stall = 1; } - else if(IS_PKE_CMD(cmd, UNPACK)) /* warning: monster complexity */ + else if(what == chk_path1) { - short vn = BIT_MASK_GET(cmd, 2, 3); - short vl = BIT_MASK_GET(cmd, 0, 1); - short vnvl = BIT_MASK_GET(cmd, 0, 3); - int m = BIT_MASK_GET(cmd, 4, 4); - short cl = PKE_REG_MASK_GET(me, CYCLE, CL); - short wl = PKE_REG_MASK_GET(me, CYCLE, WL); - int n, num_operands; - unsigned_4* last_operand_word; - - /* map zero to max+1 */ - if(num==0) num=0x100; - - /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */ - if(wl <= cl) - n = num; - else - n = cl * (num/wl) + PKE_LIMIT(num % wl, cl); - num_operands = (((sizeof(unsigned_4) >> vl) * (vn+1) * n)/sizeof(unsigned_4)); + /* only valid on PKE1 */ + /* check if VGW bit is set, i.e., PATH1 is busy */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 1) + any_stall = 1; + } + else + { + ASSERT(0); /* XXX: not done yet */ + } - /* confirm that FIFO has enough words in it */ - last_operand_word = pke_pc_operand(me, num_operands); - if(last_operand_word != NULL) - { - address_word vu_addr_base; - int operand_num, vector_num; + /* any stall reasons? */ + return any_stall; +} - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); - /* XXX: don't check whether VU is idle?? */ +/* flip the DBF bit; recompute TOPS, ITOP & TOP */ +void +pke_flip_dbf(struct pke_device* me) +{ + /* flip DBF */ + PKE_REG_MASK_SET(me, DBF, DF, + PKE_REG_MASK_GET(me, DBF, DF) ? 
0 : 1); + PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); + /* compute new TOPS */ + PKE_REG_MASK_SET(me, TOPS, TOPS, + (PKE_REG_MASK_GET(me, BASE, BASE) + + (PKE_REG_MASK_GET(me, DBF, DF) * + PKE_REG_MASK_GET(me, OFST, OFFSET)))); + /* compute new ITOP and TOP */ + PKE_REG_MASK_SET(me, ITOP, ITOP, + PKE_REG_MASK_GET(me, ITOPS, ITOPS)); + PKE_REG_MASK_SET(me, TOP, TOP, + PKE_REG_MASK_GET(me, TOPS, TOPS)); +} - if(me->pke_number == 0) - vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); - else - { - vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); - if(BIT_MASK_GET(imm, 15, 15)) /* fetch R flag from imm word */ - vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS); - } - /* XXX: vu_addr overflow check */ - /* transfer given number of vectors */ - operand_num = 1; /* word index into instruction stream: 1..num_operands */ - vector_num = 0; /* vector number being processed: 0..num-1 */ - while(operand_num <= num_operands) - { - quadword vu_old_data; - quadword vu_new_data; - quadword unpacked_data; - address_word vu_addr; - struct fifo_quadword* fq; - int i; +/* PKEcode handler functions -- responsible for checking and + confirming old stall conditions, executing pkecode, updating PC and + status registers -- may assume being run on correct PKE unit */ + +void +pke_code_nop(struct pke_device* me, unsigned_4 pkecode) +{ + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} - /* XXX: set NUM */ - /* compute VU destination address, as bytes in R5900 memory */ - if(cl >= wl) - { - /* map zero to max+1 */ - if(wl == 0) wl = 0x0100; - vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl)); - } - else - vu_addr = vu_addr_base + 16*vector_num; +void +pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + /* copy immediate value into CYCLE reg */ + me->regs[PKE_REG_CYCLE][0] = imm; + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} - /* read old VU data word at address */ - sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data)); - /* Let sourceaddr track the first operand */ - fq = pke_pc_fifo(me, operand_num); +void +pke_code_offset(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + /* copy 10 bits to OFFSET field */ + PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9)); + /* clear DBF bit */ + PKE_REG_MASK_SET(me, DBF, DF, 0); + /* clear other DBF bit */ + PKE_REG_MASK_SET(me, STAT, DBF, 0); + /* set TOPS = BASE */ + PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE)); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} - /* For cyclic unpack, next operand quadword may come from instruction stream - or be zero. 
*/ - if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */ - { - /* clear operand - used only in a "indeterminate" state */ - for(i = 0; i < 4; i++) - unpacked_data[i] = 0; - } - else - { - /* compute unpacked words from instruction stream */ - switch(vnvl) - { - case PKE_UNPACK_S_32: - case PKE_UNPACK_V2_32: - case PKE_UNPACK_V3_32: - case PKE_UNPACK_V4_32: - /* copy (vn+1) 32-bit values */ - for(i = 0; i < vn+1; i++) - { - unsigned_4* operand = pke_pc_operand(me, operand_num); - unpacked_data[i] = *operand; - operand_num ++; - } - break; - - case PKE_UNPACK_S_16: - case PKE_UNPACK_V2_16: - case PKE_UNPACK_V3_16: - case PKE_UNPACK_V4_16: - /* copy (vn+1) 16-bit values, packed two-per-word */ - for(i=0; i vn) - masked_value = & zero; /* XXX: what to put here? */ - else - masked_value = & unpacked_data[i]; - break; - - case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */ - masked_value = & me->regs[PKE_REG_R0 + i][0]; - break; - - case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */ - masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0]; - break; - - case PKE_MASKREG_NOTHING: - /* "write inhibit" by re-copying old data */ - masked_value = & vu_old_data[i]; - break; - - default: - ASSERT(0); - /* no other cases possible */ - } - - /* copy masked value for column */ - memcpy(& vu_new_data[i], masked_value, sizeof(unsigned_4)); - } /* loop over columns */ - } - else - { - /* no mask - just copy over entire unpacked quadword */ - memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data)); - } +void +pke_code_base(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + /* copy 10 bits to BASE field */ + PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9)); + /* clear DBF bit */ + PKE_REG_MASK_SET(me, DBF, DF, 0); + /* clear other DBF bit */ + PKE_REG_MASK_SET(me, STAT, DBF, 0); + /* set TOPS = BASE */ + PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE)); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} - /* process STMOD register for accumulation operations */ - switch(PKE_REG_MASK_GET(me, MODE, MDE)) - { - case PKE_MODE_ADDROW: /* add row registers to output data */ - for(i=0; i<4; i++) - /* exploit R0..R3 contiguity */ - vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; - break; - case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */ - for(i=0; i<4; i++) - { - /* exploit R0..R3 contiguity */ - vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; - me->regs[PKE_REG_R0 + i][0] = vu_new_data[i]; - } - break; +void +pke_code_itop(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + /* copy 10 bits to ITOPS field */ + PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9)); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} - case PKE_MODE_INPUT: /* pass data through */ - default: - ; - } - /* write replacement word */ - pke_track_write(me, vu_new_data, sizeof(vu_new_data), - (SIM_ADDR) vu_addr, fq->source_address); +void +pke_code_stmod(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + /* copy 2 bits to MODE register */ + PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2)); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} - /* next vector please */ - vector_num ++; - } /* vector transfer 
loop */ - } /* PKE FIFO full enough */ - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } + +void +pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode) +{ + ASSERT(0); + /* XXX: cannot handle this one yet */ +} + + +void +pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + /* copy 16 bits to MARK register */ + PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15)); + /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */ + PKE_REG_MASK_SET(me, STAT, MRK, 1); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} + + +void +pke_code_flushe(struct pke_device* me, unsigned_4 pkecode) +{ + /* compute next PEW bit */ + if(pke_check_stall(me, chk_vu)) + { + /* VU busy */ + PKE_REG_MASK_SET(me, STAT, PEW, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + /* try again next cycle */ } - /* ... */ else { - /* set ER1 flag in STAT register */ - PKE_REG_MASK_SET(me, STAT, ER1, 1); - /* advance over faulty word */ + /* VU idle */ + PKE_REG_MASK_SET(me, STAT, PEW, 0); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); pke_pc_advance(me, 1); } - - /* PKE is now idle or waiting */ - PKE_REG_MASK_SET(me, STAT, PPS, next_pps_state); } +void +pke_code_flush(struct pke_device* me, unsigned_4 pkecode) +{ + int something_busy = 0; + + /* compute next PEW, PGW bits */ + if(pke_check_stall(me, chk_vu)) + { + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PEW, 1); + } + else + PKE_REG_MASK_SET(me, STAT, PEW, 0); + if(pke_check_stall(me, chk_path1) || + pke_check_stall(me, chk_path2)) + { + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PGW, 1); + } + else + PKE_REG_MASK_SET(me, STAT, PGW, 0); + /* go or no go */ + if(something_busy) + { + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } + else + { + /* all idle */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } +} -/* advance the PC by given number of words; update STAT/FQC field */ void -pke_pc_advance(struct pke_device* me, int num_words) +pke_code_flusha(struct pke_device* me, unsigned_4 pkecode) { - ASSERT(num_words > 0); + int something_busy = 0; - me->qw_pc += num_words; - /* handle overflow */ - while(me->qw_pc >= 4) + /* compute next PEW, PGW bits */ + if(pke_check_stall(me, chk_vu)) { - me->qw_pc -= 4; - me->fifo_pc ++; + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PEW, 1); } + else + PKE_REG_MASK_SET(me, STAT, PEW, 0); - /* clear FQC if FIFO is now empty */ - if(me->fifo_num_elements == me->fifo_pc) + + if(pke_check_stall(me, chk_path1) || + pke_check_stall(me, chk_path2) || + pke_check_stall(me, chk_path3)) { - PKE_REG_MASK_SET(me, STAT, FQC, 0); + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PGW, 1); } + else + PKE_REG_MASK_SET(me, STAT, PGW, 0); + if(something_busy) + { + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } + else + { + /* all idle */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } } +void +pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode) +{ + /* compute next PEW bit */ + if(pke_check_stall(me, chk_vu)) + { + /* VU busy */ + PKE_REG_MASK_SET(me, STAT, PEW, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + /* try again next cycle */ + } + else + { + unsigned_4 vu_pc; + int imm = 
BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* VU idle */ + PKE_REG_MASK_SET(me, STAT, PEW, 0); + + /* flip DBF on PKE1 */ + if(me->pke_number == 1) + pke_flip_dbf(me); + + /* compute new PC for VU */ + vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ + /* write new PC; callback function gets VU running */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), + (void*) & vu_pc, + sizeof(unsigned_4)); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } +} -/* Return pointer to given operand# in FIFO. `word_num' starts at 1. - If FIFO is not full enough, return 0. */ -unsigned_4* -pke_pc_operand(struct pke_device* me, int word_num) + +void +pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode) { - int new_qw_pc = 0; - int new_fifo_pc; - unsigned_4* operand; + /* compute next PEW bit */ + if(pke_check_stall(me, chk_vu)) + { + /* VU busy */ + PKE_REG_MASK_SET(me, STAT, PEW, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + /* try again next cycle */ + } + else + { + unsigned_4 vu_pc; - ASSERT(word_num > 0); + /* VU idle */ + PKE_REG_MASK_SET(me, STAT, PEW, 0); - new_fifo_pc = me->fifo_pc; - new_qw_pc += me->qw_pc + word_num; + /* flip DBF on PKE1 */ + if(me->pke_number == 1) + pke_flip_dbf(me); - /* handle overflow */ - while(new_qw_pc >= 4) + /* read old PC */ + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), + (void*) & vu_pc, + sizeof(unsigned_4)); + + /* rewrite new PC; callback function gets VU running */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), + (void*) & vu_pc, + sizeof(unsigned_4)); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } +} + + +void +pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode) +{ + int something_busy = 0; + + /* compute next PEW, PGW bits */ + if(pke_check_stall(me, chk_vu)) { - new_qw_pc -= 4; - new_fifo_pc ++; + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PEW, 1); } + else + PKE_REG_MASK_SET(me, STAT, PEW, 0); - /* not enough elements */ - if(me->fifo_num_elements == me->fifo_pc) - operand = NULL; + + if(pke_check_stall(me, chk_path1) || + pke_check_stall(me, chk_path2) || + pke_check_stall(me, chk_path3)) + { + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PGW, 1); + } else - operand = & me->fifo[new_fifo_pc].data[new_qw_pc]; + PKE_REG_MASK_SET(me, STAT, PGW, 0); - return operand; + /* go or no go */ + if(something_busy) + { + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } + else + { + unsigned_4 vu_pc; + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* flip DBF on PKE1 */ + if(me->pke_number == 1) + pke_flip_dbf(me); + + /* compute new PC for VU */ + vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ + /* write new PC; callback function gets VU running */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0 ? 
VU0_PC_START : VU1_PC_START), + (void*) & vu_pc, + sizeof(unsigned_4)); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } } +void +pke_code_stmask(struct pke_device* me, unsigned_4 pkecode) +{ + /* check that FIFO has one more word for STMASK operand */ + unsigned_4* mask; + + mask = pke_pc_operand(me, 1); + if(mask != NULL) + { + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + /* fill the register */ + PKE_REG_MASK_SET(me, MASK, MASK, *mask); + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } + else + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } +} -/* Return pointer to FIFO quadword containing given operand# in FIFO. - `word_num' starts at 1. If FIFO is not full enough, return 0. */ -struct fifo_quadword* -pke_pc_fifo(struct pke_device* me, int word_num) +void +pke_code_strow(struct pke_device* me, unsigned_4 pkecode) { - int new_qw_pc = 0; - int new_fifo_pc; - struct fifo_quadword* operand; - - ASSERT(word_num > 0); + /* check that FIFO has four more words for STROW operand */ + unsigned_4* last_op; + + last_op = pke_pc_operand(me, 4); + if(last_op != NULL) + { + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* copy ROW registers: must all exist if 4th operand exists */ + me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1); + me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2); + me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3); + me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 5); + } + else + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } +} - new_fifo_pc = me->fifo_pc; - new_qw_pc += me->qw_pc + word_num; - /* handle overflow */ - while(new_qw_pc >= 4) +void +pke_code_stcol(struct pke_device* me, unsigned_4 pkecode) +{ + /* check that FIFO has four more words for STCOL operand */ + unsigned_4* last_op; + + last_op = pke_pc_operand(me, 4); + if(last_op != NULL) { - new_qw_pc -= 4; - new_fifo_pc ++; + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* copy COL registers: must all exist if 4th operand exists */ + me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1); + me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2); + me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3); + me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 5); } - - /* not enough elements */ - if(me->fifo_num_elements == me->fifo_pc) - operand = NULL; else - operand = & me->fifo[new_fifo_pc]; + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } +} - return operand; + +void +pke_code_mpg(struct pke_device* me, unsigned_4 pkecode) +{ + unsigned_4* last_mpg_word; + int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* map zero to max+1 */ + if(num==0) num=0x100; + + /* check that FIFO has a few more words for MPG operand */ + last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */ + if(last_mpg_word != NULL) + { + /* perform implied FLUSHE */ + /* read VU 
status word */ + unsigned_4 vu_stat; + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), + (void*) & vu_stat, + sizeof(unsigned_4)); + + /* check if VBS bit is clear, i.e., VU is idle */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) + { + /* VU idle */ + int i; + + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* transfer VU instructions, one word per iteration */ + for(i=0; ipke_number == 0) ? + VU0_MEM0_WINDOW_START : VU0_MEM0_WINDOW_START; + vu_addr = vu_addr_base + (imm*2) + i; + + /* VU*_MEM0_TRACK : source-addr tracking table */ + vutrack_addr_base = (me->pke_number == 0) ? + VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START; + vutrack_addr = vu_addr_base + (imm*2) + i; + + /* write data into VU memory */ + pke_track_write(me, operand, sizeof(unsigned_4), + vu_addr, fq->source_address); + + /* write srcaddr into VU srcaddr tracking table */ + sim_write(NULL, + (SIM_ADDR) vutrack_addr, + (void*) & fq->source_address, + sizeof(unsigned_4)); + } /* VU xfer loop */ + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1 + num*2); + } + else + { + /* VU busy */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + /* retry this instruction next clock */ + } + } /* if FIFO full enough */ + else + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* retry this instruction next clock */ + } } +void +pke_code_direct(struct pke_device* me, unsigned_4 pkecode) +{ + /* check that FIFO has a few more words for DIRECT operand */ + unsigned_4* last_direct_word; + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); + + /* map zero to max+1 */ + if(imm==0) imm=0x10000; + + last_direct_word = pke_pc_operand(me, imm*4); /* num: number of 128-bit words */ + if(last_direct_word != NULL) + { + /* VU idle */ + int i; + quadword fifo_data; + + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* transfer GPUIF quadwords, one word per iteration */ + for(i=0; isource_address); + } /* write collected quadword */ + + } /* GPUIF xfer loop */ + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1 + imm*4); + } /* if FIFO full enough */ + else + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* retry this instruction next clock */ + } +} -/* Write a bunch of bytes into simulator memory. Store the given source address into the - PKE sourceaddr tracking word. */ -int -pke_track_write(struct pke_device* me, const void* src, int len, - address_word dest, unsigned_4 sourceaddr) + +void +pke_code_directhl(struct pke_device* me, unsigned_4 pkecode) { - int rc; - unsigned_4 no_sourceaddr = 0; + /* treat the same as DIRECTH */ + pke_code_direct(me, pkecode); +} - /* write srcaddr into PKE srcaddr tracking */ - sim_write(NULL, - (SIM_ADDR) (me->pke_number == 0) ? 
PKE0_SRCADDR : PKE1_SRCADDR, - (void*) & sourceaddr, - sizeof(unsigned_4)); + +void +pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E); + int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); + + short vn = BIT_MASK_GET(cmd, 2, 3); + short vl = BIT_MASK_GET(cmd, 0, 1); + short vnvl = BIT_MASK_GET(cmd, 0, 3); + int m = BIT_MASK_GET(cmd, 4, 4); + short cl = PKE_REG_MASK_GET(me, CYCLE, CL); + short wl = PKE_REG_MASK_GET(me, CYCLE, WL); + int n, num_operands; + unsigned_4* last_operand_word; - /* write bytes into simulator */ - rc = sim_write(NULL, - (SIM_ADDR) dest, - (void*) src, - len); + /* map zero to max+1 */ + if(num==0) num=0x100; - /* clear srcaddr from PKE srcaddr tracking */ - sim_write(NULL, - (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, - (void*) & no_sourceaddr, - sizeof(unsigned_4)); + /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */ + if(wl <= cl) + n = num; + else + n = cl * (num/wl) + PKE_LIMIT(num % wl, cl); + num_operands = (((sizeof(unsigned_4) >> vl) * (vn+1) * n)/sizeof(unsigned_4)); + + /* confirm that FIFO has enough words in it */ + last_operand_word = pke_pc_operand(me, num_operands); + if(last_operand_word != NULL) + { + address_word vu_addr_base; + int operand_num, vector_num; + + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* don't check whether VU is idle */ + + if(me->pke_number == 0) + vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); + else + { + vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); + if(BIT_MASK_GET(imm, 15, 15)) /* fetch R flag from imm word */ + vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS); + } + + /* XXX: vu_addr overflow check */ + + /* transfer given number of vectors */ + operand_num = 1; /* word index into instruction stream: 1..num_operands */ + vector_num = 0; /* vector number being processed: 0..num-1 */ + while(operand_num <= num_operands) + { + quadword vu_old_data; + quadword vu_new_data; + quadword unpacked_data; + address_word vu_addr; + struct fifo_quadword* fq; + int i; + + /* XXX: set NUM */ + + /* compute VU destination address, as bytes in R5900 memory */ + if(cl >= wl) + { + /* map zero to max+1 */ + if(wl == 0) wl = 0x0100; + vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl)); + } + else + vu_addr = vu_addr_base + 16*vector_num; + + /* read old VU data word at address */ + sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data)); + + /* Let sourceaddr track the first operand */ + fq = pke_pc_fifo(me, operand_num, NULL); + + /* For cyclic unpack, next operand quadword may come from instruction stream + or be zero. 
*/ + if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */ + { + /* clear operand - used only in a "indeterminate" state */ + for(i = 0; i < 4; i++) + unpacked_data[i] = 0; + } + else + { + /* compute unpacked words from instruction stream */ + switch(vnvl) + { + case PKE_UNPACK_S_32: + case PKE_UNPACK_V2_32: + case PKE_UNPACK_V3_32: + case PKE_UNPACK_V4_32: + /* copy (vn+1) 32-bit values */ + for(i = 0; i < vn+1; i++) + { + unsigned_4* operand = pke_pc_operand(me, operand_num); + unpacked_data[i] = *operand; + operand_num ++; + } + break; + + case PKE_UNPACK_S_16: + case PKE_UNPACK_V2_16: + case PKE_UNPACK_V3_16: + case PKE_UNPACK_V4_16: + /* copy (vn+1) 16-bit values, packed two-per-word */ + for(i=0; i vn) + masked_value = & zero; /* arbitrary data: undefined in spec */ + else + masked_value = & unpacked_data[i]; + break; + + case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */ + masked_value = & me->regs[PKE_REG_R0 + i][0]; + break; + + case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */ + masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0]; + break; + + case PKE_MASKREG_NOTHING: + /* "write inhibit" by re-copying old data */ + masked_value = & vu_old_data[i]; + break; + + default: + ASSERT(0); + /* no other cases possible */ + } + + /* copy masked value for column */ + memcpy(& vu_new_data[i], masked_value, sizeof(unsigned_4)); + } /* loop over columns */ + } + else + { + /* no mask - just copy over entire unpacked quadword */ + memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data)); + } + + /* process STMOD register for accumulation operations */ + switch(PKE_REG_MASK_GET(me, MODE, MDE)) + { + case PKE_MODE_ADDROW: /* add row registers to output data */ + for(i=0; i<4; i++) + /* exploit R0..R3 contiguity */ + vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; + break; + + case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */ + for(i=0; i<4; i++) + { + /* exploit R0..R3 contiguity */ + vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; + me->regs[PKE_REG_R0 + i][0] = vu_new_data[i]; + } + break; + + case PKE_MODE_INPUT: /* pass data through */ + default: + ; + } + + /* write replacement word */ + pke_track_write(me, vu_new_data, sizeof(vu_new_data), + (SIM_ADDR) vu_addr, fq->source_address); + + /* next vector please */ + vector_num ++; + } /* vector transfer loop */ + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, num_operands); + } /* PKE FIFO full enough */ + else + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* retry this instruction next clock */ + } +} + + +void +pke_code_error(struct pke_device* me, unsigned_4 pkecode) +{ + /* set ER1 flag in STAT register */ + PKE_REG_MASK_SET(me, STAT, ER1, 1); + /* advance over faulty word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); } diff --git a/sim/mips/sky-pke.h b/sim/mips/sky-pke.h index 74b8bc77a09..c378c640eb0 100644 --- a/sim/mips/sky-pke.h +++ b/sim/mips/sky-pke.h @@ -7,10 +7,6 @@ #include "sky-device.h" -/* Debugguing PKE? 
*/ - -#define PKE_DEBUG - /* External functions */ @@ -163,10 +159,11 @@ typedef unsigned_4 quadword[4]; #define PKE_REG_STAT_PPS_E 1 #define PKE_REG_STAT_PPS_B 0 -#define PKE_REG_STAT_PPS_IDLE 0x00 -#define PKE_REG_STAT_PPS_WAIT 0x01 -#define PKE_REG_STAT_PPS_DECODE 0x02 -#define PKE_REG_STAT_PPS_XFER 0x03 +#define PKE_REG_STAT_PPS_IDLE 0x00 /* ready to execute next instruction */ +#define PKE_REG_STAT_PPS_WAIT 0x01 /* not enough words in FIFO */ +#define PKE_REG_STAT_PPS_DECODE 0x02 /* decoding instruction */ +#define PKE_REG_STAT_PPS_STALL 0x02 /* alias state for FLUSHE stall */ +#define PKE_REG_STAT_PPS_XFER 0x03 /* transferring instruction operands */ /* DBF register */ #define PKE_REG_DBF_DF_E 0 @@ -364,7 +361,7 @@ struct pke_device struct fifo_quadword* fifo; int fifo_num_elements; /* no. of quadwords occupied in FIFO */ int fifo_buffer_size; /* no. of quadwords of space in FIFO */ - FILE* fifo_trace_file; /* or 0 for no trace */ + FILE* fifo_trace_file; /* or 0 for no trace */ /* XXX: tracing not done */ /* XXX: assumes FIFOs grow indefinately */ /* PC */
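
For readers following the new per-PKEcode dispatch introduced above, here is a minimal sketch of the handler shape this patch establishes. It is illustrative only and not part of the commit: the handler name pke_code_example and its use of the IMM field are hypothetical, while BIT_MASK_GET, PKE_REG_MASK_SET, PKE_OPCODE_IMM_B/E, PKE_REG_STAT_PPS_IDLE and pke_pc_advance() are taken from the code in the hunks above.

/* Hypothetical sketch -- not part of this commit. */
static void
pke_code_example(struct pke_device* me, unsigned_4 pkecode)
{
  /* decode whatever fields the instruction needs from the PKEcode word */
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  /* ... act on `imm', fetching operands with pke_pc_operand() and
     checking stall conditions with pke_check_stall() as required ... */

  /* done: return to idle and step past the PKEcode word (plus any
     operand words that were consumed) */
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
  pke_pc_advance(me, 1);
}

A matching prototype near the top of sky-pke.c and an IS_PKE_CMD() test in pke_issue() would complete the hookup; both patterns are visible in the hunks above.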