From b8075c01973fda9bdb06d2bff42c169a6865382e Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Tue, 9 Oct 2018 11:39:49 +0100
Subject: [PATCH] extend sv register file from 64 to 128 after discussion.

evaluation of even embedded GPUs shows that they have really enormous
register files. a fp16 x 4 to express quads, times four, takes up
eight consecutive registers just on its own.
---
Review notes (placed after the "---" marker, so not part of the commit
message):
 - The line structure of this patch was restored; every hunk matches the
   line counts in its "@@" header and the diffstat below. Leading
   indentation inside hunks is reconstructed and may need adjusting
   against the target tree (or apply with --ignore-whitespace).
 - sv_reg_csr_entry.regidx is widened to 7 bits; its comment now says so.
 - SV_REG_CSR / SV_PRED_CSR parenthesize their arguments so that
   expression arguments (e.g. "a | b") group correctly around "<<".
 - CSR entry layouts after this change (16 bits each, matching
   "unsigned short u"):
     reg : regkey[4:0]  elwidth[6:5]      type[7]  regidx[14:8]  isvec[15]
     pred: regkey[4:0]  zero[5]  inv[6]   type[7]  regidx[14:8]  packed[15]

 riscv/processor.cc |  4 ++--
 riscv/sv.h         | 23 +++++++++++------------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/riscv/processor.cc b/riscv/processor.cc
index a57f8e6..c6c8457 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -401,7 +401,6 @@ void processor_t::set_csr(int which, reg_t val)
         r->elwidth = c->b.elwidth;
         r->regidx = c->b.regidx;
         r->isvec = c->b.isvec;
-        r->packed = c->b.packed;
         r->active = true;
         fprintf(stderr, "setting REGCFG type:%d isvec:%d %d %d\n",
                 c->b.type, r->isvec, (int)idx, (int)r->regidx);
@@ -449,7 +448,8 @@ void processor_t::set_csr(int which, reg_t val)
         r->regidx = c->b.regidx;
         r->zero = c->b.zero;
         r->inv = c->b.inv;
-        r->active = c->b.active;
+        r->packed = c->b.packed;
+        r->active = true;
         fprintf(stderr, "setting PREDCFG type:%d zero:%d %d %d\n",
                 c->b.type, r->zero, (int)idx, (int)r->regidx);
       }
diff --git a/riscv/sv.h b/riscv/sv.h
index 748b9c6..21f07d4 100644
--- a/riscv/sv.h
+++ b/riscv/sv.h
@@ -6,10 +6,10 @@
 #include "decode.h"
 
 // useful macros for constructing SV reg and predicate CSR CAM entries
-#define SV_REG_CSR(type, regkey, elwidth, regidx, isvec, packed) \
-  (regkey | (elwidth<<5) | (type<<7) | (regidx<<8) | (isvec<<14) | (packed<<15))
-#define SV_PRED_CSR(type, regkey, zero, inv, regidx, active) \
-  (regkey | (zero<<5) | (inv<<6) | (type<<7) | (regidx<<8) | (active<<14))
+#define SV_REG_CSR(type, regkey, elwidth, regidx, isvec) \
+  ((regkey) | ((elwidth)<<5) | ((type)<<7) | ((regidx)<<8) | ((isvec)<<15))
+#define SV_PRED_CSR(type, regkey, zero, inv, regidx, packed) \
+  ((regkey) | ((zero)<<5) | ((inv)<<6) | ((type)<<7) | ((regidx)<<8) | ((packed)<<15))
 
 // this table is for the CSRs (4? for RV32E, 16 for other types)
 // it's a CAM that's used to generate 2 tables (below)
@@ -18,11 +18,10 @@
 union sv_reg_csr_entry {
     struct {
         uint64_t regkey : 5; // 5 bits
-        unsigned int elwidth: 2; // 0=8-bit, 1=dflt, 2=dflt/2 3=dflt*2
+        unsigned int elwidth: 2; // 0=dflt, 1=dflt/2, 2=dflt*2 3=8-bit
         unsigned int type : 1; // 1=INT, 0=FP
-        uint64_t regidx : 6; // yes 6 bits
+        uint64_t regidx : 7; // yes 7 bits
         unsigned int isvec : 1; // vector=1, scalar=0
-        unsigned int packed : 1; // Packed SIMD=1
     } b;
     unsigned short u;
 };
@@ -42,9 +41,8 @@ union sv_reg_csr_entry {
 // in SV however the instruction is STILL ONLY 5 BITS.
 typedef struct {
     unsigned int elwidth: 2; // 0=8-bit, 1=dflt, 2=dflt/2 3=dflt*2
-    uint64_t regidx : 6; // yes 6 bits.
+    uint64_t regidx : 7; // yes 7 bits.
     unsigned int isvec : 1; // vector=1, scalar=0
-    unsigned int packed : 1; // Packed SIMD=1
     unsigned int active : 1; // enabled=1, disabled=0
 } sv_reg_entry;
 
@@ -54,8 +52,8 @@ union sv_pred_csr_entry {
         unsigned int zero : 1; // zeroing=1, skipping=0
         unsigned int inv : 1; // inversion=1
         unsigned int type : 1; // 1=INT, 0=FP
-        uint64_t regidx: 6; // 6 bits
-        unsigned int active: 1; // enabled=1, disabled=0
+        uint64_t regidx: 7; // 7 bits
+        unsigned int packed : 1; // Packed SIMD=1
     } b;
     unsigned short u;
 };
@@ -64,8 +62,9 @@ typedef struct {
     uint64_t regkey: 5; // 5 bits
     unsigned int zero : 1; // zeroing=1, skipping=0
     unsigned int inv : 1; // inversion=1
-    uint64_t regidx: 6; // 6 bits
+    uint64_t regidx: 7; // 7 bits
     unsigned int active: 1; // enabled=1, disabled=0
+    unsigned int packed : 1; // Packed SIMD=1
 } sv_pred_entry;
 
 bool sv_check_reg(bool intreg, uint64_t reg);
-- 
2.30.2