From fd7e88a19d655ccff4f73a364db2592831c12ba7 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Mon, 1 Aug 2022 20:06:19 +0100 Subject: [PATCH] cleanup, remove old pages --- alt_rvp.mdwn | 64 ---------------------------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 alt_rvp.mdwn diff --git a/alt_rvp.mdwn b/alt_rvp.mdwn deleted file mode 100644 index 8c914b71c..000000000 --- a/alt_rvp.mdwn +++ /dev/null @@ -1,64 +0,0 @@ -This RVP proposal is an alternative that uses only the integer register file. It provides both SIMD groups equal to XLEN by partitioning the standard ALU, and provides SIMD groups larger than XLEN by effectively expanding the register file with additional lanes using a scheme inspired by the Hwacha microarchitecture. - -(All examples on this page assume RV32.) - ----- - -# Partitioning - -Each bit set in the "part" CSR inhibits carry-in to that position and defines an element boundary. Only the packed-data instructions honor the part CSR. - -ADD - -| | reg | 31..0 | -| -- | --- | ----- | -| | rs1 | x1 | -| + | rs2 | y1 | -| -> | rd | x1+y1 | - -PADD ("packed ADD") (with bits 5, 11, 16, 21, and 27 set in the "part" CSR for pairs of RGB565 data) - -| | reg | 31..27 | 26..21 | 20..16 | 15..11 | 10..5 | 4..0 | -| -- | --- | ------ | ------ | ------ | ------ | ----- | ---- | -| | rs1 | x2r | x2g | x2b | x1r | x1g | x1b | -| + | rs2 | y2r | y2g | y2b | y1r | y1g | y1b | -| -> | rd | x2r+y2r| x2g+y2g| x2b+y2b| x1r+y1r|x1g+y1g| x1b+y1b| - ----- - -# Lanes - -Each bit set in the "plane" CSR activates that lane. Only bits corresponding to implemented lanes are writable. Writing zero to "plane" disables all lanes, zeroes all registers except for lane 0, clears the status bits that indicate that other lanes need to be saved/restored, and stores "1" to "plane" to leave lane 0 active. - -ADD (with "plane" CSR == 0x0000000F) - -| | reg | Lane 0 | Lane 1 | Lane 2 | Lane 3 | -| -- | --- | ------ | ------ | ------ | ------ | -| | reg | 31..0 | 31..0 | 31..0 | 31..0 | -| | rs1 | x1 | x2 | x3 | x4 | -| + | rs2 | y1 | y2 | y3 | y4 | -| -> | rd | x1+y1 | x2+y2 | x3+y3 | x4+y4 | - -Example parallel add: - - /* XLEN and N are "baked-in" to the hardware */ - parameter XLEN; - parameter N; - /* note that N cannot be greater than XLEN */ - - register plane[XLEN]; - register x[N][32][XLEN]; - - function op_add(rd, rs1, rs2) { - /* note that this is ADD, not PADD */ - int i; - for (i = 0; i