From ef6e968a5dd9ee03343ea251f1ea2525fb3d0688 Mon Sep 17 00:00:00 2001 From: lkcl Date: Sat, 1 Apr 2023 17:56:04 +0100 Subject: [PATCH] --- openpower/sv/rfc/ls010.mdwn | 90 +++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 13 deletions(-) diff --git a/openpower/sv/rfc/ls010.mdwn b/openpower/sv/rfc/ls010.mdwn index 1bf582569..bacec089a 100644 --- a/openpower/sv/rfc/ls010.mdwn +++ b/openpower/sv/rfc/ls010.mdwn @@ -144,15 +144,16 @@ and it is left to the reader to translate to MSB0 numbering. The Canonical specification for how element-sequential numbering and element-width overrides is defined is expressed in the following c structure, assuming a Little-Endian system, and naturally using LSB0 -numbering everywhere because the ANSI c specification is inherently LSB0: +numbering everywhere because the ANSI c specification is inherently LSB0. +Note the deliberate similarity to how VSX register elements are defined: ``` #pragma pack typedef union { - uint8_t b[]; // elwidth 8 - uint16_t s[]; // elwidth 16 - uint32_t i[]; // elwidth 32 - uint64_t l[]; // elwidth 64 + uint8_t bytes[]; // elwidth 8 + uint16_t hwords[]; // elwidth 16 + uint32_t words[]; // elwidth 32 + uint64_t dwords[]; // elwidth 64 uint8_t actual_bytes[8]; } el_reg_t; @@ -160,18 +161,18 @@ numbering everywhere because the ANSI c specification is inherently LSB0: void get_register_element(el_reg_t* el, int gpr, int element, int width) { switch (width) { - case 64: el->l = int_regfile[gpr].l[element]; - case 32: el->i = int_regfile[gpr].i[element]; - case 16: el->s = int_regfile[gpr].s[element]; - case 8 : el->b = int_regfile[gpr].b[element]; + case 64: el->dwords = int_regfile[gpr].dwords[element]; + case 32: el->words = int_regfile[gpr].words[element]; + case 16: el->hwords = int_regfile[gpr].hwords[element]; + case 8 : el->bytes = int_regfile[gpr].bytes[element]; } } void set_register_element(el_reg_t* el, int gpr, int element, int width) { switch (width) { - case 64: 
int_regfile[gpr].l[element] = el->l; - case 32: int_regfile[gpr].i[element] = el->i; - case 16: int_regfile[gpr].s[element] = el->s; - case 8 : int_regfile[gpr].b[element] = el->b; + case 64: int_regfile[gpr].dwords[element] = el->dwords; + case 32: int_regfile[gpr].words[element] = el->words; + case 16: int_regfile[gpr].hwords[element] = el->hwords; + case 8 : int_regfile[gpr].bytes[element] = el->bytes; } } ``` @@ -206,6 +207,69 @@ write-enable line. It is up to the Hardware Architect to then amortise as simultaneous non-overlapping Register File writes, to achieve High Performance designs. +For a comparative data point the VSR Registers may be expressed in the +same fashion. The c code below is directly an expression of Figure 97 in +Power ISA Public v3.1 Book I Section 6.3 page 258, *after compensating for +MSB0 numbering and adapting in full to LSB0 numbering and obeying LE +ordering*. + +**Crucial to understanding why the subtraction from 1,3,7,15 is present +is the fact that VSX Registers number elements also in MSB0 order**. SVP64 +very specifically numbers elements in **LSB0** order with the first +element being at the **LSB** end of the register, where VSX places +the numerically-lowest element at the **MSB** end of the register. 
+ +``` + #pragma pack + typedef union { + uint8_t bytes[16]; // elwidth 8, QTY 16 FIXED total + uint16_t hwords[8]; // elwidth 16, QTY 8 FIXED total + uint32_t words[4]; // elwidth 32, QTY 4 FIXED total + uint64_t dwords[2]; // elwidth 64, QTY 2 FIXED total + uint8_t actual_bytes[16]; // totals 128-bit + } el_reg_t; + + el_reg_t VSR_regfile[64]; + + static void check_num_elements(int elt, int width) { + switch (width) { + case 64: assert elt < 2; + case 32: assert elt < 4; + case 16: assert elt < 8; + case 8 : assert elt < 16; + } + } + void get_VSR_element(el_reg_t* el, int gpr, int elt, int width) { + check_num_elements(elt, width); + switch (width) { + case 64: el->dwords[1-elt] = VSR_regfile[gpr].dwords[1-elt]; + case 32: el->words[3-elt] = VSR_regfile[gpr].words[3-elt]; + case 16: el->hwords[7-elt] = VSR_regfile[gpr].hwords[7-elt]; + case 8 : el->bytes[15-elt] = VSR_regfile[gpr].bytes[15-elt]; + } + } + void set_VSR_element(el_reg_t* el, int gpr, int elt, int width) { + check_num_elements(elt, width); + switch (width) { + case 64: VSR_regfile[gpr].dwords[1-elt] = el->dwords[1-elt]; + case 32: VSR_regfile[gpr].words[3-elt] = el->words[3-elt]; + case 16: VSR_regfile[gpr].hwords[7-elt] = el->hwords[7-elt]; + case 8 : VSR_regfile[gpr].bytes[15-elt] = el->bytes[15-elt]; + } + } +``` + +For VSX Registers one key difference is that the overlay of different element +widths is clearly a *bounded quantity*, whereas for Simple-V the elements are +*unrestrained and permitted to flow into successive underlying Scalar registers*. +This difference is absolutely critical to a full understanding of the entire +Simple-V paradigm and why element-ordering, bit-numbering *and register numbering* +are all so strictly defined. + +Implementations are not permitted to violate the Canonical definition: software +will be critically relying on the wrapped (overflow) behaviour inherently +implied from the unbounded c arrays. 
+ ## Scalar Identity Behaviour SVP64 is designed so that when the prefix is all zeros, and VL=1, no -- 2.30.2