(no commit message)

[libreriscv.git] / openpower / sv / 16_bit_compressed.mdwn
diff --git a/openpower/sv/16_bit_compressed.mdwn b/openpower/sv/16_bit_compressed.mdwn

index 42330cbd7019bfa4344e2a1e5d78553b53e9c2d8..564138e1b85b113924a28940363cf4287ace45e9 100644 (file)
--- a/openpower/sv/16_bit_compressed.mdwn
+++ b/openpower/sv/16_bit_compressed.mdwn
@@ -1,3 +1,5 @@
+[[!tag standards]]
+
  # 16 bit Compressed
  
  Similar to VLE (but without immediate-prefixing) this encoding is designed
@@ -39,7 +41,7 @@ standard 32 bit and 16 bit to intermingle cleanly.  To achieve the same
  thing on OpenPOWER would require a whopping 24 6-bit Major Opcodes which
  is clearly impractical: other schemes need to be devised.
  
-In addition we would like to add SV-C32 which is a Vectorised version
+In addition we would like to add SV-C32 which is a Vectorized version
  of 16 bit Compressed, and ideally have a variant that adds the 27-bit
  prefix format from SV-P64, as well.
  
@@ -111,7 +113,9 @@ to cross into or out of a function call.
  Thus it is the mandatory responsibility of the compiler to ensure that
  context returns to "v3.0B Standard" prior to entering a function call
  (responsibility of caller) and prior to exit from a function call
-(responsibility of callee).
+(responsibility of callee) by setting appropriate M and N bits.
+
+If, however, it is known to the compiler that certain static leaf-node functions and their immediate callers will never, under any circumstances, be called by external ABI-compliant code, then of course the compiler may choose to write such static functions as it sees fit.
  
  Trap Handlers also take responsibility for saving and restoring of
  Compressed Mode state, just as they already take responsibility for
@@ -201,7 +205,7 @@ Major Opcodes)
      | 0 | 1234 | 567  8 | 9  | a b | c  | d e | f | enc
      | N | immf | Cmaj.m | fld1     | fld2     | M | 16b
      | 1 | immf | Cmaj.m | fld1     | imm      | 1 | 16b imm
-    | fd3      | 001.1  | S1 | fd1 | S2 | fd2 | M | 16b sub
+    | N | fd3  | 001.1  | S1 | fd1 | S2 | fd2 | M | 16b sub
      | N | fd4  | 111.m  | fld1     | fld2     | M | 16b LDST
  
  Notes:
@@ -238,7 +242,8 @@ instruction counts from objdump on /bin/bash:
      | 1 | 1  | 0 | sh2 | | 001.0 | RA   | sh  | 1 | srawi.
      | 1 | 1  | 1 |     | | 001.0 | 000  | imm | 1 | TBD
      | 1 | 1  | 1 | i2  | | 001.0 | RA!=0| imm | 1 | addis
-    | 1 |              | | 010.0 | 000  |     | 1 | TBD
+    | 1 | 0  | i2      | | 010.0 | 000  | imm | 1 | setvli
+    | 1 | 1  | i2      | | 010.0 | 000  | imm | 1 | setmvli
      | 1 | i2           | | 010.0 | RA!=0| imm | 1 | addi
      | 1 | 0  | i2      | | 010.1 | RA   | imm | 1 | cmpdi
      | 1 | 1  | i2      | | 010.1 | RA   | imm | 1 | cmpwi
@@ -294,10 +299,11 @@ is "nop"
  
  16 bit mode only:
  
+    | 0 | 1 | 234 | | 567.8  | 9  ab | c   de | f |
      | - | - | --- | | -----  | ----- | ------ | - |
      | 1 | 0   000 | | 000.0  | 0  00 | 0   00 | 0 | nop
-    | 1 | 1   000 | | 000.0  | 0  00 | 0   00 | 0 | attn
-    | 1 | nonzero | | 000.0  | 0  00 | 0   00 | 0 | TBD
+    | 1 | 0   000 | | 000.0  | 0  00 | 0   00 | 1 | nop
+    | N | 1   000 | | 000.0  | 0  00 | 0   00 | M | attn
  
  Notes:
  
@@ -321,7 +327,7 @@ In essence the 2 nops are needed due to there being 2 different C forms:
  
  ### Branch
  
-TODO: document that branching whilst using mode-switching bits (M/N) is perfectly well permitted but is specifically and wholly the complier/assembler writers responsibility to obey ABI rules and ensure that even with branches and returns that, at no time, is an incorrect mode entered or left that could result in any instruction being misinterpreted.
+TODO: document that branching whilst using mode-switching bits (M/N) is perfectly well permitted, the caveat being: it is specifically and wholly the compiler/assembler writer's responsibility to obey ABI rules and ensure that, even with branches and returns, at no time is an incorrect mode entered or left that could result in any instruction being misinterpreted.
  
      | 16-bit mode | | 10-bit mode                 |
      | 0 | 1 | 234 | | 567.8  | 9  ab | c   de | f |
@@ -428,9 +434,9 @@ Notes:
      | N | 0 |  RT | | 100.1 | RB  | RA!=0 | M | nand
      | N | 0 |  RT | | 101.0 | RB  | RA!=0 | M | or
      | N | 0 |  RT | | 101.1 | RB  | RA!=0 | M | nor/mr
-    | N | 0 |  RT | | 100.0 | RB  | 0 0 0 | M | extsw
+    | N | 0 |  RT | | 100.0 | RB  | 0 0 0 | M | popcnt
      | N | 0 |  RT | | 100.1 | RB  | 0 0 0 | M | cntlz
-    | N | 0 |  RT | | 101.0 | RB  | 0 0 0 | M | popcnt
+    | N | 0 |  RT | | 101.0 | RB  | 0 0 0 | M | extsw
      | N | 0 |  RT | | 101.1 | RB  | 0 0 0 | M | not
  
  16-bit mode only (note that bit 1 == 1):
@@ -441,9 +447,9 @@ Notes:
      | N | 1 |  RT | | 100.1 | RB  | RA!=0 | M | TBD
      | N | 1 |  RT | | 101.0 | RB  | RA!=0 | M | xor
      | N | 1 |  RT | | 101.1 | RB  | RA!=0 | M | eqv (xnor)
-    | N | 1 |  RT | | 100.0 | RB  | 0 0 0 | M | extsb
+    | N | 1 |  RT | | 100.0 | RB  | 0 0 0 | M | setvl.
      | N | 1 |  RT | | 100.1 | RB  | 0 0 0 | M | cnttz
-    | N | 1 |  RT | | 101.0 | RB  | 0 0 0 | M | TBD
+    | N | 1 |  RT | | 101.0 | RB  | 0 0 0 | M | extsb
      | N | 1 |  RT | | 101.1 | RB  | 0 0 0 | M | extsh
  
  10 bit mode:
@@ -559,24 +565,27 @@ space (when RA==0)
  
  **not available** in 10-bit mode, **only** in 16-bit mode:
  
-    | 0 | 1234 | | 567.8 | 9 ab | cde  | f |
-    | - | ---- | | ----- | ---- | ---- | - |
-    | N | 1110 | | 001.1 | 0 00 |  RT  | M | mtlr
-    | N | 1110 | | 001.1 | 0 01 |  RT  | M | mtctr
-    | N | 1110 | | 001.1 | 0 11 |  RT  | M | mtcr
-    | N | 1111 | | 001.1 | 0 00 |  RA  | M | mflr
-    | N | 1111 | | 001.1 | 0 01 |  RA  | M | mfctr
-    | N | 1111 | | 001.1 | 0 11 |  RA  | M | mfcr
+    | 0 | 1 | 234 | | 567.8 | 9 ab | cde  | f |
+    | - | ------- | | ----- | ---- | ---- | - |
+    | N | 1 | 111 | | 001.1 | 0 00 |  RT  | M | mtlr
+    | N | 1 | 111 | | 001.1 | 0 01 |  RT  | M | mtctr
+    | N | 1 | 111 | | 001.1 | 0 00 |  RA  | M | mflr
+    | N | 1 | 111 | | 001.1 | 0 01 |  RA  | M | mfctr
+    | N | 0 RA!=0 | | 000.0 | 0 00 |  000 | M | mtcr
+    | N | 1 RT!=0 | | 000.0 | 0 00 |  000 | M | mfcr
  
  ### Unallocated
  
-    | 0 | 123 | 4 | | 567.8 | 9 ab | cde  | f |
-    | - | --- | - | | ----- | ---- | ---- | - |
-    | N | 111 |   | | 001.1 | 0 10 |      | M |
+16-bit only:
+
+    | 0 | 1 | 234 | | 567.8 | 9 ab | cde  | f |
+    | - | - | --- | | ----- | ---- | ---- | - |
+    | N | 1 | 111 | | 001.1 | 0 10 |      | M |
+    | N | 1 | 111 | | 001.1 | 0 11 |      | M |
  
-## Other ideas (Attempt 2)
+# Other ideas (Attempt 2)
  
-### 8-bit mode-switching instructions, odd addresses for C mode
+## 8-bit mode-switching instructions, odd addresses for C mode
  
  Drop the complexity of the 16-bit encoding further reduced to 10-bit,
  and use a single byte instead of two to switch between modes.  This
@@ -619,6 +628,14 @@ Tables explaining encoding:
      | .. bit | 16 bit          | 8nop   |
      | v3.0B standard 32 bit instruction |
  
+# Other ideas (v3)
+
+FSM state switching and mode switching were deemed too complex.  Instead, cut back to:
+
+1. 10-bit only (actually, 11-bit)
+2. SV-Prefixed 16-bit only (aka SV-C32)
+
+Each will be entirely different, which is a huge amount of work.
  
  # TODO