From 6f41de63448539506491c6eb6c0eca041b32f92f Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan@linux.vnet.ibm.com>
Date: Thu, 7 Jun 2018 02:30:09 +0530
Subject: [PATCH] arch-power: Introduce proper opcode fields

This introduces separate extended opcode fields for DS, X, XFL,
XFX, XL and XO form instructions and renames the primary opcode
field from OPCODE to PO as listed in the Power ISA manual.

Scenarios where multiple instructions of different forms share
the same primary opcode have also been addressed by using the
correct extended opcode fields for decoding.

Change-Id: I4a01820f6a6326ef79330221b717952c6b9cbba3
Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 src/arch/power/isa/bitfields.isa       |  10 +-
 src/arch/power/isa/decoder.isa         | 274 ++++++++++++++-----------
 src/arch/power/isa/formats/unimp.isa   |   3 +-
 src/arch/power/isa/formats/unknown.isa |   4 +-
 4 files changed, 162 insertions(+), 129 deletions(-)

diff --git a/src/arch/power/isa/bitfields.isa b/src/arch/power/isa/bitfields.isa
index 6cc67dd58..3ea6d8c36 100644
--- a/src/arch/power/isa/bitfields.isa
+++ b/src/arch/power/isa/bitfields.isa
@@ -34,10 +34,14 @@
 // are reversed sometimes. Not sure of a fix to this though...
 
 // Opcode fields
-def bitfield OPCODE        <31:26>;
-def bitfield X_XO          <10:0>;
-def bitfield XO_XO         <10:1>;
+def bitfield PO            <31:26>;
 def bitfield A_XO          <5:1>;
+def bitfield DS_XO         <1:0>;
+def bitfield X_XO          <10:1>;
+def bitfield XFL_XO        <10:1>;
+def bitfield XFX_XO        <10:1>;
+def bitfield XL_XO         <10:1>;
+def bitfield XO_XO         <9:1>;
 
 // Register fields
 def bitfield RA            <20:16>;
diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa
index cd08bfdce..4dd555c72 100644
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -34,7 +34,7 @@
 // I've used the Power ISA Book I v2.06 for instruction formats,
 // opcode numbers, register names, etc.
 //
-decode OPCODE default Unknown::unknown() {
+decode PO default Unknown::unknown() {
 
     18: decode AA {
 
@@ -62,7 +62,7 @@ decode OPCODE default Unknown::unknown() {
         }
     }
 
-    19: decode XO_XO {
+    19: decode XL_XO {
 
         // Conditionally branch to address in LR based on CR and CTR.
         format BranchLrCondCtr {
@@ -145,9 +145,14 @@ decode OPCODE default Unknown::unknown() {
         40: lhz({{ Rt = Mem_uh; }});
         42: lha({{ Rt = Mem_sh; }});
         32: lwz({{ Rt = Mem; }});
-        58: lwa({{ Rt = Mem_sw; }},
-                {{ EA = Ra + (disp & 0xfffffffc); }},
-                {{ EA = disp & 0xfffffffc; }});
+    }
+
+    58: decode DS_XO {
+        format LoadDispOp {
+            2: lwa({{ Rt = Mem_sw; }},
+                   {{ EA = Ra + (disp & 0xfffffffc); }},
+                   {{ EA = disp & 0xfffffffc; }});
+        }
     }
 
     format LoadDispUpdateOp {
@@ -225,12 +230,18 @@ decode OPCODE default Unknown::unknown() {
                            (Ra & ~fullMask); }});
     }
 
-    // Some instructions use bits 21 - 30, others 22 - 30. We have to use
-    // the larger size to account for all opcodes. For those that use the
-    // smaller value, the OE bit is bit 21. Therefore, we have two versions
-    // of each instruction: 1 with OE set, the other without. For an
-    // example see 'add' and 'addo'.
-    31: decode XO_XO {
+    // There are a large number of instructions that have the same primary
+    // opcode (PO) of 31. In this case, the instructions are of different
+    // forms. For every form, the XO fields may vary in position and width.
+    // The X, XFL, XFX and XL form instructions use bits 21 - 30 and the
+    // XO form instructions use bits 22 - 30 as extended opcode (XO). To
+    // avoid conflicts, instructions of each form have to be defined under
+    // separate decode blocks. However, only a single decode block can be
+    // associated with a particular PO and it will recognize only one type
+    // of XO field. A solution for associating decode blocks for the other
+    // types of XO fields with the same PO is to have the other blocks as
+    // nested default cases.
+    31: decode X_XO {
 
         // All loads with an index register. The non-update versions
         // all use the value 0 if Ra == R0, not the value contained in
@@ -286,94 +297,6 @@ decode OPCODE default Unknown::unknown() {
             183: stwux({{ Mem = Rs; }});
         }
 
-        // These instructions can all be reduced to the form
-        // Rt = src1 + src2 [+ CA], therefore we just give src1 and src2
-        // (and, if necessary, CA) definitions and let the python script
-        // deal with setting things up correctly. We also give flags to
-        // say which control registers to set.
-        format IntSumOp {
-            266: add({{ Ra }}, {{ Rb }});
-            40: subf({{ ~Ra }}, {{ Rb }}, {{ 1 }});
-            10: addc({{ Ra }}, {{ Rb }},
-                     computeCA = true);
-            8: subfc({{ ~Ra }}, {{ Rb }}, {{ 1 }},
-                     true);
-            104: neg({{ ~Ra }}, {{ 1 }});
-            138: adde({{ Ra }}, {{ Rb }}, {{ xer.ca }},
-                      true);
-            234: addme({{ Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
-                       true);
-            136: subfe({{ ~Ra }}, {{ Rb }}, {{ xer.ca }},
-                       true);
-            232: subfme({{ ~Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
-                        true);
-            202: addze({{ Ra }}, {{ xer.ca }},
-                       computeCA = true);
-            200: subfze({{ ~Ra }}, {{ xer.ca }},
-                        computeCA = true);
-        }
-
-        // Arithmetic instructions all use source registers Ra and Rb,
-        // with destination register Rt.
-        format IntArithOp {
-            75: mulhw({{ int64_t prod = Ra_sd * Rb_sd; Rt = prod >> 32; }});
-            11: mulhwu({{ uint64_t prod = Ra_ud * Rb_ud; Rt = prod >> 32; }});
-            235: mullw({{ int64_t prod = Ra_sd * Rb_sd; Rt = prod; }});
-            747: mullwo({{
-                int64_t src1 = Ra_sd;
-                int64_t src2 = Rb;
-                int64_t prod = src1 * src2;
-                Rt = prod;
-            }},
-            true);
-
-            491: divw({{
-                int32_t src1 = Ra_sw;
-                int32_t src2 = Rb_sw;
-                if ((src1 != 0x80000000 || src2 != 0xffffffff)
-                    && src2 != 0) {
-                    Rt = src1 / src2;
-                } else {
-                    Rt = 0;
-                }
-            }});
-
-            1003: divwo({{
-                int32_t src1 = Ra_sw;
-                int32_t src2 = Rb_sw;
-                if ((src1 != 0x80000000 || src2 != 0xffffffff)
-                    && src2 != 0) {
-                    Rt = src1 / src2;
-                } else {
-                    Rt = 0;
-                    divSetOV = true;
-                }
-            }},
-            true);
-
-            459: divwu({{
-                uint32_t src1 = Ra_sw;
-                uint32_t src2 = Rb_sw;
-                if (src2 != 0) {
-                    Rt = src1 / src2;
-                } else {
-                    Rt = 0;
-                }
-            }});
-
-            971: divwuo({{
-              uint32_t src1 = Ra_sw;
-              uint32_t src2 = Rb_sw;
-              if (src2 != 0) {
-                  Rt = src1 / src2;
-              } else {
-                  Rt = 0;
-                  divSetOV = true;
-              }
-            }},
-            true);
-        }
-
         format IntOp {
             0: cmp({{
                 Xer xer = XER;
@@ -492,16 +415,6 @@ decode OPCODE default Unknown::unknown() {
 
         // Generic integer format instructions.
         format IntOp {
-            144: mtcrf({{
-                uint32_t mask = 0;
-                for (int i = 0; i < 8; ++i) {
-                    if (((FXM >> i) & 0x1) == 0x1) {
-                        mask |= 0xf << (4 * i);
-                    }
-                }
-                CR = (Rs & mask) | (CR & ~mask);
-                }});
-            19: mfcr({{ Rt = CR; }});
             339: decode SPR {
                 0x20: mfxer({{ Rt = XER; }});
                 0x100: mflr({{ Rt = LR; }});
@@ -532,6 +445,120 @@ decode OPCODE default Unknown::unknown() {
             598: sync({{ }}, [ IsMemBarrier ]);
             854: eieio({{ }}, [ IsMemBarrier ]);
         }
+
+        // These instructions are of XO form with bit 21 as the OE bit.
+        default: decode XO_XO {
+
+            // These instructions can all be reduced to the form
+            // Rt = src1 + src2 [+ CA], therefore we just give src1 and src2
+            // (and, if necessary, CA) definitions and let the python script
+            // deal with setting things up correctly. We also give flags to
+            // say which control registers to set.
+            format IntSumOp {
+                266: add({{ Ra }}, {{ Rb }});
+                40: subf({{ ~Ra }}, {{ Rb }}, {{ 1 }});
+                10: addc({{ Ra }}, {{ Rb }},
+                         computeCA = true);
+                8: subfc({{ ~Ra }}, {{ Rb }}, {{ 1 }},
+                         true);
+                104: neg({{ ~Ra }}, {{ 1 }});
+                138: adde({{ Ra }}, {{ Rb }}, {{ xer.ca }},
+                          true);
+                234: addme({{ Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                           true);
+                136: subfe({{ ~Ra }}, {{ Rb }}, {{ xer.ca }},
+                           true);
+                232: subfme({{ ~Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                            true);
+                202: addze({{ Ra }}, {{ xer.ca }},
+                           computeCA = true);
+                200: subfze({{ ~Ra }}, {{ xer.ca }},
+                            computeCA = true);
+            }
+
+            // Arithmetic instructions use Ra and Rb as sources and Rt as the
+            // destination. XO_XO is only 9 bits wide, so the OE bit (bit 21)
+            // picks the overflow-recording variant of each instruction.
+            format IntArithOp {
+                75: mulhw({{
+                    int64_t prod = Ra_sd * Rb_sd;
+                    Rt = prod >> 32;
+                }});
+                11: mulhwu({{
+                    uint64_t prod = Ra_ud * Rb_ud;
+                    Rt = prod >> 32;
+                }});
+                235: decode OE {
+                    0: mullw({{ int64_t prod = Ra_sd * Rb_sd; Rt = prod; }});
+                    1: mullwo({{
+                        int64_t src1 = Ra_sd;
+                        int64_t src2 = Rb;
+                        int64_t prod = src1 * src2;
+                        Rt = prod;
+                    }}, true);
+                }
+                491: decode OE {
+                    0: divw({{
+                        int32_t src1 = Ra_sw;
+                        int32_t src2 = Rb_sw;
+                        if ((src1 != 0x80000000 || src2 != 0xffffffff)
+                            && src2 != 0) {
+                            Rt = src1 / src2;
+                        } else {
+                            Rt = 0;
+                        }
+                    }});
+                    1: divwo({{
+                        int32_t src1 = Ra_sw;
+                        int32_t src2 = Rb_sw;
+                        if ((src1 != 0x80000000 || src2 != 0xffffffff)
+                            && src2 != 0) {
+                            Rt = src1 / src2;
+                        } else {
+                            Rt = 0;
+                            divSetOV = true;
+                        }
+                    }}, true);
+                }
+                459: decode OE {
+                    0: divwu({{
+                        uint32_t src1 = Ra_sw;
+                        uint32_t src2 = Rb_sw;
+                        if (src2 != 0) {
+                            Rt = src1 / src2;
+                        } else {
+                            Rt = 0;
+                        }
+                    }});
+                    1: divwuo({{
+                        uint32_t src1 = Ra_sw;
+                        uint32_t src2 = Rb_sw;
+                        if (src2 != 0) {
+                            Rt = src1 / src2;
+                        } else {
+                            Rt = 0;
+                            divSetOV = true;
+                        }
+                    }}, true);
+                }
+            }
+
+            default: decode XFX_XO {
+                format IntOp {
+                    144: mtcrf({{
+                        uint32_t mask = 0;
+                        for (int i = 0; i < 8; ++i) {
+                            if (((FXM >> i) & 0x1) == 0x1) {
+                                mask |= 0xf << (4 * i);
+                            }
+                        }
+                        CR = (Rs & mask) | (CR & ~mask);
+                    }});
+
+                    19: mfcr({{ Rt = CR; }});
+                }
+            }
+        }
     }
 
     format LoadDispOp {
@@ -579,7 +606,7 @@ decode OPCODE default Unknown::unknown() {
             30: fnmsub({{ Ft = -((Fa * Fc) - Fb); }});
         }
 
-        default: decode XO_XO {
+        default: decode X_XO {
             format FloatRCCheckOp {
                 72: fmr({{ Ft = Fb; }});
                 264: fabs({{
@@ -616,20 +643,23 @@ decode OPCODE default Unknown::unknown() {
                     FPSCR = insertCRField(FPSCR, BF + (8 * (1 - W_FIELD)),
                                           U_FIELD);
                 }});
-                711: mtfsf({{
-                    if (L_FIELD == 1) { FPSCR = Fb_ud; }
-                    else {
-                        for (int i = 0; i < 8; ++i) {
-                            if (bits(FLM, i) == 1) {
-                                int k = 4 * (i + (8 * (1 - W_FIELD)));
-                                FPSCR = insertBits(FPSCR, k + 3, k,
-                                                   bits(Fb_ud, k + 3, k));
-                            }
-                        }
-                    }
-                }});
                 70: mtfsb0({{ FPSCR = insertBits(FPSCR, 31 - BT, 0); }});
                 38: mtfsb1({{ FPSCR = insertBits(FPSCR, 31 - BT, 1); }});
+
+                default: decode XFL_XO {
+                    711: mtfsf({{
+                        if (L_FIELD == 1) { FPSCR = Fb_ud; }
+                        else {
+                            for (int i = 0; i < 8; ++i) {
+                                if (bits(FLM, i) == 1) {
+                                    int k = 4 * (i + (8 * (1 - W_FIELD)));
+                                    FPSCR = insertBits(FPSCR, k + 3, k,
+                                                       bits(Fb_ud, k + 3, k));
+                                }
+                            }
+                        }
+                    }});
+                }
             }
         }
     }
diff --git a/src/arch/power/isa/formats/unimp.isa b/src/arch/power/isa/formats/unimp.isa
index fef28ce5b..007fcccbe 100644
--- a/src/arch/power/isa/formats/unimp.isa
+++ b/src/arch/power/isa/formats/unimp.isa
@@ -112,7 +112,7 @@ output exec {{
                                Trace::InstRecord *traceData) const
     {
         panic("attempt to execute unimplemented instruction '%s' "
-              "(inst 0x%08x, opcode 0x%x, binary:%s)", mnemonic, machInst, OPCODE,
+              "(inst 0x%08x, opcode 0x%x, binary:%s)", mnemonic, machInst, PO,
               inst2string(machInst));
         return std::make_shared<UnimplementedOpcodeFault>();
     }
@@ -140,4 +140,3 @@ def format WarnUnimpl() {{
     iop = InstObjParams(name, 'WarnUnimplemented')
     decode_block = BasicDecodeWithMnemonic.subst(iop)
 }};
-
diff --git a/src/arch/power/isa/formats/unknown.isa b/src/arch/power/isa/formats/unknown.isa
index d0f81f1ff..d83f79cf2 100644
--- a/src/arch/power/isa/formats/unknown.isa
+++ b/src/arch/power/isa/formats/unknown.isa
@@ -63,7 +63,7 @@ output decoder {{
             Addr pc, const Loader::SymbolTable *symtab) const
     {
         return csprintf("%-10s (inst 0x%x, opcode 0x%x, binary:%s)",
-                        "unknown", machInst, OPCODE, inst2string(machInst));
+                        "unknown", machInst, PO, inst2string(machInst));
     }
 }};
 
@@ -73,7 +73,7 @@ output exec {{
     {
         panic("attempt to execute unknown instruction at %#x"
               "(inst 0x%08x, opcode 0x%x, binary: %s)",
-              xc->pcState().pc(), machInst, OPCODE, inst2string(machInst));
+              xc->pcState().pc(), machInst, PO, inst2string(machInst));
         return std::make_shared<UnimplementedOpcodeFault>();
     }
 }};
-- 
2.30.2