x86: decode instructions with vex prefix
authorNilay Vaish <nilay@cs.wisc.edu>
Fri, 17 Jul 2015 16:31:22 +0000 (11:31 -0500)
committerNilay Vaish <nilay@cs.wisc.edu>
Fri, 17 Jul 2015 16:31:22 +0000 (11:31 -0500)
This patch updates the x86 decoder so that it can decode instructions with a
VEX prefix. It also updates the ISA with opcodes from VEX opcode maps 1, 2 and 3.
Note that none of the instructions have been implemented yet; the
implementations will be provided in due course.

src/arch/x86/decoder.cc
src/arch/x86/decoder.hh
src/arch/x86/decoder_tables.cc
src/arch/x86/isa/bitfields.isa
src/arch/x86/isa/decoder/decoder.isa
src/arch/x86/isa/decoder/vex_opcodes.isa [new file with mode: 0644]
src/arch/x86/isa_traits.hh
src/arch/x86/types.cc
src/arch/x86/types.hh

index 59f2e0f4f38bdecbf49c99c5a5ca761b26978800..fb5a4e001434e4fc277b440a8e2f0bf97bbae841 100644 (file)
@@ -48,6 +48,8 @@ Decoder::doResetState()
 
     emi.rex = 0;
     emi.legacy = 0;
+    emi.vex = 0;
+
     emi.opcode.type = BadOpcode;
     emi.opcode.op = 0;
 
@@ -93,6 +95,19 @@ Decoder::process()
           case PrefixState:
             state = doPrefixState(nextByte);
             break;
+
+          case TwoByteVexState:
+            state = doTwoByteVexState(nextByte);
+            break;
+
+          case ThreeByteVexFirstState:
+            state = doThreeByteVexFirstState(nextByte);
+            break;
+
+          case ThreeByteVexSecondState:
+            state = doThreeByteVexSecondState(nextByte);
+            break;
+
           case OneByteOpcodeState:
             state = doOneByteOpcodeState(nextByte);
             break;
@@ -206,15 +221,68 @@ Decoder::doPrefixState(uint8_t nextByte)
         DPRINTF(Decoder, "Found Rex prefix %#x.\n", nextByte);
         emi.rex = nextByte;
         break;
+
+      case Vex2Prefix:
+        DPRINTF(Decoder, "Found VEX two-byte prefix %#x.\n", nextByte);
+        emi.vex.zero = nextByte;
+        nextState = TwoByteVexState;
+        break;
+
+      case Vex3Prefix:
+        DPRINTF(Decoder, "Found VEX three-byte prefix %#x.\n", nextByte);
+        emi.vex.zero = nextByte;
+        nextState = ThreeByteVexFirstState;
+        break;
+
       case 0:
         nextState = OneByteOpcodeState;
         break;
+
       default:
         panic("Unrecognized prefix %#x\n", nextByte);
     }
     return nextState;
 }
 
+Decoder::State
+Decoder::doTwoByteVexState(uint8_t nextByte)
+{
+    assert(emi.vex.zero == 0xc5);
+    consumeByte();
+    TwoByteVex tbe = 0;
+    tbe.first = nextByte;
+
+    emi.vex.first.r = tbe.first.r;
+    emi.vex.first.x = 1;
+    emi.vex.first.b = 1;
+    emi.vex.first.map_select = 1;
+
+    emi.vex.second.w = 0;
+    emi.vex.second.vvvv = tbe.first.vvvv;
+    emi.vex.second.l = tbe.first.l;
+    emi.vex.second.pp = tbe.first.pp;
+
+    emi.opcode.type = Vex;
+    return OneByteOpcodeState;
+}
+
+Decoder::State
+Decoder::doThreeByteVexFirstState(uint8_t nextByte)
+{
+    // Handles the byte that immediately follows the three-byte VEX escape.
+    // doPrefixState stores the escape byte (0xc4) in emi.vex.zero before
+    // selecting this state; check it the same way the two-byte handler
+    // checks for 0xc5.
+    assert(emi.vex.zero == 0xc4);
+    consumeByte();
+
+    // Keep the raw byte; its r, x, b and map_select fields are read later
+    // through emi.vex.first.
+    emi.vex.first = nextByte;
+    return ThreeByteVexSecondState;
+}
+
+Decoder::State
+Decoder::doThreeByteVexSecondState(uint8_t nextByte)
+{
+    // Handles the last byte of a three-byte VEX prefix. This state is only
+    // reached through doThreeByteVexFirstState, i.e. after the 0xc4 escape
+    // byte was recorded in emi.vex.zero.
+    assert(emi.vex.zero == 0xc4);
+    consumeByte();
+
+    // Keep the raw byte; its w, vvvv, l and pp fields are read later
+    // through emi.vex.second.
+    emi.vex.second = nextByte;
+
+    // The prefix is complete, so the next byte is the opcode itself.
+    emi.opcode.type = Vex;
+    return OneByteOpcodeState;
+}
+
 // Load the first opcode byte. Determine if there are more opcode bytes, and
 // if not, what immediate and/or ModRM is needed.
 Decoder::State
@@ -222,7 +290,13 @@ Decoder::doOneByteOpcodeState(uint8_t nextByte)
 {
     State nextState = ErrorState;
     consumeByte();
-    if (nextByte == 0x0f) {
+
+    if (emi.vex.zero != 0) {
+        DPRINTF(Decoder, "Found VEX opcode %#x.\n", nextByte);
+        emi.opcode.op = nextByte;
+        const uint8_t opcode_map = emi.vex.first.map_select;
+        nextState = processExtendedOpcode(ImmediateTypeVex[opcode_map]);
+    } else if (nextByte == 0x0f) {
         nextState = TwoByteOpcodeState;
         DPRINTF(Decoder, "Found opcode escape byte %#x.\n", nextByte);
     } else {
@@ -346,6 +420,54 @@ Decoder::processOpcode(ByteTable &immTable, ByteTable &modrmTable,
     return nextState;
 }
 
+Decoder::State
+Decoder::processExtendedOpcode(ByteTable &immTable)
+{
+    //Figure out the effective operand size. This can be overriden to
+    //a fixed value at the decoder level.
+    int logOpSize;
+    if (emi.vex.second.w)
+        logOpSize = 3; // 64 bit operand size
+    else if (emi.vex.second.pp == 1)
+        logOpSize = altOp;
+    else
+        logOpSize = defOp;
+
+    //Set the actual op size
+    emi.opSize = 1 << logOpSize;
+
+    //Figure out the effective address size. This can be overriden to
+    //a fixed value at the decoder level.
+    int logAddrSize;
+    if(emi.legacy.addr)
+        logAddrSize = altAddr;
+    else
+        logAddrSize = defAddr;
+
+    //Set the actual address size
+    emi.addrSize = 1 << logAddrSize;
+
+    //Figure out the effective stack width. This can be overriden to
+    //a fixed value at the decoder level.
+    emi.stackSize = 1 << stack;
+
+    //Figure out how big of an immediate we'll retreive based
+    //on the opcode.
+    const uint8_t opcode = emi.opcode.op;
+
+    if (emi.vex.zero == 0xc5 || emi.vex.zero == 0xc4) {
+        int immType = immTable[opcode];
+        // Assume 64-bit mode;
+        immediateSize = SizeTypeToSize[2][immType];
+    }
+
+    if (opcode == 0x77) {
+        instDone = true;
+        return ResetState;
+    }
+    return ModRMState;
+}
+
 //Get the ModRM byte and determine what displacement, if any, there is.
 //Also determine whether or not to get the SIB byte, displacement, or
 //immediate next.
@@ -353,8 +475,7 @@ Decoder::State
 Decoder::doModRMState(uint8_t nextByte)
 {
     State nextState = ErrorState;
-    ModRM modRM;
-    modRM = nextByte;
+    ModRM modRM = nextByte;
     DPRINTF(Decoder, "Found modrm byte %#x.\n", nextByte);
     if (defOp == 1) {
         //figure out 16 bit displacement size
index a37fccfb1308d2122314ddd734b7fbbdcedc8d66..6cd0c6199a9d73c9aa5beffb1665b1d947770a9d 100644 (file)
@@ -64,6 +64,7 @@ class Decoder
     static ByteTable ImmediateTypeTwoByte;
     static ByteTable ImmediateTypeThreeByte0F38;
     static ByteTable ImmediateTypeThreeByte0F3A;
+    static ByteTable ImmediateTypeVex[10];
 
   protected:
     struct InstBytes
@@ -175,6 +176,9 @@ class Decoder
         ResetState,
         FromCacheState,
         PrefixState,
+        TwoByteVexState,
+        ThreeByteVexFirstState,
+        ThreeByteVexSecondState,
         OneByteOpcodeState,
         TwoByteOpcodeState,
         ThreeByte0F38OpcodeState,
@@ -193,6 +197,9 @@ class Decoder
     State doResetState();
     State doFromCacheState();
     State doPrefixState(uint8_t);
+    State doTwoByteVexState(uint8_t);
+    State doThreeByteVexFirstState(uint8_t);
+    State doThreeByteVexSecondState(uint8_t);
     State doOneByteOpcodeState(uint8_t);
     State doTwoByteOpcodeState(uint8_t);
     State doThreeByte0F38OpcodeState(uint8_t);
@@ -205,6 +212,8 @@ class Decoder
     //Process the actual opcode found earlier, using the supplied tables.
     State processOpcode(ByteTable &immTable, ByteTable &modrmTable,
                         bool addrSizedImm = false);
+    // Process the opcode found with VEX / XOP prefix.
+    State processExtendedOpcode(ByteTable &immTable);
 
   protected:
     /// Caching for decoded instruction objects.
index d5fcceef899b5ff5e3d93b39069d6cd252c4916b..58160756c5c767ebda9fe86d86188de1d085339a 100644 (file)
@@ -55,6 +55,8 @@ namespace X86ISA
     const uint8_t RE = Rep;
     const uint8_t RN = Repne;
     const uint8_t RX = RexPrefix;
+    const uint8_t V2 = Vex2Prefix;
+    const uint8_t V3 = Vex3Prefix;
 
     //This table identifies whether a byte is a prefix, and if it is,
     //which prefix it is.
@@ -73,7 +75,7 @@ namespace X86ISA
 /*   9*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
 /*   A*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
 /*   B*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
-/*   C*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
+/*   C*/ 0 , 0 , 0 , 0 , V3, V2, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
 /*   D*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
 /*   E*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
 /*   F*/ LO, 0 , RN, RE, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
@@ -282,4 +284,74 @@ namespace X86ISA
 /*  E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
 /*  F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
     };
+
+    const Decoder::ByteTable Decoder::ImmediateTypeVex[10] =
+    {
+        // Table for opcode map 1
+        {
+            //LSB
+    // MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
+    /*  0 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  1 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  2 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  6 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  7 */ BY, BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  8 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  A */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  C */ 0 , 0 , BY, 0 , BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  D */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+        },
+
+        // Table for opcode map 2
+        {
+            //LSB
+    // MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
+    /*  0 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  1 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  2 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  6 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  7 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  8 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  A */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  C */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  D */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+        },
+
+        // Table for opcode map 3
+        {
+            //LSB
+    // MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
+    /*  0 */ 0 , 0 , 0 , 0 , BY, BY, BY, 0 , BY, BY, BY, BY, BY, BY, BY, BY,
+    /*  1 */ 0 , 0 , 0 , 0 , BY, BY, BY, BY, BY, BY, 0 , 0 , 0 , BY, 0 , 0 ,
+    /*  2 */ BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  4 */ BY, BY, BY, 0 , BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  6 */ BY, BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  7 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  8 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  A */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  C */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  D */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , BY,
+    /*  E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
+    /*  F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
+        },
+        {}, {}, {}, {}, {}, {}, {}
+    };
 }
index e2751a8efcb14a92d8a8222e9d80bc58e3ec0bc8..b5121f4e36a8453ce9e8a6b0928aa07b7dc02c43 100644 (file)
@@ -87,3 +87,12 @@ def bitfield STACKSIZE stackSize;
 def bitfield MODE mode;
 def bitfield MODE_MODE mode.mode;
 def bitfield MODE_SUBMODE mode.submode;
+
+def bitfield VEX_R vex.first.r;
+def bitfield VEX_X vex.first.x;
+def bitfield VEX_B vex.first.b;
+def bitfield VEX_MAP vex.first.map_select;
+def bitfield VEX_W vex.second.w;
+def bitfield VEX_VVVV vex.second.vvvv;
+def bitfield VEX_L vex.second.l;
+def bitfield VEX_PP vex.second.pp;
index eaa579817ab11650514e9a154e48bfd3cf38ddf9..07006fe1adeacf1f2c2bc19ac8bf035d7a7d6679 100644 (file)
@@ -49,6 +49,7 @@ decode LEGACY_LOCK default Unknown::unknown()
         ##include "two_byte_opcodes.isa"
         ##include "three_byte_0f38_opcodes.isa"
         ##include "three_byte_0f3a_opcodes.isa"
+        ##include "vex_opcodes.isa"
     }
     //Lock prefix
     ##include "locked_opcodes.isa"
diff --git a/src/arch/x86/isa/decoder/vex_opcodes.isa b/src/arch/x86/isa/decoder/vex_opcodes.isa
new file mode 100644 (file)
index 0000000..0f412fe
--- /dev/null
@@ -0,0 +1,1431 @@
+// Copyright (c) 2015 Mark D. Hill and David A. Wood
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Nilay Vaish
+
+////////////////////////////////////////////////////////////////////
+//
+// Decode the opcodes with vex prefix.
+//
+format WarnUnimpl {
+    'X86ISA::Vex': decode VEX_MAP {
+        0x01: decode OPCODE_OP_TOP5 {
+            0x02: decode VEX_PP {
+                0x0: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_VVVV {
+                        0x15: vmovups();
+                        default: Inst::UD2();
+                    }
+                    0x1: decode VEX_VVVV {
+                        0x15: vmovups();
+                        default: Inst::UD2();
+                    }
+                    0x2: decode VEX_L {
+                        0x0: decode MODRM_MOD {
+                            0x03: vmovhlps();
+                            default: decode VEX_VVVV {
+                                0x15: vmovlps();
+                                default: Inst::UD2();
+                            }
+                        }
+                        default: Inst::UD2();
+                    }
+                    0x3: decode VEX_VVVV {
+                        0x15: decode VEX_L {
+                            0x0: vmovlps();
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+                    0x4: vunpcklps();
+                    0x5: vunpckhps();
+                    0x6: decode VEX_L {
+                        0x0: decode MODRM_MOD {
+                            0x03: vmovlhps();
+                            0x0: vmovhps();
+                        }
+                        default: Inst::UD2();
+                    }
+                    0x7: decode VEX_L {
+                        0x0: vmovhps();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_VVVV {
+                        0x15: vmovupd();
+                        default: Inst::UD2();
+                    }
+                    0x1: decode VEX_VVVV {
+                        0x15: vmovupd();
+                        default: Inst::UD2();
+                    }
+                    0x2: decode VEX_L {
+                        0x0: vmovlpd();
+                        default: Inst::UD2();
+                    }
+                    0x3: decode VEX_L {
+                        0x0: vmovlpd();
+                        default: Inst::UD2();
+                    }
+                    0x4: vunpcklpd();
+                    0x5: vunpckhpd();
+                    0x6: decode VEX_L {
+                        0x0: vmovhpd();
+                        default: Inst::UD2();
+                    }
+                    0x7: decode VEX_L {
+                        0x0: vmovhpd();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                0x2: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode MODRM_MOD {
+                        0x03: vmovss();
+                        default: vmovss();
+                    }
+                    0x1: decode MODRM_MOD {
+                        0x03: vmovss();
+                        default: vmovss();
+                    }
+                    0x2: decode VEX_VVVV {
+                        0x15: vmovsldup();
+                        default: Inst::UD2();
+                    }
+                    0x6: decode VEX_VVVV {
+                        0x15: vmovshdup();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                0x3: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode MODRM_MOD {
+                        0x03: vmovsd();
+                        default: vmovsd();
+                    }
+                    0x1: decode MODRM_MOD {
+                        0x03: vmovsd();
+                        default: vmovsd();
+                    }
+                    0x2: decode VEX_VVVV {
+                        0x15: decode VEX_L {
+                            0x0: vmovddup();
+                            default: vmovddup();
+                        }
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0A: decode VEX_PP {
+                0x0: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_VVVV {
+                        0x15: vmovmskps();
+                        default: Inst::UD2();
+                    }
+                    0x1: decode VEX_VVVV {
+                        0x015: vsqrtps();
+                        default: Inst::UD2();
+                    }
+                    0x2: decode VEX_VVVV {
+                        0x15: vrsqrtps();
+                        default: Inst::UD2();
+                    }
+                    0x3: decode VEX_VVVV {
+                        0x15: vrcpps();
+                        default: Inst::UD2();
+                    }
+                    0x4: vandps();
+                    0x5: vandnps();
+                    0x6: vorps();
+                    0x7: vxorps();
+                    default: Inst::UD2();
+                }
+
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_VVVV {
+                        0x15: vmovmskpd();
+                        default: Inst::UD2();
+                    }
+                    0x1: decode VEX_VVVV {
+                        0x15: vsqrtpd();
+                        default: Inst::UD2();
+                    }
+                    0x4: vandpd();
+                    0x5: vandnpd();
+                    0x6: vorpd();
+                    0x7: vxorpd();
+                    default: Inst::UD2();
+                }
+
+                0x2: decode OPCODE_OP_BOTTOM3 {
+                    0x1: vsqrtss();
+                    0x2: vrsqrtss();
+                    0x3: vrcpss();
+                    default: Inst::UD2();
+                }
+
+                0x3: decode OPCODE_OP_BOTTOM3 {
+                    0x1: vsqrtsd();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0C: decode VEX_PP {
+                0x1: decode VEX_L {
+                    0x0: decode OPCODE_OP_BOTTOM3 {
+                        0x0: vpunpcklbw();
+                        0x1: vpunpcklwd();
+                        0x2: vpunpckldq();
+                        0x3: vpacksswb();
+                        0x4: vpcmpgtb();
+                        0x5: vpcmpgtw();
+                        0x6: vpcmpgtd();
+                        0x7: vpackuswb();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+                default: Inst::UD2();
+            }
+
+            0x0E: decode VEX_PP {
+                0x0: decode OPCODE_OP_BOTTOM3 {
+                    0x7: decode VEX_L {
+                        0x0: vzeroupper();
+                        0x1: vzeroall();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_VVVV {
+                        0x15: vpshufd();
+                        default: Inst::UD2();
+                    }
+                    0x1: decode VEX_L {
+                        0x0: decode MODRM_REG {
+                            0x2: vpsrlw();
+                            0x4: vpsraw();
+                            0x6: vpsllw();
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+
+                    0x2: decode VEX_L {
+                        0x0: decode MODRM_REG {
+                            0x2: vpsrld();
+                            0x4: vpsrad();
+                            0x6: vpslld();
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+
+                    0x3: decode VEX_L {
+                        0x0: decode MODRM_REG {
+                            0x2: vpsrlq();
+                            0x3: vpsrldq();
+                            0x6: vpsllq();
+                            0x7: vpslldq();
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+
+                    0x4: decode VEX_L {
+                        0x0: vpcmpeqb();
+                        default: Inst::UD2();
+                    }
+
+                    0x5: decode VEX_L {
+                        0x0: vpcmpeqw();
+                        default: Inst::UD2();
+                    }
+                    0x6: decode VEX_L {
+                        0x0: vpcmpeqd();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+                0x2: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_VVVV {
+                        0x15: decode VEX_L {
+                            0x0: vpshufhw();
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+                0x3: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_VVVV {
+                        0x15: decode VEX_L {
+                            0x0: vpshuflw();
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+                default: Inst::UD2();
+            }
+
+            0x18: decode VEX_PP {
+                0x0: decode OPCODE_OP_BOTTOM3 {
+                    0x2: vcmpccps();
+                    0x6: vshufps();
+                    default: Inst::UD2();
+                }
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x2: vcmpccpd();
+                    0x4: decode MODRM_MOD {
+                        0x03: vpinsrw();
+                        default: vpinsrw();
+                    }
+                    0x5: decode VEX_VVVV {
+                        0x15: decode VEX_L {
+                            0x0: vpextrw();
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+                    0x6: vshufpd();
+                    default: Inst::UD2();
+                }
+                0x2: decode OPCODE_OP_BOTTOM3 {
+                    0x2: vcmpccss();
+                    default: Inst::UD2();
+                }
+                0x3: decode OPCODE_OP_BOTTOM3 {
+                    0x2: vcmpccsd();
+                    default: Inst::UD2();
+                }
+                default: Inst::UD2();
+            }
+
+            0x1A: decode VEX_PP {
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vaddsubpd();
+                    0x1: decode VEX_L {
+                        0x0: vpsrlw();
+                        default: Inst::UD2();
+                    }
+                    0x2: decode VEX_L {
+                        0x0: vpsrld();
+                        default: Inst::UD2();
+                    }
+                    0x3: decode VEX_L {
+                        0x0: vpsrlq();
+                        default: Inst::UD2();
+                    }
+                    0x4: decode VEX_L {
+                        0x0: vpaddq();
+                        default: Inst::UD2();
+                    }
+                    0x5: decode VEX_L {
+                        0x0: vpmullw();
+                        default: Inst::UD2();
+                    }
+                    0x6: decode VEX_VVVV {
+                        0x15: decode VEX_L {
+                            0x0: vmovq();
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+                    0x7: decode VEX_VVVV {
+                        0x15: vpmovmskb();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+                0x3: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vaddsubps();
+                    default: Inst::UD2();
+                }
+                default: Inst::UD2();
+            }
+
+            0x1C: decode VEX_PP {
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_L {
+                        0x0: vpavgb();
+                        default: Inst::UD2();
+                    }
+                    0x1: vpsraw();
+                    0x2: vpsrad();
+                    0x3: decode VEX_L {
+                        0x0: vpavgw();
+                        default: Inst::UD2();
+                    }
+                    0x4: vpmulhuw();
+                    0x5: vpmulhw();
+                    0x6: vcvttpd2dq();
+                    0x7: decode VEX_VVVV {
+                        0x015: decode VEX_L {
+                            0x0: vmovntdq();
+                            default: vmovntdq();
+                        }
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                0x2: decode OPCODE_OP_BOTTOM3 {
+                    0x6: vcvtdq2pd();
+                    default: Inst::UD2();
+                }
+
+                0x3: decode OPCODE_OP_BOTTOM3 {
+                    0x6: vcvtpd2dq();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x1E: decode VEX_PP {
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x1: vpsllw();
+                    0x2: vpslld();
+                    0x3: vpsllq();
+                    0x4: vpmuludq();
+                    0x5: vpmaddwd();
+                    0x6: vpsadbw();
+                    0x7: decode VEX_L {
+                        0x0: vmaskmovdqu();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                0x3: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_L {
+                        0x0: vlddqu();
+                        default: vlddqu();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x05: decode VEX_PP { // opcode bytes 0x28-0x2F if TOP5 is opcode[7:3] (TODO confirm)
+                0x0: decode VEX_VVVV {
+                    0xF: decode OPCODE_OP_BOTTOM3 { // vvvv is a 4-bit field; only all-ones is accepted
+                        0x0: vmovaps();
+                        0x1: vmovaps();
+                        0x3: vmovntps();
+                        0x6: vucomiss();
+                        0x7: vcomiss();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                0x1: decode VEX_VVVV {
+                    0xF: decode OPCODE_OP_BOTTOM3 { // vvvv is a 4-bit field; only all-ones is accepted
+                        0x0: vmovapd();
+                        0x1: vmovapd();
+                        0x3: vmovntpd();
+                        0x6: vucomisd();
+                        0x7: vcomisd();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                0x2: decode OPCODE_OP_BOTTOM3 {
+                    0x2: vcvtsi2ss();
+                    0x4: vcvttss2si();
+                    0x5: vcvtss2si();
+                    default: Inst::UD2();
+                }
+
+                0x3: decode OPCODE_OP_BOTTOM3 {
+                    0x2: vcvtsi2sd();
+                    0x4: vcvttsd2si();
+                    0x5: vcvtsd2si();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0B: decode VEX_PP { // opcode bytes 0x58-0x5F if TOP5 is opcode[7:3] (TODO confirm)
+                0x0: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vaddps();
+                    0x1: vmulps();
+                    0x2: vcvtps2pd();
+                    0x3: vcvtdq2ps();
+                    0x4: vsubps();
+                    0x5: vminps();
+                    0x6: vdivps();
+                    0x7: vmaxps();
+                    default: Inst::UD2();
+                }
+
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vaddpd();
+                    0x1: vmulpd();
+                    0x2: vcvtpd2ps();
+                    0x3: vcvtps2dq();
+                    0x4: vsubpd();
+                    0x5: vminpd();
+                    0x6: vdivpd();
+                    0x7: vmaxpd();
+                    default: Inst::UD2();
+                }
+
+                0x2: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vaddss();
+                    0x1: vmulss();
+                    0x2: vcvtss2sd();
+                    0x3: vcvttps2dq();
+                    0x4: vsubss();
+                    0x5: vminss();
+                    0x6: vdivss();
+                    0x7: vmaxss();
+                    default: Inst::UD2();
+                }
+
+                0x3: decode OPCODE_OP_BOTTOM3 { // no 0x3 arm in this group; it falls to the default
+                    0x0: vaddsd();
+                    0x1: vmulsd();
+                    0x2: vcvtsd2ss();
+                    0x4: vsubsd();
+                    0x5: vminsd();
+                    0x6: vdivsd();
+                    0x7: vmaxsd();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0D: decode VEX_PP { // opcode bytes 0x68-0x6F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpunpckhbw();
+                    0x1: vpunpckhwd();
+                    0x2: vpunpckhdq();
+                    0x3: decode VEX_L {
+                        0x0: vpackssdw();
+                        default: Inst::UD2();
+                    }
+                    0x4: vpunpcklqdq();
+                    0x5: vpunpckhqdq();
+                    0x6: decode VEX_L {
+                        0x0: vmovdvmovq();
+                        default: Inst::UD2();
+                    }
+                    0x7: decode VEX_VVVV {
+                        0xF: vmovdqa(); // vvvv is a 4-bit field; only all-ones is accepted
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                0x2: decode OPCODE_OP_BOTTOM3 {
+                    0x7: vmovdqu();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0F: decode VEX_PP { // opcode bytes 0x78-0x7F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x4: vhaddpd();
+                    0x5: vhsubpd();
+                    0x6: decode VEX_L {
+                        0x1: vmovdvmovq(); // NOTE(review): the 0x0D block keys its vmovdvmovq arm on VEX_L 0x0 -- confirm 0x1 is intended
+                        default: Inst::UD2();
+                    }
+                    0x7: decode VEX_VVVV {
+                        0xF: vmovdqa(); // vvvv is a 4-bit field; only all-ones is accepted
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                0x2: decode OPCODE_OP_BOTTOM3 {
+                    0x6: decode VEX_L {
+                        0x0: vmovq();
+                        default: Inst::UD2();
+                    }
+                    0x7: vmovdqu();
+                    default: Inst::UD2();
+                }
+
+                0x3: decode OPCODE_OP_BOTTOM3 {
+                    0x4: vhaddps();
+                    0x5: vhsubps();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x15: decode VEX_PP { // opcode bytes 0xA8-0xAF if TOP5 is opcode[7:3] (TODO confirm)
+                0x0: decode OPCODE_OP_BOTTOM3 {
+                    0x6: decode MODRM_REG { // instruction is selected by the ModRM reg field
+                        0x2: vldmxcsr();
+                        0x3: vstmxcsr();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+                default: Inst::UD2();
+            }
+
+            0x1B: decode VEX_PP { // opcode bytes 0xD8-0xDF if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpsubusb();
+                    0x1: vpsubusw();
+                    0x2: vpminub();
+                    0x3: decode VEX_L { // accepted only when VEX_L is 0
+                        0x0: vpand();
+                        default: Inst::UD2();
+                    }
+                    0x4: vpaddusb();
+                    0x5: decode VEX_L {
+                        0x0: vpaddusw();
+                        default: Inst::UD2();
+                    }
+                    0x6: vpmaxub();
+                    0x7: decode VEX_L {
+                        0x0: vpandn();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x1D: decode VEX_PP { // opcode bytes 0xE8-0xEF if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpsubsb();
+                    0x1: vpsubsw();
+                    0x2: vpminsw();
+                    0x3: vpor();
+                    0x4: decode VEX_L { // accepted only when VEX_L is 0
+                        0x0: vpaddsb();
+                        default: Inst::UD2();
+                    }
+                    0x5: decode VEX_L {
+                        0x0: vpaddsw();
+                        default: Inst::UD2();
+                    }
+                    0x6: vpmaxsw();
+                    0x7: vpxor();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x1F: decode VEX_PP { // opcode bytes 0xF8-0xFF if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 { // no 0x7 arm; it falls to the default
+                    0x0: vpsubb();
+                    0x1: vpsubw();
+                    0x2: vpsubd();
+                    0x3: vpsubq();
+                    0x4: vpaddb();
+                    0x5: decode VEX_L {
+                        0x0: vpaddw();
+                        default: Inst::UD2();
+                    }
+                    0x6: decode VEX_L {
+                        0x0: vpaddd();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            default: Inst::UD2();
+        }
+
+        0x02: decode OPCODE_OP_TOP5 {
+            0x00: decode VEX_PP { // opcode bytes 0x00-0x07 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpshufb();
+                    0x1: vphaddw();
+                    0x2: vphaddd();
+                    0x3: vphaddsw();
+                    0x4: vpmaddubsw();
+                    0x5: vphsubw();
+                    0x6: vphsubd();
+                    0x7: vphsubsw();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x02: decode VEX_PP { // opcode bytes 0x10-0x17 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x3: vcvtph2ps();
+                    0x7: vptest();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x04: decode VEX_PP { // opcode bytes 0x20-0x27 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpmovsxbw();
+                    0x1: vpmovsxbd();
+                    0x2: vpmovsxbq();
+                    0x3: vpmovsxwd();
+                    0x4: vpmovsxwq();
+                    0x5: vpmovsxdq();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x06: decode VEX_PP { // opcode bytes 0x30-0x37 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpmovzxbw();
+                    0x1: vpmovzxbd();
+                    0x2: vpmovzxbq();
+                    0x3: vpmovzxwd();
+                    0x4: vpmovzxwq();
+                    0x5: vpmovzxdq();
+                    0x7: vpcmpgtq();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x08: decode VEX_PP { // opcode bytes 0x40-0x47 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpmulld();
+                    0x1: vphminposuw();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x12: decode VEX_PP { // opcode bytes 0x90-0x97 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x6: decode VEX_W { // VEX_W picks the ps (0) or pd (1) form
+                        0x0: vfmaddsub132ps();
+                        0x1: vfmaddsub132pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: decode VEX_W {
+                        0x0: vfmsubadd132ps();
+                        0x1: vfmsubadd132pd();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x14: decode VEX_PP { // opcode bytes 0xA0-0xA7 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x6: decode VEX_W { // VEX_W picks the ps (0) or pd (1) form
+                        0x0: vfmaddsub213ps();
+                        0x1: vfmaddsub213pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: decode VEX_W {
+                        0x0: vfmsubadd213ps();
+                        0x1: vfmsubadd213pd();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x16: decode VEX_PP { // opcode bytes 0xB0-0xB7 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x6: decode VEX_W { // VEX_W picks the ps (0) or pd (1) form
+                        0x0: vfmaddsub231ps();
+                        0x1: vfmaddsub231pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: decode VEX_W {
+                        0x0: vfmsubadd231ps();
+                        0x1: vfmsubadd231pd();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x1E: decode VEX_PP { // opcode bytes 0xF0-0xF7 if TOP5 is opcode[7:3] (TODO confirm)
+                0x0: decode OPCODE_OP_BOTTOM3 {
+                    0x2: andn();
+                    0x3: decode MODRM_REG { // instruction is selected by the ModRM reg field
+                        0x1: blsr();
+                        0x2: blsmsk();
+                        0x3: blsi();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: bextr();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x01: decode VEX_PP { // opcode bytes 0x08-0x0F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpsignb();
+                    0x1: vpsignw();
+                    0x2: vpsignd();
+                    0x3: vpmulhrsw();
+                    0x4: vpermilps();
+                    0x5: vpermilpd();
+                    0x6: vtestps();
+                    0x7: vtestpd();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x03: decode VEX_PP { // opcode bytes 0x18-0x1F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vbroadcastss();
+                    0x1: decode VEX_L { // accepted only when VEX_L is 1
+                        0x1: vbroadcastsd();
+                        default: Inst::UD2();
+                    }
+
+                    0x2: decode VEX_L { // accepted only when VEX_L is 1
+                        0x1: vbroadcastF128();
+                        default: Inst::UD2();
+                    }
+
+                    0x4: decode VEX_L { // accepted only when VEX_L is 0
+                        0x0: vpabsb();
+                        default: Inst::UD2();
+                    }
+                    0x5: decode VEX_L {
+                        0x0: vpabsw();
+                        default: Inst::UD2();
+                    }
+                    0x6: decode VEX_L {
+                        0x0: vpabsd();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x05: decode VEX_PP { // opcode bytes 0x28-0x2F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpmuldq();
+                    0x1: vpcmpeqq();
+                    0x2: decode VEX_VVVV {
+                        0xF: vmovntdqa(); // vvvv is a 4-bit field; only all-ones is accepted
+                        default: Inst::UD2();
+                    }
+                    0x3: decode VEX_L {
+                        0x0: vpackusdw();
+                        default: Inst::UD2();
+                    }
+                    0x4: vmaskmovps();
+                    0x5: vmaskmovpd();
+                    0x6: vmaskmovps();
+                    0x7: vmaskmovpd();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x07: decode VEX_PP { // opcode bytes 0x38-0x3F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpminsb();
+                    0x1: vpminsd();
+                    0x2: vpminuw();
+                    0x3: vpminud();
+                    0x4: vpmaxsb();
+                    0x5: vpmaxsd();
+                    0x6: vpmaxuw();
+                    0x7: vpmaxud();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0B: decode VEX_PP { // opcode bytes 0x58-0x5F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x2: vbroadcasti128();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x13: decode VEX_PP { // opcode bytes 0x98-0x9F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_W { // in every arm below, VEX_W picks the single (0) or double (1) precision form
+                        0x0: vfmadd132ps();
+                        0x1: vfmadd132pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x1: decode VEX_W {
+                        0x0: vfmadd132ss();
+                        0x1: vfmadd132sd();
+                        default: Inst::UD2();
+                    }
+
+                    0x2: decode VEX_W {
+                        0x0: vfmsub132ps();
+                        0x1: vfmsub132pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x3: decode VEX_W {
+                        0x0: vfmsub132ss();
+                        0x1: vfmsub132sd();
+                        default: Inst::UD2();
+                    }
+
+                    0x4: decode VEX_W {
+                        0x0: vfnmadd132ps();
+                        0x1: vfnmadd132pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x5: decode VEX_W {
+                        0x0: vfnmadd132ss();
+                        0x1: vfnmadd132sd();
+                        default: Inst::UD2();
+                    }
+
+                    0x6: decode VEX_W {
+                        0x0: vfnmsub132ps();
+                        0x1: vfnmsub132pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: decode VEX_W {
+                        0x0: vfnmsub132ss();
+                        0x1: vfnmsub132sd();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x15: decode VEX_PP { // opcode bytes 0xA8-0xAF if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_W { // in every arm below, VEX_W picks the single (0) or double (1) precision form
+                        0x0: vfmadd213ps();
+                        0x1: vfmadd213pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x1: decode VEX_W {
+                        0x0: vfmadd213ss();
+                        0x1: vfmadd213sd();
+                        default: Inst::UD2();
+                    }
+
+                    0x2: decode VEX_W {
+                        0x0: vfmsub213ps();
+                        0x1: vfmsub213pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x3: decode VEX_W {
+                        0x0: vfmsub213ss();
+                        0x1: vfmsub213sd();
+                        default: Inst::UD2();
+                    }
+
+                    0x4: decode VEX_W {
+                        0x0: vfnmadd213ps();
+                        0x1: vfnmadd213pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x5: decode VEX_W {
+                        0x0: vfnmadd213ss();
+                        0x1: vfnmadd213sd();
+                        default: Inst::UD2();
+                    }
+
+                    0x6: decode VEX_W {
+                        0x0: vfnmsub213ps();
+                        0x1: vfnmsub213pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: decode VEX_W {
+                        0x0: vfnmsub213ss();
+                        0x1: vfnmsub213sd();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x17: decode VEX_PP { // opcode bytes 0xB8-0xBF if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_W { // in every arm below, VEX_W picks the single (0) or double (1) precision form
+                        0x0: vfmadd231ps();
+                        0x1: vfmadd231pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x1: decode VEX_W {
+                        0x0: vfmadd231ss();
+                        0x1: vfmadd231sd();
+                        default: Inst::UD2();
+                    }
+
+                    0x2: decode VEX_W {
+                        0x0: vfmsub231ps();
+                        0x1: vfmsub231pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x3: decode VEX_W {
+                        0x0: vfmsub231ss();
+                        0x1: vfmsub231sd();
+                        default: Inst::UD2();
+                    }
+
+                    0x4: decode VEX_W {
+                        0x0: vfnmadd231ps();
+                        0x1: vfnmadd231pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x5: decode VEX_W {
+                        0x0: vfnmadd231ss();
+                        0x1: vfnmadd231sd();
+                        default: Inst::UD2();
+                    }
+
+                    0x6: decode VEX_W {
+                        0x0: vfnmsub231ps();
+                        0x1: vfnmsub231pd();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: decode VEX_W {
+                        0x0: vfnmsub231ss();
+                        0x1: vfnmsub231sd();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x1B: decode VEX_PP { // opcode bytes 0xD8-0xDF if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x3: vaesimc();
+                    0x4: vaesenc();
+                    0x5: vaesenclast();
+                    0x6: vaesdec();
+                    0x7: vaesdeclast();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            default: Inst::UD2();
+        }
+
+        0x03: decode OPCODE_OP_TOP5 {
+            0x00: decode VEX_PP { // opcode bytes 0x00-0x07 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x4: vpermilps();
+                    0x5: vpermilpd();
+                    0x6: vperm2f128();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x02: decode VEX_PP { // opcode bytes 0x10-0x17 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x4: decode MODRM_MOD { // both arms name the same instruction; the split is on ModRM mod == 3
+                        0x03: vpextrb();
+                        default: vpextrb();
+                    }
+
+                    0x5: decode VEX_VVVV {
+                        0xF: decode VEX_L { // vvvv is a 4-bit field; only all-ones is accepted
+                            0x0: decode MODRM_MOD {
+                                0x03: vpextrw();
+                                default: vpextrw();
+                            }
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+                    0x6: decode VEX_W {
+                        0x0: vpextrd();
+                        0x1: vpextrq();
+                        default: Inst::UD2();
+                    }
+                    0x7: decode MODRM_MOD {
+                        0x03: vextractps();
+                        default: vextractps();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x04: decode VEX_PP { // opcode bytes 0x20-0x27 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode MODRM_MOD { // both arms name the same instruction; the split is on ModRM mod == 3
+                        0x03: vpinsrb();
+                        default: vpinsrb();
+                    }
+                    0x1: decode MODRM_MOD {
+                        0x03: vinsertps();
+                        default: vinsertps();
+                    }
+                    0x2: decode VEX_W { // NOTE(review): no default arm here, unlike the other VEX_W decodes in this file
+                        0x0: vpinsrd();
+                        0x1: vpinsrq();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x08: decode VEX_PP { // opcode bytes 0x40-0x47 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vdpps();
+                    0x1: vdppd();
+                    0x2: decode VEX_L { // accepted only when VEX_L is 0
+                        0x0: vmpsadbw();
+                        default: Inst::UD2();
+                    }
+                    0x4: vpclmulqdq();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0C: decode VEX_PP { // opcode bytes 0x60-0x67 if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode VEX_L { // the whole group is accepted only when VEX_L is 0
+                    0x0: decode OPCODE_OP_BOTTOM3 {
+                        0x0: decode VEX_VVVV {
+                            0xF: vpcmpestrm(); // vvvv is a 4-bit field; only all-ones is accepted
+                            default: Inst::UD2();
+                        }
+                        0x1: decode VEX_VVVV {
+                            0xF: vpcmpestri(); // vvvv is a 4-bit field; only all-ones is accepted
+                            default: Inst::UD2();
+                        }
+                        0x2: vpcmpistrm();
+                        0x3: vpcmpistri();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x01: decode VEX_PP { // opcode bytes 0x08-0x0F if TOP5 is opcode[7:3] (TODO confirm)
+                0x0: decode OPCODE_OP_BOTTOM3 {
+                    0x7: palignr(); // NOTE(review): named without the 'v' prefix, unlike vpalignr below -- confirm
+                    default: Inst::UD2();
+                }
+
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_VVVV {
+                        0xF: vroundps(); // vvvv is a 4-bit field; only all-ones is accepted
+                        default: Inst::UD2();
+                    }
+                    0x1: decode VEX_VVVV {
+                        0xF: vroundpd(); // vvvv is a 4-bit field; only all-ones is accepted
+                        default: Inst::UD2();
+                    }
+                    0x2: vroundss();
+                    0x3: vroundsd();
+                    0x4: vblendps();
+                    0x5: vblendpd();
+                    0x6: decode VEX_L {
+                        0x0: vpblendw();
+                        default: Inst::UD2();
+                    }
+                    0x7: decode VEX_L {
+                        0x0: vpalignr();
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x03: decode VEX_PP { // opcode bytes 0x18-0x1F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vinsertf128();
+                    0x1: vextractf128();
+                    0x5: vcvtps2ph();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x09: decode VEX_PP { // opcode bytes 0x48-0x4F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: vpermil2ps();
+                    0x1: vpermil2pd();
+                    0x2: vblendvps();
+                    0x3: vblendvpd();
+                    0x4: decode VEX_L { // accepted only when both VEX_L and VEX_W are 0
+                        0x0: decode VEX_W {
+                            0x0: vpblendvb();
+                            default: Inst::UD2();
+                        }
+                        default: Inst::UD2();
+                    }
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0B: decode VEX_PP { // opcode bytes 0x58-0x5F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x4: decode VEX_W { // both VEX_W values name the same instruction in every arm below
+                        0x0: vfmaddsubps();
+                        0x1: vfmaddsubps();
+                        default: Inst::UD2();
+                    }
+
+                    0x5: decode VEX_W {
+                        0x0: vfmaddsubpd();
+                        0x1: vfmaddsubpd();
+                        default: Inst::UD2();
+                    }
+
+                    0x6: decode VEX_W {
+                        0x0: vfmsubaddps();
+                        0x1: vfmsubaddps();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: decode VEX_W {
+                        0x0: vfmsubaddpd();
+                        0x1: vfmsubaddpd();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0D: decode VEX_PP { // opcode bytes 0x68-0x6F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_W { // both VEX_W values name the same instruction in every arm below
+                        0x0: vfmaddps();
+                        0x1: vfmaddps();
+                        default: Inst::UD2();
+                    }
+
+                    0x1: decode VEX_W {
+                        0x0: vfmaddpd();
+                        0x1: vfmaddpd();
+                        default: Inst::UD2();
+                    }
+
+                    0x2: decode VEX_W {
+                        0x0: vfmaddss();
+                        0x1: vfmaddss();
+                        default: Inst::UD2();
+                    }
+
+                    0x3: decode VEX_W {
+                        0x0: vfmaddsd();
+                        0x1: vfmaddsd();
+                        default: Inst::UD2();
+                    }
+
+                    0x4: decode VEX_W {
+                        0x0: vfmsubps();
+                        0x1: vfmsubps();
+                        default: Inst::UD2();
+                    }
+
+                    0x5: decode VEX_W {
+                        0x0: vfmsubpd();
+                        0x1: vfmsubpd();
+                        default: Inst::UD2();
+                    }
+
+                    0x6: decode VEX_W {
+                        0x0: vfmsubss();
+                        0x1: vfmsubss();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: decode VEX_W {
+                        0x0: vfmsubsd();
+                        0x1: vfmsubsd();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x0F: decode VEX_PP { // opcode bytes 0x78-0x7F if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x0: decode VEX_W { // both VEX_W values name the same instruction in every arm below
+                        0x0: vfnmaddps();
+                        0x1: vfnmaddps();
+                        default: Inst::UD2();
+                    }
+
+                    0x1: decode VEX_W {
+                        0x0: vfnmaddpd();
+                        0x1: vfnmaddpd();
+                        default: Inst::UD2();
+                    }
+
+                    0x2: decode VEX_W {
+                        0x0: vfnmaddss();
+                        0x1: vfnmaddss();
+                        default: Inst::UD2();
+                    }
+
+                    0x3: decode VEX_W {
+                        0x0: vfnmaddsd();
+                        0x1: vfnmaddsd();
+                        default: Inst::UD2();
+                    }
+
+                    0x4: decode VEX_W {
+                        0x0: vfnmsubps();
+                        0x1: vfnmsubps();
+                        default: Inst::UD2();
+                    }
+
+                    0x5: decode VEX_W {
+                        0x0: vfnmsubpd();
+                        0x1: vfnmsubpd();
+                        default: Inst::UD2();
+                    }
+
+                    0x6: decode VEX_W {
+                        0x0: vfnmsubss();
+                        0x1: vfnmsubss();
+                        default: Inst::UD2();
+                    }
+
+                    0x7: decode VEX_W {
+                        0x0: vfnmsubsd();
+                        0x1: vfnmsubsd();
+                        default: Inst::UD2();
+                    }
+
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            0x1B: decode VEX_PP { // opcode bytes 0xD8-0xDF if TOP5 is opcode[7:3] (TODO confirm)
+                0x1: decode OPCODE_OP_BOTTOM3 {
+                    0x7: vaeskeygenassist();
+                    default: Inst::UD2();
+                }
+
+                default: Inst::UD2();
+            }
+
+            default: Inst::UD2();
+        }
+
+        default: Inst::UD2();
+    }
+}
index 6e5373ca16aa59f0348872ee9fc04b1bfa01afbb..fcdffec1c30871910b242880d61d55c892c35620 100644 (file)
@@ -71,7 +71,8 @@ namespace X86ISA
     const ExtMachInst NoopMachInst M5_VAR_USED = {
         0x0,                            // No legacy prefixes.
         0x0,                            // No rex prefix.
-        { OneByteOpcode, 0x90 },          // One opcode byte, 0x90.
+        0x0,                            // No two / three byte escape sequence
+        { OneByteOpcode, 0x90 },        // One opcode byte, 0x90.
         0x0, 0x0,                       // No modrm or sib.
         0, 0,                           // No immediate or displacement.
         8, 8, 8,                        // All sizes are 8.
index a960205b58fffaf64149cdc589d02883749ce964..59f8b91de8a661c99da9d4148db43286b78d5fb0 100644 (file)
@@ -41,6 +41,7 @@ paramOut(CheckpointOut &cp, const string &name, ExtMachInst const &machInst)
     // Prefixes
     paramOut(cp, name + ".legacy", (uint8_t)machInst.legacy);
     paramOut(cp, name + ".rex", (uint8_t)machInst.rex);
+    paramOut(cp, name + ".vex", (uint32_t)machInst.vex);
 
     // Opcode
     paramOut(cp, name + ".opcode.type", (uint8_t)machInst.opcode.type);
@@ -75,6 +76,10 @@ paramIn(CheckpointIn &cp, const string &name, ExtMachInst &machInst)
     paramIn(cp, name + ".rex", temp8);
     machInst.rex = temp8;
 
+    uint32_t temp32;
+    paramIn(cp, name + ".vex", temp32);
+    machInst.vex = temp32;
+
     // Opcode
     paramIn(cp, name + ".opcode.type", temp8);
     machInst.opcode.type = (OpcodeType)temp8;
index 23d60020b9544e4088b1f45e82c0bdf08f233532..88b000b6e7038335fe7c8b9a94d8803e865b060a 100644 (file)
@@ -67,7 +67,10 @@ namespace X86ISA
         AddressSizeOverride,
         Lock,
         Rep,
-        Repne
+        Repne,
+        Vex2Prefix,
+        Vex3Prefix,
+        XopPrefix,
     };
 
     BitUnion8(LegacyPrefixVector)
@@ -104,12 +107,55 @@ namespace X86ISA
         Bitfield<0> b;
     EndBitUnion(Rex)
 
+    BitUnion(uint32_t, ThreeByteVex)  // VEX prefix fields held in a uint32_t
+        Bitfield<7,0> zero;  // Prefix byte itself (set by the decoder)
+        SubBitUnion(first, 15, 8)
+            // Inverted one-bit extension of ModRM reg field
+            Bitfield<15> r;
+            // Inverted one-bit extension of SIB index field
+            Bitfield<14> x;
+            // Inverted one-bit extension of ModRM r/m field or SIB base field
+            Bitfield<13> b;
+            // Opcode map select
+            Bitfield<12, 8> map_select;
+        EndSubBitUnion(first)
+        SubBitUnion(second, 23, 16)
+            // Default operand size override for a general purpose register to
+            // 64-bit size in 64-bit mode; operand configuration specifier for
+            // certain YMM/XMM-based operations.
+            Bitfield<23> w;
+            // Source or destination register selector, in ones' complement
+            // format
+            Bitfield<22, 19>  vvvv;
+            // Vector length specifier
+            Bitfield<18> l;
+            // Implied 66, F2, or F3 opcode extension
+            Bitfield<17, 16> pp;
+        EndSubBitUnion(second)
+    EndBitUnion(ThreeByteVex)
+
+    BitUnion16(TwoByteVex)  // 16-bit layout of a two-byte VEX prefix
+        Bitfield<7,0> zero;  // Presumably the prefix byte -- TODO confirm
+        SubBitUnion(first, 15, 8)
+            // Inverted one-bit extension of ModRM reg field
+            Bitfield<15> r;
+            // Source or destination register selector, in ones' complement
+            // format
+            Bitfield<14, 11>  vvvv;
+            // Vector length specifier
+            Bitfield<10> l;
+            // Implied 66, F2, or F3 opcode extension
+            Bitfield<9, 8> pp;
+        EndSubBitUnion(first)
+    EndBitUnion(TwoByteVex)
+
     enum OpcodeType {
         BadOpcode,
         OneByteOpcode,
         TwoByteOpcode,
         ThreeByte0F38Opcode,
-        ThreeByte0F3AOpcode
+        ThreeByte0F3AOpcode,
+        Vex,  // Printed as "vex"; presumably VEX-prefixed opcodes -- confirm
     };
 
     static inline const char *
@@ -126,6 +172,8 @@ namespace X86ISA
             return "three byte 0f38";
           case ThreeByte0F3AOpcode:
             return "three byte 0f3a";
+          case Vex:
+            return "vex";
           default:
             return "unrecognized!";
         }
@@ -160,6 +208,10 @@ namespace X86ISA
         //Prefixes
         LegacyPrefixVector legacy;
         Rex rex;
+        // The following single field encodes both the two-byte and the
+        // three-byte escape sequences
+        ThreeByteVex vex;
+
         //This holds all of the bytes of the opcode
         struct
         {
@@ -191,11 +243,13 @@ namespace X86ISA
         operator << (std::ostream & os, const ExtMachInst & emi)
     {
         ccprintf(os, "\n{\n\tleg = %#x,\n\trex = %#x,\n\t"
+                     "vex/xop = %#x,\n\t"
                      "op = {\n\t\ttype = %s,\n\t\top = %#x,\n\t\t},\n\t"
                      "modRM = %#x,\n\tsib = %#x,\n\t"
                      "immediate = %#x,\n\tdisplacement = %#x\n\t"
                      "dispSize = %d}\n",
                      (uint8_t)emi.legacy, (uint8_t)emi.rex,
+                     (uint32_t)emi.vex,
                      opcodeTypeToStr(emi.opcode.type), (uint8_t)emi.opcode.op,
                      (uint8_t)emi.modRM, (uint8_t)emi.sib,
                      emi.immediate, emi.displacement, emi.dispSize);