From 3d257c287397ae5370209bd05ca55d8e6a6dd3eb Mon Sep 17 00:00:00 2001 From: Tong Shen Date: Wed, 20 Jan 2021 16:11:01 -0800 Subject: [PATCH] arch-x86: implement PSHUFB SSE instruction. Change-Id: I9398f9ecb26b6aabf4015e0e285fdc2f4c2487dd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/39495 Reviewed-by: Tong Shen Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- .../isa/decoder/three_byte_0f38_opcodes.isa | 2 +- .../integer/data_reordering/shuffle.py | 30 ++++++++++++++++++- src/arch/x86/isa/microops/mediaop.isa | 15 ++++++++-- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa index 3165eb708..0f4330bf7 100644 --- a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa +++ b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa @@ -31,7 +31,7 @@ 'X86ISA::ThreeByte0F38Opcode': decode LEGACY_OP { format WarnUnimpl { 1: decode OPCODE_OP { - 0x00: pshufb_Vdq_Wdq(); + 0x00: Inst::PSHUFB(Vo, Wo); 0x01: phaddw_Vdq_Wdq(); 0x02: phaddd_Vdq_Wdq(); 0x03: phaddsw_Vdq_Wdq(); diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py index 6651d8758..4187c4f59 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py @@ -84,4 +84,32 @@ def macroop PSHUFLW_XMM_P_I { ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 shuffle xmml, ufp1, ufp1, size=2, ext=imm }; -''' + +def macroop PSHUFB_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + movfp ufp2, xmmhm, dataSize=8 + shuffle ufp1, xmml, xmmh, size=1, ext=0 + shuffle ufp2, xmml, xmmh, size=1, ext=0 + movfp xmml, ufp1, dataSize=8 + movfp xmmh, ufp2, dataSize=8 +}; + +def macroop PSHUFB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + shuffle ufp1, xmml, xmmh, size=1, ext=0 + shuffle ufp2, xmml, xmmh, size=1, ext=0 + movfp xmml, ufp1, dataSize=8 + movfp xmmh, ufp2, dataSize=8 +}; + +def macroop PSHUFB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + shuffle ufp1, xmml, xmmh, size=1, ext=0 + shuffle ufp2, xmml, xmmh, size=1, ext=0 + movfp xmml, ufp1, dataSize=8 + movfp xmmh, ufp2, dataSize=8 +}; +''' \ No newline at end of file diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa index bf5fc6766..7e5fd1011 100644 --- a/src/arch/x86/isa/microops/mediaop.isa +++ b/src/arch/x86/isa/microops/mediaop.isa @@ -337,23 +337,32 @@ let {{ if (size == 8) { options = 2; optionBits = 1; + } else if (size == 1) { + options = 16; + optionBits = 8; } else { options = 4; optionBits = 2; } uint64_t result = 0; - uint8_t sel = ext; + // PSHUFB stores shuffle encoding in destination XMM register + // directly (instead of passed in by ext). + uint64_t sel = (size == 1) ? FpDestReg_uqw : ext; for (int i = 0; i < items; i++) { uint64_t resBits; uint8_t lsel = sel & mask(optionBits); - if (lsel * size >= sizeof(double)) { + if (size == 1 && bits(lsel, 7)) { + // PSHUFB sets result byte to zero when highest bit of the + // corresponding shuffle encoding is 1. + resBits = 0; + } else if (lsel * size >= sizeof(double)) { lsel -= options / 2; resBits = bits(FpSrcReg2_uqw, (lsel + 1) * sizeBits - 1, (lsel + 0) * sizeBits); - } else { + } else { resBits = bits(FpSrcReg1_uqw, (lsel + 1) * sizeBits - 1, (lsel + 0) * sizeBits); -- 2.30.2