From c8cad9d389b76d19449d9190122472fa465476e5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 29 Mar 2021 12:06:09 +0200 Subject: [PATCH] x86: VPSADBW's source operands are also commutative In commit 79dec6b7baa2 ("x86-64: optimize certain commutative VEX-encoded insns") I missed the fact that there being subtraction involved here doesn't matter, as absolute differences get summed up. --- gas/ChangeLog | 6 ++++++ gas/testsuite/gas/i386/x86-64-avx-swap-2.d | 4 ++-- gas/testsuite/gas/i386/x86-64-sse2avx.d | 1 + gas/testsuite/gas/i386/x86-64-sse2avx.s | 1 + opcodes/ChangeLog | 6 ++++++ opcodes/i386-opc.tbl | 6 +++--- opcodes/i386-tbl.h | 6 +++--- 7 files changed, 22 insertions(+), 8 deletions(-) diff --git a/gas/ChangeLog b/gas/ChangeLog index 01ab2e8b8c3..f7331343bc4 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,9 @@ +2021-03-29 Jan Beulich + + * testsuite/gas/i386/x86-64-sse2avx.s: Add vpsadbw case. + * testsuite/gas/i386/x86-64-avx-swap-2.d. + testsuite/gas/i386/x86-64-sse2avx.d: Adjust expectations. + 2021-03-29 Jan Beulich * config/tc-i386.c (optimize_encoding): Replace VEX-encoding diff --git a/gas/testsuite/gas/i386/x86-64-avx-swap-2.d b/gas/testsuite/gas/i386/x86-64-avx-swap-2.d index 513e80db0dd..c34c65817e1 100644 --- a/gas/testsuite/gas/i386/x86-64-avx-swap-2.d +++ b/gas/testsuite/gas/i386/x86-64-avx-swap-2.d @@ -69,7 +69,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: c5 8d f4 d6 vpmuludq %ymm6,%ymm14,%ymm2 [ ]*[a-f0-9]+: c4 c2 4d 28 d6 vpmuldq %ymm14,%ymm6,%ymm2 [ ]*[a-f0-9]+: c5 8d eb d6 vpor %ymm6,%ymm14,%ymm2 -[ ]*[a-f0-9]+: c4 c1 4d f6 d6 vpsadbw %ymm14,%ymm6,%ymm2 +[ ]*[a-f0-9]+: c5 8d f6 d6 vpsadbw %ymm6,%ymm14,%ymm2 [ ]*[a-f0-9]+: c4 c1 4d f8 d6 vpsubb %ymm14,%ymm6,%ymm2 [ ]*[a-f0-9]+: c4 c1 4d f9 d6 vpsubw %ymm14,%ymm6,%ymm2 [ ]*[a-f0-9]+: c4 c1 4d fa d6 vpsubd %ymm14,%ymm6,%ymm2 @@ -211,7 +211,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: c5 89 f4 d6 vpmuludq %xmm6,%xmm14,%xmm2 [ ]*[a-f0-9]+: c4 c2 49 28 d6 vpmuldq %xmm14,%xmm6,%xmm2 [ ]*[a-f0-9]+: c5 89 eb d6 vpor %xmm6,%xmm14,%xmm2 -[ ]*[a-f0-9]+: c4 c1 49 f6 d6 vpsadbw %xmm14,%xmm6,%xmm2 +[ ]*[a-f0-9]+: c5 89 f6 d6 vpsadbw %xmm6,%xmm14,%xmm2 [ ]*[a-f0-9]+: c4 c1 49 f8 d6 vpsubb %xmm14,%xmm6,%xmm2 [ ]*[a-f0-9]+: c4 c1 49 f9 d6 vpsubw %xmm14,%xmm6,%xmm2 [ ]*[a-f0-9]+: c4 c1 49 fa d6 vpsubd %xmm14,%xmm6,%xmm2 diff --git a/gas/testsuite/gas/i386/x86-64-sse2avx.d b/gas/testsuite/gas/i386/x86-64-sse2avx.d index c904a2b4137..d0e50c53ba8 100644 --- a/gas/testsuite/gas/i386/x86-64-sse2avx.d +++ b/gas/testsuite/gas/i386/x86-64-sse2avx.d @@ -273,6 +273,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: c5 89 eb f6 vpor %xmm6,%xmm14,%xmm6 [ ]*[a-f0-9]+: c5 c9 eb 31 vpor \(%rcx\),%xmm6,%xmm6 [ ]*[a-f0-9]+: c5 c9 f6 f4 vpsadbw %xmm4,%xmm6,%xmm6 +[ ]*[a-f0-9]+: c5 89 f6 f6 vpsadbw %xmm6,%xmm14,%xmm6 [ ]*[a-f0-9]+: c5 c9 f6 31 vpsadbw \(%rcx\),%xmm6,%xmm6 [ ]*[a-f0-9]+: c4 e2 49 00 f4 vpshufb %xmm4,%xmm6,%xmm6 [ ]*[a-f0-9]+: c4 e2 49 00 31 vpshufb \(%rcx\),%xmm6,%xmm6 diff --git a/gas/testsuite/gas/i386/x86-64-sse2avx.s b/gas/testsuite/gas/i386/x86-64-sse2avx.s index 6b25314b7a3..8a45b1e5224 100644 --- a/gas/testsuite/gas/i386/x86-64-sse2avx.s +++ b/gas/testsuite/gas/i386/x86-64-sse2avx.s @@ -280,6 +280,7 @@ _start: por %xmm14,%xmm6 por (%rcx),%xmm6 psadbw %xmm4,%xmm6 + psadbw %xmm14,%xmm6 psadbw (%rcx),%xmm6 pshufb %xmm4,%xmm6 pshufb (%rcx),%xmm6 diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index bc62fac9e81..1928cf722b7 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,9 @@ +2021-03-29 Jan Beulich + + * i386-opc.tbl (psadbw): Add . + (vpsadbw): Add C. + * i386-tbl.h: Re-generate. + 2021-03-29 Jan Beulich * i386-opc.tbl (mmx, sse, sse2, sse3, ssse3, sse41, sse42, aes, diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index fe9f14157a9..51f206c4a19 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -1131,7 +1131,7 @@ prefetcht0, 0xf18, 1, CpuSSE|Cpu3dnowA, Modrm|Anysize|IgnoreSize|No_bSuf|No_wSuf prefetcht1, 0xf18, 2, CpuSSE|Cpu3dnowA, Modrm|Anysize|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoAVX, { BaseIndex } prefetcht2, 0xf18, 3, CpuSSE|Cpu3dnowA, Modrm|Anysize|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoAVX, { BaseIndex } psadbw, 0xff6, None, CpuSSE|Cpu3dnowA, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|NoAVX, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX } -psadbw, 0x660ff6, None, , Modrm|||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } +psadbw, 0x660ff6, None, , Modrm||||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } pshufw, 0xf70, None, CpuSSE|Cpu3dnowA, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|NoAVX, { Imm8, Qword|Unspecified|BaseIndex|RegMMX, RegMMX } rcpps, 0x0f53, None, , Modrm||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } rcpss, 0xf30f53, None, , Modrm|||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM } @@ -1703,7 +1703,7 @@ vpmulld, 0x6640, None, CpuAVX, Modrm|Vex|Space0F38|VexVVVV=1|VexWIG|No_bSuf|No_w vpmullw, 0x66d5, None, CpuAVX, Modrm|C|Vex|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } vpmuludq, 0x66f4, None, CpuAVX, Modrm|C|Vex|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } vpor, 0x66eb, None, CpuAVX, Modrm|C|Vex|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } -vpsadbw, 0x66f6, None, CpuAVX, Modrm|Vex|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } +vpsadbw, 0x66f6, None, CpuAVX, Modrm|Vex|Space0F|VexVVVV=1|VexWIG|C|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } vpshufb, 0x6600, None, CpuAVX, Modrm|Vex|Space0F38|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM } vpshufd, 0x6670, None, CpuAVX, Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM } vpshufhw, 0xf370, None, CpuAVX, Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM } @@ -1854,7 +1854,7 @@ vpmulld, 0x6640, None, CpuAVX2, Modrm|Vex=2|Space0F38|VexVVVV=1|VexWIG|No_bSuf|N vpmullw, 0x66d5, None, CpuAVX2, Modrm|C|Vex=2|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM } vpmuludq, 0x66f4, None, CpuAVX2, Modrm|C|Vex=2|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM } vpor, 0x66eb, None, CpuAVX2, Modrm|C|Vex=2|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM } -vpsadbw, 0x66f6, None, CpuAVX2, Modrm|Vex=2|Space0F|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM } +vpsadbw, 0x66f6, None, CpuAVX2, Modrm|Vex=2|Space0F|VexVVVV=1|VexWIG|C|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM } vpshufb, 0x6600, None, CpuAVX2, Modrm|Vex=2|Space0F38|VexVVVV=1|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM } vpshufd, 0x6670, None, CpuAVX2, Modrm|Vex=2|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Unspecified|BaseIndex|RegYMM, RegYMM } vpshufhw, 0xf370, None, CpuAVX2, Modrm|Vex=2|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Unspecified|BaseIndex|RegYMM, RegYMM } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index dfc4c66b37d..0f5ea46bdd6 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -14578,7 +14578,7 @@ const insn_template i386_optab[] = { "psadbw", 0xf6, None, 2, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -40433,7 +40433,7 @@ const insn_template i386_optab[] = { "vpsadbw", 0xf6, None, 3, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -40450,7 +40450,7 @@ const insn_template i386_optab[] = { "vpsadbw", 0xf6, None, 3, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 3, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -- 2.30.2