From: Jan Beulich Date: Tue, 4 Jul 2023 15:05:47 +0000 (+0200) Subject: x86: optimize pre-AVX512 {,V}PCMPEQQ with identical sources X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b7bd93bc51c49c58d0e9cdddf6b4cc44bc4c76dd;p=binutils-gdb.git x86: optimize pre-AVX512 {,V}PCMPEQQ with identical sources The {,V}PCMPEQD alternative is 1 byte shorter in many cases. --- diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index de35ee2a2c6..eb3f128e6ef 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -4563,6 +4563,23 @@ optimize_encoding (void) i.types[j].bitfield.disp8 = fits_in_disp8 (i.op[j].disps->X_add_number); } + else if (optimize_for_space + && i.tm.base_opcode == 0x29 + && i.tm.opcode_space == SPACE_0F38 + && i.operands == i.reg_operands + && i.op[0].regs == i.op[1].regs + && (!i.tm.opcode_modifier.vex + || !(i.op[0].regs->reg_flags & RegRex)) + && !is_evex_encoding (&i.tm)) + { + /* Optimize: -Os: + pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN + vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8) + vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8) + */ + i.tm.opcode_space = SPACE_0F; + i.tm.base_opcode = 0x76; + } } /* Return non-zero for load instruction. */ diff --git a/gas/testsuite/gas/i386/optimize-2.d b/gas/testsuite/gas/i386/optimize-2.d index 571059e8cfe..60a9069c6a7 100644 --- a/gas/testsuite/gas/i386/optimize-2.d +++ b/gas/testsuite/gas/i386/optimize-2.d @@ -161,4 +161,7 @@ Disassembly of section .text: +[a-f0-9]+: 62 .* vporq \(%eax\)\{1to2\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxord \(%eax\)\{1to4\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxorq \(%eax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 66 .* pcmpeqd %xmm2,%xmm2 + +[a-f0-9]+: c5 .* vpcmpeqd %xmm2,%xmm2,%xmm0 + +[a-f0-9]+: c5 .* vpcmpeqd %ymm2,%ymm2,%ymm0 #pass diff --git a/gas/testsuite/gas/i386/optimize-2.s b/gas/testsuite/gas/i386/optimize-2.s index 43af9459a8a..3cf41b1f59c 100644 --- a/gas/testsuite/gas/i386/optimize-2.s +++ b/gas/testsuite/gas/i386/optimize-2.s @@ -180,3 +180,7 @@ _start: vporq (%eax){1to2}, %xmm2, %xmm3 vpxord (%eax){1to4}, %xmm2, %xmm3 vpxorq (%eax){1to4}, %ymm2, %ymm3 + + pcmpeqq %xmm2, %xmm2 + vpcmpeqq %xmm2, %xmm2, %xmm0 + vpcmpeqq %ymm2, %ymm2, %ymm0 diff --git a/gas/testsuite/gas/i386/optimize-2b.d b/gas/testsuite/gas/i386/optimize-2b.d index 58636885eeb..0624cb482d5 100644 --- a/gas/testsuite/gas/i386/optimize-2b.d +++ b/gas/testsuite/gas/i386/optimize-2b.d @@ -162,4 +162,7 @@ Disassembly of section .text: +[a-f0-9]+: 62 .* vporq \(%eax\)\{1to2\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxord \(%eax\)\{1to4\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxorq \(%eax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 66 .* pcmpeqq %xmm2,%xmm2 + +[a-f0-9]+: c4 .* vpcmpeqq %xmm2,%xmm2,%xmm0 + +[a-f0-9]+: c4 .* vpcmpeqq %ymm2,%ymm2,%ymm0 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.d b/gas/testsuite/gas/i386/x86-64-optimize-3.d index 26122dd4e1e..fc5ec5e43d0 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.d @@ -199,4 +199,10 @@ Disassembly of section .text: +[a-f0-9]+: 62 .* vporq \(%rax\)\{1to2\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxord \(%rax\)\{1to4\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxorq \(%rax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 66 .* pcmpeqd %xmm2,%xmm2 + +[a-f0-9]+: c5 .* vpcmpeqd %xmm2,%xmm2,%xmm0 + +[a-f0-9]+: c5 .* vpcmpeqd %ymm2,%ymm2,%ymm0 + +[a-f0-9]+: 66 .* pcmpeqd %xmm12,%xmm12 + +[a-f0-9]+: c4 .* vpcmpeqq %xmm12,%xmm12,%xmm0 + +[a-f0-9]+: c4 .* vpcmpeqq %ymm12,%ymm12,%ymm0 #pass diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3.s b/gas/testsuite/gas/i386/x86-64-optimize-3.s index cbfed3bdf47..d2bcbce3bf4 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3.s +++ b/gas/testsuite/gas/i386/x86-64-optimize-3.s @@ -221,3 +221,11 @@ _start: vporq (%rax){1to2}, %xmm2, %xmm3 vpxord (%rax){1to4}, %xmm2, %xmm3 vpxorq (%rax){1to4}, %ymm2, %ymm3 + + pcmpeqq %xmm2, %xmm2 + vpcmpeqq %xmm2, %xmm2, %xmm0 + vpcmpeqq %ymm2, %ymm2, %ymm0 + + pcmpeqq %xmm12, %xmm12 + vpcmpeqq %xmm12, %xmm12, %xmm0 + vpcmpeqq %ymm12, %ymm12, %ymm0 diff --git a/gas/testsuite/gas/i386/x86-64-optimize-3b.d b/gas/testsuite/gas/i386/x86-64-optimize-3b.d index 00b65f3d8b2..abd48dce3a8 100644 --- a/gas/testsuite/gas/i386/x86-64-optimize-3b.d +++ b/gas/testsuite/gas/i386/x86-64-optimize-3b.d @@ -200,4 +200,10 @@ Disassembly of section .text: +[a-f0-9]+: 62 .* vporq \(%rax\)\{1to2\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxord \(%rax\)\{1to4\},%xmm2,%xmm3 +[a-f0-9]+: 62 .* vpxorq \(%rax\)\{1to4\},%ymm2,%ymm3 + +[a-f0-9]+: 66 .* pcmpeqq %xmm2,%xmm2 + +[a-f0-9]+: c4 .* vpcmpeqq %xmm2,%xmm2,%xmm0 + +[a-f0-9]+: c4 .* vpcmpeqq %ymm2,%ymm2,%ymm0 + +[a-f0-9]+: 66 .* pcmpeqq %xmm12,%xmm12 + +[a-f0-9]+: c4 .* vpcmpeqq %xmm12,%xmm12,%xmm0 + +[a-f0-9]+: c4 .* vpcmpeqq %ymm12,%ymm12,%ymm0 #pass diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 26fee21d10d..607a4382497 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -1363,7 +1363,7 @@ pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|Implicit1stXmm pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM } pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } pblendw, 0x660f3a0e, , Modrm|||NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM } -pcmpeqq, 0x660f3829, , Modrm|||NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } +pcmpeqq, 0x660f3829, , Modrm|||NoSuf|Optimize, { RegXMM|Unspecified|BaseIndex, RegXMM } pextr, 0x660f3a14 | , , RegMem||NoSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 } pextr, 0x660f3a14 | , , Modrm||NoSuf, { Imm8, RegXMM, |Unspecified|BaseIndex } pextrd, 0x660f3a16, , Modrm||NoSuf|IgnoreSize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex } @@ -1592,7 +1592,7 @@ vpblendvb, 0x664c, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize vpblendw, 0x660e, AVX|AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vpcmpeq, 0x6674 | , AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vpcmpeqd, 0x6676, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } -vpcmpeqq, 0x6629, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } +vpcmpeqq, 0x6629, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM } vpcmpestri, 0x6661, AVX|No64, Modrm|Vex|Space0F3A|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM } vpcmpestri, 0x6661, AVX|x64, Modrm|Vex|Space0F3A|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Imm8, Xmmword|Unspecified|BaseIndex|RegXMM, RegXMM } vpcmpestrm, 0x6660, AVX|No64, Modrm|Vex|Space0F3A|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index cecb084a335..4a6c92a29ff 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -17966,7 +17966,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 0, 0 } } } }, { MN_pcmpeqq, 0x29, 2, SPACE_0F38, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -17980,7 +17980,7 @@ static const insn_template i386_optab[] = 1, 0, 0, 0, 0, 0 } } } }, { MN_pcmpeqq, 0x29, 2, SPACE_0F38, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -30132,7 +30132,7 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0 } } } }, { MN_vpcmpeqq, 0x29, 3, SPACE_0F38, None, { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,