From c906a69a1f30c12074165f5be0027249c643e904 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 21 Jan 2020 08:25:31 +0100 Subject: [PATCH] x86: VCVTNEPS2BF16{X,Y} should permit broadcasting Just like other VCVT*{X,Y} templates do, and to allow the programmer flexibility (might be relevant in particular when heavily macro-izing code), the two templates should also have Broadcast set, just like their X/Y-suffix-less counterparts. This in turn requires them to also have * Dword set on their memory operands, to cover the logic added to i386gen by 4a1b91eabbe7 ("x86: Expand Broadcast to 3 bits"), * RegXMM/RegYMM set on their source operands, to satisfy broadcast sizing logic in gas itself. Otherwise ATTSyntax templates wouldn't need such operand size attributes. While extending the test cases, also add Intel syntax broadcast forms without explicit size specifiers. --- gas/ChangeLog | 9 +++++++++ gas/testsuite/gas/i386/avx512_bf16_vl.d | 4 ++++ gas/testsuite/gas/i386/avx512_bf16_vl.s | 4 ++++ gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.d | 4 ++++ gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.s | 4 ++++ opcodes/ChangeLog | 7 +++++++ opcodes/i386-opc.tbl | 4 ++-- opcodes/i386-tbl.h | 12 ++++++------ 8 files changed, 40 insertions(+), 8 deletions(-) diff --git a/gas/ChangeLog b/gas/ChangeLog index bd3b750e97f..798d7cff521 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,12 @@ +2020-01-21 Jan Beulich + + * testsuite/gas/i386/avx512_bf16_vl.s, + testsuite/gas/i386/x86-64-avx512_bf16_vl.s: Add broadcast forms + of VCVTNEPS2BF16{X,Y}. Add operand-size less Intel syntax + broadcast forms of VCVTNEPS2BF16. + * testsuite/gas/i386/avx512_bf16_vl.d, + testsuite/gas/i386/x86-64-avx512_bf16_vl.d: Adjust expectations. + 2020-01-20 Nick Clifton * po/uk.po: Updated Ukranian translation. diff --git a/gas/testsuite/gas/i386/avx512_bf16_vl.d b/gas/testsuite/gas/i386/avx512_bf16_vl.d index 1467bc37671..21d7c8a6796 100644 --- a/gas/testsuite/gas/i386/avx512_bf16_vl.d +++ b/gas/testsuite/gas/i386/avx512_bf16_vl.d @@ -23,9 +23,11 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 62 f2 7e 28 72 f5 vcvtneps2bf16 %ymm5,%xmm6 [ ]*[a-f0-9]+: 62 f2 7e 0f 72 b4 f4 00 00 00 10 vcvtneps2bf16x 0x10000000\(%esp,%esi,8\),%xmm6\{%k7\} [ ]*[a-f0-9]+: 62 f2 7e 18 72 31 vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6 +[ ]*[a-f0-9]+: 62 f2 7e 18 72 31 vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6 [ ]*[a-f0-9]+: 62 f2 7e 08 72 71 7f vcvtneps2bf16x 0x7f0\(%ecx\),%xmm6 [ ]*[a-f0-9]+: 62 f2 7e 9f 72 b2 00 f8 ff ff vcvtneps2bf16 -0x800\(%edx\)\{1to4\},%xmm6\{%k7\}\{z\} [ ]*[a-f0-9]+: 62 f2 7e 38 72 31 vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6 +[ ]*[a-f0-9]+: 62 f2 7e 38 72 31 vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6 [ ]*[a-f0-9]+: 62 f2 7e 28 72 71 7f vcvtneps2bf16y 0xfe0\(%ecx\),%xmm6 [ ]*[a-f0-9]+: 62 f2 7e bf 72 b2 00 f0 ff ff vcvtneps2bf16 -0x1000\(%edx\)\{1to8\},%xmm6\{%k7\}\{z\} [ ]*[a-f0-9]+: 62 f2 56 28 52 f4 vdpbf16ps %ymm4,%ymm5,%ymm6 @@ -52,9 +54,11 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 62 f2 7e 28 72 f5 vcvtneps2bf16 %ymm5,%xmm6 [ ]*[a-f0-9]+: 62 f2 7e 0f 72 b4 f4 00 00 00 10 vcvtneps2bf16x 0x10000000\(%esp,%esi,8\),%xmm6\{%k7\} [ ]*[a-f0-9]+: 62 f2 7e 18 72 31 vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6 +[ ]*[a-f0-9]+: 62 f2 7e 18 72 31 vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6 [ ]*[a-f0-9]+: 62 f2 7e 08 72 71 7f vcvtneps2bf16x 0x7f0\(%ecx\),%xmm6 [ ]*[a-f0-9]+: 62 f2 7e 9f 72 b2 00 f8 ff ff vcvtneps2bf16 -0x800\(%edx\)\{1to4\},%xmm6\{%k7\}\{z\} [ ]*[a-f0-9]+: 62 f2 7e 38 72 31 vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6 +[ ]*[a-f0-9]+: 62 f2 7e 38 72 31 vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6 [ ]*[a-f0-9]+: 62 f2 7e 28 72 71 7f vcvtneps2bf16y 0xfe0\(%ecx\),%xmm6 [ ]*[a-f0-9]+: 62 f2 7e bf 72 b2 00 f0 ff ff vcvtneps2bf16 -0x1000\(%edx\)\{1to8\},%xmm6\{%k7\}\{z\} [ ]*[a-f0-9]+: 62 f2 56 28 52 f4 vdpbf16ps %ymm4,%ymm5,%ymm6 diff --git a/gas/testsuite/gas/i386/avx512_bf16_vl.s b/gas/testsuite/gas/i386/avx512_bf16_vl.s index 7872765b771..4f79d19e220 100644 --- a/gas/testsuite/gas/i386/avx512_bf16_vl.s +++ b/gas/testsuite/gas/i386/avx512_bf16_vl.s @@ -17,9 +17,11 @@ _start: vcvtneps2bf16 %ymm5, %xmm6 #AVX512{BF16,VL} vcvtneps2bf16x 0x10000000(%esp, %esi, 8), %xmm6{%k7} #AVX512{BF16,VL} MASK_ENABLING vcvtneps2bf16 (%ecx){1to4}, %xmm6 #AVX512{BF16,VL} BROADCAST_EN + vcvtneps2bf16x (%ecx){1to4}, %xmm6 #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16x 2032(%ecx), %xmm6 #AVX512{BF16,VL} Disp8 vcvtneps2bf16 -2048(%edx){1to4}, %xmm6{%k7}{z} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL vcvtneps2bf16 (%ecx){1to8}, %xmm6 #AVX512{BF16,VL} BROADCAST_EN + vcvtneps2bf16y (%ecx){1to8}, %xmm6 #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16y 4064(%ecx), %xmm6 #AVX512{BF16,VL} Disp8 vcvtneps2bf16 -4096(%edx){1to8}, %xmm6{%k7}{z} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL vdpbf16ps %ymm4, %ymm5, %ymm6 #AVX512{BF16,VL} @@ -47,9 +49,11 @@ _start: vcvtneps2bf16 xmm6, xmm5 #AVX512{BF16,VL} vcvtneps2bf16 xmm6, ymm5 #AVX512{BF16,VL} vcvtneps2bf16 xmm6{k7}, XMMWORD PTR [esp+esi*8+0x10000000] #AVX512{BF16,VL} MASK_ENABLING + vcvtneps2bf16 xmm6, [ecx]{1to4} #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16 xmm6, DWORD PTR [ecx]{1to4} #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16 xmm6, XMMWORD PTR [ecx+2032] #AVX512{BF16,VL} Disp8 vcvtneps2bf16 xmm6{k7}{z}, DWORD PTR [edx-2048]{1to4} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL + vcvtneps2bf16 xmm6, [ecx]{1to8} #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16 xmm6, DWORD PTR [ecx]{1to8} #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16 xmm6, YMMWORD PTR [ecx+4064] #AVX512{BF16,VL} Disp8 vcvtneps2bf16 xmm6{k7}{z}, DWORD PTR [edx-4096]{1to8} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL diff --git a/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.d b/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.d index 43810a6ac65..1dfff2c0aae 100644 --- a/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.d +++ b/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.d @@ -23,9 +23,11 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 62 02 7e 28 72 f5 vcvtneps2bf16 %ymm29,%xmm30 [ ]*[a-f0-9]+: 62 22 7e 0f 72 b4 f5 00 00 00 10 vcvtneps2bf16x 0x10000000\(%rbp,%r14,8\),%xmm30\{%k7\} [ ]*[a-f0-9]+: 62 c2 7e 18 72 29 vcvtneps2bf16 \(%r9\)\{1to4\},%xmm21 +[ ]*[a-f0-9]+: 62 f2 7e 18 72 09 vcvtneps2bf16 \(%rcx\)\{1to4\},%xmm1 [ ]*[a-f0-9]+: 62 62 7e 08 72 71 7f vcvtneps2bf16x 0x7f0\(%rcx\),%xmm30 [ ]*[a-f0-9]+: 62 62 7e 9f 72 aa 00 f8 ff ff vcvtneps2bf16 -0x800\(%rdx\)\{1to4\},%xmm29\{%k7\}\{z\} [ ]*[a-f0-9]+: 62 c2 7e 38 72 31 vcvtneps2bf16 \(%r9\)\{1to8\},%xmm22 +[ ]*[a-f0-9]+: 62 f2 7e 38 72 11 vcvtneps2bf16 \(%rcx\)\{1to8\},%xmm2 [ ]*[a-f0-9]+: 62 e2 7e 28 72 79 7f vcvtneps2bf16y 0xfe0\(%rcx\),%xmm23 [ ]*[a-f0-9]+: 62 62 7e bf 72 9a 00 f0 ff ff vcvtneps2bf16 -0x1000\(%rdx\)\{1to8\},%xmm27\{%k7\}\{z\} [ ]*[a-f0-9]+: 62 02 16 20 52 f4 vdpbf16ps %ymm28,%ymm29,%ymm30 @@ -51,9 +53,11 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 62 02 7e 08 72 f5 vcvtneps2bf16 %xmm29,%xmm30 [ ]*[a-f0-9]+: 62 02 7e 28 72 f5 vcvtneps2bf16 %ymm29,%xmm30 [ ]*[a-f0-9]+: 62 22 7e 0f 72 b4 f5 00 00 00 10 vcvtneps2bf16x 0x10000000\(%rbp,%r14,8\),%xmm30\{%k7\} +[ ]*[a-f0-9]+: 62 f2 7e 18 72 29 vcvtneps2bf16 \(%rcx\)\{1to4\},%xmm5 [ ]*[a-f0-9]+: 62 42 7e 18 72 09 vcvtneps2bf16 \(%r9\)\{1to4\},%xmm25 [ ]*[a-f0-9]+: 62 62 7e 08 72 71 7f vcvtneps2bf16x 0x7f0\(%rcx\),%xmm30 [ ]*[a-f0-9]+: 62 62 7e 9f 72 b2 00 f8 ff ff vcvtneps2bf16 -0x800\(%rdx\)\{1to4\},%xmm30\{%k7\}\{z\} +[ ]*[a-f0-9]+: 62 f2 7e 38 72 21 vcvtneps2bf16 \(%rcx\)\{1to8\},%xmm4 [ ]*[a-f0-9]+: 62 42 7e 38 72 01 vcvtneps2bf16 \(%r9\)\{1to8\},%xmm24 [ ]*[a-f0-9]+: 62 62 7e 28 72 71 7f vcvtneps2bf16y 0xfe0\(%rcx\),%xmm30 [ ]*[a-f0-9]+: 62 62 7e bf 72 b2 00 f0 ff ff vcvtneps2bf16 -0x1000\(%rdx\)\{1to8\},%xmm30\{%k7\}\{z\} diff --git a/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.s b/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.s index e7c3a0aee43..8453d8b7bc5 100644 --- a/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.s +++ b/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.s @@ -17,9 +17,11 @@ _start: vcvtneps2bf16 %ymm29, %xmm30 #AVX512{BF16,VL} vcvtneps2bf16x 0x10000000(%rbp, %r14, 8), %xmm30{%k7} #AVX512{BF16,VL} MASK_ENABLING vcvtneps2bf16 (%r9){1to4}, %xmm21 #AVX512{BF16,VL} BROADCAST_EN + vcvtneps2bf16x (%rcx){1to4}, %xmm1 #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16x 2032(%rcx), %xmm30 #AVX512{BF16,VL} Disp8 vcvtneps2bf16 -2048(%rdx){1to4}, %xmm29{%k7}{z} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL vcvtneps2bf16 (%r9){1to8}, %xmm22 #AVX512{BF16,VL} BROADCAST_EN + vcvtneps2bf16y (%rcx){1to8}, %xmm2 #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16y 4064(%rcx), %xmm23 #AVX512{BF16,VL} Disp8 vcvtneps2bf16 -4096(%rdx){1to8}, %xmm27{%k7}{z} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL vdpbf16ps %ymm28, %ymm29, %ymm30 #AVX512{BF16,VL} @@ -47,9 +49,11 @@ _start: vcvtneps2bf16 xmm30, xmm29 #AVX512{BF16,VL} vcvtneps2bf16 xmm30, ymm29 #AVX512{BF16,VL} vcvtneps2bf16 xmm30{k7}, XMMWORD PTR [rbp+r14*8+0x10000000] #AVX512{BF16,VL} MASK_ENABLING + vcvtneps2bf16 xmm5, [rcx]{1to4} #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16 xmm25, DWORD PTR [r9]{1to4} #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16 xmm30, XMMWORD PTR [rcx+2032] #AVX512{BF16,VL} Disp8 vcvtneps2bf16 xmm30{k7}{z}, DWORD PTR [rdx-2048]{1to4} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL + vcvtneps2bf16 xmm4, [rcx]{1to8} #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16 xmm24, DWORD PTR [r9]{1to8} #AVX512{BF16,VL} BROADCAST_EN vcvtneps2bf16 xmm30, YMMWORD PTR [rcx+4064] #AVX512{BF16,VL} Disp8 vcvtneps2bf16 xmm30{k7}{z}, DWORD PTR [rdx-4096]{1to8} #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index b6ef559f3e8..d02f133eac5 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,10 @@ +2020-01-21 Jan Beulich + + * i386-opc.tbl (vcvtneps2bf16x): Add Broadcast, Xmmword, and + Dword. + (vcvtneps2bf16y): Add Broadcast, Ymmword, and Dword. + * i386-tbl.h: Re-generate. + 2020-01-20 Nick Clifton * po/de.po: Updated German translation. diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index d8af2599032..2396955a910 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -4772,8 +4772,8 @@ vcvtneps2bf16, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|E vcvtneps2bf16, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex256|Masking=3|VexW0|Broadcast|Disp8MemShift=5|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|Dword|BaseIndex, RegXMM } vcvtneps2bf16, 2, 0xf372, None, 1, CpuAVX512_BF16, Modrm|VexOpcode|EVex512|Masking=3|VexW0|Broadcast|Disp8MemShift=6|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Dword|Unspecified|BaseIndex, RegYMM } -vcvtneps2bf16x, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex128|Masking=3|VexW0|Disp8MemShift=4|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { Unspecified|BaseIndex, RegXMM } -vcvtneps2bf16y, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex256|Masking=3|VexW0|Disp8MemShift=5|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { Unspecified|BaseIndex, RegXMM } +vcvtneps2bf16x, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex128|Masking=3|VexW0|Broadcast|Disp8MemShift=4|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM } +vcvtneps2bf16y, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex256|Masking=3|VexW0|Broadcast|Disp8MemShift=5|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { RegYMM|Dword|Unspecified|BaseIndex, RegXMM } vdpbf16ps, 3, 0xf352, None, 1, CpuAVX512_BF16, Modrm|VexOpcode|VexVVVV|Masking=3|VexW0|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM } diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index 758d92bcfd2..3bdfb415ac5 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -58323,9 +58323,9 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 0, 0, 0, 0, 2, 3, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0 }, - { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0 } }, + 1, 0, 0, 0, 0, 2, 3, 3, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0 }, + { { { 7, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, + 0, 1, 0, 0, 1, 0 } }, { { 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } } } }, { "vcvtneps2bf16y", 0xf372, None, 1, 2, @@ -58337,9 +58337,9 @@ const insn_template i386_optab[] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 1, 0, 0, 0, 0, 3, 3, 0, 0, 0, 5, 0, 0, 0, 0, 1, 0, 0, 0 }, - { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0 } }, + 1, 0, 0, 0, 0, 3, 3, 3, 0, 0, 5, 0, 0, 0, 0, 1, 0, 0, 0 }, + { { { 7, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0 } }, { { 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } } } }, { "vdpbf16ps", 0xf352, None, 1, 3, -- 2.30.2