From c8480b58e1968f209b6365af7422678f348222c2 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 21 Oct 2021 06:15:31 -0700 Subject: [PATCH] x86: Add -muse-unaligned-vector-move to assembler Unaligned load/store instructions on aligned memory or register are as fast as aligned load/store instructions on modern Intel processors. Add a command-line option, -muse-unaligned-vector-move, to x86 assembler to encode aligned vector load/store instructions as unaligned vector load/store instructions. * NEWS: Mention -muse-unaligned-vector-move. * config/tc-i386.c (use_unaligned_vector_move): New. (encode_with_unaligned_vector_move): Likewise. (md_assemble): Call encode_with_unaligned_vector_move for -muse-unaligned-vector-move. (OPTION_MUSE_UNALIGNED_VECTOR_MOVE): New. (md_longopts): Add -muse-unaligned-vector-move. (md_parse_option): Handle -muse-unaligned-vector-move. (md_show_usage): Add -muse-unaligned-vector-move. * doc/c-i386.texi: Document -muse-unaligned-vector-move. * testsuite/gas/i386/i386.exp: Run unaligned-vector-move and x86-64-unaligned-vector-move. * testsuite/gas/i386/unaligned-vector-move.d: New file. * testsuite/gas/i386/unaligned-vector-move.s: Likewise. * testsuite/gas/i386/x86-64-unaligned-vector-move.d: Likewise. 
--- gas/NEWS | 3 ++ gas/config/tc-i386.c | 39 +++++++++++++++++++ gas/doc/c-i386.texi | 6 +++ gas/testsuite/gas/i386/i386.exp | 2 + .../gas/i386/unaligned-vector-move.d | 22 +++++++++++ .../gas/i386/unaligned-vector-move.s | 15 +++++++ .../gas/i386/x86-64-unaligned-vector-move.d | 23 +++++++++++ 7 files changed, 110 insertions(+) create mode 100644 gas/testsuite/gas/i386/unaligned-vector-move.d create mode 100644 gas/testsuite/gas/i386/unaligned-vector-move.s create mode 100644 gas/testsuite/gas/i386/x86-64-unaligned-vector-move.d diff --git a/gas/NEWS b/gas/NEWS index 5de205ecd55..de4c61b320d 100644 --- a/gas/NEWS +++ b/gas/NEWS @@ -1,5 +1,8 @@ -*- text -*- +* Add a command-line option, -muse-unaligned-vector-move, for x86 target + to encode aligned vector move as unaligned vector move. + * Add support for Cortex-R52+ for Arm. * Add support for Cortex-A510, Cortex-A710, Cortex-X2 for AArch64. diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 339f9694948..ef30a6bc0e9 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -800,6 +800,9 @@ static unsigned int no_cond_jump_promotion = 0; /* Encode SSE instructions with VEX prefix. */ static unsigned int sse2avx; +/* Encode aligned vector move as unaligned vector move. */ +static unsigned int use_unaligned_vector_move; + /* Encode scalar AVX instructions with specific vector length. */ static enum { @@ -4073,6 +4076,30 @@ check_hle (void) } } +/* Encode aligned vector move as unaligned vector move. */ + +static void +encode_with_unaligned_vector_move (void) +{ + switch (i.tm.base_opcode) + { + case 0x28: + /* movaps/movapd/vmovaps/vmovapd. */ + if (i.tm.opcode_modifier.opcodespace == SPACE_0F + && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66) + i.tm.base_opcode = 0x10; + break; + case 0x6f: + /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. 
*/ + if (i.tm.opcode_modifier.opcodespace == SPACE_0F + && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66) + i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3; + break; + default: + break; + } +} + /* Try the shortest encoding by shortening operand size. */ static void @@ -5056,6 +5083,9 @@ md_assemble (char *line) if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize) optimize_encoding (); + if (use_unaligned_vector_move) + encode_with_unaligned_vector_move (); + if (!process_suffix ()) return; @@ -13060,6 +13090,7 @@ const char *md_shortopts = "qnO::"; #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31) #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32) #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33) +#define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34) struct option md_longopts[] = { @@ -13081,6 +13112,7 @@ struct option md_longopts[] = {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG}, {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG}, {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX}, + {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE}, {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK}, {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK}, {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR}, @@ -13381,6 +13413,10 @@ md_parse_option (int c, const char *arg) sse2avx = 1; break; + case OPTION_MUSE_UNALIGNED_VECTOR_MOVE: + use_unaligned_vector_move = 1; + break; + case OPTION_MSSE_CHECK: if (strcasecmp (arg, "error") == 0) sse_check = check_error; @@ -13796,6 +13832,9 @@ md_show_usage (FILE *stream) fprintf (stream, _("\ -msse2avx encode SSE instructions with VEX prefix\n")); fprintf (stream, _("\ + -muse-unaligned-vector-move\n\ + encode aligned vector move as unaligned vector move\n")); + fprintf (stream, _("\ -msse-check=[none|error|warning] (default: warning)\n\ check SSE instructions\n")); fprintf (stream, _("\ diff --git 
a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi index 99576ef2953..6245d9be82a 100644 --- a/gas/doc/c-i386.texi +++ b/gas/doc/c-i386.texi @@ -316,6 +316,12 @@ Valid @var{CPU} values are identical to the processor list of This option specifies that the assembler should encode SSE instructions with VEX prefix. +@cindex @samp{-muse-unaligned-vector-move} option, i386 +@cindex @samp{-muse-unaligned-vector-move} option, x86-64 +@item -muse-unaligned-vector-move +This option specifies that the assembler should encode aligned vector +move as unaligned vector move. + @cindex @samp{-msse-check=} option, i386 @cindex @samp{-msse-check=} option, x86-64 @item -msse-check=@var{none} diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp index 680259b1c4e..378e32b39cb 100644 --- a/gas/testsuite/gas/i386/i386.exp +++ b/gas/testsuite/gas/i386/i386.exp @@ -272,6 +272,7 @@ if [gas_32_check] then { run_dump_test "evex-wig1-intel" run_dump_test "evex-no-scale-32" run_dump_test "sse2avx" + run_dump_test "unaligned-vector-move" run_list_test "inval-avx" "-al" run_list_test "inval-avx512f" "-al" run_list_test "inval-avx512vl" "-al" @@ -948,6 +949,7 @@ if [gas_64_check] then { run_dump_test "x86-64-evex-wig2" run_dump_test "evex-no-scale-64" run_dump_test "x86-64-sse2avx" + run_dump_test "x86-64-unaligned-vector-move" run_list_test "x86-64-inval-avx" "-al" run_list_test "x86-64-inval-avx512f" "-al" run_list_test "x86-64-inval-avx512vl" "-al" diff --git a/gas/testsuite/gas/i386/unaligned-vector-move.d b/gas/testsuite/gas/i386/unaligned-vector-move.d new file mode 100644 index 00000000000..d5df2aed58e --- /dev/null +++ b/gas/testsuite/gas/i386/unaligned-vector-move.d @@ -0,0 +1,22 @@ +#as: -muse-unaligned-vector-move +#objdump: -dw +#name: i386 (Encode aligned vector move as unaligned vector move) + +.*: +file format .* + + +Disassembly of section .text: + +0+ <_start>: + +[a-f0-9]+: 0f 10 d1 movups %xmm1,%xmm2 + +[a-f0-9]+: 66 0f 10 d1 movupd %xmm1,%xmm2 + +[a-f0-9]+: 
f3 0f 6f d1 movdqu %xmm1,%xmm2 + +[a-f0-9]+: c5 f8 10 d1 vmovups %xmm1,%xmm2 + +[a-f0-9]+: c5 f9 10 d1 vmovupd %xmm1,%xmm2 + +[a-f0-9]+: c5 fa 6f d1 vmovdqu %xmm1,%xmm2 + +[a-f0-9]+: c5 f8 10 d1 vmovups %xmm1,%xmm2 + +[a-f0-9]+: 62 f1 fd 09 10 d1 vmovupd %xmm1,%xmm2\{%k1\} + +[a-f0-9]+: 62 f1 7c 09 10 d1 vmovups %xmm1,%xmm2\{%k1\} + +[a-f0-9]+: 62 f1 7e 09 6f d1 vmovdqu32 %xmm1,%xmm2\{%k1\} + +[a-f0-9]+: 62 f1 fe 09 6f d1 vmovdqu64 %xmm1,%xmm2\{%k1\} +#pass diff --git a/gas/testsuite/gas/i386/unaligned-vector-move.s b/gas/testsuite/gas/i386/unaligned-vector-move.s new file mode 100644 index 00000000000..b88ae232a38 --- /dev/null +++ b/gas/testsuite/gas/i386/unaligned-vector-move.s @@ -0,0 +1,15 @@ +# Encode aligned vector move as unaligned vector move. + + .text +_start: + movaps %xmm1, %xmm2 + movapd %xmm1, %xmm2 + movdqa %xmm1, %xmm2 + vmovaps %xmm1, %xmm2 + vmovapd %xmm1, %xmm2 + vmovdqa %xmm1, %xmm2 + vmovaps %xmm1, %xmm2 + vmovapd %xmm1, %xmm2{%k1} + vmovaps %xmm1, %xmm2{%k1} + vmovdqa32 %xmm1, %xmm2{%k1} + vmovdqa64 %xmm1, %xmm2{%k1} diff --git a/gas/testsuite/gas/i386/x86-64-unaligned-vector-move.d b/gas/testsuite/gas/i386/x86-64-unaligned-vector-move.d new file mode 100644 index 00000000000..cbcd7719bf0 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-unaligned-vector-move.d @@ -0,0 +1,23 @@ +#source: unaligned-vector-move.s +#as: -muse-unaligned-vector-move +#objdump: -dw +#name: x86-64 (Encode aligned vector move as unaligned vector move) + +.*: +file format .* + + +Disassembly of section .text: + +0+ <_start>: + +[a-f0-9]+: 0f 10 d1 movups %xmm1,%xmm2 + +[a-f0-9]+: 66 0f 10 d1 movupd %xmm1,%xmm2 + +[a-f0-9]+: f3 0f 6f d1 movdqu %xmm1,%xmm2 + +[a-f0-9]+: c5 f8 10 d1 vmovups %xmm1,%xmm2 + +[a-f0-9]+: c5 f9 10 d1 vmovupd %xmm1,%xmm2 + +[a-f0-9]+: c5 fa 6f d1 vmovdqu %xmm1,%xmm2 + +[a-f0-9]+: c5 f8 10 d1 vmovups %xmm1,%xmm2 + +[a-f0-9]+: 62 f1 fd 09 10 d1 vmovupd %xmm1,%xmm2\{%k1\} + +[a-f0-9]+: 62 f1 7c 09 10 d1 vmovups %xmm1,%xmm2\{%k1\} + +[a-f0-9]+: 62 
f1 7e 09 6f d1 vmovdqu32 %xmm1,%xmm2\{%k1\} + +[a-f0-9]+: 62 f1 fe 09 6f d1 vmovdqu64 %xmm1,%xmm2\{%k1\} +#pass -- 2.30.2