From 00cb3494cab397b5655ab42fd69310883c12137c Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 3 Sep 2019 14:41:02 -0700 Subject: [PATCH] x86: Add cost model for operation of mask registers. gcc/ PR target/71453 * config/i386/i386.h (struct processor_costs): Add members mask_to_integer, integer_to_mask, mask_load[3], mask_store[3], mask_move. * config/i386/x86-tune-costs.h (ix86_size_cost, i386_cost, i486_cost, pentium_cost, lakemont_cost, pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost, amdfam10_cost, bdver_cost, znver1_cost, znver2_cost, skylake_cost, btver1_cost, btver2_cost, pentium4_cost, nocona_cost, atom_cost, slm_cost, intel_cost, generic_cost, core_cost): Initialize mask_load[3], mask_store[3], mask_move, integer_to_mask, mask_to_integer for all target costs. * config/i386/i386.c (ix86_register_move_cost): Use cost model of mask registers. (inline_memory_move_cost): Ditto. --- gcc/config/i386/i386.c | 34 ++++++++ gcc/config/i386/i386.h | 7 ++ gcc/config/i386/x86-tune-costs.h | 144 +++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e9ecb94d174..548df57d5b3 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -18740,6 +18740,29 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) return in ? ix86_cost->hard_register.sse_load [index] : ix86_cost->hard_register.sse_store [index]; } + if (MASK_CLASS_P (regclass)) + { + int index; + switch (GET_MODE_SIZE (mode)) + { + case 1: + index = 0; + break; + case 2: + index = 1; + break; + /* DImode loads and stores assumed to cost the same as SImode. */ + default: + index = 2; + break; + } + + if (in == 2) + return MAX (ix86_cost->hard_register.mask_load[index], + ix86_cost->hard_register.mask_store[index]); + return in ?
ix86_cost->hard_register.mask_load[index] + : ix86_cost->hard_register.mask_store[index]; + } if (MMX_CLASS_P (regclass)) { int index; @@ -18865,6 +18888,17 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, ? ix86_cost->hard_register.sse_to_integer : ix86_cost->hard_register.integer_to_sse); + /* Moves between mask register and GPR. */ + if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) + { + return (MASK_CLASS_P (class1) + ? ix86_cost->hard_register.mask_to_integer + : ix86_cost->hard_register.integer_to_mask); + } + /* Moving between mask registers. */ + if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2)) + return ix86_cost->hard_register.mask_move; + if (MAYBE_FLOAT_CLASS_P (class1)) return ix86_cost->hard_register.fp_move; if (MAYBE_SSE_CLASS_P (class1)) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 114967e49a3..e0af87450b8 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -279,6 +279,13 @@ struct processor_costs { in SImode, DImode and TImode. */ const int sse_to_integer; /* cost of moving SSE register to integer. */ const int integer_to_sse; /* cost of moving integer register to SSE. */ + const int mask_to_integer; /* cost of moving mask register to integer. */ + const int integer_to_mask; /* cost of moving integer register to mask. */ + const int mask_load[3]; /* cost of loading mask registers + in QImode, HImode and SImode. */ + const int mask_store[3]; /* cost of storing mask register + in QImode, HImode and SImode. */ + const int mask_move; /* cost of moving mask register.
*/ } hard_register; const int add; /* cost of an add instruction */ diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 3207404e514..256c84e364e 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -59,6 +59,12 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ {3, 3, 3, 3, 3}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 3, 3, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {2, 2, 2}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {2, 2, 2}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -164,6 +170,12 @@ struct processor_costs i386_cost = { /* 386 specific costs */ {4, 8, 16, 32, 64}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 3, 3, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {2, 4, 2}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {2, 4, 2}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -266,6 +278,12 @@ struct processor_costs i486_cost = { /* 486 specific costs */ {4, 8, 16, 32, 64}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 3, 3, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {2, 4, 2}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {2, 4, 2}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs.
*/ }, @@ -370,6 +388,12 @@ struct processor_costs pentium_cost = { {4, 8, 16, 32, 64}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 3, 3, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {2, 4, 2}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {2, 4, 2}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -465,6 +489,12 @@ struct processor_costs lakemont_cost = { {4, 8, 16, 32, 64}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 3, 3, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {2, 4, 2}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {2, 4, 2}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -575,6 +605,12 @@ struct processor_costs pentiumpro_cost = { {4, 8, 16, 32, 64}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 3, 3, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {4, 4, 4}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {2, 2, 2}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -676,6 +712,12 @@ struct processor_costs geode_cost = { {2, 2, 8, 16, 32}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 6, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {2, 2, 2}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {2, 2, 2}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs.
*/ }, @@ -777,6 +819,12 @@ struct processor_costs k6_cost = { {2, 2, 8, 16, 32}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 6, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {4, 5, 4}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {2, 3, 2}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -884,6 +932,12 @@ struct processor_costs athlon_cost = { {4, 4, 10, 10, 20}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 5, 5, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {3, 4, 3}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {3, 4, 3}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -993,6 +1047,12 @@ struct processor_costs k8_cost = { {4, 4, 10, 10, 20}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 5, 5, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {3, 4, 3}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {3, 4, 3}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -1106,6 +1166,12 @@ struct processor_costs amdfam10_cost = { {4, 4, 5, 10, 20}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 3, 3, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {3, 4, 3}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {3, 4, 3}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register.
*/ /* On K8: MOVD reg64, xmmreg Double FSTORE 4 @@ -1229,6 +1295,12 @@ const struct processor_costs bdver_cost = { {10, 10, 10, 40, 60}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 16, 20, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {8, 8, 8}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {8, 8, 8}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -1360,6 +1432,12 @@ struct processor_costs znver1_cost = { {8, 8, 8, 16, 32}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit. */ 6, 6, /* SSE->integer and integer->SSE moves. */ + 2, 2, /* mask->integer and integer->mask moves */ + {6, 6, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {8, 8, 8}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -1509,6 +1587,12 @@ struct processor_costs znver2_cost = { in 32,64,128,256 and 512-bit. */ 6, 6, /* SSE->integer and integer->SSE moves. */ + 2, 2, /* mask->integer and integer->mask moves */ + {6, 6, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {8, 8, 8}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -1643,6 +1727,12 @@ struct processor_costs skylake_cost = { {8, 8, 8, 12, 24}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 6, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {4, 4, 4}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {6, 6, 6}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs.
*/ }, @@ -1751,6 +1841,12 @@ const struct processor_costs btver1_cost = { {10, 10, 12, 48, 96}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 14, 14, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {6, 8, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {6, 8, 6}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -1855,6 +1951,12 @@ const struct processor_costs btver2_cost = { {10, 10, 12, 48, 96}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 14, 14, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {8, 8, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {8, 8, 6}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -1958,6 +2060,12 @@ struct processor_costs pentium4_cost = { {16, 16, 16, 32, 64}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 20, 12, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {4, 5, 4}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {2, 3, 2}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -2064,6 +2172,12 @@ struct processor_costs nocona_cost = { {12, 12, 12, 24, 48}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 20, 12, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {4, 4, 4}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {4, 4, 4}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs.
*/ }, @@ -2168,6 +2282,12 @@ struct processor_costs atom_cost = { {8, 8, 8, 16, 32}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 8, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {6, 6, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {6, 6, 6}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -2272,6 +2392,12 @@ struct processor_costs slm_cost = { {8, 8, 8, 16, 32}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 8, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {8, 8, 8}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {6, 6, 6}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -2376,6 +2502,12 @@ struct processor_costs intel_cost = { {6, 6, 6, 6, 6}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 4, 4, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {4, 4, 4}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {6, 6, 6}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -2484,6 +2616,12 @@ struct processor_costs generic_cost = { {6, 6, 6, 10, 15}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 6, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {6, 6, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {6, 6, 6}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs.
*/ }, @@ -2597,6 +2735,12 @@ struct processor_costs core_cost = { {6, 6, 6, 6, 12}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ 6, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* mask->integer and integer->mask moves */ + {4, 4, 4}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {6, 6, 6}, /* cost of storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ /* End of register allocator costs. */ }, -- 2.30.2