From b71679934e64bdad94409ace50ee79471cad6f20 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 18 Oct 2018 14:31:29 +0000 Subject: [PATCH] i386.c (ix86_builtin_vectorization_cost): Do not feed width-specific load/store costs through ix86_vec_cost. 2018-10-18 Richard Biener * config/i386/i386.c (ix86_builtin_vectorization_cost): Do not feed width-specific load/store costs through ix86_vec_cost. * config/i386/x86-tune-costs.h (athlon_cost): Adjust. (k8_cost): Likewise. (bdver_cost): Likewise. (znver1_cost): Likewise. (btver1_cost): Likewise. (btver2_cost): Likewise. From-SVN: r265268 --- gcc/ChangeLog | 11 ++++++++ gcc/config/i386/i386.c | 14 +++------- gcc/config/i386/x86-tune-costs.h | 48 ++++++++++++++++---------------- 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 87519ad674c..1c2e6129218 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2018-10-18 Richard Biener + + * config/i386/i386.c (ix86_builtin_vectorization_cost): Do not + feed width-specific load/store costs through ix86_vec_cost. + * config/i386/x86-tune-costs.h (athlon_cost): Adjust. + (k8_cost): Likewise. + (bdver_cost): Likewise. + (znver1_cost): Likewise. + (btver1_cost): Likewise. + (btver2_cost): Likewise. + 2018-10-18 H.J. Lu * simplify-rtx.c (simplify_subreg): Call simplify_gen_subreg diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9a5b54978a9..03324c0d5ac 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -45108,16 +45108,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, /* See PR82713 - we may end up being called on non-vector type. */ if (index < 0) index = 2; - return ix86_vec_cost (mode, - COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2); + return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2; case vector_store: index = sse_store_index (mode); /* See PR82713 - we may end up being called on non-vector type. */ if (index < 0) index = 2; - return ix86_vec_cost (mode, - COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2); + return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2; case vec_to_scalar: case scalar_to_vec: @@ -45130,18 +45128,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, /* See PR82713 - we may end up being called on non-vector type. */ if (index < 0) index = 2; - return ix86_vec_cost (mode, - COSTS_N_INSNS - (ix86_cost->sse_unaligned_load[index]) / 2); + return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2; case unaligned_store: index = sse_store_index (mode); /* See PR82713 - we may end up being called on non-vector type. */ if (index < 0) index = 2; - return ix86_vec_cost (mode, - COSTS_N_INSNS - (ix86_cost->sse_unaligned_store[index]) / 2); + return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2; case vector_gather_load: return ix86_vec_cost (mode, diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index dbf9eb50402..50ecb35cbde 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -795,12 +795,12 @@ struct processor_costs athlon_cost = { {4, 4}, /* cost of storing MMX registers in SImode and DImode */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 4, 6, 12, 24}, /* cost of loading SSE registers + {4, 4, 12, 12, 24}, /* cost of loading SSE registers in 32,64,128,256 and 512-bit */ - {4, 4, 6, 12, 24}, /* cost of unaligned loads. */ - {4, 4, 5, 10, 20}, /* cost of storing SSE registers + {4, 4, 12, 12, 24}, /* cost of unaligned loads. */ + {4, 4, 10, 10, 20}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ - {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ + {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 5, 5, /* SSE->integer and integer->SSE moves */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ @@ -891,12 +891,12 @@ struct processor_costs k8_cost = { {4, 4}, /* cost of storing MMX registers in SImode and DImode */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {4, 3, 6, 12, 24}, /* cost of loading SSE registers + {4, 3, 12, 12, 24}, /* cost of loading SSE registers in 32,64,128,256 and 512-bit */ - {4, 3, 6, 12, 24}, /* cost of unaligned loads. */ - {4, 4, 5, 10, 20}, /* cost of storing SSE registers + {4, 3, 12, 12, 24}, /* cost of unaligned loads. */ + {4, 4, 10, 10, 20}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ - {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ + {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 5, 5, /* SSE->integer and integer->SSE moves */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ @@ -1100,12 +1100,12 @@ const struct processor_costs bdver_cost = { {10, 10}, /* cost of storing MMX registers in SImode and DImode */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {12, 12, 10, 20, 30}, /* cost of loading SSE registers + {12, 12, 10, 40, 60}, /* cost of loading SSE registers in 32,64,128,256 and 512-bit */ - {12, 12, 10, 20, 30}, /* cost of unaligned loads. */ - {10, 10, 10, 20, 30}, /* cost of storing SSE registers + {12, 12, 10, 40, 60}, /* cost of unaligned loads. */ + {10, 10, 10, 40, 60}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ - {10, 10, 10, 20, 30}, /* cost of unaligned stores. */ + {10, 10, 10, 40, 60}, /* cost of unaligned stores. */ 16, 20, /* SSE->integer and integer->SSE moves */ 12, 12, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ @@ -1212,12 +1212,12 @@ struct processor_costs znver1_cost = { {8, 8}, /* cost of storing MMX registers in SImode and DImode. */ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ - {6, 6, 6, 6, 12}, /* cost of loading SSE registers + {6, 6, 6, 12, 24}, /* cost of loading SSE registers in 32,64,128,256 and 512-bit. */ - {6, 6, 6, 6, 12}, /* cost of unaligned loads. */ - {8, 8, 8, 8, 16}, /* cost of storing SSE registers + {6, 6, 6, 12, 24}, /* cost of unaligned loads. */ + {8, 8, 8, 16, 32}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit. */ - {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ + {8, 8, 8, 16, 32}, /* cost of unaligned stores. */ 6, 6, /* SSE->integer and integer->SSE moves. */ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, throughput 12. Approx 9 uops do not depend on vector size and every load @@ -1420,12 +1420,12 @@ const struct processor_costs btver1_cost = { {12, 12}, /* cost of storing MMX registers in SImode and DImode */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {10, 10, 12, 24, 48}, /* cost of loading SSE registers + {10, 10, 12, 48, 96}, /* cost of loading SSE registers in 32,64,128,256 and 512-bit */ - {10, 10, 12, 24, 48}, /* cost of unaligned loads. */ - {10, 10, 12, 24, 48}, /* cost of storing SSE registers + {10, 10, 12, 48, 96}, /* cost of unaligned loads. */ + {10, 10, 12, 48, 96}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ - {10, 10, 12, 24, 48}, /* cost of unaligned stores. */ + {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 14, 14, /* SSE->integer and integer->SSE moves */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ @@ -1511,12 +1511,12 @@ const struct processor_costs btver2_cost = { {12, 12}, /* cost of storing MMX registers in SImode and DImode */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ - {10, 10, 12, 24, 48}, /* cost of loading SSE registers + {10, 10, 12, 48, 96}, /* cost of loading SSE registers in 32,64,128,256 and 512-bit */ - {10, 10, 12, 24, 48}, /* cost of unaligned loads. */ - {10, 10, 12, 24, 48}, /* cost of storing SSE registers + {10, 10, 12, 48, 96}, /* cost of unaligned loads. */ + {10, 10, 12, 48, 96}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ - {10, 10, 12, 24, 48}, /* cost of unaligned stores. */ + {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 14, 14, /* SSE->integer and integer->SSE moves */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ -- 2.30.2