COSTS_N_BYTES (2), /* cost of FABS instruction. */
COSTS_N_BYTES (2), /* cost of FCHS instruction. */
COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
ix86_size_memcpy,
ix86_size_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (22), /* cost of FABS instruction. */
COSTS_N_INSNS (24), /* cost of FCHS instruction. */
COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i386_memcpy,
i386_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (3), /* cost of FABS instruction. */
COSTS_N_INSNS (3), /* cost of FCHS instruction. */
COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i486_memcpy,
i486_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (1), /* cost of FABS instruction. */
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (1), /* cost of FABS instruction. */
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentiumpro_memcpy,
pentiumpro_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (1), /* cost of FABS instruction. */
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
geode_memcpy,
geode_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k6_memcpy,
k6_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
athlon_memcpy,
athlon_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
-
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k8_memcpy,
k8_memset,
4, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
-
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
amdfam10_memcpy,
amdfam10_memset,
4, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
-
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver1_memcpy,
bdver1_memset,
6, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
-
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver2_memcpy,
bdver2_memset,
6, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
-
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver3_memcpy,
bdver3_memset,
6, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
-
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver4_memcpy,
bdver4_memset,
6, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
-
+ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
+ and it can execute 2 integer additions and 2 multiplications thus
+ reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests
+ that 4 works better than 6 probably due to register pressure.
+
+ Integer vector operations are taken by FP unit and execute 3 vector
+ plus/minus operations per cycle but only one multiply. This is adjusted
+ in ix86_reassociation_width. */
+ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
znver1_memcpy,
znver1_memset,
6, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
-
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver1_memcpy,
btver1_memset,
4, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver2_memcpy,
btver2_memset,
4, /* scalar_stmt_cost. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium4_memcpy,
pentium4_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (3), /* cost of FABS instruction. */
COSTS_N_INSNS (3), /* cost of FCHS instruction. */
COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
nocona_memcpy,
nocona_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (8), /* cost of FABS instruction. */
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
atom_memcpy,
atom_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (8), /* cost of FABS instruction. */
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
slm_memcpy,
slm_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (8), /* cost of FABS instruction. */
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
intel_memcpy,
intel_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (8), /* cost of FABS instruction. */
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
generic_memcpy,
generic_memset,
1, /* scalar_stmt_cost. */
COSTS_N_INSNS (8), /* cost of FABS instruction. */
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
core_memcpy,
core_memset,
1, /* scalar_stmt_cost. */
/* Implementation of reassociation_width target hook used by
reassoc phase to identify parallelism level in reassociated
tree. Statements tree_code is passed in OPC. Arguments type
- is passed in MODE.
-
- Currently parallel reassociation is enabled for Atom
- processors only and we set reassociation width to be 2
- because Atom may issue up to 2 instructions per cycle.
-
- Return value should be fixed if parallel reassociation is
- enabled for other processors. */
+ is passed in MODE. */
static int
-ix86_reassociation_width (unsigned int, machine_mode mode)
+ix86_reassociation_width (unsigned int op, machine_mode mode)
{
+ int width = 1;
/* Vector part. */
if (VECTOR_MODE_P (mode))
{
- if (TARGET_VECTOR_PARALLEL_EXECUTION)
- return 2;
- else
+ int div = 1;
+ if (INTEGRAL_MODE_P (mode))
+ width = ix86_cost->reassoc_vec_int;
+ else if (FLOAT_MODE_P (mode))
+ width = ix86_cost->reassoc_vec_fp;
+
+ if (width == 1)
+ return 1;
+
+ /* Integer vector instructions execute in FP unit
+ and can execute 3 additions and one multiplication per cycle. */
+ if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
+ && op != PLUS && op != MINUS)
return 1;
- }
+ /* Account for targets that splits wide vectors into multiple parts. */
+ if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
+ div = GET_MODE_BITSIZE (mode) / 128;
+ else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
+ div = GET_MODE_BITSIZE (mode) / 64;
+ width = (width + div - 1) / div;
+ }
/* Scalar part. */
- if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
- return 2;
- else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
- return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2);
- else
- return 1;
+ else if (INTEGRAL_MODE_P (mode))
+ width = ix86_cost->reassoc_int;
+ else if (FLOAT_MODE_P (mode))
+ width = ix86_cost->reassoc_fp;
+
+ /* Avoid using too many registers in 32bit mode. */
+ if (!TARGET_64BIT && width > 2)
+ width = 2;
+ return width;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably