+2015-04-06 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ PR target/65614
+ * config/rs6000/rs6000.c (struct processor_costs): Add cost field
+ for SF->DF conversions to make FLOAT_EXTEND more expensive, so
+ that LFD is used to load double constants instead of LFS. Add
+ defaults for all cost structures. Add comments for missing
+ initialization fields.
+ (size32_cost): Likewise.
+ (size64_cost): Likewise.
+ (rs64a_cost): Likewise.
+ (mpccore_cost): Likewise.
+ (ppc403_cost): Likewise.
+ (ppc405_cost): Likewise.
+ (ppc440_cost): Likewise.
+ (ppc476_cost): Likewise.
+ (ppc601_cost): Likewise.
+ (ppc603_cost): Likewise.
+ (ppc604_cost): Likewise.
+ (ppc604e_cost): Likewise.
+ (ppc620_cost): Likewise.
+ (ppc630_cost): Likewise.
+ (ppccell_cost): Likewise.
+ (ppc750_cost): Likewise.
+ (ppc7450_cost): Likewise.
+ (ppc8540_cost): Likewise.
+ (ppce300c2c3_cost): Likewise.
+ (ppce500mc_cost): Likewise.
+ (ppce500mc64_cost): Likewise.
+ (ppce5500_cost): Likewise.
+ (ppce6500_cost): Likewise.
+ (titan_cost): Likewise.
+ (power4_cost): Likewise.
+ (power6_cost): Likewise.
+ (power7_cost): Likewise.
+ (power8_cost): Likewise.
+ (ppca2_cost): Likewise.
+ (rs6000_rtx_costs): Make FLOAT_EXTEND use SFDF_convert field.
+
+ * config/rs6000/rs6000.md (extendsfdf2_fpr): Generate XSCPSGNDP
+ instead of XXLOR when copying SFmode values, to clear out the
+ dirty bits created when SFmode denormals are generated.
+ (mov<mode>_hardfloat, FMOVE32 case): Likewise.
+ (truncdfsf2_fpr): Add support for ISA 2.07 XSRSP instruction.
+
2015-04-06 Evandro Menezes <e.menezes@samsung.com>
* doc/invoke.texi (AARCH64/mtune): Add exynos-m1 as an option.
const int l2_cache_size; /* size of l2 cache, in kilobytes. */
const int simultaneous_prefetches; /* number of parallel prefetch
operations. */
+ const int sfdf_convert; /* cost of SF->DF conversion. */
};
const struct processor_costs *rs6000_cost;
COSTS_N_INSNS (1), /* dmul */
COSTS_N_INSNS (1), /* sdiv */
COSTS_N_INSNS (1), /* ddiv */
- 32,
- 0,
- 0,
- 0,
+ 32, /* cache line size */
+ 0, /* l1 cache */
+ 0, /* l2 cache */
+ 0, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction size costs on 64bit processors. */
COSTS_N_INSNS (1), /* dmul */
COSTS_N_INSNS (1), /* sdiv */
COSTS_N_INSNS (1), /* ddiv */
- 128,
- 0,
- 0,
- 0,
+ 128, /* cache line size */
+ 0, /* l1 cache */
+ 0, /* l2 cache */
+ 0, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on RS64A processors. */
128, /* l1 cache */
2048, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on MPCCORE processors. */
4, /* l1 cache */
16, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC403 processors. */
4, /* l1 cache */
16, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC405 processors. */
16, /* l1 cache */
128, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC440 processors. */
32, /* l1 cache */
256, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC476 processors. */
32, /* l1 cache */
512, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC601 processors. */
32, /* l1 cache */
256, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC603 processors. */
8, /* l1 cache */
64, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC604 processors. */
16, /* l1 cache */
512, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC604e processors. */
32, /* l1 cache */
1024, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC620 processors. */
32, /* l1 cache */
1024, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC630 processors. */
64, /* l1 cache */
1024, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on Cell processor. */
32, /* l1 cache */
512, /* l2 cache */
6, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC750 and PPC7400 processors. */
32, /* l1 cache */
512, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC7450 processors. */
32, /* l1 cache */
1024, /* l2 cache */
1, /* streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPC8540 processors. */
32, /* l1 cache */
256, /* l2 cache */
 1, /* prefetch streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores. */
16, /* l1 cache */
16, /* l2 cache */
 1, /* prefetch streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors. */
32, /* l1 cache */
128, /* l2 cache */
 1, /* prefetch streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPCE500MC64 processors. */
32, /* l1 cache */
128, /* l2 cache */
 1, /* prefetch streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPCE5500 processors. */
32, /* l1 cache */
128, /* l2 cache */
 1, /* prefetch streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on PPCE6500 processors. */
32, /* l1 cache */
128, /* l2 cache */
 1, /* prefetch streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on AppliedMicro Titan processors. */
32, /* l1 cache */
512, /* l2 cache */
 1, /* prefetch streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors. */
32, /* l1 cache */
1024, /* l2 cache */
 8, /* prefetch streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on POWER6 processors. */
64, /* l1 cache */
2048, /* l2 cache */
16, /* prefetch streams */
+ 0, /* SF->DF convert */
};
/* Instruction costs on POWER7 processors. */
32, /* l1 cache */
256, /* l2 cache */
12, /* prefetch streams */
+ COSTS_N_INSNS (3), /* SF->DF convert */
};
/* Instruction costs on POWER8 processors. */
32, /* l1 cache */
256, /* l2 cache */
12, /* prefetch streams */
+ COSTS_N_INSNS (3), /* SF->DF convert */
};
/* Instruction costs on POWER A2 processors. */
16, /* l1 cache */
2048, /* l2 cache */
16, /* prefetch streams */
+ 0, /* SF->DF convert */
};
\f
case FLOAT_EXTEND:
if (mode == DFmode)
- *total = 0;
+ *total = rs6000_cost->sfdf_convert;
else
*total = rs6000_cost->fp;
return false;
fmr %0,%1
lfs%U1%X1 %0,%1
#
- xxlor %x0,%x1,%x1
+ xscpsgndp %x0,%x1,%x1
lxsspx %x0,%y1"
"&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
[(const_int 0)]
emit_note (NOTE_INSN_DELETED);
DONE;
}
- [(set_attr "type" "fp,fp,fpload,fp,vecsimple,fpload")])
+ [(set_attr "type" "fp,fp,fpload,fp,fp,fpload")])
(define_expand "truncdfsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
"")
(define_insn "*truncdfsf2_fpr"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy")
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d,ws")))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
- "frsp %0,%1"
+ "@
+ frsp %0,%1
+ xsrsp %x0,%x1"
[(set_attr "type" "fp")])
;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
lwz%U1%X1 %0,%1
stw%U0%X0 %1,%0
fmr %0,%1
- xxlor %x0,%x1,%x1
+ xscpsgndp %x0,%x1,%x1
xxlxor %x0,%x0,%x0
li %0,0
<f32_li>
mt%0 %1
mf%1 %0
nop"
- [(set_attr "type" "*,load,store,fp,vecsimple,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*")
+ [(set_attr "type" "*,load,store,fp,fp,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*")
(set_attr "length" "4")])
(define_insn "*mov<mode>_softfloat"