From 3b0cb1a553831f9119995362d8b6d982a0ba1b6b Mon Sep 17 00:00:00 2001
From: Aaron Sawdey
Date: Tue, 2 Jan 2018 23:01:43 +0000
Subject: [PATCH] rs6000-string.c (expand_block_move): Allow the use of
 unaligned VSX load/store on P8/P9.

2018-01-02 Aaron Sawdey

	* config/rs6000/rs6000-string.c (expand_block_move): Allow the use of
	unaligned VSX load/store on P8/P9.
	(expand_block_clear): Allow the use of unaligned VSX
	load/store on P8/P9.

From-SVN: r256112
---
 gcc/ChangeLog                     | 7 +++++++
 gcc/config/rs6000/rs6000-string.c | 6 +++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bdf32da0aeb..b9775434b70 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2018-01-02 Aaron Sawdey
+
+	* config/rs6000/rs6000-string.c (expand_block_move): Allow the use of
+	unaligned VSX load/store on P8/P9.
+	(expand_block_clear): Allow the use of unaligned VSX
+	load/store on P8/P9.
+
 2018-01-02 Bill Schmidt
 
 	* config/rs6000/rs6000-p8swap.c (swap_feeds_both_load_and_store):
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 7e1976bbe08..23de92572b7 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -73,7 +73,7 @@ expand_block_clear (rtx operands[])
      When optimize_size, avoid any significant code bloat; calling
      memset is about 4 instructions, so allow for one instruction to
      load zero and three to do clearing. */
-  if (TARGET_ALTIVEC && align >= 128)
+  if (TARGET_ALTIVEC && (align >= 128 || TARGET_EFFICIENT_UNALIGNED_VSX))
     clear_step = 16;
   else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
     clear_step = 8;
@@ -90,7 +90,7 @@ expand_block_clear (rtx operands[])
       machine_mode mode = BLKmode;
       rtx dest;
 
-      if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
+      if (bytes >= 16 && TARGET_ALTIVEC && (align >= 128 || TARGET_EFFICIENT_UNALIGNED_VSX))
         {
           clear_bytes = 16;
           mode = V4SImode;
@@ -1260,7 +1260,7 @@ expand_block_move (rtx operands[])
 
       /* Altivec first, since it will be faster than a string move
          when it applies, and usually not significantly larger. */
-      if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
+      if (TARGET_ALTIVEC && bytes >= 16 && (TARGET_EFFICIENT_UNALIGNED_VSX || align >= 128))
         {
           move_bytes = 16;
           mode = V4SImode;
-- 
2.30.2