From 993f9e7d06de3e08f92fc4c72efe219495140967 Mon Sep 17 00:00:00 2001 From: Andreas Krebbel Date: Thu, 7 Feb 2019 15:53:38 +0000 Subject: [PATCH] S/390: Fix the vec_xl / vec_xst style builtins This patch fixes several problems with the vec_xl/vec_xst builtins: - vec_xl/vec_xst needs to use the alignment of the scalar memory operand for the vector type reference. This is required to emit the proper vl/vst alignment hints. - vec_xl / vec_xld2 / vec_xlw4 should accept const pointer source operands - vec_xlw4 / vec_xstw4 needs to accept float memory operands gcc/ChangeLog: 2019-02-07 Andreas Krebbel * config/s390/s390-builtin-types.def: Add new types. * config/s390/s390-builtins.def: (s390_vec_xl, s390_vec_xld2) (s390_vec_xlw4): Make the memory operand into a const pointer. (s390_vec_xld2, s390_vec_xlw4): Add a variant for single precision float. * config/s390/s390-c.c (s390_expand_overloaded_builtin): Generate a new vector type with the alignment of the scalar memory operand. gcc/testsuite/ChangeLog: 2019-02-07 Andreas Krebbel * gcc.target/s390/zvector/xl-xst-align-1.c: New test. * gcc.target/s390/zvector/xl-xst-align-2.c: New test. From-SVN: r268651 --- gcc/ChangeLog | 10 +++ gcc/config/s390/s390-builtin-types.def | 14 +++- gcc/config/s390/s390-builtins.def | 65 ++++++++++--------- gcc/config/s390/s390-c.c | 28 ++++++-- gcc/testsuite/ChangeLog | 5 ++ .../gcc.target/s390/zvector/xl-xst-align-1.c | 45 +++++++++++++ .../gcc.target/s390/zvector/xl-xst-align-2.c | 48 ++++++++++++++ 7 files changed, 176 insertions(+), 39 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-1.c create mode 100644 gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f92d2a5df49..ede2a6bcc44 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2019-02-07 Andreas Krebbel + + * config/s390/s390-builtin-types.def: Add new types. + * config/s390/s390-builtins.def: (s390_vec_xl, s390_vec_xld2) + (s390_vec_xlw4): Make the memory operand into a const pointer. + (s390_vec_xld2, s390_vec_xlw4): Add a variant for single precision + float. + * config/s390/s390-c.c (s390_expand_overloaded_builtin): Generate + a new vector type with the alignment of the scalar memory operand. + 2019-02-07 Matthew Malcomson Jakub Jelinek diff --git a/gcc/config/s390/s390-builtin-types.def b/gcc/config/s390/s390-builtin-types.def index e749d6df18c..58d920c7176 100644 --- a/gcc/config/s390/s390-builtin-types.def +++ b/gcc/config/s390/s390-builtin-types.def @@ -260,6 +260,7 @@ DEF_FN_TYPE_2 (BT_FN_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT) DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR) DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF) DEF_FN_TYPE_2 (BT_FN_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI) +DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDCONSTPTR, BT_V4SI, BT_INT, BT_VOIDCONSTPTR) DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDPTR, BT_V4SI, BT_INT, BT_VOIDPTR) DEF_FN_TYPE_2 (BT_FN_V4SI_UV4SI_UV4SI, BT_V4SI, BT_UV4SI, BT_UV4SI) DEF_FN_TYPE_2 (BT_FN_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI) @@ -492,6 +493,7 @@ DEF_OV_TYPE (BT_OV_USHORT_UV8HI_INT, BT_USHORT, BT_UV8HI, BT_INT) DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI, BT_UV16QI, BT_BV16QI, BT_BV16QI) DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI_INTPTR, BT_UV16QI, BT_BV16QI, BT_BV16QI, BT_INTPTR) DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_UV16QI, BT_UV16QI, BT_BV16QI, BT_UV16QI) +DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARCONSTPTR, BT_UV16QI, BT_LONG, BT_UCHARCONSTPTR) DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARPTR, BT_UV16QI, BT_LONG, BT_UCHARPTR) DEF_OV_TYPE (BT_OV_UV16QI_UCHAR, BT_UV16QI, BT_UCHAR) DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR, BT_UV16QI, BT_UCHARCONSTPTR) @@ -523,6 +525,7 @@ DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_ DEF_OV_TYPE (BT_OV_UV16QI_V16QI, BT_UV16QI, BT_V16QI) DEF_OV_TYPE (BT_OV_UV16QI_V8HI_V8HI, BT_UV16QI, BT_V8HI, BT_V8HI) DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI) +DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGCONSTPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGCONSTPTR) DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR) DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG) DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR, BT_UV2DI, BT_ULONGLONGCONSTPTR) @@ -556,6 +559,8 @@ DEF_OV_TYPE (BT_OV_UV2DI_V2DI, BT_UV2DI, BT_V2DI) DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI, BT_UV4SI, BT_BV4SI, BT_BV4SI) DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI_INTPTR, BT_UV4SI, BT_BV4SI, BT_BV4SI, BT_INTPTR) DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_UV4SI, BT_UV4SI, BT_BV4SI, BT_UV4SI) +DEF_OV_TYPE (BT_OV_UV4SI_LONG_FLTPTR, BT_UV4SI, BT_LONG, BT_FLTPTR) +DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTCONSTPTR, BT_UV4SI, BT_LONG, BT_UINTCONSTPTR) DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTPTR, BT_UV4SI, BT_LONG, BT_UINTPTR) DEF_OV_TYPE (BT_OV_UV4SI_UINT, BT_UV4SI, BT_UINT) DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR, BT_UV4SI, BT_UINTCONSTPTR) @@ -593,6 +598,7 @@ DEF_OV_TYPE (BT_OV_UV4SI_V4SI, BT_UV4SI, BT_V4SI) DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI, BT_UV8HI, BT_BV8HI, BT_BV8HI) DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI_INTPTR, BT_UV8HI, BT_BV8HI, BT_BV8HI, BT_INTPTR) DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_UV8HI, BT_UV8HI, BT_BV8HI, BT_UV8HI) +DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTCONSTPTR, BT_UV8HI, BT_LONG, BT_USHORTCONSTPTR) DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTPTR, BT_UV8HI, BT_LONG, BT_USHORTPTR) DEF_OV_TYPE (BT_OV_UV8HI_USHORT, BT_UV8HI, BT_USHORT) DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR, BT_UV8HI, BT_USHORTCONSTPTR) @@ -626,6 +632,7 @@ DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_V8HI, BT_UV8HI, BT_UV8HI, BT_V8HI) DEF_OV_TYPE (BT_OV_UV8HI_V4SI_V4SI, BT_UV8HI, BT_V4SI, BT_V4SI) DEF_OV_TYPE (BT_OV_UV8HI_V8HI, BT_UV8HI, BT_V8HI) DEF_OV_TYPE (BT_OV_V16QI_BV16QI_V16QI, BT_V16QI, BT_BV16QI, BT_V16QI) +DEF_OV_TYPE (BT_OV_V16QI_LONG_SCHARCONSTPTR, BT_V16QI, BT_LONG, BT_SCHARCONSTPTR) DEF_OV_TYPE (BT_OV_V16QI_LONG_SCHARPTR, BT_V16QI, BT_LONG, BT_SCHARPTR) DEF_OV_TYPE (BT_OV_V16QI_SCHAR, BT_V16QI, BT_SCHAR) DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR, BT_V16QI, BT_SCHARCONSTPTR) @@ -660,6 +667,7 @@ DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_UINT, BT_V2DF, BT_DBLCONSTPTR, BT_UINT) DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_USHORT, BT_V2DF, BT_DBLCONSTPTR, BT_USHORT) DEF_OV_TYPE (BT_OV_V2DF_DBL_INT, BT_V2DF, BT_DBL, BT_INT) DEF_OV_TYPE (BT_OV_V2DF_DBL_V2DF_INT, BT_V2DF, BT_DBL, BT_V2DF, BT_INT) +DEF_OV_TYPE (BT_OV_V2DF_LONG_DBLCONSTPTR, BT_V2DF, BT_LONG, BT_DBLCONSTPTR) DEF_OV_TYPE (BT_OV_V2DF_LONG_DBLPTR, BT_V2DF, BT_LONG, BT_DBLPTR) DEF_OV_TYPE (BT_OV_V2DF_UV2DI, BT_V2DF, BT_UV2DI) DEF_OV_TYPE (BT_OV_V2DF_UV2DI_INT, BT_V2DF, BT_UV2DI, BT_INT) @@ -687,6 +695,7 @@ DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR_USHORT, BT_V2DI, BT_LONGLONGCONSTPTR, B DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_INT, BT_V2DI, BT_LONGLONG, BT_INT) DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_LONGLONG, BT_V2DI, BT_LONGLONG, BT_LONGLONG) DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_V2DI_INT, BT_V2DI, BT_LONGLONG, BT_V2DI, BT_INT) +DEF_OV_TYPE (BT_OV_V2DI_LONG_LONGLONGCONSTPTR, BT_V2DI, BT_LONG, BT_LONGLONGCONSTPTR) DEF_OV_TYPE (BT_OV_V2DI_LONG_LONGLONGPTR, BT_V2DI, BT_LONG, BT_LONGLONGPTR) DEF_OV_TYPE (BT_OV_V2DI_V16QI, BT_V2DI, BT_V16QI) DEF_OV_TYPE (BT_OV_V2DI_V2DI, BT_V2DI, BT_V2DI) @@ -716,7 +725,7 @@ DEF_OV_TYPE (BT_OV_V4SF_FLTCONSTPTR_UINT, BT_V4SF, BT_FLTCONSTPTR, BT_UINT) DEF_OV_TYPE (BT_OV_V4SF_FLTCONSTPTR_USHORT, BT_V4SF, BT_FLTCONSTPTR, BT_USHORT) DEF_OV_TYPE (BT_OV_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT) DEF_OV_TYPE (BT_OV_V4SF_FLT_V4SF_INT, BT_V4SF, BT_FLT, BT_V4SF, BT_INT) -DEF_OV_TYPE (BT_OV_V4SF_LONG_FLTPTR, BT_V4SF, BT_LONG, BT_FLTPTR) +DEF_OV_TYPE (BT_OV_V4SF_LONG_FLTCONSTPTR, BT_V4SF, BT_LONG, BT_FLTCONSTPTR) DEF_OV_TYPE (BT_OV_V4SF_V4SF, BT_V4SF, BT_V4SF) DEF_OV_TYPE (BT_OV_V4SF_V4SF_BV4SI, BT_V4SF, BT_V4SF, BT_BV4SI) DEF_OV_TYPE (BT_OV_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR) @@ -737,6 +746,7 @@ DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_UINT, BT_V4SI, BT_INTCONSTPTR, BT_UINT) DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_USHORT, BT_V4SI, BT_INTCONSTPTR, BT_USHORT) DEF_OV_TYPE (BT_OV_V4SI_INT_INT, BT_V4SI, BT_INT, BT_INT) DEF_OV_TYPE (BT_OV_V4SI_INT_V4SI_INT, BT_V4SI, BT_INT, BT_V4SI, BT_INT) +DEF_OV_TYPE (BT_OV_V4SI_LONG_INTCONSTPTR, BT_V4SI, BT_LONG, BT_INTCONSTPTR) DEF_OV_TYPE (BT_OV_V4SI_LONG_INTPTR, BT_V4SI, BT_LONG, BT_INTPTR) DEF_OV_TYPE (BT_OV_V4SI_UV4SI_V4SI_V4SI, BT_V4SI, BT_UV4SI, BT_V4SI, BT_V4SI) DEF_OV_TYPE (BT_OV_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI) @@ -764,6 +774,7 @@ DEF_OV_TYPE (BT_OV_V4SI_V8HI, BT_V4SI, BT_V8HI) DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI, BT_V4SI, BT_V8HI, BT_V8HI) DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI_V4SI, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI) DEF_OV_TYPE (BT_OV_V8HI_BV8HI_V8HI, BT_V8HI, BT_BV8HI, BT_V8HI) +DEF_OV_TYPE (BT_OV_V8HI_LONG_SHORTCONSTPTR, BT_V8HI, BT_LONG, BT_SHORTCONSTPTR) DEF_OV_TYPE (BT_OV_V8HI_LONG_SHORTPTR, BT_V8HI, BT_LONG, BT_SHORTPTR) DEF_OV_TYPE (BT_OV_V8HI_SHORT, BT_V8HI, BT_SHORT) DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR, BT_V8HI, BT_SHORTCONSTPTR) @@ -802,6 +813,7 @@ DEF_OV_TYPE (BT_OV_VOID_UV16QI_UCHARPTR_UINT, BT_VOID, BT_UV16QI, BT_UCHARPTR, B DEF_OV_TYPE (BT_OV_VOID_UV2DI_LONG_ULONGLONGPTR, BT_VOID, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR) DEF_OV_TYPE (BT_OV_VOID_UV2DI_ULONGLONGPTR_UINT, BT_VOID, BT_UV2DI, BT_ULONGLONGPTR, BT_UINT) DEF_OV_TYPE (BT_OV_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, BT_VOID, BT_UV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG) +DEF_OV_TYPE (BT_OV_VOID_UV4SI_LONG_FLTPTR, BT_VOID, BT_UV4SI, BT_LONG, BT_FLTPTR) DEF_OV_TYPE (BT_OV_VOID_UV4SI_LONG_UINTPTR, BT_VOID, BT_UV4SI, BT_LONG, BT_UINTPTR) DEF_OV_TYPE (BT_OV_VOID_UV4SI_UINTPTR_UINT, BT_VOID, BT_UV4SI, BT_UINTPTR, BT_UINT) DEF_OV_TYPE (BT_OV_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG, BT_VOID, BT_UV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG) diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def index 91c42e214a2..ed81b8b8629 100644 --- a/gcc/config/s390/s390-builtins.def +++ b/gcc/config/s390/s390-builtins.def @@ -328,36 +328,38 @@ B_DEF (s390_vgmh, vec_genmaskv8hi, 0, B_DEF (s390_vgmf, vec_genmaskv4si, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV4SI_UCHAR_UCHAR) B_DEF (s390_vgmg, vec_genmaskv2di, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV2DI_UCHAR_UCHAR) -OB_DEF (s390_vec_xl, s390_vec_xl_s8, s390_vec_xl_dbl, B_VX, BT_FN_V4SI_INT_VOIDPTR) -OB_DEF_VAR (s390_vec_xl_s8, MAX, 0, O1_LIT, BT_OV_V16QI_LONG_SCHARPTR) /* vl */ -OB_DEF_VAR (s390_vec_xl_u8, MAX, 0, O1_LIT, BT_OV_UV16QI_LONG_UCHARPTR) /* vl */ -OB_DEF_VAR (s390_vec_xl_s16, MAX, 0, O1_LIT, BT_OV_V8HI_LONG_SHORTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xl_u16, MAX, 0, O1_LIT, BT_OV_UV8HI_LONG_USHORTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xl_s32, MAX, 0, O1_LIT, BT_OV_V4SI_LONG_INTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xl_u32, MAX, 0, O1_LIT, BT_OV_UV4SI_LONG_UINTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xl_s64, MAX, 0, O1_LIT, BT_OV_V2DI_LONG_LONGLONGPTR) /* vl */ -OB_DEF_VAR (s390_vec_xl_u64, MAX, 0, O1_LIT, BT_OV_UV2DI_LONG_ULONGLONGPTR) /* vl */ -OB_DEF_VAR (s390_vec_xl_flt, MAX, 0, O1_LIT, BT_OV_V4SF_LONG_FLTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xl_dbl, MAX, 0, O1_LIT, BT_OV_V2DF_LONG_DBLPTR) /* vl */ - -OB_DEF (s390_vec_xld2, s390_vec_xld2_s8, s390_vec_xld2_dbl, B_DEP | B_VX, BT_FN_V4SI_INT_VOIDPTR) -OB_DEF_VAR (s390_vec_xld2_s8, MAX, 0, O1_LIT, BT_OV_V16QI_LONG_SCHARPTR) /* vl */ -OB_DEF_VAR (s390_vec_xld2_u8, MAX, 0, O1_LIT, BT_OV_UV16QI_LONG_UCHARPTR) /* vl */ -OB_DEF_VAR (s390_vec_xld2_s16, MAX, 0, O1_LIT, BT_OV_V8HI_LONG_SHORTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xld2_u16, MAX, 0, O1_LIT, BT_OV_UV8HI_LONG_USHORTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xld2_s32, MAX, 0, O1_LIT, BT_OV_V4SI_LONG_INTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xld2_u32, MAX, 0, O1_LIT, BT_OV_UV4SI_LONG_UINTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xld2_s64, MAX, 0, O1_LIT, BT_OV_V2DI_LONG_LONGLONGPTR) /* vl */ -OB_DEF_VAR (s390_vec_xld2_u64, MAX, 0, O1_LIT, BT_OV_UV2DI_LONG_ULONGLONGPTR) /* vl */ -OB_DEF_VAR (s390_vec_xld2_dbl, MAX, 0, O1_LIT, BT_OV_V2DF_LONG_DBLPTR) /* vl */ - -OB_DEF (s390_vec_xlw4, s390_vec_xlw4_s8, s390_vec_xlw4_u32, B_DEP | B_VX, BT_FN_V4SI_INT_VOIDPTR) -OB_DEF_VAR (s390_vec_xlw4_s8, MAX, 0, O1_LIT, BT_OV_V16QI_LONG_SCHARPTR) /* vl */ -OB_DEF_VAR (s390_vec_xlw4_u8, MAX, 0, O1_LIT, BT_OV_UV16QI_LONG_UCHARPTR) /* vl */ -OB_DEF_VAR (s390_vec_xlw4_s16, MAX, 0, O1_LIT, BT_OV_V8HI_LONG_SHORTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xlw4_u16, MAX, 0, O1_LIT, BT_OV_UV8HI_LONG_USHORTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xlw4_s32, MAX, 0, O1_LIT, BT_OV_V4SI_LONG_INTPTR) /* vl */ -OB_DEF_VAR (s390_vec_xlw4_u32, MAX, 0, O1_LIT, BT_OV_UV4SI_LONG_UINTPTR) /* vl */ +OB_DEF (s390_vec_xl, s390_vec_xl_s8, s390_vec_xl_dbl, B_VX, BT_FN_V4SI_INT_VOIDCONSTPTR) +OB_DEF_VAR (s390_vec_xl_s8, MAX, 0, O1_LIT, BT_OV_V16QI_LONG_SCHARCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xl_u8, MAX, 0, O1_LIT, BT_OV_UV16QI_LONG_UCHARCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xl_s16, MAX, 0, O1_LIT, BT_OV_V8HI_LONG_SHORTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xl_u16, MAX, 0, O1_LIT, BT_OV_UV8HI_LONG_USHORTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xl_s32, MAX, 0, O1_LIT, BT_OV_V4SI_LONG_INTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xl_u32, MAX, 0, O1_LIT, BT_OV_UV4SI_LONG_UINTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xl_s64, MAX, 0, O1_LIT, BT_OV_V2DI_LONG_LONGLONGCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xl_u64, MAX, 0, O1_LIT, BT_OV_UV2DI_LONG_ULONGLONGCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xl_flt, MAX, 0, O1_LIT, BT_OV_V4SF_LONG_FLTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xl_dbl, MAX, 0, O1_LIT, BT_OV_V2DF_LONG_DBLCONSTPTR) /* vl */ + +OB_DEF (s390_vec_xld2, s390_vec_xld2_s8, s390_vec_xld2_dbl, B_DEP | B_VX, BT_FN_V4SI_INT_VOIDCONSTPTR) +OB_DEF_VAR (s390_vec_xld2_s8, MAX, 0, O1_LIT, BT_OV_V16QI_LONG_SCHARCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xld2_u8, MAX, 0, O1_LIT, BT_OV_UV16QI_LONG_UCHARCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xld2_s16, MAX, 0, O1_LIT, BT_OV_V8HI_LONG_SHORTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xld2_u16, MAX, 0, O1_LIT, BT_OV_UV8HI_LONG_USHORTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xld2_s32, MAX, 0, O1_LIT, BT_OV_V4SI_LONG_INTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xld2_u32, MAX, 0, O1_LIT, BT_OV_UV4SI_LONG_UINTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xld2_s64, MAX, 0, O1_LIT, BT_OV_V2DI_LONG_LONGLONGCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xld2_u64, MAX, 0, O1_LIT, BT_OV_UV2DI_LONG_ULONGLONGCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xld2_flt, MAX, B_VXE, O1_LIT, BT_OV_V4SF_LONG_FLTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xld2_dbl, MAX, 0, O1_LIT, BT_OV_V2DF_LONG_DBLCONSTPTR) /* vl */ + +OB_DEF (s390_vec_xlw4, s390_vec_xlw4_s8, s390_vec_xlw4_flt, B_DEP | B_VX, BT_FN_V4SI_INT_VOIDCONSTPTR) +OB_DEF_VAR (s390_vec_xlw4_s8, MAX, 0, O1_LIT, BT_OV_V16QI_LONG_SCHARCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xlw4_u8, MAX, 0, O1_LIT, BT_OV_UV16QI_LONG_UCHARCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xlw4_s16, MAX, 0, O1_LIT, BT_OV_V8HI_LONG_SHORTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xlw4_u16, MAX, 0, O1_LIT, BT_OV_UV8HI_LONG_USHORTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xlw4_s32, MAX, 0, O1_LIT, BT_OV_V4SI_LONG_INTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xlw4_u32, MAX, 0, O1_LIT, BT_OV_UV4SI_LONG_UINTCONSTPTR) /* vl */ +OB_DEF_VAR (s390_vec_xlw4_flt, MAX, B_VXE, O1_LIT, BT_OV_V4SF_LONG_FLTCONSTPTR) /* vl */ OB_DEF (s390_vec_splats, s390_vec_splats_s8, s390_vec_splats_dbl,B_VX, BT_FN_OV4SI_INT) OB_DEF_VAR (s390_vec_splats_s8, s390_vlrepb, 0, 0, BT_OV_V16QI_SCHAR) @@ -746,13 +748,14 @@ OB_DEF_VAR (s390_vec_xstd2_s64, MAX, 0, OB_DEF_VAR (s390_vec_xstd2_u64, MAX, 0, O2_LIT, BT_OV_VOID_UV2DI_LONG_ULONGLONGPTR) /* vst */ OB_DEF_VAR (s390_vec_xstd2_dbl, MAX, 0, O2_LIT, BT_OV_VOID_V2DF_LONG_DBLPTR) /* vst */ -OB_DEF (s390_vec_xstw4, s390_vec_xstw4_s8, s390_vec_xstw4_u32, B_DEP | B_VX, BT_FN_VOID_OV4SI_INT_VOIDPTR) +OB_DEF (s390_vec_xstw4, s390_vec_xstw4_s8, s390_vec_xstw4_flt, B_DEP | B_VX, BT_FN_VOID_OV4SI_INT_VOIDPTR) OB_DEF_VAR (s390_vec_xstw4_s8, MAX, 0, O2_LIT, BT_OV_VOID_V16QI_LONG_SCHARPTR) /* vst */ OB_DEF_VAR (s390_vec_xstw4_u8, MAX, 0, O2_LIT, BT_OV_VOID_UV16QI_LONG_UCHARPTR) /* vst */ OB_DEF_VAR (s390_vec_xstw4_s16, MAX, 0, O2_LIT, BT_OV_VOID_V8HI_LONG_SHORTPTR) /* vst */ OB_DEF_VAR (s390_vec_xstw4_u16, MAX, 0, O2_LIT, BT_OV_VOID_UV8HI_LONG_USHORTPTR) /* vst */ OB_DEF_VAR (s390_vec_xstw4_s32, MAX, 0, O2_LIT, BT_OV_VOID_V4SI_LONG_INTPTR) /* vst */ OB_DEF_VAR (s390_vec_xstw4_u32, MAX, 0, O2_LIT, BT_OV_VOID_UV4SI_LONG_UINTPTR) /* vst */ +OB_DEF_VAR (s390_vec_xstw4_flt, MAX, B_VXE, O2_LIT, BT_OV_VOID_V4SF_LONG_FLTPTR) /* vst */ OB_DEF (s390_vec_store_len, s390_vec_store_len_s8,s390_vec_store_len_dbl,B_VX, BT_FN_VOID_OV4SI_VOIDPTR_UINT) OB_DEF_VAR (s390_vec_store_len_s8, s390_vstl, 0, 0, BT_OV_VOID_V16QI_SCHARPTR_UINT) diff --git a/gcc/config/s390/s390-c.c b/gcc/config/s390/s390-c.c index 5869628032c..d2fb7ba929b 100644 --- a/gcc/config/s390/s390-c.c +++ b/gcc/config/s390/s390-c.c @@ -470,16 +470,30 @@ s390_expand_overloaded_builtin (location_t loc, case S390_OVERLOADED_BUILTIN_s390_vec_xl: case S390_OVERLOADED_BUILTIN_s390_vec_xld2: case S390_OVERLOADED_BUILTIN_s390_vec_xlw4: - return build2 (MEM_REF, return_type, - fold_build_pointer_plus ((*arglist)[1], (*arglist)[0]), - build_int_cst (TREE_TYPE ((*arglist)[1]), 0)); + { + /* Build a vector type with the alignment of the source + location in order to enable correct alignment hints to be + generated for vl. */ + tree mem_type = build_aligned_type (return_type, + TYPE_ALIGN (TREE_TYPE (TREE_TYPE ((*arglist)[1])))); + return build2 (MEM_REF, mem_type, + fold_build_pointer_plus ((*arglist)[1], (*arglist)[0]), + build_int_cst (TREE_TYPE ((*arglist)[1]), 0)); + } case S390_OVERLOADED_BUILTIN_s390_vec_xst: case S390_OVERLOADED_BUILTIN_s390_vec_xstd2: case S390_OVERLOADED_BUILTIN_s390_vec_xstw4: - return build2 (MODIFY_EXPR, TREE_TYPE((*arglist)[0]), - build1 (INDIRECT_REF, TREE_TYPE((*arglist)[0]), - fold_build_pointer_plus ((*arglist)[2], (*arglist)[1])), - (*arglist)[0]); + { + /* Build a vector type with the alignment of the target + location in order to enable correct alignment hints to be + generated for vst. */ + tree mem_type = build_aligned_type (TREE_TYPE((*arglist)[0]), + TYPE_ALIGN (TREE_TYPE (TREE_TYPE ((*arglist)[2])))); + return build2 (MODIFY_EXPR, mem_type, + build1 (INDIRECT_REF, mem_type, + fold_build_pointer_plus ((*arglist)[2], (*arglist)[1])), + (*arglist)[0]); + } case S390_OVERLOADED_BUILTIN_s390_vec_load_pair: return build_constructor_va (return_type, 2, NULL_TREE, (*arglist)[0], diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b27035560f7..8dd55309ddf 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-02-07 Andreas Krebbel + + * gcc.target/s390/zvector/xl-xst-align-1.c: New test. + * gcc.target/s390/zvector/xl-xst-align-2.c: New test. + 2019-02-07 Matthew Malcomson Jakub Jelinek diff --git a/gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-1.c b/gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-1.c new file mode 100644 index 00000000000..bf9cc2824e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-1.c @@ -0,0 +1,45 @@ +/* { dg-do compile { target { s390*-*-* } } } */ +/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */ + +#include + +vector float +foo (float *a) +{ + return vec_xl (0, a); +} + +vector float +bar (const float *a) +{ + return vec_xl (0, a); +} + +void +baz (float *f, vector float a) +{ + vec_xst (a, 0, f); +} + +vector float +foo2 (float *a) +{ + return vec_xlw4 (0, a); +} + +vector float +bar2 (const float *a) +{ + return vec_xlw4 (0, a); +} + +void +baz2 (float *f, vector float a) +{ + vec_xstw4 (a, 0, f); +} + +/* Make sure no alignment hints are generated. */ + +/* { dg-final { scan-assembler-not "vl.*,3" } } */ +/* { dg-final { scan-assembler-not "vst.*,3" } } */ diff --git a/gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-2.c b/gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-2.c new file mode 100644 index 00000000000..fe69fd8a680 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-2.c @@ -0,0 +1,48 @@ +/* { dg-do compile { target { s390*-*-* } } } */ +/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */ + +#include + +typedef float __attribute__((aligned(8))) float_aligned; + +vector float +foo (float_aligned *a) +{ + return vec_xl (0, a); +} + +vector float +bar (const float_aligned *a) +{ + return vec_xl (0, a); +} + +void +baz (float_aligned *f, vector float a) +{ + vec_xst (a, 0, f); +} + +vector float +foo2 (float_aligned *a) +{ + return vec_xlw4 (0, a); +} + +vector float +bar2 (const float_aligned *a) +{ + return vec_xlw4 (0, a); +} + +void +baz2 (float_aligned *f, vector float a) +{ + vec_xstw4 (a, 0, f); +} + +/* Make sure alignment hints are generated if the source or target + operand is properly aligned. */ + +/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r2\\),3" 4 } } */ +/* { dg-final { scan-assembler-times "vst\t%v\[0-9\]*,0\\(%r2\\),3" 2 } } */ -- 2.30.2