From: Michael Meissner Date: Sat, 30 Jul 2016 22:31:16 +0000 (+0000) Subject: rs6000-protos.h (rs6000_adjust_vec_address): New function that takes a vector memory... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d0047a2538bc3381a7cf94e714f05b4b96799f6d;p=gcc.git rs6000-protos.h (rs6000_adjust_vec_address): New function that takes a vector memory address... [gcc] 2016-07-30 Michael Meissner * config/rs6000/rs6000-protos.h (rs6000_adjust_vec_address): New function that takes a vector memory address, a hard register, an element number and a temporary base register, and recreates an address that points to the appropriate element within the vector. * config/rs6000/rs6000.c (rs6000_adjust_vec_address): Likewise. (rs6000_split_vec_extract_var): Add support for the target of a vec_extract with variable element number being a scalar memory location. (rtx_is_swappable_p): VSLO insns (UNSPEC_VSX_VSLO) are not swappable. * config/rs6000/vsx.md (vsx_extract_<mode>_load): Replace vsx_extract_<mode>_load insn with a new insn that optimizes storing either element to a memory location, using scratch registers to pick apart the vector and reconstruct the address. (vsx_extract_<P:mode>_<VSX_D:mode>_load): Likewise. (vsx_extract_<mode>_store): Rework alternatives to more correctly support Altivec registers. Add support for ISA 3.0 Altivec d-form store instruction. (vsx_extract_<mode>_var): Add support for extracting a variable element number from memory. [gcc/testsuite] 2016-07-30 Michael Meissner * gcc.target/powerpc/vec-extract-2.c: New tests for vec_extract of vector double or vector long where the vector is in memory. * gcc.target/powerpc/vec-extract-3.c: Likewise. * gcc.target/powerpc/vec-extract-4.c: Likewise. 
From-SVN: r238908 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b628b92eaab..a394f345e42 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +2016-07-30 Michael Meissner + + * config/rs6000/rs6000-protos.h (rs6000_adjust_vec_address): New + function that takes a vector memory address, a hard register, an + element number and a temporary base register, and recreates an + address that points to the appropriate element within the vector. + * config/rs6000/rs6000.c (rs6000_adjust_vec_address): Likewise. + (rs6000_split_vec_extract_var): Add support for the target of a + vec_extract with variable element number being a scalar memory + location. + (rtx_is_swappable_p): VSLO insns (UNSPEC_VSX_VSLO) are not + swappable. + * config/rs6000/vsx.md (vsx_extract_<mode>_load): Replace + vsx_extract_<mode>_load insn with a new insn that optimizes + storing either element to a memory location, using scratch + registers to pick apart the vector and reconstruct the address. + (vsx_extract_<P:mode>_<VSX_D:mode>_load): Likewise. + (vsx_extract_<mode>_store): Rework alternatives to more correctly + support Altivec registers. Add support for ISA 3.0 Altivec d-form + store instruction. + (vsx_extract_<mode>_var): Add support for extracting a variable + element number from memory. 
+ 2016-07-29 Georg-Johann Lay * config/avr/avr.c (avr_out_compare): Use const0_rtx instead of 0 diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index fdf5c6885a7..8a307a85e6f 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -63,6 +63,7 @@ extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); extern void rs6000_expand_vector_extract (rtx, rtx, rtx); extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx); +extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode); extern bool altivec_expand_vec_perm_const (rtx op[4]); extern void altivec_expand_vec_perm_le (rtx op[4]); extern bool rs6000_expand_vec_perm_const (rtx op[4]); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 46b46d74986..567749c5cb1 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -7001,6 +7001,164 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt) emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0)); } +/* Adjust a memory address (MEM) of a vector type to point to a scalar field + within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register + temporary (BASE_TMP) to fixup the address. Return the new memory address + that is valid for reads or writes to a given register (SCALAR_REG). */ + +rtx +rs6000_adjust_vec_address (rtx scalar_reg, + rtx mem, + rtx element, + rtx base_tmp, + machine_mode scalar_mode) +{ + unsigned scalar_size = GET_MODE_SIZE (scalar_mode); + rtx addr = XEXP (mem, 0); + rtx element_offset; + rtx new_addr; + bool valid_addr_p; + + /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */ + gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC); + + /* Calculate what we need to add to the address to get the element + address. 
*/ + if (CONST_INT_P (element)) + element_offset = GEN_INT (INTVAL (element) * scalar_size); + else + { + int byte_shift = exact_log2 (scalar_size); + gcc_assert (byte_shift >= 0); + + if (byte_shift == 0) + element_offset = element; + + else + { + if (TARGET_POWERPC64) + emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift))); + else + emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift))); + + element_offset = base_tmp; + } + } + + /* Create the new address pointing to the element within the vector. If we + are adding 0, we don't have to change the address. */ + if (element_offset == const0_rtx) + new_addr = addr; + + /* A simple indirect address can be converted into a reg + offset + address. */ + else if (REG_P (addr) || SUBREG_P (addr)) + new_addr = gen_rtx_PLUS (Pmode, addr, element_offset); + + /* Optimize D-FORM addresses with constant offset with a constant element, to + include the element offset in the address directly. */ + else if (GET_CODE (addr) == PLUS) + { + rtx op0 = XEXP (addr, 0); + rtx op1 = XEXP (addr, 1); + rtx insn; + + gcc_assert (REG_P (op0) || SUBREG_P (op0)); + if (CONST_INT_P (op1) && CONST_INT_P (element_offset)) + { + HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset); + rtx offset_rtx = GEN_INT (offset); + + if (IN_RANGE (offset, -32768, 32767) + && (scalar_size < 8 || (offset & 0x3) == 0)) + new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx); + else + { + emit_move_insn (base_tmp, offset_rtx); + new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp); + } + } + else + { + if (REG_P (op1) || SUBREG_P (op1)) + { + insn = gen_add3_insn (base_tmp, op1, element_offset); + gcc_assert (insn != NULL_RTX); + emit_insn (insn); + } + + else if (REG_P (element_offset) || SUBREG_P (element_offset)) + { + insn = gen_add3_insn (base_tmp, element_offset, op1); + gcc_assert (insn != NULL_RTX); + emit_insn (insn); + } + + else + { + emit_move_insn (base_tmp, op1); + emit_insn (gen_add2_insn (base_tmp, element_offset)); + } + + 
new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp); + } + } + + else + { + emit_move_insn (base_tmp, addr); + new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset); + } + + /* If we have a PLUS, we need to see whether the particular register class + allows for D-FORM or X-FORM addressing. */ + if (GET_CODE (new_addr) == PLUS) + { + rtx op1 = XEXP (new_addr, 1); + addr_mask_type addr_mask; + int scalar_regno; + + if (REG_P (scalar_reg)) + scalar_regno = REGNO (scalar_reg); + else if (SUBREG_P (scalar_reg)) + scalar_regno = subreg_regno (scalar_reg); + else + gcc_unreachable (); + + gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER); + if (INT_REGNO_P (scalar_regno)) + addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR]; + + else if (FP_REGNO_P (scalar_regno)) + addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR]; + + else if (ALTIVEC_REGNO_P (scalar_regno)) + addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX]; + + else + gcc_unreachable (); + + if (REG_P (op1) || SUBREG_P (op1)) + valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0; + else + valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0; + } + + else if (REG_P (new_addr) || SUBREG_P (new_addr)) + valid_addr_p = true; + + else + valid_addr_p = false; + + if (!valid_addr_p) + { + emit_move_insn (base_tmp, new_addr); + new_addr = base_tmp; + } + + return change_address (mem, scalar_mode, new_addr); +} + /* Split a variable vec_extract operation into the component instructions. */ void @@ -7014,7 +7172,18 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr, gcc_assert (byte_shift >= 0); - if (REG_P (src) || SUBREG_P (src)) + /* If we are given a memory address, optimize to load just the element. We + don't have to adjust the vector element number on little endian + systems. 
*/ + if (MEM_P (src)) + { + gcc_assert (REG_P (tmp_gpr)); + emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element, + tmp_gpr, scalar_mode)); + return; + } + + else if (REG_P (src) || SUBREG_P (src)) { int bit_shift = byte_shift + 3; rtx element2; @@ -38759,6 +38928,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VSX_CVSPDP: case UNSPEC_VSX_CVSPDPN: case UNSPEC_VSX_EXTRACT: + case UNSPEC_VSX_VSLO: return 0; case UNSPEC_VSPLT_DIRECT: *special = SH_SPLAT; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index ca569a2cbf1..b66b1affd7c 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -2174,33 +2174,36 @@ } [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")]) -;; Optimize extracting a single scalar element from memory if the scalar is in -;; the correct location to use a single load. -(define_insn "*vsx_extract__load" - [(set (match_operand: 0 "register_operand" "=d,wv,wr") - (vec_select: - (match_operand:VSX_D 1 "memory_operand" "m,Z,m") - (parallel [(const_int 0)])))] - "VECTOR_MEM_VSX_P (mode)" - "@ - lfd%U1%X1 %0,%1 - lxsd%U1x %x0,%y1 - ld%U1%X1 %0,%1" - [(set_attr "type" "fpload,fpload,load") - (set_attr "length" "4")]) +;; Optimize extracting a single scalar element from memory. 
+(define_insn_and_split "*vsx_extract___load" + [(set (match_operand: 0 "register_operand" "=,wr") + (vec_select: + (match_operand:VSX_D 1 "memory_operand" "m,m") + (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")]))) + (clobber (match_scratch:P 3 "=&b,&b"))] + "VECTOR_MEM_VSX_P (mode)" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 4))] +{ + operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], + operands[3], mode); +} + [(set_attr "type" "fpload,load") + (set_attr "length" "8")]) ;; Optimize storing a single scalar element that is the right location to ;; memory (define_insn "*vsx_extract__store" - [(set (match_operand: 0 "memory_operand" "=m,Z,?Z") + [(set (match_operand: 0 "memory_operand" "=m,Z,o") (vec_select: - (match_operand:VSX_D 1 "register_operand" "d,wd,") + (match_operand:VSX_D 1 "register_operand" "d,wv,wb") (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))] "VECTOR_MEM_VSX_P (mode)" "@ stfd%U0%X0 %1,%0 stxsd%U0x %x1,%y0 - stxsd%U0x %x1,%y0" + stxsd %1,%0" [(set_attr "type" "fpstore") (set_attr "length" "4")]) @@ -2216,12 +2219,12 @@ ;; Variable V2DI/V2DF extract (define_insn_and_split "vsx_extract__var" - [(set (match_operand: 0 "gpc_reg_operand" "=v") - (unspec: [(match_operand:VSX_D 1 "input_operand" "v") - (match_operand:DI 2 "gpc_reg_operand" "r")] + [(set (match_operand: 0 "gpc_reg_operand" "=v,,r") + (unspec: [(match_operand:VSX_D 1 "input_operand" "v,m,m") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] UNSPEC_VSX_EXTRACT)) - (clobber (match_scratch:DI 3 "=r")) - (clobber (match_scratch:V2DI 4 "=&v"))] + (clobber (match_scratch:DI 3 "=r,&b,&b")) + (clobber (match_scratch:V2DI 4 "=&v,X,X"))] "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8625717c5bf..8a2d5c8850b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2016-07-30 Michael Meissner 
+ + * gcc.target/powerpc/vec-extract-2.c: New tests for vec_extract of + vector double or vector long where the vector is in memory. + * gcc.target/powerpc/vec-extract-3.c: Likewise. + * gcc.target/powerpc/vec-extract-4.c: Likewise. + 2016-07-30 Steven G. Kargl PR fortran/69867 diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-2.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-2.c new file mode 100644 index 00000000000..efad62cca9f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-2.c @@ -0,0 +1,37 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx" } */ + +#include + +double +add_double_0 (vector double *p, double x) +{ + return vec_extract (*p, 0) + x; +} + +double +add_double_1 (vector double *p, double x) +{ + return vec_extract (*p, 1) + x; +} + +long +add_long_0 (vector long *p, long x) +{ + return vec_extract (*p, 0) + x; +} + +long +add_long_1 (vector long *p, long x) +{ + return vec_extract (*p, 1) + x; +} + +/* { dg-final { scan-assembler-not "lxvd2x" } } */ +/* { dg-final { scan-assembler-not "lxvw4x" } } */ +/* { dg-final { scan-assembler-not "lxvx" } } */ +/* { dg-final { scan-assembler-not "lxv" } } */ +/* { dg-final { scan-assembler-not "lvx" } } */ +/* { dg-final { scan-assembler-not "xxpermdi" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-3.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-3.c new file mode 100644 index 00000000000..62649ce9e42 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-3.c @@ -0,0 +1,26 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-O2 -mcpu=power8" } */ + 
+#include + +double +add_double_n (vector double *p, double x, long n) +{ + return vec_extract (*p, n) + x; +} + +long +add_long_n (vector long *p, long x, long n) +{ + return vec_extract (*p, n) + x; +} + +/* { dg-final { scan-assembler-not "lxvd2x" } } */ +/* { dg-final { scan-assembler-not "lxvw4x" } } */ +/* { dg-final { scan-assembler-not "lxvx" } } */ +/* { dg-final { scan-assembler-not "lxv" } } */ +/* { dg-final { scan-assembler-not "lvx" } } */ +/* { dg-final { scan-assembler-not "xxpermdi" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-4.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-4.c new file mode 100644 index 00000000000..6380fef9bc8 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-4.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-O2 -mcpu=power9" } */ + +#include + +#ifdef __LITTLE_ENDIAN__ +#define ELEMENT 1 +#else +#define ELEMENT 0 +#endif + +void foo (double *p, vector double v) +{ + p[10] = vec_extract (v, ELEMENT); +} + +/* { dg-final { scan-assembler "stxsd " } } */ +/* { dg-final { scan-assembler-not "stxsdx" } } */ +/* { dg-final { scan-assembler-not "stfd" } } */ +/* { dg-final { scan-assembler-not "xxpermdi" } } */