From 46b57af175e889126f65a6ca667c2c4606f93f84 Mon Sep 17 00:00:00 2001 From: Tejas Belagod Date: Wed, 25 Aug 2010 08:23:26 +0100 Subject: [PATCH] vmull / vmovl support for Neon. For Tejas Belagod 2010-08-25 Tejas Belagod * lib/target-supports.exp (check_effective_target_vect_unpack): Set vect_unpack supported flag to true for neon. * config/arm/iterators.md (VU, SE, V_widen_l): New. (V_unpack, US): New. * config/arm/neon.md (vec_unpack_hi_): Expansion for vmovl. (vec_unpack_lo_): Likewise. (neon_vec_unpack_hi_): Instruction pattern for vmovl. (neon_vec_unpack_lo_): Likewise. (vec_widen_mult_lo_): Expansion for vmull. (vec_widen_mult_hi_): Likewise. (neon_vec_mult_lo_): Instruction pattern for vmull. (neon_vec_mult_hi_): Likewise. (neon_unpack_): Widening move intermediate step for vectorizing without -mvectorize-with-neon-quad. (neon_vec_mult_): Widening multiply intermediate step for vectorizing without -mvectorize-with-neon-quad. * config/arm/predicates.md (vect_par_constant_high): Check for high-half lanes of a vector. (vect_par_constant_low): Check for low-half lanes of a vector. From-SVN: r163538 --- gcc/ChangeLog | 21 +++ gcc/config/arm/iterators.md | 14 +- gcc/config/arm/neon.md | 202 ++++++++++++++++++++++++++ gcc/config/arm/predicates.md | 58 ++++++++ gcc/testsuite/ChangeLog | 5 + gcc/testsuite/lib/target-supports.exp | 3 +- 6 files changed, 301 insertions(+), 2 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d0d6bb932c8..29c5aecd4b2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2010-08-25 Tejas Belagod + + * config/arm/iterators.md (VU, SE, V_widen_l): New. + (V_unpack, US): New. + * config/arm/neon.md (vec_unpack_hi_): Expansion for + vmovl. + (vec_unpack_lo_): Likewise. + (neon_vec_unpack_hi_): Instruction pattern for vmovl. + (neon_vec_unpack_lo_): Likewise. + (vec_widen_mult_lo_): Expansion for vmull. + (vec_widen_mult_hi_): Likewise. + (neon_vec_mult_lo_): Instruction pattern for vmull. 
+ (neon_vec_mult_hi_): Likewise. + (neon_unpack_): Widening move intermediate step for + vectorizing without -mvectorize-with-neon-quad. + (neon_vec_mult_): Widening multiply intermediate step + for vectorizing without -mvectorize-with-neon-quad. + * config/arm/predicates.md (vect_par_constant_high): Check for + high-half lanes of a vector. + (vect_par_constant_low): Check for low-half lanes of a vector. + 2010-08-24 Sebastian Pop * tree-if-conv.c (struct ifc_dr): New. diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index ee04aabebf2..d9b5621ef76 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -136,7 +136,9 @@ ;; Modes with 32-bit elements only. (define_mode_iterator V32 [V2SI V2SF V4SI V4SF]) - +;; Modes with 8-bit, 16-bit and 32-bit elements. +(define_mode_iterator VU [V16QI V8HI V4SI]) + ;;---------------------------------------------------------------------------- ;; Code iterators ;;---------------------------------------------------------------------------- @@ -156,6 +158,8 @@ ;; without unsigned variants (for use with *SFmode pattern). (define_code_iterator vqhs_ops [plus smin smax]) +;; A list of widening operators +(define_code_iterator SE [sign_extend zero_extend]) ;;---------------------------------------------------------------------------- ;; Mode attributes @@ -360,6 +364,11 @@ (V2SF "2") (V4SF "4") (DI "1") (V2DI "2")]) +;; Same as V_widen, but lower-case. +(define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") ( V2SI "v2di")]) + +;; Widen. Result is half the number of elements, but widened to double-width. +(define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) ;;---------------------------------------------------------------------------- ;; Code attributes @@ -375,3 +384,6 @@ (define_code_attr cnb [(ltu "CC_C") (geu "CC")]) (define_code_attr optab [(ltu "ltu") (geu "geu")]) + +;; Assembler mnemonics for signedness of widening operations. 
+(define_code_attr US [(sign_extend "s") (zero_extend "u")])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index bdc279a92a5..96241b9c17f 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -4977,3 +4977,205 @@
   emit_insn (gen_orn3_neon (operands[0], operands[1], operands[2]));
   DONE;
 })
+
+(define_insn "neon_vec_unpack_lo_" ; SE-extend the lanes of operand 1 selected by operand 2
+  [(set (match_operand: 0 "register_operand" "=w")
+        (SE: (vec_select:
+               (match_operand:VU 1 "register_operand" "w")
+               (match_operand:VU 2 "vect_par_constant_low" ""))))] ; operand 2 must be the low-half lane PARALLEL
+  "TARGET_NEON"
+  "vmovl. %q0, %e1"
+  [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_insn "neon_vec_unpack_hi_" ; as neon_vec_unpack_lo_, but for the high-half lanes
+  [(set (match_operand: 0 "register_operand" "=w")
+        (SE: (vec_select:
+               (match_operand:VU 1 "register_operand" "w")
+               (match_operand:VU 2 "vect_par_constant_high" ""))))] ; operand 2 must be the high-half lane PARALLEL
+  "TARGET_NEON"
+  "vmovl. %q0, %f1"
+  [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_unpack_hi_" ; build the high-half lane PARALLEL, then emit neon_vec_unpack_hi_
+  [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VU 1 "register_operand" ""))]
+  "TARGET_NEON"
+  {
+   rtvec v = rtvec_alloc (/2) ;
+   rtx t1;
+   int i;
+   for (i = 0; i < (/2); i++)
+     RTVEC_ELT (v, i) = GEN_INT ((/2) + i); /* lane indices start at the half-way point */
+
+   t1 = gen_rtx_PARALLEL (mode, v);
+   emit_insn (gen_neon_vec_unpack_hi_ (operands[0],
+                                       operands[1],
+                                       t1));
+   DONE;
+  }
+)
+
+(define_expand "vec_unpack_lo_" ; build the low-half lane PARALLEL, then emit neon_vec_unpack_lo_
+  [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VU 1 "register_operand" ""))]
+  "TARGET_NEON"
+  {
+   rtvec v = rtvec_alloc (/2) ;
+   rtx t1;
+   int i;
+   for (i = 0; i < (/2) ; i++)
+     RTVEC_ELT (v, i) = GEN_INT (i); /* lane indices start at 0 */
+   t1 = gen_rtx_PARALLEL (mode, v);
+   emit_insn (gen_neon_vec_unpack_lo_ (operands[0],
+                                       operands[1],
+                                       t1));
+   DONE;
+  }
+)
+
+(define_insn "neon_vec_mult_lo_" ; multiply the SE-extended low-half lanes of operands 1 and 3
+  [(set (match_operand: 0 "register_operand" "=w")
+        (mult: (SE: (vec_select:
+                      (match_operand:VU 1 "register_operand" "w")
+                      (match_operand:VU 2 "vect_par_constant_low" "")))
+               (SE: (vec_select:
+                      (match_operand:VU 3 "register_operand" "w")
+                      (match_dup 2)))))] ; both inputs use the same lane selection
+  "TARGET_NEON"
+  "vmull. %q0, %e1, %e3"
+  [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_mult_lo_" ; build the low-half lane PARALLEL, then emit neon_vec_mult_lo_
+  [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VU 1 "register_operand" ""))
+   (SE: (match_operand:VU 2 "register_operand" ""))]
+  "TARGET_NEON"
+  {
+   rtvec v = rtvec_alloc (/2) ;
+   rtx t1;
+   int i;
+   for (i = 0; i < (/2) ; i++)
+     RTVEC_ELT (v, i) = GEN_INT (i);
+   t1 = gen_rtx_PARALLEL (mode, v);
+
+   emit_insn (gen_neon_vec_mult_lo_ (operands[0],
+                                     operands[1],
+                                     t1, /* the PARALLEL is insn operand 2 */
+                                     operands[2]));
+   DONE;
+  }
+)
+
+(define_insn "neon_vec_mult_hi_" ; multiply the SE-extended high-half lanes of operands 1 and 3
+  [(set (match_operand: 0 "register_operand" "=w")
+        (mult: (SE: (vec_select:
+                      (match_operand:VU 1 "register_operand" "w")
+                      (match_operand:VU 2 "vect_par_constant_high" "")))
+               (SE: (vec_select:
+                      (match_operand:VU 3 "register_operand" "w")
+                      (match_dup 2)))))] ; both inputs use the same lane selection
+  "TARGET_NEON"
+  "vmull. %q0, %f1, %f3"
+  [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_mult_hi_" ; build the high-half lane PARALLEL, then emit neon_vec_mult_hi_
+  [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VU 1 "register_operand" ""))
+   (SE: (match_operand:VU 2 "register_operand" ""))]
+  "TARGET_NEON"
+  {
+   rtvec v = rtvec_alloc (/2) ;
+   rtx t1;
+   int i;
+   for (i = 0; i < (/2) ; i++)
+     RTVEC_ELT (v, i) = GEN_INT (/2 + i);
+   t1 = gen_rtx_PARALLEL (mode, v);
+
+   emit_insn (gen_neon_vec_mult_hi_ (operands[0],
+                                     operands[1],
+                                     t1, /* the PARALLEL is insn operand 2 */
+                                     operands[2]));
+   DONE;
+
+  }
+)
+
+;; Vectorize for non-neon-quad case
+(define_insn "neon_unpack_" ; SE-extend every lane of a VDI-mode operand
+  [(set (match_operand: 0 "register_operand" "=w")
+        (SE: (match_operand:VDI 1 "register_operand" "w")))] ; "w", like every other insn operand in this group
+  "TARGET_NEON"
+  "vmovl. %q0, %P1" ; %P so the source is printed as a D register
+  [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_unpack_lo_" ; widen all of operand 1 into a temporary, then extract with neon_vget_low
+  [(match_operand: 0 "register_operand" "")
+   (SE:(match_operand:VDI 1 "register_operand" ""))]
+  "TARGET_NEON"
+{
+  rtx tmpreg = gen_reg_rtx (mode);
+  emit_insn (gen_neon_unpack_ (tmpreg, operands[1]));
+  emit_insn (gen_neon_vget_low (operands[0], tmpreg));
+
+  DONE;
+}
+)
+
+(define_expand "vec_unpack_hi_" ; widen all of operand 1 into a temporary, then extract with neon_vget_high
+  [(match_operand: 0 "register_operand" "")
+   (SE:(match_operand:VDI 1 "register_operand" ""))]
+  "TARGET_NEON"
+{
+  rtx tmpreg = gen_reg_rtx (mode);
+  emit_insn (gen_neon_unpack_ (tmpreg, operands[1]));
+  emit_insn (gen_neon_vget_high (operands[0], tmpreg));
+
+  DONE;
+}
+)
+
+(define_insn "neon_vec_mult_" ; multiply the SE-extended lanes of two VDI-mode operands
+  [(set (match_operand: 0 "register_operand" "=w")
+        (mult: (SE:
+                 (match_operand:VDI 1 "register_operand" "w"))
+               (SE:
+                 (match_operand:VDI 2 "register_operand" "w"))))]
+  "TARGET_NEON"
+  "vmull. %q0, %P1, %P2" ; %P so both sources are printed as D registers
+  [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_mult_hi_" ; full widening multiply into a temporary, then neon_vget_high
+  [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VDI 1 "register_operand" ""))
+   (SE: (match_operand:VDI 2 "register_operand" ""))]
+  "TARGET_NEON"
+  {
+   rtx tmpreg = gen_reg_rtx (mode);
+   emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2]));
+   emit_insn (gen_neon_vget_high (operands[0], tmpreg));
+
+   DONE;
+
+  }
+)
+
+(define_expand "vec_widen_mult_lo_" ; full widening multiply into a temporary, then neon_vget_low
+  [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VDI 1 "register_operand" ""))
+   (SE: (match_operand:VDI 2 "register_operand" ""))]
+  "TARGET_NEON"
+  {
+   rtx tmpreg = gen_reg_rtx (mode);
+   emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2]));
+   emit_insn (gen_neon_vget_low (operands[0], tmpreg));
+
+   DONE;
+
+  }
+)
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index da3b6dcd4fd..032b2ecaaf3 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -619,3 +619,61 @@
 	    (and (match_test "TARGET_32BIT")
 		 (match_operand 0 "arm_di_operand"))))
 
+;; Predicates for parallel expanders based on mode.
+(define_special_predicate "vect_par_constant_high" ; PARALLEL of constants base/2, base/2 + 1, ...
+  (match_code "parallel")
+{
+  HOST_WIDE_INT count = XVECLEN (op, 0);
+  int i;
+  int base = GET_MODE_NUNITS (mode);
+
+  if ((count < 1)
+      || (count != base/2)) /* must hold exactly base/2 elements */
+    return false;
+
+  if (!VECTOR_MODE_P (mode))
+    return false;
+
+  for (i = 0; i < count; i++)
+    {
+      rtx elt = XVECEXP (op, 0, i);
+      int val;
+
+      if (GET_CODE (elt) != CONST_INT)
+        return false;
+
+      val = INTVAL (elt);
+      if (val != (base/2) + i) /* element I must be the constant base/2 + I */
+        return false;
+    }
+  return true;
+})
+
+(define_special_predicate "vect_par_constant_low" ; PARALLEL of constants 0, 1, ..., base/2 - 1
+  (match_code "parallel")
+{
+  HOST_WIDE_INT count = XVECLEN (op, 0);
+  int i;
+  int base = GET_MODE_NUNITS (mode);
+
+  if ((count < 1)
+      || (count != base/2)) /* must hold exactly base/2 elements */
+    return false;
+
+  if (!VECTOR_MODE_P (mode))
+    return false;
+
+  for (i = 0; i < count; i++)
+    {
+      rtx elt = XVECEXP (op, 0, i);
+      int val;
+
+      if (GET_CODE (elt) != CONST_INT)
+        return false;
+
+      val = INTVAL (elt);
+      if (val != i) /* element I must be the constant I */
+        return false;
+    }
+  return true;
+})
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 26e6df7c426..7a92568cb70 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2010-08-25  Tejas Belagod
+
+	* lib/target-supports.exp (check_effective_target_vect_unpack):
+	Set vect_unpack supported flag to true for neon.
+
 2010-08-24  Sebastian Pop
 
 	* gcc.dg/tree-ssa/ifc-5.c: New.
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 1682d582bc5..4b95323c96c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2640,7 +2640,8 @@ proc check_effective_target_vect_unpack { } {
         if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*])
              || [istarget i?86-*-*]
              || [istarget x86_64-*-*]
-             || [istarget spu-*-*] } {
+             || [istarget spu-*-*]
+             || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
             set et_vect_unpack_saved 1
         }
     }
-- 
2.30.2