+ {
+ enum tree_code shift_code;
+ bool have_whole_vector_shift = true;
+ enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
+ int bit_offset;
+ int element_bitsize = tree_low_cst (bitsize, 1);
+ int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ tree vec_temp;
+
+ /* The result of the reduction is expected to be at the LSB bits
+ of the vector. For big-endian targets this means at the right
+ end of the vector. For little-endian targets this means at the
+ left end of the vector. */
+
+ if (BITS_BIG_ENDIAN
+ && vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
+ shift_code = VEC_RSHIFT_EXPR;
+ else if (!BITS_BIG_ENDIAN
+ && vec_shl_optab->handlers[mode].insn_code != CODE_FOR_nothing)
+ shift_code = VEC_LSHIFT_EXPR;
+ else
+ have_whole_vector_shift = false;
+
+ if (have_whole_vector_shift)
+ {
+ /*** Case 2:
+ for (offset = VS/2; offset >= element_size; offset/=2)
+ {
+ Create: va' = vec_shift <va, offset>
+ Create: va = vop <va, va'>
+ } */
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "Reduce using vector shifts");
+
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ new_temp = PHI_RESULT (new_phi);
+
+ for (bit_offset = vec_size_in_bits/2;
+ bit_offset >= element_bitsize;
+ bit_offset /= 2)
+ {
+ tree bitpos = size_int (bit_offset);
+
+ epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+ build2 (shift_code, vectype, new_temp, bitpos));
+ new_name = make_ssa_name (vec_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_name;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+
+
+ epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+ build2 (code, vectype, new_name, new_temp));
+ new_temp = make_ssa_name (vec_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+ }
+
+ extract_scalar_result = true;
+ adjust_in_epilog = true;
+ }
+ else
+ {
+ /*** Case 3:
+ Create: s = init;
+ for (offset=0; offset<vector_size; offset+=element_size;)
+ {
+ Create: s' = extract_field <v_out2, offset>
+ Create: s = op <s, s'>
+ } */
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "Reduce using scalar code. ");
+
+ vec_temp = PHI_RESULT (new_phi);
+ vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+
+ /* The first iteration is peeled out when possible to minimize
+ the number of operations we generate. */
+ if (code == PLUS_EXPR
+ && (integer_zerop (scalar_initial_def)
+ || real_zerop (scalar_initial_def)))
+ {
+ epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+ build3 (BIT_FIELD_REF, scalar_type,
+ vec_temp, bitsize, bitsize_zero_node));
+ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+
+ bit_offset = element_bitsize;
+ }
+ else
+ {
+ new_temp = scalar_initial_def;
+ bit_offset = 0;
+ }