/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
struct target_expmed default_target_expmed;
struct target_expmed *this_target_expmed = &default_target_expmed;
static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, scalar_int_mode, bool);
static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
                                     unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx, scalar_int_mode, bool);
static void store_split_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, scalar_int_mode, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode,
                             rtx_code_label *);
static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
                       GET_MODE_PRECISION (mode)), mode);
}
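
/* For instance, assuming a 32-bit MODE, mask_rtx (mode, 4, 8, false)
   yields the constant 0x00000ff0, and mask_rtx (mode, 4, 8, true)
   its complement 0xfffff00f.  */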
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
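
/* This relies on the usual trick that x & (x - 1) clears the lowest set
   bit of x, so the result is zero exactly when at most one bit is set:
   e.g. 8 & 7 == 0, while 6 & 5 == 4 != 0.  */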
struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};
static void
init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
                      scalar_int_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than the "full"
     integer it requires for storage.  In case one doesn't, for
     comparison purposes here, reduce the bit size by one in that
     case.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
                    set_src_cost (which, to_mode, speed));
}
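
/* As an illustration, for a QImode-to-SImode conversion this records the
   cost of a (zero_extend:SI (reg:QI)) rtx, on the stated assumption that
   sign extension costs the same as zero extension.  */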
static void
init_expmed_one_mode (struct init_expmed_rtl *all,
                      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
                                     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
                                     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
                                                       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
                                                        speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
                                                        speed));
    }

  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (mode, &int_mode_to))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
           mode_from = (machine_mode)(mode_from + 1))
        init_expmed_one_conv (all, int_mode_to,
                              as_a <scalar_int_mode> (mode_from), speed);

      scalar_int_mode wider_mode;
      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
          && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
        {
          PUT_MODE (all->zext, wider_mode);
          PUT_MODE (all->wide_mult, wider_mode);
          PUT_MODE (all->wide_lshr, wider_mode);
          XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

          set_mul_widen_cost (speed, wider_mode,
                              set_src_cost (all->wide_mult, wider_mode, speed));
          set_mul_highpart_cost (speed, int_mode_to,
                                 set_src_cost (all->wide_trunc,
                                               int_mode_to, speed));
        }
    }
}
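
/* Note that the loop above prices each shift count M separately, using
   surrogate rtxes of the form (ashift X M), (plus (mult X 2**M) Y) and
   the two subtraction variants; these per-count costs are what the
   multiplication synthesis code later weighs against a real mult.  */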
void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
           mode = (machine_mode)(mode + 1))
        init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
        for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
        for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
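
/* When X is constant the negation folds immediately; e.g.
   negate_rtx (SImode, const1_rtx) returns (const_int -1) without
   emitting any insns.  */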
/* Whether reverse storage order is supported on the target.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

static void
check_reverse_storage_order_support (void)
{
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}

/* Whether reverse FP storage order is supported on the target.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

static void
check_reverse_float_storage_order_support (void)
{
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}
/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (machine_mode mode, rtx x)
{
  scalar_int_mode int_mode;
  rtx result;

  if (mode == QImode)
    return x;

  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (!is_a <scalar_int_mode> (mode, &int_mode))
    {
      if (FLOAT_MODE_P (mode)
          && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
        check_reverse_float_storage_order_support ();

      if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
        {
          sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
          return x;
        }
      x = gen_lowpart (int_mode, x);
    }

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
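
/* On a target providing a bswap pattern, flipping a 32-bit scalar thus
   reduces to one byte-swap; float modes are first punned to the
   same-sized integer mode, and complex modes are flipped one part at a
   time by the recursion above.  */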
/* If MODE is set, adjust bitfield memory MEM so that it points to the
   first unit of mode MODE that contains a bitfield of size BITSIZE at
   bit position BITNUM.  If MODE is not set, return a BLKmode reference
   to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
   of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
                      unsigned HOST_WIDE_INT bitsize,
                      unsigned HOST_WIDE_INT bitnum,
                      unsigned HOST_WIDE_INT *new_bitnum)
{
  scalar_int_mode imode;
  if (mode.exists (&imode))
    {
      unsigned int unit = GET_MODE_BITSIZE (imode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, imode, offset);
    }
  else
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
                            / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, BLKmode, offset, size);
    }
}
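
/* Example with 8-bit units: narrowing a field of BITSIZE 8 at BITNUM 37
   to HImode gives *NEW_BITNUM == 37 % 16 == 5 and a byte offset of
   (37 - 5) / 8 == 4, i.e. the third 16-bit unit of MEM.  */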
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
                              rtx op0, HOST_WIDE_INT bitsize,
                              HOST_WIDE_INT bitnum,
                              unsigned HOST_WIDE_INT bitregion_start,
                              unsigned HOST_WIDE_INT bitregion_end,
                              machine_mode fieldmode,
                              unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
                                bitregion_end, MEM_ALIGN (op0),
                                MEM_VOLATILE_P (op0));
  scalar_int_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
         any wider modes.  All other things being equal, we prefer to
         use the widest mode possible because it tends to expose more
         CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
        {
          /* Limit the search to the mode required by the corresponding
             register insertion or extraction instruction, if any.  */
          scalar_int_mode limit_mode = word_mode;
          extraction_insn insn;
          if (get_best_reg_extraction_insn (&insn, pattern,
                                            GET_MODE_BITSIZE (best_mode),
                                            fieldmode))
            limit_mode = insn.field_mode;

          scalar_int_mode wider_mode;
          while (iter.next_mode (&wider_mode)
                 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
            best_mode = wider_mode;
        }
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
                                   new_bitnum);
    }
  return NULL_RTX;
}
/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
                     unsigned HOST_WIDE_INT bitsize,
                     machine_mode struct_mode)
{
  if (BYTES_BIG_ENDIAN)
    return (bitnum % BITS_PER_UNIT == 0
            && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
                || (bitnum + bitsize) % BITS_PER_WORD == 0));
  else
    return bitnum % BITS_PER_WORD == 0;
}
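
/* E.g. with 32-bit words, a field starting at bit 32 is a lowpart on a
   little-endian target (it begins a new word), whereas on big-endian
   the lowpart fields are the byte-aligned ones that end on a word
   boundary or at the top of STRUCT_MODE.  */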
/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            scalar_int_mode fieldmode,
                            unsigned HOST_WIDE_INT bitregion_start,
                            unsigned HOST_WIDE_INT bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (bitregion_end != 0
      && (bitnum - bitnum % modesize < bitregion_start
          || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
    return false;

  return true;
}
/* Return true if OP is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.  */

static bool
simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
{
  return (MEM_P (op0)
          && bitnum % BITS_PER_UNIT == 0
          && bitsize == GET_MODE_BITSIZE (mode)
          && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
              || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
                  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}
/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
                            opt_scalar_int_mode op0_mode,
                            unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            rtx value, scalar_int_mode value_mode)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
                                 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                         op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case where the field lies completely
     outside the register.  The field can also lie partially in the
     register, in which case we need to adjust BITSIZE for the partial
     overflow.  Without this fix, pr48335-2.c would break on big-endian
     targets that have a bit-insert instruction, such as arm and
     aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
               "destination object, data truncated into %wu-bit",
               bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (value_mode != op_mode)
    {
      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
        {
          rtx tmp;
          /* Optimization: Don't bother really extending VALUE
             if it has all the bits we will actually use.  However,
             if we must narrow it, be sure we do it correctly.  */

          if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
            {
              tmp = simplify_subreg (op_mode, value1, value_mode, 0);
              if (! tmp)
                tmp = simplify_gen_subreg (op_mode,
                                           force_reg (value_mode, value1),
                                           value_mode, 0);
            }
          else
            {
              tmp = gen_lowpart_if_possible (op_mode, value1);
              if (! tmp)
                tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
            }
          value1 = tmp;
        }
      else if (CONST_INT_P (value))
        value1 = gen_int_mode (INTVAL (value), op_mode);
      else
        /* Parse phase is supposed to make VALUE's data type
           match that of the component reference, which is a type
           at least as wide as the field; so VALUE should have
           a mode that corresponds to that type.  */
        gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
        convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}
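
/* The operand layout used above follows the insv pattern convention:
   operand 0 is the field location, operands 1 and 2 the width and
   starting bit, and operand 3 the value to be inserted.  */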
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   machine_mode fieldmode,
                   rtx value, bool reverse, bool fallback_p)
{
  rtx op0 = str_rtx;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  machine_mode outermode = GET_MODE (op0);
  scalar_mode innermode = GET_MODE_INNER (outermode);
  if (VECTOR_MODE_P (outermode)
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
      && fieldmode == innermode
      && bitsize == GET_MODE_BITSIZE (innermode)
      && !(bitnum % GET_MODE_BITSIZE (innermode)))
    {
      struct expand_operand ops[3];
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
  if (!MEM_P (op0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
          || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
         words or to cope with mode punning between equal-sized modes.
         In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;

      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
        {
          sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
          if (sub)
            {
              if (reverse)
                sub = flip_storage_order (GET_MODE (op0), sub);
              emit_move_insn (op0, sub);
              return true;
            }
        }
      else
        {
          sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                     bitnum / BITS_PER_UNIT);
          if (sub)
            {
              if (reverse)
                value = flip_storage_order (fieldmode, value);
              emit_move_insn (sub, value);
              return true;
            }
        }
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
      if (reverse)
        value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
    {
      if (MEM_P (op0))
        op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
                                            0, MEM_SIZE (op0));
      else
        op0 = gen_lowpart (op0_mode.require (), op0);
    }

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
                                    - i - 1
                                  : i);
          unsigned int bit_offset = (backwards ^ reverse
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big-endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                value_word,
                                                GEN_INT (BITS_PER_WORD
                                                         - new_bitsize),
                                                NULL_RTX, true,
                                                OPTAB_LIB_WIDEN);

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, reverse, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */

  rtx orig_value = value;
  scalar_int_mode value_mode;
  if (GET_MODE (value) == VOIDmode)
    /* By this point we've dealt with values that are bigger than a word,
       so word_mode is a conservatively correct choice.  */
    value_mode = word_mode;
  else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
    {
      value_mode = int_mode_for_mode (GET_MODE (value)).require ();
      value = gen_reg_rtx (value_mode);
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
  if (!MEM_P (op0)
      && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
      && (!REG_P (op0)
          || !HARD_REGISTER_P (op0)
          || HARD_REGNO_NREGS (REGNO (op0), op0_mode.require ()) != 1))
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
        {
          if (!fallback_p)
            return false;

          store_split_bit_field (op0, op0_mode, bitsize, bitnum,
                                 bitregion_start, bitregion_end,
                                 value, value_mode, reverse);
          return true;
        }
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
                                 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      op0_mode = word_mode;
      bitnum %= BITS_PER_WORD;
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
                                       GET_MODE_BITSIZE (op0_mode.require ()),
                                       fieldmode)
      && store_bit_field_using_insv (&insv, op0, op0_mode,
                                     bitsize, bitnum, value, value_mode))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
                                        fieldmode)
          && store_bit_field_using_insv (&insv, op0, op0_mode,
                                         bitsize, bitnum, value, value_mode))
        return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
         into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
                                               bitregion_start, bitregion_end,
                                               fieldmode, &bitpos);
      if (xop0)
        {
          rtx tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, reverse, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
                         bitregion_end, value, value_mode, reverse);
  return true;
}
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 machine_mode fieldmode,
                 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  scalar_int_mode int_mode;
  if (is_a <scalar_int_mode> (fieldmode, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
                                     bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
         We know here that the field can be accessed with one single
         instruction.  For targets that support unaligned memory,
         an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (int_mode))
        {
          str_rtx = adjust_bitfield_address (str_rtx, int_mode,
                                             bitnum / BITS_PER_UNIT);
          if (reverse)
            value = flip_storage_order (int_mode, value);
          gcc_assert (bitnum % BITS_PER_UNIT == 0);
          emit_move_insn (str_rtx, value);
        }
      else
        {
          rtx temp;

          str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
                                          &bitnum);
          gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
          temp = copy_to_reg (str_rtx);
          if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
                                  int_mode, value, reverse, true))
            gcc_unreachable ();

          emit_move_insn (str_rtx, temp);
        }

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      scalar_int_mode best_mode;
      machine_mode addr_mode = VOIDmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      if (get_best_mode (bitsize, bitnum,
                         bitregion_start, bitregion_end,
                         MEM_ALIGN (str_rtx), INT_MAX,
                         MEM_VOLATILE_P (str_rtx), &best_mode))
        addr_mode = best_mode;
      str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
                                              offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, reverse, true))
    gcc_unreachable ();
}
/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
   it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
   the mode of VALUE.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value, scalar_int_mode value_mode, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  scalar_int_mode best_mode;
  if (MEM_P (op0))
    {
      unsigned int max_bitsize = BITS_PER_WORD;
      scalar_int_mode imode;
      if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
        max_bitsize = GET_MODE_BITSIZE (imode);

      if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
                          MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
                          &best_mode))
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, op0_mode, bitsize, bitnum,
                                 bitregion_start, bitregion_end,
                                 value, value_mode, reverse);
          return;
        }

      op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
    }
  else
    best_mode = op0_mode.require ();

  store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
                           value, value_mode, reverse);
}
/* Helper function for store_fixed_bit_field, stores
   the bit field always using MODE, which is the mode of OP0.  The other
   arguments are as for store_fixed_bit_field.  */

static void
store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitnum,
                         rtx value, scalar_int_mode value_mode, bool reverse)
{
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == (HOST_WIDE_INT_1U << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT
                   && v == HOST_WIDE_INT_M1U))
        all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
                      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (value_mode != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
        mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
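
/* In effect the sequence above computes
     op0 = (op0 & ~(MASK << BITNUM)) | ((VALUE & MASK) << BITNUM)
   with MASK being BITSIZE ones, skipping the AND when the field value
   is known to be all ones and the IOR when it is zero.  */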
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store, which has mode VALUE_MODE.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value, scalar_int_mode value_mode, bool reverse)
{
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && op0_mode.exists ())
    unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
      value_mode = word_mode;
    }

  total_bits = GET_MODE_BITSIZE (value_mode);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
         UNIT close to the end of the region as needed.  If op0 is a REG
         or SUBREG of REG, don't do this, as there can't be data races
         on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
          && unit > BITS_PER_UNIT
          && bitpos + bitsdone - thispos + unit > bitregion_end + 1
          && !REG_P (op0)
          && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
        {
          unit = unit / 2;
          continue;
        }

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
        {
          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is little-endian.  */
          else if (reverse)
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            thissize,
                                            bitsize - bitsdone - thissize,
                                            NULL_RTX, 1, false);
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1, false);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is big-endian.  */
          else if (reverse)
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            thissize,
                                            total_bits - bitsdone - thissize,
                                            NULL_RTX, 1, false);
          else
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            thissize, bitsdone, NULL_RTX,
                                            1, false);
        }

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
        {
          scalar_int_mode imode;
          if (op0_mode.exists (&imode)
              && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
            {
              if (offset)
                op0_piece = const0_rtx;
            }
          else
            {
              op0_piece = operand_subword_force (op0,
                                                 offset * unit / BITS_PER_WORD,
                                                 GET_MODE (op0));
              op0_piece_mode = word_mode;
            }
          offset &= BITS_PER_WORD / unit - 1;
        }

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (op0_piece != const0_rtx)
        store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
                               offset * unit + thispos, bitregion_start,
                               bitregion_end, part, word_mode, reverse);
      bitsdone += thissize;
    }
}
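
/* For instance, with UNIT limited to 32 by the alignment of OP0,
   storing a 10-bit field at BITPOS 28 is split into a 4-bit part at
   bits 28..31 of the first word and a 6-bit part at bits 0..5 of the
   next.  */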
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
                             machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
      x = convert_to_mode (int_mode, x, unsignedp);
      x = force_reg (int_mode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXTV describes the extraction instruction to use.  If OP0_MODE
   is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
   The other arguments are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
                              opt_scalar_int_mode op0_mode,
                              unsigned HOST_WIDE_INT bitsize,
                              unsigned HOST_WIDE_INT bitnum,
                              int unsignedp, rtx target,
                              machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  scalar_int_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
                                &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
        return NULL_RTX;
      if (REG_P (op0) && op0_mode.require () != ext_mode)
        op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
         between the mode of the extraction (word_mode) and the target
         mode.  Instead, create a temporary and use convert_move to set
         the target.  */
      if (REG_P (target)
          && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
        {
          target = gen_lowpart (ext_mode, target);
          if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
            spec_target_subreg = target;
        }
      else
        target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
        return target;
      if (target == spec_target_subreg)
        return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1532 /* A subroutine of extract_bit_field, with the same arguments.
1533 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1534 if we can find no other means of implementing the operation.
1535 if FALLBACK_P is false, return NULL instead. */
1538 extract_bit_field_1 (rtx str_rtx
, unsigned HOST_WIDE_INT bitsize
,
1539 unsigned HOST_WIDE_INT bitnum
, int unsignedp
, rtx target
,
1540 machine_mode mode
, machine_mode tmode
,
1541 bool reverse
, bool fallback_p
, rtx
*alt_rtl
)
1546 if (tmode
== VOIDmode
)
1549 while (GET_CODE (op0
) == SUBREG
)
1551 bitnum
+= SUBREG_BYTE (op0
) * BITS_PER_UNIT
;
1552 op0
= SUBREG_REG (op0
);
1555 /* If we have an out-of-bounds access to a register, just return an
1556 uninitialized register of the required mode. This can occur if the
1557 source code contains an out-of-bounds access to a small array. */
1558 if (REG_P (op0
) && bitnum
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
1559 return gen_reg_rtx (tmode
);
1562 && mode
== GET_MODE (op0
)
1564 && bitsize
== GET_MODE_BITSIZE (GET_MODE (op0
)))
1567 op0
= flip_storage_order (mode
, op0
);
1568 /* We're trying to extract a full register from itself. */
1572 /* First try to check for vector from vector extractions. */
1573 if (VECTOR_MODE_P (GET_MODE (op0
))
1575 && VECTOR_MODE_P (tmode
)
1576 && GET_MODE_SIZE (GET_MODE (op0
)) > GET_MODE_SIZE (tmode
))
1578 machine_mode new_mode
= GET_MODE (op0
);
1579 if (GET_MODE_INNER (new_mode
) != GET_MODE_INNER (tmode
))
1581 new_mode
= mode_for_vector (GET_MODE_INNER (tmode
),
1582 GET_MODE_BITSIZE (GET_MODE (op0
))
1583 / GET_MODE_UNIT_BITSIZE (tmode
));
1584 if (!VECTOR_MODE_P (new_mode
)
1585 || GET_MODE_SIZE (new_mode
) != GET_MODE_SIZE (GET_MODE (op0
))
1586 || GET_MODE_INNER (new_mode
) != GET_MODE_INNER (tmode
)
1587 || !targetm
.vector_mode_supported_p (new_mode
))
1588 new_mode
= VOIDmode
;
1590 if (new_mode
!= VOIDmode
1591 && (convert_optab_handler (vec_extract_optab
, new_mode
, tmode
)
1592 != CODE_FOR_nothing
)
1593 && ((bitnum
+ bitsize
- 1) / GET_MODE_BITSIZE (tmode
)
1594 == bitnum
/ GET_MODE_BITSIZE (tmode
)))
1596 struct expand_operand ops
[3];
1597 machine_mode outermode
= new_mode
;
1598 machine_mode innermode
= tmode
;
1599 enum insn_code icode
1600 = convert_optab_handler (vec_extract_optab
, outermode
, innermode
);
1601 unsigned HOST_WIDE_INT pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
1603 if (new_mode
!= GET_MODE (op0
))
1604 op0
= gen_lowpart (new_mode
, op0
);
1605 create_output_operand (&ops
[0], target
, innermode
);
1607 create_input_operand (&ops
[1], op0
, outermode
);
1608 create_integer_operand (&ops
[2], pos
);
1609 if (maybe_expand_insn (icode
, 3, ops
))
1611 if (alt_rtl
&& ops
[0].target
)
1613 target
= ops
[0].value
;
1614 if (GET_MODE (target
) != mode
)
1615 return gen_lowpart (tmode
, target
);
1621 /* See if we can get a better vector mode before extracting. */
1622 if (VECTOR_MODE_P (GET_MODE (op0
))
1624 && GET_MODE_INNER (GET_MODE (op0
)) != tmode
)
1626 machine_mode new_mode
;
1628 if (GET_MODE_CLASS (tmode
) == MODE_FLOAT
)
1629 new_mode
= MIN_MODE_VECTOR_FLOAT
;
1630 else if (GET_MODE_CLASS (tmode
) == MODE_FRACT
)
1631 new_mode
= MIN_MODE_VECTOR_FRACT
;
1632 else if (GET_MODE_CLASS (tmode
) == MODE_UFRACT
)
1633 new_mode
= MIN_MODE_VECTOR_UFRACT
;
1634 else if (GET_MODE_CLASS (tmode
) == MODE_ACCUM
)
1635 new_mode
= MIN_MODE_VECTOR_ACCUM
;
1636 else if (GET_MODE_CLASS (tmode
) == MODE_UACCUM
)
1637 new_mode
= MIN_MODE_VECTOR_UACCUM
;
1639 new_mode
= MIN_MODE_VECTOR_INT
;
1641 FOR_EACH_MODE_FROM (new_mode
, new_mode
)
1642 if (GET_MODE_SIZE (new_mode
) == GET_MODE_SIZE (GET_MODE (op0
))
1643 && GET_MODE_UNIT_SIZE (new_mode
) == GET_MODE_SIZE (tmode
)
1644 && targetm
.vector_mode_supported_p (new_mode
))
1646 if (new_mode
!= VOIDmode
)
1647 op0
= gen_lowpart (new_mode
, op0
);
1650 /* Use vec_extract patterns for extracting parts of vectors whenever
1652 machine_mode outermode
= GET_MODE (op0
);
1653 scalar_mode innermode
= GET_MODE_INNER (outermode
);
1654 if (VECTOR_MODE_P (outermode
)
1656 && (convert_optab_handler (vec_extract_optab
, outermode
, innermode
)
1657 != CODE_FOR_nothing
)
1658 && ((bitnum
+ bitsize
- 1) / GET_MODE_BITSIZE (innermode
)
1659 == bitnum
/ GET_MODE_BITSIZE (innermode
)))
1661 struct expand_operand ops
[3];
1662 enum insn_code icode
1663 = convert_optab_handler (vec_extract_optab
, outermode
, innermode
);
1664 unsigned HOST_WIDE_INT pos
= bitnum
/ GET_MODE_BITSIZE (innermode
);
1666 create_output_operand (&ops
[0], target
, innermode
);
1668 create_input_operand (&ops
[1], op0
, outermode
);
1669 create_integer_operand (&ops
[2], pos
);
1670 if (maybe_expand_insn (icode
, 3, ops
))
1672 if (alt_rtl
&& ops
[0].target
)
1674 target
= ops
[0].value
;
1675 if (GET_MODE (target
) != mode
)
1676 return gen_lowpart (tmode
, target
);
1681 /* Make sure we are playing with integral modes. Pun with subregs
1683 opt_scalar_int_mode op0_mode
= int_mode_for_mode (GET_MODE (op0
));
1684 scalar_int_mode imode
;
1685 if (!op0_mode
.exists (&imode
) || imode
!= GET_MODE (op0
))
1688 op0
= adjust_bitfield_address_size (op0
, op0_mode
.else_blk (),
1690 else if (op0_mode
.exists (&imode
))
1692 op0
= gen_lowpart (imode
, op0
);
1694 /* If we got a SUBREG, force it into a register since we
1695 aren't going to be able to do another SUBREG on it. */
1696 if (GET_CODE (op0
) == SUBREG
)
1697 op0
= force_reg (imode
, op0
);
1701 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (op0
));
1702 rtx mem
= assign_stack_temp (GET_MODE (op0
), size
);
1703 emit_move_insn (mem
, op0
);
1704 op0
= adjust_bitfield_address_size (mem
, BLKmode
, 0, size
);
1708 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1709 If that's wrong, the solution is to test for it and set TARGET to 0
1712 /* Get the mode of the field to use for atomic access or subreg
1715 if (SCALAR_INT_MODE_P (tmode
))
1717 machine_mode try_mode
= mode_for_size (bitsize
,
1718 GET_MODE_CLASS (tmode
), 0);
1719 if (try_mode
!= BLKmode
)
1722 gcc_assert (mode1
!= BLKmode
);
1724 /* Extraction of a full MODE1 value can be done with a subreg as long
1725 as the least significant bit of the value is the least significant
1726 bit of either OP0 or a word of OP0. */
1729 && lowpart_bit_field_p (bitnum
, bitsize
, op0_mode
.require ())
1730 && bitsize
== GET_MODE_BITSIZE (mode1
)
1731 && TRULY_NOOP_TRUNCATION_MODES_P (mode1
, op0_mode
.require ()))
1733 rtx sub
= simplify_gen_subreg (mode1
, op0
, op0_mode
.require (),
1734 bitnum
/ BITS_PER_UNIT
);
1736 return convert_extracted_bit_field (sub
, mode
, tmode
, unsignedp
);
1739 /* Extraction of a full MODE1 value can be done with a load as long as
1740 the field is on a byte boundary and is sufficiently aligned. */
1741 if (simple_mem_bitfield_p (op0
, bitsize
, bitnum
, mode1
))
1743 op0
= adjust_bitfield_address (op0
, mode1
, bitnum
/ BITS_PER_UNIT
);
1745 op0
= flip_storage_order (mode1
, op0
);
1746 return convert_extracted_bit_field (op0
, mode
, tmode
, unsignedp
);
1749 /* Handle fields bigger than a word. */
1751 if (bitsize
> BITS_PER_WORD
)
1753 /* Here we transfer the words of the field
1754 in the order least significant first.
1755 This is because the most significant word is the one which may
1756 be less than full. */
1758 const bool backwards
= WORDS_BIG_ENDIAN
;
1759 unsigned int nwords
= (bitsize
+ (BITS_PER_WORD
- 1)) / BITS_PER_WORD
;
1763 if (target
== 0 || !REG_P (target
) || !valid_multiword_target_p (target
))
1764 target
= gen_reg_rtx (mode
);
1766 /* In case we're about to clobber a base register or something
1767 (see gcc.c-torture/execute/20040625-1.c). */
1768 if (reg_mentioned_p (target
, str_rtx
))
1769 target
= gen_reg_rtx (mode
);
1771 /* Indicate for flow that the entire target reg is being set. */
1772 emit_clobber (target
);
1774 last
= get_last_insn ();
1775 for (i
= 0; i
< nwords
; i
++)
1777 /* If I is 0, use the low-order word in both field and target;
1778 if I is 1, use the next to lowest word; and so on. */
1779 /* Word number in TARGET to use. */
1780 unsigned int wordnum
1782 ? GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
- i
- 1
1784 /* Offset from start of field in OP0. */
1785 unsigned int bit_offset
= (backwards
^ reverse
1786 ? MAX ((int) bitsize
- ((int) i
+ 1)
1789 : (int) i
* BITS_PER_WORD
);
1790 rtx target_part
= operand_subword (target
, wordnum
, 1, VOIDmode
);
1792 = extract_bit_field_1 (op0
, MIN (BITS_PER_WORD
,
1793 bitsize
- i
* BITS_PER_WORD
),
1794 bitnum
+ bit_offset
, 1, target_part
,
1795 mode
, word_mode
, reverse
, fallback_p
, NULL
);
1797 gcc_assert (target_part
);
1800 delete_insns_since (last
);
1804 if (result_part
!= target_part
)
1805 emit_move_insn (target_part
, result_part
);
1810 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1811 need to be zero'd out. */
1812 if (GET_MODE_SIZE (GET_MODE (target
)) > nwords
* UNITS_PER_WORD
)
1814 unsigned int i
, total_words
;
1816 total_words
= GET_MODE_SIZE (GET_MODE (target
)) / UNITS_PER_WORD
;
1817 for (i
= nwords
; i
< total_words
; i
++)
1819 (operand_subword (target
,
1820 backwards
? total_words
- i
- 1 : i
,
1827 /* Signed bit field: sign-extend with two arithmetic shifts. */
1828 target
= expand_shift (LSHIFT_EXPR
, mode
, target
,
1829 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
1830 return expand_shift (RSHIFT_EXPR
, mode
, target
,
1831 GET_MODE_BITSIZE (mode
) - bitsize
, NULL_RTX
, 0);
  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to extract_split_bit_field.  */
  if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
        {
          if (!fallback_p)
            return NULL_RTX;
          target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
                                            unsignedp, reverse);
          return convert_extracted_bit_field (target, mode, tmode, unsignedp);
        }
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
                                 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      op0_mode = word_mode;
      bitnum %= BITS_PER_WORD;
    }
  /* From here on we know the desired field is smaller than a word.
     If OP0 is a register, it too fits within a word.  */
  enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
  extraction_insn extv;
  if (!MEM_P (op0)
      && !reverse
      /* ??? We could limit the structure size to the part of OP0 that
         contains the field, with appropriate checks for endianness
         and TRULY_NOOP_TRUNCATION.  */
      && get_best_reg_extraction_insn (&extv, pattern,
                                       GET_MODE_BITSIZE (op0_mode.require ()),
                                       tmode))
    {
      rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
                                                 bitsize, bitnum,
                                                 unsignedp, target, mode,
                                                 tmode);
      if (result)
        return result;
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
                                        tmode))
        {
          rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
                                                     bitsize, bitnum,
                                                     unsignedp, target, mode,
                                                     tmode);
          if (result)
            return result;
        }

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register and extracting the
         bitfield from that.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
                                               0, 0, tmode, &bitpos);
      if (xop0)
        {
          xop0 = copy_to_reg (xop0);
          rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
                                            unsignedp, target,
                                            mode, tmode, reverse, false, NULL);
          if (result)
            return result;
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return NULL;

  /* Find a correspondingly-sized integer field, so we can apply
     shifts and masks to it.  */
  scalar_int_mode int_mode;
  if (!int_mode_for_mode (tmode).exists (&int_mode))
    /* If this fails, we should probably push op0 out to memory and then
       do a load.  */
    int_mode = int_mode_for_mode (mode).require ();

  target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
                                    bitnum, target, unsignedp, reverse);

  /* Complex values must be reversed piecewise, so we need to undo the global
     reversal, convert to the complex mode and reverse again.  */
  if (reverse && COMPLEX_MODE_P (tmode))
    {
      target = flip_storage_order (int_mode, target);
      target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
      target = flip_storage_order (tmode, target);
    }
  else
    target = convert_extracted_bit_field (target, mode, tmode, unsignedp);

  return target;
}
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If REVERSE is true, the extraction is to be done in reverse order.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
                   machine_mode mode, machine_mode tmode, bool reverse,
                   rtx *alt_rtl)
{
  machine_mode mode1;

  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
    mode1 = GET_MODE (str_rtx);
  else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
    mode1 = GET_MODE (target);
  else
    mode1 = tmode;

  scalar_int_mode int_mode;
  if (is_a <scalar_int_mode> (mode1, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
    {
      /* Extraction of a full INT_MODE value can be done with a simple load.
         We know here that the field can be accessed with one single
         instruction.  For targets that support unaligned memory,
         an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (int_mode))
        {
          rtx result = adjust_bitfield_address (str_rtx, int_mode,
                                                bitnum / BITS_PER_UNIT);
          if (reverse)
            result = flip_storage_order (int_mode, result);
          gcc_assert (bitnum % BITS_PER_UNIT == 0);
          return convert_extracted_bit_field (result, mode, tmode, unsignedp);
        }

      str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
                                      &bitnum);
      gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
      str_rtx = copy_to_reg (str_rtx);
    }

  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
                              target, mode, tmode, reverse, true, alt_rtl);
}
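/* Illustrative sketch (added; X and the argument values are hypothetical):
   a caller wanting bits [8, 24) of X as a zero-extended HImode rvalue
   would write roughly

     rtx field = extract_bit_field (x, 16, 8, 1, NULL_RTX,
                                    HImode, HImode, false, NULL);

   For a REG this reduces to the shift-and-mask fallback; for a volatile
   MEM under -fstrict-volatile-bitfields it takes the single-load path
   above.  */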
/* Use shifts and boolean operations to extract a field of BITSIZE bits
   from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
   otherwise OP0 is a BLKmode MEM.

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If REVERSE is true, the extraction is to be done in reverse order.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (machine_mode tmode, rtx op0,
                         opt_scalar_int_mode op0_mode,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitnum, rtx target,
                         int unsignedp, bool reverse)
{
  scalar_int_mode mode;
  if (MEM_P (op0))
    {
      if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
                          BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
        /* The only way this should occur is if the field spans word
           boundaries.  */
        return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
                                        unsignedp, reverse);

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }
  else
    mode = op0_mode.require ();

  return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
                                    target, unsignedp, reverse);
}
/* Helper function for extract_fixed_bit_field, extracts
   the bit field always using MODE, which is the mode of OP0.
   The other arguments are as for extract_fixed_bit_field.  */

static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
                           unsigned HOST_WIDE_INT bitsize,
                           unsigned HOST_WIDE_INT bitnum, rtx target,
                           int unsignedp, bool reverse)
{
  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (reverse)
    op0 = flip_storage_order (mode, op0);

  if (unsignedp)
    {
      if (bitnum)
        {
          /* If the field does not already start at the lsb,
             shift it so it does.  */
          /* Maybe propagate the target for the shift.  */
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
          if (tmode != mode)
            subtarget = 0;
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
        }
      /* Convert the value to the desired mode.  TMODE must also be a
         scalar integer for this conversion to make sense, since we
         shouldn't reinterpret the bits.  */
      scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
      if (mode != new_mode)
        op0 = convert_to_mode (new_mode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
         mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
        return expand_binop (new_mode, and_optab, op0,
                             mask_rtx (new_mode, 0, bitsize, 0),
                             target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  opt_scalar_int_mode mode_iter;
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
    if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
      break;

  mode = mode_iter.require ();
  op0 = convert_to_mode (mode, op0, 0);

  if (mode != tmode)
    target = 0;

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
                       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
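/* Worked example for the signed path above (added note): extracting a
   5-bit signed field at BITNUM 3 of an SImode value shifts left by
   32 - (5 + 3) = 24 to park the field's msb in bit 31, then shifts
   arithmetically right by 32 - 5 = 27, duplicating the sign bit across
   the upper 27 bits of the result.  */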
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE << BITPOS.  */

static rtx
lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
              int bitpos)
{
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
}
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the extraction is to be done in reverse order.  */

static rtx
extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, int unsignedp,
                         bool reverse)
{
  unsigned int unit;
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         extract_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
        {
          op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
          op0_piece_mode = word_mode;
          offset = 0;
        }

      /* Extract the parts in bit-counting order,
         whose meaning is determined by BYTES_PER_UNIT.
         OFFSET is in UNITs, and UNIT is in bits.  */
      part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
                                      thissize, offset * unit + thispos,
                                      0, 1, reverse);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
        {
          if (bitsize != bitsdone)
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
                                 bitsize - bitsdone, 0, 1);
        }
      else
        {
          if (bitsdone != thissize)
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
                                 bitsdone - thissize, 0, 1);
        }

      if (first)
        result = part;
      else
        /* Combine the parts with bitwise or.  This works
           because we extracted each part as an unsigned bit field.  */
        result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
                               OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
                         BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
                       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
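/* Illustrative note (added): a 16-bit field starting at bit 24 of a pair
   of 32-bit words is handled here as two fixed extractions -- 8 bits from
   the first word and 8 bits from the second -- each zero-extended, shifted
   into position and IOR'd together, with the final two arithmetic shifts
   supplying sign extension when UNSIGNEDP is zero.  */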
/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
   MODE, fill the upper bits with zeros.  Fail if the layout of either
   mode is unknown (as for CC modes) or if the extraction would involve
   unprofitable mode punning.  Return the value on success, otherwise
   return null.

   This is different from gen_lowpart* in these respects:

     - the returned value must always be considered an rvalue

     - when MODE is wider than SRC_MODE, the extraction involves
       a zero extension

     - when MODE is smaller than SRC_MODE, the extraction involves
       a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).

   In other words, this routine performs a computation, whereas the
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
   operations.  */

rtx
extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
{
  scalar_int_mode int_mode, src_int_mode;

  if (mode == src_mode)
    return src;

  if (CONSTANT_P (src))
    {
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
         fails, it will happily create (subreg (symbol_ref)) or similar
         invalid SUBREGs.  */
      unsigned int byte = subreg_lowpart_offset (mode, src_mode);
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
      if (ret)
        return ret;

      if (GET_MODE (src) == VOIDmode
          || !validate_subreg (mode, src_mode, src, byte))
        return NULL_RTX;

      src = force_reg (GET_MODE (src), src);
      return gen_rtx_SUBREG (mode, src, byte);
    }

  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
    return NULL_RTX;

  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
      && MODES_TIEABLE_P (mode, src_mode))
    {
      rtx x = gen_lowpart_common (mode, src);
      if (x)
        return x;
    }

  if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
      || !int_mode_for_mode (mode).exists (&int_mode))
    return NULL_RTX;

  if (!MODES_TIEABLE_P (src_int_mode, src_mode))
    return NULL_RTX;
  if (!MODES_TIEABLE_P (int_mode, mode))
    return NULL_RTX;

  src = gen_lowpart (src_int_mode, src);
  src = convert_modes (int_mode, src_int_mode, src, true);
  src = gen_lowpart (mode, src);
  return src;
}
/* Add INC into TARGET.  */

void
expand_inc (rtx target, rtx inc)
{
  rtx value = expand_binop (GET_MODE (target), add_optab,
                            target, inc,
                            target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Subtract DEC from TARGET.  */

void
expand_dec (rtx target, rtx dec)
{
  rtx value = expand_binop (GET_MODE (target), sub_optab,
                            target, dec,
                            target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.
   If that cannot be done, abort the compilation unless MAY_FAIL is true,
   in which case 0 is returned.  */

static rtx
expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
                rtx amount, rtx target, int unsignedp, bool may_fail = false)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  machine_mode op1_mode;
  machine_mode scalar_mode = mode;
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  if (VECTOR_MODE_P (mode))
    scalar_mode = GET_MODE_INNER (mode);
  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      if (CONST_INT_P (op1)
          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
              (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
        op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
                       % GET_MODE_BITSIZE (scalar_mode));
      else if (GET_CODE (op1) == SUBREG
               && subreg_lowpart_p (op1)
               && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
               && SCALAR_INT_MODE_P (GET_MODE (op1)))
        op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
     prefer left rotation, if op1 is from bitsize / 2 + 1 to
     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
     amount instead.  */
  if (rotate
      && CONST_INT_P (op1)
      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
                   GET_MODE_BITSIZE (scalar_mode) - 1))
    {
      op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
     Note that this is not the case for bigger values.  For instance a rotation
     of 0x01020304 by 16 bits gives 0x03040102 which is different from
     0x04030201 (bswapsi).  */
  if (rotate
      && CONST_INT_P (op1)
      && INTVAL (op1) == BITS_PER_UNIT
      && GET_MODE_SIZE (scalar_mode) == 2
      && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
    return expand_unop (HImode, bswap_optab, shifted, NULL_RTX, unsignedp);

  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
          > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      for (i = 0; i < INTVAL (op1); i++)
        {
          temp = force_reg (mode, shifted);
          shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
                                  unsignedp, OPTAB_LIB_WIDEN);
        }
      return shifted;
    }

  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
        methods = OPTAB_DIRECT;
      else if (attempt == 1)
        methods = OPTAB_WIDEN;
      else
        methods = OPTAB_LIB_WIDEN;

      if (rotate)
        {
          /* Widening does not work for rotation.  */
          if (methods == OPTAB_WIDEN)
            continue;
          else if (methods == OPTAB_LIB_WIDEN)
            {
              /* If we have been unable to open-code this by a rotation,
                 do it as the IOR of two shifts.  I.e., to rotate A
                 by N bits, compute
                 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
                 where C is the bitsize of A.

                 It is theoretically possible that the target machine might
                 not be able to perform either shift and hence we would
                 be making two libcalls rather than just the one for the
                 shift (similarly if IOR could not be done).  We will allow
                 this extremely unlikely lossage to avoid complicating the
                 code below.  */

              rtx subtarget = target == shifted ? 0 : target;
              rtx new_amount, other_amount;
              rtx temp1;

              new_amount = op1;
              if (op1 == const0_rtx)
                other_amount = const0_rtx;
              else if (CONST_INT_P (op1))
                other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
                                        - INTVAL (op1));
              else
                {
                  other_amount
                    = simplify_gen_unary (NEG, GET_MODE (op1),
                                          op1, GET_MODE (op1));
                  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
                  other_amount
                    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
                                           gen_int_mode (mask,
                                                         GET_MODE (op1)));
                }

              shifted = force_reg (mode, shifted);

              temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
                                     mode, shifted, new_amount, 0, 1);
              temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
                                      mode, shifted, other_amount,
                                      subtarget, 1);
              return expand_binop (mode, ior_optab, temp, temp1, target,
                                   unsignedp, methods);
            }

          temp = expand_binop (mode,
                               left ? lrotate_optab : rrotate_optab,
                               shifted, op1, target, unsignedp, methods);
        }
      else if (unsignedp)
        temp = expand_binop (mode,
                             left ? lshift_optab : rshift_uns_optab,
                             shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
         Also, if we are going to widen the operand, we can just as well
         use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
          && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
        {
          enum optab_methods methods1 = methods;

          /* If trying to widen a log shift to an arithmetic shift,
             don't accept an arithmetic shift of the same size.  */
          if (unsignedp)
            methods1 = OPTAB_MUST_WIDEN;

          /* Arithmetic shift */

          temp = expand_binop (mode,
                               left ? lshift_optab : rshift_arith_optab,
                               shifted, op1, target, unsignedp, methods1);
        }

      /* We used to try extzv here for logical right shifts, but that was
         only useful for one machine, the VAX, and caused poor code
         generation there for lshrdi3, so the code was deleted and a
         define_expand for lshrsi3 was added to vax.md.  */
    }

  gcc_assert (temp != NULL_RTX || may_fail);
  return temp;
}
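/* Note on the rotate fallback above (added): for a 32-bit value the
   identity is rot{l,r}(A, N) == (A << N) | ((unsigned) A >> ((-N) & 31)).
   Masking the negated count with C - 1 keeps the second shift amount in
   [0, 31], so N == 0 produces a well-defined shift by zero rather than an
   undefined shift by 32.  */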
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
              int amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
                         shifted, GEN_INT (amount), target, unsignedp);
}

/* Likewise, but return 0 if that cannot be done.  */

static rtx
maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
                    int amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
                         shifted, GEN_INT (amount), target, unsignedp, true);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
                       tree amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
                         shifted, expand_normal (amount), target, unsignedp);
}
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
                        const struct mult_cost *, machine_mode mode);
static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
                              const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (scalar_int_mode, rtx);
static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
                                       int, int);
/* Compute and return the best algorithm for multiplying by T.
   The algorithm must cost less than COST_LIMIT.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.
   MODE is the machine mode of the multiplication.  */

static void
synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
            const struct mult_cost *cost_limit, machine_mode mode)
{
  int m;
  struct algorithm *alg_in, *best_alg;
  struct mult_cost best_cost;
  struct mult_cost new_limit;
  int op_cost, op_latency;
  unsigned HOST_WIDE_INT orig_t = t;
  unsigned HOST_WIDE_INT q;
  int maxm, hash_index;
  bool cache_hit = false;
  enum alg_code cache_alg = alg_zero;
  bool speed = optimize_insn_for_speed_p ();
  scalar_int_mode imode;
  struct alg_hash_entry *entry_ptr;

  /* Indicate that no algorithm is yet found.  If no algorithm
     is found, this value will be returned and indicate failure.  */
  alg_out->cost.cost = cost_limit->cost + 1;
  alg_out->cost.latency = cost_limit->latency + 1;

  if (cost_limit->cost < 0
      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
    return;

  /* Be prepared for vector modes.  */
  imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));

  maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));

  /* Restrict the bits of "t" to the multiplication's mode.  */
  t &= GET_MODE_MASK (imode);

  /* t == 1 can be done in zero cost.  */
  if (t == 1)
    {
      alg_out->ops = 1;
      alg_out->cost.cost = 0;
      alg_out->cost.latency = 0;
      alg_out->op[0] = alg_m;
      return;
    }

  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
     fail now.  */
  if (t == 0)
    {
      if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
        return;
      else
        {
          alg_out->ops = 1;
          alg_out->cost.cost = zero_cost (speed);
          alg_out->cost.latency = zero_cost (speed);
          alg_out->op[0] = alg_zero;
          return;
        }
    }

  /* We'll be needing a couple extra algorithm structures now.  */

  alg_in = XALLOCA (struct algorithm);
  best_alg = XALLOCA (struct algorithm);
  best_cost = *cost_limit;

  /* Compute the hash index.  */
  hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;

  /* See if we already know what to do for T.  */
  entry_ptr = alg_hash_entry_ptr (hash_index);
  if (entry_ptr->t == t
      && entry_ptr->mode == mode
      && entry_ptr->speed == speed
      && entry_ptr->alg != alg_unknown)
    {
      cache_alg = entry_ptr->alg;

      if (cache_alg == alg_impossible)
        {
          /* The cache tells us that it's impossible to synthesize
             multiplication by T within entry_ptr->cost.  */
          if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
            /* COST_LIMIT is at least as restrictive as the one
               recorded in the hash table, in which case we have no
               hope of synthesizing a multiplication.  Just
               return.  */
            return;

          /* If we get here, COST_LIMIT is less restrictive than the
             one recorded in the hash table, so we may be able to
             synthesize a multiplication.  Proceed as if we didn't
             have the cache entry.  */
        }
      else
        {
          if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
            /* The cached algorithm shows that this multiplication
               requires more cost than COST_LIMIT.  Just return.  This
               way, we don't clobber this cache entry with
               alg_impossible but retain useful information.  */
            return;

          cache_hit = true;

          switch (cache_alg)
            {
            case alg_shift:
              goto do_alg_shift;

            case alg_add_t_m2:
            case alg_sub_t_m2:
              goto do_alg_addsub_t_m2;

            case alg_add_factor:
            case alg_sub_factor:
              goto do_alg_addsub_factor;

            case alg_add_t2_m:
              goto do_alg_add_t2_m;

            case alg_sub_t2_m:
              goto do_alg_sub_t2_m;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* If we have a group of zero bits at the low-order part of T, try
     multiplying by the remaining bits and then doing a shift.  */

  if ((t & 1) == 0)
    {
    do_alg_shift:
      m = ctz_or_zero (t); /* m = number of low zero bits */
      if (m < maxm)
        {
          q = t >> m;
          /* The function expand_shift will choose between a shift and
             a sequence of additions, so the observed cost is given as
             MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
          op_cost = m * add_cost (speed, mode);
          if (shift_cost (speed, mode, m) < op_cost)
            op_cost = shift_cost (speed, mode, m);
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, q, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              best_cost = alg_in->cost;
              std::swap (alg_in, best_alg);
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_shift;
            }

          /* See if treating ORIG_T as a signed number yields a better
             sequence.  Try this sequence only for a negative ORIG_T
             as it would be useless for a non-negative ORIG_T.  */
          if ((HOST_WIDE_INT) orig_t < 0)
            {
              /* Shift ORIG_T as follows because a right shift of a
                 negative-valued signed type is implementation
                 defined.  */
              q = ~(~orig_t >> m);
              /* The function expand_shift will choose between a shift
                 and a sequence of additions, so the observed cost is
                 given as MIN (m * add_cost(speed, mode),
                 shift_cost(speed, mode, m)).  */
              op_cost = m * add_cost (speed, mode);
              if (shift_cost (speed, mode, m) < op_cost)
                op_cost = shift_cost (speed, mode, m);
              new_limit.cost = best_cost.cost - op_cost;
              new_limit.latency = best_cost.latency - op_cost;
              synth_mult (alg_in, q, &new_limit, mode);

              alg_in->cost.cost += op_cost;
              alg_in->cost.latency += op_cost;
              if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
                {
                  best_cost = alg_in->cost;
                  std::swap (alg_in, best_alg);
                  best_alg->log[best_alg->ops] = m;
                  best_alg->op[best_alg->ops] = alg_shift;
                }
            }
        }
      if (cache_hit)
        goto done;
    }

  /* If we have an odd number, add or subtract one.  */
  if ((t & 1) != 0)
    {
      unsigned HOST_WIDE_INT w;

    do_alg_addsub_t_m2:
      for (w = 1; (w & t) != 0; w <<= 1)
        ;
      /* If T was -1, then W will be zero after the loop.  This is another
         case where T ends with ...111.  Handling this with (T + 1) and
         subtract 1 produces slightly better code and results in algorithm
         selection much faster than treating it like the ...0111 case
         below.  */
      if (w == 0
          || (w > 2
              /* Reject the case where t is 3.
                 Thus we prefer addition in that case.  */
              && t != 3))
        {
          /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */

          op_cost = add_cost (speed, mode);
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, t + 1, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              best_cost = alg_in->cost;
              std::swap (alg_in, best_alg);
              best_alg->log[best_alg->ops] = 0;
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
            }
        }
      else
        {
          /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */

          op_cost = add_cost (speed, mode);
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, t - 1, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              best_cost = alg_in->cost;
              std::swap (alg_in, best_alg);
              best_alg->log[best_alg->ops] = 0;
              best_alg->op[best_alg->ops] = alg_add_t_m2;
            }
        }

      /* We may be able to calculate a * -7, a * -15, a * -31, etc
         quickly with a - a * n for some appropriate constant n.  */
      m = exact_log2 (-orig_t + 1);
      if (m >= 0 && m < maxm)
        {
          op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
          /* If the target has a cheap shift-and-subtract insn use
             that in preference to a shift insn followed by a sub insn.
             Assume that the shift-and-sub is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
             hardware the shift may be executed concurrently with the
             earlier steps in the algorithm.  */
          if (shiftsub1_cost (speed, mode, m) <= op_cost)
            {
              op_cost = shiftsub1_cost (speed, mode, m);
              op_latency = op_cost;
            }
          else
            op_latency = add_cost (speed, mode);

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
                      &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_latency;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              best_cost = alg_in->cost;
              std::swap (alg_in, best_alg);
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
            }
        }

      if (cache_hit)
        goto done;
    }

  /* Look for factors of t of the form
     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
     If we find such a factor, we can multiply by t using an algorithm that
     multiplies by q, shift the result by m and add/subtract it to itself.

     We search for large factors first and loop down, even if large factors
     are less probable than small; if we find a large factor we will find a
     good sequence quickly, and therefore be able to prune (by decreasing
     COST_LIMIT) the search.  */

 do_alg_addsub_factor:
  for (m = floor_log2 (t - 1); m >= 2; m--)
    {
      unsigned HOST_WIDE_INT d;

      d = (HOST_WIDE_INT_1U << m) + 1;
      if (t % d == 0 && t > d && m < maxm
          && (!cache_hit || cache_alg == alg_add_factor))
        {
          op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
          if (shiftadd_cost (speed, mode, m) <= op_cost)
            op_cost = shiftadd_cost (speed, mode, m);

          op_latency = op_cost;

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_latency;
          if (alg_in->cost.latency < op_cost)
            alg_in->cost.latency = op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              best_cost = alg_in->cost;
              std::swap (alg_in, best_alg);
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_factor;
            }
          /* Other factors will have been taken care of in the recursion.  */
          break;
        }

      d = (HOST_WIDE_INT_1U << m) - 1;
      if (t % d == 0 && t > d && m < maxm
          && (!cache_hit || cache_alg == alg_sub_factor))
        {
          op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
          if (shiftsub0_cost (speed, mode, m) <= op_cost)
            op_cost = shiftsub0_cost (speed, mode, m);

          op_latency = op_cost;

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_latency;
          if (alg_in->cost.latency < op_cost)
            alg_in->cost.latency = op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              best_cost = alg_in->cost;
              std::swap (alg_in, best_alg);
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_factor;
            }
          break;
        }
    }
  if (cache_hit)
    goto done;

  /* Try shift-and-add (load effective address) instructions,
     i.e. do a*3, a*5, a*9.  */
  if ((t & 1) != 0)
    {
    do_alg_add_t2_m:
      q = t - 1;
      m = ctz_hwi (q);
      if (q && m < maxm)
        {
          op_cost = shiftadd_cost (speed, mode, m);
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              best_cost = alg_in->cost;
              std::swap (alg_in, best_alg);
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_t2_m;
            }
        }
      if (cache_hit)
        goto done;

    do_alg_sub_t2_m:
      q = t + 1;
      m = ctz_hwi (q);
      if (q && m < maxm)
        {
          op_cost = shiftsub0_cost (speed, mode, m);
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              best_cost = alg_in->cost;
              std::swap (alg_in, best_alg);
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_t2_m;
            }
        }
      if (cache_hit)
        goto done;
    }

 done:
  /* If best_cost has not decreased, we have not found any algorithm.  */
  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
    {
      /* We failed to find an algorithm.  Record alg_impossible for
         this case (that is, <T, MODE, COST_LIMIT>) so that next time
         we are asked to find an algorithm for T within the same or
         lower COST_LIMIT, we can immediately return to the
         caller.  */
      entry_ptr->t = t;
      entry_ptr->mode = mode;
      entry_ptr->speed = speed;
      entry_ptr->alg = alg_impossible;
      entry_ptr->cost = *cost_limit;
      return;
    }

  /* Cache the result.  */
  if (!cache_hit)
    {
      entry_ptr->t = t;
      entry_ptr->mode = mode;
      entry_ptr->speed = speed;
      entry_ptr->alg = best_alg->op[best_alg->ops];
      entry_ptr->cost.cost = best_cost.cost;
      entry_ptr->cost.latency = best_cost.latency;
    }

  /* If we are getting a too long sequence for `struct algorithm'
     to record, make this search fail.  */
  if (best_alg->ops == MAX_BITS_PER_WORD)
    return;

  /* Copy the algorithm from temporary space to the space at alg_out.
     We avoid using structure assignment because the majority of
     best_alg is normally undefined, and this is a critical function.  */
  alg_out->ops = best_alg->ops + 1;
  alg_out->cost = best_cost;
  memcpy (alg_out->op, best_alg->op,
          alg_out->ops * sizeof *alg_out->op);
  memcpy (alg_out->log, best_alg->log,
          alg_out->ops * sizeof *alg_out->log);
}
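/* Worked example (added): synth_mult (alg, 10, ...) typically finds
   10 = 2 * 5 and 5 = 4 + 1, i.e. alg_shift on top of alg_add_t2_m, so a
   multiplication by 10 becomes ((x << 2) + x) << 1 -- two shifts and one
   add instead of a multiply, whenever the cost tables make that
   cheaper.  */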
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these costs less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT.  */

bool
choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
                     struct algorithm *alg, enum mult_variant *variant,
                     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with less
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* This works only if the inverted value actually fits in an
     `unsigned int'.  */
  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
    {
      op_cost = neg_cost (speed, mode);
      if (MULT_COST_LESS (&alg->cost, mult_cost))
        {
          limit.cost = alg->cost.cost - op_cost;
          limit.latency = alg->cost.latency - op_cost;
        }
      else
        {
          limit.cost = mult_cost - op_cost;
          limit.latency = mult_cost - op_cost;
        }

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
        *alg = alg2, *variant = negate_variant;
    }

  /* This proves very useful for division-by-constant.  */
  op_cost = add_cost (speed, mode);
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (&alg2, val - 1, &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  return MULT_COST_LESS (&alg->cost, mult_cost);
}
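/* Worked example (added): for VAL == -7 on a 32-bit mode the basic
   variant must treat the constant as the unsigned value 0xfffffff9, which
   rarely synthesizes cheaply; the negate_variant instead tries
   -VAL == 7, which synth_mult finds as (x << 3) - x, and the final fixup
   negates the accumulator.  That path is usually the cheapest of the
   three when negation costs a single insn.  */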
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.  */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
                   rtx target, const struct algorithm *alg,
                   enum mult_variant variant)
{
  unsigned HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
           && !optimize)
          ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
        {
        case alg_shift:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          /* REG_EQUAL note will be attached to the following insn.  */
          emit_move_insn (accum, tem);
          val_so_far <<= log;
          break;

        case alg_add_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += HOST_WIDE_INT_1U << log;
          break;

        case alg_sub_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far -= HOST_WIDE_INT_1U << log;
          break;

        case alg_add_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) + 1;
          break;

        case alg_sub_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) - 1;
          break;

        case alg_add_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += val_so_far << log;
          break;

        case alg_sub_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
                                 (add_target
                                  ? add_target : (optimize ? 0 : tem)));
          val_so_far = (val_so_far << log) - val_so_far;
          break;

        default:
          gcc_unreachable ();
        }

      if (SCALAR_INT_MODE_P (mode))
        {
          /* Write a REG_EQUAL note on the last insn so that we can cse
             multiplication sequences.  Note that if ACCUM is a SUBREG,
             we've set the inner register and must properly indicate that.  */
          tem = op0, nmode = mode;
          accum_inner = accum;
          if (GET_CODE (accum) == SUBREG)
            {
              accum_inner = SUBREG_REG (accum);
              nmode = GET_MODE (accum_inner);
              tem = gen_lowpart (nmode, op0);
            }

          insn = get_last_insn ();
          set_dst_reg_note (insn, REG_EQUAL,
                            gen_rtx_MULT (nmode, tem,
                                          gen_int_mode (val_so_far, nmode)),
                            accum_inner);
        }
    }

  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == (HOST_WIDE_INT) val_so_far);

  return accum;
}
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      if (op1 == CONST0_RTX (mode))
        return op1;
      if (op1 == CONST1_RTX (mode))
        return op0;
      if (op1 == CONSTM1_RTX (mode))
        return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
                            op0, target, 0);

      if (do_trapv)
        goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
         vector lshift (by scalar or vector) at all.  If not, we can't use
         synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
          && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
        goto skip_synth;

      /* These are the operations that are potentially turned into
         a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
        {
          coeff = INTVAL (scalar_op1);
          is_neg = coeff < 0;
        }
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
        {
          int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
          /* Perfect power of 2 (other than 1, which is handled above).  */
          if (shift > 0)
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 shift, target, unsignedp);
          else
            goto skip_synth;
        }
      else
        goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
          && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
        return expand_shift (LSHIFT_EXPR, mode, op0,
                             floor_log2 (coeff), target, unsignedp);

      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
         coefficients, by performing the multiplication by a positive
         multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
        {
          /* It's safe to use -coeff even for INT_MIN, as the
             result is interpreted as an unsigned coefficient.
             Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  */
          coeff = -(unsigned HOST_WIDE_INT) coeff;
          max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
                                    mode, speed)
                      - neg_cost (speed, mode));
          if (max_cost <= 0)
            goto skip_synth;

          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            {
              rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
                                       floor_log2 (coeff), target, unsignedp);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }

          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            {
              rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
                                            &algorithm, variant);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }
          goto skip_synth;
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
        return expand_mult_const (mode, op0, coeff, target,
                                  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
                           target, unsignedp, OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
/* Return a cost estimate for multiplying a register by the given
   COEFFicient in the given MODE and SPEED.  */

int
mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
{
  int max_cost;
  struct algorithm algorithm;
  enum mult_variant variant;

  rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
                           mode, speed);
  if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
    return algorithm.cost.cost;
  else
    return max_cost;
}
/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
                      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
                                this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
          || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      if (coeff == 0)
        return CONST0_RTX (mode);

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
        {
          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
          return expand_shift (LSHIFT_EXPR, mode, op0,
                               floor_log2 (coeff), target, unsignedp);
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                               max_cost))
        {
          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
          return expand_mult_const (mode, op0, coeff, target,
                                    &algorithm, variant);
        }
    }
  return expand_binop (mode, this_optab, op0, op1, target,
                       unsignedp, OPTAB_LIB_WIDEN);
}
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose the
   multiplier more freely.

   The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
                   unsigned HOST_WIDE_INT *multiplier_ptr,
                   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
                                                       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
                                                       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
        break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;

  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      return mhigh.to_uhwi () >= mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}
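/* Worked example (added; digits computed by hand and worth re-checking):
   choose_multiplier (7, 32, 32, ...) computes lgup = 3,
   mlow = 2^35/7 = 0x124924924 and mhigh = 0x124924925.  No halving is
   possible, so *POST_SHIFT_PTR stays at 3 and the 33-bit multiplier is
   0x124924925: for unsigned 32-bit x, x / 7 == (x * 0x124924925) >> 35,
   evaluated via the high-part multiply helpers elsewhere in this file.  */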
/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
   congruent to 1 (mod 2**N).  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */

  /* The algorithm notes that the choice y = x satisfies
     x*y == 1 mod 2^3, since x is assumed odd.
     Each iteration doubles the number of bits of significance in y.  */

  unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT y = x;
  int nbit = 3;

  mask = (n == HOST_BITS_PER_WIDE_INT
          ? HOST_WIDE_INT_M1U
          : (HOST_WIDE_INT_1U << n) - 1);

  while (nbit < n)
    {
      y = y * (2 - x * y) & mask;	/* Modulo 2^N */
      nbit *= 2;
    }
  return y;
}
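/* Worked example (added): invert_mod2n (7, 8) starts with y = x = 7,
   which already satisfies 7 * 7 == 49 == 1 (mod 8).  One Newton step
   gives y = 7 * (2 - 7 * 7) & 0xff == 183, and indeed
   7 * 183 == 1281 == 5 * 256 + 1 == 1 (mod 256).  */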
/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation.  */

rtx
expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
                             rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  tem = expand_shift (RSHIFT_EXPR, mode, op0,
                      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                     adj_operand);

  tem = expand_shift (RSHIFT_EXPR, mode, op1,
                      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                          target);

  return target;
}
/* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */

static rtx
extract_high_half (scalar_int_mode mode, rtx op)
{
  if (mode == word_mode)
    return gen_highpart (mode, op);

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
                     GET_MODE_BITSIZE (mode), 0, 1);
  return convert_modes (mode, wider_mode, op, 0);
}
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.  */

static rtx
expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size - 1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size - 1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size - 1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  return 0;
}
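/* Editor's note on the cost guards above: the opposite-signedness
   paths budget two shifts and four add-class operations, roughly what
   expand_mult_highpart_adjust emits (two sign-mask shifts plus the
   AND/add pairs), so a fallback strategy is attempted only when its
   estimated total still fits within the caller's MAX_COST.  */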
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
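/* Illustration (editor's note): this routine is the workhorse behind
   division by a constant.  E.g. for a 32-bit unsigned division by 10,
   the standard reciprocal computation yields the fixed-point multiplier
   0xCCCCCCCD with a post-shift of 3, so x / 10 is obtained as
   highpart (x * 0xCCCCCCCD) >> 3 -- exactly the high-part multiply
   expanded here.  */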
/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  emit_label (label);
  return result;
}
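/* Worked example (editor's note): for SImode and d = 8 the XOR/SUB
   branch-free variant computes, with signmask = x >> 31 (arithmetic):

     r = ((x ^ signmask) - signmask) & 7;    -- |x| mod 8
     r = (r ^ signmask) - signmask;          -- restore the sign of x

   so x = -13 gives signmask = -1, (|x| & 7) = 5 and r = -5, which is
   -13 rem 8 under truncating division.  */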
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  */

static rtx
expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      end_sequence ();
    }

  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	     > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
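/* All the variants above add d - 1 to the dividend only when it is
   negative before shifting.  E.g. for SImode x / 4 the store-flag
   version computes (x + ((x >> 31) & 3)) >> 2, so x = -7 becomes
   (-7 + 3) >> 2 = -1 while x = 7 stays 7 >> 2 = 1, matching C's
   truncating division.  (Editor's worked example.)  */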
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
   if that is convenient, and returning where the result is.
   You may request either the quotient or the remainder as the result;
   specify REM_FLAG nonzero to get the remainder.

   CODE is the expression code for which kind of division this is;
   it controls how rounding is done.  MODE is the machine mode to use.
   UNSIGNEDP nonzero means do unsigned division.  */

/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
   and then correct it by or'ing in missing high bits
   if result of ANDI is nonzero.
   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
   This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later.  */
/* ??? For modulo, we don't actually need the highpart of the first product,
   the low part will do nicely.  And for small divisors, the second multiply
   can also be a low-part only multiply or even be completely left out.
   E.g. to calculate the remainder of a division by 3 with a 32 bit
   multiply, multiply with 0x55555556 and extract the upper two bits;
   the result is exact for inputs up to 0x1fffffff.
   The input range can be reduced by using cross-sum rules.
   For odd divisors >= 3, the following table gives right shift counts
   so that if a number is shifted by an integer multiple of the given
   amount, the remainder stays the same:
   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12

   Cross-sum rules for even numbers can be derived by leaving as many bits
   to the right alone as the divisor has zeros to the right.
   E.g. if x is an unsigned 32 bit number:
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
   */
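/* To make the 0x55555556 example concrete (editor's note): 0x55555556
   is the low word of ceil (2^32 / 3), so for x = 5 the low 32 bits of
   the product are 5 * 0x55555556 mod 2^32 = 0xAAAAAAAE, whose top two
   bits are 0b10 = 2 = 5 mod 3.  A single low-part multiply followed by
   a logical shift right by 30 therefore recovers the remainder in the
   stated input range.  */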
rtx
expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
	       rtx op0, rtx op1, rtx target, int unsignedp)
{
  machine_mode compute_mode;
  rtx tquotient;
  rtx quotient = 0, remainder = 0;
  rtx_insn *last;
  rtx_insn *insn;
  optab optab1, optab2;
  int op1_is_constant, op1_is_pow2 = 0;
  int max_cost, extra_cost;
  static HOST_WIDE_INT last_div_const = 0;
  bool speed = optimize_insn_for_speed_p ();

  op1_is_constant = CONST_INT_P (op1);
  if (op1_is_constant)
    {
      wide_int ext_op1 = rtx_mode_t (op1, mode);
      op1_is_pow2 = (wi::popcount (ext_op1) == 1
		     || (! unsignedp
			 && wi::popcount (wi::neg (ext_op1)) == 1));
    }

  /*
     This is the structure of expand_divmod:

     First comes code to fix up the operands so we can perform the operations
     correctly and efficiently.

     Second comes a switch statement with code specific for each rounding mode.
     For some special operands this code emits all RTL for the desired
     operation, for other cases, it generates only a quotient and stores it in
     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
     to indicate that it has not done anything.

     Last comes code that finishes the operation.  If QUOTIENT is set and
     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
     QUOTIENT is not set, it is computed using trunc rounding.

     We try to generate special code for division and remainder when OP1 is a
     constant.  If |OP1| = 2**n we can use shifts and some other fast
     operations.  For other values of OP1, we compute a carefully selected
     fixed-point approximation m = 1/OP1, and generate code that multiplies
     OP0 by m.

     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
     half of the product.  Different strategies for generating the product are
     implemented in expmed_mult_highpart.

     If what we actually want is the remainder, we generate that by another
     by-constant multiplication and a subtraction.  */
  /* We shouldn't be called with OP1 == const1_rtx, but some of the
     code below will malfunction if we are, so check here and handle
     the special case if so.  */
  if (op1 == const1_rtx)
    return rem_flag ? const0_rtx : op0;

  /* When dividing by -1, we could get an overflow.
     negv_optab can handle overflows.  */
  if (! unsignedp && op1 == constm1_rtx)
    {
      if (rem_flag)
	return const0_rtx;
      return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
			  ? negv_optab : neg_optab, op0, target, 0);
    }

  if (target
      /* Don't use the function value register as a target
	 since we have to read it as well as write it,
	 and function-inlining gets confused by this.  */
      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
	  /* Don't clobber an operand while doing a multi-step calculation.  */
	  || ((rem_flag || op1_is_constant)
	      && (reg_mentioned_p (target, op0)
		  || (MEM_P (op0) && MEM_P (target))))
	  || reg_mentioned_p (target, op1)
	  || (MEM_P (op1) && MEM_P (target))))
    target = 0;
  /* Get the mode in which to perform this computation.  Normally it will
     be MODE, but sometimes we can't do the desired operation in MODE.
     If so, pick a wider mode in which we can do the operation.  Convert
     to that mode at the start to avoid repeated conversions.

     First see what operations we need.  These depend on the expression
     we are evaluating.  (We assume that divxx3 insns exist under the
     same conditions as modxx3 insns do, and that these insns don't normally
     fail.  If these assumptions are not correct, we may generate less
     efficient code in some cases.)

     Then see if we find a mode in which we can open-code that operation
     (either a division, modulus, or shift).  Finally, check for the smallest
     mode for which we can do the operation with a library call.  */

  /* We might want to refine this now that we have division-by-constant
     optimization.  Since expmed_mult_highpart tries so many variants, it is
     not straightforward to generalize this.  Maybe we should make an array
     of possible modes in init_expmed?  Save this for GCC 2.7.  */

  optab1 = (op1_is_pow2
	    ? (unsignedp ? lshr_optab : ashr_optab)
	    : (unsignedp ? udiv_optab : sdiv_optab));
  optab2 = (op1_is_pow2 ? optab1
	    : (unsignedp ? udivmod_optab : sdivmod_optab));

  FOR_EACH_MODE_FROM (compute_mode, mode)
    if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
      break;

  if (compute_mode == VOIDmode)
    FOR_EACH_MODE_FROM (compute_mode, mode)
      if (optab_libfunc (optab1, compute_mode)
	  || optab_libfunc (optab2, compute_mode))
	break;

  /* If we still couldn't find a mode, use MODE, but expand_binop will
     probably die.  */
  if (compute_mode == VOIDmode)
    compute_mode = mode;

  if (target && GET_MODE (target) == compute_mode)
    tquotient = target;
  else
    tquotient = gen_reg_rtx (compute_mode);

#if 0
  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
     (mode), and thereby get better code when OP1 is a constant.  Do that
     later.  It will require going over all usages of SIZE below.  */
  size = GET_MODE_BITSIZE (mode);
#endif
  /* Only deduct something for a REM if the last divide done was
     for a different constant.  Then set the constant of the last
     divide.  */
  max_cost = (unsignedp
	      ? udiv_cost (speed, compute_mode)
	      : sdiv_cost (speed, compute_mode));
  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
		     && INTVAL (op1) == last_div_const))
    max_cost -= (mul_cost (speed, compute_mode)
		 + add_cost (speed, compute_mode));

  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;

  /* Now convert to the best mode to use.  */
  if (compute_mode != mode)
    {
      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
      op1 = convert_modes (compute_mode, mode, op1, unsignedp);

      /* convert_modes may have placed op1 into a register, so we
	 must recompute the following.  */
      op1_is_constant = CONST_INT_P (op1);
      if (op1_is_constant)
	{
	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
			 || (! unsignedp
			     && wi::popcount (wi::neg (ext_op1)) == 1));
	}
      else
	op1_is_pow2 = 0;
    }

  /* If one of the operands is a volatile MEM, copy it into a register.  */

  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
    op0 = force_reg (compute_mode, op0);
  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
    op1 = force_reg (compute_mode, op1);

  /* If we need the remainder or if OP1 is constant, we need to
     put OP0 in a register in case it has any queued subexpressions.  */
  if (rem_flag || op1_is_constant)
    op0 = force_reg (compute_mode, op0);

  last = get_last_insn ();

  /* Promote floor rounding to trunc rounding for unsigned operations.  */
  if (unsignedp)
    {
      if (code == FLOOR_DIV_EXPR)
	code = TRUNC_DIV_EXPR;
      if (code == FLOOR_MOD_EXPR)
	code = TRUNC_MOD_EXPR;
      if (code == EXACT_DIV_EXPR && op1_is_pow2)
	code = TRUNC_DIV_EXPR;
    }

  if (op1 != const0_rtx)
    switch (code)
      {
      case TRUNC_MOD_EXPR:
      case TRUNC_DIV_EXPR:
	if (op1_is_constant)
	  {
	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
	    int size = GET_MODE_BITSIZE (int_mode);
	    if (unsignedp)
	      {
		unsigned HOST_WIDE_INT mh, ml;
		int pre_shift, post_shift;
		int dummy;
		wide_int wd = rtx_mode_t (op1, int_mode);
		unsigned HOST_WIDE_INT d = wd.to_uhwi ();

		if (wi::popcount (wd) == 1)
		  {
		    pre_shift = floor_log2 (d);
		    if (rem_flag)
		      {
			unsigned HOST_WIDE_INT mask
			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
			remainder
			  = expand_binop (int_mode, and_optab, op0,
					  gen_int_mode (mask, int_mode),
					  remainder, 1, OPTAB_LIB_WIDEN);
			if (remainder)
			  return gen_lowpart (mode, remainder);
		      }
		    quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
					     pre_shift, tquotient, 1);
		  }
		else if (size <= HOST_BITS_PER_WIDE_INT)
		  {
		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
		      {
			/* Most significant bit of divisor is set; emit an scc
			   insn.  */
			quotient = emit_store_flag_force (tquotient, GEU,
							  op0, op1,
							  int_mode, 1, 1);
		      }
		    else
		      {
			/* Find a suitable multiplier and right shift count
			   instead of multiplying with D.  */
			mh = choose_multiplier (d, size, size,
						&ml, &post_shift, &dummy);

			/* If the suggested multiplier is more than SIZE bits,
			   we can do better for even divisors, using an
			   initial right shift.  */
			if (mh != 0 && (d & 1) == 0)
			  {
			    pre_shift = ctz_or_zero (d);
			    mh = choose_multiplier (d >> pre_shift, size,
						    size - pre_shift,
						    &ml, &post_shift, &dummy);
			    gcc_assert (!mh);
			  }
			else
			  pre_shift = 0;

			if (mh != 0)
			  {
			    rtx t1, t2, t3, t4;

			    if (post_shift - 1 >= BITS_PER_WORD)
			      goto fail1;

			    extra_cost
			      = (shift_cost (speed, int_mode, post_shift - 1)
				 + shift_cost (speed, int_mode, 1)
				 + 2 * add_cost (speed, int_mode));
			    t1 = expmed_mult_highpart
			      (int_mode, op0, gen_int_mode (ml, int_mode),
			       NULL_RTX, 1, max_cost - extra_cost);
			    if (t1 == 0)
			      goto fail1;
			    t2 = force_operand (gen_rtx_MINUS (int_mode,
							       op0, t1),
						NULL_RTX);
			    t3 = expand_shift (RSHIFT_EXPR, int_mode,
					       t2, 1, NULL_RTX, 1);
			    t4 = force_operand (gen_rtx_PLUS (int_mode,
							      t1, t3),
						NULL_RTX);
			    quotient = expand_shift
			      (RSHIFT_EXPR, int_mode, t4,
			       post_shift - 1, tquotient, 1);
			  }
			else
			  {
			    rtx t1, t2;

			    if (pre_shift >= BITS_PER_WORD
				|| post_shift >= BITS_PER_WORD)
			      goto fail1;

			    t1 = expand_shift
			      (RSHIFT_EXPR, int_mode, op0,
			       pre_shift, NULL_RTX, 1);
			    extra_cost
			      = (shift_cost (speed, int_mode, pre_shift)
				 + shift_cost (speed, int_mode, post_shift));
			    t2 = expmed_mult_highpart
			      (int_mode, t1,
			       gen_int_mode (ml, int_mode),
			       NULL_RTX, 1, max_cost - extra_cost);
			    if (t2 == 0)
			      goto fail1;
			    quotient = expand_shift
			      (RSHIFT_EXPR, int_mode, t2,
			       post_shift, tquotient, 1);
			  }
		      }
		  }
		else		/* Too wide mode to use tricky code */
		  break;

		insn = get_last_insn ();
		if (insn != last)
		  set_dst_reg_note (insn, REG_EQUAL,
				    gen_rtx_UDIV (int_mode, op0, op1),
				    quotient);
	      }
	    else		/* TRUNC_DIV, signed */
	      {
		unsigned HOST_WIDE_INT ml;
		int lgup, post_shift;
		rtx mlr;
		HOST_WIDE_INT d = INTVAL (op1);
		unsigned HOST_WIDE_INT abs_d;

		/* Since d might be INT_MIN, we have to cast to
		   unsigned HOST_WIDE_INT before negating to avoid
		   undefined signed overflow.  */
		abs_d = (d >= 0
			 ? (unsigned HOST_WIDE_INT) d
			 : - (unsigned HOST_WIDE_INT) d);

		/* n rem d = n rem -d */
		if (rem_flag && d < 0)
		  {
		    d = abs_d;
		    op1 = gen_int_mode (abs_d, int_mode);
		  }

		if (d == 1)
		  quotient = op0;
		else if (d == -1)
		  quotient = expand_unop (int_mode, neg_optab, op0,
					  tquotient, 0);
		else if (size <= HOST_BITS_PER_WIDE_INT
			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
		  {
		    /* This case is not handled correctly below.  */
		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
						int_mode, 1, 1);
		    if (quotient == 0)
		      goto fail1;
		  }
		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
			 && (rem_flag
			     ? smod_pow2_cheap (speed, int_mode)
			     : sdiv_pow2_cheap (speed, int_mode))
			 /* We assume that cheap metric is true if the
			    optab has an expander for this mode.  */
			 && ((optab_handler ((rem_flag ? smod_optab
					      : sdiv_optab),
					     int_mode)
			      != CODE_FOR_nothing)
			     || (optab_handler (sdivmod_optab, int_mode)
				 != CODE_FOR_nothing)))
		  ;
		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
			 && (size <= HOST_BITS_PER_WIDE_INT
			     || abs_d != (unsigned HOST_WIDE_INT) d))
		  {
		    if (rem_flag)
		      {
			remainder = expand_smod_pow2 (int_mode, op0, d);
			if (remainder)
			  return gen_lowpart (mode, remainder);
		      }

		    if (sdiv_pow2_cheap (speed, int_mode)
			&& ((optab_handler (sdiv_optab, int_mode)
			     != CODE_FOR_nothing)
			    || (optab_handler (sdivmod_optab, int_mode)
				!= CODE_FOR_nothing)))
		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
						int_mode, op0,
						gen_int_mode (abs_d,
							      int_mode),
						NULL_RTX, 0);
		    else
		      quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);

		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
		       negate the quotient.  */
		    if (d < 0)
		      {
			insn = get_last_insn ();
			if (insn != last
			    && abs_d < (HOST_WIDE_INT_1U
					<< (HOST_BITS_PER_WIDE_INT - 1)))
			  set_dst_reg_note (insn, REG_EQUAL,
					    gen_rtx_DIV (int_mode, op0,
							 gen_int_mode
							   (abs_d, int_mode)),
					    quotient);

			quotient = expand_unop (int_mode, neg_optab,
						quotient, quotient, 0);
		      }
		  }
		else if (size <= HOST_BITS_PER_WIDE_INT)
		  {
		    choose_multiplier (abs_d, size, size - 1,
				       &ml, &post_shift, &lgup);
		    if (ml < HOST_WIDE_INT_1U << (size - 1))
		      {
			rtx t1, t2, t3;

			if (post_shift >= BITS_PER_WORD
			    || size - 1 >= BITS_PER_WORD)
			  goto fail1;

			extra_cost = (shift_cost (speed, int_mode, post_shift)
				      + shift_cost (speed, int_mode, size - 1)
				      + add_cost (speed, int_mode));
			t1 = expmed_mult_highpart
			  (int_mode, op0, gen_int_mode (ml, int_mode),
			   NULL_RTX, 0, max_cost - extra_cost);
			if (t1 == 0)
			  goto fail1;
			t2 = expand_shift
			  (RSHIFT_EXPR, int_mode, t1,
			   post_shift, NULL_RTX, 0);
			t3 = expand_shift
			  (RSHIFT_EXPR, int_mode, op0,
			   size - 1, NULL_RTX, 0);
			if (d < 0)
			  quotient
			    = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
					     tquotient);
			else
			  quotient
			    = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
					     tquotient);
		      }
		    else
		      {
			rtx t1, t2, t3, t4;

			if (post_shift >= BITS_PER_WORD
			    || size - 1 >= BITS_PER_WORD)
			  goto fail1;

			ml |= HOST_WIDE_INT_M1U << (size - 1);
			mlr = gen_int_mode (ml, int_mode);
			extra_cost = (shift_cost (speed, int_mode, post_shift)
				      + shift_cost (speed, int_mode, size - 1)
				      + 2 * add_cost (speed, int_mode));
			t1 = expmed_mult_highpart (int_mode, op0, mlr,
						   NULL_RTX, 0,
						   max_cost - extra_cost);
			if (t1 == 0)
			  goto fail1;
			t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
					    NULL_RTX);
			t3 = expand_shift
			  (RSHIFT_EXPR, int_mode, t2,
			   post_shift, NULL_RTX, 0);
			t4 = expand_shift
			  (RSHIFT_EXPR, int_mode, op0,
			   size - 1, NULL_RTX, 0);
			if (d < 0)
			  quotient
			    = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
					     tquotient);
			else
			  quotient
			    = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
					     tquotient);
		      }
		  }
		else		/* Too wide mode to use tricky code */
		  break;

		insn = get_last_insn ();
		if (insn != last)
		  set_dst_reg_note (insn, REG_EQUAL,
				    gen_rtx_DIV (int_mode, op0, op1),
				    quotient);
	      }
	  }
	break;

      fail1:
	delete_insns_since (last);
	break;
      case FLOOR_DIV_EXPR:
      case FLOOR_MOD_EXPR:
	/* We will come here only for signed operations.  */
	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
	  {
	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
	    int size = GET_MODE_BITSIZE (int_mode);
	    unsigned HOST_WIDE_INT mh, ml;
	    int pre_shift, lgup, post_shift;
	    HOST_WIDE_INT d = INTVAL (op1);

	    if (d > 0)
	      {
		/* We could just as easily deal with negative constants here,
		   but it does not seem worth the trouble for GCC 2.6.  */
		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
		  {
		    pre_shift = floor_log2 (d);
		    if (rem_flag)
		      {
			unsigned HOST_WIDE_INT mask
			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
			remainder = expand_binop
			  (int_mode, and_optab, op0,
			   gen_int_mode (mask, int_mode),
			   remainder, 0, OPTAB_LIB_WIDEN);
			if (remainder)
			  return gen_lowpart (mode, remainder);
		      }
		    quotient = expand_shift
		      (RSHIFT_EXPR, int_mode, op0,
		       pre_shift, tquotient, 0);
		  }
		else
		  {
		    rtx t1, t2, t3, t4;

		    mh = choose_multiplier (d, size, size - 1,
					    &ml, &post_shift, &lgup);
		    gcc_assert (!mh);

		    if (post_shift < BITS_PER_WORD
			&& size - 1 < BITS_PER_WORD)
		      {
			t1 = expand_shift
			  (RSHIFT_EXPR, int_mode, op0,
			   size - 1, NULL_RTX, 0);
			t2 = expand_binop (int_mode, xor_optab, op0, t1,
					   NULL_RTX, 0, OPTAB_WIDEN);
			extra_cost = (shift_cost (speed, int_mode, post_shift)
				      + shift_cost (speed, int_mode, size - 1)
				      + 2 * add_cost (speed, int_mode));
			t3 = expmed_mult_highpart
			  (int_mode, t2, gen_int_mode (ml, int_mode),
			   NULL_RTX, 1, max_cost - extra_cost);
			if (t3 != 0)
			  {
			    t4 = expand_shift
			      (RSHIFT_EXPR, int_mode, t3,
			       post_shift, NULL_RTX, 1);
			    quotient = expand_binop (int_mode, xor_optab,
						     t4, t1, tquotient, 0,
						     OPTAB_WIDEN);
			  }
		      }
		  }
	      }
	    else
	      {
		rtx nsign, t1, t2, t3, t4;
		t1 = force_operand (gen_rtx_PLUS (int_mode,
						  op0, constm1_rtx), NULL_RTX);
		t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
				   0, OPTAB_WIDEN);
		nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
				      size - 1, NULL_RTX, 0);
		t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
				    NULL_RTX);
		t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
				    NULL_RTX, 0);
		if (t4)
		  {
		    rtx t5;
		    t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
				      NULL_RTX, 0);
		    quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
					      tquotient);
		  }
	      }
	  }

	if (quotient != 0)
	  break;
	delete_insns_since (last);
	/* Try using an instruction that produces both the quotient and
	   remainder, using truncation.  We can easily compensate the quotient
	   or remainder to get floor rounding, once we have the remainder.
	   Notice that we compute also the final remainder value here,
	   and return the result right away.  */
	if (target == 0 || GET_MODE (target) != compute_mode)
	  target = gen_reg_rtx (compute_mode);

	if (rem_flag)
	  {
	    remainder
	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
	    quotient = gen_reg_rtx (compute_mode);
	  }
	else
	  {
	    quotient
	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
	    remainder = gen_reg_rtx (compute_mode);
	  }

	if (expand_twoval_binop (sdivmod_optab, op0, op1,
				 quotient, remainder, 0))
	  {
	    /* This could be computed with a branch-less sequence.
	       Save that for later.  */
	    rtx tem;
	    rtx_code_label *label = gen_label_rtx ();
	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
				NULL_RTX, 0, OPTAB_WIDEN);
	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
	    expand_dec (quotient, const1_rtx);
	    expand_inc (remainder, op1);
	    emit_label (label);
	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
	  }

	/* No luck with division elimination or divmod.  Have to do it
	   by conditionally adjusting op0 *and* the result.  */
	{
	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
	  rtx adjusted_op0;
	  rtx tem;

	  quotient = gen_reg_rtx (compute_mode);
	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
	  label1 = gen_label_rtx ();
	  label2 = gen_label_rtx ();
	  label3 = gen_label_rtx ();
	  label4 = gen_label_rtx ();
	  label5 = gen_label_rtx ();
	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
			      quotient, 0, OPTAB_LIB_WIDEN);
	  if (tem != quotient)
	    emit_move_insn (quotient, tem);
	  emit_jump_insn (targetm.gen_jump (label5));
	  emit_barrier ();
	  emit_label (label1);
	  expand_inc (adjusted_op0, const1_rtx);
	  emit_jump_insn (targetm.gen_jump (label4));
	  emit_barrier ();
	  emit_label (label2);
	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
			      quotient, 0, OPTAB_LIB_WIDEN);
	  if (tem != quotient)
	    emit_move_insn (quotient, tem);
	  emit_jump_insn (targetm.gen_jump (label5));
	  emit_barrier ();
	  emit_label (label3);
	  expand_dec (adjusted_op0, const1_rtx);
	  emit_label (label4);
	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
			      quotient, 0, OPTAB_LIB_WIDEN);
	  if (tem != quotient)
	    emit_move_insn (quotient, tem);
	  expand_dec (quotient, const1_rtx);
	  emit_label (label5);
	}
	break;
      case CEIL_DIV_EXPR:
      case CEIL_MOD_EXPR:
	if (unsignedp)
	  {
	    if (op1_is_constant
		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
		&& (HWI_COMPUTABLE_MODE_P (compute_mode)
		    || INTVAL (op1) >= 0))
	      {
		scalar_int_mode int_mode
		  = as_a <scalar_int_mode> (compute_mode);
		rtx t1, t2, t3;
		unsigned HOST_WIDE_INT d = INTVAL (op1);
		t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
				   floor_log2 (d), tquotient, 1);
		t2 = expand_binop (int_mode, and_optab, op0,
				   gen_int_mode (d - 1, int_mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
		t3 = gen_reg_rtx (int_mode);
		t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
		if (t3 == 0)
		  {
		    rtx_code_label *lab;
		    lab = gen_label_rtx ();
		    do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
		    expand_inc (t1, const1_rtx);
		    emit_label (lab);
		    quotient = t1;
		  }
		else
		  quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
					    tquotient);
		break;
	      }

	    /* Try using an instruction that produces both the quotient and
	       remainder, using truncation.  We can easily compensate the
	       quotient or remainder to get ceiling rounding, once we have the
	       remainder.  Notice that we compute also the final remainder
	       value here, and return the result right away.  */
	    if (target == 0 || GET_MODE (target) != compute_mode)
	      target = gen_reg_rtx (compute_mode);

	    if (rem_flag)
	      {
		remainder = (REG_P (target)
			     ? target : gen_reg_rtx (compute_mode));
		quotient = gen_reg_rtx (compute_mode);
	      }
	    else
	      {
		quotient = (REG_P (target)
			    ? target : gen_reg_rtx (compute_mode));
		remainder = gen_reg_rtx (compute_mode);
	      }

	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
				     remainder, 1))
	      {
		/* This could be computed with a branch-less sequence.
		   Save that for later.  */
		rtx_code_label *label = gen_label_rtx ();
		do_cmp_and_jump (remainder, const0_rtx, EQ,
				 compute_mode, label);
		expand_inc (quotient, const1_rtx);
		expand_dec (remainder, op1);
		emit_label (label);
		return gen_lowpart (mode, rem_flag ? remainder : quotient);
	      }

	    /* No luck with division elimination or divmod.  Have to do it
	       by conditionally adjusting op0 *and* the result.  */
	    {
	      rtx_code_label *label1, *label2;
	      rtx adjusted_op0, tem;

	      quotient = gen_reg_rtx (compute_mode);
	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
	      label1 = gen_label_rtx ();
	      label2 = gen_label_rtx ();
	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
			       compute_mode, label1);
	      emit_move_insn (quotient, const0_rtx);
	      emit_jump_insn (targetm.gen_jump (label2));
	      emit_barrier ();
	      emit_label (label1);
	      expand_dec (adjusted_op0, const1_rtx);
	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
				  quotient, 1, OPTAB_LIB_WIDEN);
	      if (tem != quotient)
		emit_move_insn (quotient, tem);
	      expand_inc (quotient, const1_rtx);
	      emit_label (label2);
	    }
	  }
	else /* signed */
	  {
	    if (op1_is_constant
		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
		&& INTVAL (op1) >= 0)
	      {
		/* This is extremely similar to the code for the unsigned case
		   above.  For 2.7 we should merge these variants, but for
		   2.6.1 I don't want to touch the code for unsigned since that
		   gets used in C.  The signed case will only be used by other
		   languages (Ada).  */

		rtx t1, t2, t3;
		unsigned HOST_WIDE_INT d = INTVAL (op1);
		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
				   floor_log2 (d), tquotient, 0);
		t2 = expand_binop (compute_mode, and_optab, op0,
				   gen_int_mode (d - 1, compute_mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
		t3 = gen_reg_rtx (compute_mode);
		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
				      compute_mode, 1, 1);
		if (t3 == 0)
		  {
		    rtx_code_label *lab;
		    lab = gen_label_rtx ();
		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
		    expand_inc (t1, const1_rtx);
		    emit_label (lab);
		    quotient = t1;
		  }
		else
		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
							  t1, t3),
					    tquotient);
		break;
	      }

	    /* Try using an instruction that produces both the quotient and
	       remainder, using truncation.  We can easily compensate the
	       quotient or remainder to get ceiling rounding, once we have the
	       remainder.  Notice that we compute also the final remainder
	       value here, and return the result right away.  */
	    if (target == 0 || GET_MODE (target) != compute_mode)
	      target = gen_reg_rtx (compute_mode);

	    if (rem_flag)
	      {
		remainder = (REG_P (target)
			     ? target : gen_reg_rtx (compute_mode));
		quotient = gen_reg_rtx (compute_mode);
	      }
	    else
	      {
		quotient = (REG_P (target)
			    ? target : gen_reg_rtx (compute_mode));
		remainder = gen_reg_rtx (compute_mode);
	      }

	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
				     remainder, 0))
	      {
		/* This could be computed with a branch-less sequence.
		   Save that for later.  */
		rtx tem;
		rtx_code_label *label = gen_label_rtx ();
		do_cmp_and_jump (remainder, const0_rtx, EQ,
				 compute_mode, label);
		tem = expand_binop (compute_mode, xor_optab, op0, op1,
				    NULL_RTX, 0, OPTAB_WIDEN);
		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
		expand_inc (quotient, const1_rtx);
		expand_dec (remainder, op1);
		emit_label (label);
		return gen_lowpart (mode, rem_flag ? remainder : quotient);
	      }

	    /* No luck with division elimination or divmod.  Have to do it
	       by conditionally adjusting op0 *and* the result.  */
	    {
	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
	      rtx adjusted_op0;
	      rtx tem;

	      quotient = gen_reg_rtx (compute_mode);
	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
	      label1 = gen_label_rtx ();
	      label2 = gen_label_rtx ();
	      label3 = gen_label_rtx ();
	      label4 = gen_label_rtx ();
	      label5 = gen_label_rtx ();
	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
			       compute_mode, label1);
	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
				  quotient, 0, OPTAB_LIB_WIDEN);
	      if (tem != quotient)
		emit_move_insn (quotient, tem);
	      emit_jump_insn (targetm.gen_jump (label5));
	      emit_barrier ();
	      emit_label (label1);
	      expand_dec (adjusted_op0, const1_rtx);
	      emit_jump_insn (targetm.gen_jump (label4));
	      emit_barrier ();
	      emit_label (label2);
	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
			       compute_mode, label3);
	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
				  quotient, 0, OPTAB_LIB_WIDEN);
	      if (tem != quotient)
		emit_move_insn (quotient, tem);
	      emit_jump_insn (targetm.gen_jump (label5));
	      emit_barrier ();
	      emit_label (label3);
	      expand_inc (adjusted_op0, const1_rtx);
	      emit_label (label4);
	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
				  quotient, 0, OPTAB_LIB_WIDEN);
	      if (tem != quotient)
		emit_move_insn (quotient, tem);
	      expand_inc (quotient, const1_rtx);
	      emit_label (label5);
	    }
	  }
	break;
      case EXACT_DIV_EXPR:
	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
	  {
	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
	    int size = GET_MODE_BITSIZE (int_mode);
	    HOST_WIDE_INT d = INTVAL (op1);
	    unsigned HOST_WIDE_INT ml;
	    int pre_shift;
	    rtx t1;

	    pre_shift = ctz_or_zero (d);
	    ml = invert_mod2n (d >> pre_shift, size);
	    t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
			       pre_shift, NULL_RTX, unsignedp);
	    quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
				    NULL_RTX, 1);

	    insn = get_last_insn ();
	    set_dst_reg_note (insn, REG_EQUAL,
			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
					      int_mode, op0, op1),
			      quotient);
	  }
	break;
      case ROUND_DIV_EXPR:
      case ROUND_MOD_EXPR:
	if (unsignedp)
	  {
	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
	    rtx tem;
	    rtx_code_label *label;
	    label = gen_label_rtx ();
	    quotient = gen_reg_rtx (int_mode);
	    remainder = gen_reg_rtx (int_mode);
	    if (expand_twoval_binop (udivmod_optab, op0, op1,
				     quotient, remainder, 1) == 0)
	      {
		quotient = expand_binop (int_mode, udiv_optab, op0, op1,
					 quotient, 1, OPTAB_LIB_WIDEN);
		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
		remainder = expand_binop (int_mode, sub_optab, op0, tem,
					  remainder, 1, OPTAB_LIB_WIDEN);
	      }
	    tem = plus_constant (int_mode, op1, -1);
	    tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
	    do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
	    expand_inc (quotient, const1_rtx);
	    expand_dec (remainder, op1);
	    emit_label (label);
	  }
	else
	  {
	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
	    int size = GET_MODE_BITSIZE (int_mode);
	    rtx abs_rem, abs_op1, tem, mask;
	    rtx_code_label *label;
	    label = gen_label_rtx ();
	    quotient = gen_reg_rtx (int_mode);
	    remainder = gen_reg_rtx (int_mode);
	    if (expand_twoval_binop (sdivmod_optab, op0, op1,
				     quotient, remainder, 0) == 0)
	      {
		quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
					 quotient, 0, OPTAB_LIB_WIDEN);
		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
		remainder = expand_binop (int_mode, sub_optab, op0, tem,
					  remainder, 0, OPTAB_LIB_WIDEN);
	      }
	    abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
	    abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
	    tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
				1, NULL_RTX, 1);
	    do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
	    tem = expand_binop (int_mode, xor_optab, op0, op1,
				NULL_RTX, 0, OPTAB_WIDEN);
	    mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
				 size - 1, NULL_RTX, 0);
	    tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
				NULL_RTX, 0, OPTAB_WIDEN);
	    tem = expand_binop (int_mode, sub_optab, tem, mask,
				NULL_RTX, 0, OPTAB_WIDEN);
	    expand_inc (quotient, tem);
	    tem = expand_binop (int_mode, xor_optab, mask, op1,
				NULL_RTX, 0, OPTAB_WIDEN);
	    tem = expand_binop (int_mode, sub_optab, tem, mask,
				NULL_RTX, 0, OPTAB_WIDEN);
	    expand_dec (remainder, tem);
	    emit_label (label);
	  }
	return gen_lowpart (mode, rem_flag ? remainder : quotient);
      default:
	gcc_unreachable ();
      }

  if (quotient == 0)
    {
      if (target && GET_MODE (target) != compute_mode)
	target = 0;

      if (rem_flag)
	{
	  /* Try to produce the remainder without producing the quotient.
	     If we seem to have a divmod pattern that does not require widening,
	     don't try widening here.  We should really have a WIDEN argument
	     to expand_twoval_binop, since what we'd really like to do here is
	     1) try a mod insn in compute_mode
	     2) try a divmod insn in compute_mode
	     3) try a div insn in compute_mode and multiply-subtract to get
		remainder
	     4) try the same things with widening allowed.  */
	  remainder
	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
				 op0, op1, target,
				 unsignedp,
				 ((optab_handler (optab2, compute_mode)
				   != CODE_FOR_nothing)
				  ? OPTAB_DIRECT : OPTAB_WIDEN));
	  if (remainder == 0)
	    {
	      /* No luck there.  Can we do remainder and divide at once
		 without a library call?  */
	      remainder = gen_reg_rtx (compute_mode);
	      if (! expand_twoval_binop ((unsignedp
					  ? udivmod_optab
					  : sdivmod_optab),
					 op0, op1,
					 NULL_RTX, remainder, unsignedp))
		remainder = 0;
	    }

	  if (remainder)
	    return gen_lowpart (mode, remainder);
	}

      /* Produce the quotient.  Try a quotient insn, but not a library call.
	 If we have a divmod in this mode, use it in preference to widening
	 the div (for this test we assume it will not fail).  Note that optab2
	 is set to the one of the two optabs that the call below will use.  */
      quotient
	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
			     op0, op1, rem_flag ? NULL_RTX : target,
			     unsignedp,
			     ((optab_handler (optab2, compute_mode)
			       != CODE_FOR_nothing)
			      ? OPTAB_DIRECT : OPTAB_WIDEN));

      if (quotient == 0)
	{
	  /* No luck there.  Try a quotient-and-remainder insn,
	     keeping the quotient alone.  */
	  quotient = gen_reg_rtx (compute_mode);
	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
				     op0, op1,
				     quotient, NULL_RTX, unsignedp))
	    {
	      quotient = 0;
	      /* Still no luck.  If we are not computing the remainder,
		 use a library call for the quotient.  */
	      quotient = sign_expand_binop (compute_mode,
					    udiv_optab, sdiv_optab,
					    op0, op1, target,
					    unsignedp, OPTAB_LIB_WIDEN);
	    }
	}
    }

  if (rem_flag)
    {
      if (target && GET_MODE (target) != compute_mode)
	target = 0;

      if (quotient == 0)
	{
	  /* No divide instruction either.  Use library for remainder.  */
	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
					 op0, op1, target,
					 unsignedp, OPTAB_LIB_WIDEN);
	  /* No remainder function.  Try a quotient-and-remainder
	     function, keeping the remainder.  */
	  if (remainder == 0)
	    {
	      remainder = gen_reg_rtx (compute_mode);
	      if (!expand_twoval_binop_libfunc
		  (unsignedp ? udivmod_optab : sdivmod_optab,
		   op0, op1,
		   NULL_RTX, remainder,
		   unsignedp ? UMOD : MOD))
		remainder = NULL_RTX;
	    }
	}
      else
	{
	  /* We divided.  Now finish doing X - Y * (X / Y).  */
	  remainder = expand_mult (compute_mode, quotient, op1,
				   NULL_RTX, unsignedp);
	  remainder = expand_binop (compute_mode, sub_optab, op0,
				    remainder, target, unsignedp,
				    OPTAB_LIB_WIDEN);
	}
    }

  return gen_lowpart (mode, rem_flag ? remainder : quotient);
}
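/* A worked instance of the unsigned TRUNC_DIV path with nonzero MH
   (editor's note, numbers from the standard magic-number computation):
   for a 32-bit unsigned division by 7, the 33-bit multiplier is
   2^32 + 0x24924925 with a post-shift of 3, and the emitted sequence is

     t1 = highpart (x * 0x24924925);
     q  = (t1 + ((x - t1) >> 1)) >> 2;

   e.g. x = 100 gives t1 = 14, (100 - 14) >> 1 = 43 and
   (14 + 43) >> 2 = 14 = 100 / 7.  */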
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */

tree
make_tree (tree type, rtx x)
{
  tree t;

  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_WIDE_INT:
      t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
      return t;

    case CONST_DOUBLE:
      STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
      if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
	t = wide_int_to_tree (type,
			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
						    HOST_BITS_PER_WIDE_INT * 2));
      else
	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));

      return t;

    case CONST_VECTOR:
      {
	int units = CONST_VECTOR_NUNITS (x);
	tree itype = TREE_TYPE (type);
	tree *elts;
	int i;

	/* Build a tree with vector elements.  */
	elts = XALLOCAVEC (tree, units);
	for (i = units - 1; i >= 0; --i)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    elts[i] = make_tree (itype, elt);
	  }

	return build_vector (type, elts);
      }

    case PLUS:
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case MINUS:
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case NEG:
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));

    case MULT:
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case ASHIFT:
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case LSHIFTRT:
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case ASHIFTRT:
      t = signed_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case DIV:
      if (TREE_CODE (type) != REAL_TYPE)
	t = signed_type_for (type);
      else
	t = type;

      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));
    case UDIV:
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
					  GET_CODE (x) == ZERO_EXTEND);
      return fold_convert (type, make_tree (t, XEXP (x, 0)));

    case CONST:
      return make_tree (type, XEXP (x, 0));

    case SYMBOL_REF:
      t = SYMBOL_REF_DECL (x);
      if (t)
	return fold_convert (type, build_fold_addr_expr (t));
      /* fall through.  */

    default:
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);

      /* If TYPE is a POINTER_TYPE, we might need to convert X from
	 address mode to pointer mode.  */
      if (POINTER_TYPE_P (type))
	x = convert_memory_address_addr_space
	  (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));

      /* Note that we do *not* use SET_DECL_RTL here, because we do not
	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
      t->decl_with_rtl.rtl = x;

      return t;
    }
}
/* Compute the logical-and of OP0 and OP1, storing it in TARGET
   and returning TARGET.

   If TARGET is 0, a pseudo-register or constant is returned.  */

rtx
expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}
/* Helper function for emit_store_flag.  */

static rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  scalar_int_mode result_mode = targetm.cstore_mode (icode);
  scalar_int_mode int_target_mode;

  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    int_target_mode = result_mode;
  else
    int_target_mode = as_a <scalar_int_mode> (target_mode);
  if (!target)
    target = gen_reg_rtx (int_target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     INT_TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = int_target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (int_target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   machine_mode mode, int unsignedp, int normalizep,
		   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;

  if (unsignedp)
    code = unsigned_condition (code);
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode)
      && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx tem;
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  */
	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
				      subreg_highpart_offset (word_mode,
							      int_mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones'
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && is_int_mode (mode, &int_mode)
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
    {
      scalar_int_mode int_target_mode;
      subtarget = target;

      if (!target)
	int_target_mode = int_mode;
      else
	{
	  /* If the result is to be wider than OP0, it is best to convert it
	     first.  If it is to be narrower, it is *incorrect* to convert it
	     first.  */
	  int_target_mode = as_a <scalar_int_mode> (target_mode);
	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
	    {
	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
	      int_mode = int_target_mode;
	    }
	}

      if (int_target_mode != int_mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
			    GET_MODE_BITSIZE (int_mode) - 1,
			    subtarget, normalizep != -1);

      if (int_mode != int_target_mode)
	op0 = convert_modes (int_target_mode, int_mode, op0, 0);

      return op0;
    }

  mclass = GET_MODE_CLASS (mode);
  FOR_EACH_MODE_FROM (compare_mode, mode)
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
				 unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}
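/* The sign-bit path above is the classic branch-free test against
   zero (editor's note): with STORE_FLAG_VALUE == 1, "A < 0" becomes a
   logical right shift of the sign bit into bit 0,
   (unsigned) A >> (N - 1), and "A >= 0" first takes the one's
   complement so that (~A) >> (N - 1) yields 1 exactly when A is
   non-negative.  */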
5596 /* Subroutine of emit_store_flag that handles cases in which the operands
5597 are scalar integers. SUBTARGET is the target to use for temporary
5598 operations and TRUEVAL is the value to store when the condition is
5599 true. All other arguments are as for emit_store_flag. */
5602 emit_store_flag_int (rtx target
, rtx subtarget
, enum rtx_code code
, rtx op0
,
5603 rtx op1
, scalar_int_mode mode
, int unsignedp
,
5604 int normalizep
, rtx trueval
)
5606 machine_mode target_mode
= target
? GET_MODE (target
) : VOIDmode
;
5607 rtx_insn
*last
= get_last_insn ();
5610 /* If this is an equality comparison of integers, we can try to exclusive-or
5611 (or subtract) the two operands and use a recursive call to try the
5612 comparison with zero. Don't do any of these cases if branches are
5615 if ((code
== EQ
|| code
== NE
) && op1
!= const0_rtx
)
5617 tem
= expand_binop (mode
, xor_optab
, op0
, op1
, subtarget
, 1,
5621 tem
= expand_binop (mode
, sub_optab
, op0
, op1
, subtarget
, 1,
5624 tem
= emit_store_flag (target
, code
, tem
, const0_rtx
,
5625 mode
, unsignedp
, normalizep
);
5629 delete_insns_since (last
);
5632 /* For integer comparisons, try the reverse comparison. However, for
5633 small X and if we'd have anyway to extend, implementing "X != 0"
5634 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5635 rtx_code rcode
= reverse_condition (code
);
5636 if (can_compare_p (rcode
, mode
, ccp_store_flag
)
5637 && ! (optab_handler (cstore_optab
, mode
) == CODE_FOR_nothing
5639 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
5640 && op1
== const0_rtx
))
5642 int want_add
= ((STORE_FLAG_VALUE
== 1 && normalizep
== -1)
5643 || (STORE_FLAG_VALUE
== -1 && normalizep
== 1));
5645 /* Again, for the reverse comparison, use either an addition or a XOR. */
5647 && rtx_cost (GEN_INT (normalizep
), mode
, PLUS
, 1,
5648 optimize_insn_for_speed_p ()) == 0)
5650 tem
= emit_store_flag_1 (subtarget
, rcode
, op0
, op1
, mode
, 0,
5651 STORE_FLAG_VALUE
, target_mode
);
5653 tem
= expand_binop (target_mode
, add_optab
, tem
,
5654 gen_int_mode (normalizep
, target_mode
),
5655 target
, 0, OPTAB_WIDEN
);
      else if (!want_add
               && rtx_cost (trueval, mode, XOR, 1,
                            optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   normalizep, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
        }

      if (tem != 0)
        return tem;
      delete_insns_since (last);
    }
  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST (optimize_insn_for_speed_p (),
                           false) <= 1 || (code != LE && code != GT))))
    return 0;
  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;
  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */
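  /* For example, A = 0 gives 0 | -1 = -1 (sign bit set) and A = 5 gives
     5 | 4 = 5 (sign bit clear); a negative A keeps its own sign bit
     through the IOR.  */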
  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }
  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */
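  /* For example, with 32-bit A: A = 5 gives (5 >> 31) - 5 = -5 (sign bit
     set), A = -3 gives (-3 >> 31) - (-3) = -1 + 3 = 2 (sign bit clear),
     and A = 0 gives 0; the sign bit ends up set exactly when A > 0.  */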
  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
                                GET_MODE_BITSIZE (mode) - 1,
                                subtarget, 0);
      if (tem)
        tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                            OPTAB_WIDEN);
    }
  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */
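      /* For example, using ABS in a 32-bit mode: op0 = -7 gives ABS = 7,
         so 7 - 1 = 6 leaves the sign bit clear (EQ false) while -7 sets it
         (NE true); op0 = 0 gives 0 - 1 = -1 with the sign bit set (EQ true)
         and -0 = 0 with it clear (NE false).  */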
      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one or negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }
      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }
      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */
      if (tem == 0
          && (code == NE
              || BRANCH_COST (optimize_insn_for_speed_p (),
                              false) > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }
  if (tem && normalizep)
    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
                              GET_MODE_BITSIZE (mode) - 1,
                              subtarget, normalizep == 1);
  if (tem)
    {
      if (!target)
        ;
      else if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */
rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;
  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp,
                           normalizep, target_mode);
  if (tem)
    return tem;
  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;
  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
        ;
      else
        return 0;
    }
  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
               && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;
      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
          && (code == ORDERED || code == UNORDERED
              || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
              || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
        {
          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
          /* For the reverse comparison, use either an addition or a XOR.  */
          if (want_add
              && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
                           optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       STORE_FLAG_VALUE, target_mode);
              if (tem)
                return expand_binop (target_mode, add_optab, tem,
                                     gen_int_mode (normalizep, target_mode),
                                     target, 0, OPTAB_WIDEN);
            }
          else if (!want_add
                   && rtx_cost (trueval, mode, XOR, 1,
                                optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       normalizep, target_mode);
              if (tem)
                return expand_binop (target_mode, xor_optab, tem, trueval,
                                     target, INTVAL (trueval) >= 0,
                                     OPTAB_WIDEN);
            }
        }

      delete_insns_since (last);
      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
        return 0;
      and_them = split_comparison (code, mode, &first_code, &code);
      /* If there are no NaNs, the first comparison should always fall through.
         Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
        {
          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
          return emit_store_flag_1 (target, code, op0, op1, mode, 0,
                                    normalizep, target_mode);
        }
      if (!HAVE_conditional_move)
        return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
         conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
                               normalizep, target_mode);
      if (tem == 0)
        return 0;
      if (and_them)
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     tem, const0_rtx, GET_MODE (tem), 0);
      else
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }
  /* The remaining tricks only apply to integer comparisons.  */

  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode))
    return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
                                unsignedp, normalizep, trueval);

  return 0;
}
/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
                       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;
  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (!target)
    target = gen_reg_rtx (word_mode);
  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
                               NULL_RTX, NULL, label,
                               profile_probability::uninitialized ());
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }
  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));
  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
          || code == ORDERED || code == UNORDERED
          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
        rcode = reverse_condition_maybe_unordered (code);
      else
        rcode = reverse_condition (code);
      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
        {
          falseval = trueval;
          trueval = const0_rtx;
          code = rcode;
        }
    }
  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
                           label, profile_probability::uninitialized ());

  emit_move_insn (target, falseval);
  emit_label (label);
  return target;
}
/* Perform a possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
                 rtx_code_label *label)
{
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
                           NULL, label, profile_probability::uninitialized ());
}