Add subreg_memory_offset helper functions
gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx, scalar_int_mode, bool);
54 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx, scalar_int_mode, bool);
58 static void store_split_bit_field (rtx, opt_scalar_int_mode,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 rtx, scalar_int_mode, bool);
64 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, rtx, int, bool);
70 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
71 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, bool);
74 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
75 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
76 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
77
78 /* Return a constant integer mask value of mode MODE with BITSIZE ones
79 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
80 The mask is truncated if necessary to the width of mode MODE. The
81 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
82
83 static inline rtx
84 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
85 {
86 return immed_wide_int_const
87 (wi::shifted_mask (bitpos, bitsize, complement,
88 GET_MODE_PRECISION (mode)), mode);
89 }
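
/* As a concrete illustration of the rule above (assuming a 32-bit
   SImode): mask_rtx (SImode, 4, 8, false) yields the constant 0xff0,
   i.e. eight ones above four zeros, and the same call with COMPLEMENT
   set yields 0xfffff00f.  */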
90
91 /* Test whether a value is zero or a power of two. */
92 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
93 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
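
/* For instance, 8 & 7 == 0 and 0 & (0 - 1) == 0, so 8 and 0 satisfy
   this predicate, whereas 12 & 11 == 8, so 12 does not.  */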
94
95 struct init_expmed_rtl
96 {
97 rtx reg;
98 rtx plus;
99 rtx neg;
100 rtx mult;
101 rtx sdiv;
102 rtx udiv;
103 rtx sdiv_32;
104 rtx smod_32;
105 rtx wide_mult;
106 rtx wide_lshr;
107 rtx wide_trunc;
108 rtx shift;
109 rtx shift_mult;
110 rtx shift_add;
111 rtx shift_sub0;
112 rtx shift_sub1;
113 rtx zext;
114 rtx trunc;
115
116 rtx pow2[MAX_BITS_PER_WORD];
117 rtx cint[MAX_BITS_PER_WORD];
118 };
119
120 static void
121 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
122 scalar_int_mode from_mode, bool speed)
123 {
124 int to_size, from_size;
125 rtx which;
126
127 to_size = GET_MODE_PRECISION (to_mode);
128 from_size = GET_MODE_PRECISION (from_mode);
129
130 /* Most partial integers have a precision less than the "full"
131 integer they require for storage. If one doesn't (i.e. its precision
132 is itself a power of two), reduce its bit size by one here so that
133 it still compares as narrower than the corresponding full mode. */
134 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
135 && pow2p_hwi (to_size))
136 to_size --;
137 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
138 && pow2p_hwi (from_size))
139 from_size --;
140
141 /* Assume cost of zero-extend and sign-extend is the same. */
142 which = (to_size < from_size ? all->trunc : all->zext);
143
144 PUT_MODE (all->reg, from_mode);
145 set_convert_cost (to_mode, from_mode, speed,
146 set_src_cost (which, to_mode, speed));
147 }
148
149 static void
150 init_expmed_one_mode (struct init_expmed_rtl *all,
151 machine_mode mode, int speed)
152 {
153 int m, n, mode_bitsize;
154 machine_mode mode_from;
155
156 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
157
158 PUT_MODE (all->reg, mode);
159 PUT_MODE (all->plus, mode);
160 PUT_MODE (all->neg, mode);
161 PUT_MODE (all->mult, mode);
162 PUT_MODE (all->sdiv, mode);
163 PUT_MODE (all->udiv, mode);
164 PUT_MODE (all->sdiv_32, mode);
165 PUT_MODE (all->smod_32, mode);
166 PUT_MODE (all->wide_trunc, mode);
167 PUT_MODE (all->shift, mode);
168 PUT_MODE (all->shift_mult, mode);
169 PUT_MODE (all->shift_add, mode);
170 PUT_MODE (all->shift_sub0, mode);
171 PUT_MODE (all->shift_sub1, mode);
172 PUT_MODE (all->zext, mode);
173 PUT_MODE (all->trunc, mode);
174
175 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
176 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
177 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
178 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
179 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
180
181 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
182 <= 2 * add_cost (speed, mode)));
183 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
184 <= 4 * add_cost (speed, mode)));
185
186 set_shift_cost (speed, mode, 0, 0);
187 {
188 int cost = add_cost (speed, mode);
189 set_shiftadd_cost (speed, mode, 0, cost);
190 set_shiftsub0_cost (speed, mode, 0, cost);
191 set_shiftsub1_cost (speed, mode, 0, cost);
192 }
193
194 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
195 for (m = 1; m < n; m++)
196 {
197 XEXP (all->shift, 1) = all->cint[m];
198 XEXP (all->shift_mult, 1) = all->pow2[m];
199
200 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
201 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
202 speed));
203 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
204 speed));
205 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
206 speed));
207 }
208
209 scalar_int_mode int_mode_to;
210 if (is_a <scalar_int_mode> (mode, &int_mode_to))
211 {
212 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
213 mode_from = (machine_mode)(mode_from + 1))
214 init_expmed_one_conv (all, int_mode_to,
215 as_a <scalar_int_mode> (mode_from), speed);
216
217 scalar_int_mode wider_mode;
218 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
219 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
220 {
221 PUT_MODE (all->zext, wider_mode);
222 PUT_MODE (all->wide_mult, wider_mode);
223 PUT_MODE (all->wide_lshr, wider_mode);
224 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
225
226 set_mul_widen_cost (speed, wider_mode,
227 set_src_cost (all->wide_mult, wider_mode, speed));
228 set_mul_highpart_cost (speed, int_mode_to,
229 set_src_cost (all->wide_trunc,
230 int_mode_to, speed));
231 }
232 }
233 }
234
235 void
236 init_expmed (void)
237 {
238 struct init_expmed_rtl all;
239 machine_mode mode = QImode;
240 int m, speed;
241
242 memset (&all, 0, sizeof all);
243 for (m = 1; m < MAX_BITS_PER_WORD; m++)
244 {
245 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
246 all.cint[m] = GEN_INT (m);
247 }
248
249 /* Avoid using hard regs in ways which may be unsupported. */
250 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
251 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
252 all.neg = gen_rtx_NEG (mode, all.reg);
253 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
254 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
255 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
256 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
257 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
258 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
259 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
260 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
261 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
262 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
263 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
264 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
265 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
266 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
267 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
268
269 for (speed = 0; speed < 2; speed++)
270 {
271 crtl->maybe_hot_insn_p = speed;
272 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
273
274 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
275 mode = (machine_mode)(mode + 1))
276 init_expmed_one_mode (&all, mode, speed);
277
278 if (MIN_MODE_PARTIAL_INT != VOIDmode)
279 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
280 mode = (machine_mode)(mode + 1))
281 init_expmed_one_mode (&all, mode, speed);
282
283 if (MIN_MODE_VECTOR_INT != VOIDmode)
284 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
285 mode = (machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
287 }
288
289 if (alg_hash_used_p ())
290 {
291 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
292 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
293 }
294 else
295 set_alg_hash_used_p (true);
296 default_rtl_profile ();
297
298 ggc_free (all.trunc);
299 ggc_free (all.shift_sub1);
300 ggc_free (all.shift_sub0);
301 ggc_free (all.shift_add);
302 ggc_free (all.shift_mult);
303 ggc_free (all.shift);
304 ggc_free (all.wide_trunc);
305 ggc_free (all.wide_lshr);
306 ggc_free (all.wide_mult);
307 ggc_free (all.zext);
308 ggc_free (all.smod_32);
309 ggc_free (all.sdiv_32);
310 ggc_free (all.udiv);
311 ggc_free (all.sdiv);
312 ggc_free (all.mult);
313 ggc_free (all.neg);
314 ggc_free (all.plus);
315 ggc_free (all.reg);
316 }
317
318 /* Return an rtx representing minus the value of X.
319 MODE is the intended mode of the result,
320 useful if X is a CONST_INT. */
321
322 rtx
323 negate_rtx (machine_mode mode, rtx x)
324 {
325 rtx result = simplify_unary_operation (NEG, mode, x, mode);
326
327 if (result == 0)
328 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
329
330 return result;
331 }
332
333 /* Whether reverse storage order is supported on the target. */
334 static int reverse_storage_order_supported = -1;
335
336 /* Check whether reverse storage order is supported on the target. */
337
338 static void
339 check_reverse_storage_order_support (void)
340 {
341 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
342 {
343 reverse_storage_order_supported = 0;
344 sorry ("reverse scalar storage order");
345 }
346 else
347 reverse_storage_order_supported = 1;
348 }
349
350 /* Whether reverse FP storage order is supported on the target. */
351 static int reverse_float_storage_order_supported = -1;
352
353 /* Check whether reverse FP storage order is supported on the target. */
354
355 static void
356 check_reverse_float_storage_order_support (void)
357 {
358 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
359 {
360 reverse_float_storage_order_supported = 0;
361 sorry ("reverse floating-point scalar storage order");
362 }
363 else
364 reverse_float_storage_order_supported = 1;
365 }
366
367 /* Return an rtx representing value of X with reverse storage order.
368 MODE is the intended mode of the result,
369 useful if X is a CONST_INT. */
370
371 rtx
372 flip_storage_order (machine_mode mode, rtx x)
373 {
374 scalar_int_mode int_mode;
375 rtx result;
376
377 if (mode == QImode)
378 return x;
379
380 if (COMPLEX_MODE_P (mode))
381 {
382 rtx real = read_complex_part (x, false);
383 rtx imag = read_complex_part (x, true);
384
385 real = flip_storage_order (GET_MODE_INNER (mode), real);
386 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
387
388 return gen_rtx_CONCAT (mode, real, imag);
389 }
390
391 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
392 check_reverse_storage_order_support ();
393
394 if (!is_a <scalar_int_mode> (mode, &int_mode))
395 {
396 if (FLOAT_MODE_P (mode)
397 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
398 check_reverse_float_storage_order_support ();
399
400 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
401 {
402 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
403 return x;
404 }
405 x = gen_lowpart (int_mode, x);
406 }
407
408 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
409 if (result == 0)
410 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
411
412 if (int_mode != mode)
413 result = gen_lowpart (mode, result);
414
415 return result;
416 }
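
/* For example, on a target with 8-bit bytes and a 32-bit SImode,
   flip_storage_order (SImode, x) byte-swaps x, turning 0x12345678
   into 0x78563412; QImode values are returned unchanged.  */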
417
418 /* If MODE is set, adjust bitfield memory MEM so that it points to the
419 first unit of mode MODE that contains a bitfield of size BITSIZE at
420 bit position BITNUM. If MODE is not set, return a BLKmode reference
421 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
422 of the field within the new memory. */
423
424 static rtx
425 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
426 unsigned HOST_WIDE_INT bitsize,
427 unsigned HOST_WIDE_INT bitnum,
428 unsigned HOST_WIDE_INT *new_bitnum)
429 {
430 scalar_int_mode imode;
431 if (mode.exists (&imode))
432 {
433 unsigned int unit = GET_MODE_BITSIZE (imode);
434 *new_bitnum = bitnum % unit;
435 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
436 return adjust_bitfield_address (mem, imode, offset);
437 }
438 else
439 {
440 *new_bitnum = bitnum % BITS_PER_UNIT;
441 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
442 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
443 / BITS_PER_UNIT);
444 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
445 }
446 }
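
/* For example, assuming 8-bit units and a 16-bit HImode, narrowing a
   reference to an 8-bit field at bit 37 with MODE set to HImode rebases
   MEM to the HImode unit at byte offset 4 and sets *NEW_BITNUM to
   37 % 16 == 5.  */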
447
448 /* The caller wants to perform insertion or extraction PATTERN on a
449 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
450 BITREGION_START and BITREGION_END are as for store_bit_field
451 and FIELDMODE is the natural mode of the field.
452
453 Search for a mode that is compatible with the memory access
454 restrictions and (where applicable) with a register insertion or
455 extraction. Return the new memory on success, storing the adjusted
456 bit position in *NEW_BITNUM. Return null otherwise. */
457
458 static rtx
459 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
460 rtx op0, HOST_WIDE_INT bitsize,
461 HOST_WIDE_INT bitnum,
462 unsigned HOST_WIDE_INT bitregion_start,
463 unsigned HOST_WIDE_INT bitregion_end,
464 machine_mode fieldmode,
465 unsigned HOST_WIDE_INT *new_bitnum)
466 {
467 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
468 bitregion_end, MEM_ALIGN (op0),
469 MEM_VOLATILE_P (op0));
470 scalar_int_mode best_mode;
471 if (iter.next_mode (&best_mode))
472 {
473 /* We can use a memory in BEST_MODE. See whether this is true for
474 any wider modes. All other things being equal, we prefer to
475 use the widest mode possible because it tends to expose more
476 CSE opportunities. */
477 if (!iter.prefer_smaller_modes ())
478 {
479 /* Limit the search to the mode required by the corresponding
480 register insertion or extraction instruction, if any. */
481 scalar_int_mode limit_mode = word_mode;
482 extraction_insn insn;
483 if (get_best_reg_extraction_insn (&insn, pattern,
484 GET_MODE_BITSIZE (best_mode),
485 fieldmode))
486 limit_mode = insn.field_mode;
487
488 scalar_int_mode wider_mode;
489 while (iter.next_mode (&wider_mode)
490 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
491 best_mode = wider_mode;
492 }
493 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
494 new_bitnum);
495 }
496 return NULL_RTX;
497 }
498
499 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
500 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
501 offset is then BITNUM / BITS_PER_UNIT. */
502
503 static bool
504 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
505 unsigned HOST_WIDE_INT bitsize,
506 machine_mode struct_mode)
507 {
508 if (BYTES_BIG_ENDIAN)
509 return (bitnum % BITS_PER_UNIT == 0
510 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
511 || (bitnum + bitsize) % BITS_PER_WORD == 0));
512 else
513 return bitnum % BITS_PER_WORD == 0;
514 }
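
/* For example, on a little-endian target with 32-bit words, a 16-bit
   field at bit 32 of a DImode value is a lowpart (32 % BITS_PER_WORD
   == 0) with subreg offset 32 / BITS_PER_UNIT == 4, whereas a field
   starting at bit 8 is not.  */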
515
516 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
517 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
518 Return false if the access would touch memory outside the range
519 BITREGION_START to BITREGION_END for conformance to the C++ memory
520 model. */
521
522 static bool
523 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
524 unsigned HOST_WIDE_INT bitnum,
525 scalar_int_mode fieldmode,
526 unsigned HOST_WIDE_INT bitregion_start,
527 unsigned HOST_WIDE_INT bitregion_end)
528 {
529 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
530
531 /* -fstrict-volatile-bitfields must be enabled and we must have a
532 volatile MEM. */
533 if (!MEM_P (op0)
534 || !MEM_VOLATILE_P (op0)
535 || flag_strict_volatile_bitfields <= 0)
536 return false;
537
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
542
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
546
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
552
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
558
559 return true;
560 }
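
/* For example, assuming a 32-bit SImode and BITS_PER_WORD >= 32, a
   3-bit field at bit 13 of a 32-bit-aligned volatile MEM qualifies when
   FIELDMODE is SImode (given -fstrict-volatile-bitfields and no
   conflicting bit region): 13 % 32 + 3 == 16, so the access stays
   within a single SImode unit.  */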
561
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
564
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
568 {
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
575 }
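
/* For example, assuming a 32-bit SImode, a 32-bit field starting at
   bit 64 of a suitably aligned MEM satisfies all three conditions: the
   field starts on a byte boundary and exactly fills the mode, so it can
   be accessed as one SImode value.  */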
576 \f
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
579 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
580 are as for store_bit_field. */
581
582 static bool
583 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
584 opt_scalar_int_mode op0_mode,
585 unsigned HOST_WIDE_INT bitsize,
586 unsigned HOST_WIDE_INT bitnum,
587 rtx value, scalar_int_mode value_mode)
588 {
589 struct expand_operand ops[4];
590 rtx value1;
591 rtx xop0 = op0;
592 rtx_insn *last = get_last_insn ();
593 bool copy_back = false;
594
595 scalar_int_mode op_mode = insv->field_mode;
596 unsigned int unit = GET_MODE_BITSIZE (op_mode);
597 if (bitsize == 0 || bitsize > unit)
598 return false;
599
600 if (MEM_P (xop0))
601 /* Get a reference to the first byte of the field. */
602 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
603 &bitnum);
604 else
605 {
606 /* Convert from counting within OP0 to counting in OP_MODE. */
607 if (BYTES_BIG_ENDIAN)
608 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
609
610 /* If xop0 is a register, we need it in OP_MODE
611 to make it acceptable to the format of insv. */
612 if (GET_CODE (xop0) == SUBREG)
613 /* We can't just change the mode, because this might clobber op0,
614 and we will need the original value of op0 if insv fails. */
615 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
616 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
617 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
618 }
619
620 /* If the destination is a paradoxical subreg such that we need a
621 truncate to the inner mode, perform the insertion on a temporary and
622 truncate the result to the original destination. Note that we can't
623 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
624 X) 0)) is (reg:N X). */
625 if (GET_CODE (xop0) == SUBREG
626 && REG_P (SUBREG_REG (xop0))
627 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
628 op_mode))
629 {
630 rtx tem = gen_reg_rtx (op_mode);
631 emit_move_insn (tem, xop0);
632 xop0 = tem;
633 copy_back = true;
634 }
635
636 /* There is a similar overflow check at the start of
637 store_bit_field_1, but it only handles the case where the field
638 lies completely outside the register. Here the field may also lie
639 partially in the register, so we need to adjust BITSIZE for that
640 partial overflow. Without this fix, pr48335-2.c would break on
641 big-endian targets that have a bit insert instruction, such as
642 arm and aarch64. */
643 if (bitsize + bitnum > unit && bitnum < unit)
644 {
645 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
646 "destination object, data truncated into %wu-bit",
647 bitsize, unit - bitnum);
648 bitsize = unit - bitnum;
649 }
650
651 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
652 "backwards" from the size of the unit we are inserting into.
653 Otherwise, we count bits from the most significant on a
654 BYTES/BITS_BIG_ENDIAN machine. */
655
656 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
657 bitnum = unit - bitsize - bitnum;
658
659 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
660 value1 = value;
661 if (value_mode != op_mode)
662 {
663 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
664 {
665 rtx tmp;
666 /* Optimization: Don't bother really extending VALUE
667 if it has all the bits we will actually use. However,
668 if we must narrow it, be sure we do it correctly. */
669
670 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
671 {
672 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
673 if (! tmp)
674 tmp = simplify_gen_subreg (op_mode,
675 force_reg (value_mode, value1),
676 value_mode, 0);
677 }
678 else
679 {
680 tmp = gen_lowpart_if_possible (op_mode, value1);
681 if (! tmp)
682 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
683 }
684 value1 = tmp;
685 }
686 else if (CONST_INT_P (value))
687 value1 = gen_int_mode (INTVAL (value), op_mode);
688 else
689 /* The parsing phase is supposed to make VALUE's data type
690 match that of the component reference, which is a type at
691 least as wide as the field; so VALUE should have a mode
692 that corresponds to that type. */
693 gcc_assert (CONSTANT_P (value));
694 }
695
696 create_fixed_operand (&ops[0], xop0);
697 create_integer_operand (&ops[1], bitsize);
698 create_integer_operand (&ops[2], bitnum);
699 create_input_operand (&ops[3], value1, op_mode);
700 if (maybe_expand_insn (insv->icode, 4, ops))
701 {
702 if (copy_back)
703 convert_move (op0, xop0, true);
704 return true;
705 }
706 delete_insns_since (last);
707 return false;
708 }
709
710 /* A subroutine of store_bit_field, with the same arguments. Return true
711 if the operation could be implemented.
712
713 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
714 no other way of implementing the operation. If FALLBACK_P is false,
715 return false instead. */
716
717 static bool
718 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
719 unsigned HOST_WIDE_INT bitnum,
720 unsigned HOST_WIDE_INT bitregion_start,
721 unsigned HOST_WIDE_INT bitregion_end,
722 machine_mode fieldmode,
723 rtx value, bool reverse, bool fallback_p)
724 {
725 rtx op0 = str_rtx;
726 rtx orig_value;
727
728 while (GET_CODE (op0) == SUBREG)
729 {
730 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
731 op0 = SUBREG_REG (op0);
732 }
733
734 /* No action is needed if the target is a register and if the field
735 lies completely outside that register. This can occur if the source
736 code contains an out-of-bounds access to a small array. */
737 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
738 return true;
739
740 /* Use vec_set patterns for inserting parts of vectors whenever
741 available. */
742 machine_mode outermode = GET_MODE (op0);
743 scalar_mode innermode = GET_MODE_INNER (outermode);
744 if (VECTOR_MODE_P (outermode)
745 && !MEM_P (op0)
746 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
747 && fieldmode == innermode
748 && bitsize == GET_MODE_BITSIZE (innermode)
749 && !(bitnum % GET_MODE_BITSIZE (innermode)))
750 {
751 struct expand_operand ops[3];
752 enum insn_code icode = optab_handler (vec_set_optab, outermode);
753 int pos = bitnum / GET_MODE_BITSIZE (innermode);
754
755 create_fixed_operand (&ops[0], op0);
756 create_input_operand (&ops[1], value, innermode);
757 create_integer_operand (&ops[2], pos);
758 if (maybe_expand_insn (icode, 3, ops))
759 return true;
760 }
761
762 /* If the target is a register, overwriting the entire object, or storing
763 a full-word or multi-word field can be done with just a SUBREG. */
764 if (!MEM_P (op0)
765 && bitsize == GET_MODE_BITSIZE (fieldmode)
766 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
767 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
768 {
769 /* Use the subreg machinery either to narrow OP0 to the required
770 words or to cope with mode punning between equal-sized modes.
771 In the latter case, use subreg on the rhs side, not lhs. */
772 rtx sub;
773
774 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
775 {
776 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
777 if (sub)
778 {
779 if (reverse)
780 sub = flip_storage_order (GET_MODE (op0), sub);
781 emit_move_insn (op0, sub);
782 return true;
783 }
784 }
785 else
786 {
787 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
788 bitnum / BITS_PER_UNIT);
789 if (sub)
790 {
791 if (reverse)
792 value = flip_storage_order (fieldmode, value);
793 emit_move_insn (sub, value);
794 return true;
795 }
796 }
797 }
798
799 /* If the target is memory, storing any naturally aligned field can be
800 done with a simple store. For targets that support fast unaligned
801 memory, any naturally sized, unit aligned field can be done directly. */
802 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
803 {
804 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
805 if (reverse)
806 value = flip_storage_order (fieldmode, value);
807 emit_move_insn (op0, value);
808 return true;
809 }
810
811 /* Make sure we are playing with integral modes. Pun with subregs
812 if we aren't. This must come after the entire register case above,
813 since that case is valid for any mode. The following cases are only
814 valid for integral modes. */
815 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
816 scalar_int_mode imode;
817 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
818 {
819 if (MEM_P (op0))
820 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
821 0, MEM_SIZE (op0));
822 else
823 op0 = gen_lowpart (op0_mode.require (), op0);
824 }
825
826 /* Storing an lsb-aligned field in a register
827 can be done with a movstrict instruction. */
828
829 if (!MEM_P (op0)
830 && !reverse
831 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
832 && bitsize == GET_MODE_BITSIZE (fieldmode)
833 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
834 {
835 struct expand_operand ops[2];
836 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
837 rtx arg0 = op0;
838 unsigned HOST_WIDE_INT subreg_off;
839
840 if (GET_CODE (arg0) == SUBREG)
841 {
842 /* Else we've got some float mode source being extracted into
843 a different float mode destination -- this combination of
844 subregs results in Severe Tire Damage. */
845 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
846 || GET_MODE_CLASS (fieldmode) == MODE_INT
847 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
848 arg0 = SUBREG_REG (arg0);
849 }
850
851 subreg_off = bitnum / BITS_PER_UNIT;
852 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
853 {
854 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
855
856 create_fixed_operand (&ops[0], arg0);
857 /* Shrink the source operand to FIELDMODE. */
858 create_convert_operand_to (&ops[1], value, fieldmode, false);
859 if (maybe_expand_insn (icode, 2, ops))
860 return true;
861 }
862 }
863
864 /* Handle fields bigger than a word. */
865
866 if (bitsize > BITS_PER_WORD)
867 {
868 /* Here we transfer the words of the field
869 in the order least significant first.
870 This is because the most significant word is the one which may
871 be less than full.
872 However, only do that if the value is not BLKmode. */
873
874 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
875 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
876 unsigned int i;
877 rtx_insn *last;
878
879 /* This is the mode we must force value to, so that there will be enough
880 subwords to extract. Note that fieldmode will often (always?) be
881 VOIDmode, because that is what store_field uses to indicate that this
882 is a bit field, but passing VOIDmode to operand_subword_force
883 is not allowed. */
884 fieldmode = GET_MODE (value);
885 if (fieldmode == VOIDmode)
886 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
887
888 last = get_last_insn ();
889 for (i = 0; i < nwords; i++)
890 {
891 /* If I is 0, use the low-order word in both field and target;
892 if I is 1, use the next to lowest word; and so on. */
893 unsigned int wordnum = (backwards
894 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
895 - i - 1
896 : i);
897 unsigned int bit_offset = (backwards ^ reverse
898 ? MAX ((int) bitsize - ((int) i + 1)
899 * BITS_PER_WORD,
900 0)
901 : (int) i * BITS_PER_WORD);
902 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
903 unsigned HOST_WIDE_INT new_bitsize =
904 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
905
906 /* If the remaining chunk doesn't have full wordsize we have
907 to make sure that for big-endian machines the higher order
908 bits are used. */
909 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
910 value_word = simplify_expand_binop (word_mode, lshr_optab,
911 value_word,
912 GEN_INT (BITS_PER_WORD
913 - new_bitsize),
914 NULL_RTX, true,
915 OPTAB_LIB_WIDEN);
916
917 if (!store_bit_field_1 (op0, new_bitsize,
918 bitnum + bit_offset,
919 bitregion_start, bitregion_end,
920 word_mode,
921 value_word, reverse, fallback_p))
922 {
923 delete_insns_since (last);
924 return false;
925 }
926 }
927 return true;
928 }
929
930 /* If VALUE has a floating-point or complex mode, access it as an
931 integer of the corresponding size. This can occur on a machine
932 with 64 bit registers that uses SFmode for float. It can also
933 occur for unaligned float or complex fields. */
934 orig_value = value;
935 scalar_int_mode value_mode;
936 if (GET_MODE (value) == VOIDmode)
937 /* By this point we've dealt with values that are bigger than a word,
938 so word_mode is a conservatively correct choice. */
939 value_mode = word_mode;
940 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
941 {
942 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
943 value = gen_reg_rtx (value_mode);
944 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
945 }
946
947 /* If OP0 is a multi-word register, narrow it to the affected word.
948 If the region spans two words, defer to store_split_bit_field.
949 Don't do this if op0 is a single hard register wider than word
950 such as a float or vector register. */
951 if (!MEM_P (op0)
952 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
953 && (!REG_P (op0)
954 || !HARD_REGISTER_P (op0)
955 || HARD_REGNO_NREGS (REGNO (op0), op0_mode.require ()) != 1))
956 {
957 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
958 {
959 if (!fallback_p)
960 return false;
961
962 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
963 bitregion_start, bitregion_end,
964 value, value_mode, reverse);
965 return true;
966 }
967 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
968 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
969 gcc_assert (op0);
970 op0_mode = word_mode;
971 bitnum %= BITS_PER_WORD;
972 }
973
974 /* From here on we can assume that the field to be stored in fits
975 within a word. If the destination is a register, it too fits
976 in a word. */
977
978 extraction_insn insv;
979 if (!MEM_P (op0)
980 && !reverse
981 && get_best_reg_extraction_insn (&insv, EP_insv,
982 GET_MODE_BITSIZE (op0_mode.require ()),
983 fieldmode)
984 && store_bit_field_using_insv (&insv, op0, op0_mode,
985 bitsize, bitnum, value, value_mode))
986 return true;
987
988 /* If OP0 is a memory, try copying it to a register and seeing if a
989 cheap register alternative is available. */
990 if (MEM_P (op0) && !reverse)
991 {
992 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
993 fieldmode)
994 && store_bit_field_using_insv (&insv, op0, op0_mode,
995 bitsize, bitnum, value, value_mode))
996 return true;
997
998 rtx_insn *last = get_last_insn ();
999
1000 /* Try loading part of OP0 into a register, inserting the bitfield
1001 into that, and then copying the result back to OP0. */
1002 unsigned HOST_WIDE_INT bitpos;
1003 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1004 bitregion_start, bitregion_end,
1005 fieldmode, &bitpos);
1006 if (xop0)
1007 {
1008 rtx tempreg = copy_to_reg (xop0);
1009 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1010 bitregion_start, bitregion_end,
1011 fieldmode, orig_value, reverse, false))
1012 {
1013 emit_move_insn (xop0, tempreg);
1014 return true;
1015 }
1016 delete_insns_since (last);
1017 }
1018 }
1019
1020 if (!fallback_p)
1021 return false;
1022
1023 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1024 bitregion_end, value, value_mode, reverse);
1025 return true;
1026 }
1027
1028 /* Generate code to store value from rtx VALUE
1029 into a bit-field within structure STR_RTX
1030 containing BITSIZE bits starting at bit BITNUM.
1031
1032 BITREGION_START is the bit position of the first bit in this region.
1033 BITREGION_END is the bit position of the last bit in this region.
1034 These two fields are 0, if the C++ memory model does not apply,
1035 or we are not interested in keeping track of bitfield regions.
1036
1037 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1038
1039 If REVERSE is true, the store is to be done in reverse order. */
1040
1041 void
1042 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1043 unsigned HOST_WIDE_INT bitnum,
1044 unsigned HOST_WIDE_INT bitregion_start,
1045 unsigned HOST_WIDE_INT bitregion_end,
1046 machine_mode fieldmode,
1047 rtx value, bool reverse)
1048 {
1049 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1050 scalar_int_mode int_mode;
1051 if (is_a <scalar_int_mode> (fieldmode, &int_mode)
1052 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1053 bitregion_start, bitregion_end))
1054 {
1055 /* Storing of a full word can be done with a simple store.
1056 We know here that the field can be accessed with one single
1057 instruction. For targets that support unaligned memory,
1058 an unaligned access may be necessary. */
1059 if (bitsize == GET_MODE_BITSIZE (int_mode))
1060 {
1061 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1062 bitnum / BITS_PER_UNIT);
1063 if (reverse)
1064 value = flip_storage_order (int_mode, value);
1065 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1066 emit_move_insn (str_rtx, value);
1067 }
1068 else
1069 {
1070 rtx temp;
1071
1072 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1073 &bitnum);
1074 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1075 temp = copy_to_reg (str_rtx);
1076 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1077 int_mode, value, reverse, true))
1078 gcc_unreachable ();
1079
1080 emit_move_insn (str_rtx, temp);
1081 }
1082
1083 return;
1084 }
1085
1086 /* Under the C++0x memory model, we must not touch bits outside the
1087 bit region. Adjust the address to start at the beginning of the
1088 bit region. */
1089 if (MEM_P (str_rtx) && bitregion_start > 0)
1090 {
1091 scalar_int_mode best_mode;
1092 machine_mode addr_mode = VOIDmode;
1093 HOST_WIDE_INT offset, size;
1094
1095 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1096
1097 offset = bitregion_start / BITS_PER_UNIT;
1098 bitnum -= bitregion_start;
1099 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1100 bitregion_end -= bitregion_start;
1101 bitregion_start = 0;
1102 if (get_best_mode (bitsize, bitnum,
1103 bitregion_start, bitregion_end,
1104 MEM_ALIGN (str_rtx), INT_MAX,
1105 MEM_VOLATILE_P (str_rtx), &best_mode))
1106 addr_mode = best_mode;
1107 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1108 offset, size);
1109 }
1110
1111 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1112 bitregion_start, bitregion_end,
1113 fieldmode, value, reverse, true))
1114 gcc_unreachable ();
1115 }
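
/* A typical call, sketched assuming a little-endian target, an 8-bit
   QImode and no C++ memory model constraints:
   store_bit_field (mem, 3, 5, 0, 0, QImode, val, false)
   stores the low 3 bits of VAL into bits 5..7 of the byte addressed
   by MEM.  */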
1116 \f
1117 /* Use shifts and boolean operations to store VALUE into a bit field of
1118 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1119 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1120 the mode of VALUE.
1121
1122 If REVERSE is true, the store is to be done in reverse order. */
1123
1124 static void
1125 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1126 unsigned HOST_WIDE_INT bitsize,
1127 unsigned HOST_WIDE_INT bitnum,
1128 unsigned HOST_WIDE_INT bitregion_start,
1129 unsigned HOST_WIDE_INT bitregion_end,
1130 rtx value, scalar_int_mode value_mode, bool reverse)
1131 {
1132 /* There is a case not handled here:
1133 a structure with a known alignment of just a halfword
1134 and a field split across two aligned halfwords within the structure.
1135 Or likewise a structure with a known alignment of just a byte
1136 and a field split across two bytes.
1137 Such cases are not supposed to be able to occur. */
1138
1139 scalar_int_mode best_mode;
1140 if (MEM_P (op0))
1141 {
1142 unsigned int max_bitsize = BITS_PER_WORD;
1143 scalar_int_mode imode;
1144 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1145 max_bitsize = GET_MODE_BITSIZE (imode);
1146
1147 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1148 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1149 &best_mode))
1150 {
1151 /* The only way this should occur is if the field spans word
1152 boundaries. */
1153 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1154 bitregion_start, bitregion_end,
1155 value, value_mode, reverse);
1156 return;
1157 }
1158
1159 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1160 }
1161 else
1162 best_mode = op0_mode.require ();
1163
1164 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1165 value, value_mode, reverse);
1166 }
1167
1168 /* Helper function for store_fixed_bit_field, stores
1169 the bit field always using MODE, which is the mode of OP0. The other
1170 arguments are as for store_fixed_bit_field. */
1171
1172 static void
1173 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1174 unsigned HOST_WIDE_INT bitsize,
1175 unsigned HOST_WIDE_INT bitnum,
1176 rtx value, scalar_int_mode value_mode, bool reverse)
1177 {
1178 rtx temp;
1179 int all_zero = 0;
1180 int all_one = 0;
1181
1182 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1183 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1184
1185 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1186 /* BITNUM is the distance between our msb
1187 and that of the containing datum.
1188 Convert it to the distance from the lsb. */
1189 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1190
1191 /* Now BITNUM is always the distance between our lsb
1192 and that of OP0. */
1193
1194 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1195 we must first convert its mode to MODE. */
1196
1197 if (CONST_INT_P (value))
1198 {
1199 unsigned HOST_WIDE_INT v = UINTVAL (value);
1200
1201 if (bitsize < HOST_BITS_PER_WIDE_INT)
1202 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1203
1204 if (v == 0)
1205 all_zero = 1;
1206 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1207 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1208 || (bitsize == HOST_BITS_PER_WIDE_INT
1209 && v == HOST_WIDE_INT_M1U))
1210 all_one = 1;
1211
1212 value = lshift_value (mode, v, bitnum);
1213 }
1214 else
1215 {
1216 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1217 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1218
1219 if (value_mode != mode)
1220 value = convert_to_mode (mode, value, 1);
1221
1222 if (must_and)
1223 value = expand_binop (mode, and_optab, value,
1224 mask_rtx (mode, 0, bitsize, 0),
1225 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1226 if (bitnum > 0)
1227 value = expand_shift (LSHIFT_EXPR, mode, value,
1228 bitnum, NULL_RTX, 1);
1229 }
1230
1231 if (reverse)
1232 value = flip_storage_order (mode, value);
1233
1234 /* Now clear the chosen bits in OP0,
1235 except that if VALUE is -1 we need not bother. */
1236 /* We keep the intermediates in registers to allow CSE to combine
1237 consecutive bitfield assignments. */
1238
1239 temp = force_reg (mode, op0);
1240
1241 if (! all_one)
1242 {
1243 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1244 if (reverse)
1245 mask = flip_storage_order (mode, mask);
1246 temp = expand_binop (mode, and_optab, temp, mask,
1247 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1248 temp = force_reg (mode, temp);
1249 }
1250
1251 /* Now logical-or VALUE into OP0, unless it is zero. */
1252
1253 if (! all_zero)
1254 {
1255 temp = expand_binop (mode, ior_optab, temp, value,
1256 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1257 temp = force_reg (mode, temp);
1258 }
1259
1260 if (op0 != temp)
1261 {
1262 op0 = copy_rtx (op0);
1263 emit_move_insn (op0, temp);
1264 }
1265 }
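
/* As a worked example, assuming a little-endian target, no REVERSE and
   a 32-bit MODE: storing the 3-bit constant 5 at BITNUM 4 ANDs OP0
   with the complement mask ~0x70 (clearing bits 4..6) and then IORs in
   5 << 4 == 0x50.  */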
1266 \f
1267 /* Store a bit field that is split across multiple accessible memory objects.
1268
1269 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1270 BITSIZE is the field width; BITPOS the position of its first bit
1271 (within the word).
1272 VALUE is the value to store, which has mode VALUE_MODE.
1273 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1274 a BLKmode MEM.
1275
1276 If REVERSE is true, the store is to be done in reverse order.
1277
1278 This does not yet handle fields wider than BITS_PER_WORD. */
1279
1280 static void
1281 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1282 unsigned HOST_WIDE_INT bitsize,
1283 unsigned HOST_WIDE_INT bitpos,
1284 unsigned HOST_WIDE_INT bitregion_start,
1285 unsigned HOST_WIDE_INT bitregion_end,
1286 rtx value, scalar_int_mode value_mode, bool reverse)
1287 {
1288 unsigned int unit, total_bits, bitsdone = 0;
1289
1290 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
1291 that much at a time. */
1292 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1293 unit = BITS_PER_WORD;
1294 else
1295 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1296
1297 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1298 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1299 again, and we will mutually recurse forever. */
1300 if (MEM_P (op0) && op0_mode.exists ())
1301 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1302
1303 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1304 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1305 that VALUE might be a floating-point constant. */
1306 if (CONSTANT_P (value) && !CONST_INT_P (value))
1307 {
1308 rtx word = gen_lowpart_common (word_mode, value);
1309
1310 if (word && (value != word))
1311 value = word;
1312 else
1313 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1314 value_mode = word_mode;
1315 }
1316
1317 total_bits = GET_MODE_BITSIZE (value_mode);
1318
1319 while (bitsdone < bitsize)
1320 {
1321 unsigned HOST_WIDE_INT thissize;
1322 unsigned HOST_WIDE_INT thispos;
1323 unsigned HOST_WIDE_INT offset;
1324 rtx part;
1325
1326 offset = (bitpos + bitsdone) / unit;
1327 thispos = (bitpos + bitsdone) % unit;
1328
1329 /* When the region of bytes we can touch is restricted, decrease
1330 UNIT near the end of the region as needed. If op0 is a REG or a
1331 SUBREG of a REG, don't do this, as there can't be data races on
1332 a register and we can expand shorter code in some cases. */
1333 if (bitregion_end
1334 && unit > BITS_PER_UNIT
1335 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1336 && !REG_P (op0)
1337 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1338 {
1339 unit = unit / 2;
1340 continue;
1341 }
1342
1343 /* THISSIZE must not overrun a word boundary. Otherwise,
1344 store_fixed_bit_field will call us again, and we will mutually
1345 recurse forever. */
1346 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1347 thissize = MIN (thissize, unit - thispos);
1348
1349 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1350 {
1351 /* Fetch successively less significant portions. */
1352 if (CONST_INT_P (value))
1353 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1354 >> (bitsize - bitsdone - thissize))
1355 & ((HOST_WIDE_INT_1 << thissize) - 1));
1356 /* Likewise, but the source is little-endian. */
1357 else if (reverse)
1358 part = extract_fixed_bit_field (word_mode, value, value_mode,
1359 thissize,
1360 bitsize - bitsdone - thissize,
1361 NULL_RTX, 1, false);
1362 else
1363 /* The args are chosen so that the last part includes the
1364 lsb. Give extract_bit_field the value it needs (with
1365 endianness compensation) to fetch the piece we want. */
1366 part = extract_fixed_bit_field (word_mode, value, value_mode,
1367 thissize,
1368 total_bits - bitsize + bitsdone,
1369 NULL_RTX, 1, false);
1370 }
1371 else
1372 {
1373 /* Fetch successively more significant portions. */
1374 if (CONST_INT_P (value))
1375 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1376 >> bitsdone)
1377 & ((HOST_WIDE_INT_1 << thissize) - 1));
1378 /* Likewise, but the source is big-endian. */
1379 else if (reverse)
1380 part = extract_fixed_bit_field (word_mode, value, value_mode,
1381 thissize,
1382 total_bits - bitsdone - thissize,
1383 NULL_RTX, 1, false);
1384 else
1385 part = extract_fixed_bit_field (word_mode, value, value_mode,
1386 thissize, bitsdone, NULL_RTX,
1387 1, false);
1388 }
1389
1390 /* If OP0 is a register, then handle OFFSET here. */
1391 rtx op0_piece = op0;
1392 opt_scalar_int_mode op0_piece_mode = op0_mode;
1393 if (SUBREG_P (op0) || REG_P (op0))
1394 {
1395 scalar_int_mode imode;
1396 if (op0_mode.exists (&imode)
1397 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1398 {
1399 if (offset)
1400 op0_piece = const0_rtx;
1401 }
1402 else
1403 {
1404 op0_piece = operand_subword_force (op0,
1405 offset * unit / BITS_PER_WORD,
1406 GET_MODE (op0));
1407 op0_piece_mode = word_mode;
1408 }
1409 offset &= BITS_PER_WORD / unit - 1;
1410 }
1411
1412 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1413 it is just an out-of-bounds access. Ignore it. */
1414 if (op0_piece != const0_rtx)
1415 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1416 offset * unit + thispos, bitregion_start,
1417 bitregion_end, part, word_mode, reverse);
1418 bitsdone += thissize;
1419 }
1420 }
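
/* For example, assuming a little-endian target with 32-bit words,
   storing a 16-bit field at BITPOS 24 in a multi-word REG is done in
   two pieces: the first iteration stores 8 bits at bit 24 of word 0
   and the second stores the remaining 8 bits at bit 0 of word 1.  */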
1421 \f
1422 /* A subroutine of extract_bit_field_1 that converts return value X
1423 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1424 to extract_bit_field. */
1425
1426 static rtx
1427 convert_extracted_bit_field (rtx x, machine_mode mode,
1428 machine_mode tmode, bool unsignedp)
1429 {
1430 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1431 return x;
1432
1433 /* If TMODE is not a scalar integral mode, first convert to an
1434 integer mode of the same size and then view the result in TMODE
1435 via a lowpart SUBREG. */
1436 if (!SCALAR_INT_MODE_P (tmode))
1437 {
1438 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1439 x = convert_to_mode (int_mode, x, unsignedp);
1440 x = force_reg (int_mode, x);
1441 return gen_lowpart (tmode, x);
1442 }
1443
1444 return convert_to_mode (tmode, x, unsignedp);
1445 }
1446
1447 /* Try to use an ext(z)v pattern to extract a field from OP0.
1448 Return the extracted value on success, otherwise return null.
1449 EXTV describes the extraction instruction to use. If OP0_MODE
1450 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1451 The other arguments are as for extract_bit_field. */
1452
1453 static rtx
1454 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1455 opt_scalar_int_mode op0_mode,
1456 unsigned HOST_WIDE_INT bitsize,
1457 unsigned HOST_WIDE_INT bitnum,
1458 int unsignedp, rtx target,
1459 machine_mode mode, machine_mode tmode)
1460 {
1461 struct expand_operand ops[4];
1462 rtx spec_target = target;
1463 rtx spec_target_subreg = 0;
1464 scalar_int_mode ext_mode = extv->field_mode;
1465 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1466
1467 if (bitsize == 0 || unit < bitsize)
1468 return NULL_RTX;
1469
1470 if (MEM_P (op0))
1471 /* Get a reference to the first byte of the field. */
1472 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1473 &bitnum);
1474 else
1475 {
1476 /* Convert from counting within OP0 to counting in EXT_MODE. */
1477 if (BYTES_BIG_ENDIAN)
1478 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1479
1480 /* If op0 is a register, we need it in EXT_MODE to make it
1481 acceptable to the format of ext(z)v. */
1482 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1483 return NULL_RTX;
1484 if (REG_P (op0) && op0_mode.require () != ext_mode)
1485 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1486 }
1487
1488 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1489 "backwards" from the size of the unit we are extracting from.
1490 Otherwise, we count bits from the most significant on a
1491 BYTES/BITS_BIG_ENDIAN machine. */
1492
1493 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1494 bitnum = unit - bitsize - bitnum;
1495
1496 if (target == 0)
1497 target = spec_target = gen_reg_rtx (tmode);
1498
1499 if (GET_MODE (target) != ext_mode)
1500 {
1501 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1502 between the mode of the extraction (word_mode) and the target
1503 mode. Instead, create a temporary and use convert_move to set
1504 the target. */
1505 if (REG_P (target)
1506 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1507 {
1508 target = gen_lowpart (ext_mode, target);
1509 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1510 spec_target_subreg = target;
1511 }
1512 else
1513 target = gen_reg_rtx (ext_mode);
1514 }
1515
1516 create_output_operand (&ops[0], target, ext_mode);
1517 create_fixed_operand (&ops[1], op0);
1518 create_integer_operand (&ops[2], bitsize);
1519 create_integer_operand (&ops[3], bitnum);
1520 if (maybe_expand_insn (extv->icode, 4, ops))
1521 {
1522 target = ops[0].value;
1523 if (target == spec_target)
1524 return target;
1525 if (target == spec_target_subreg)
1526 return spec_target;
1527 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1528 }
1529 return NULL_RTX;
1530 }
1531
1532 /* A subroutine of extract_bit_field, with the same arguments.
1533 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1534 if we can find no other means of implementing the operation.
1535 If FALLBACK_P is false, return NULL instead. */
1536
1537 static rtx
1538 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1539 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1540 machine_mode mode, machine_mode tmode,
1541 bool reverse, bool fallback_p, rtx *alt_rtl)
1542 {
1543 rtx op0 = str_rtx;
1544 machine_mode mode1;
1545
1546 if (tmode == VOIDmode)
1547 tmode = mode;
1548
1549 while (GET_CODE (op0) == SUBREG)
1550 {
1551 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1552 op0 = SUBREG_REG (op0);
1553 }
1554
1555 /* If we have an out-of-bounds access to a register, just return an
1556 uninitialized register of the required mode. This can occur if the
1557 source code contains an out-of-bounds access to a small array. */
1558 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1559 return gen_reg_rtx (tmode);
1560
1561 if (REG_P (op0)
1562 && mode == GET_MODE (op0)
1563 && bitnum == 0
1564 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1565 {
1566 if (reverse)
1567 op0 = flip_storage_order (mode, op0);
1568 /* We're trying to extract a full register from itself. */
1569 return op0;
1570 }
1571
1572 /* First try to check for vector from vector extractions. */
1573 if (VECTOR_MODE_P (GET_MODE (op0))
1574 && !MEM_P (op0)
1575 && VECTOR_MODE_P (tmode)
1576 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1577 {
1578 machine_mode new_mode = GET_MODE (op0);
1579 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1580 {
1581 new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1582 GET_MODE_BITSIZE (GET_MODE (op0))
1583 / GET_MODE_UNIT_BITSIZE (tmode));
1584 if (!VECTOR_MODE_P (new_mode)
1585 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1586 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1587 || !targetm.vector_mode_supported_p (new_mode))
1588 new_mode = VOIDmode;
1589 }
1590 if (new_mode != VOIDmode
1591 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1592 != CODE_FOR_nothing)
1593 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1594 == bitnum / GET_MODE_BITSIZE (tmode)))
1595 {
1596 struct expand_operand ops[3];
1597 machine_mode outermode = new_mode;
1598 machine_mode innermode = tmode;
1599 enum insn_code icode
1600 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1601 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1602
1603 if (new_mode != GET_MODE (op0))
1604 op0 = gen_lowpart (new_mode, op0);
1605 create_output_operand (&ops[0], target, innermode);
1606 ops[0].target = 1;
1607 create_input_operand (&ops[1], op0, outermode);
1608 create_integer_operand (&ops[2], pos);
1609 if (maybe_expand_insn (icode, 3, ops))
1610 {
1611 if (alt_rtl && ops[0].target)
1612 *alt_rtl = target;
1613 target = ops[0].value;
1614 if (GET_MODE (target) != mode)
1615 return gen_lowpart (tmode, target);
1616 return target;
1617 }
1618 }
1619 }
1620
1621 /* See if we can get a better vector mode before extracting. */
1622 if (VECTOR_MODE_P (GET_MODE (op0))
1623 && !MEM_P (op0)
1624 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1625 {
1626 machine_mode new_mode;
1627
1628 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1629 new_mode = MIN_MODE_VECTOR_FLOAT;
1630 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1631 new_mode = MIN_MODE_VECTOR_FRACT;
1632 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1633 new_mode = MIN_MODE_VECTOR_UFRACT;
1634 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1635 new_mode = MIN_MODE_VECTOR_ACCUM;
1636 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1637 new_mode = MIN_MODE_VECTOR_UACCUM;
1638 else
1639 new_mode = MIN_MODE_VECTOR_INT;
1640
1641 FOR_EACH_MODE_FROM (new_mode, new_mode)
1642 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1643 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1644 && targetm.vector_mode_supported_p (new_mode))
1645 break;
1646 if (new_mode != VOIDmode)
1647 op0 = gen_lowpart (new_mode, op0);
1648 }
1649
1650 /* Use vec_extract patterns for extracting parts of vectors whenever
1651 available. */
1652 machine_mode outermode = GET_MODE (op0);
1653 scalar_mode innermode = GET_MODE_INNER (outermode);
1654 if (VECTOR_MODE_P (outermode)
1655 && !MEM_P (op0)
1656 && (convert_optab_handler (vec_extract_optab, outermode, innermode)
1657 != CODE_FOR_nothing)
1658 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode)
1659 == bitnum / GET_MODE_BITSIZE (innermode)))
1660 {
1661 struct expand_operand ops[3];
1662 enum insn_code icode
1663 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1664 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1665
1666 create_output_operand (&ops[0], target, innermode);
1667 ops[0].target = 1;
1668 create_input_operand (&ops[1], op0, outermode);
1669 create_integer_operand (&ops[2], pos);
1670 if (maybe_expand_insn (icode, 3, ops))
1671 {
1672 if (alt_rtl && ops[0].target)
1673 *alt_rtl = target;
1674 target = ops[0].value;
1675 if (GET_MODE (target) != mode)
1676 return gen_lowpart (tmode, target);
1677 return target;
1678 }
1679 }
1680
1681 /* Make sure we are playing with integral modes. Pun with subregs
1682 if we aren't. */
1683 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1684 scalar_int_mode imode;
1685 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1686 {
1687 if (MEM_P (op0))
1688 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1689 0, MEM_SIZE (op0));
1690 else if (op0_mode.exists (&imode))
1691 {
1692 op0 = gen_lowpart (imode, op0);
1693
1694 /* If we got a SUBREG, force it into a register since we
1695 aren't going to be able to do another SUBREG on it. */
1696 if (GET_CODE (op0) == SUBREG)
1697 op0 = force_reg (imode, op0);
1698 }
1699 else
1700 {
1701 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1702 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1703 emit_move_insn (mem, op0);
1704 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1705 }
1706 }
1707
1708 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1709 If that's wrong, the solution is to test for it and set TARGET to 0
1710 if needed. */
1711
1712 /* Get the mode of the field to use for atomic access or subreg
1713 conversion. */
1714 mode1 = mode;
1715 if (SCALAR_INT_MODE_P (tmode))
1716 {
1717 machine_mode try_mode = mode_for_size (bitsize,
1718 GET_MODE_CLASS (tmode), 0);
1719 if (try_mode != BLKmode)
1720 mode1 = try_mode;
1721 }
1722 gcc_assert (mode1 != BLKmode);
1723
1724 /* Extraction of a full MODE1 value can be done with a subreg as long
1725 as the least significant bit of the value is the least significant
1726 bit of either OP0 or a word of OP0. */
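/* For example, on a little-endian target the low 32 bits of a DImode
register can simply be returned as (subreg:SI (reg:DI ...) 0). */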
1727 if (!MEM_P (op0)
1728 && !reverse
1729 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
1730 && bitsize == GET_MODE_BITSIZE (mode1)
1731 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
1732 {
1733 rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
1734 bitnum / BITS_PER_UNIT);
1735 if (sub)
1736 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1737 }
1738
1739 /* Extraction of a full MODE1 value can be done with a load as long as
1740 the field is on a byte boundary and is sufficiently aligned. */
1741 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1742 {
1743 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1744 if (reverse)
1745 op0 = flip_storage_order (mode1, op0);
1746 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1747 }
1748
1749 /* Handle fields bigger than a word. */
1750
1751 if (bitsize > BITS_PER_WORD)
1752 {
1753 /* Here we transfer the words of the field
1754 in the order least significant first.
1755 This is because the most significant word is the one which may
1756 be less than full. */
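/* For example, a 40-bit field on a 32-bit-word target is transferred as
a full low-order word followed by a partial 8-bit high-order word. */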
1757
1758 const bool backwards = WORDS_BIG_ENDIAN;
1759 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1760 unsigned int i;
1761 rtx_insn *last;
1762
1763 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1764 target = gen_reg_rtx (mode);
1765
1766 /* In case we're about to clobber a base register or something
1767 (see gcc.c-torture/execute/20040625-1.c). */
1768 if (reg_mentioned_p (target, str_rtx))
1769 target = gen_reg_rtx (mode);
1770
1771 /* Indicate for flow that the entire target reg is being set. */
1772 emit_clobber (target);
1773
1774 last = get_last_insn ();
1775 for (i = 0; i < nwords; i++)
1776 {
1777 /* If I is 0, use the low-order word in both field and target;
1778 if I is 1, use the next to lowest word; and so on. */
1779 /* Word number in TARGET to use. */
1780 unsigned int wordnum
1781 = (backwards
1782 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1783 : i);
1784 /* Offset from start of field in OP0. */
1785 unsigned int bit_offset = (backwards ^ reverse
1786 ? MAX ((int) bitsize - ((int) i + 1)
1787 * BITS_PER_WORD,
1788 0)
1789 : (int) i * BITS_PER_WORD);
1790 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1791 rtx result_part
1792 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1793 bitsize - i * BITS_PER_WORD),
1794 bitnum + bit_offset, 1, target_part,
1795 mode, word_mode, reverse, fallback_p, NULL);
1796
1797 gcc_assert (target_part);
1798 if (!result_part)
1799 {
1800 delete_insns_since (last);
1801 return NULL;
1802 }
1803
1804 if (result_part != target_part)
1805 emit_move_insn (target_part, result_part);
1806 }
1807
1808 if (unsignedp)
1809 {
1810 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1811 need to be zeroed out. */
1812 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1813 {
1814 unsigned int i, total_words;
1815
1816 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1817 for (i = nwords; i < total_words; i++)
1818 emit_move_insn
1819 (operand_subword (target,
1820 backwards ? total_words - i - 1 : i,
1821 1, VOIDmode),
1822 const0_rtx);
1823 }
1824 return target;
1825 }
1826
1827 /* Signed bit field: sign-extend with two arithmetic shifts. */
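/* For example, with a 64-bit MODE and a 40-bit field, shifting left by 24
and then arithmetically right by 24 replicates bit 39 of the field into
the upper 24 bits of the result. */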
1828 target = expand_shift (LSHIFT_EXPR, mode, target,
1829 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1830 return expand_shift (RSHIFT_EXPR, mode, target,
1831 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1832 }
1833
1834 /* If OP0 is a multi-word register, narrow it to the affected word.
1835 If the region spans two words, defer to extract_split_bit_field. */
1836 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1837 {
1838 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1839 {
1840 if (!fallback_p)
1841 return NULL_RTX;
1842 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1843 unsignedp, reverse);
1844 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1845 }
1846 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1847 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1848 op0_mode = word_mode;
1849 bitnum %= BITS_PER_WORD;
1850 }
1851
1852 /* From here on we know the desired field is smaller than a word.
1853 If OP0 is a register, it too fits within a word. */
1854 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1855 extraction_insn extv;
1856 if (!MEM_P (op0)
1857 && !reverse
1858 /* ??? We could limit the structure size to the part of OP0 that
1859 contains the field, with appropriate checks for endianness
1860 and TRULY_NOOP_TRUNCATION. */
1861 && get_best_reg_extraction_insn (&extv, pattern,
1862 GET_MODE_BITSIZE (op0_mode.require ()),
1863 tmode))
1864 {
1865 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1866 bitsize, bitnum,
1867 unsignedp, target, mode,
1868 tmode);
1869 if (result)
1870 return result;
1871 }
1872
1873 /* If OP0 is a memory, try copying it to a register and seeing if a
1874 cheap register alternative is available. */
1875 if (MEM_P (op0) && !reverse)
1876 {
1877 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1878 tmode))
1879 {
1880 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1881 bitsize, bitnum,
1882 unsignedp, target, mode,
1883 tmode);
1884 if (result)
1885 return result;
1886 }
1887
1888 rtx_insn *last = get_last_insn ();
1889
1890 /* Try loading part of OP0 into a register and extracting the
1891 bitfield from that. */
1892 unsigned HOST_WIDE_INT bitpos;
1893 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1894 0, 0, tmode, &bitpos);
1895 if (xop0)
1896 {
1897 xop0 = copy_to_reg (xop0);
1898 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1899 unsignedp, target,
1900 mode, tmode, reverse, false, NULL);
1901 if (result)
1902 return result;
1903 delete_insns_since (last);
1904 }
1905 }
1906
1907 if (!fallback_p)
1908 return NULL;
1909
1910 /* Find a correspondingly-sized integer field, so we can apply
1911 shifts and masks to it. */
1912 scalar_int_mode int_mode;
1913 if (!int_mode_for_mode (tmode).exists (&int_mode))
1914 /* If this fails, we should probably push op0 out to memory and then
1915 do a load. */
1916 int_mode = int_mode_for_mode (mode).require ();
1917
1918 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
1919 bitnum, target, unsignedp, reverse);
1920
1921 /* Complex values must be reversed piecewise, so we need to undo the global
1922 reversal, convert to the complex mode and reverse again. */
1923 if (reverse && COMPLEX_MODE_P (tmode))
1924 {
1925 target = flip_storage_order (int_mode, target);
1926 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1927 target = flip_storage_order (tmode, target);
1928 }
1929 else
1930 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1931
1932 return target;
1933 }
1934
1935 /* Generate code to extract a bit-field from STR_RTX
1936 containing BITSIZE bits, starting at BITNUM,
1937 and put it in TARGET if possible (if TARGET is nonzero).
1938 Regardless of TARGET, we return the rtx for where the value is placed.
1939
1940 STR_RTX is the structure containing the byte (a REG or MEM).
1941 UNSIGNEDP is nonzero if this is an unsigned bit field.
1942 MODE is the natural mode of the field value once extracted.
1943 TMODE is the mode the caller would like the value to have;
1944 but the value may be returned with type MODE instead.
1945
1946 If REVERSE is true, the extraction is to be done in reverse order.
1947
1948 If a TARGET is specified and we can store in it at no extra cost,
1949 we do so, and return TARGET.
1950 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1951 if they are equally easy. */
1952
1953 rtx
1954 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1955 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1956 machine_mode mode, machine_mode tmode, bool reverse,
1957 rtx *alt_rtl)
1958 {
1959 machine_mode mode1;
1960
1961 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1962 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1963 mode1 = GET_MODE (str_rtx);
1964 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1965 mode1 = GET_MODE (target);
1966 else
1967 mode1 = tmode;
1968
1969 scalar_int_mode int_mode;
1970 if (is_a <scalar_int_mode> (mode1, &int_mode)
1971 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
1972 {
1973 /* Extraction of a full INT_MODE value can be done with a simple load.
1974 We know here that the field can be accessed with one single
1975 instruction. For targets that support unaligned memory,
1976 an unaligned access may be necessary. */
1977 if (bitsize == GET_MODE_BITSIZE (int_mode))
1978 {
1979 rtx result = adjust_bitfield_address (str_rtx, int_mode,
1980 bitnum / BITS_PER_UNIT);
1981 if (reverse)
1982 result = flip_storage_order (int_mode, result);
1983 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1984 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1985 }
1986
1987 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1988 &bitnum);
1989 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1990 str_rtx = copy_to_reg (str_rtx);
1991 }
1992
1993 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1994 target, mode, tmode, reverse, true, alt_rtl);
1995 }
1996 \f
1997 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1998 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
1999 otherwise OP0 is a BLKmode MEM.
2000
2001 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2002 If REVERSE is true, the extraction is to be done in reverse order.
2003
2004 If TARGET is nonzero, attempts to store the value there
2005 and return TARGET, but this is not guaranteed.
2006 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2007
2008 static rtx
2009 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2010 opt_scalar_int_mode op0_mode,
2011 unsigned HOST_WIDE_INT bitsize,
2012 unsigned HOST_WIDE_INT bitnum, rtx target,
2013 int unsignedp, bool reverse)
2014 {
2015 scalar_int_mode mode;
2016 if (MEM_P (op0))
2017 {
2018 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2019 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2020 /* The only way this should occur is if the field spans word
2021 boundaries. */
2022 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2023 unsignedp, reverse);
2024
2025 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2026 }
2027 else
2028 mode = op0_mode.require ();
2029
2030 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2031 target, unsignedp, reverse);
2032 }
2033
2034 /* Helper function for extract_fixed_bit_field; it extracts
2035 the bit field always using MODE, which is the mode of OP0.
2036 The other arguments are as for extract_fixed_bit_field. */
2037
2038 static rtx
2039 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2040 unsigned HOST_WIDE_INT bitsize,
2041 unsigned HOST_WIDE_INT bitnum, rtx target,
2042 int unsignedp, bool reverse)
2043 {
2044 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2045 for invalid input, such as extract equivalent of f5 from
2046 gcc.dg/pr48335-2.c. */
2047
2048 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2049 /* BITNUM is the distance between our msb and that of OP0.
2050 Convert it to the distance from the lsb. */
2051 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
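/* For example, with a 32-bit MODE, an 8-bit field at big-endian BITNUM 0
(the most significant byte) becomes BITNUM 24 counted from the lsb. */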
2052
2053 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2054 We have reduced the big-endian case to the little-endian case. */
2055 if (reverse)
2056 op0 = flip_storage_order (mode, op0);
2057
2058 if (unsignedp)
2059 {
2060 if (bitnum)
2061 {
2062 /* If the field does not already start at the lsb,
2063 shift it so it does. */
2064 /* Maybe propagate the target for the shift. */
2065 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2066 if (tmode != mode)
2067 subtarget = 0;
2068 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2069 }
2070 /* Convert the value to the desired mode. TMODE must also be a
2071 scalar integer for this conversion to make sense, since we
2072 shouldn't reinterpret the bits. */
2073 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2074 if (mode != new_mode)
2075 op0 = convert_to_mode (new_mode, op0, 1);
2076
2077 /* Unless the msb of the field used to be the msb when we shifted,
2078 mask out the upper bits. */
2079
2080 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2081 return expand_binop (new_mode, and_optab, op0,
2082 mask_rtx (new_mode, 0, bitsize, 0),
2083 target, 1, OPTAB_LIB_WIDEN);
2084 return op0;
2085 }
2086
2087 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2088 then arithmetic-shift its lsb to the lsb of the word. */
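/* For example, a 5-bit signed field sitting in the low bits of a QImode
value is shifted left by 3 and then arithmetically right by 3, which
leaves the field sign-extended to the full byte. */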
2089 op0 = force_reg (mode, op0);
2090
2091 /* Find the narrowest integer mode that contains the field. */
2092
2093 opt_scalar_int_mode mode_iter;
2094 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2095 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2096 break;
2097
2098 mode = mode_iter.require ();
2099 op0 = convert_to_mode (mode, op0, 0);
2100
2101 if (mode != tmode)
2102 target = 0;
2103
2104 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2105 {
2106 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2107 /* Maybe propagate the target for the shift. */
2108 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2109 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2110 }
2111
2112 return expand_shift (RSHIFT_EXPR, mode, op0,
2113 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2114 }
2115
2116 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2117 VALUE << BITPOS. */
2118
2119 static rtx
2120 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2121 int bitpos)
2122 {
2123 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2124 }
2125 \f
2126 /* Extract a bit field that is split across two words
2127 and return an RTX for the result.
2128
2129 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2130 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2131 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2132 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2133 a BLKmode MEM.
2134
2135 If REVERSE is true, the extraction is to be done in reverse order. */
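/* For example, with 32-bit words, a 16-bit field starting at bit 24 covers
bits 24..39: 8 bits come from the first word and 8 from the second, and
the two parts are OR-ed together below. */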
2136
2137 static rtx
2138 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2139 unsigned HOST_WIDE_INT bitsize,
2140 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2141 bool reverse)
2142 {
2143 unsigned int unit;
2144 unsigned int bitsdone = 0;
2145 rtx result = NULL_RTX;
2146 int first = 1;
2147
2148 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2149 much at a time. */
2150 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2151 unit = BITS_PER_WORD;
2152 else
2153 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2154
2155 while (bitsdone < bitsize)
2156 {
2157 unsigned HOST_WIDE_INT thissize;
2158 rtx part;
2159 unsigned HOST_WIDE_INT thispos;
2160 unsigned HOST_WIDE_INT offset;
2161
2162 offset = (bitpos + bitsdone) / unit;
2163 thispos = (bitpos + bitsdone) % unit;
2164
2165 /* THISSIZE must not overrun a word boundary. Otherwise,
2166 extract_fixed_bit_field will call us again, and we will mutually
2167 recurse forever. */
2168 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2169 thissize = MIN (thissize, unit - thispos);
2170
2171 /* If OP0 is a register, then handle OFFSET here. */
2172 rtx op0_piece = op0;
2173 opt_scalar_int_mode op0_piece_mode = op0_mode;
2174 if (SUBREG_P (op0) || REG_P (op0))
2175 {
2176 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2177 op0_piece_mode = word_mode;
2178 offset = 0;
2179 }
2180
2181 /* Extract the parts in bit-counting order,
2182 whose meaning is determined by BYTES_PER_UNIT.
2183 OFFSET is in UNITs, and UNIT is in bits. */
2184 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2185 thissize, offset * unit + thispos,
2186 0, 1, reverse);
2187 bitsdone += thissize;
2188
2189 /* Shift this part into place for the result. */
2190 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2191 {
2192 if (bitsize != bitsdone)
2193 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2194 bitsize - bitsdone, 0, 1);
2195 }
2196 else
2197 {
2198 if (bitsdone != thissize)
2199 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2200 bitsdone - thissize, 0, 1);
2201 }
2202
2203 if (first)
2204 result = part;
2205 else
2206 /* Combine the parts with bitwise or. This works
2207 because we extracted each part as an unsigned bit field. */
2208 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2209 OPTAB_LIB_WIDEN);
2210
2211 first = 0;
2212 }
2213
2214 /* Unsigned bit field: we are done. */
2215 if (unsignedp)
2216 return result;
2217 /* Signed bit field: sign-extend with two arithmetic shifts. */
2218 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2219 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2220 return expand_shift (RSHIFT_EXPR, word_mode, result,
2221 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2222 }
2223 \f
2224 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2225 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2226 MODE, fill the upper bits with zeros. Fail if the layout of either
2227 mode is unknown (as for CC modes) or if the extraction would involve
2228 unprofitable mode punning. Return the value on success, otherwise
2229 return null.
2230
2231 This is different from gen_lowpart* in these respects:
2232
2233 - the returned value must always be considered an rvalue
2234
2235 - when MODE is wider than SRC_MODE, the extraction involves
2236 a zero extension
2237
2238 - when MODE is smaller than SRC_MODE, the extraction involves
2239 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2240
2241 In other words, this routine performs a computation, whereas the
2242 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2243 operations. */
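/* For example, reading a DImode value as SImode is a truncation to the
low bits, while reading an SImode value as DImode zero-extends it. */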
2244
2245 rtx
2246 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2247 {
2248 scalar_int_mode int_mode, src_int_mode;
2249
2250 if (mode == src_mode)
2251 return src;
2252
2253 if (CONSTANT_P (src))
2254 {
2255 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2256 fails, it will happily create (subreg (symbol_ref)) or similar
2257 invalid SUBREGs. */
2258 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2259 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2260 if (ret)
2261 return ret;
2262
2263 if (GET_MODE (src) == VOIDmode
2264 || !validate_subreg (mode, src_mode, src, byte))
2265 return NULL_RTX;
2266
2267 src = force_reg (GET_MODE (src), src);
2268 return gen_rtx_SUBREG (mode, src, byte);
2269 }
2270
2271 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2272 return NULL_RTX;
2273
2274 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2275 && MODES_TIEABLE_P (mode, src_mode))
2276 {
2277 rtx x = gen_lowpart_common (mode, src);
2278 if (x)
2279 return x;
2280 }
2281
2282 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2283 || !int_mode_for_mode (mode).exists (&int_mode))
2284 return NULL_RTX;
2285
2286 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2287 return NULL_RTX;
2288 if (!MODES_TIEABLE_P (int_mode, mode))
2289 return NULL_RTX;
2290
2291 src = gen_lowpart (src_int_mode, src);
2292 src = convert_modes (int_mode, src_int_mode, src, true);
2293 src = gen_lowpart (mode, src);
2294 return src;
2295 }
2296 \f
2297 /* Add INC into TARGET. */
2298
2299 void
2300 expand_inc (rtx target, rtx inc)
2301 {
2302 rtx value = expand_binop (GET_MODE (target), add_optab,
2303 target, inc,
2304 target, 0, OPTAB_LIB_WIDEN);
2305 if (value != target)
2306 emit_move_insn (target, value);
2307 }
2308
2309 /* Subtract DEC from TARGET. */
2310
2311 void
2312 expand_dec (rtx target, rtx dec)
2313 {
2314 rtx value = expand_binop (GET_MODE (target), sub_optab,
2315 target, dec,
2316 target, 0, OPTAB_LIB_WIDEN);
2317 if (value != target)
2318 emit_move_insn (target, value);
2319 }
2320 \f
2321 /* Output a shift instruction for expression code CODE,
2322 with SHIFTED being the rtx for the value to shift,
2323 and AMOUNT the rtx for the amount to shift by.
2324 Store the result in the rtx TARGET, if that is convenient.
2325 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2326 Return the rtx for where the value is.
2327 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2328 in which case 0 is returned. */
2329
2330 static rtx
2331 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2332 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2333 {
2334 rtx op1, temp = 0;
2335 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2336 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2337 optab lshift_optab = ashl_optab;
2338 optab rshift_arith_optab = ashr_optab;
2339 optab rshift_uns_optab = lshr_optab;
2340 optab lrotate_optab = rotl_optab;
2341 optab rrotate_optab = rotr_optab;
2342 machine_mode op1_mode;
2343 machine_mode scalar_mode = mode;
2344 int attempt;
2345 bool speed = optimize_insn_for_speed_p ();
2346
2347 if (VECTOR_MODE_P (mode))
2348 scalar_mode = GET_MODE_INNER (mode);
2349 op1 = amount;
2350 op1_mode = GET_MODE (op1);
2351
2352 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2353 shift amount is a vector, use the vector/vector shift patterns. */
2354 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2355 {
2356 lshift_optab = vashl_optab;
2357 rshift_arith_optab = vashr_optab;
2358 rshift_uns_optab = vlshr_optab;
2359 lrotate_optab = vrotl_optab;
2360 rrotate_optab = vrotr_optab;
2361 }
2362
2363 /* Previously detected shift-counts computed by NEGATE_EXPR
2364 and shifted in the other direction; but that does not work
2365 on all machines. */
2366
2367 if (SHIFT_COUNT_TRUNCATED)
2368 {
2369 if (CONST_INT_P (op1)
2370 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2371 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2372 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2373 % GET_MODE_BITSIZE (scalar_mode));
2374 else if (GET_CODE (op1) == SUBREG
2375 && subreg_lowpart_p (op1)
2376 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2377 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2378 op1 = SUBREG_REG (op1);
2379 }
2380
2381 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2382 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2383 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2384 amount instead. */
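/* For example, a left rotation of a 32-bit value by 24 is rewritten as a
right rotation by 8. */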
2385 if (rotate
2386 && CONST_INT_P (op1)
2387 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2388 GET_MODE_BITSIZE (scalar_mode) - 1))
2389 {
2390 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2391 left = !left;
2392 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2393 }
2394
2395 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2396 Note that this is not the case for bigger values. For instance a rotation
2397 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2398 0x04030201 (bswapsi). */
2399 if (rotate
2400 && CONST_INT_P (op1)
2401 && INTVAL (op1) == BITS_PER_UNIT
2402 && GET_MODE_SIZE (scalar_mode) == 2
2403 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2404 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2405 unsignedp);
2406
2407 if (op1 == const0_rtx)
2408 return shifted;
2409
2410 /* Check whether it's cheaper to implement a left shift by a constant
2411 bit count by a sequence of additions. */
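/* For example, a left shift by 2 can be done as two self-additions
(x + x, then that sum added to itself) when two adds are cheaper than
the shift. */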
2412 if (code == LSHIFT_EXPR
2413 && CONST_INT_P (op1)
2414 && INTVAL (op1) > 0
2415 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2416 && INTVAL (op1) < MAX_BITS_PER_WORD
2417 && (shift_cost (speed, mode, INTVAL (op1))
2418 > INTVAL (op1) * add_cost (speed, mode))
2419 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2420 {
2421 int i;
2422 for (i = 0; i < INTVAL (op1); i++)
2423 {
2424 temp = force_reg (mode, shifted);
2425 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2426 unsignedp, OPTAB_LIB_WIDEN);
2427 }
2428 return shifted;
2429 }
2430
2431 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2432 {
2433 enum optab_methods methods;
2434
2435 if (attempt == 0)
2436 methods = OPTAB_DIRECT;
2437 else if (attempt == 1)
2438 methods = OPTAB_WIDEN;
2439 else
2440 methods = OPTAB_LIB_WIDEN;
2441
2442 if (rotate)
2443 {
2444 /* Widening does not work for rotation. */
2445 if (methods == OPTAB_WIDEN)
2446 continue;
2447 else if (methods == OPTAB_LIB_WIDEN)
2448 {
2449 /* If we have been unable to open-code this by a rotation,
2450 do it as the IOR of two shifts. I.e., to rotate A
2451 by N bits, compute
2452 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2453 where C is the bitsize of A.
2454
2455 It is theoretically possible that the target machine might
2456 not be able to perform either shift and hence we would
2457 be making two libcalls rather than just the one for the
2458 shift (similarly if IOR could not be done). We will allow
2459 this extremely unlikely lossage to avoid complicating the
2460 code below. */
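/* For a variable amount N, the (-N) & (C - 1) form keeps the second shift
count in range: it yields C - N for 1 <= N < C and 0 when N is 0, so no
shift by the full bitsize C is ever requested. */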
2461
2462 rtx subtarget = target == shifted ? 0 : target;
2463 rtx new_amount, other_amount;
2464 rtx temp1;
2465
2466 new_amount = op1;
2467 if (op1 == const0_rtx)
2468 return shifted;
2469 else if (CONST_INT_P (op1))
2470 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2471 - INTVAL (op1));
2472 else
2473 {
2474 other_amount
2475 = simplify_gen_unary (NEG, GET_MODE (op1),
2476 op1, GET_MODE (op1));
2477 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2478 other_amount
2479 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2480 gen_int_mode (mask, GET_MODE (op1)));
2481 }
2482
2483 shifted = force_reg (mode, shifted);
2484
2485 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2486 mode, shifted, new_amount, 0, 1);
2487 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2488 mode, shifted, other_amount,
2489 subtarget, 1);
2490 return expand_binop (mode, ior_optab, temp, temp1, target,
2491 unsignedp, methods);
2492 }
2493
2494 temp = expand_binop (mode,
2495 left ? lrotate_optab : rrotate_optab,
2496 shifted, op1, target, unsignedp, methods);
2497 }
2498 else if (unsignedp)
2499 temp = expand_binop (mode,
2500 left ? lshift_optab : rshift_uns_optab,
2501 shifted, op1, target, unsignedp, methods);
2502
2503 /* Do arithmetic shifts.
2504 Also, if we are going to widen the operand, we can just as well
2505 use an arithmetic right-shift instead of a logical one. */
2506 if (temp == 0 && ! rotate
2507 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2508 {
2509 enum optab_methods methods1 = methods;
2510
2511 /* If trying to widen a log shift to an arithmetic shift,
2512 don't accept an arithmetic shift of the same size. */
2513 if (unsignedp)
2514 methods1 = OPTAB_MUST_WIDEN;
2515
2516 /* Arithmetic shift */
2517
2518 temp = expand_binop (mode,
2519 left ? lshift_optab : rshift_arith_optab,
2520 shifted, op1, target, unsignedp, methods1);
2521 }
2522
2523 /* We used to try extzv here for logical right shifts, but that was
2524 only useful for one machine, the VAX, and caused poor code
2525 generation there for lshrdi3, so the code was deleted and a
2526 define_expand for lshrsi3 was added to vax.md. */
2527 }
2528
2529 gcc_assert (temp != NULL_RTX || may_fail);
2530 return temp;
2531 }
2532
2533 /* Output a shift instruction for expression code CODE,
2534 with SHIFTED being the rtx for the value to shift,
2535 and AMOUNT the amount to shift by.
2536 Store the result in the rtx TARGET, if that is convenient.
2537 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2538 Return the rtx for where the value is. */
2539
2540 rtx
2541 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2542 int amount, rtx target, int unsignedp)
2543 {
2544 return expand_shift_1 (code, mode,
2545 shifted, GEN_INT (amount), target, unsignedp);
2546 }
2547
2548 /* Likewise, but return 0 if that cannot be done. */
2549
2550 static rtx
2551 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2552 int amount, rtx target, int unsignedp)
2553 {
2554 return expand_shift_1 (code, mode,
2555 shifted, GEN_INT (amount), target, unsignedp, true);
2556 }
2557
2558 /* Output a shift instruction for expression code CODE,
2559 with SHIFTED being the rtx for the value to shift,
2560 and AMOUNT the tree for the amount to shift by.
2561 Store the result in the rtx TARGET, if that is convenient.
2562 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2563 Return the rtx for where the value is. */
2564
2565 rtx
2566 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2567 tree amount, rtx target, int unsignedp)
2568 {
2569 return expand_shift_1 (code, mode,
2570 shifted, expand_normal (amount), target, unsignedp);
2571 }
2572
2573 \f
2574 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2575 const struct mult_cost *, machine_mode mode);
2576 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2577 const struct algorithm *, enum mult_variant);
2578 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2579 static rtx extract_high_half (scalar_int_mode, rtx);
2580 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2581 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2582 int, int);
2583 /* Compute and return the best algorithm for multiplying by T.
2584 The algorithm must cost less than COST_LIMIT.
2585 If retval.cost >= COST_LIMIT, no algorithm was found and all
2586 other fields of the returned struct are undefined.
2587 MODE is the machine mode of the multiplication. */
2588
2589 static void
2590 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2591 const struct mult_cost *cost_limit, machine_mode mode)
2592 {
2593 int m;
2594 struct algorithm *alg_in, *best_alg;
2595 struct mult_cost best_cost;
2596 struct mult_cost new_limit;
2597 int op_cost, op_latency;
2598 unsigned HOST_WIDE_INT orig_t = t;
2599 unsigned HOST_WIDE_INT q;
2600 int maxm, hash_index;
2601 bool cache_hit = false;
2602 enum alg_code cache_alg = alg_zero;
2603 bool speed = optimize_insn_for_speed_p ();
2604 scalar_int_mode imode;
2605 struct alg_hash_entry *entry_ptr;
2606
2607 /* Indicate that no algorithm is yet found. If no algorithm
2608 is found, this value will be returned and indicate failure. */
2609 alg_out->cost.cost = cost_limit->cost + 1;
2610 alg_out->cost.latency = cost_limit->latency + 1;
2611
2612 if (cost_limit->cost < 0
2613 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2614 return;
2615
2616 /* Be prepared for vector modes. */
2617 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2618
2619 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2620
2621 /* Restrict the bits of "t" to the multiplication's mode. */
2622 t &= GET_MODE_MASK (imode);
2623
2624 /* t == 1 can be done in zero cost. */
2625 if (t == 1)
2626 {
2627 alg_out->ops = 1;
2628 alg_out->cost.cost = 0;
2629 alg_out->cost.latency = 0;
2630 alg_out->op[0] = alg_m;
2631 return;
2632 }
2633
2634 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2635 fail now. */
2636 if (t == 0)
2637 {
2638 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2639 return;
2640 else
2641 {
2642 alg_out->ops = 1;
2643 alg_out->cost.cost = zero_cost (speed);
2644 alg_out->cost.latency = zero_cost (speed);
2645 alg_out->op[0] = alg_zero;
2646 return;
2647 }
2648 }
2649
2650 /* We'll be needing a couple extra algorithm structures now. */
2651
2652 alg_in = XALLOCA (struct algorithm);
2653 best_alg = XALLOCA (struct algorithm);
2654 best_cost = *cost_limit;
2655
2656 /* Compute the hash index. */
2657 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2658
2659 /* See if we already know what to do for T. */
2660 entry_ptr = alg_hash_entry_ptr (hash_index);
2661 if (entry_ptr->t == t
2662 && entry_ptr->mode == mode
2663 && entry_ptr->speed == speed
2664 && entry_ptr->alg != alg_unknown)
2665 {
2666 cache_alg = entry_ptr->alg;
2667
2668 if (cache_alg == alg_impossible)
2669 {
2670 /* The cache tells us that it's impossible to synthesize
2671 multiplication by T within entry_ptr->cost. */
2672 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2673 /* COST_LIMIT is at least as restrictive as the one
2674 recorded in the hash table, in which case we have no
2675 hope of synthesizing a multiplication. Just
2676 return. */
2677 return;
2678
2679 /* If we get here, COST_LIMIT is less restrictive than the
2680 one recorded in the hash table, so we may be able to
2681 synthesize a multiplication. Proceed as if we didn't
2682 have the cache entry. */
2683 }
2684 else
2685 {
2686 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2687 /* The cached algorithm shows that this multiplication
2688 requires more cost than COST_LIMIT. Just return. This
2689 way, we don't clobber this cache entry with
2690 alg_impossible but retain useful information. */
2691 return;
2692
2693 cache_hit = true;
2694
2695 switch (cache_alg)
2696 {
2697 case alg_shift:
2698 goto do_alg_shift;
2699
2700 case alg_add_t_m2:
2701 case alg_sub_t_m2:
2702 goto do_alg_addsub_t_m2;
2703
2704 case alg_add_factor:
2705 case alg_sub_factor:
2706 goto do_alg_addsub_factor;
2707
2708 case alg_add_t2_m:
2709 goto do_alg_add_t2_m;
2710
2711 case alg_sub_t2_m:
2712 goto do_alg_sub_t2_m;
2713
2714 default:
2715 gcc_unreachable ();
2716 }
2717 }
2718 }
2719
2720 /* If we have a group of zero bits at the low-order part of T, try
2721 multiplying by the remaining bits and then doing a shift. */
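/* For example, T = 20 is handled as a multiplication by 5 followed by a
left shift of 2 (or an equivalent sequence of additions, whichever
expand_shift finds cheaper). */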
2722
2723 if ((t & 1) == 0)
2724 {
2725 do_alg_shift:
2726 m = ctz_or_zero (t); /* m = number of low zero bits */
2727 if (m < maxm)
2728 {
2729 q = t >> m;
2730 /* The function expand_shift will choose between a shift and
2731 a sequence of additions, so the observed cost is given as
2732 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2733 op_cost = m * add_cost (speed, mode);
2734 if (shift_cost (speed, mode, m) < op_cost)
2735 op_cost = shift_cost (speed, mode, m);
2736 new_limit.cost = best_cost.cost - op_cost;
2737 new_limit.latency = best_cost.latency - op_cost;
2738 synth_mult (alg_in, q, &new_limit, mode);
2739
2740 alg_in->cost.cost += op_cost;
2741 alg_in->cost.latency += op_cost;
2742 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2743 {
2744 best_cost = alg_in->cost;
2745 std::swap (alg_in, best_alg);
2746 best_alg->log[best_alg->ops] = m;
2747 best_alg->op[best_alg->ops] = alg_shift;
2748 }
2749
2750 /* See if treating ORIG_T as a signed number yields a better
2751 sequence. Try this sequence only for a negative ORIG_T
2752 as it would be useless for a non-negative ORIG_T. */
2753 if ((HOST_WIDE_INT) orig_t < 0)
2754 {
2755 /* Shift ORIG_T as follows because a right shift of a
2756 negative-valued signed type is
2757 implementation-defined. */
2758 q = ~(~orig_t >> m);
2759 /* The function expand_shift will choose between a shift
2760 and a sequence of additions, so the observed cost is
2761 given as MIN (m * add_cost(speed, mode),
2762 shift_cost(speed, mode, m)). */
2763 op_cost = m * add_cost (speed, mode);
2764 if (shift_cost (speed, mode, m) < op_cost)
2765 op_cost = shift_cost (speed, mode, m);
2766 new_limit.cost = best_cost.cost - op_cost;
2767 new_limit.latency = best_cost.latency - op_cost;
2768 synth_mult (alg_in, q, &new_limit, mode);
2769
2770 alg_in->cost.cost += op_cost;
2771 alg_in->cost.latency += op_cost;
2772 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2773 {
2774 best_cost = alg_in->cost;
2775 std::swap (alg_in, best_alg);
2776 best_alg->log[best_alg->ops] = m;
2777 best_alg->op[best_alg->ops] = alg_shift;
2778 }
2779 }
2780 }
2781 if (cache_hit)
2782 goto done;
2783 }
2784
2785 /* If we have an odd number, add or subtract one. */
2786 if ((t & 1) != 0)
2787 {
2788 unsigned HOST_WIDE_INT w;
2789
2790 do_alg_addsub_t_m2:
2791 for (w = 1; (w & t) != 0; w <<= 1)
2792 ;
2793 /* If T was -1, then W will be zero after the loop. This is another
2794 case where T ends with ...111. Handling this with (T + 1) and
2795 subtract 1 produces slightly better code and results in algorithm
2796 selection much faster than treating it like the ...0111 case
2797 below. */
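/* For example, T = 7 ends in ...111, so we synthesize T + 1 = 8 (a single
shift) and subtract: x * 7 = (x << 3) - x. */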
2798 if (w == 0
2799 || (w > 2
2800 /* Reject the case where t is 3.
2801 Thus we prefer addition in that case. */
2802 && t != 3))
2803 {
2804 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2805
2806 op_cost = add_cost (speed, mode);
2807 new_limit.cost = best_cost.cost - op_cost;
2808 new_limit.latency = best_cost.latency - op_cost;
2809 synth_mult (alg_in, t + 1, &new_limit, mode);
2810
2811 alg_in->cost.cost += op_cost;
2812 alg_in->cost.latency += op_cost;
2813 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2814 {
2815 best_cost = alg_in->cost;
2816 std::swap (alg_in, best_alg);
2817 best_alg->log[best_alg->ops] = 0;
2818 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2819 }
2820 }
2821 else
2822 {
2823 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2824
2825 op_cost = add_cost (speed, mode);
2826 new_limit.cost = best_cost.cost - op_cost;
2827 new_limit.latency = best_cost.latency - op_cost;
2828 synth_mult (alg_in, t - 1, &new_limit, mode);
2829
2830 alg_in->cost.cost += op_cost;
2831 alg_in->cost.latency += op_cost;
2832 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2833 {
2834 best_cost = alg_in->cost;
2835 std::swap (alg_in, best_alg);
2836 best_alg->log[best_alg->ops] = 0;
2837 best_alg->op[best_alg->ops] = alg_add_t_m2;
2838 }
2839 }
2840
2841 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2842 quickly with a - a * n for some appropriate constant n. */
2843 m = exact_log2 (-orig_t + 1);
2844 if (m >= 0 && m < maxm)
2845 {
2846 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2847 /* If the target has a cheap shift-and-subtract insn use
2848 that in preference to a shift insn followed by a sub insn.
2849 Assume that the shift-and-sub is "atomic" with a latency
2850 equal to its cost, otherwise assume that on superscalar
2851 hardware the shift may be executed concurrently with the
2852 earlier steps in the algorithm. */
2853 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2854 {
2855 op_cost = shiftsub1_cost (speed, mode, m);
2856 op_latency = op_cost;
2857 }
2858 else
2859 op_latency = add_cost (speed, mode);
2860
2861 new_limit.cost = best_cost.cost - op_cost;
2862 new_limit.latency = best_cost.latency - op_latency;
2863 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2864 &new_limit, mode);
2865
2866 alg_in->cost.cost += op_cost;
2867 alg_in->cost.latency += op_latency;
2868 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2869 {
2870 best_cost = alg_in->cost;
2871 std::swap (alg_in, best_alg);
2872 best_alg->log[best_alg->ops] = m;
2873 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2874 }
2875 }
2876
2877 if (cache_hit)
2878 goto done;
2879 }
2880
2881 /* Look for factors of t of the form
2882 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2883 If we find such a factor, we can multiply by t using an algorithm that
2884 multiplies by q, shift the result by m and add/subtract it to itself.
2885
2886 We search for large factors first and loop down, even if large factors
2887 are less probable than small; if we find a large factor we will find a
2888 good sequence quickly, and therefore be able to prune (by decreasing
2889 COST_LIMIT) the search. */
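/* For example, T = 45 factors as 9 * (2**2 + 1): synthesize x * 9 first,
then add that result shifted left by 2 to itself, giving
x * 45 = ((x * 9) << 2) + x * 9. */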
2890
2891 do_alg_addsub_factor:
2892 for (m = floor_log2 (t - 1); m >= 2; m--)
2893 {
2894 unsigned HOST_WIDE_INT d;
2895
2896 d = (HOST_WIDE_INT_1U << m) + 1;
2897 if (t % d == 0 && t > d && m < maxm
2898 && (!cache_hit || cache_alg == alg_add_factor))
2899 {
2900 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2901 if (shiftadd_cost (speed, mode, m) <= op_cost)
2902 op_cost = shiftadd_cost (speed, mode, m);
2903
2904 op_latency = op_cost;
2905
2906
2907 new_limit.cost = best_cost.cost - op_cost;
2908 new_limit.latency = best_cost.latency - op_latency;
2909 synth_mult (alg_in, t / d, &new_limit, mode);
2910
2911 alg_in->cost.cost += op_cost;
2912 alg_in->cost.latency += op_latency;
2913 if (alg_in->cost.latency < op_cost)
2914 alg_in->cost.latency = op_cost;
2915 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2916 {
2917 best_cost = alg_in->cost;
2918 std::swap (alg_in, best_alg);
2919 best_alg->log[best_alg->ops] = m;
2920 best_alg->op[best_alg->ops] = alg_add_factor;
2921 }
2922 /* Other factors will have been taken care of in the recursion. */
2923 break;
2924 }
2925
2926 d = (HOST_WIDE_INT_1U << m) - 1;
2927 if (t % d == 0 && t > d && m < maxm
2928 && (!cache_hit || cache_alg == alg_sub_factor))
2929 {
2930 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2931 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2932 op_cost = shiftsub0_cost (speed, mode, m);
2933
2934 op_latency = op_cost;
2935
2936 new_limit.cost = best_cost.cost - op_cost;
2937 new_limit.latency = best_cost.latency - op_latency;
2938 synth_mult (alg_in, t / d, &new_limit, mode);
2939
2940 alg_in->cost.cost += op_cost;
2941 alg_in->cost.latency += op_latency;
2942 if (alg_in->cost.latency < op_cost)
2943 alg_in->cost.latency = op_cost;
2944 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2945 {
2946 best_cost = alg_in->cost;
2947 std::swap (alg_in, best_alg);
2948 best_alg->log[best_alg->ops] = m;
2949 best_alg->op[best_alg->ops] = alg_sub_factor;
2950 }
2951 break;
2952 }
2953 }
2954 if (cache_hit)
2955 goto done;
2956
2957 /* Try shift-and-add (load effective address) instructions,
2958 i.e. do a*3, a*5, a*9. */
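/* For example, T = 9 gives q = 8, m = 3: the recursive multiplication is
by 1, and the final step forms x * 9 = (x << 3) + x, which many targets
implement as a single shift-and-add (lea-like) instruction. */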
2959 if ((t & 1) != 0)
2960 {
2961 do_alg_add_t2_m:
2962 q = t - 1;
2963 m = ctz_hwi (q);
2964 if (q && m < maxm)
2965 {
2966 op_cost = shiftadd_cost (speed, mode, m);
2967 new_limit.cost = best_cost.cost - op_cost;
2968 new_limit.latency = best_cost.latency - op_cost;
2969 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2970
2971 alg_in->cost.cost += op_cost;
2972 alg_in->cost.latency += op_cost;
2973 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2974 {
2975 best_cost = alg_in->cost;
2976 std::swap (alg_in, best_alg);
2977 best_alg->log[best_alg->ops] = m;
2978 best_alg->op[best_alg->ops] = alg_add_t2_m;
2979 }
2980 }
2981 if (cache_hit)
2982 goto done;
2983
2984 do_alg_sub_t2_m:
2985 q = t + 1;
2986 m = ctz_hwi (q);
2987 if (q && m < maxm)
2988 {
2989 op_cost = shiftsub0_cost (speed, mode, m);
2990 new_limit.cost = best_cost.cost - op_cost;
2991 new_limit.latency = best_cost.latency - op_cost;
2992 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2993
2994 alg_in->cost.cost += op_cost;
2995 alg_in->cost.latency += op_cost;
2996 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2997 {
2998 best_cost = alg_in->cost;
2999 std::swap (alg_in, best_alg);
3000 best_alg->log[best_alg->ops] = m;
3001 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3002 }
3003 }
3004 if (cache_hit)
3005 goto done;
3006 }
3007
3008 done:
3009 /* If best_cost has not decreased, we have not found any algorithm. */
3010 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3011 {
3012 /* We failed to find an algorithm. Record alg_impossible for
3013 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3014 we are asked to find an algorithm for T within the same or
3015 lower COST_LIMIT, we can immediately return to the
3016 caller. */
3017 entry_ptr->t = t;
3018 entry_ptr->mode = mode;
3019 entry_ptr->speed = speed;
3020 entry_ptr->alg = alg_impossible;
3021 entry_ptr->cost = *cost_limit;
3022 return;
3023 }
3024
3025 /* Cache the result. */
3026 if (!cache_hit)
3027 {
3028 entry_ptr->t = t;
3029 entry_ptr->mode = mode;
3030 entry_ptr->speed = speed;
3031 entry_ptr->alg = best_alg->op[best_alg->ops];
3032 entry_ptr->cost.cost = best_cost.cost;
3033 entry_ptr->cost.latency = best_cost.latency;
3034 }
3035
3036 /* If we are getting too long a sequence for `struct algorithm'
3037 to record, make this search fail. */
3038 if (best_alg->ops == MAX_BITS_PER_WORD)
3039 return;
3040
3041 /* Copy the algorithm from temporary space to the space at alg_out.
3042 We avoid using structure assignment because the majority of
3043 best_alg is normally undefined, and this is a critical function. */
3044 alg_out->ops = best_alg->ops + 1;
3045 alg_out->cost = best_cost;
3046 memcpy (alg_out->op, best_alg->op,
3047 alg_out->ops * sizeof *alg_out->op);
3048 memcpy (alg_out->log, best_alg->log,
3049 alg_out->ops * sizeof *alg_out->log);
3050 }
3051 \f
3052 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3053 Try three variations:
3054
3055 - a shift/add sequence based on VAL itself
3056 - a shift/add sequence based on -VAL, followed by a negation
3057 - a shift/add sequence based on VAL - 1, followed by an addition.
3058
3059 Return true if the cheapest of these cost less than MULT_COST,
3060 describing the algorithm in *ALG and final fixup in *VARIANT. */
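/* For example, for VAL = -3 it is usually cheaper to synthesize a
multiplication by 3 (x + (x << 1)) and negate the result than to operate
on the wide unsigned bit pattern of -3 directly. */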
3061
3062 bool
3063 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3064 struct algorithm *alg, enum mult_variant *variant,
3065 int mult_cost)
3066 {
3067 struct algorithm alg2;
3068 struct mult_cost limit;
3069 int op_cost;
3070 bool speed = optimize_insn_for_speed_p ();
3071
3072 /* Fail quickly for impossible bounds. */
3073 if (mult_cost < 0)
3074 return false;
3075
3076 /* Ensure that mult_cost provides a reasonable upper bound.
3077 Any constant multiplication can be performed with less
3078 than 2 * bits additions. */
3079 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3080 if (mult_cost > op_cost)
3081 mult_cost = op_cost;
3082
3083 *variant = basic_variant;
3084 limit.cost = mult_cost;
3085 limit.latency = mult_cost;
3086 synth_mult (alg, val, &limit, mode);
3087
3088 /* This works only if the inverted value actually fits in an
3089 `unsigned int'. */
3090 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3091 {
3092 op_cost = neg_cost (speed, mode);
3093 if (MULT_COST_LESS (&alg->cost, mult_cost))
3094 {
3095 limit.cost = alg->cost.cost - op_cost;
3096 limit.latency = alg->cost.latency - op_cost;
3097 }
3098 else
3099 {
3100 limit.cost = mult_cost - op_cost;
3101 limit.latency = mult_cost - op_cost;
3102 }
3103
3104 synth_mult (&alg2, -val, &limit, mode);
3105 alg2.cost.cost += op_cost;
3106 alg2.cost.latency += op_cost;
3107 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3108 *alg = alg2, *variant = negate_variant;
3109 }
3110
3111 /* This proves very useful for division-by-constant. */
3112 op_cost = add_cost (speed, mode);
3113 if (MULT_COST_LESS (&alg->cost, mult_cost))
3114 {
3115 limit.cost = alg->cost.cost - op_cost;
3116 limit.latency = alg->cost.latency - op_cost;
3117 }
3118 else
3119 {
3120 limit.cost = mult_cost - op_cost;
3121 limit.latency = mult_cost - op_cost;
3122 }
3123
3124 synth_mult (&alg2, val - 1, &limit, mode);
3125 alg2.cost.cost += op_cost;
3126 alg2.cost.latency += op_cost;
3127 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3128 *alg = alg2, *variant = add_variant;
3129
3130 return MULT_COST_LESS (&alg->cost, mult_cost);
3131 }
3132
3133 /* A subroutine of expand_mult, used for constant multiplications.
3134 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3135 convenient. Use the shift/add sequence described by ALG and apply
3136 the final fixup specified by VARIANT. */
3137
3138 static rtx
3139 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3140 rtx target, const struct algorithm *alg,
3141 enum mult_variant variant)
3142 {
3143 unsigned HOST_WIDE_INT val_so_far;
3144 rtx_insn *insn;
3145 rtx accum, tem;
3146 int opno;
3147 machine_mode nmode;
3148
3149 /* Avoid referencing memory over and over and invalid sharing
3150 on SUBREGs. */
3151 op0 = force_reg (mode, op0);
3152
3153 /* ACCUM starts out either as OP0 or as a zero, depending on
3154 the first operation. */
3155
3156 if (alg->op[0] == alg_zero)
3157 {
3158 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3159 val_so_far = 0;
3160 }
3161 else if (alg->op[0] == alg_m)
3162 {
3163 accum = copy_to_mode_reg (mode, op0);
3164 val_so_far = 1;
3165 }
3166 else
3167 gcc_unreachable ();
3168
3169 for (opno = 1; opno < alg->ops; opno++)
3170 {
3171 int log = alg->log[opno];
3172 rtx shift_subtarget = optimize ? 0 : accum;
3173 rtx add_target
3174 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3175 && !optimize)
3176 ? target : 0;
3177 rtx accum_target = optimize ? 0 : accum;
3178 rtx accum_inner;
3179
3180 switch (alg->op[opno])
3181 {
3182 case alg_shift:
3183 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3184 /* REG_EQUAL note will be attached to the following insn. */
3185 emit_move_insn (accum, tem);
3186 val_so_far <<= log;
3187 break;
3188
3189 case alg_add_t_m2:
3190 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3191 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3192 add_target ? add_target : accum_target);
3193 val_so_far += HOST_WIDE_INT_1U << log;
3194 break;
3195
3196 case alg_sub_t_m2:
3197 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3198 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3199 add_target ? add_target : accum_target);
3200 val_so_far -= HOST_WIDE_INT_1U << log;
3201 break;
3202
3203 case alg_add_t2_m:
3204 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3205 log, shift_subtarget, 0);
3206 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3207 add_target ? add_target : accum_target);
3208 val_so_far = (val_so_far << log) + 1;
3209 break;
3210
3211 case alg_sub_t2_m:
3212 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3213 log, shift_subtarget, 0);
3214 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3215 add_target ? add_target : accum_target);
3216 val_so_far = (val_so_far << log) - 1;
3217 break;
3218
3219 case alg_add_factor:
3220 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3221 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3222 add_target ? add_target : accum_target);
3223 val_so_far += val_so_far << log;
3224 break;
3225
3226 case alg_sub_factor:
3227 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3228 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3229 (add_target
3230 ? add_target : (optimize ? 0 : tem)));
3231 val_so_far = (val_so_far << log) - val_so_far;
3232 break;
3233
3234 default:
3235 gcc_unreachable ();
3236 }
3237
3238 if (SCALAR_INT_MODE_P (mode))
3239 {
3240 /* Write a REG_EQUAL note on the last insn so that we can cse
3241 multiplication sequences. Note that if ACCUM is a SUBREG,
3242 we've set the inner register and must properly indicate that. */
3243 tem = op0, nmode = mode;
3244 accum_inner = accum;
3245 if (GET_CODE (accum) == SUBREG)
3246 {
3247 accum_inner = SUBREG_REG (accum);
3248 nmode = GET_MODE (accum_inner);
3249 tem = gen_lowpart (nmode, op0);
3250 }
3251
3252 insn = get_last_insn ();
3253 set_dst_reg_note (insn, REG_EQUAL,
3254 gen_rtx_MULT (nmode, tem,
3255 gen_int_mode (val_so_far, nmode)),
3256 accum_inner);
3257 }
3258 }
3259
3260 if (variant == negate_variant)
3261 {
3262 val_so_far = -val_so_far;
3263 accum = expand_unop (mode, neg_optab, accum, target, 0);
3264 }
3265 else if (variant == add_variant)
3266 {
3267 val_so_far = val_so_far + 1;
3268 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3269 }
3270
3271 /* Compare only the bits of val and val_so_far that are significant
3272 in the result mode, to avoid sign-/zero-extension confusion. */
3273 nmode = GET_MODE_INNER (mode);
3274 val &= GET_MODE_MASK (nmode);
3275 val_so_far &= GET_MODE_MASK (nmode);
3276 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3277
3278 return accum;
3279 }
3280
3281 /* Perform a multiplication and return an rtx for the result.
3282 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3283 TARGET is a suggestion for where to store the result (an rtx).
3284
3285 We check specially for a constant integer as OP1.
3286 If you want this check for OP0 as well, then before calling
3287 you should swap the two operands if OP0 would be constant. */
3288
3289 rtx
3290 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3291 int unsignedp)
3292 {
3293 enum mult_variant variant;
3294 struct algorithm algorithm;
3295 rtx scalar_op1;
3296 int max_cost;
3297 bool speed = optimize_insn_for_speed_p ();
3298 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3299
3300 if (CONSTANT_P (op0))
3301 std::swap (op0, op1);
3302
3303 /* For vectors, there are several simplifications that can be made if
3304 all elements of the vector constant are identical. */
3305 scalar_op1 = unwrap_const_vec_duplicate (op1);
3306
3307 if (INTEGRAL_MODE_P (mode))
3308 {
3309 rtx fake_reg;
3310 HOST_WIDE_INT coeff;
3311 bool is_neg;
3312 int mode_bitsize;
3313
3314 if (op1 == CONST0_RTX (mode))
3315 return op1;
3316 if (op1 == CONST1_RTX (mode))
3317 return op0;
3318 if (op1 == CONSTM1_RTX (mode))
3319 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3320 op0, target, 0);
3321
3322 if (do_trapv)
3323 goto skip_synth;
3324
3325 /* If mode is integer vector mode, check if the backend supports
3326 vector lshift (by scalar or vector) at all. If not, we can't use
3327 synthesized multiply. */
3328 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3329 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3330 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3331 goto skip_synth;
3332
3333 /* These are the operations that are potentially turned into
3334 a sequence of shifts and additions. */
3335 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3336
3337 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3338 less than or equal in size to `unsigned int' this doesn't matter.
3339 If the mode is larger than `unsigned int', then synth_mult works
3340 only if the constant value exactly fits in an `unsigned int' without
3341 any truncation. This means that multiplying by negative values does
3342 not work; results are off by 2^32 on a 32 bit machine. */
3343 if (CONST_INT_P (scalar_op1))
3344 {
3345 coeff = INTVAL (scalar_op1);
3346 is_neg = coeff < 0;
3347 }
3348 #if TARGET_SUPPORTS_WIDE_INT
3349 else if (CONST_WIDE_INT_P (scalar_op1))
3350 #else
3351 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3352 #endif
3353 {
3354 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3355 /* Perfect power of 2 (other than 1, which is handled above). */
3356 if (shift > 0)
3357 return expand_shift (LSHIFT_EXPR, mode, op0,
3358 shift, target, unsignedp);
3359 else
3360 goto skip_synth;
3361 }
3362 else
3363 goto skip_synth;
3364
3365 /* We used to test optimize here, on the grounds that it's better to
3366 produce a smaller program when -O is not used. But this causes
3367 such a terrible slowdown sometimes that it seems better to always
3368 use synth_mult. */
3369
3370 /* Special case powers of two. */
3371 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3372 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3373 return expand_shift (LSHIFT_EXPR, mode, op0,
3374 floor_log2 (coeff), target, unsignedp);
3375
3376 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3377
3378 /* Attempt to handle multiplication of DImode values by negative
3379 coefficients, by performing the multiplication by a positive
3380 multiplier and then negating the result. */
3381 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3382 {
3383 /* It's safe to use -coeff even for INT_MIN, as the
3384 result is interpreted as an unsigned coefficient.
3385 Exclude cost of op0 from max_cost to match the cost
3386 calculation of the synth_mult. */
3387 coeff = -(unsigned HOST_WIDE_INT) coeff;
3388 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3389 mode, speed)
3390 - neg_cost (speed, mode));
3391 if (max_cost <= 0)
3392 goto skip_synth;
3393
3394 /* Special case powers of two. */
3395 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3396 {
3397 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3398 floor_log2 (coeff), target, unsignedp);
3399 return expand_unop (mode, neg_optab, temp, target, 0);
3400 }
3401
3402 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3403 max_cost))
3404 {
3405 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3406 &algorithm, variant);
3407 return expand_unop (mode, neg_optab, temp, target, 0);
3408 }
3409 goto skip_synth;
3410 }
3411
3412 /* Exclude cost of op0 from max_cost to match the cost
3413 calculation of the synth_mult. */
3414 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3415 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3416 return expand_mult_const (mode, op0, coeff, target,
3417 &algorithm, variant);
3418 }
3419 skip_synth:
3420
3421 /* Expand x*2.0 as x+x. */
3422 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3423 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3424 {
3425 op0 = force_reg (GET_MODE (op0), op0);
3426 return expand_binop (mode, add_optab, op0, op0,
3427 target, unsignedp, OPTAB_LIB_WIDEN);
3428 }
3429
3430 /* This used to use umul_optab if unsigned, but for non-widening multiply
3431 there is no difference between signed and unsigned. */
3432 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3433 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3434 gcc_assert (op0);
3435 return op0;
3436 }
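
/* Illustrative sketch (not part of the compiler): the kind of shift-and-add
   sequence that choose_mult_variant/expand_mult_const arrange for a small
   constant multiplier.  For example x * 10 becomes ((x << 2) + x) << 1,
   an add-with-shift step followed by a final shift, which is usually cheaper
   than a general multiply on targets with slow multipliers.  */

static inline unsigned int
sketch_mult_by_10 (unsigned int x)
{
  unsigned int t = (x << 2) + x;	/* t = 5 * x */
  return t << 1;			/* 10 * x */
}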
3437
3438 /* Return a cost estimate for multiplying a register by the given
3439 COEFFicient in the given MODE and SPEED. */
3440
3441 int
3442 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3443 {
3444 int max_cost;
3445 struct algorithm algorithm;
3446 enum mult_variant variant;
3447
3448 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3449 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3450 mode, speed);
3451 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3452 return algorithm.cost.cost;
3453 else
3454 return max_cost;
3455 }
3456
3457 /* Perform a widening multiplication and return an rtx for the result.
3458 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3459 TARGET is a suggestion for where to store the result (an rtx).
3460 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3461 or smul_widen_optab.
3462
3463 We check specially for a constant integer as OP1, comparing the
3464 cost of a widening multiply against the cost of a sequence of shifts
3465 and adds. */
3466
3467 rtx
3468 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3469 int unsignedp, optab this_optab)
3470 {
3471 bool speed = optimize_insn_for_speed_p ();
3472 rtx cop1;
3473
3474 if (CONST_INT_P (op1)
3475 && GET_MODE (op0) != VOIDmode
3476 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3477 this_optab == umul_widen_optab))
3478 && CONST_INT_P (cop1)
3479 && (INTVAL (cop1) >= 0
3480 || HWI_COMPUTABLE_MODE_P (mode)))
3481 {
3482 HOST_WIDE_INT coeff = INTVAL (cop1);
3483 int max_cost;
3484 enum mult_variant variant;
3485 struct algorithm algorithm;
3486
3487 if (coeff == 0)
3488 return CONST0_RTX (mode);
3489
3490 /* Special case powers of two. */
3491 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3492 {
3493 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3494 return expand_shift (LSHIFT_EXPR, mode, op0,
3495 floor_log2 (coeff), target, unsignedp);
3496 }
3497
3498 /* Exclude cost of op0 from max_cost to match the cost
3499 calculation of the synth_mult. */
3500 max_cost = mul_widen_cost (speed, mode);
3501 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3502 max_cost))
3503 {
3504 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3505 return expand_mult_const (mode, op0, coeff, target,
3506 &algorithm, variant);
3507 }
3508 }
3509 return expand_binop (mode, this_optab, op0, op1, target,
3510 unsignedp, OPTAB_LIB_WIDEN);
3511 }
3512 \f
3513 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3514 replace division by D, and put the least significant N bits of the result
3515 in *MULTIPLIER_PTR and return the most significant bit.
3516
3517 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3518 needed precision is in PRECISION (should be <= N).
3519
3520 PRECISION should be as small as possible so this function can choose
3521 the multiplier more freely.
3522
3523 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3524 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3525
3526 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3527 where m is the full N + 1 bit multiplier. */
3528
3529 unsigned HOST_WIDE_INT
3530 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3531 unsigned HOST_WIDE_INT *multiplier_ptr,
3532 int *post_shift_ptr, int *lgup_ptr)
3533 {
3534 int lgup, post_shift;
3535 int pow, pow2;
3536
3537 /* lgup = ceil(log2(divisor)); */
3538 lgup = ceil_log2 (d);
3539
3540 gcc_assert (lgup <= n);
3541
3542 pow = n + lgup;
3543 pow2 = n + lgup - precision;
3544
3545 /* mlow = 2^(N + lgup)/d */
3546 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3547 wide_int mlow = wi::udiv_trunc (val, d);
3548
3549 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3550 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3551 wide_int mhigh = wi::udiv_trunc (val, d);
3552
3553 /* If precision == N, then mlow, mhigh exceed 2^N
3554 (but they do not exceed 2^(N+1)). */
3555
3556 /* Reduce to lowest terms. */
3557 for (post_shift = lgup; post_shift > 0; post_shift--)
3558 {
3559 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3560 HOST_BITS_PER_WIDE_INT);
3561 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3562 HOST_BITS_PER_WIDE_INT);
3563 if (ml_lo >= mh_lo)
3564 break;
3565
3566 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3567 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3568 }
3569
3570 *post_shift_ptr = post_shift;
3571 *lgup_ptr = lgup;
3572 if (n < HOST_BITS_PER_WIDE_INT)
3573 {
3574 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3575 *multiplier_ptr = mhigh.to_uhwi () & mask;
3576 return mhigh.to_uhwi () >= mask;
3577 }
3578 else
3579 {
3580 *multiplier_ptr = mhigh.to_uhwi ();
3581 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3582 }
3583 }
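
/* Illustrative sketch (not part of the compiler): how the values computed by
   choose_multiplier are used.  For a 32-bit unsigned division by 3,
   choose_multiplier (3, 32, 32, ...) yields the multiplier 0xAAAAAAAB with a
   post-shift of 1, so x / 3 is the high half of the 32x32->64 product,
   shifted right once more.  Assuming a 64-bit unsigned long long:  */

static inline unsigned int
sketch_udiv_by_3 (unsigned int x)
{
  unsigned long long prod = (unsigned long long) x * 0xAAAAAAABULL;
  return (unsigned int) (prod >> 33);	/* high half, then >> post_shift */
}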
3584
3585 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3586 congruent to 1 (mod 2**N). */
3587
3588 static unsigned HOST_WIDE_INT
3589 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3590 {
3591 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3592
3593 /* The algorithm notes that the choice y = x satisfies
3594 x*y == 1 mod 2^3, since x is assumed odd.
3595 Each iteration doubles the number of bits of significance in y. */
3596
3597 unsigned HOST_WIDE_INT mask;
3598 unsigned HOST_WIDE_INT y = x;
3599 int nbit = 3;
3600
3601 mask = (n == HOST_BITS_PER_WIDE_INT
3602 ? HOST_WIDE_INT_M1U
3603 : (HOST_WIDE_INT_1U << n) - 1);
3604
3605 while (nbit < n)
3606 {
3607 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3608 nbit *= 2;
3609 }
3610 return y;
3611 }
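
/* Illustrative sketch (not part of the compiler): invert_mod2n at work.  The
   inverse of 3 modulo 2^32 is 0xAAAAAAAB, since 3 * 0xAAAAAAAB == 0x200000001
   == 1 (mod 2^32).  The EXACT_DIV_EXPR case in expand_divmod below relies on
   this: when X is known to be a multiple of 3, X / 3 is just a modular
   multiply, with no division at all.  Assuming a 32-bit unsigned int:  */

static inline unsigned int
sketch_exact_div_by_3 (unsigned int x)
{
  /* Only valid when x % 3 == 0.  */
  return x * 0xAAAAAAABu;
}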
3612
3613 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3614 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3615 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3616 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3617 become signed.
3618
3619 The result is put in TARGET if that is convenient.
3620
3621 MODE is the mode of operation. */
3622
3623 rtx
3624 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3625 rtx op1, rtx target, int unsignedp)
3626 {
3627 rtx tem;
3628 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3629
3630 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3631 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3632 tem = expand_and (mode, tem, op1, NULL_RTX);
3633 adj_operand
3634 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3635 adj_operand);
3636
3637 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3638 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3639 tem = expand_and (mode, tem, op0, NULL_RTX);
3640 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3641 target);
3642
3643 return target;
3644 }
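
/* Illustrative sketch (not part of the compiler) of the adjustment above:
   for N-bit operands the unsigned and signed high halves of a product differ
   by the terms conditionally added in under the sign masks, i.e.
     high_u (a, b) == high_s (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)
   taken modulo 2^N.  A 16-bit demonstration, assuming 16-bit short, 32-bit
   int and arithmetic right shifts of negative values:  */

static inline unsigned short
sketch_umulh16_from_smulh16 (short a, short b)
{
  unsigned short adj = (unsigned short) (((int) a * (int) b) >> 16);
  adj += (unsigned short) ((a >> 15) & b);	/* add b if a < 0 */
  adj += (unsigned short) ((b >> 15) & a);	/* add a if b < 0 */
  return adj;
}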
3645
3646 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3647
3648 static rtx
3649 extract_high_half (scalar_int_mode mode, rtx op)
3650 {
3651 if (mode == word_mode)
3652 return gen_highpart (mode, op);
3653
3654 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3655
3656 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3657 GET_MODE_BITSIZE (mode), 0, 1);
3658 return convert_modes (mode, wider_mode, op, 0);
3659 }
3660
3661 /* Like expmed_mult_highpart, but only consider using a multiplication
3662 optab. OP1 is an rtx for the constant operand. */
3663
3664 static rtx
3665 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3666 rtx target, int unsignedp, int max_cost)
3667 {
3668 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3669 optab moptab;
3670 rtx tem;
3671 int size;
3672 bool speed = optimize_insn_for_speed_p ();
3673
3674 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3675
3676 size = GET_MODE_BITSIZE (mode);
3677
3678 /* Firstly, try using a multiplication insn that only generates the needed
3679 high part of the product, and in the sign flavor of unsignedp. */
3680 if (mul_highpart_cost (speed, mode) < max_cost)
3681 {
3682 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3683 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3684 unsignedp, OPTAB_DIRECT);
3685 if (tem)
3686 return tem;
3687 }
3688
3689 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3690 Need to adjust the result after the multiplication. */
3691 if (size - 1 < BITS_PER_WORD
3692 && (mul_highpart_cost (speed, mode)
3693 + 2 * shift_cost (speed, mode, size-1)
3694 + 4 * add_cost (speed, mode) < max_cost))
3695 {
3696 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3697 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3698 unsignedp, OPTAB_DIRECT);
3699 if (tem)
3700 /* We used the wrong signedness. Adjust the result. */
3701 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3702 tem, unsignedp);
3703 }
3704
3705 /* Try widening multiplication. */
3706 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3707 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3708 && mul_widen_cost (speed, wider_mode) < max_cost)
3709 {
3710 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3711 unsignedp, OPTAB_WIDEN);
3712 if (tem)
3713 return extract_high_half (mode, tem);
3714 }
3715
3716 /* Try widening the mode and perform a non-widening multiplication. */
3717 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3718 && size - 1 < BITS_PER_WORD
3719 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3720 < max_cost))
3721 {
3722 rtx_insn *insns;
3723 rtx wop0, wop1;
3724
3725 /* We need to widen the operands, for example to ensure the
3726 constant multiplier is correctly sign or zero extended.
3727 Use a sequence to clean up any instructions emitted by
3728 the conversions if things don't work out. */
3729 start_sequence ();
3730 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3731 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3732 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3733 unsignedp, OPTAB_WIDEN);
3734 insns = get_insns ();
3735 end_sequence ();
3736
3737 if (tem)
3738 {
3739 emit_insn (insns);
3740 return extract_high_half (mode, tem);
3741 }
3742 }
3743
3744 /* Try widening multiplication of opposite signedness, and adjust. */
3745 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3746 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3747 && size - 1 < BITS_PER_WORD
3748 && (mul_widen_cost (speed, wider_mode)
3749 + 2 * shift_cost (speed, mode, size-1)
3750 + 4 * add_cost (speed, mode) < max_cost))
3751 {
3752 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3753 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3754 if (tem != 0)
3755 {
3756 tem = extract_high_half (mode, tem);
3757 /* We used the wrong signedness. Adjust the result. */
3758 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3759 target, unsignedp);
3760 }
3761 }
3762
3763 return 0;
3764 }
3765
3766 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3767 putting the high half of the result in TARGET if that is convenient,
3768 and returning where the result is. If the operation cannot be performed,
3769 0 is returned.
3770
3771 MODE is the mode of operation and result.
3772
3773 UNSIGNEDP nonzero means unsigned multiply.
3774
3775 MAX_COST is the total allowed cost for the expanded RTL. */
3776
3777 static rtx
3778 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3779 rtx target, int unsignedp, int max_cost)
3780 {
3781 unsigned HOST_WIDE_INT cnst1;
3782 int extra_cost;
3783 bool sign_adjust = false;
3784 enum mult_variant variant;
3785 struct algorithm alg;
3786 rtx tem;
3787 bool speed = optimize_insn_for_speed_p ();
3788
3789 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3790 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3791
3792 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3793
3794 /* We can't optimize modes wider than BITS_PER_WORD.
3795 ??? We might be able to perform double-word arithmetic if
3796 mode == word_mode, however all the cost calculations in
3797 synth_mult etc. assume single-word operations. */
3798 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3799 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3800 return expmed_mult_highpart_optab (mode, op0, op1, target,
3801 unsignedp, max_cost);
3802
3803 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3804
3805 /* Check whether we try to multiply by a negative constant. */
3806 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3807 {
3808 sign_adjust = true;
3809 extra_cost += add_cost (speed, mode);
3810 }
3811
3812 /* See whether shift/add multiplication is cheap enough. */
3813 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3814 max_cost - extra_cost))
3815 {
3816 /* See whether the specialized multiplication optabs are
3817 cheaper than the shift/add version. */
3818 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3819 alg.cost.cost + extra_cost);
3820 if (tem)
3821 return tem;
3822
3823 tem = convert_to_mode (wider_mode, op0, unsignedp);
3824 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3825 tem = extract_high_half (mode, tem);
3826
3827 /* Adjust result for signedness. */
3828 if (sign_adjust)
3829 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3830
3831 return tem;
3832 }
3833 return expmed_mult_highpart_optab (mode, op0, op1, target,
3834 unsignedp, max_cost);
3835 }
3836
3837
3838 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3839
3840 static rtx
3841 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3842 {
3843 rtx result, temp, shift;
3844 rtx_code_label *label;
3845 int logd;
3846 int prec = GET_MODE_PRECISION (mode);
3847
3848 logd = floor_log2 (d);
3849 result = gen_reg_rtx (mode);
3850
3851 /* Avoid conditional branches when they're expensive. */
3852 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3853 && optimize_insn_for_speed_p ())
3854 {
3855 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3856 mode, 0, -1);
3857 if (signmask)
3858 {
3859 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3860 signmask = force_reg (mode, signmask);
3861 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3862
3863 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3864 which instruction sequence to use. If logical right shifts
3865 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3866 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3867
3868 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3869 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3870 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3871 > COSTS_N_INSNS (2)))
3872 {
3873 temp = expand_binop (mode, xor_optab, op0, signmask,
3874 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3875 temp = expand_binop (mode, sub_optab, temp, signmask,
3876 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3877 temp = expand_binop (mode, and_optab, temp,
3878 gen_int_mode (masklow, mode),
3879 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3880 temp = expand_binop (mode, xor_optab, temp, signmask,
3881 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3882 temp = expand_binop (mode, sub_optab, temp, signmask,
3883 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3884 }
3885 else
3886 {
3887 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3888 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3889 signmask = force_reg (mode, signmask);
3890
3891 temp = expand_binop (mode, add_optab, op0, signmask,
3892 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3893 temp = expand_binop (mode, and_optab, temp,
3894 gen_int_mode (masklow, mode),
3895 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3896 temp = expand_binop (mode, sub_optab, temp, signmask,
3897 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3898 }
3899 return temp;
3900 }
3901 }
3902
3903 /* Mask contains the mode's signbit and the significant bits of the
3904 modulus. By including the signbit in the operation, many targets
3905 can avoid an explicit compare operation in the following comparison
3906 against zero. */
3907 wide_int mask = wi::mask (logd, false, prec);
3908 mask = wi::set_bit (mask, prec - 1);
3909
3910 temp = expand_binop (mode, and_optab, op0,
3911 immed_wide_int_const (mask, mode),
3912 result, 1, OPTAB_LIB_WIDEN);
3913 if (temp != result)
3914 emit_move_insn (result, temp);
3915
3916 label = gen_label_rtx ();
3917 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3918
3919 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3920 0, OPTAB_LIB_WIDEN);
3921
3922 mask = wi::mask (logd, true, prec);
3923 temp = expand_binop (mode, ior_optab, temp,
3924 immed_wide_int_const (mask, mode),
3925 result, 1, OPTAB_LIB_WIDEN);
3926 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3927 0, OPTAB_LIB_WIDEN);
3928 if (temp != result)
3929 emit_move_insn (result, temp);
3930 emit_label (label);
3931 return result;
3932 }
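
/* Illustrative sketch (not part of the compiler) of the branch-free signed
   modulus emitted above (the cheap-shift variant): bias the dividend by
   D - 1 when it is negative, mask, then remove the bias again.  Assuming
   32-bit int and arithmetic right shifts, x % 8 becomes:  */

static inline int
sketch_smod_by_8 (int x)
{
  int signmask = x >> 31;				/* -1 if x < 0, else 0 */
  int bias = (int) ((unsigned int) signmask >> (32 - 3));	/* 7 or 0 */
  return ((x + bias) & 7) - bias;			/* truncating x % 8 */
}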
3933
3934 /* Expand signed division of OP0 by a power of two D in mode MODE.
3935 This routine is only called for positive values of D. */
3936
3937 static rtx
3938 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3939 {
3940 rtx temp;
3941 rtx_code_label *label;
3942 int logd;
3943
3944 logd = floor_log2 (d);
3945
3946 if (d == 2
3947 && BRANCH_COST (optimize_insn_for_speed_p (),
3948 false) >= 1)
3949 {
3950 temp = gen_reg_rtx (mode);
3951 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3952 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3953 0, OPTAB_LIB_WIDEN);
3954 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3955 }
3956
3957 if (HAVE_conditional_move
3958 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3959 {
3960 rtx temp2;
3961
3962 start_sequence ();
3963 temp2 = copy_to_mode_reg (mode, op0);
3964 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3965 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3966 temp = force_reg (mode, temp);
3967
3968 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3969 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3970 mode, temp, temp2, mode, 0);
3971 if (temp2)
3972 {
3973 rtx_insn *seq = get_insns ();
3974 end_sequence ();
3975 emit_insn (seq);
3976 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3977 }
3978 end_sequence ();
3979 }
3980
3981 if (BRANCH_COST (optimize_insn_for_speed_p (),
3982 false) >= 2)
3983 {
3984 int ushift = GET_MODE_BITSIZE (mode) - logd;
3985
3986 temp = gen_reg_rtx (mode);
3987 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3988 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3989 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3990 > COSTS_N_INSNS (1))
3991 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3992 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3993 else
3994 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3995 ushift, NULL_RTX, 1);
3996 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3997 0, OPTAB_LIB_WIDEN);
3998 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3999 }
4000
4001 label = gen_label_rtx ();
4002 temp = copy_to_mode_reg (mode, op0);
4003 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4004 expand_inc (temp, gen_int_mode (d - 1, mode));
4005 emit_label (label);
4006 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4007 }
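
/* Illustrative sketch (not part of the compiler) of the branch-free signed
   division by a power of two generated above: add D - 1 to negative
   dividends so the arithmetic shift truncates toward zero.  Assuming 32-bit
   int and arithmetic right shifts, x / 4 becomes:  */

static inline int
sketch_sdiv_by_4 (int x)
{
  int bias = (x >> 31) & 3;	/* 3 if x < 0, else 0 */
  return (x + bias) >> 2;	/* truncating x / 4 */
}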
4008 \f
4009 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4010 if that is convenient, and returning where the result is.
4011 You may request either the quotient or the remainder as the result;
4012 specify REM_FLAG nonzero to get the remainder.
4013
4014 CODE is the expression code for which kind of division this is;
4015 it controls how rounding is done. MODE is the machine mode to use.
4016 UNSIGNEDP nonzero means do unsigned division. */
4017
4018 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4019 and then correct it by or'ing in missing high bits
4020 if result of ANDI is nonzero.
4021 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4022 This could optimize to a bfexts instruction.
4023 But C doesn't use these operations, so their optimizations are
4024 left for later. */
4025 /* ??? For modulo, we don't actually need the highpart of the first product,
4026 the low part will do nicely. And for small divisors, the second multiply
4027 can also be a low-part only multiply or even be completely left out.
4028 E.g. to calculate the remainder of a division by 3 with a 32 bit
4029 multiply, multiply with 0x55555556 and extract the upper two bits;
4030 the result is exact for inputs up to 0x1fffffff.
4031 The input range can be reduced by using cross-sum rules.
4032 For odd divisors >= 3, the following table gives right shift counts
4033 so that if a number is shifted by an integer multiple of the given
4034 amount, the remainder stays the same:
4035 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4036 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4037 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4038 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4039 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4040
4041 Cross-sum rules for even numbers can be derived by leaving as many bits
4042 to the right alone as the divisor has zeros to the right.
4043 E.g. if x is an unsigned 32 bit number:
4044 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4045 */
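
/* Illustrative sketch (not part of the compiler) of the low-part-only
   remainder trick described above: for unsigned X <= 0x1fffffff, X % 3 is
   the top two bits of the 32-bit product X * 0x55555556.  Assuming a
   32-bit unsigned int:  */

static inline unsigned int
sketch_urem_by_3_lowmul (unsigned int x)
{
  return (x * 0x55555556u) >> 30;	/* exact only for x <= 0x1fffffff */
}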
4046
4047 rtx
4048 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4049 rtx op0, rtx op1, rtx target, int unsignedp)
4050 {
4051 machine_mode compute_mode;
4052 rtx tquotient;
4053 rtx quotient = 0, remainder = 0;
4054 rtx_insn *last;
4055 rtx_insn *insn;
4056 optab optab1, optab2;
4057 int op1_is_constant, op1_is_pow2 = 0;
4058 int max_cost, extra_cost;
4059 static HOST_WIDE_INT last_div_const = 0;
4060 bool speed = optimize_insn_for_speed_p ();
4061
4062 op1_is_constant = CONST_INT_P (op1);
4063 if (op1_is_constant)
4064 {
4065 wide_int ext_op1 = rtx_mode_t (op1, mode);
4066 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4067 || (! unsignedp
4068 && wi::popcount (wi::neg (ext_op1)) == 1));
4069 }
4070
4071 /*
4072 This is the structure of expand_divmod:
4073
4074 First comes code to fix up the operands so we can perform the operations
4075 correctly and efficiently.
4076
4077 Second comes a switch statement with code specific for each rounding mode.
4078 For some special operands this code emits all RTL for the desired
4079 operation, for other cases, it generates only a quotient and stores it in
4080 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4081 to indicate that it has not done anything.
4082
4083 Last comes code that finishes the operation. If QUOTIENT is set and
4084 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4085 QUOTIENT is not set, it is computed using trunc rounding.
4086
4087 We try to generate special code for division and remainder when OP1 is a
4088 constant. If |OP1| = 2**n we can use shifts and some other fast
4089 operations. For other values of OP1, we compute a carefully selected
4090 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4091 by m.
4092
4093 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4094 half of the product. Different strategies for generating the product are
4095 implemented in expmed_mult_highpart.
4096
4097 If what we actually want is the remainder, we generate that by another
4098 by-constant multiplication and a subtraction. */
4099
4100 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4101 code below will malfunction if we are, so check here and handle
4102 the special case if so. */
4103 if (op1 == const1_rtx)
4104 return rem_flag ? const0_rtx : op0;
4105
4106 /* When dividing by -1, we could get an overflow.
4107 negv_optab can handle overflows. */
4108 if (! unsignedp && op1 == constm1_rtx)
4109 {
4110 if (rem_flag)
4111 return const0_rtx;
4112 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4113 ? negv_optab : neg_optab, op0, target, 0);
4114 }
4115
4116 if (target
4117 /* Don't use the function value register as a target
4118 since we have to read it as well as write it,
4119 and function-inlining gets confused by this. */
4120 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4121 /* Don't clobber an operand while doing a multi-step calculation. */
4122 || ((rem_flag || op1_is_constant)
4123 && (reg_mentioned_p (target, op0)
4124 || (MEM_P (op0) && MEM_P (target))))
4125 || reg_mentioned_p (target, op1)
4126 || (MEM_P (op1) && MEM_P (target))))
4127 target = 0;
4128
4129 /* Get the mode in which to perform this computation. Normally it will
4130 be MODE, but sometimes we can't do the desired operation in MODE.
4131 If so, pick a wider mode in which we can do the operation. Convert
4132 to that mode at the start to avoid repeated conversions.
4133
4134 First see what operations we need. These depend on the expression
4135 we are evaluating. (We assume that divxx3 insns exist under the
4136 same conditions as modxx3 insns do, and that these insns don't normally
4137 fail. If these assumptions are not correct, we may generate less
4138 efficient code in some cases.)
4139
4140 Then see if we find a mode in which we can open-code that operation
4141 (either a division, modulus, or shift). Finally, check for the smallest
4142 mode for which we can do the operation with a library call. */
4143
4144 /* We might want to refine this now that we have division-by-constant
4145 optimization. Since expmed_mult_highpart tries so many variants, it is
4146 not straightforward to generalize this. Maybe we should make an array
4147 of possible modes in init_expmed? Save this for GCC 2.7. */
4148
4149 optab1 = (op1_is_pow2
4150 ? (unsignedp ? lshr_optab : ashr_optab)
4151 : (unsignedp ? udiv_optab : sdiv_optab));
4152 optab2 = (op1_is_pow2 ? optab1
4153 : (unsignedp ? udivmod_optab : sdivmod_optab));
4154
4155 FOR_EACH_MODE_FROM (compute_mode, mode)
4156 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4157 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4158 break;
4159
4160 if (compute_mode == VOIDmode)
4161 FOR_EACH_MODE_FROM (compute_mode, mode)
4162 if (optab_libfunc (optab1, compute_mode)
4163 || optab_libfunc (optab2, compute_mode))
4164 break;
4165
4166 /* If we still couldn't find a mode, use MODE, but expand_binop will
4167 probably die. */
4168 if (compute_mode == VOIDmode)
4169 compute_mode = mode;
4170
4171 if (target && GET_MODE (target) == compute_mode)
4172 tquotient = target;
4173 else
4174 tquotient = gen_reg_rtx (compute_mode);
4175
4176 #if 0
4177 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4178 (mode), and thereby get better code when OP1 is a constant. Do that
4179 later. It will require going over all usages of SIZE below. */
4180 size = GET_MODE_BITSIZE (mode);
4181 #endif
4182
4183 /* Only deduct something for a REM if the last divide done was
4184 for a different constant. Then set the constant of the last
4185 divide. */
4186 max_cost = (unsignedp
4187 ? udiv_cost (speed, compute_mode)
4188 : sdiv_cost (speed, compute_mode));
4189 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4190 && INTVAL (op1) == last_div_const))
4191 max_cost -= (mul_cost (speed, compute_mode)
4192 + add_cost (speed, compute_mode));
4193
4194 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4195
4196 /* Now convert to the best mode to use. */
4197 if (compute_mode != mode)
4198 {
4199 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4200 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4201
4202 /* convert_modes may have placed op1 into a register, so we
4203 must recompute the following. */
4204 op1_is_constant = CONST_INT_P (op1);
4205 if (op1_is_constant)
4206 {
4207 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4208 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4209 || (! unsignedp
4210 && wi::popcount (wi::neg (ext_op1)) == 1));
4211 }
4212 else
4213 op1_is_pow2 = 0;
4214 }
4215
4216 /* If one of the operands is a volatile MEM, copy it into a register. */
4217
4218 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4219 op0 = force_reg (compute_mode, op0);
4220 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4221 op1 = force_reg (compute_mode, op1);
4222
4223 /* If we need the remainder or if OP1 is constant, we need to
4224 put OP0 in a register in case it has any queued subexpressions. */
4225 if (rem_flag || op1_is_constant)
4226 op0 = force_reg (compute_mode, op0);
4227
4228 last = get_last_insn ();
4229
4230 /* Promote floor rounding to trunc rounding for unsigned operations. */
4231 if (unsignedp)
4232 {
4233 if (code == FLOOR_DIV_EXPR)
4234 code = TRUNC_DIV_EXPR;
4235 if (code == FLOOR_MOD_EXPR)
4236 code = TRUNC_MOD_EXPR;
4237 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4238 code = TRUNC_DIV_EXPR;
4239 }
4240
4241 if (op1 != const0_rtx)
4242 switch (code)
4243 {
4244 case TRUNC_MOD_EXPR:
4245 case TRUNC_DIV_EXPR:
4246 if (op1_is_constant)
4247 {
4248 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4249 int size = GET_MODE_BITSIZE (int_mode);
4250 if (unsignedp)
4251 {
4252 unsigned HOST_WIDE_INT mh, ml;
4253 int pre_shift, post_shift;
4254 int dummy;
4255 wide_int wd = rtx_mode_t (op1, int_mode);
4256 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4257
4258 if (wi::popcount (wd) == 1)
4259 {
4260 pre_shift = floor_log2 (d);
4261 if (rem_flag)
4262 {
4263 unsigned HOST_WIDE_INT mask
4264 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4265 remainder
4266 = expand_binop (int_mode, and_optab, op0,
4267 gen_int_mode (mask, int_mode),
4268 remainder, 1,
4269 OPTAB_LIB_WIDEN);
4270 if (remainder)
4271 return gen_lowpart (mode, remainder);
4272 }
4273 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4274 pre_shift, tquotient, 1);
4275 }
4276 else if (size <= HOST_BITS_PER_WIDE_INT)
4277 {
4278 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4279 {
4280 /* Most significant bit of divisor is set; emit an scc
4281 insn. */
4282 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4283 int_mode, 1, 1);
4284 }
4285 else
4286 {
4287 /* Find a suitable multiplier and right shift count
4288 instead of multiplying with D. */
4289
4290 mh = choose_multiplier (d, size, size,
4291 &ml, &post_shift, &dummy);
4292
4293 /* If the suggested multiplier is more than SIZE bits,
4294 we can do better for even divisors, using an
4295 initial right shift. */
4296 if (mh != 0 && (d & 1) == 0)
4297 {
4298 pre_shift = ctz_or_zero (d);
4299 mh = choose_multiplier (d >> pre_shift, size,
4300 size - pre_shift,
4301 &ml, &post_shift, &dummy);
4302 gcc_assert (!mh);
4303 }
4304 else
4305 pre_shift = 0;
4306
4307 if (mh != 0)
4308 {
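		    /* The multiplier needs SIZE + 1 bits (MH is set), so the
		       product op0 * (2^SIZE + ml) cannot be formed directly.
		       Use the identity
			 q = (t1 + ((op0 - t1) >> 1)) >> (post_shift - 1),
		       where t1 is the high half of op0 * ml; this evaluates
		       floor (op0 * (2^SIZE + ml) / 2^(SIZE + post_shift))
		       using only SIZE-bit intermediate values.  */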
4309 rtx t1, t2, t3, t4;
4310
4311 if (post_shift - 1 >= BITS_PER_WORD)
4312 goto fail1;
4313
4314 extra_cost
4315 = (shift_cost (speed, int_mode, post_shift - 1)
4316 + shift_cost (speed, int_mode, 1)
4317 + 2 * add_cost (speed, int_mode));
4318 t1 = expmed_mult_highpart
4319 (int_mode, op0, gen_int_mode (ml, int_mode),
4320 NULL_RTX, 1, max_cost - extra_cost);
4321 if (t1 == 0)
4322 goto fail1;
4323 t2 = force_operand (gen_rtx_MINUS (int_mode,
4324 op0, t1),
4325 NULL_RTX);
4326 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4327 t2, 1, NULL_RTX, 1);
4328 t4 = force_operand (gen_rtx_PLUS (int_mode,
4329 t1, t3),
4330 NULL_RTX);
4331 quotient = expand_shift
4332 (RSHIFT_EXPR, int_mode, t4,
4333 post_shift - 1, tquotient, 1);
4334 }
4335 else
4336 {
4337 rtx t1, t2;
4338
4339 if (pre_shift >= BITS_PER_WORD
4340 || post_shift >= BITS_PER_WORD)
4341 goto fail1;
4342
4343 t1 = expand_shift
4344 (RSHIFT_EXPR, int_mode, op0,
4345 pre_shift, NULL_RTX, 1);
4346 extra_cost
4347 = (shift_cost (speed, int_mode, pre_shift)
4348 + shift_cost (speed, int_mode, post_shift));
4349 t2 = expmed_mult_highpart
4350 (int_mode, t1,
4351 gen_int_mode (ml, int_mode),
4352 NULL_RTX, 1, max_cost - extra_cost);
4353 if (t2 == 0)
4354 goto fail1;
4355 quotient = expand_shift
4356 (RSHIFT_EXPR, int_mode, t2,
4357 post_shift, tquotient, 1);
4358 }
4359 }
4360 }
4361 else /* Too wide mode to use tricky code */
4362 break;
4363
4364 insn = get_last_insn ();
4365 if (insn != last)
4366 set_dst_reg_note (insn, REG_EQUAL,
4367 gen_rtx_UDIV (int_mode, op0, op1),
4368 quotient);
4369 }
4370 else /* TRUNC_DIV, signed */
4371 {
4372 unsigned HOST_WIDE_INT ml;
4373 int lgup, post_shift;
4374 rtx mlr;
4375 HOST_WIDE_INT d = INTVAL (op1);
4376 unsigned HOST_WIDE_INT abs_d;
4377
4378 /* Since d might be INT_MIN, we have to cast to
4379 unsigned HOST_WIDE_INT before negating to avoid
4380 undefined signed overflow. */
4381 abs_d = (d >= 0
4382 ? (unsigned HOST_WIDE_INT) d
4383 : - (unsigned HOST_WIDE_INT) d);
4384
4385 /* n rem d = n rem -d */
4386 if (rem_flag && d < 0)
4387 {
4388 d = abs_d;
4389 op1 = gen_int_mode (abs_d, int_mode);
4390 }
4391
4392 if (d == 1)
4393 quotient = op0;
4394 else if (d == -1)
4395 quotient = expand_unop (int_mode, neg_optab, op0,
4396 tquotient, 0);
4397 else if (size <= HOST_BITS_PER_WIDE_INT
4398 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4399 {
4400 /* This case is not handled correctly below. */
4401 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4402 int_mode, 1, 1);
4403 if (quotient == 0)
4404 goto fail1;
4405 }
4406 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4407 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4408 && (rem_flag
4409 ? smod_pow2_cheap (speed, int_mode)
4410 : sdiv_pow2_cheap (speed, int_mode))
4411 /* We assume that cheap metric is true if the
4412 optab has an expander for this mode. */
4413 && ((optab_handler ((rem_flag ? smod_optab
4414 : sdiv_optab),
4415 int_mode)
4416 != CODE_FOR_nothing)
4417 || (optab_handler (sdivmod_optab, int_mode)
4418 != CODE_FOR_nothing)))
4419 ;
4420 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4421 && (size <= HOST_BITS_PER_WIDE_INT
4422 || abs_d != (unsigned HOST_WIDE_INT) d))
4423 {
4424 if (rem_flag)
4425 {
4426 remainder = expand_smod_pow2 (int_mode, op0, d);
4427 if (remainder)
4428 return gen_lowpart (mode, remainder);
4429 }
4430
4431 if (sdiv_pow2_cheap (speed, int_mode)
4432 && ((optab_handler (sdiv_optab, int_mode)
4433 != CODE_FOR_nothing)
4434 || (optab_handler (sdivmod_optab, int_mode)
4435 != CODE_FOR_nothing)))
4436 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4437 int_mode, op0,
4438 gen_int_mode (abs_d,
4439 int_mode),
4440 NULL_RTX, 0);
4441 else
4442 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4443
4444 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4445 negate the quotient. */
4446 if (d < 0)
4447 {
4448 insn = get_last_insn ();
4449 if (insn != last
4450 && abs_d < (HOST_WIDE_INT_1U
4451 << (HOST_BITS_PER_WIDE_INT - 1)))
4452 set_dst_reg_note (insn, REG_EQUAL,
4453 gen_rtx_DIV (int_mode, op0,
4454 gen_int_mode
4455 (abs_d,
4456 int_mode)),
4457 quotient);
4458
4459 quotient = expand_unop (int_mode, neg_optab,
4460 quotient, quotient, 0);
4461 }
4462 }
4463 else if (size <= HOST_BITS_PER_WIDE_INT)
4464 {
4465 choose_multiplier (abs_d, size, size - 1,
4466 &ml, &post_shift, &lgup);
4467 if (ml < HOST_WIDE_INT_1U << (size - 1))
4468 {
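		    /* The multiplier fits in SIZE - 1 bits.  The quotient is
		       the signed high half of op0 * ml, shifted right by
		       POST_SHIFT, minus the sign word of op0 (-1 or 0); the
		       subtraction supplies the +1 needed to truncate toward
		       zero for negative dividends, and for negative D the
		       operands are swapped so the result is negated.  */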
4469 rtx t1, t2, t3;
4470
4471 if (post_shift >= BITS_PER_WORD
4472 || size - 1 >= BITS_PER_WORD)
4473 goto fail1;
4474
4475 extra_cost = (shift_cost (speed, int_mode, post_shift)
4476 + shift_cost (speed, int_mode, size - 1)
4477 + add_cost (speed, int_mode));
4478 t1 = expmed_mult_highpart
4479 (int_mode, op0, gen_int_mode (ml, int_mode),
4480 NULL_RTX, 0, max_cost - extra_cost);
4481 if (t1 == 0)
4482 goto fail1;
4483 t2 = expand_shift
4484 (RSHIFT_EXPR, int_mode, t1,
4485 post_shift, NULL_RTX, 0);
4486 t3 = expand_shift
4487 (RSHIFT_EXPR, int_mode, op0,
4488 size - 1, NULL_RTX, 0);
4489 if (d < 0)
4490 quotient
4491 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4492 tquotient);
4493 else
4494 quotient
4495 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4496 tquotient);
4497 }
4498 else
4499 {
4500 rtx t1, t2, t3, t4;
4501
4502 if (post_shift >= BITS_PER_WORD
4503 || size - 1 >= BITS_PER_WORD)
4504 goto fail1;
4505
4506 ml |= HOST_WIDE_INT_M1U << (size - 1);
4507 mlr = gen_int_mode (ml, int_mode);
4508 extra_cost = (shift_cost (speed, int_mode, post_shift)
4509 + shift_cost (speed, int_mode, size - 1)
4510 + 2 * add_cost (speed, int_mode));
4511 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4512 NULL_RTX, 0,
4513 max_cost - extra_cost);
4514 if (t1 == 0)
4515 goto fail1;
4516 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4517 NULL_RTX);
4518 t3 = expand_shift
4519 (RSHIFT_EXPR, int_mode, t2,
4520 post_shift, NULL_RTX, 0);
4521 t4 = expand_shift
4522 (RSHIFT_EXPR, int_mode, op0,
4523 size - 1, NULL_RTX, 0);
4524 if (d < 0)
4525 quotient
4526 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4527 tquotient);
4528 else
4529 quotient
4530 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4531 tquotient);
4532 }
4533 }
4534 else /* Too wide mode to use tricky code */
4535 break;
4536
4537 insn = get_last_insn ();
4538 if (insn != last)
4539 set_dst_reg_note (insn, REG_EQUAL,
4540 gen_rtx_DIV (int_mode, op0, op1),
4541 quotient);
4542 }
4543 break;
4544 }
4545 fail1:
4546 delete_insns_since (last);
4547 break;
4548
4549 case FLOOR_DIV_EXPR:
4550 case FLOOR_MOD_EXPR:
4551 /* We will come here only for signed operations. */
4552 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4553 {
4554 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4555 int size = GET_MODE_BITSIZE (int_mode);
4556 unsigned HOST_WIDE_INT mh, ml;
4557 int pre_shift, lgup, post_shift;
4558 HOST_WIDE_INT d = INTVAL (op1);
4559
4560 if (d > 0)
4561 {
4562 /* We could just as easily deal with negative constants here,
4563 but it does not seem worth the trouble for GCC 2.6. */
4564 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4565 {
4566 pre_shift = floor_log2 (d);
4567 if (rem_flag)
4568 {
4569 unsigned HOST_WIDE_INT mask
4570 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4571 remainder = expand_binop
4572 (int_mode, and_optab, op0,
4573 gen_int_mode (mask, int_mode),
4574 remainder, 0, OPTAB_LIB_WIDEN);
4575 if (remainder)
4576 return gen_lowpart (mode, remainder);
4577 }
4578 quotient = expand_shift
4579 (RSHIFT_EXPR, int_mode, op0,
4580 pre_shift, tquotient, 0);
4581 }
4582 else
4583 {
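		  /* D is positive but not a power of two.  Use the identity
		     floor (N / D) == ~((~N) / D) for negative N (and plain
		     N / D otherwise): XOR-ing with the sign mask T1 maps N
		     to ~N, the unsigned multiply-and-shift computes that
		     quotient, and the final XOR with T1 undoes the mapping,
		     giving floor rounding without a branch.  */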
4584 rtx t1, t2, t3, t4;
4585
4586 mh = choose_multiplier (d, size, size - 1,
4587 &ml, &post_shift, &lgup);
4588 gcc_assert (!mh);
4589
4590 if (post_shift < BITS_PER_WORD
4591 && size - 1 < BITS_PER_WORD)
4592 {
4593 t1 = expand_shift
4594 (RSHIFT_EXPR, int_mode, op0,
4595 size - 1, NULL_RTX, 0);
4596 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4597 NULL_RTX, 0, OPTAB_WIDEN);
4598 extra_cost = (shift_cost (speed, int_mode, post_shift)
4599 + shift_cost (speed, int_mode, size - 1)
4600 + 2 * add_cost (speed, int_mode));
4601 t3 = expmed_mult_highpart
4602 (int_mode, t2, gen_int_mode (ml, int_mode),
4603 NULL_RTX, 1, max_cost - extra_cost);
4604 if (t3 != 0)
4605 {
4606 t4 = expand_shift
4607 (RSHIFT_EXPR, int_mode, t3,
4608 post_shift, NULL_RTX, 1);
4609 quotient = expand_binop (int_mode, xor_optab,
4610 t4, t1, tquotient, 0,
4611 OPTAB_WIDEN);
4612 }
4613 }
4614 }
4615 }
4616 else
4617 {
4618 rtx nsign, t1, t2, t3, t4;
4619 t1 = force_operand (gen_rtx_PLUS (int_mode,
4620 op0, constm1_rtx), NULL_RTX);
4621 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4622 0, OPTAB_WIDEN);
4623 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4624 size - 1, NULL_RTX, 0);
4625 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4626 NULL_RTX);
4627 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4628 NULL_RTX, 0);
4629 if (t4)
4630 {
4631 rtx t5;
4632 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4633 NULL_RTX, 0);
4634 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4635 tquotient);
4636 }
4637 }
4638 }
4639
4640 if (quotient != 0)
4641 break;
4642 delete_insns_since (last);
4643
4644 /* Try using an instruction that produces both the quotient and
4645 remainder, using truncation. We can easily compensate the quotient
4646 or remainder to get floor rounding, once we have the remainder.
4647 Notice that we also compute the final remainder value here,
4648 and return the result right away. */
4649 if (target == 0 || GET_MODE (target) != compute_mode)
4650 target = gen_reg_rtx (compute_mode);
4651
4652 if (rem_flag)
4653 {
4654 remainder
4655 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4656 quotient = gen_reg_rtx (compute_mode);
4657 }
4658 else
4659 {
4660 quotient
4661 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4662 remainder = gen_reg_rtx (compute_mode);
4663 }
4664
4665 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4666 quotient, remainder, 0))
4667 {
4668 /* This could be computed with a branch-less sequence.
4669 Save that for later. */
4670 rtx tem;
4671 rtx_code_label *label = gen_label_rtx ();
4672 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4673 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4674 NULL_RTX, 0, OPTAB_WIDEN);
4675 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4676 expand_dec (quotient, const1_rtx);
4677 expand_inc (remainder, op1);
4678 emit_label (label);
4679 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4680 }
4681
4682 /* No luck with division elimination or divmod. Have to do it
4683 by conditionally adjusting op0 *and* the result. */
4684 {
4685 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4686 rtx adjusted_op0;
4687 rtx tem;
4688
4689 quotient = gen_reg_rtx (compute_mode);
4690 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4691 label1 = gen_label_rtx ();
4692 label2 = gen_label_rtx ();
4693 label3 = gen_label_rtx ();
4694 label4 = gen_label_rtx ();
4695 label5 = gen_label_rtx ();
4696 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4697 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4698 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4699 quotient, 0, OPTAB_LIB_WIDEN);
4700 if (tem != quotient)
4701 emit_move_insn (quotient, tem);
4702 emit_jump_insn (targetm.gen_jump (label5));
4703 emit_barrier ();
4704 emit_label (label1);
4705 expand_inc (adjusted_op0, const1_rtx);
4706 emit_jump_insn (targetm.gen_jump (label4));
4707 emit_barrier ();
4708 emit_label (label2);
4709 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4710 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4711 quotient, 0, OPTAB_LIB_WIDEN);
4712 if (tem != quotient)
4713 emit_move_insn (quotient, tem);
4714 emit_jump_insn (targetm.gen_jump (label5));
4715 emit_barrier ();
4716 emit_label (label3);
4717 expand_dec (adjusted_op0, const1_rtx);
4718 emit_label (label4);
4719 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4720 quotient, 0, OPTAB_LIB_WIDEN);
4721 if (tem != quotient)
4722 emit_move_insn (quotient, tem);
4723 expand_dec (quotient, const1_rtx);
4724 emit_label (label5);
4725 }
4726 break;
4727
4728 case CEIL_DIV_EXPR:
4729 case CEIL_MOD_EXPR:
4730 if (unsignedp)
4731 {
4732 if (op1_is_constant
4733 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4734 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4735 || INTVAL (op1) >= 0))
4736 {
4737 scalar_int_mode int_mode
4738 = as_a <scalar_int_mode> (compute_mode);
4739 rtx t1, t2, t3;
4740 unsigned HOST_WIDE_INT d = INTVAL (op1);
4741 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4742 floor_log2 (d), tquotient, 1);
4743 t2 = expand_binop (int_mode, and_optab, op0,
4744 gen_int_mode (d - 1, int_mode),
4745 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4746 t3 = gen_reg_rtx (int_mode);
4747 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4748 if (t3 == 0)
4749 {
4750 rtx_code_label *lab;
4751 lab = gen_label_rtx ();
4752 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4753 expand_inc (t1, const1_rtx);
4754 emit_label (lab);
4755 quotient = t1;
4756 }
4757 else
4758 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4759 tquotient);
4760 break;
4761 }
4762
4763 /* Try using an instruction that produces both the quotient and
4764 remainder, using truncation. We can easily compensate the
4765 quotient or remainder to get ceiling rounding, once we have the
4766 remainder. Notice that we also compute the final remainder
4767 value here, and return the result right away. */
4768 if (target == 0 || GET_MODE (target) != compute_mode)
4769 target = gen_reg_rtx (compute_mode);
4770
4771 if (rem_flag)
4772 {
4773 remainder = (REG_P (target)
4774 ? target : gen_reg_rtx (compute_mode));
4775 quotient = gen_reg_rtx (compute_mode);
4776 }
4777 else
4778 {
4779 quotient = (REG_P (target)
4780 ? target : gen_reg_rtx (compute_mode));
4781 remainder = gen_reg_rtx (compute_mode);
4782 }
4783
4784 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4785 remainder, 1))
4786 {
4787 /* This could be computed with a branch-less sequence.
4788 Save that for later. */
4789 rtx_code_label *label = gen_label_rtx ();
4790 do_cmp_and_jump (remainder, const0_rtx, EQ,
4791 compute_mode, label);
4792 expand_inc (quotient, const1_rtx);
4793 expand_dec (remainder, op1);
4794 emit_label (label);
4795 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4796 }
4797
4798 /* No luck with division elimination or divmod. Have to do it
4799 by conditionally adjusting op0 *and* the result. */
4800 {
4801 rtx_code_label *label1, *label2;
4802 rtx adjusted_op0, tem;
4803
4804 quotient = gen_reg_rtx (compute_mode);
4805 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4806 label1 = gen_label_rtx ();
4807 label2 = gen_label_rtx ();
4808 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4809 compute_mode, label1);
4810 emit_move_insn (quotient, const0_rtx);
4811 emit_jump_insn (targetm.gen_jump (label2));
4812 emit_barrier ();
4813 emit_label (label1);
4814 expand_dec (adjusted_op0, const1_rtx);
4815 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4816 quotient, 1, OPTAB_LIB_WIDEN);
4817 if (tem != quotient)
4818 emit_move_insn (quotient, tem);
4819 expand_inc (quotient, const1_rtx);
4820 emit_label (label2);
4821 }
4822 }
4823 else /* signed */
4824 {
4825 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4826 && INTVAL (op1) >= 0)
4827 {
4828 /* This is extremely similar to the code for the unsigned case
4829 above. For 2.7 we should merge these variants, but for
4830 2.6.1 I don't want to touch the code for unsigned since that
4831 gets used in C. The signed case will only be used by other
4832 languages (Ada). */
4833
4834 rtx t1, t2, t3;
4835 unsigned HOST_WIDE_INT d = INTVAL (op1);
4836 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4837 floor_log2 (d), tquotient, 0);
4838 t2 = expand_binop (compute_mode, and_optab, op0,
4839 gen_int_mode (d - 1, compute_mode),
4840 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4841 t3 = gen_reg_rtx (compute_mode);
4842 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4843 compute_mode, 1, 1);
4844 if (t3 == 0)
4845 {
4846 rtx_code_label *lab;
4847 lab = gen_label_rtx ();
4848 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4849 expand_inc (t1, const1_rtx);
4850 emit_label (lab);
4851 quotient = t1;
4852 }
4853 else
4854 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4855 t1, t3),
4856 tquotient);
4857 break;
4858 }
4859
4860 /* Try using an instruction that produces both the quotient and
4861 remainder, using truncation. We can easily compensate the
4862 quotient or remainder to get ceiling rounding, once we have the
4863 remainder. Notice that we also compute the final remainder
4864 value here, and return the result right away. */
4865 if (target == 0 || GET_MODE (target) != compute_mode)
4866 target = gen_reg_rtx (compute_mode);
4867 if (rem_flag)
4868 {
4869 remainder = (REG_P (target)
4870 ? target : gen_reg_rtx (compute_mode));
4871 quotient = gen_reg_rtx (compute_mode);
4872 }
4873 else
4874 {
4875 quotient = (REG_P (target)
4876 ? target : gen_reg_rtx (compute_mode));
4877 remainder = gen_reg_rtx (compute_mode);
4878 }
4879
4880 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4881 remainder, 0))
4882 {
4883 /* This could be computed with a branch-less sequence.
4884 Save that for later. */
4885 rtx tem;
4886 rtx_code_label *label = gen_label_rtx ();
4887 do_cmp_and_jump (remainder, const0_rtx, EQ,
4888 compute_mode, label);
4889 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4890 NULL_RTX, 0, OPTAB_WIDEN);
4891 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4892 expand_inc (quotient, const1_rtx);
4893 expand_dec (remainder, op1);
4894 emit_label (label);
4895 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4896 }
4897
4898 /* No luck with division elimination or divmod. Have to do it
4899 by conditionally adjusting op0 *and* the result. */
4900 {
4901 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4902 rtx adjusted_op0;
4903 rtx tem;
4904
4905 quotient = gen_reg_rtx (compute_mode);
4906 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4907 label1 = gen_label_rtx ();
4908 label2 = gen_label_rtx ();
4909 label3 = gen_label_rtx ();
4910 label4 = gen_label_rtx ();
4911 label5 = gen_label_rtx ();
4912 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4913 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4914 compute_mode, label1);
4915 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4916 quotient, 0, OPTAB_LIB_WIDEN);
4917 if (tem != quotient)
4918 emit_move_insn (quotient, tem);
4919 emit_jump_insn (targetm.gen_jump (label5));
4920 emit_barrier ();
4921 emit_label (label1);
4922 expand_dec (adjusted_op0, const1_rtx);
4923 emit_jump_insn (targetm.gen_jump (label4));
4924 emit_barrier ();
4925 emit_label (label2);
4926 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4927 compute_mode, label3);
4928 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4929 quotient, 0, OPTAB_LIB_WIDEN);
4930 if (tem != quotient)
4931 emit_move_insn (quotient, tem);
4932 emit_jump_insn (targetm.gen_jump (label5));
4933 emit_barrier ();
4934 emit_label (label3);
4935 expand_inc (adjusted_op0, const1_rtx);
4936 emit_label (label4);
4937 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4938 quotient, 0, OPTAB_LIB_WIDEN);
4939 if (tem != quotient)
4940 emit_move_insn (quotient, tem);
4941 expand_inc (quotient, const1_rtx);
4942 emit_label (label5);
4943 }
4944 }
4945 break;
4946
4947 case EXACT_DIV_EXPR:
4948 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4949 {
4950 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4951 int size = GET_MODE_BITSIZE (int_mode);
4952 HOST_WIDE_INT d = INTVAL (op1);
4953 unsigned HOST_WIDE_INT ml;
4954 int pre_shift;
4955 rtx t1;
4956
4957 pre_shift = ctz_or_zero (d);
4958 ml = invert_mod2n (d >> pre_shift, size);
4959 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4960 pre_shift, NULL_RTX, unsignedp);
4961 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
4962 NULL_RTX, 1);
4963
4964 insn = get_last_insn ();
4965 set_dst_reg_note (insn, REG_EQUAL,
4966 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4967 int_mode, op0, op1),
4968 quotient);
4969 }
4970 break;
4971
4972 case ROUND_DIV_EXPR:
4973 case ROUND_MOD_EXPR:
4974 if (unsignedp)
4975 {
4976 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4977 rtx tem;
4978 rtx_code_label *label;
4979 label = gen_label_rtx ();
4980 quotient = gen_reg_rtx (int_mode);
4981 remainder = gen_reg_rtx (int_mode);
4982 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4983 {
4984 rtx tem;
4985 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
4986 quotient, 1, OPTAB_LIB_WIDEN);
4987 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
4988 remainder = expand_binop (int_mode, sub_optab, op0, tem,
4989 remainder, 1, OPTAB_LIB_WIDEN);
4990 }
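	      /* Round the truncating quotient to nearest: unless the
		 remainder is at most (op1 - 1) / 2, increment the quotient
		 and subtract op1 from the remainder, so halves round up.  */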
4991 tem = plus_constant (int_mode, op1, -1);
4992 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
4993 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
4994 expand_inc (quotient, const1_rtx);
4995 expand_dec (remainder, op1);
4996 emit_label (label);
4997 }
4998 else
4999 {
5000 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5001 int size = GET_MODE_BITSIZE (int_mode);
5002 rtx abs_rem, abs_op1, tem, mask;
5003 rtx_code_label *label;
5004 label = gen_label_rtx ();
5005 quotient = gen_reg_rtx (int_mode);
5006 remainder = gen_reg_rtx (int_mode);
5007 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5008 {
5009 rtx tem;
5010 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5011 quotient, 0, OPTAB_LIB_WIDEN);
5012 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5013 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5014 remainder, 0, OPTAB_LIB_WIDEN);
5015 }
5016 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5017 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5018 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5019 1, NULL_RTX, 1);
5020 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5021 tem = expand_binop (int_mode, xor_optab, op0, op1,
5022 NULL_RTX, 0, OPTAB_WIDEN);
5023 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5024 size - 1, NULL_RTX, 0);
5025 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5026 NULL_RTX, 0, OPTAB_WIDEN);
5027 tem = expand_binop (int_mode, sub_optab, tem, mask,
5028 NULL_RTX, 0, OPTAB_WIDEN);
5029 expand_inc (quotient, tem);
5030 tem = expand_binop (int_mode, xor_optab, mask, op1,
5031 NULL_RTX, 0, OPTAB_WIDEN);
5032 tem = expand_binop (int_mode, sub_optab, tem, mask,
5033 NULL_RTX, 0, OPTAB_WIDEN);
5034 expand_dec (remainder, tem);
5035 emit_label (label);
5036 }
5037 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5038
5039 default:
5040 gcc_unreachable ();
5041 }
5042
5043   if (quotient == 0)
5044 {
5045 if (target && GET_MODE (target) != compute_mode)
5046 target = 0;
5047
5048 if (rem_flag)
5049 {
5050 /* Try to produce the remainder without producing the quotient.
5051 If we seem to have a divmod pattern that does not require widening,
5052 don't try widening here. We should really have a WIDEN argument
5053 to expand_twoval_binop, since what we'd really like to do here is
5054 1) try a mod insn in compute_mode
5055 2) try a divmod insn in compute_mode
5056 3) try a div insn in compute_mode and multiply-subtract to get
5057 remainder
5058 4) try the same things with widening allowed. */
5059 remainder
5060 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5061 op0, op1, target,
5062 unsignedp,
5063 ((optab_handler (optab2, compute_mode)
5064 != CODE_FOR_nothing)
5065 ? OPTAB_DIRECT : OPTAB_WIDEN));
5066 if (remainder == 0)
5067 {
5068 /* No luck there. Can we do remainder and divide at once
5069 without a library call? */
5070 remainder = gen_reg_rtx (compute_mode);
5071 if (! expand_twoval_binop ((unsignedp
5072 ? udivmod_optab
5073 : sdivmod_optab),
5074 op0, op1,
5075 NULL_RTX, remainder, unsignedp))
5076 remainder = 0;
5077 }
5078
5079 if (remainder)
5080 return gen_lowpart (mode, remainder);
5081 }
5082
5083 /* Produce the quotient. Try a quotient insn, but not a library call.
5084 If we have a divmod in this mode, use it in preference to widening
5085 the div (for this test we assume it will not fail). Note that optab2
5086	 is set to whichever of the two optabs the call below will use. */
5087 quotient
5088 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5089 op0, op1, rem_flag ? NULL_RTX : target,
5090 unsignedp,
5091 ((optab_handler (optab2, compute_mode)
5092 != CODE_FOR_nothing)
5093 ? OPTAB_DIRECT : OPTAB_WIDEN));
5094
5095 if (quotient == 0)
5096 {
5097 /* No luck there. Try a quotient-and-remainder insn,
5098 keeping the quotient alone. */
5099 quotient = gen_reg_rtx (compute_mode);
5100 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5101 op0, op1,
5102 quotient, NULL_RTX, unsignedp))
5103 {
5104 quotient = 0;
5105 if (! rem_flag)
5106 /* Still no luck. If we are not computing the remainder,
5107 use a library call for the quotient. */
5108 quotient = sign_expand_binop (compute_mode,
5109 udiv_optab, sdiv_optab,
5110 op0, op1, target,
5111 unsignedp, OPTAB_LIB_WIDEN);
5112 }
5113 }
5114 }
5115
5116 if (rem_flag)
5117 {
5118 if (target && GET_MODE (target) != compute_mode)
5119 target = 0;
5120
5121 if (quotient == 0)
5122 {
5123 /* No divide instruction either. Use library for remainder. */
5124 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5125 op0, op1, target,
5126 unsignedp, OPTAB_LIB_WIDEN);
5127 /* No remainder function. Try a quotient-and-remainder
5128 function, keeping the remainder. */
5129 if (!remainder)
5130 {
5131 remainder = gen_reg_rtx (compute_mode);
5132 if (!expand_twoval_binop_libfunc
5133 (unsignedp ? udivmod_optab : sdivmod_optab,
5134 op0, op1,
5135 NULL_RTX, remainder,
5136 unsignedp ? UMOD : MOD))
5137 remainder = NULL_RTX;
5138 }
5139 }
5140 else
5141 {
5142 /* We divided. Now finish doing X - Y * (X / Y). */
5143 remainder = expand_mult (compute_mode, quotient, op1,
5144 NULL_RTX, unsignedp);
5145 remainder = expand_binop (compute_mode, sub_optab, op0,
5146 remainder, target, unsignedp,
5147 OPTAB_LIB_WIDEN);
5148 }
5149 }
5150
5151 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5152 }
5153 \f
5154 /* Return a tree node with data type TYPE, describing the value of X.
5155    Usually this is a VAR_DECL, if there is no obvious better choice.
5156    X may be an expression; however, we only support those expressions
5157 generated by loop.c. */
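/* For instance, (plus:SI (reg:SI 100) (const_int 9)) would become
   PLUS_EXPR <D, 9>, where D is a VAR_DECL of type TYPE whose rtl is the
   pseudo register (the register number here is only an example).  */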
5158
5159 tree
5160 make_tree (tree type, rtx x)
5161 {
5162 tree t;
5163
5164 switch (GET_CODE (x))
5165 {
5166 case CONST_INT:
5167 case CONST_WIDE_INT:
5168 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5169 return t;
5170
5171 case CONST_DOUBLE:
5172 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5173 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5174 t = wide_int_to_tree (type,
5175 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5176 HOST_BITS_PER_WIDE_INT * 2));
5177 else
5178 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5179
5180 return t;
5181
5182 case CONST_VECTOR:
5183 {
5184 int units = CONST_VECTOR_NUNITS (x);
5185 tree itype = TREE_TYPE (type);
5186 tree *elts;
5187 int i;
5188
5189 /* Build a tree with vector elements. */
5190 elts = XALLOCAVEC (tree, units);
5191 for (i = units - 1; i >= 0; --i)
5192 {
5193 rtx elt = CONST_VECTOR_ELT (x, i);
5194 elts[i] = make_tree (itype, elt);
5195 }
5196
5197 return build_vector (type, elts);
5198 }
5199
5200 case PLUS:
5201 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5202 make_tree (type, XEXP (x, 1)));
5203
5204 case MINUS:
5205 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5206 make_tree (type, XEXP (x, 1)));
5207
5208 case NEG:
5209 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5210
5211 case MULT:
5212 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5213 make_tree (type, XEXP (x, 1)));
5214
5215 case ASHIFT:
5216 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5217 make_tree (type, XEXP (x, 1)));
5218
5219 case LSHIFTRT:
5220 t = unsigned_type_for (type);
5221 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5222 make_tree (t, XEXP (x, 0)),
5223 make_tree (type, XEXP (x, 1))));
5224
5225 case ASHIFTRT:
5226 t = signed_type_for (type);
5227 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5228 make_tree (t, XEXP (x, 0)),
5229 make_tree (type, XEXP (x, 1))));
5230
5231 case DIV:
5232 if (TREE_CODE (type) != REAL_TYPE)
5233 t = signed_type_for (type);
5234 else
5235 t = type;
5236
5237 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5238 make_tree (t, XEXP (x, 0)),
5239 make_tree (t, XEXP (x, 1))));
5240 case UDIV:
5241 t = unsigned_type_for (type);
5242 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5243 make_tree (t, XEXP (x, 0)),
5244 make_tree (t, XEXP (x, 1))));
5245
5246 case SIGN_EXTEND:
5247 case ZERO_EXTEND:
5248 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5249 GET_CODE (x) == ZERO_EXTEND);
5250 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5251
5252 case CONST:
5253 return make_tree (type, XEXP (x, 0));
5254
5255 case SYMBOL_REF:
5256 t = SYMBOL_REF_DECL (x);
5257 if (t)
5258 return fold_convert (type, build_fold_addr_expr (t));
5259 /* fall through. */
5260
5261 default:
5262 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5263
5264 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5265 address mode to pointer mode. */
5266 if (POINTER_TYPE_P (type))
5267 x = convert_memory_address_addr_space
5268 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5269
5270 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5271 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5272 t->decl_with_rtl.rtl = x;
5273
5274 return t;
5275 }
5276 }
5277 \f
5278 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5279 and returning TARGET.
5280
5281 If TARGET is 0, a pseudo-register or constant is returned. */
5282
5283 rtx
5284 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5285 {
5286 rtx tem = 0;
5287
5288 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5289 tem = simplify_binary_operation (AND, mode, op0, op1);
5290 if (tem == 0)
5291 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5292
5293 if (target == 0)
5294 target = tem;
5295 else if (tem != target)
5296 emit_move_insn (target, tem);
5297 return target;
5298 }
5299
5300 /* Helper function for emit_store_flag. */
5301 rtx
5302 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5303 machine_mode mode, machine_mode compare_mode,
5304 int unsignedp, rtx x, rtx y, int normalizep,
5305 machine_mode target_mode)
5306 {
5307 struct expand_operand ops[4];
5308 rtx op0, comparison, subtarget;
5309 rtx_insn *last;
5310 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5311 scalar_int_mode int_target_mode;
5312
5313 last = get_last_insn ();
5314 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5315 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5316 if (!x || !y)
5317 {
5318 delete_insns_since (last);
5319 return NULL_RTX;
5320 }
5321
5322 if (target_mode == VOIDmode)
5323 int_target_mode = result_mode;
5324 else
5325 int_target_mode = as_a <scalar_int_mode> (target_mode);
5326 if (!target)
5327 target = gen_reg_rtx (int_target_mode);
5328
5329 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5330
5331 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5332 create_fixed_operand (&ops[1], comparison);
5333 create_fixed_operand (&ops[2], x);
5334 create_fixed_operand (&ops[3], y);
5335 if (!maybe_expand_insn (icode, 4, ops))
5336 {
5337 delete_insns_since (last);
5338 return NULL_RTX;
5339 }
5340 subtarget = ops[0].value;
5341
5342 /* If we are converting to a wider mode, first convert to
5343 INT_TARGET_MODE, then normalize. This produces better combining
5344 opportunities on machines that have a SIGN_EXTRACT when we are
5345 testing a single bit. This mostly benefits the 68k.
5346
5347 If STORE_FLAG_VALUE does not have the sign bit set when
5348 interpreted in MODE, we can do this conversion as unsigned, which
5349 is usually more efficient. */
5350 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5351 {
5352 convert_move (target, subtarget,
5353 val_signbit_known_clear_p (result_mode,
5354 STORE_FLAG_VALUE));
5355 op0 = target;
5356 result_mode = int_target_mode;
5357 }
5358 else
5359 op0 = subtarget;
5360
5361 /* If we want to keep subexpressions around, don't reuse our last
5362 target. */
5363 if (optimize)
5364 subtarget = 0;
5365
5366 /* Now normalize to the proper value in MODE. Sometimes we don't
5367 have to do anything. */
5368 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5369 ;
5370 /* STORE_FLAG_VALUE might be the most negative number, so write
5371      the comparison this way to avoid a compile-time warning. */
5372 else if (- normalizep == STORE_FLAG_VALUE)
5373 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5374
5375 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5376 it hard to use a value of just the sign bit due to ANSI integer
5377 constant typing rules. */
5378 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5379 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5380 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5381 normalizep == 1);
5382 else
5383 {
5384 gcc_assert (STORE_FLAG_VALUE & 1);
5385
5386 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5387 if (normalizep == -1)
5388 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5389 }
5390
5391 /* If we were converting to a smaller mode, do the conversion now. */
5392 if (int_target_mode != result_mode)
5393 {
5394 convert_move (target, op0, 0);
5395 return target;
5396 }
5397 else
5398 return op0;
5399 }
5400
5401
5402 /* A subroutine of emit_store_flag only including "tricks" that do not
5403 need a recursive call. These are kept separate to avoid infinite
5404 loops. */
5405
5406 static rtx
5407 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5408 machine_mode mode, int unsignedp, int normalizep,
5409 machine_mode target_mode)
5410 {
5411 rtx subtarget;
5412 enum insn_code icode;
5413 machine_mode compare_mode;
5414 enum mode_class mclass;
5415 enum rtx_code scode;
5416
5417 if (unsignedp)
5418 code = unsigned_condition (code);
5419 scode = swap_condition (code);
5420
5421 /* If one operand is constant, make it the second one. Only do this
5422 if the other operand is not constant as well. */
5423
5424 if (swap_commutative_operands_p (op0, op1))
5425 {
5426 std::swap (op0, op1);
5427 code = swap_condition (code);
5428 }
5429
5430 if (mode == VOIDmode)
5431 mode = GET_MODE (op0);
5432
5433 /* For some comparisons with 1 and -1, we can convert this to
5434 comparisons with zero. This will often produce more opportunities for
5435 store-flag insns. */
5436
5437 switch (code)
5438 {
5439 case LT:
5440 if (op1 == const1_rtx)
5441 op1 = const0_rtx, code = LE;
5442 break;
5443 case LE:
5444 if (op1 == constm1_rtx)
5445 op1 = const0_rtx, code = LT;
5446 break;
5447 case GE:
5448 if (op1 == const1_rtx)
5449 op1 = const0_rtx, code = GT;
5450 break;
5451 case GT:
5452 if (op1 == constm1_rtx)
5453 op1 = const0_rtx, code = GE;
5454 break;
5455 case GEU:
5456 if (op1 == const1_rtx)
5457 op1 = const0_rtx, code = NE;
5458 break;
5459 case LTU:
5460 if (op1 == const1_rtx)
5461 op1 = const0_rtx, code = EQ;
5462 break;
5463 default:
5464 break;
5465 }
5466
5467 /* If we are comparing a double-word integer with zero or -1, we can
5468 convert the comparison into one involving a single word. */
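  /* For instance, with 32-bit words and a 64-bit operand made of words LO
     and HI: op0 == 0 exactly when (LO | HI) == 0, op0 == -1 exactly when
     (LO & HI) == -1, and op0 < 0 or op0 >= 0 depends only on the sign
     of HI.  */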
5469 scalar_int_mode int_mode;
5470 if (is_int_mode (mode, &int_mode)
5471 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5472 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5473 {
5474 rtx tem;
5475 if ((code == EQ || code == NE)
5476 && (op1 == const0_rtx || op1 == constm1_rtx))
5477 {
5478 rtx op00, op01;
5479
5480 /* Do a logical OR or AND of the two words and compare the
5481 result. */
5482 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5483 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5484 tem = expand_binop (word_mode,
5485 op1 == const0_rtx ? ior_optab : and_optab,
5486 op00, op01, NULL_RTX, unsignedp,
5487 OPTAB_DIRECT);
5488
5489 if (tem != 0)
5490 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5491 unsignedp, normalizep);
5492 }
5493 else if ((code == LT || code == GE) && op1 == const0_rtx)
5494 {
5495 rtx op0h;
5496
5497 /* If testing the sign bit, can just test on high word. */
5498 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5499 subreg_highpart_offset (word_mode,
5500 int_mode));
5501 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5502 unsignedp, normalizep);
5503 }
5504 else
5505 tem = NULL_RTX;
5506
5507 if (tem)
5508 {
5509 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5510 return tem;
5511 if (!target)
5512 target = gen_reg_rtx (target_mode);
5513
5514 convert_move (target, tem,
5515 !val_signbit_known_set_p (word_mode,
5516 (normalizep ? normalizep
5517 : STORE_FLAG_VALUE)));
5518 return target;
5519 }
5520 }
5521
5522   /* If this is A < 0 or A >= 0, we can do this by taking the one's
5523 complement of A (for GE) and shifting the sign bit to the low bit. */
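  /* For example, with a 32-bit mode and STORE_FLAG_VALUE == 1, "A < 0"
     becomes "(unsigned) A >> 31" and "A >= 0" becomes
     "(unsigned) ~A >> 31"; for a 0/-1 result an arithmetic shift is used
     instead.  */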
5524 if (op1 == const0_rtx && (code == LT || code == GE)
5525 && is_int_mode (mode, &int_mode)
5526 && (normalizep || STORE_FLAG_VALUE == 1
5527 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5528 {
5529 scalar_int_mode int_target_mode;
5530 subtarget = target;
5531
5532 if (!target)
5533 int_target_mode = int_mode;
5534 else
5535 {
5536 /* If the result is to be wider than OP0, it is best to convert it
5537 first. If it is to be narrower, it is *incorrect* to convert it
5538 first. */
5539 int_target_mode = as_a <scalar_int_mode> (target_mode);
5540 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5541 {
5542 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5543 int_mode = int_target_mode;
5544 }
5545 }
5546
5547 if (int_target_mode != int_mode)
5548 subtarget = 0;
5549
5550 if (code == GE)
5551 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5552 ((STORE_FLAG_VALUE == 1 || normalizep)
5553 ? 0 : subtarget), 0);
5554
5555 if (STORE_FLAG_VALUE == 1 || normalizep)
5556 /* If we are supposed to produce a 0/1 value, we want to do
5557 a logical shift from the sign bit to the low-order bit; for
5558 a -1/0 value, we do an arithmetic shift. */
5559 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5560 GET_MODE_BITSIZE (int_mode) - 1,
5561 subtarget, normalizep != -1);
5562
5563 if (int_mode != int_target_mode)
5564 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5565
5566 return op0;
5567 }
5568
5569 mclass = GET_MODE_CLASS (mode);
5570 FOR_EACH_MODE_FROM (compare_mode, mode)
5571 {
5572 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5573 icode = optab_handler (cstore_optab, optab_mode);
5574 if (icode != CODE_FOR_nothing)
5575 {
5576 do_pending_stack_adjust ();
5577 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5578 unsignedp, op0, op1, normalizep, target_mode);
5579 if (tem)
5580 return tem;
5581
5582 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5583 {
5584 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5585 unsignedp, op1, op0, normalizep, target_mode);
5586 if (tem)
5587 return tem;
5588 }
5589 break;
5590 }
5591 }
5592
5593 return 0;
5594 }
5595
5596 /* Subroutine of emit_store_flag that handles cases in which the operands
5597 are scalar integers. SUBTARGET is the target to use for temporary
5598 operations and TRUEVAL is the value to store when the condition is
5599 true. All other arguments are as for emit_store_flag. */
5600
5601 rtx
5602 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5603 rtx op1, scalar_int_mode mode, int unsignedp,
5604 int normalizep, rtx trueval)
5605 {
5606 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5607 rtx_insn *last = get_last_insn ();
5608 rtx tem;
5609
5610 /* If this is an equality comparison of integers, we can try to exclusive-or
5611 (or subtract) the two operands and use a recursive call to try the
5612 comparison with zero. Don't do any of these cases if branches are
5613 very cheap. */
5614
5615 if ((code == EQ || code == NE) && op1 != const0_rtx)
5616 {
5617 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5618 OPTAB_WIDEN);
5619
5620 if (tem == 0)
5621 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5622 OPTAB_WIDEN);
5623 if (tem != 0)
5624 tem = emit_store_flag (target, code, tem, const0_rtx,
5625 mode, unsignedp, normalizep);
5626 if (tem != 0)
5627 return tem;
5628
5629 delete_insns_since (last);
5630 }
5631
5632 /* For integer comparisons, try the reverse comparison. However, for
5633      small X, and if we'd have to extend anyway, implementing "X != 0"
5634 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5635 rtx_code rcode = reverse_condition (code);
5636 if (can_compare_p (rcode, mode, ccp_store_flag)
5637 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5638 && code == NE
5639 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5640 && op1 == const0_rtx))
5641 {
5642 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5643 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5644
5645 /* Again, for the reverse comparison, use either an addition or a XOR. */
5646 if (want_add
5647 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5648 optimize_insn_for_speed_p ()) == 0)
5649 {
5650 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5651 STORE_FLAG_VALUE, target_mode);
5652 if (tem != 0)
5653 tem = expand_binop (target_mode, add_optab, tem,
5654 gen_int_mode (normalizep, target_mode),
5655 target, 0, OPTAB_WIDEN);
5656 }
5657 else if (!want_add
5658 && rtx_cost (trueval, mode, XOR, 1,
5659 optimize_insn_for_speed_p ()) == 0)
5660 {
5661 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5662 normalizep, target_mode);
5663 if (tem != 0)
5664 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5665 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5666 }
5667
5668 if (tem != 0)
5669 return tem;
5670 delete_insns_since (last);
5671 }
5672
5673 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5674 the constant zero. Reject all other comparisons at this point. Only
5675 do LE and GT if branches are expensive since they are expensive on
5676 2-operand machines. */
5677
5678 if (op1 != const0_rtx
5679 || (code != EQ && code != NE
5680 && (BRANCH_COST (optimize_insn_for_speed_p (),
5681 false) <= 1 || (code != LE && code != GT))))
5682 return 0;
5683
5684 /* Try to put the result of the comparison in the sign bit. Assume we can't
5685 do the necessary operation below. */
5686
5687 tem = 0;
5688
5689 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5690 the sign bit set. */
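  /* For example: for A == 0, A | (A - 1) is 0 | -1 == -1 (sign bit set);
     for A == 1 it is 1 | 0 == 1 (sign bit clear); and for negative A the
     sign bit of A itself is already set.  */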
5691
5692 if (code == LE)
5693 {
5694 /* This is destructive, so SUBTARGET can't be OP0. */
5695 if (rtx_equal_p (subtarget, op0))
5696 subtarget = 0;
5697
5698 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5699 OPTAB_WIDEN);
5700 if (tem)
5701 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5702 OPTAB_WIDEN);
5703 }
5704
5705   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5706 number of bits in the mode of OP0, minus one. */
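  /* With 32-bit A, for example: for A == 5 this is 0 - 5 == -5 (sign bit
     set); for A == 0 it is 0; and for A == -3 it is -1 - (-3) == 2
     (sign bit clear).  */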
5707
5708 if (code == GT)
5709 {
5710 if (rtx_equal_p (subtarget, op0))
5711 subtarget = 0;
5712
5713 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5714 GET_MODE_BITSIZE (mode) - 1,
5715 subtarget, 0);
5716 if (tem)
5717 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5718 OPTAB_WIDEN);
5719 }
5720
5721 if (code == EQ || code == NE)
5722 {
5723 /* For EQ or NE, one way to do the comparison is to apply an operation
5724 that converts the operand into a positive number if it is nonzero
5725 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5726 for NE we negate. This puts the result in the sign bit. Then we
5727 normalize with a shift, if needed.
5728
5729 Two operations that can do the above actions are ABS and FFS, so try
5730 them. If that doesn't work, and MODE is smaller than a full word,
5731 we can use zero-extension to the wider mode (an unsigned conversion)
5732 as the operation. */
5733
5734 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5735 that is compensated by the subsequent overflow when subtracting
5736 one / negating. */
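      /* For example, with ABS and code == EQ: abs (A) - 1 is -1 for A == 0
	 (sign bit set) and nonnegative for every other A, including
	 A == INT_MIN, where the subtraction wraps to INT_MAX.  */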
5737
5738 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5739 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5740 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5741 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5742 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5743 {
5744 tem = convert_modes (word_mode, mode, op0, 1);
5745 mode = word_mode;
5746 }
5747
5748 if (tem != 0)
5749 {
5750 if (code == EQ)
5751 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5752 0, OPTAB_WIDEN);
5753 else
5754 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5755 }
5756
5757 /* If we couldn't do it that way, for NE we can "or" the two's complement
5758 of the value with itself. For EQ, we take the one's complement of
5759 that "or", which is an extra insn, so we only handle EQ if branches
5760 are expensive. */
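      /* For example, (-A) | A has the sign bit set exactly when A is
	 nonzero, since for nonzero A at least one of A and -A is negative
	 (including A == INT_MIN, where -A == A).  */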
5761
5762 if (tem == 0
5763 && (code == NE
5764 || BRANCH_COST (optimize_insn_for_speed_p (),
5765 false) > 1))
5766 {
5767 if (rtx_equal_p (subtarget, op0))
5768 subtarget = 0;
5769
5770 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5771 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5772 OPTAB_WIDEN);
5773
5774 if (tem && code == EQ)
5775 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5776 }
5777 }
5778
5779 if (tem && normalizep)
5780 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5781 GET_MODE_BITSIZE (mode) - 1,
5782 subtarget, normalizep == 1);
5783
5784 if (tem)
5785 {
5786 if (!target)
5787 ;
5788 else if (GET_MODE (tem) != target_mode)
5789 {
5790 convert_move (target, tem, 0);
5791 tem = target;
5792 }
5793 else if (!subtarget)
5794 {
5795 emit_move_insn (target, tem);
5796 tem = target;
5797 }
5798 }
5799 else
5800 delete_insns_since (last);
5801
5802 return tem;
5803 }
5804
5805 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5806 and storing in TARGET. Normally return TARGET.
5807 Return 0 if that cannot be done.
5808
5809 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5810 it is VOIDmode, they cannot both be CONST_INT.
5811
5812 UNSIGNEDP is for the case where we have to widen the operands
5813 to perform the operation. It says to use zero-extension.
5814
5815 NORMALIZEP is 1 if we should convert the result to be either zero
5816    or one.  NORMALIZEP is -1 if we should convert the result to be
5817 either zero or -1. If NORMALIZEP is zero, the result will be left
5818 "raw" out of the scc insn. */
5819
5820 rtx
5821 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5822 machine_mode mode, int unsignedp, int normalizep)
5823 {
5824 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5825 enum rtx_code rcode;
5826 rtx subtarget;
5827 rtx tem, trueval;
5828 rtx_insn *last;
5829
5830 /* If we compare constants, we shouldn't use a store-flag operation,
5831 but a constant load. We can get there via the vanilla route that
5832 usually generates a compare-branch sequence, but will in this case
5833 fold the comparison to a constant, and thus elide the branch. */
5834 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5835 return NULL_RTX;
5836
5837 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5838 target_mode);
5839 if (tem)
5840 return tem;
5841
5842 /* If we reached here, we can't do this with a scc insn, however there
5843 are some comparisons that can be done in other ways. Don't do any
5844 of these cases if branches are very cheap. */
5845 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5846 return 0;
5847
5848 /* See what we need to return. We can only return a 1, -1, or the
5849 sign bit. */
5850
5851 if (normalizep == 0)
5852 {
5853 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5854 normalizep = STORE_FLAG_VALUE;
5855
5856 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5857 ;
5858 else
5859 return 0;
5860 }
5861
5862 last = get_last_insn ();
5863
5864 /* If optimizing, use different pseudo registers for each insn, instead
5865 of reusing the same pseudo. This leads to better CSE, but slows
5866 down the compiler, since there are more pseudos. */
5867 subtarget = (!optimize
5868 && (target_mode == mode)) ? target : NULL_RTX;
5869 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5870
5871 /* For floating-point comparisons, try the reverse comparison or try
5872 changing the "orderedness" of the comparison. */
5873 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5874 {
5875 enum rtx_code first_code;
5876 bool and_them;
5877
5878 rcode = reverse_condition_maybe_unordered (code);
5879 if (can_compare_p (rcode, mode, ccp_store_flag)
5880 && (code == ORDERED || code == UNORDERED
5881 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5882 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5883 {
5884 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5885 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5886
5887 /* For the reverse comparison, use either an addition or a XOR. */
5888 if (want_add
5889 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5890 optimize_insn_for_speed_p ()) == 0)
5891 {
5892 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5893 STORE_FLAG_VALUE, target_mode);
5894 if (tem)
5895 return expand_binop (target_mode, add_optab, tem,
5896 gen_int_mode (normalizep, target_mode),
5897 target, 0, OPTAB_WIDEN);
5898 }
5899 else if (!want_add
5900 && rtx_cost (trueval, mode, XOR, 1,
5901 optimize_insn_for_speed_p ()) == 0)
5902 {
5903 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5904 normalizep, target_mode);
5905 if (tem)
5906 return expand_binop (target_mode, xor_optab, tem, trueval,
5907 target, INTVAL (trueval) >= 0,
5908 OPTAB_WIDEN);
5909 }
5910 }
5911
5912 delete_insns_since (last);
5913
5914 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5915 if (code == ORDERED || code == UNORDERED)
5916 return 0;
5917
5918 and_them = split_comparison (code, mode, &first_code, &code);
5919
5920 /* If there are no NaNs, the first comparison should always fall through.
5921 Effectively change the comparison to the other one. */
5922 if (!HONOR_NANS (mode))
5923 {
5924 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5925 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5926 target_mode);
5927 }
5928
5929 if (!HAVE_conditional_move)
5930 return 0;
5931
5932 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5933 conditional move. */
5934 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5935 normalizep, target_mode);
5936 if (tem == 0)
5937 return 0;
5938
5939 if (and_them)
5940 tem = emit_conditional_move (target, code, op0, op1, mode,
5941 tem, const0_rtx, GET_MODE (tem), 0);
5942 else
5943 tem = emit_conditional_move (target, code, op0, op1, mode,
5944 trueval, tem, GET_MODE (tem), 0);
5945
5946 if (tem == 0)
5947 delete_insns_since (last);
5948 return tem;
5949 }
5950
5951 /* The remaining tricks only apply to integer comparisons. */
5952
5953 scalar_int_mode int_mode;
5954 if (is_int_mode (mode, &int_mode))
5955 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
5956 unsignedp, normalizep, trueval);
5957
5958 return 0;
5959 }
5960
5961 /* Like emit_store_flag, but always succeeds. */
5962
5963 rtx
5964 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5965 machine_mode mode, int unsignedp, int normalizep)
5966 {
5967 rtx tem;
5968 rtx_code_label *label;
5969 rtx trueval, falseval;
5970
5971 /* First see if emit_store_flag can do the job. */
5972 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5973 if (tem != 0)
5974 return tem;
5975
5976 if (!target)
5977 target = gen_reg_rtx (word_mode);
5978
5979 /* If this failed, we have to do this with set/compare/jump/set code.
5980 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5981 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5982 if (code == NE
5983 && GET_MODE_CLASS (mode) == MODE_INT
5984 && REG_P (target)
5985 && op0 == target
5986 && op1 == const0_rtx)
5987 {
5988 label = gen_label_rtx ();
5989 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5990 NULL_RTX, NULL, label,
5991 profile_probability::uninitialized ());
5992 emit_move_insn (target, trueval);
5993 emit_label (label);
5994 return target;
5995 }
5996
5997 if (!REG_P (target)
5998 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5999 target = gen_reg_rtx (GET_MODE (target));
6000
6001 /* Jump in the right direction if the target cannot implement CODE
6002 but can jump on its reverse condition. */
6003 falseval = const0_rtx;
6004 if (! can_compare_p (code, mode, ccp_jump)
6005 && (! FLOAT_MODE_P (mode)
6006 || code == ORDERED || code == UNORDERED
6007 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6008 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6009 {
6010 enum rtx_code rcode;
6011 if (FLOAT_MODE_P (mode))
6012 rcode = reverse_condition_maybe_unordered (code);
6013 else
6014 rcode = reverse_condition (code);
6015
6016 /* Canonicalize to UNORDERED for the libcall. */
6017 if (can_compare_p (rcode, mode, ccp_jump)
6018 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6019 {
6020 falseval = trueval;
6021 trueval = const0_rtx;
6022 code = rcode;
6023 }
6024 }
6025
6026 emit_move_insn (target, trueval);
6027 label = gen_label_rtx ();
6028 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6029 label, profile_probability::uninitialized ());
6030
6031 emit_move_insn (target, falseval);
6032 emit_label (label);
6033
6034 return target;
6035 }
6036 \f
6037 /* Perform possibly multi-word comparison and conditional jump to LABEL
6038    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
6039 now a thin wrapper around do_compare_rtx_and_jump. */
6040
6041 static void
6042 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6043 rtx_code_label *label)
6044 {
6045 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6046 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6047 NULL, label, profile_probability::uninitialized ());
6048 }