1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2014 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "recog.h"
36 #include "langhooks.h"
37 #include "df.h"
38 #include "target.h"
39 #include "expmed.h"
40
41 struct target_expmed default_target_expmed;
42 #if SWITCHABLE_TARGET
43 struct target_expmed *this_target_expmed = &default_target_expmed;
44 #endif
45
46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 rtx);
51 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT, rtx, int);
62 static rtx extract_fixed_bit_field_1 (enum machine_mode, rtx,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, rtx, int);
65 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
66 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
67 unsigned HOST_WIDE_INT, int);
68 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
69 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71
72 /* Return a constant integer mask value of mode MODE with BITSIZE ones
73 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
74 The mask is truncated if necessary to the width of mode MODE. The
75 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
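/* For example, mask_rtx (SImode, 4, 8, false) yields a mask with bits
   4 through 11 set (value 0xff0); with COMPLEMENT true, every other bit
   of the SImode value is set instead.  */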
76
77 static inline rtx
78 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, bool complement)
79 {
80 return immed_wide_int_const
81 (wi::shifted_mask (bitpos, bitsize, complement,
82 GET_MODE_PRECISION (mode)), mode);
83 }
84
 85 /* Test whether a value is zero or a power of two.  */
86 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
87 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
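/* For example, 8 & 7 == 0, so 8 passes the test; 6 & 5 == 4, so 6 does
   not.  Zero also passes, since 0 & (0 - 1) == 0.  */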
88
89 struct init_expmed_rtl
90 {
91 struct rtx_def reg;
92 struct rtx_def plus;
93 struct rtx_def neg;
94 struct rtx_def mult;
95 struct rtx_def sdiv;
96 struct rtx_def udiv;
97 struct rtx_def sdiv_32;
98 struct rtx_def smod_32;
99 struct rtx_def wide_mult;
100 struct rtx_def wide_lshr;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift;
103 struct rtx_def shift_mult;
104 struct rtx_def shift_add;
105 struct rtx_def shift_sub0;
106 struct rtx_def shift_sub1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
109
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
112 };
113
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
117 {
118 int to_size, from_size;
119 rtx which;
120
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
128
129 /* Assume cost of zero-extend and sign-extend is the same. */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
131
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
134 }
135
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
139 {
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
142
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
144
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
161
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
167
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
172
173 set_shift_cost (speed, mode, 0, 0);
174 {
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
179 }
180
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
183 {
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
186
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
191 }
192
193 if (SCALAR_INT_MODE_P (mode))
194 {
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
198 }
199 if (GET_MODE_CLASS (mode) == MODE_INT)
200 {
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
203 {
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
208
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
213 }
214 }
215 }
216
217 void
218 init_expmed (void)
219 {
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
223
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
226 {
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
229 }
230
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
234
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
238
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
241
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
245
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
249
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
253
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
257
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
261
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
264
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
268
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
271
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
274
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
277
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
280
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
284
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
288
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
292
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
295
296 for (speed = 0; speed < 2; speed++)
297 {
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
300
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
304
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
309
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
314 }
315
316 if (alg_hash_used_p ())
317 {
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
320 }
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
324 }
325
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
329
330 rtx
331 negate_rtx (enum machine_mode mode, rtx x)
332 {
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
334
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
337
338 return result;
339 }
340
341 /* Adjust bitfield memory MEM so that it points to the first unit of mode
342 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
343 If MODE is BLKmode, return a reference to every byte in the bitfield.
344 Set *NEW_BITNUM to the bit position of the field within the new memory. */
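/* For example, narrowing to QImode with BITNUM == 37 (assuming 8-bit
   units) advances the address by 4 bytes and sets *NEW_BITNUM to 5.  */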
345
346 static rtx
347 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
348 unsigned HOST_WIDE_INT bitsize,
349 unsigned HOST_WIDE_INT bitnum,
350 unsigned HOST_WIDE_INT *new_bitnum)
351 {
352 if (mode == BLKmode)
353 {
354 *new_bitnum = bitnum % BITS_PER_UNIT;
355 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
356 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
357 / BITS_PER_UNIT);
358 return adjust_bitfield_address_size (mem, mode, offset, size);
359 }
360 else
361 {
362 unsigned int unit = GET_MODE_BITSIZE (mode);
363 *new_bitnum = bitnum % unit;
364 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
365 return adjust_bitfield_address (mem, mode, offset);
366 }
367 }
368
369 /* The caller wants to perform insertion or extraction PATTERN on a
370 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
371 BITREGION_START and BITREGION_END are as for store_bit_field
372 and FIELDMODE is the natural mode of the field.
373
374 Search for a mode that is compatible with the memory access
375 restrictions and (where applicable) with a register insertion or
376 extraction. Return the new memory on success, storing the adjusted
377 bit position in *NEW_BITNUM. Return null otherwise. */
378
379 static rtx
380 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
381 rtx op0, HOST_WIDE_INT bitsize,
382 HOST_WIDE_INT bitnum,
383 unsigned HOST_WIDE_INT bitregion_start,
384 unsigned HOST_WIDE_INT bitregion_end,
385 enum machine_mode fieldmode,
386 unsigned HOST_WIDE_INT *new_bitnum)
387 {
388 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
389 bitregion_end, MEM_ALIGN (op0),
390 MEM_VOLATILE_P (op0));
391 enum machine_mode best_mode;
392 if (iter.next_mode (&best_mode))
393 {
394 /* We can use a memory in BEST_MODE. See whether this is true for
395 any wider modes. All other things being equal, we prefer to
396 use the widest mode possible because it tends to expose more
397 CSE opportunities. */
398 if (!iter.prefer_smaller_modes ())
399 {
400 /* Limit the search to the mode required by the corresponding
401 register insertion or extraction instruction, if any. */
402 enum machine_mode limit_mode = word_mode;
403 extraction_insn insn;
404 if (get_best_reg_extraction_insn (&insn, pattern,
405 GET_MODE_BITSIZE (best_mode),
406 fieldmode))
407 limit_mode = insn.field_mode;
408
409 enum machine_mode wider_mode;
410 while (iter.next_mode (&wider_mode)
411 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
412 best_mode = wider_mode;
413 }
414 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
415 new_bitnum);
416 }
417 return NULL_RTX;
418 }
419
420 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
421 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
422 offset is then BITNUM / BITS_PER_UNIT. */
423
424 static bool
425 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
426 unsigned HOST_WIDE_INT bitsize,
427 enum machine_mode struct_mode)
428 {
429 if (BYTES_BIG_ENDIAN)
430 return (bitnum % BITS_PER_UNIT == 0
431 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
432 || (bitnum + bitsize) % BITS_PER_WORD == 0));
433 else
434 return bitnum % BITS_PER_WORD == 0;
435 }
436
437 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
438 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
439 Return false if the access would touch memory outside the range
440 BITREGION_START to BITREGION_END for conformance to the C++ memory
441 model. */
442
443 static bool
444 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
445 unsigned HOST_WIDE_INT bitnum,
446 enum machine_mode fieldmode,
447 unsigned HOST_WIDE_INT bitregion_start,
448 unsigned HOST_WIDE_INT bitregion_end)
449 {
450 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
451
452 /* -fstrict-volatile-bitfields must be enabled and we must have a
453 volatile MEM. */
454 if (!MEM_P (op0)
455 || !MEM_VOLATILE_P (op0)
456 || flag_strict_volatile_bitfields <= 0)
457 return false;
458
459 /* Non-integral modes likely only happen with packed structures.
460 Punt. */
461 if (!SCALAR_INT_MODE_P (fieldmode))
462 return false;
463
464 /* The bit size must not be larger than the field mode, and
465 the field mode must not be larger than a word. */
466 if (bitsize > modesize || modesize > BITS_PER_WORD)
467 return false;
468
469 /* Check for cases of unaligned fields that must be split. */
470 if (bitnum % BITS_PER_UNIT + bitsize > modesize
471 || (STRICT_ALIGNMENT
472 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
473 return false;
474
475 /* Check for cases where the C++ memory model applies. */
476 if (bitregion_end != 0
477 && (bitnum - bitnum % modesize < bitregion_start
478 || bitnum - bitnum % modesize + modesize > bitregion_end))
479 return false;
480
481 return true;
482 }
483
 484 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
 485 bit number BITNUM can be treated as a simple value of mode MODE. */
486
487 static bool
488 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
489 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
490 {
491 return (MEM_P (op0)
492 && bitnum % BITS_PER_UNIT == 0
493 && bitsize == GET_MODE_BITSIZE (mode)
494 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
495 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
496 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
497 }
498 \f
499 /* Try to use instruction INSV to store VALUE into a field of OP0.
500 BITSIZE and BITNUM are as for store_bit_field. */
501
502 static bool
503 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
504 unsigned HOST_WIDE_INT bitsize,
505 unsigned HOST_WIDE_INT bitnum,
506 rtx value)
507 {
508 struct expand_operand ops[4];
509 rtx value1;
510 rtx xop0 = op0;
511 rtx last = get_last_insn ();
512 bool copy_back = false;
513
514 enum machine_mode op_mode = insv->field_mode;
515 unsigned int unit = GET_MODE_BITSIZE (op_mode);
516 if (bitsize == 0 || bitsize > unit)
517 return false;
518
519 if (MEM_P (xop0))
520 /* Get a reference to the first byte of the field. */
521 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
522 &bitnum);
523 else
524 {
525 /* Convert from counting within OP0 to counting in OP_MODE. */
526 if (BYTES_BIG_ENDIAN)
527 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
528
529 /* If xop0 is a register, we need it in OP_MODE
530 to make it acceptable to the format of insv. */
531 if (GET_CODE (xop0) == SUBREG)
532 /* We can't just change the mode, because this might clobber op0,
533 and we will need the original value of op0 if insv fails. */
534 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
535 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
536 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
537 }
538
539 /* If the destination is a paradoxical subreg such that we need a
540 truncate to the inner mode, perform the insertion on a temporary and
541 truncate the result to the original destination. Note that we can't
542 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
543 X) 0)) is (reg:N X). */
544 if (GET_CODE (xop0) == SUBREG
545 && REG_P (SUBREG_REG (xop0))
546 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
547 op_mode))
548 {
549 rtx tem = gen_reg_rtx (op_mode);
550 emit_move_insn (tem, xop0);
551 xop0 = tem;
552 copy_back = true;
553 }
554
555 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
556 "backwards" from the size of the unit we are inserting into.
557 Otherwise, we count bits from the most significant on a
558 BYTES/BITS_BIG_ENDIAN machine. */
559
560 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
561 bitnum = unit - bitsize - bitnum;
562
563 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
564 value1 = value;
565 if (GET_MODE (value) != op_mode)
566 {
567 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
568 {
569 /* Optimization: Don't bother really extending VALUE
570 if it has all the bits we will actually use. However,
571 if we must narrow it, be sure we do it correctly. */
572
573 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
574 {
575 rtx tmp;
576
577 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
578 if (! tmp)
579 tmp = simplify_gen_subreg (op_mode,
580 force_reg (GET_MODE (value),
581 value1),
582 GET_MODE (value), 0);
583 value1 = tmp;
584 }
585 else
586 value1 = gen_lowpart (op_mode, value1);
587 }
588 else if (CONST_INT_P (value))
589 value1 = gen_int_mode (INTVAL (value), op_mode);
590 else
591 /* Parse phase is supposed to make VALUE's data type
592 match that of the component reference, which is a type
593 at least as wide as the field; so VALUE should have
594 a mode that corresponds to that type. */
595 gcc_assert (CONSTANT_P (value));
596 }
597
598 create_fixed_operand (&ops[0], xop0);
599 create_integer_operand (&ops[1], bitsize);
600 create_integer_operand (&ops[2], bitnum);
601 create_input_operand (&ops[3], value1, op_mode);
602 if (maybe_expand_insn (insv->icode, 4, ops))
603 {
604 if (copy_back)
605 convert_move (op0, xop0, true);
606 return true;
607 }
608 delete_insns_since (last);
609 return false;
610 }
611
612 /* A subroutine of store_bit_field, with the same arguments. Return true
613 if the operation could be implemented.
614
615 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
616 no other way of implementing the operation. If FALLBACK_P is false,
617 return false instead. */
618
619 static bool
620 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
621 unsigned HOST_WIDE_INT bitnum,
622 unsigned HOST_WIDE_INT bitregion_start,
623 unsigned HOST_WIDE_INT bitregion_end,
624 enum machine_mode fieldmode,
625 rtx value, bool fallback_p)
626 {
627 rtx op0 = str_rtx;
628 rtx orig_value;
629
630 while (GET_CODE (op0) == SUBREG)
631 {
632 /* The following line once was done only if WORDS_BIG_ENDIAN,
633 but I think that is a mistake. WORDS_BIG_ENDIAN is
634 meaningful at a much higher level; when structures are copied
635 between memory and regs, the higher-numbered regs
636 always get higher addresses. */
637 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
638 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
639 int byte_offset = 0;
640
641 /* Paradoxical subregs need special handling on big endian machines. */
642 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
643 {
644 int difference = inner_mode_size - outer_mode_size;
645
646 if (WORDS_BIG_ENDIAN)
647 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
648 if (BYTES_BIG_ENDIAN)
649 byte_offset += difference % UNITS_PER_WORD;
650 }
651 else
652 byte_offset = SUBREG_BYTE (op0);
653
654 bitnum += byte_offset * BITS_PER_UNIT;
655 op0 = SUBREG_REG (op0);
656 }
657
658 /* No action is needed if the target is a register and if the field
659 lies completely outside that register. This can occur if the source
660 code contains an out-of-bounds access to a small array. */
661 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
662 return true;
663
664 /* Use vec_set patterns for inserting parts of vectors whenever
665 available. */
666 if (VECTOR_MODE_P (GET_MODE (op0))
667 && !MEM_P (op0)
668 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
669 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
670 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
671 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
672 {
673 struct expand_operand ops[3];
674 enum machine_mode outermode = GET_MODE (op0);
675 enum machine_mode innermode = GET_MODE_INNER (outermode);
676 enum insn_code icode = optab_handler (vec_set_optab, outermode);
677 int pos = bitnum / GET_MODE_BITSIZE (innermode);
678
679 create_fixed_operand (&ops[0], op0);
680 create_input_operand (&ops[1], value, innermode);
681 create_integer_operand (&ops[2], pos);
682 if (maybe_expand_insn (icode, 3, ops))
683 return true;
684 }
685
686 /* If the target is a register, overwriting the entire object, or storing
687 a full-word or multi-word field can be done with just a SUBREG. */
688 if (!MEM_P (op0)
689 && bitsize == GET_MODE_BITSIZE (fieldmode)
690 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
691 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
692 {
693 /* Use the subreg machinery either to narrow OP0 to the required
694 words or to cope with mode punning between equal-sized modes.
695 In the latter case, use subreg on the rhs side, not lhs. */
696 rtx sub;
697
698 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
699 {
700 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
701 if (sub)
702 {
703 emit_move_insn (op0, sub);
704 return true;
705 }
706 }
707 else
708 {
709 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
710 bitnum / BITS_PER_UNIT);
711 if (sub)
712 {
713 emit_move_insn (sub, value);
714 return true;
715 }
716 }
717 }
718
719 /* If the target is memory, storing any naturally aligned field can be
720 done with a simple store. For targets that support fast unaligned
721 memory, any naturally sized, unit aligned field can be done directly. */
722 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
723 {
724 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
725 emit_move_insn (op0, value);
726 return true;
727 }
728
729 /* Make sure we are playing with integral modes. Pun with subregs
730 if we aren't. This must come after the entire register case above,
731 since that case is valid for any mode. The following cases are only
732 valid for integral modes. */
733 {
734 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
735 if (imode != GET_MODE (op0))
736 {
737 if (MEM_P (op0))
738 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
739 else
740 {
741 gcc_assert (imode != BLKmode);
742 op0 = gen_lowpart (imode, op0);
743 }
744 }
745 }
746
747 /* Storing an lsb-aligned field in a register
748 can be done with a movstrict instruction. */
749
750 if (!MEM_P (op0)
751 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
752 && bitsize == GET_MODE_BITSIZE (fieldmode)
753 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
754 {
755 struct expand_operand ops[2];
756 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
757 rtx arg0 = op0;
758 unsigned HOST_WIDE_INT subreg_off;
759
760 if (GET_CODE (arg0) == SUBREG)
761 {
762 /* Else we've got some float mode source being extracted into
763 a different float mode destination -- this combination of
764 subregs results in Severe Tire Damage. */
765 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
766 || GET_MODE_CLASS (fieldmode) == MODE_INT
767 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
768 arg0 = SUBREG_REG (arg0);
769 }
770
771 subreg_off = bitnum / BITS_PER_UNIT;
772 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
773 {
774 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
775
776 create_fixed_operand (&ops[0], arg0);
777 /* Shrink the source operand to FIELDMODE. */
778 create_convert_operand_to (&ops[1], value, fieldmode, false);
779 if (maybe_expand_insn (icode, 2, ops))
780 return true;
781 }
782 }
783
784 /* Handle fields bigger than a word. */
785
786 if (bitsize > BITS_PER_WORD)
787 {
788 /* Here we transfer the words of the field
789 in the order least significant first.
790 This is because the most significant word is the one which may
791 be less than full.
792 However, only do that if the value is not BLKmode. */
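     /* For example, a 40-bit field with BITS_PER_WORD == 32 is transferred
	as two words; the most significant piece, handled last, carries only
	the remaining 8 bits.  */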
793
794 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
795 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
796 unsigned int i;
797 rtx last;
798
799 /* This is the mode we must force value to, so that there will be enough
800 subwords to extract. Note that fieldmode will often (always?) be
801 VOIDmode, because that is what store_field uses to indicate that this
802 is a bit field, but passing VOIDmode to operand_subword_force
803 is not allowed. */
804 fieldmode = GET_MODE (value);
805 if (fieldmode == VOIDmode)
806 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
807
808 last = get_last_insn ();
809 for (i = 0; i < nwords; i++)
810 {
811 /* If I is 0, use the low-order word in both field and target;
812 if I is 1, use the next to lowest word; and so on. */
813 unsigned int wordnum = (backwards
814 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
815 - i - 1
816 : i);
817 unsigned int bit_offset = (backwards
818 ? MAX ((int) bitsize - ((int) i + 1)
819 * BITS_PER_WORD,
820 0)
821 : (int) i * BITS_PER_WORD);
822 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
823 unsigned HOST_WIDE_INT new_bitsize =
824 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
825
826 /* If the remaining chunk doesn't have full wordsize we have
827 to make sure that for big endian machines the higher order
828 bits are used. */
829 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
830 value_word = simplify_expand_binop (word_mode, lshr_optab,
831 value_word,
832 GEN_INT (BITS_PER_WORD
833 - new_bitsize),
834 NULL_RTX, true,
835 OPTAB_LIB_WIDEN);
836
837 if (!store_bit_field_1 (op0, new_bitsize,
838 bitnum + bit_offset,
839 bitregion_start, bitregion_end,
840 word_mode,
841 value_word, fallback_p))
842 {
843 delete_insns_since (last);
844 return false;
845 }
846 }
847 return true;
848 }
849
850 /* If VALUE has a floating-point or complex mode, access it as an
851 integer of the corresponding size. This can occur on a machine
852 with 64 bit registers that uses SFmode for float. It can also
853 occur for unaligned float or complex fields. */
854 orig_value = value;
855 if (GET_MODE (value) != VOIDmode
856 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
857 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
858 {
859 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
860 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
861 }
862
863 /* If OP0 is a multi-word register, narrow it to the affected word.
864 If the region spans two words, defer to store_split_bit_field. */
865 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
866 {
867 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
868 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
869 gcc_assert (op0);
870 bitnum %= BITS_PER_WORD;
871 if (bitnum + bitsize > BITS_PER_WORD)
872 {
873 if (!fallback_p)
874 return false;
875
876 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
877 bitregion_end, value);
878 return true;
879 }
880 }
881
882 /* From here on we can assume that the field to be stored in fits
883 within a word. If the destination is a register, it too fits
884 in a word. */
885
886 extraction_insn insv;
887 if (!MEM_P (op0)
888 && get_best_reg_extraction_insn (&insv, EP_insv,
889 GET_MODE_BITSIZE (GET_MODE (op0)),
890 fieldmode)
891 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
892 return true;
893
894 /* If OP0 is a memory, try copying it to a register and seeing if a
895 cheap register alternative is available. */
896 if (MEM_P (op0))
897 {
898 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
899 fieldmode)
900 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
901 return true;
902
903 rtx last = get_last_insn ();
904
905 /* Try loading part of OP0 into a register, inserting the bitfield
906 into that, and then copying the result back to OP0. */
907 unsigned HOST_WIDE_INT bitpos;
908 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
909 bitregion_start, bitregion_end,
910 fieldmode, &bitpos);
911 if (xop0)
912 {
913 rtx tempreg = copy_to_reg (xop0);
914 if (store_bit_field_1 (tempreg, bitsize, bitpos,
915 bitregion_start, bitregion_end,
916 fieldmode, orig_value, false))
917 {
918 emit_move_insn (xop0, tempreg);
919 return true;
920 }
921 delete_insns_since (last);
922 }
923 }
924
925 if (!fallback_p)
926 return false;
927
928 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
929 bitregion_end, value);
930 return true;
931 }
932
933 /* Generate code to store value from rtx VALUE
934 into a bit-field within structure STR_RTX
935 containing BITSIZE bits starting at bit BITNUM.
936
937 BITREGION_START is bitpos of the first bitfield in this region.
938 BITREGION_END is the bitpos of the ending bitfield in this region.
 939 These two fields are 0 if the C++ memory model does not apply,
 940 or if we are not interested in keeping track of bitfield regions.
941
942 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
943
944 void
945 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
946 unsigned HOST_WIDE_INT bitnum,
947 unsigned HOST_WIDE_INT bitregion_start,
948 unsigned HOST_WIDE_INT bitregion_end,
949 enum machine_mode fieldmode,
950 rtx value)
951 {
952 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
953 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
954 bitregion_start, bitregion_end))
955 {
956 /* Storing any naturally aligned field can be done with a simple
957 store. For targets that support fast unaligned memory, any
958 naturally sized, unit aligned field can be done directly. */
959 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
960 {
961 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
962 bitnum / BITS_PER_UNIT);
963 emit_move_insn (str_rtx, value);
964 }
965 else
966 {
967 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
968 &bitnum);
969 /* Explicitly override the C/C++ memory model; ignore the
970 bit range so that we can do the access in the mode mandated
971 by -fstrict-volatile-bitfields instead. */
972 store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
973 }
974
975 return;
976 }
977
978 /* Under the C++0x memory model, we must not touch bits outside the
979 bit region. Adjust the address to start at the beginning of the
980 bit region. */
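  /* For example, with BITREGION_START == 32 and BITNUM == 37 (assuming
     8-bit units), the address is advanced by 4 bytes and BITNUM becomes
     5, relative to the start of the bit region.  */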
981 if (MEM_P (str_rtx) && bitregion_start > 0)
982 {
983 enum machine_mode bestmode;
984 HOST_WIDE_INT offset, size;
985
986 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
987
988 offset = bitregion_start / BITS_PER_UNIT;
989 bitnum -= bitregion_start;
990 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
991 bitregion_end -= bitregion_start;
992 bitregion_start = 0;
993 bestmode = get_best_mode (bitsize, bitnum,
994 bitregion_start, bitregion_end,
995 MEM_ALIGN (str_rtx), VOIDmode,
996 MEM_VOLATILE_P (str_rtx));
997 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
998 }
999
1000 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1001 bitregion_start, bitregion_end,
1002 fieldmode, value, true))
1003 gcc_unreachable ();
1004 }
1005 \f
1006 /* Use shifts and boolean operations to store VALUE into a bit field of
1007 width BITSIZE in OP0, starting at bit BITNUM. */
1008
1009 static void
1010 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1011 unsigned HOST_WIDE_INT bitnum,
1012 unsigned HOST_WIDE_INT bitregion_start,
1013 unsigned HOST_WIDE_INT bitregion_end,
1014 rtx value)
1015 {
1016 /* There is a case not handled here:
1017 a structure with a known alignment of just a halfword
1018 and a field split across two aligned halfwords within the structure.
1019 Or likewise a structure with a known alignment of just a byte
1020 and a field split across two bytes.
1021 Such cases are not supposed to be able to occur. */
1022
1023 if (MEM_P (op0))
1024 {
1025 enum machine_mode mode = GET_MODE (op0);
1026 if (GET_MODE_BITSIZE (mode) == 0
1027 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1028 mode = word_mode;
1029 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1030 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1031
1032 if (mode == VOIDmode)
1033 {
1034 /* The only way this should occur is if the field spans word
1035 boundaries. */
1036 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1037 bitregion_end, value);
1038 return;
1039 }
1040
1041 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1042 }
1043
1044 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1045 }
1046
1047 /* Helper function for store_fixed_bit_field, which always stores
1048 the bit field using the mode of OP0. */
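/* For example, with BYTES_BIG_ENDIAN zero, an SImode OP0, BITSIZE == 4
   and BITNUM == 8, OP0 is ANDed with ~(0xf << 8) to clear the field and
   then IORed with (VALUE & 0xf) << 8.  */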
1049
1050 static void
1051 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1052 unsigned HOST_WIDE_INT bitnum,
1053 rtx value)
1054 {
1055 enum machine_mode mode;
1056 rtx temp;
1057 int all_zero = 0;
1058 int all_one = 0;
1059
1060 mode = GET_MODE (op0);
1061 gcc_assert (SCALAR_INT_MODE_P (mode));
1062
1063 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1064 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1065
1066 if (BYTES_BIG_ENDIAN)
1067 /* BITNUM is the distance between our msb
1068 and that of the containing datum.
1069 Convert it to the distance from the lsb. */
1070 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1071
1072 /* Now BITNUM is always the distance between our lsb
1073 and that of OP0. */
1074
1075 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1076 we must first convert its mode to MODE. */
1077
1078 if (CONST_INT_P (value))
1079 {
1080 HOST_WIDE_INT v = INTVAL (value);
1081
1082 if (bitsize < HOST_BITS_PER_WIDE_INT)
1083 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1084
1085 if (v == 0)
1086 all_zero = 1;
1087 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1088 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1089 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1090 all_one = 1;
1091
1092 value = lshift_value (mode, v, bitnum);
1093 }
1094 else
1095 {
1096 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1097 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1098
1099 if (GET_MODE (value) != mode)
1100 value = convert_to_mode (mode, value, 1);
1101
1102 if (must_and)
1103 value = expand_binop (mode, and_optab, value,
1104 mask_rtx (mode, 0, bitsize, 0),
1105 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1106 if (bitnum > 0)
1107 value = expand_shift (LSHIFT_EXPR, mode, value,
1108 bitnum, NULL_RTX, 1);
1109 }
1110
1111 /* Now clear the chosen bits in OP0,
1112 except that if VALUE is -1 we need not bother. */
1113 /* We keep the intermediates in registers to allow CSE to combine
1114 consecutive bitfield assignments. */
1115
1116 temp = force_reg (mode, op0);
1117
1118 if (! all_one)
1119 {
1120 temp = expand_binop (mode, and_optab, temp,
1121 mask_rtx (mode, bitnum, bitsize, 1),
1122 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1123 temp = force_reg (mode, temp);
1124 }
1125
1126 /* Now logical-or VALUE into OP0, unless it is zero. */
1127
1128 if (! all_zero)
1129 {
1130 temp = expand_binop (mode, ior_optab, temp, value,
1131 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1132 temp = force_reg (mode, temp);
1133 }
1134
1135 if (op0 != temp)
1136 {
1137 op0 = copy_rtx (op0);
1138 emit_move_insn (op0, temp);
1139 }
1140 }
1141 \f
1142 /* Store a bit field that is split across multiple accessible memory objects.
1143
1144 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1145 BITSIZE is the field width; BITPOS the position of its first bit
1146 (within the word).
1147 VALUE is the value to store.
1148
1149 This does not yet handle fields wider than BITS_PER_WORD. */
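/* For example, with a byte-aligned MEM (UNIT == 8), BITPOS == 6 and
   BITSIZE == 5, the loop below stores 2 bits at bit 6 of the first byte
   and the remaining 3 bits at bit 0 of the following byte.  */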
1150
1151 static void
1152 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1153 unsigned HOST_WIDE_INT bitpos,
1154 unsigned HOST_WIDE_INT bitregion_start,
1155 unsigned HOST_WIDE_INT bitregion_end,
1156 rtx value)
1157 {
1158 unsigned int unit;
1159 unsigned int bitsdone = 0;
1160
1161 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1162 much at a time. */
1163 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1164 unit = BITS_PER_WORD;
1165 else
1166 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1167
1168 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1169 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1170 again, and we will mutually recurse forever. */
1171 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1172 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1173
1174 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1175 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1176 that VALUE might be a floating-point constant. */
1177 if (CONSTANT_P (value) && !CONST_INT_P (value))
1178 {
1179 rtx word = gen_lowpart_common (word_mode, value);
1180
1181 if (word && (value != word))
1182 value = word;
1183 else
1184 value = gen_lowpart_common (word_mode,
1185 force_reg (GET_MODE (value) != VOIDmode
1186 ? GET_MODE (value)
1187 : word_mode, value));
1188 }
1189
1190 while (bitsdone < bitsize)
1191 {
1192 unsigned HOST_WIDE_INT thissize;
1193 rtx part, word;
1194 unsigned HOST_WIDE_INT thispos;
1195 unsigned HOST_WIDE_INT offset;
1196
1197 offset = (bitpos + bitsdone) / unit;
1198 thispos = (bitpos + bitsdone) % unit;
1199
1200 /* When the region of bytes we can touch is restricted, decrease
1201 UNIT close to the end of the region as needed. If op0 is a REG
1202 or SUBREG of REG, don't do this, as there can't be data races
1203 on a register and we can expand shorter code in some cases. */
1204 if (bitregion_end
1205 && unit > BITS_PER_UNIT
1206 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1207 && !REG_P (op0)
1208 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1209 {
1210 unit = unit / 2;
1211 continue;
1212 }
1213
1214 /* THISSIZE must not overrun a word boundary. Otherwise,
1215 store_fixed_bit_field will call us again, and we will mutually
1216 recurse forever. */
1217 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1218 thissize = MIN (thissize, unit - thispos);
1219
1220 if (BYTES_BIG_ENDIAN)
1221 {
1222 /* Fetch successively less significant portions. */
1223 if (CONST_INT_P (value))
1224 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1225 >> (bitsize - bitsdone - thissize))
1226 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1227 else
1228 {
1229 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1230 /* The args are chosen so that the last part includes the
1231 lsb. Give extract_bit_field the value it needs (with
1232 endianness compensation) to fetch the piece we want. */
1233 part = extract_fixed_bit_field (word_mode, value, thissize,
1234 total_bits - bitsize + bitsdone,
1235 NULL_RTX, 1);
1236 }
1237 }
1238 else
1239 {
1240 /* Fetch successively more significant portions. */
1241 if (CONST_INT_P (value))
1242 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1243 >> bitsdone)
1244 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1245 else
1246 part = extract_fixed_bit_field (word_mode, value, thissize,
1247 bitsdone, NULL_RTX, 1);
1248 }
1249
1250 /* If OP0 is a register, then handle OFFSET here.
1251
1252 When handling multiword bitfields, extract_bit_field may pass
1253 down a word_mode SUBREG of a larger REG for a bitfield that actually
1254 crosses a word boundary. Thus, for a SUBREG, we must find
1255 the current word starting from the base register. */
1256 if (GET_CODE (op0) == SUBREG)
1257 {
1258 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1259 + (offset * unit / BITS_PER_WORD);
1260 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1261 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1262 word = word_offset ? const0_rtx : op0;
1263 else
1264 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1265 GET_MODE (SUBREG_REG (op0)));
1266 offset &= BITS_PER_WORD / unit - 1;
1267 }
1268 else if (REG_P (op0))
1269 {
1270 enum machine_mode op0_mode = GET_MODE (op0);
1271 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1272 word = offset ? const0_rtx : op0;
1273 else
1274 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1275 GET_MODE (op0));
1276 offset &= BITS_PER_WORD / unit - 1;
1277 }
1278 else
1279 word = op0;
1280
1281 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1282 it is just an out-of-bounds access. Ignore it. */
1283 if (word != const0_rtx)
1284 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1285 bitregion_start, bitregion_end, part);
1286 bitsdone += thissize;
1287 }
1288 }
1289 \f
1290 /* A subroutine of extract_bit_field_1 that converts return value X
1291 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1292 to extract_bit_field. */
1293
1294 static rtx
1295 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1296 enum machine_mode tmode, bool unsignedp)
1297 {
1298 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1299 return x;
1300
1301 /* If the x mode is not a scalar integral, first convert to the
1302 integer mode of that size and then access it as a floating-point
1303 value via a SUBREG. */
1304 if (!SCALAR_INT_MODE_P (tmode))
1305 {
1306 enum machine_mode smode;
1307
1308 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1309 x = convert_to_mode (smode, x, unsignedp);
1310 x = force_reg (smode, x);
1311 return gen_lowpart (tmode, x);
1312 }
1313
1314 return convert_to_mode (tmode, x, unsignedp);
1315 }
1316
1317 /* Try to use an ext(z)v pattern to extract a field from OP0.
1318 Return the extracted value on success, otherwise return null.
1319 EXT_MODE is the mode of the extraction and the other arguments
1320 are as for extract_bit_field. */
1321
1322 static rtx
1323 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1324 unsigned HOST_WIDE_INT bitsize,
1325 unsigned HOST_WIDE_INT bitnum,
1326 int unsignedp, rtx target,
1327 enum machine_mode mode, enum machine_mode tmode)
1328 {
1329 struct expand_operand ops[4];
1330 rtx spec_target = target;
1331 rtx spec_target_subreg = 0;
1332 enum machine_mode ext_mode = extv->field_mode;
1333 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1334
1335 if (bitsize == 0 || unit < bitsize)
1336 return NULL_RTX;
1337
1338 if (MEM_P (op0))
1339 /* Get a reference to the first byte of the field. */
1340 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1341 &bitnum);
1342 else
1343 {
1344 /* Convert from counting within OP0 to counting in EXT_MODE. */
1345 if (BYTES_BIG_ENDIAN)
1346 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1347
1348 /* If op0 is a register, we need it in EXT_MODE to make it
1349 acceptable to the format of ext(z)v. */
1350 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1351 return NULL_RTX;
1352 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1353 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1354 }
1355
1356 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1357 "backwards" from the size of the unit we are extracting from.
1358 Otherwise, we count bits from the most significant on a
1359 BYTES/BITS_BIG_ENDIAN machine. */
1360
1361 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1362 bitnum = unit - bitsize - bitnum;
1363
1364 if (target == 0)
1365 target = spec_target = gen_reg_rtx (tmode);
1366
1367 if (GET_MODE (target) != ext_mode)
1368 {
1369 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1370 between the mode of the extraction (word_mode) and the target
1371 mode. Instead, create a temporary and use convert_move to set
1372 the target. */
1373 if (REG_P (target)
1374 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1375 {
1376 target = gen_lowpart (ext_mode, target);
1377 if (GET_MODE_PRECISION (ext_mode)
1378 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1379 spec_target_subreg = target;
1380 }
1381 else
1382 target = gen_reg_rtx (ext_mode);
1383 }
1384
1385 create_output_operand (&ops[0], target, ext_mode);
1386 create_fixed_operand (&ops[1], op0);
1387 create_integer_operand (&ops[2], bitsize);
1388 create_integer_operand (&ops[3], bitnum);
1389 if (maybe_expand_insn (extv->icode, 4, ops))
1390 {
1391 target = ops[0].value;
1392 if (target == spec_target)
1393 return target;
1394 if (target == spec_target_subreg)
1395 return spec_target;
1396 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1397 }
1398 return NULL_RTX;
1399 }
1400
1401 /* A subroutine of extract_bit_field, with the same arguments.
1402 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1403 if we can find no other means of implementing the operation.
1404 If FALLBACK_P is false, return NULL instead. */
1405
1406 static rtx
1407 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1408 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1409 enum machine_mode mode, enum machine_mode tmode,
1410 bool fallback_p)
1411 {
1412 rtx op0 = str_rtx;
1413 enum machine_mode int_mode;
1414 enum machine_mode mode1;
1415
1416 if (tmode == VOIDmode)
1417 tmode = mode;
1418
1419 while (GET_CODE (op0) == SUBREG)
1420 {
1421 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1422 op0 = SUBREG_REG (op0);
1423 }
1424
1425 /* If we have an out-of-bounds access to a register, just return an
1426 uninitialized register of the required mode. This can occur if the
1427 source code contains an out-of-bounds access to a small array. */
1428 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1429 return gen_reg_rtx (tmode);
1430
1431 if (REG_P (op0)
1432 && mode == GET_MODE (op0)
1433 && bitnum == 0
1434 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1435 {
1436 /* We're trying to extract a full register from itself. */
1437 return op0;
1438 }
1439
1440 /* See if we can get a better vector mode before extracting. */
1441 if (VECTOR_MODE_P (GET_MODE (op0))
1442 && !MEM_P (op0)
1443 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1444 {
1445 enum machine_mode new_mode;
1446
1447 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1448 new_mode = MIN_MODE_VECTOR_FLOAT;
1449 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1450 new_mode = MIN_MODE_VECTOR_FRACT;
1451 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1452 new_mode = MIN_MODE_VECTOR_UFRACT;
1453 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1454 new_mode = MIN_MODE_VECTOR_ACCUM;
1455 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1456 new_mode = MIN_MODE_VECTOR_UACCUM;
1457 else
1458 new_mode = MIN_MODE_VECTOR_INT;
1459
1460 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1461 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1462 && targetm.vector_mode_supported_p (new_mode))
1463 break;
1464 if (new_mode != VOIDmode)
1465 op0 = gen_lowpart (new_mode, op0);
1466 }
1467
1468 /* Use vec_extract patterns for extracting parts of vectors whenever
1469 available. */
1470 if (VECTOR_MODE_P (GET_MODE (op0))
1471 && !MEM_P (op0)
1472 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1473 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1474 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1475 {
1476 struct expand_operand ops[3];
1477 enum machine_mode outermode = GET_MODE (op0);
1478 enum machine_mode innermode = GET_MODE_INNER (outermode);
1479 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1480 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1481
1482 create_output_operand (&ops[0], target, innermode);
1483 create_input_operand (&ops[1], op0, outermode);
1484 create_integer_operand (&ops[2], pos);
1485 if (maybe_expand_insn (icode, 3, ops))
1486 {
1487 target = ops[0].value;
1488 if (GET_MODE (target) != mode)
1489 return gen_lowpart (tmode, target);
1490 return target;
1491 }
1492 }
1493
1494 /* Make sure we are playing with integral modes. Pun with subregs
1495 if we aren't. */
1496 {
1497 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1498 if (imode != GET_MODE (op0))
1499 {
1500 if (MEM_P (op0))
1501 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1502 else if (imode != BLKmode)
1503 {
1504 op0 = gen_lowpart (imode, op0);
1505
1506 /* If we got a SUBREG, force it into a register since we
1507 aren't going to be able to do another SUBREG on it. */
1508 if (GET_CODE (op0) == SUBREG)
1509 op0 = force_reg (imode, op0);
1510 }
1511 else if (REG_P (op0))
1512 {
1513 rtx reg, subreg;
1514 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1515 MODE_INT);
1516 reg = gen_reg_rtx (imode);
1517 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1518 emit_move_insn (subreg, op0);
1519 op0 = reg;
1520 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1521 }
1522 else
1523 {
1524 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1525 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1526 emit_move_insn (mem, op0);
1527 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1528 }
1529 }
1530 }
1531
1532 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1533 If that's wrong, the solution is to test for it and set TARGET to 0
1534 if needed. */
1535
1536 /* Get the mode of the field to use for atomic access or subreg
1537 conversion. */
1538 mode1 = mode;
1539 if (SCALAR_INT_MODE_P (tmode))
1540 {
1541 enum machine_mode try_mode = mode_for_size (bitsize,
1542 GET_MODE_CLASS (tmode), 0);
1543 if (try_mode != BLKmode)
1544 mode1 = try_mode;
1545 }
1546 gcc_assert (mode1 != BLKmode);
1547
1548 /* Extraction of a full MODE1 value can be done with a subreg as long
1549 as the least significant bit of the value is the least significant
1550 bit of either OP0 or a word of OP0. */
1551 if (!MEM_P (op0)
1552 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1553 && bitsize == GET_MODE_BITSIZE (mode1)
1554 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1555 {
1556 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1557 bitnum / BITS_PER_UNIT);
1558 if (sub)
1559 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1560 }
1561
1562 /* Extraction of a full MODE1 value can be done with a load as long as
1563 the field is on a byte boundary and is sufficiently aligned. */
1564 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1565 {
1566 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1567 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1568 }
1569
1570 /* Handle fields bigger than a word. */
1571
1572 if (bitsize > BITS_PER_WORD)
1573 {
1574 /* Here we transfer the words of the field
1575 in the order least significant first.
1576 This is because the most significant word is the one which may
1577 be less than full. */
1578
1579 unsigned int backwards = WORDS_BIG_ENDIAN;
1580 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1581 unsigned int i;
1582 rtx last;
1583
1584 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1585 target = gen_reg_rtx (mode);
1586
1587 /* Indicate for flow that the entire target reg is being set. */
1588 emit_clobber (target);
1589
1590 last = get_last_insn ();
1591 for (i = 0; i < nwords; i++)
1592 {
1593 /* If I is 0, use the low-order word in both field and target;
1594 if I is 1, use the next to lowest word; and so on. */
1595 /* Word number in TARGET to use. */
1596 unsigned int wordnum
1597 = (backwards
1598 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1599 : i);
1600 /* Offset from start of field in OP0. */
1601 unsigned int bit_offset = (backwards
1602 ? MAX ((int) bitsize - ((int) i + 1)
1603 * BITS_PER_WORD,
1604 0)
1605 : (int) i * BITS_PER_WORD);
1606 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1607 rtx result_part
1608 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1609 bitsize - i * BITS_PER_WORD),
1610 bitnum + bit_offset, 1, target_part,
1611 mode, word_mode, fallback_p);
1612
1613 gcc_assert (target_part);
1614 if (!result_part)
1615 {
1616 delete_insns_since (last);
1617 return NULL;
1618 }
1619
1620 if (result_part != target_part)
1621 emit_move_insn (target_part, result_part);
1622 }
1623
1624 if (unsignedp)
1625 {
1626 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1627 need to be zero'd out. */
1628 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1629 {
1630 unsigned int i, total_words;
1631
1632 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1633 for (i = nwords; i < total_words; i++)
1634 emit_move_insn
1635 (operand_subword (target,
1636 backwards ? total_words - i - 1 : i,
1637 1, VOIDmode),
1638 const0_rtx);
1639 }
1640 return target;
1641 }
1642
1643 /* Signed bit field: sign-extend with two arithmetic shifts. */
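      /* E.g., a 16-bit field extracted into SImode is shifted left by 16
	 and then arithmetically right by 16.  */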
1644 target = expand_shift (LSHIFT_EXPR, mode, target,
1645 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1646 return expand_shift (RSHIFT_EXPR, mode, target,
1647 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1648 }
1649
1650 /* If OP0 is a multi-word register, narrow it to the affected word.
1651 If the region spans two words, defer to extract_split_bit_field. */
1652 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1653 {
1654 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1655 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1656 bitnum %= BITS_PER_WORD;
1657 if (bitnum + bitsize > BITS_PER_WORD)
1658 {
1659 if (!fallback_p)
1660 return NULL_RTX;
1661 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1662 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1663 }
1664 }
1665
1666 /* From here on we know the desired field is smaller than a word.
1667 If OP0 is a register, it too fits within a word. */
1668 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1669 extraction_insn extv;
1670 if (!MEM_P (op0)
1671 /* ??? We could limit the structure size to the part of OP0 that
1672 contains the field, with appropriate checks for endianness
1673 and TRULY_NOOP_TRUNCATION. */
1674 && get_best_reg_extraction_insn (&extv, pattern,
1675 GET_MODE_BITSIZE (GET_MODE (op0)),
1676 tmode))
1677 {
1678 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1679 unsignedp, target, mode,
1680 tmode);
1681 if (result)
1682 return result;
1683 }
1684
1685 /* If OP0 is a memory, try copying it to a register and seeing if a
1686 cheap register alternative is available. */
1687 if (MEM_P (op0))
1688 {
1689 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1690 tmode))
1691 {
1692 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1693 bitnum, unsignedp,
1694 target, mode,
1695 tmode);
1696 if (result)
1697 return result;
1698 }
1699
1700 rtx last = get_last_insn ();
1701
1702 /* Try loading part of OP0 into a register and extracting the
1703 bitfield from that. */
1704 unsigned HOST_WIDE_INT bitpos;
1705 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1706 0, 0, tmode, &bitpos);
1707 if (xop0)
1708 {
1709 xop0 = copy_to_reg (xop0);
1710 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1711 unsignedp, target,
1712 mode, tmode, false);
1713 if (result)
1714 return result;
1715 delete_insns_since (last);
1716 }
1717 }
1718
1719 if (!fallback_p)
1720 return NULL;
1721
1722 /* Find a correspondingly-sized integer field, so we can apply
1723 shifts and masks to it. */
1724 int_mode = int_mode_for_mode (tmode);
1725 if (int_mode == BLKmode)
1726 int_mode = int_mode_for_mode (mode);
1727 /* Should probably push op0 out to memory and then do a load. */
1728 gcc_assert (int_mode != BLKmode);
1729
1730 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1731 target, unsignedp);
1732 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1733 }
1734
1735 /* Generate code to extract a bit-field from STR_RTX
1736 containing BITSIZE bits, starting at BITNUM,
1737 and put it in TARGET if possible (if TARGET is nonzero).
1738 Regardless of TARGET, we return the rtx for where the value is placed.
1739
1740 STR_RTX is the structure containing the byte (a REG or MEM).
1741 UNSIGNEDP is nonzero if this is an unsigned bit field.
1742 MODE is the natural mode of the field value once extracted.
1743 TMODE is the mode the caller would like the value to have;
1744 but the value may be returned with type MODE instead.
1745
1746 If a TARGET is specified and we can store in it at no extra cost,
1747 we do so, and return TARGET.
1748 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1749 if they are equally easy. */
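/* Purely illustrative (not part of the original sources): a caller that
wants an unsigned 12-bit field starting at bit 4 of a memory operand
"mem", widened to SImode, might write
rtx val = extract_bit_field (mem, 12, 4, 1, NULL_RTX, SImode, SImode);
the operand name, modes and null TARGET here are hypothetical choices,
not requirements of this interface. */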
1750
1751 rtx
1752 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1753 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1754 enum machine_mode mode, enum machine_mode tmode)
1755 {
1756 enum machine_mode mode1;
1757
1758 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1759 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1760 mode1 = GET_MODE (str_rtx);
1761 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1762 mode1 = GET_MODE (target);
1763 else
1764 mode1 = tmode;
1765
1766 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1767 {
1768 rtx result;
1769
1770 /* Extraction of a full MODE1 value can be done with a load as long as
1771 the field is on a byte boundary and is sufficiently aligned. */
1772 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1773 result = adjust_bitfield_address (str_rtx, mode1,
1774 bitnum / BITS_PER_UNIT);
1775 else
1776 {
1777 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1778 &bitnum);
1779 result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1780 target, unsignedp);
1781 }
1782
1783 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1784 }
1785
1786 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1787 target, mode, tmode, true);
1788 }
1789 \f
1790 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1791 from bit BITNUM of OP0.
1792
1793 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1794 If TARGET is nonzero, attempts to store the value there
1795 and return TARGET, but this is not guaranteed.
1796 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1797
1798 static rtx
1799 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1800 unsigned HOST_WIDE_INT bitsize,
1801 unsigned HOST_WIDE_INT bitnum, rtx target,
1802 int unsignedp)
1803 {
1804 if (MEM_P (op0))
1805 {
1806 enum machine_mode mode
1807 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1808 MEM_VOLATILE_P (op0));
1809
1810 if (mode == VOIDmode)
1811 /* The only way this should occur is if the field spans word
1812 boundaries. */
1813 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1814
1815 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1816 }
1817
1818 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1819 target, unsignedp);
1820 }
1821
1822 /* Helper function for extract_fixed_bit_field; it extracts
1823 the bit field, always using the mode of OP0. */
1824
1825 static rtx
1826 extract_fixed_bit_field_1 (enum machine_mode tmode, rtx op0,
1827 unsigned HOST_WIDE_INT bitsize,
1828 unsigned HOST_WIDE_INT bitnum, rtx target,
1829 int unsignedp)
1830 {
1831 enum machine_mode mode = GET_MODE (op0);
1832 gcc_assert (SCALAR_INT_MODE_P (mode));
1833
1834 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1835 for invalid input, such as extract equivalent of f5 from
1836 gcc.dg/pr48335-2.c. */
1837
1838 if (BYTES_BIG_ENDIAN)
1839 /* BITNUM is the distance between our msb and that of OP0.
1840 Convert it to the distance from the lsb. */
1841 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1842
1843 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1844 We have reduced the big-endian case to the little-endian case. */
1845
1846 if (unsignedp)
1847 {
1848 if (bitnum)
1849 {
1850 /* If the field does not already start at the lsb,
1851 shift it so it does. */
1852 /* Maybe propagate the target for the shift. */
1853 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1854 if (tmode != mode)
1855 subtarget = 0;
1856 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1857 }
1858 /* Convert the value to the desired mode. */
1859 if (mode != tmode)
1860 op0 = convert_to_mode (tmode, op0, 1);
1861
1862 /* Unless the msb of the field used to be the msb when we shifted,
1863 mask out the upper bits. */
1864
1865 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1866 return expand_binop (GET_MODE (op0), and_optab, op0,
1867 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1868 target, 1, OPTAB_LIB_WIDEN);
1869 return op0;
1870 }
1871
1872 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1873 then arithmetic-shift its lsb to the lsb of the word. */
1874 op0 = force_reg (mode, op0);
1875
1876 /* Find the narrowest integer mode that contains the field. */
1877
1878 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1879 mode = GET_MODE_WIDER_MODE (mode))
1880 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1881 {
1882 op0 = convert_to_mode (mode, op0, 0);
1883 break;
1884 }
1885
1886 if (mode != tmode)
1887 target = 0;
1888
1889 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1890 {
1891 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1892 /* Maybe propagate the target for the shift. */
1893 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1894 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1895 }
1896
1897 return expand_shift (RSHIFT_EXPR, mode, op0,
1898 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1899 }
1900
1901 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1902 VALUE << BITPOS. */
1903
1904 static rtx
1905 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1906 int bitpos)
1907 {
1908 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1909 }
1910 \f
1911 /* Extract a bit field that is split across two words
1912 and return an RTX for the result.
1913
1914 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1915 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1916 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
1917
1918 static rtx
1919 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1920 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1921 {
1922 unsigned int unit;
1923 unsigned int bitsdone = 0;
1924 rtx result = NULL_RTX;
1925 int first = 1;
1926
1927 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1928 much at a time. */
1929 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1930 unit = BITS_PER_WORD;
1931 else
1932 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1933
1934 while (bitsdone < bitsize)
1935 {
1936 unsigned HOST_WIDE_INT thissize;
1937 rtx part, word;
1938 unsigned HOST_WIDE_INT thispos;
1939 unsigned HOST_WIDE_INT offset;
1940
1941 offset = (bitpos + bitsdone) / unit;
1942 thispos = (bitpos + bitsdone) % unit;
1943
1944 /* THISSIZE must not overrun a word boundary. Otherwise,
1945 extract_fixed_bit_field will call us again, and we will mutually
1946 recurse forever. */
1947 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1948 thissize = MIN (thissize, unit - thispos);
1949
1950 /* If OP0 is a register, then handle OFFSET here.
1951
1952 When handling multiword bitfields, extract_bit_field may pass
1953 down a word_mode SUBREG of a larger REG for a bitfield that actually
1954 crosses a word boundary. Thus, for a SUBREG, we must find
1955 the current word starting from the base register. */
1956 if (GET_CODE (op0) == SUBREG)
1957 {
1958 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1959 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1960 GET_MODE (SUBREG_REG (op0)));
1961 offset = 0;
1962 }
1963 else if (REG_P (op0))
1964 {
1965 word = operand_subword_force (op0, offset, GET_MODE (op0));
1966 offset = 0;
1967 }
1968 else
1969 word = op0;
1970
1971 /* Extract the parts in bit-counting order,
1972 whose meaning is determined by BYTES_PER_UNIT.
1973 OFFSET is in UNITs, and UNIT is in bits. */
1974 part = extract_fixed_bit_field (word_mode, word, thissize,
1975 offset * unit + thispos, 0, 1);
1976 bitsdone += thissize;
1977
1978 /* Shift this part into place for the result. */
1979 if (BYTES_BIG_ENDIAN)
1980 {
1981 if (bitsize != bitsdone)
1982 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1983 bitsize - bitsdone, 0, 1);
1984 }
1985 else
1986 {
1987 if (bitsdone != thissize)
1988 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1989 bitsdone - thissize, 0, 1);
1990 }
1991
1992 if (first)
1993 result = part;
1994 else
1995 /* Combine the parts with bitwise or. This works
1996 because we extracted each part as an unsigned bit field. */
1997 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1998 OPTAB_LIB_WIDEN);
1999
2000 first = 0;
2001 }
2002
2003 /* Unsigned bit field: we are done. */
2004 if (unsignedp)
2005 return result;
2006 /* Signed bit field: sign-extend with two arithmetic shifts. */
2007 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2008 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2009 return expand_shift (RSHIFT_EXPR, word_mode, result,
2010 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2011 }
2012 \f
2013 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2014 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2015 MODE, fill the upper bits with zeros. Fail if the layout of either
2016 mode is unknown (as for CC modes) or if the extraction would involve
2017 unprofitable mode punning. Return the value on success, otherwise
2018 return null.
2019
2020 This is different from gen_lowpart* in these respects:
2021
2022 - the returned value must always be considered an rvalue
2023
2024 - when MODE is wider than SRC_MODE, the extraction involves
2025 a zero extension
2026
2027 - when MODE is smaller than SRC_MODE, the extraction involves
2028 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2029
2030 In other words, this routine performs a computation, whereas the
2031 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2032 operations. */
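/* As an illustrative sketch (not from the original sources): given a
DImode pseudo X, its low word can be read as an SImode rvalue with
rtx lo = extract_low_bits (SImode, DImode, x);
which yields NULL_RTX in the failure cases described above. */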
2033
2034 rtx
2035 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2036 {
2037 enum machine_mode int_mode, src_int_mode;
2038
2039 if (mode == src_mode)
2040 return src;
2041
2042 if (CONSTANT_P (src))
2043 {
2044 /* simplify_gen_subreg can't be used here, because if simplify_subreg
2045 fails, it will happily create (subreg (symbol_ref)) or similar
2046 invalid SUBREGs. */
2047 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2048 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2049 if (ret)
2050 return ret;
2051
2052 if (GET_MODE (src) == VOIDmode
2053 || !validate_subreg (mode, src_mode, src, byte))
2054 return NULL_RTX;
2055
2056 src = force_reg (GET_MODE (src), src);
2057 return gen_rtx_SUBREG (mode, src, byte);
2058 }
2059
2060 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2061 return NULL_RTX;
2062
2063 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2064 && MODES_TIEABLE_P (mode, src_mode))
2065 {
2066 rtx x = gen_lowpart_common (mode, src);
2067 if (x)
2068 return x;
2069 }
2070
2071 src_int_mode = int_mode_for_mode (src_mode);
2072 int_mode = int_mode_for_mode (mode);
2073 if (src_int_mode == BLKmode || int_mode == BLKmode)
2074 return NULL_RTX;
2075
2076 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2077 return NULL_RTX;
2078 if (!MODES_TIEABLE_P (int_mode, mode))
2079 return NULL_RTX;
2080
2081 src = gen_lowpart (src_int_mode, src);
2082 src = convert_modes (int_mode, src_int_mode, src, true);
2083 src = gen_lowpart (mode, src);
2084 return src;
2085 }
2086 \f
2087 /* Add INC into TARGET. */
2088
2089 void
2090 expand_inc (rtx target, rtx inc)
2091 {
2092 rtx value = expand_binop (GET_MODE (target), add_optab,
2093 target, inc,
2094 target, 0, OPTAB_LIB_WIDEN);
2095 if (value != target)
2096 emit_move_insn (target, value);
2097 }
2098
2099 /* Subtract DEC from TARGET. */
2100
2101 void
2102 expand_dec (rtx target, rtx dec)
2103 {
2104 rtx value = expand_binop (GET_MODE (target), sub_optab,
2105 target, dec,
2106 target, 0, OPTAB_LIB_WIDEN);
2107 if (value != target)
2108 emit_move_insn (target, value);
2109 }
2110 \f
2111 /* Output a shift instruction for expression code CODE,
2112 with SHIFTED being the rtx for the value to shift,
2113 and AMOUNT the rtx for the amount to shift by.
2114 Store the result in the rtx TARGET, if that is convenient.
2115 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2116 Return the rtx for where the value is. */
2117
2118 static rtx
2119 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2120 rtx amount, rtx target, int unsignedp)
2121 {
2122 rtx op1, temp = 0;
2123 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2124 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2125 optab lshift_optab = ashl_optab;
2126 optab rshift_arith_optab = ashr_optab;
2127 optab rshift_uns_optab = lshr_optab;
2128 optab lrotate_optab = rotl_optab;
2129 optab rrotate_optab = rotr_optab;
2130 enum machine_mode op1_mode;
2131 enum machine_mode scalar_mode = mode;
2132 int attempt;
2133 bool speed = optimize_insn_for_speed_p ();
2134
2135 if (VECTOR_MODE_P (mode))
2136 scalar_mode = GET_MODE_INNER (mode);
2137 op1 = amount;
2138 op1_mode = GET_MODE (op1);
2139
2140 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2141 shift amount is a vector, use the vector/vector shift patterns. */
2142 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2143 {
2144 lshift_optab = vashl_optab;
2145 rshift_arith_optab = vashr_optab;
2146 rshift_uns_optab = vlshr_optab;
2147 lrotate_optab = vrotl_optab;
2148 rrotate_optab = vrotr_optab;
2149 }
2150
2151 /* We previously detected shift counts computed by NEGATE_EXPR
2152 and shifted in the other direction, but that does not work
2153 on all machines. */
2154
2155 if (SHIFT_COUNT_TRUNCATED)
2156 {
2157 if (CONST_INT_P (op1)
2158 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2159 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2160 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2161 % GET_MODE_BITSIZE (scalar_mode));
2162 else if (GET_CODE (op1) == SUBREG
2163 && subreg_lowpart_p (op1)
2164 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2165 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2166 op1 = SUBREG_REG (op1);
2167 }
2168
2169 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2170 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2171 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2172 amount instead. */
2173 if (rotate
2174 && CONST_INT_P (op1)
2175 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2176 GET_MODE_BITSIZE (scalar_mode) - 1))
2177 {
2178 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2179 left = !left;
2180 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2181 }
2182
2183 if (op1 == const0_rtx)
2184 return shifted;
2185
2186 /* Check whether it's cheaper to implement a left shift by a constant
2187 bit count as a sequence of additions. */
2188 if (code == LSHIFT_EXPR
2189 && CONST_INT_P (op1)
2190 && INTVAL (op1) > 0
2191 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2192 && INTVAL (op1) < MAX_BITS_PER_WORD
2193 && (shift_cost (speed, mode, INTVAL (op1))
2194 > INTVAL (op1) * add_cost (speed, mode))
2195 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2196 {
2197 int i;
2198 for (i = 0; i < INTVAL (op1); i++)
2199 {
2200 temp = force_reg (mode, shifted);
2201 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2202 unsignedp, OPTAB_LIB_WIDEN);
2203 }
2204 return shifted;
2205 }
2206
2207 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2208 {
2209 enum optab_methods methods;
2210
2211 if (attempt == 0)
2212 methods = OPTAB_DIRECT;
2213 else if (attempt == 1)
2214 methods = OPTAB_WIDEN;
2215 else
2216 methods = OPTAB_LIB_WIDEN;
2217
2218 if (rotate)
2219 {
2220 /* Widening does not work for rotation. */
2221 if (methods == OPTAB_WIDEN)
2222 continue;
2223 else if (methods == OPTAB_LIB_WIDEN)
2224 {
2225 /* If we have been unable to open-code this by a rotation,
2226 do it as the IOR of two shifts. I.e., to rotate A
2227 by N bits, compute
2228 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2229 where C is the bitsize of A.
2230
2231 It is theoretically possible that the target machine might
2232 not be able to perform either shift and hence we would
2233 be making two libcalls rather than just the one for the
2234 shift (similarly if IOR could not be done). We will allow
2235 this extremely unlikely lossage to avoid complicating the
2236 code below. */
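/* Illustrative example (not from the original sources): for a 32-bit
value A rotated left by a constant N in 1..31 this produces
(A << N) | ((unsigned) A >> (32 - N));
e.g. rotating 0x80000001 left by 1 gives 0x00000003. For a variable
amount n, the mask form (-n) & 31 computes the same complementary
count without ever requesting a shift by 32. */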
2237
2238 rtx subtarget = target == shifted ? 0 : target;
2239 rtx new_amount, other_amount;
2240 rtx temp1;
2241
2242 new_amount = op1;
2243 if (op1 == const0_rtx)
2244 return shifted;
2245 else if (CONST_INT_P (op1))
2246 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2247 - INTVAL (op1));
2248 else
2249 {
2250 other_amount
2251 = simplify_gen_unary (NEG, GET_MODE (op1),
2252 op1, GET_MODE (op1));
2253 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2254 other_amount
2255 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2256 gen_int_mode (mask, GET_MODE (op1)));
2257 }
2258
2259 shifted = force_reg (mode, shifted);
2260
2261 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2262 mode, shifted, new_amount, 0, 1);
2263 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2264 mode, shifted, other_amount,
2265 subtarget, 1);
2266 return expand_binop (mode, ior_optab, temp, temp1, target,
2267 unsignedp, methods);
2268 }
2269
2270 temp = expand_binop (mode,
2271 left ? lrotate_optab : rrotate_optab,
2272 shifted, op1, target, unsignedp, methods);
2273 }
2274 else if (unsignedp)
2275 temp = expand_binop (mode,
2276 left ? lshift_optab : rshift_uns_optab,
2277 shifted, op1, target, unsignedp, methods);
2278
2279 /* Do arithmetic shifts.
2280 Also, if we are going to widen the operand, we can just as well
2281 use an arithmetic right-shift instead of a logical one. */
2282 if (temp == 0 && ! rotate
2283 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2284 {
2285 enum optab_methods methods1 = methods;
2286
2287 /* If trying to widen a logical shift to an arithmetic shift,
2288 don't accept an arithmetic shift of the same size. */
2289 if (unsignedp)
2290 methods1 = OPTAB_MUST_WIDEN;
2291
2292 /* Arithmetic shift */
2293
2294 temp = expand_binop (mode,
2295 left ? lshift_optab : rshift_arith_optab,
2296 shifted, op1, target, unsignedp, methods1);
2297 }
2298
2299 /* We used to try extzv here for logical right shifts, but that was
2300 only useful for one machine, the VAX, and caused poor code
2301 generation there for lshrdi3, so the code was deleted and a
2302 define_expand for lshrsi3 was added to vax.md. */
2303 }
2304
2305 gcc_assert (temp);
2306 return temp;
2307 }
2308
2309 /* Output a shift instruction for expression code CODE,
2310 with SHIFTED being the rtx for the value to shift,
2311 and AMOUNT the amount to shift by.
2312 Store the result in the rtx TARGET, if that is convenient.
2313 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2314 Return the rtx for where the value is. */
2315
2316 rtx
2317 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2318 int amount, rtx target, int unsignedp)
2319 {
2320 return expand_shift_1 (code, mode,
2321 shifted, GEN_INT (amount), target, unsignedp);
2322 }
2323
2324 /* Output a shift instruction for expression code CODE,
2325 with SHIFTED being the rtx for the value to shift,
2326 and AMOUNT the tree for the amount to shift by.
2327 Store the result in the rtx TARGET, if that is convenient.
2328 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2329 Return the rtx for where the value is. */
2330
2331 rtx
2332 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2333 tree amount, rtx target, int unsignedp)
2334 {
2335 return expand_shift_1 (code, mode,
2336 shifted, expand_normal (amount), target, unsignedp);
2337 }
2338
2339 \f
2340 /* Indicates the type of fixup needed after a constant multiplication.
2341 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2342 the result should be negated, and ADD_VARIANT means that the
2343 multiplicand should be added to the result. */
2344 enum mult_variant {basic_variant, negate_variant, add_variant};
2345
2346 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2347 const struct mult_cost *, enum machine_mode mode);
2348 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2349 struct algorithm *, enum mult_variant *, int);
2350 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2351 const struct algorithm *, enum mult_variant);
2352 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2353 static rtx extract_high_half (enum machine_mode, rtx);
2354 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2355 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2356 int, int);
2357 /* Compute and return the best algorithm for multiplying by T.
2358 The algorithm must cost less than COST_LIMIT.
2359 If retval.cost >= COST_LIMIT, no algorithm was found and all
2360 other fields of the returned struct are undefined.
2361 MODE is the machine mode of the multiplication. */
2362
2363 static void
2364 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2365 const struct mult_cost *cost_limit, enum machine_mode mode)
2366 {
2367 int m;
2368 struct algorithm *alg_in, *best_alg;
2369 struct mult_cost best_cost;
2370 struct mult_cost new_limit;
2371 int op_cost, op_latency;
2372 unsigned HOST_WIDE_INT orig_t = t;
2373 unsigned HOST_WIDE_INT q;
2374 int maxm, hash_index;
2375 bool cache_hit = false;
2376 enum alg_code cache_alg = alg_zero;
2377 bool speed = optimize_insn_for_speed_p ();
2378 enum machine_mode imode;
2379 struct alg_hash_entry *entry_ptr;
2380
2381 /* Indicate that no algorithm is yet found. If no algorithm
2382 is found, this value will be returned and indicate failure. */
2383 alg_out->cost.cost = cost_limit->cost + 1;
2384 alg_out->cost.latency = cost_limit->latency + 1;
2385
2386 if (cost_limit->cost < 0
2387 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2388 return;
2389
2390 /* Be prepared for vector modes. */
2391 imode = GET_MODE_INNER (mode);
2392 if (imode == VOIDmode)
2393 imode = mode;
2394
2395 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2396
2397 /* Restrict the bits of "t" to the multiplication's mode. */
2398 t &= GET_MODE_MASK (imode);
2399
2400 /* t == 1 can be done at zero cost. */
2401 if (t == 1)
2402 {
2403 alg_out->ops = 1;
2404 alg_out->cost.cost = 0;
2405 alg_out->cost.latency = 0;
2406 alg_out->op[0] = alg_m;
2407 return;
2408 }
2409
2410 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2411 fail now. */
2412 if (t == 0)
2413 {
2414 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2415 return;
2416 else
2417 {
2418 alg_out->ops = 1;
2419 alg_out->cost.cost = zero_cost (speed);
2420 alg_out->cost.latency = zero_cost (speed);
2421 alg_out->op[0] = alg_zero;
2422 return;
2423 }
2424 }
2425
2426 /* We'll be needing a couple extra algorithm structures now. */
2427
2428 alg_in = XALLOCA (struct algorithm);
2429 best_alg = XALLOCA (struct algorithm);
2430 best_cost = *cost_limit;
2431
2432 /* Compute the hash index. */
2433 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2434
2435 /* See if we already know what to do for T. */
2436 entry_ptr = alg_hash_entry_ptr (hash_index);
2437 if (entry_ptr->t == t
2438 && entry_ptr->mode == mode
2440 && entry_ptr->speed == speed
2441 && entry_ptr->alg != alg_unknown)
2442 {
2443 cache_alg = entry_ptr->alg;
2444
2445 if (cache_alg == alg_impossible)
2446 {
2447 /* The cache tells us that it's impossible to synthesize
2448 multiplication by T within entry_ptr->cost. */
2449 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2450 /* COST_LIMIT is at least as restrictive as the one
2451 recorded in the hash table, in which case we have no
2452 hope of synthesizing a multiplication. Just
2453 return. */
2454 return;
2455
2456 /* If we get here, COST_LIMIT is less restrictive than the
2457 one recorded in the hash table, so we may be able to
2458 synthesize a multiplication. Proceed as if we didn't
2459 have the cache entry. */
2460 }
2461 else
2462 {
2463 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2464 /* The cached algorithm shows that this multiplication
2465 requires more cost than COST_LIMIT. Just return. This
2466 way, we don't clobber this cache entry with
2467 alg_impossible but retain useful information. */
2468 return;
2469
2470 cache_hit = true;
2471
2472 switch (cache_alg)
2473 {
2474 case alg_shift:
2475 goto do_alg_shift;
2476
2477 case alg_add_t_m2:
2478 case alg_sub_t_m2:
2479 goto do_alg_addsub_t_m2;
2480
2481 case alg_add_factor:
2482 case alg_sub_factor:
2483 goto do_alg_addsub_factor;
2484
2485 case alg_add_t2_m:
2486 goto do_alg_add_t2_m;
2487
2488 case alg_sub_t2_m:
2489 goto do_alg_sub_t2_m;
2490
2491 default:
2492 gcc_unreachable ();
2493 }
2494 }
2495 }
2496
2497 /* If we have a group of zero bits at the low-order part of T, try
2498 multiplying by the remaining bits and then doing a shift. */
2499
2500 if ((t & 1) == 0)
2501 {
2502 do_alg_shift:
2503 m = floor_log2 (t & -t); /* m = number of low zero bits */
2504 if (m < maxm)
2505 {
2506 q = t >> m;
2507 /* The function expand_shift will choose between a shift and
2508 a sequence of additions, so the observed cost is given as
2509 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2510 op_cost = m * add_cost (speed, mode);
2511 if (shift_cost (speed, mode, m) < op_cost)
2512 op_cost = shift_cost (speed, mode, m);
2513 new_limit.cost = best_cost.cost - op_cost;
2514 new_limit.latency = best_cost.latency - op_cost;
2515 synth_mult (alg_in, q, &new_limit, mode);
2516
2517 alg_in->cost.cost += op_cost;
2518 alg_in->cost.latency += op_cost;
2519 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2520 {
2521 struct algorithm *x;
2522 best_cost = alg_in->cost;
2523 x = alg_in, alg_in = best_alg, best_alg = x;
2524 best_alg->log[best_alg->ops] = m;
2525 best_alg->op[best_alg->ops] = alg_shift;
2526 }
2527
2528 /* See if treating ORIG_T as a signed number yields a better
2529 sequence. Try this sequence only for a negative ORIG_T
2530 as it would be useless for a non-negative ORIG_T. */
2531 if ((HOST_WIDE_INT) orig_t < 0)
2532 {
2533 /* Shift ORIG_T as follows because a right shift of a
2534 negative-valued signed type is implementation
2535 defined. */
2536 q = ~(~orig_t >> m);
2537 /* The function expand_shift will choose between a shift
2538 and a sequence of additions, so the observed cost is
2539 given as MIN (m * add_cost(speed, mode),
2540 shift_cost(speed, mode, m)). */
2541 op_cost = m * add_cost (speed, mode);
2542 if (shift_cost (speed, mode, m) < op_cost)
2543 op_cost = shift_cost (speed, mode, m);
2544 new_limit.cost = best_cost.cost - op_cost;
2545 new_limit.latency = best_cost.latency - op_cost;
2546 synth_mult (alg_in, q, &new_limit, mode);
2547
2548 alg_in->cost.cost += op_cost;
2549 alg_in->cost.latency += op_cost;
2550 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2551 {
2552 struct algorithm *x;
2553 best_cost = alg_in->cost;
2554 x = alg_in, alg_in = best_alg, best_alg = x;
2555 best_alg->log[best_alg->ops] = m;
2556 best_alg->op[best_alg->ops] = alg_shift;
2557 }
2558 }
2559 }
2560 if (cache_hit)
2561 goto done;
2562 }
2563
2564 /* If we have an odd number, add or subtract one. */
2565 if ((t & 1) != 0)
2566 {
2567 unsigned HOST_WIDE_INT w;
2568
2569 do_alg_addsub_t_m2:
2570 for (w = 1; (w & t) != 0; w <<= 1)
2571 ;
2572 /* If T was -1, then W will be zero after the loop. This is another
2573 case where T ends with ...111. Handling it by multiplying by (T + 1)
2574 and subtracting 1 produces slightly better code and makes algorithm
2575 selection much faster than treating it like the ...0111 case
2576 below. */
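/* For instance (illustrative only): t == 15 is ...1111, so the
recursive call synthesizes t + 1 == 16 as a single shift and the
final sequence is x * 15 == (x << 4) - x. */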
2577 if (w == 0
2578 || (w > 2
2579 /* Reject the case where t is 3.
2580 Thus we prefer addition in that case. */
2581 && t != 3))
2582 {
2583 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2584
2585 op_cost = add_cost (speed, mode);
2586 new_limit.cost = best_cost.cost - op_cost;
2587 new_limit.latency = best_cost.latency - op_cost;
2588 synth_mult (alg_in, t + 1, &new_limit, mode);
2589
2590 alg_in->cost.cost += op_cost;
2591 alg_in->cost.latency += op_cost;
2592 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2593 {
2594 struct algorithm *x;
2595 best_cost = alg_in->cost;
2596 x = alg_in, alg_in = best_alg, best_alg = x;
2597 best_alg->log[best_alg->ops] = 0;
2598 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2599 }
2600 }
2601 else
2602 {
2603 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2604
2605 op_cost = add_cost (speed, mode);
2606 new_limit.cost = best_cost.cost - op_cost;
2607 new_limit.latency = best_cost.latency - op_cost;
2608 synth_mult (alg_in, t - 1, &new_limit, mode);
2609
2610 alg_in->cost.cost += op_cost;
2611 alg_in->cost.latency += op_cost;
2612 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2613 {
2614 struct algorithm *x;
2615 best_cost = alg_in->cost;
2616 x = alg_in, alg_in = best_alg, best_alg = x;
2617 best_alg->log[best_alg->ops] = 0;
2618 best_alg->op[best_alg->ops] = alg_add_t_m2;
2619 }
2620 }
2621
2622 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2623 quickly with a - a * n for some appropriate constant n. */
2624 m = exact_log2 (-orig_t + 1);
2625 if (m >= 0 && m < maxm)
2626 {
2627 op_cost = shiftsub1_cost (speed, mode, m);
2628 new_limit.cost = best_cost.cost - op_cost;
2629 new_limit.latency = best_cost.latency - op_cost;
2630 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2631 &new_limit, mode);
2632
2633 alg_in->cost.cost += op_cost;
2634 alg_in->cost.latency += op_cost;
2635 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2636 {
2637 struct algorithm *x;
2638 best_cost = alg_in->cost;
2639 x = alg_in, alg_in = best_alg, best_alg = x;
2640 best_alg->log[best_alg->ops] = m;
2641 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2642 }
2643 }
2644
2645 if (cache_hit)
2646 goto done;
2647 }
2648
2649 /* Look for factors of t of the form
2650 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2651 If we find such a factor, we can multiply by t using an algorithm that
2652 multiplies by q, shifts the result by m, and adds/subtracts it to itself.
2653
2654 We search for large factors first and loop down, even if large factors
2655 are less probable than small; if we find a large factor we will find a
2656 good sequence quickly, and therefore be able to prune (by decreasing
2657 COST_LIMIT) the search. */
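/* A hypothetical example: t == 45 factors as 3 * (2**4 - 1), so x * 45
can be synthesized by first forming x * 3 == (x << 1) + x and then
computing (x * 3 << 4) - x * 3. */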
2658
2659 do_alg_addsub_factor:
2660 for (m = floor_log2 (t - 1); m >= 2; m--)
2661 {
2662 unsigned HOST_WIDE_INT d;
2663
2664 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2665 if (t % d == 0 && t > d && m < maxm
2666 && (!cache_hit || cache_alg == alg_add_factor))
2667 {
2668 /* If the target has a cheap shift-and-add instruction, use
2669 that in preference to a shift insn followed by an add insn.
2670 Assume that the shift-and-add is "atomic" with a latency
2671 equal to its cost, otherwise assume that on superscalar
2672 hardware the shift may be executed concurrently with the
2673 earlier steps in the algorithm. */
2674 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2675 if (shiftadd_cost (speed, mode, m) < op_cost)
2676 {
2677 op_cost = shiftadd_cost (speed, mode, m);
2678 op_latency = op_cost;
2679 }
2680 else
2681 op_latency = add_cost (speed, mode);
2682
2683 new_limit.cost = best_cost.cost - op_cost;
2684 new_limit.latency = best_cost.latency - op_latency;
2685 synth_mult (alg_in, t / d, &new_limit, mode);
2686
2687 alg_in->cost.cost += op_cost;
2688 alg_in->cost.latency += op_latency;
2689 if (alg_in->cost.latency < op_cost)
2690 alg_in->cost.latency = op_cost;
2691 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2692 {
2693 struct algorithm *x;
2694 best_cost = alg_in->cost;
2695 x = alg_in, alg_in = best_alg, best_alg = x;
2696 best_alg->log[best_alg->ops] = m;
2697 best_alg->op[best_alg->ops] = alg_add_factor;
2698 }
2699 /* Other factors will have been taken care of in the recursion. */
2700 break;
2701 }
2702
2703 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2704 if (t % d == 0 && t > d && m < maxm
2705 && (!cache_hit || cache_alg == alg_sub_factor))
2706 {
2707 /* If the target has a cheap shift-and-subtract insn, use
2708 that in preference to a shift insn followed by a sub insn.
2709 Assume that the shift-and-sub is "atomic" with a latency
2710 equal to its cost, otherwise assume that on superscalar
2711 hardware the shift may be executed concurrently with the
2712 earlier steps in the algorithm. */
2713 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2714 if (shiftsub0_cost (speed, mode, m) < op_cost)
2715 {
2716 op_cost = shiftsub0_cost (speed, mode, m);
2717 op_latency = op_cost;
2718 }
2719 else
2720 op_latency = add_cost (speed, mode);
2721
2722 new_limit.cost = best_cost.cost - op_cost;
2723 new_limit.latency = best_cost.latency - op_latency;
2724 synth_mult (alg_in, t / d, &new_limit, mode);
2725
2726 alg_in->cost.cost += op_cost;
2727 alg_in->cost.latency += op_latency;
2728 if (alg_in->cost.latency < op_cost)
2729 alg_in->cost.latency = op_cost;
2730 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2731 {
2732 struct algorithm *x;
2733 best_cost = alg_in->cost;
2734 x = alg_in, alg_in = best_alg, best_alg = x;
2735 best_alg->log[best_alg->ops] = m;
2736 best_alg->op[best_alg->ops] = alg_sub_factor;
2737 }
2738 break;
2739 }
2740 }
2741 if (cache_hit)
2742 goto done;
2743
2744 /* Try shift-and-add (load effective address) instructions,
2745 i.e. do a*3, a*5, a*9. */
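/* For example (illustrative only): for t == 5 we have q == t - 1 == 4,
m == 2, and the recursion reduces to multiplying by 1, so the result
is the single shift-and-add x * 5 == (x << 2) + x. */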
2746 if ((t & 1) != 0)
2747 {
2748 do_alg_add_t2_m:
2749 q = t - 1;
2750 q = q & -q;
2751 m = exact_log2 (q);
2752 if (m >= 0 && m < maxm)
2753 {
2754 op_cost = shiftadd_cost (speed, mode, m);
2755 new_limit.cost = best_cost.cost - op_cost;
2756 new_limit.latency = best_cost.latency - op_cost;
2757 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2758
2759 alg_in->cost.cost += op_cost;
2760 alg_in->cost.latency += op_cost;
2761 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2762 {
2763 struct algorithm *x;
2764 best_cost = alg_in->cost;
2765 x = alg_in, alg_in = best_alg, best_alg = x;
2766 best_alg->log[best_alg->ops] = m;
2767 best_alg->op[best_alg->ops] = alg_add_t2_m;
2768 }
2769 }
2770 if (cache_hit)
2771 goto done;
2772
2773 do_alg_sub_t2_m:
2774 q = t + 1;
2775 q = q & -q;
2776 m = exact_log2 (q);
2777 if (m >= 0 && m < maxm)
2778 {
2779 op_cost = shiftsub0_cost (speed, mode, m);
2780 new_limit.cost = best_cost.cost - op_cost;
2781 new_limit.latency = best_cost.latency - op_cost;
2782 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2783
2784 alg_in->cost.cost += op_cost;
2785 alg_in->cost.latency += op_cost;
2786 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2787 {
2788 struct algorithm *x;
2789 best_cost = alg_in->cost;
2790 x = alg_in, alg_in = best_alg, best_alg = x;
2791 best_alg->log[best_alg->ops] = m;
2792 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2793 }
2794 }
2795 if (cache_hit)
2796 goto done;
2797 }
2798
2799 done:
2800 /* If best_cost has not decreased, we have not found any algorithm. */
2801 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2802 {
2803 /* We failed to find an algorithm. Record alg_impossible for
2804 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2805 we are asked to find an algorithm for T within the same or
2806 lower COST_LIMIT, we can immediately return to the
2807 caller. */
2808 entry_ptr->t = t;
2809 entry_ptr->mode = mode;
2810 entry_ptr->speed = speed;
2811 entry_ptr->alg = alg_impossible;
2812 entry_ptr->cost = *cost_limit;
2813 return;
2814 }
2815
2816 /* Cache the result. */
2817 if (!cache_hit)
2818 {
2819 entry_ptr->t = t;
2820 entry_ptr->mode = mode;
2821 entry_ptr->speed = speed;
2822 entry_ptr->alg = best_alg->op[best_alg->ops];
2823 entry_ptr->cost.cost = best_cost.cost;
2824 entry_ptr->cost.latency = best_cost.latency;
2825 }
2826
2827 /* If the sequence we found is too long for `struct algorithm'
2828 to record, make this search fail. */
2829 if (best_alg->ops == MAX_BITS_PER_WORD)
2830 return;
2831
2832 /* Copy the algorithm from temporary space to the space at alg_out.
2833 We avoid using structure assignment because the majority of
2834 best_alg is normally undefined, and this is a critical function. */
2835 alg_out->ops = best_alg->ops + 1;
2836 alg_out->cost = best_cost;
2837 memcpy (alg_out->op, best_alg->op,
2838 alg_out->ops * sizeof *alg_out->op);
2839 memcpy (alg_out->log, best_alg->log,
2840 alg_out->ops * sizeof *alg_out->log);
2841 }
2842 \f
2843 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2844 Try three variations:
2845
2846 - a shift/add sequence based on VAL itself
2847 - a shift/add sequence based on -VAL, followed by a negation
2848 - a shift/add sequence based on VAL - 1, followed by an addition.
2849
2850 Return true if the cheapest of these costs less than MULT_COST,
2851 describing the algorithm in *ALG and final fixup in *VARIANT. */
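/* As a hedged illustration (not traced from any particular target): with
cheap shifts and adds, VAL == -5 might be handled by synthesizing
x * 5 == (x << 2) + x and negating the result (negate_variant), while
add_variant builds the sequence from VAL - 1 and adds the multiplicand
back as the final step. */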
2852
2853 static bool
2854 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2855 struct algorithm *alg, enum mult_variant *variant,
2856 int mult_cost)
2857 {
2858 struct algorithm alg2;
2859 struct mult_cost limit;
2860 int op_cost;
2861 bool speed = optimize_insn_for_speed_p ();
2862
2863 /* Fail quickly for impossible bounds. */
2864 if (mult_cost < 0)
2865 return false;
2866
2867 /* Ensure that mult_cost provides a reasonable upper bound.
2868 Any constant multiplication can be performed with less
2869 than 2 * bits additions. */
2870 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2871 if (mult_cost > op_cost)
2872 mult_cost = op_cost;
2873
2874 *variant = basic_variant;
2875 limit.cost = mult_cost;
2876 limit.latency = mult_cost;
2877 synth_mult (alg, val, &limit, mode);
2878
2879 /* This works only if the inverted value actually fits in an
2880 `unsigned int'. */
2881 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2882 {
2883 op_cost = neg_cost (speed, mode);
2884 if (MULT_COST_LESS (&alg->cost, mult_cost))
2885 {
2886 limit.cost = alg->cost.cost - op_cost;
2887 limit.latency = alg->cost.latency - op_cost;
2888 }
2889 else
2890 {
2891 limit.cost = mult_cost - op_cost;
2892 limit.latency = mult_cost - op_cost;
2893 }
2894
2895 synth_mult (&alg2, -val, &limit, mode);
2896 alg2.cost.cost += op_cost;
2897 alg2.cost.latency += op_cost;
2898 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2899 *alg = alg2, *variant = negate_variant;
2900 }
2901
2902 /* This proves very useful for division-by-constant. */
2903 op_cost = add_cost (speed, mode);
2904 if (MULT_COST_LESS (&alg->cost, mult_cost))
2905 {
2906 limit.cost = alg->cost.cost - op_cost;
2907 limit.latency = alg->cost.latency - op_cost;
2908 }
2909 else
2910 {
2911 limit.cost = mult_cost - op_cost;
2912 limit.latency = mult_cost - op_cost;
2913 }
2914
2915 synth_mult (&alg2, val - 1, &limit, mode);
2916 alg2.cost.cost += op_cost;
2917 alg2.cost.latency += op_cost;
2918 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2919 *alg = alg2, *variant = add_variant;
2920
2921 return MULT_COST_LESS (&alg->cost, mult_cost);
2922 }
2923
2924 /* A subroutine of expand_mult, used for constant multiplications.
2925 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2926 convenient. Use the shift/add sequence described by ALG and apply
2927 the final fixup specified by VARIANT. */
2928
2929 static rtx
2930 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2931 rtx target, const struct algorithm *alg,
2932 enum mult_variant variant)
2933 {
2934 HOST_WIDE_INT val_so_far;
2935 rtx insn, accum, tem;
2936 int opno;
2937 enum machine_mode nmode;
2938
2939 /* Avoid referencing memory over and over and invalid sharing
2940 on SUBREGs. */
2941 op0 = force_reg (mode, op0);
2942
2943 /* ACCUM starts out either as OP0 or as a zero, depending on
2944 the first operation. */
2945
2946 if (alg->op[0] == alg_zero)
2947 {
2948 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2949 val_so_far = 0;
2950 }
2951 else if (alg->op[0] == alg_m)
2952 {
2953 accum = copy_to_mode_reg (mode, op0);
2954 val_so_far = 1;
2955 }
2956 else
2957 gcc_unreachable ();
2958
2959 for (opno = 1; opno < alg->ops; opno++)
2960 {
2961 int log = alg->log[opno];
2962 rtx shift_subtarget = optimize ? 0 : accum;
2963 rtx add_target
2964 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2965 && !optimize)
2966 ? target : 0;
2967 rtx accum_target = optimize ? 0 : accum;
2968 rtx accum_inner;
2969
2970 switch (alg->op[opno])
2971 {
2972 case alg_shift:
2973 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2974 /* REG_EQUAL note will be attached to the following insn. */
2975 emit_move_insn (accum, tem);
2976 val_so_far <<= log;
2977 break;
2978
2979 case alg_add_t_m2:
2980 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2981 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2982 add_target ? add_target : accum_target);
2983 val_so_far += (HOST_WIDE_INT) 1 << log;
2984 break;
2985
2986 case alg_sub_t_m2:
2987 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2988 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2989 add_target ? add_target : accum_target);
2990 val_so_far -= (HOST_WIDE_INT) 1 << log;
2991 break;
2992
2993 case alg_add_t2_m:
2994 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2995 log, shift_subtarget, 0);
2996 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2997 add_target ? add_target : accum_target);
2998 val_so_far = (val_so_far << log) + 1;
2999 break;
3000
3001 case alg_sub_t2_m:
3002 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3003 log, shift_subtarget, 0);
3004 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3005 add_target ? add_target : accum_target);
3006 val_so_far = (val_so_far << log) - 1;
3007 break;
3008
3009 case alg_add_factor:
3010 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3011 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3012 add_target ? add_target : accum_target);
3013 val_so_far += val_so_far << log;
3014 break;
3015
3016 case alg_sub_factor:
3017 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3018 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3019 (add_target
3020 ? add_target : (optimize ? 0 : tem)));
3021 val_so_far = (val_so_far << log) - val_so_far;
3022 break;
3023
3024 default:
3025 gcc_unreachable ();
3026 }
3027
3028 if (SCALAR_INT_MODE_P (mode))
3029 {
3030 /* Write a REG_EQUAL note on the last insn so that we can cse
3031 multiplication sequences. Note that if ACCUM is a SUBREG,
3032 we've set the inner register and must properly indicate that. */
3033 tem = op0, nmode = mode;
3034 accum_inner = accum;
3035 if (GET_CODE (accum) == SUBREG)
3036 {
3037 accum_inner = SUBREG_REG (accum);
3038 nmode = GET_MODE (accum_inner);
3039 tem = gen_lowpart (nmode, op0);
3040 }
3041
3042 insn = get_last_insn ();
3043 set_dst_reg_note (insn, REG_EQUAL,
3044 gen_rtx_MULT (nmode, tem,
3045 gen_int_mode (val_so_far, nmode)),
3046 accum_inner);
3047 }
3048 }
3049
3050 if (variant == negate_variant)
3051 {
3052 val_so_far = -val_so_far;
3053 accum = expand_unop (mode, neg_optab, accum, target, 0);
3054 }
3055 else if (variant == add_variant)
3056 {
3057 val_so_far = val_so_far + 1;
3058 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3059 }
3060
3061 /* Compare only the bits of val and val_so_far that are significant
3062 in the result mode, to avoid sign-/zero-extension confusion. */
3063 nmode = GET_MODE_INNER (mode);
3064 if (nmode == VOIDmode)
3065 nmode = mode;
3066 val &= GET_MODE_MASK (nmode);
3067 val_so_far &= GET_MODE_MASK (nmode);
3068 gcc_assert (val == val_so_far);
3069
3070 return accum;
3071 }
3072
3073 /* Perform a multiplication and return an rtx for the result.
3074 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3075 TARGET is a suggestion for where to store the result (an rtx).
3076
3077 We check specially for a constant integer as OP1.
3078 If you want this check for OP0 as well, then before calling
3079 you should swap the two operands if OP0 would be constant. */
3080
3081 rtx
3082 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3083 int unsignedp)
3084 {
3085 enum mult_variant variant;
3086 struct algorithm algorithm;
3087 rtx scalar_op1;
3088 int max_cost;
3089 bool speed = optimize_insn_for_speed_p ();
3090 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3091
3092 if (CONSTANT_P (op0))
3093 {
3094 rtx temp = op0;
3095 op0 = op1;
3096 op1 = temp;
3097 }
3098
3099 /* For vectors, there are several simplifications that can be made if
3100 all elements of the vector constant are identical. */
3101 scalar_op1 = op1;
3102 if (GET_CODE (op1) == CONST_VECTOR)
3103 {
3104 int i, n = CONST_VECTOR_NUNITS (op1);
3105 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3106 for (i = 1; i < n; ++i)
3107 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3108 goto skip_scalar;
3109 }
3110
3111 if (INTEGRAL_MODE_P (mode))
3112 {
3113 rtx fake_reg;
3114 HOST_WIDE_INT coeff;
3115 bool is_neg;
3116 int mode_bitsize;
3117
3118 if (op1 == CONST0_RTX (mode))
3119 return op1;
3120 if (op1 == CONST1_RTX (mode))
3121 return op0;
3122 if (op1 == CONSTM1_RTX (mode))
3123 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3124 op0, target, 0);
3125
3126 if (do_trapv)
3127 goto skip_synth;
3128
3129 /* If mode is an integer vector mode, check whether the backend supports
3130 vector left shifts (by scalar or vector) at all. If not, we can't use
3131 a synthesized multiply. */
3132 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3133 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3134 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3135 goto skip_synth;
3136
3137 /* These are the operations that are potentially turned into
3138 a sequence of shifts and additions. */
3139 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3140
3141 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3142 less than or equal in size to `unsigned int' this doesn't matter.
3143 If the mode is larger than `unsigned int', then synth_mult works
3144 only if the constant value exactly fits in an `unsigned int' without
3145 any truncation. This means that multiplying by negative values does
3146 not work; results are off by 2^32 on a 32 bit machine. */
3147 if (CONST_INT_P (scalar_op1))
3148 {
3149 coeff = INTVAL (scalar_op1);
3150 is_neg = coeff < 0;
3151 }
3152 #if TARGET_SUPPORTS_WIDE_INT
3153 else if (CONST_WIDE_INT_P (scalar_op1))
3154 #else
3155 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3156 #endif
3157 {
3158 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3159 /* Perfect power of 2 (other than 1, which is handled above). */
3160 if (shift > 0)
3161 return expand_shift (LSHIFT_EXPR, mode, op0,
3162 shift, target, unsignedp);
3163 else
3164 goto skip_synth;
3165 }
3166 else
3167 goto skip_synth;
3168
3169 /* We used to test optimize here, on the grounds that it's better to
3170 produce a smaller program when -O is not used. But this causes
3171 such a terrible slowdown sometimes that it seems better to always
3172 use synth_mult. */
3173
3174 /* Special case powers of two. */
3175 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3176 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3177 return expand_shift (LSHIFT_EXPR, mode, op0,
3178 floor_log2 (coeff), target, unsignedp);
3179
3180 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3181
3182 /* Attempt to handle multiplication of DImode values by negative
3183 coefficients, by performing the multiplication by a positive
3184 multiplier and then inverting the result. */
3185 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3186 {
3187 /* It's safe to use -coeff even for INT_MIN, as the
3188 result is interpreted as an unsigned coefficient.
3189 Exclude cost of op0 from max_cost to match the cost
3190 calculation of the synth_mult. */
3191 coeff = -(unsigned HOST_WIDE_INT) coeff;
3192 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3193 - neg_cost (speed, mode));
3194 if (max_cost <= 0)
3195 goto skip_synth;
3196
3197 /* Special case powers of two. */
3198 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3199 {
3200 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3201 floor_log2 (coeff), target, unsignedp);
3202 return expand_unop (mode, neg_optab, temp, target, 0);
3203 }
3204
3205 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3206 max_cost))
3207 {
3208 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3209 &algorithm, variant);
3210 return expand_unop (mode, neg_optab, temp, target, 0);
3211 }
3212 goto skip_synth;
3213 }
3214
3215 /* Exclude cost of op0 from max_cost to match the cost
3216 calculation of the synth_mult. */
3217 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3218 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3219 return expand_mult_const (mode, op0, coeff, target,
3220 &algorithm, variant);
3221 }
3222 skip_synth:
3223
3224 /* Expand x*2.0 as x+x. */
3225 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3226 {
3227 REAL_VALUE_TYPE d;
3228 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3229
3230 if (REAL_VALUES_EQUAL (d, dconst2))
3231 {
3232 op0 = force_reg (GET_MODE (op0), op0);
3233 return expand_binop (mode, add_optab, op0, op0,
3234 target, unsignedp, OPTAB_LIB_WIDEN);
3235 }
3236 }
3237 skip_scalar:
3238
3239 /* This used to use umul_optab if unsigned, but for non-widening multiply
3240 there is no difference between signed and unsigned. */
3241 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3242 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3243 gcc_assert (op0);
3244 return op0;
3245 }
3246
3247 /* Return a cost estimate for multiplying a register by the given
3248 COEFFicient in the given MODE and SPEED. */
3249
3250 int
3251 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3252 {
3253 int max_cost;
3254 struct algorithm algorithm;
3255 enum mult_variant variant;
3256
3257 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3258 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3259 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3260 return algorithm.cost.cost;
3261 else
3262 return max_cost;
3263 }
3264
3265 /* Perform a widening multiplication and return an rtx for the result.
3266 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3267 TARGET is a suggestion for where to store the result (an rtx).
3268 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3269 or smul_widen_optab.
3270
3271 We check specially for a constant integer as OP1, comparing the
3272 cost of a widening multiply against the cost of a sequence of shifts
3273 and adds. */
3274
3275 rtx
3276 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3277 int unsignedp, optab this_optab)
3278 {
3279 bool speed = optimize_insn_for_speed_p ();
3280 rtx cop1;
3281
3282 if (CONST_INT_P (op1)
3283 && GET_MODE (op0) != VOIDmode
3284 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3285 this_optab == umul_widen_optab))
3286 && CONST_INT_P (cop1)
3287 && (INTVAL (cop1) >= 0
3288 || HWI_COMPUTABLE_MODE_P (mode)))
3289 {
3290 HOST_WIDE_INT coeff = INTVAL (cop1);
3291 int max_cost;
3292 enum mult_variant variant;
3293 struct algorithm algorithm;
3294
3295 /* Special case powers of two. */
3296 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3297 {
3298 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3299 return expand_shift (LSHIFT_EXPR, mode, op0,
3300 floor_log2 (coeff), target, unsignedp);
3301 }
3302
3303 /* Exclude cost of op0 from max_cost to match the cost
3304 calculation of the synth_mult. */
3305 max_cost = mul_widen_cost (speed, mode);
3306 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3307 max_cost))
3308 {
3309 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3310 return expand_mult_const (mode, op0, coeff, target,
3311 &algorithm, variant);
3312 }
3313 }
3314 return expand_binop (mode, this_optab, op0, op1, target,
3315 unsignedp, OPTAB_LIB_WIDEN);
3316 }
3317 \f
3318 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3319 replace division by D, and put the least significant N bits of the result
3320 in *MULTIPLIER_PTR and return the most significant bit.
3321
3322 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3323 needed precision is in PRECISION (should be <= N).
3324
3325 PRECISION should be as small as possible so this function can choose
3326 a multiplier more freely.
3327
3328 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3329 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3330
3331 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3332 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
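/* A familiar instance (shown for illustration, not output traced from this
function): for D == 3 with N == PRECISION == 32 the multiplier is
0xAAAAAAAB with a post-shift of 1, so an unsigned 32-bit division can
be rewritten as
q = (uint32_t) (((uint64_t) x * 0xAAAAAAABu) >> 33);
i.e. take the high half of the widening multiply and then shift right
by *POST_SHIFT_PTR. */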
3333
3334 unsigned HOST_WIDE_INT
3335 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3336 unsigned HOST_WIDE_INT *multiplier_ptr,
3337 int *post_shift_ptr, int *lgup_ptr)
3338 {
3339 int lgup, post_shift;
3340 int pow, pow2;
3341
3342 /* lgup = ceil(log2(divisor)); */
3343 lgup = ceil_log2 (d);
3344
3345 gcc_assert (lgup <= n);
3346
3347 pow = n + lgup;
3348 pow2 = n + lgup - precision;
3349
3350 /* mlow = 2^(N + lgup)/d */
3351 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3352 wide_int mlow = wi::udiv_trunc (val, d);
3353
3354 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3355 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3356 wide_int mhigh = wi::udiv_trunc (val, d);
3357
3358 /* If precision == N, then mlow, mhigh exceed 2^N
3359 (but they do not exceed 2^(N+1)). */
3360
3361 /* Reduce to lowest terms. */
3362 for (post_shift = lgup; post_shift > 0; post_shift--)
3363 {
3364 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3365 HOST_BITS_PER_WIDE_INT);
3366 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3367 HOST_BITS_PER_WIDE_INT);
3368 if (ml_lo >= mh_lo)
3369 break;
3370
3371 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3372 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3373 }
3374
3375 *post_shift_ptr = post_shift;
3376 *lgup_ptr = lgup;
3377 if (n < HOST_BITS_PER_WIDE_INT)
3378 {
3379 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3380 *multiplier_ptr = mhigh.to_uhwi () & mask;
3381 return mhigh.to_uhwi () >= mask;
3382 }
3383 else
3384 {
3385 *multiplier_ptr = mhigh.to_uhwi ();
3386 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3387 }
3388 }
3389
3390 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3391 congruent to 1 (mod 2**N). */
3392
3393 static unsigned HOST_WIDE_INT
3394 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3395 {
3396 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3397
3398 /* The algorithm notes that the choice y = x satisfies
3399 x*y == 1 mod 2^3, since x is assumed odd.
3400 Each iteration doubles the number of bits of significance in y. */
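  /* Worked example, for illustration: invert_mod2n (7, 8) starts with
     y = 7 (7*7 = 49 == 1 mod 8); one step gives
     y = 7 * (2 - 49) & 0xff = 183, and indeed 7 * 183 = 1281 == 1 (mod 256).  */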
3401
3402 unsigned HOST_WIDE_INT mask;
3403 unsigned HOST_WIDE_INT y = x;
3404 int nbit = 3;
3405
3406 mask = (n == HOST_BITS_PER_WIDE_INT
3407 ? ~(unsigned HOST_WIDE_INT) 0
3408 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3409
3410 while (nbit < n)
3411 {
3412 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3413 nbit *= 2;
3414 }
3415 return y;
3416 }
3417
3418 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3419 done in the wrong signedness flavor. ADJ_OPERAND is already the high half of the
3420 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3421 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3422 become signed.
3423
3424 The result is put in TARGET if that is convenient.
3425
3426 MODE is the mode of operation. */
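/* Sketch of the underlying identity (for illustration): working modulo
   2^GET_MODE_BITSIZE (MODE), the unsigned high part of OP0 * OP1 equals the
   signed high part plus (OP0 < 0 ? OP1 : 0) plus (OP1 < 0 ? OP0 : 0);
   subtracting those two terms instead converts the unsigned high part
   back into the signed one.  */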
3427
3428 rtx
3429 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3430 rtx op1, rtx target, int unsignedp)
3431 {
3432 rtx tem;
3433 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3434
3435 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3436 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3437 tem = expand_and (mode, tem, op1, NULL_RTX);
3438 adj_operand
3439 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3440 adj_operand);
3441
3442 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3443 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3444 tem = expand_and (mode, tem, op0, NULL_RTX);
3445 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3446 target);
3447
3448 return target;
3449 }
3450
3451 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3452
3453 static rtx
3454 extract_high_half (enum machine_mode mode, rtx op)
3455 {
3456 enum machine_mode wider_mode;
3457
3458 if (mode == word_mode)
3459 return gen_highpart (mode, op);
3460
3461 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3462
3463 wider_mode = GET_MODE_WIDER_MODE (mode);
3464 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3465 GET_MODE_BITSIZE (mode), 0, 1);
3466 return convert_modes (mode, wider_mode, op, 0);
3467 }
3468
3469 /* Like expmed_mult_highpart, but only consider using a multiplication
3470 optab. OP1 is an rtx for the constant operand. */
3471
3472 static rtx
3473 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3474 rtx target, int unsignedp, int max_cost)
3475 {
3476 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3477 enum machine_mode wider_mode;
3478 optab moptab;
3479 rtx tem;
3480 int size;
3481 bool speed = optimize_insn_for_speed_p ();
3482
3483 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3484
3485 wider_mode = GET_MODE_WIDER_MODE (mode);
3486 size = GET_MODE_BITSIZE (mode);
3487
3488 /* First, try using a multiplication insn that generates only the needed
3489 high part of the product, in the sign flavor given by UNSIGNEDP. */
3490 if (mul_highpart_cost (speed, mode) < max_cost)
3491 {
3492 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3493 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3494 unsignedp, OPTAB_DIRECT);
3495 if (tem)
3496 return tem;
3497 }
3498
3499 /* Second, same as above, but use the sign flavor opposite to UNSIGNEDP.
3500 Need to adjust the result after the multiplication. */
3501 if (size - 1 < BITS_PER_WORD
3502 && (mul_highpart_cost (speed, mode)
3503 + 2 * shift_cost (speed, mode, size-1)
3504 + 4 * add_cost (speed, mode) < max_cost))
3505 {
3506 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3507 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3508 unsignedp, OPTAB_DIRECT);
3509 if (tem)
3510 /* We used the wrong signedness. Adjust the result. */
3511 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3512 tem, unsignedp);
3513 }
3514
3515 /* Try widening multiplication. */
3516 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3517 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3518 && mul_widen_cost (speed, wider_mode) < max_cost)
3519 {
3520 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3521 unsignedp, OPTAB_WIDEN);
3522 if (tem)
3523 return extract_high_half (mode, tem);
3524 }
3525
3526 /* Try widening the mode and perform a non-widening multiplication. */
3527 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3528 && size - 1 < BITS_PER_WORD
3529 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3530 < max_cost))
3531 {
3532 rtx insns, wop0, wop1;
3533
3534 /* We need to widen the operands, for example to ensure the
3535 constant multiplier is correctly sign or zero extended.
3536 Use a sequence to clean up any instructions emitted by
3537 the conversions if things don't work out. */
3538 start_sequence ();
3539 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3540 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3541 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3542 unsignedp, OPTAB_WIDEN);
3543 insns = get_insns ();
3544 end_sequence ();
3545
3546 if (tem)
3547 {
3548 emit_insn (insns);
3549 return extract_high_half (mode, tem);
3550 }
3551 }
3552
3553 /* Try widening multiplication of opposite signedness, and adjust. */
3554 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3555 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3556 && size - 1 < BITS_PER_WORD
3557 && (mul_widen_cost (speed, wider_mode)
3558 + 2 * shift_cost (speed, mode, size-1)
3559 + 4 * add_cost (speed, mode) < max_cost))
3560 {
3561 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3562 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3563 if (tem != 0)
3564 {
3565 tem = extract_high_half (mode, tem);
3566 /* We used the wrong signedness. Adjust the result. */
3567 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3568 target, unsignedp);
3569 }
3570 }
3571
3572 return 0;
3573 }
3574
3575 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3576 putting the high half of the result in TARGET if that is convenient,
3577 and return where the result is. If the operation cannot be performed,
3578 0 is returned.
3579
3580 MODE is the mode of operation and result.
3581
3582 UNSIGNEDP nonzero means unsigned multiply.
3583
3584 MAX_COST is the total allowed cost for the expanded RTL. */
3585
3586 static rtx
3587 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3588 rtx target, int unsignedp, int max_cost)
3589 {
3590 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3591 unsigned HOST_WIDE_INT cnst1;
3592 int extra_cost;
3593 bool sign_adjust = false;
3594 enum mult_variant variant;
3595 struct algorithm alg;
3596 rtx tem;
3597 bool speed = optimize_insn_for_speed_p ();
3598
3599 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3600 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3601 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3602
3603 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3604
3605 /* We can't optimize modes wider than BITS_PER_WORD.
3606 ??? We might be able to perform double-word arithmetic if
3607 mode == word_mode, however all the cost calculations in
3608 synth_mult etc. assume single-word operations. */
3609 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3610 return expmed_mult_highpart_optab (mode, op0, op1, target,
3611 unsignedp, max_cost);
3612
3613 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3614
3615 /* Check whether we try to multiply by a negative constant. */
3616 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3617 {
3618 sign_adjust = true;
3619 extra_cost += add_cost (speed, mode);
3620 }
3621
3622 /* See whether shift/add multiplication is cheap enough. */
3623 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3624 max_cost - extra_cost))
3625 {
3626 /* See whether the specialized multiplication optabs are
3627 cheaper than the shift/add version. */
3628 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3629 alg.cost.cost + extra_cost);
3630 if (tem)
3631 return tem;
3632
3633 tem = convert_to_mode (wider_mode, op0, unsignedp);
3634 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3635 tem = extract_high_half (mode, tem);
3636
3637 /* Adjust result for signedness. */
3638 if (sign_adjust)
3639 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3640
3641 return tem;
3642 }
3643 return expmed_mult_highpart_optab (mode, op0, op1, target,
3644 unsignedp, max_cost);
3645 }
3646
3647
3648 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3649
3650 static rtx
3651 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3652 {
3653 rtx result, temp, shift, label;
3654 int logd;
3655 int prec = GET_MODE_PRECISION (mode);
3656
3657 logd = floor_log2 (d);
3658 result = gen_reg_rtx (mode);
3659
3660 /* Avoid conditional branches when they're expensive. */
3661 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3662 && optimize_insn_for_speed_p ())
3663 {
3664 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3665 mode, 0, -1);
3666 if (signmask)
3667 {
3668 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3669 signmask = force_reg (mode, signmask);
3670 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3671
3672 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3673 which instruction sequence to use. If logical right shifts
3674 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3675 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
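/* For illustration: with a 32-bit mode, D = 8 and OP0 = -27, SIGNMASK is
   -1 and the XOR sequence computes ((-27 ^ -1) - -1) & 7 = 27 & 7 = 3,
   then (3 ^ -1) - -1 = -3, which is -27 % 8 under truncating division.  */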
3676
3677 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3678 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3679 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3680 > COSTS_N_INSNS (2)))
3681 {
3682 temp = expand_binop (mode, xor_optab, op0, signmask,
3683 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 temp = expand_binop (mode, sub_optab, temp, signmask,
3685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3686 temp = expand_binop (mode, and_optab, temp,
3687 gen_int_mode (masklow, mode),
3688 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689 temp = expand_binop (mode, xor_optab, temp, signmask,
3690 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3691 temp = expand_binop (mode, sub_optab, temp, signmask,
3692 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3693 }
3694 else
3695 {
3696 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3697 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3698 signmask = force_reg (mode, signmask);
3699
3700 temp = expand_binop (mode, add_optab, op0, signmask,
3701 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3702 temp = expand_binop (mode, and_optab, temp,
3703 gen_int_mode (masklow, mode),
3704 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3705 temp = expand_binop (mode, sub_optab, temp, signmask,
3706 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3707 }
3708 return temp;
3709 }
3710 }
3711
3712 /* Mask contains the mode's signbit and the significant bits of the
3713 modulus. By including the signbit in the operation, many targets
3714 can avoid an explicit compare operation in the following comparison
3715 against zero. */
3716 wide_int mask = wi::mask (logd, false, prec);
3717 mask = wi::set_bit (mask, prec - 1);
3718
3719 temp = expand_binop (mode, and_optab, op0,
3720 immed_wide_int_const (mask, mode),
3721 result, 1, OPTAB_LIB_WIDEN);
3722 if (temp != result)
3723 emit_move_insn (result, temp);
3724
3725 label = gen_label_rtx ();
3726 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3727
3728 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3729 0, OPTAB_LIB_WIDEN);
3730
3731 mask = wi::mask (logd, true, prec);
3732 temp = expand_binop (mode, ior_optab, temp,
3733 immed_wide_int_const (mask, mode),
3734 result, 1, OPTAB_LIB_WIDEN);
3735 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3736 0, OPTAB_LIB_WIDEN);
3737 if (temp != result)
3738 emit_move_insn (result, temp);
3739 emit_label (label);
3740 return result;
3741 }
3742
3743 /* Expand signed division of OP0 by a power of two D in mode MODE.
3744 This routine is only called for positive values of D. */
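/* For illustration: to compute -13 / 4 with truncation the general path
   below adds D - 1 = 3 only when OP0 is negative and then shifts:
   (-13 + 3) >> 2 = -10 >> 2 = -3, while 13 / 4 stays (13 + 0) >> 2 = 3.  */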
3745
3746 static rtx
3747 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3748 {
3749 rtx temp, label;
3750 int logd;
3751
3752 logd = floor_log2 (d);
3753
3754 if (d == 2
3755 && BRANCH_COST (optimize_insn_for_speed_p (),
3756 false) >= 1)
3757 {
3758 temp = gen_reg_rtx (mode);
3759 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3760 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3761 0, OPTAB_LIB_WIDEN);
3762 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3763 }
3764
3765 #ifdef HAVE_conditional_move
3766 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3767 >= 2)
3768 {
3769 rtx temp2;
3770
3771 start_sequence ();
3772 temp2 = copy_to_mode_reg (mode, op0);
3773 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3774 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3775 temp = force_reg (mode, temp);
3776
3777 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3778 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3779 mode, temp, temp2, mode, 0);
3780 if (temp2)
3781 {
3782 rtx seq = get_insns ();
3783 end_sequence ();
3784 emit_insn (seq);
3785 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3786 }
3787 end_sequence ();
3788 }
3789 #endif
3790
3791 if (BRANCH_COST (optimize_insn_for_speed_p (),
3792 false) >= 2)
3793 {
3794 int ushift = GET_MODE_BITSIZE (mode) - logd;
3795
3796 temp = gen_reg_rtx (mode);
3797 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3798 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3799 > COSTS_N_INSNS (1))
3800 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3801 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3802 else
3803 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3804 ushift, NULL_RTX, 1);
3805 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3806 0, OPTAB_LIB_WIDEN);
3807 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3808 }
3809
3810 label = gen_label_rtx ();
3811 temp = copy_to_mode_reg (mode, op0);
3812 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3813 expand_inc (temp, gen_int_mode (d - 1, mode));
3814 emit_label (label);
3815 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3816 }
3817 \f
3818 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3819 if that is convenient, and returning where the result is.
3820 You may request either the quotient or the remainder as the result;
3821 specify REM_FLAG nonzero to get the remainder.
3822
3823 CODE is the expression code for which kind of division this is;
3824 it controls how rounding is done. MODE is the machine mode to use.
3825 UNSIGNEDP nonzero means do unsigned division. */
3826
3827 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3828 and then correct it by or'ing in missing high bits
3829 if result of ANDI is nonzero.
3830 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3831 This could optimize to a bfexts instruction.
3832 But C doesn't use these operations, so their optimizations are
3833 left for later. */
3834 /* ??? For modulo, we don't actually need the highpart of the first product,
3835 the low part will do nicely. And for small divisors, the second multiply
3836 can also be a low-part only multiply or even be completely left out.
3837 E.g. to calculate the remainder of a division by 3 with a 32 bit
3838 multiply, multiply with 0x55555556 and extract the upper two bits;
3839 the result is exact for inputs up to 0x1fffffff.
3840 The input range can be reduced by using cross-sum rules.
3841 For odd divisors >= 3, the following table gives right shift counts
3842 so that if a number is shifted by an integer multiple of the given
3843 amount, the remainder stays the same:
3844 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3845 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3846 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3847 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3848 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3849
3850 Cross-sum rules for even numbers can be derived by leaving as many bits
3851 to the right alone as the divisor has zeros to the right.
3852 E.g. if x is an unsigned 32 bit number:
3853 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3854 */
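/* One illustrative reading of the multiply-by-0x55555556 remark above:
   the low 32 bits of X * 0x55555556 approximate (X mod 3) / 3 as a binary
   fraction, so their top two bits give the remainder.  E.g. for X = 8 the
   low word is 0xAAAAAAB0, whose top two bits are 0b10 = 2 = 8 mod 3.  */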
3855
3856 rtx
3857 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3858 rtx op0, rtx op1, rtx target, int unsignedp)
3859 {
3860 enum machine_mode compute_mode;
3861 rtx tquotient;
3862 rtx quotient = 0, remainder = 0;
3863 rtx last;
3864 int size;
3865 rtx insn;
3866 optab optab1, optab2;
3867 int op1_is_constant, op1_is_pow2 = 0;
3868 int max_cost, extra_cost;
3869 static HOST_WIDE_INT last_div_const = 0;
3870 bool speed = optimize_insn_for_speed_p ();
3871
3872 op1_is_constant = CONST_INT_P (op1);
3873 if (op1_is_constant)
3874 {
3875 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3876 if (unsignedp)
3877 ext_op1 &= GET_MODE_MASK (mode);
3878 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3879 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3880 }
3881
3882 /*
3883 This is the structure of expand_divmod:
3884
3885 First comes code to fix up the operands so we can perform the operations
3886 correctly and efficiently.
3887
3888 Second comes a switch statement with code specific for each rounding mode.
3889 For some special operands this code emits all RTL for the desired
3890 operation; for other cases, it generates only a quotient and stores it in
3891 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3892 to indicate that it has not done anything.
3893
3894 Last comes code that finishes the operation. If QUOTIENT is set and
3895 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3896 QUOTIENT is not set, it is computed using trunc rounding.
3897
3898 We try to generate special code for division and remainder when OP1 is a
3899 constant. If |OP1| = 2**n we can use shifts and some other fast
3900 operations. For other values of OP1, we compute a carefully selected
3901 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3902 by m.
3903
3904 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3905 half of the product. Different strategies for generating the product are
3906 implemented in expmed_mult_highpart.
3907
3908 If what we actually want is the remainder, we generate that by another
3909 by-constant multiplication and a subtraction. */
3910
3911 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3912 code below will malfunction if we are, so check here and handle
3913 the special case if so. */
3914 if (op1 == const1_rtx)
3915 return rem_flag ? const0_rtx : op0;
3916
3917 /* When dividing by -1, we could get an overflow.
3918 negv_optab can handle overflows. */
3919 if (! unsignedp && op1 == constm1_rtx)
3920 {
3921 if (rem_flag)
3922 return const0_rtx;
3923 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3924 ? negv_optab : neg_optab, op0, target, 0);
3925 }
3926
3927 if (target
3928 /* Don't use the function value register as a target
3929 since we have to read it as well as write it,
3930 and function-inlining gets confused by this. */
3931 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3932 /* Don't clobber an operand while doing a multi-step calculation. */
3933 || ((rem_flag || op1_is_constant)
3934 && (reg_mentioned_p (target, op0)
3935 || (MEM_P (op0) && MEM_P (target))))
3936 || reg_mentioned_p (target, op1)
3937 || (MEM_P (op1) && MEM_P (target))))
3938 target = 0;
3939
3940 /* Get the mode in which to perform this computation. Normally it will
3941 be MODE, but sometimes we can't do the desired operation in MODE.
3942 If so, pick a wider mode in which we can do the operation. Convert
3943 to that mode at the start to avoid repeated conversions.
3944
3945 First see what operations we need. These depend on the expression
3946 we are evaluating. (We assume that divxx3 insns exist under the
3947 same conditions that modxx3 insns and that these insns don't normally
3948 fail. If these assumptions are not correct, we may generate less
3949 efficient code in some cases.)
3950
3951 Then see if we find a mode in which we can open-code that operation
3952 (either a division, modulus, or shift). Finally, check for the smallest
3953 mode for which we can do the operation with a library call. */
3954
3955 /* We might want to refine this now that we have division-by-constant
3956 optimization. Since expmed_mult_highpart tries so many variants, it is
3957 not straightforward to generalize this. Maybe we should make an array
3958 of possible modes in init_expmed? Save this for GCC 2.7. */
3959
3960 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3961 ? (unsignedp ? lshr_optab : ashr_optab)
3962 : (unsignedp ? udiv_optab : sdiv_optab));
3963 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3964 ? optab1
3965 : (unsignedp ? udivmod_optab : sdivmod_optab));
3966
3967 for (compute_mode = mode; compute_mode != VOIDmode;
3968 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3969 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3970 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3971 break;
3972
3973 if (compute_mode == VOIDmode)
3974 for (compute_mode = mode; compute_mode != VOIDmode;
3975 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3976 if (optab_libfunc (optab1, compute_mode)
3977 || optab_libfunc (optab2, compute_mode))
3978 break;
3979
3980 /* If we still couldn't find a mode, use MODE, but expand_binop will
3981 probably die. */
3982 if (compute_mode == VOIDmode)
3983 compute_mode = mode;
3984
3985 if (target && GET_MODE (target) == compute_mode)
3986 tquotient = target;
3987 else
3988 tquotient = gen_reg_rtx (compute_mode);
3989
3990 size = GET_MODE_BITSIZE (compute_mode);
3991 #if 0
3992 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3993 (mode), and thereby get better code when OP1 is a constant. Do that
3994 later. It will require going over all usages of SIZE below. */
3995 size = GET_MODE_BITSIZE (mode);
3996 #endif
3997
3998 /* Only deduct something for a REM if the last divide done was
3999 for a different constant. Then set the constant of the last
4000 divide. */
4001 max_cost = (unsignedp
4002 ? udiv_cost (speed, compute_mode)
4003 : sdiv_cost (speed, compute_mode));
4004 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4005 && INTVAL (op1) == last_div_const))
4006 max_cost -= (mul_cost (speed, compute_mode)
4007 + add_cost (speed, compute_mode));
4008
4009 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4010
4011 /* Now convert to the best mode to use. */
4012 if (compute_mode != mode)
4013 {
4014 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4015 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4016
4017 /* convert_modes may have placed op1 into a register, so we
4018 must recompute the following. */
4019 op1_is_constant = CONST_INT_P (op1);
4020 op1_is_pow2 = (op1_is_constant
4021 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4022 || (! unsignedp
4023 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4024 }
4025
4026 /* If one of the operands is a volatile MEM, copy it into a register. */
4027
4028 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4029 op0 = force_reg (compute_mode, op0);
4030 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4031 op1 = force_reg (compute_mode, op1);
4032
4033 /* If we need the remainder or if OP1 is constant, we need to
4034 put OP0 in a register in case it has any queued subexpressions. */
4035 if (rem_flag || op1_is_constant)
4036 op0 = force_reg (compute_mode, op0);
4037
4038 last = get_last_insn ();
4039
4040 /* Promote floor rounding to trunc rounding for unsigned operations. */
4041 if (unsignedp)
4042 {
4043 if (code == FLOOR_DIV_EXPR)
4044 code = TRUNC_DIV_EXPR;
4045 if (code == FLOOR_MOD_EXPR)
4046 code = TRUNC_MOD_EXPR;
4047 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4048 code = TRUNC_DIV_EXPR;
4049 }
4050
4051 if (op1 != const0_rtx)
4052 switch (code)
4053 {
4054 case TRUNC_MOD_EXPR:
4055 case TRUNC_DIV_EXPR:
4056 if (op1_is_constant)
4057 {
4058 if (unsignedp)
4059 {
4060 unsigned HOST_WIDE_INT mh, ml;
4061 int pre_shift, post_shift;
4062 int dummy;
4063 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4064 & GET_MODE_MASK (compute_mode));
4065
4066 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4067 {
4068 pre_shift = floor_log2 (d);
4069 if (rem_flag)
4070 {
4071 unsigned HOST_WIDE_INT mask
4072 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4073 remainder
4074 = expand_binop (compute_mode, and_optab, op0,
4075 gen_int_mode (mask, compute_mode),
4076 remainder, 1,
4077 OPTAB_LIB_WIDEN);
4078 if (remainder)
4079 return gen_lowpart (mode, remainder);
4080 }
4081 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4082 pre_shift, tquotient, 1);
4083 }
4084 else if (size <= HOST_BITS_PER_WIDE_INT)
4085 {
4086 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4087 {
4088 /* Most significant bit of divisor is set; emit an scc
4089 insn. */
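/* Since D >= 2^(SIZE - 1) and OP0 < 2^SIZE, the quotient can only be
   0 or 1 and equals (OP0 >= D), which the GEU store-flag computes.  */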
4090 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4091 compute_mode, 1, 1);
4092 }
4093 else
4094 {
4095 /* Find a suitable multiplier and right shift count
4096 instead of multiplying with D. */
4097
4098 mh = choose_multiplier (d, size, size,
4099 &ml, &post_shift, &dummy);
4100
4101 /* If the suggested multiplier is more than SIZE bits,
4102 we can do better for even divisors, using an
4103 initial right shift. */
4104 if (mh != 0 && (d & 1) == 0)
4105 {
4106 pre_shift = floor_log2 (d & -d);
4107 mh = choose_multiplier (d >> pre_shift, size,
4108 size - pre_shift,
4109 &ml, &post_shift, &dummy);
4110 gcc_assert (!mh);
4111 }
4112 else
4113 pre_shift = 0;
4114
4115 if (mh != 0)
4116 {
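/* For illustration with D = 7 (ML = 0x24924925, POST_SHIFT = 3) and
   OP0 = 100: T1 = high part = 14, T2 = 100 - 14 = 86, T3 = 86 >> 1 = 43,
   T4 = 43 + 14 = 57, and 57 >> 2 = 14 = 100 / 7.  */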
4117 rtx t1, t2, t3, t4;
4118
4119 if (post_shift - 1 >= BITS_PER_WORD)
4120 goto fail1;
4121
4122 extra_cost
4123 = (shift_cost (speed, compute_mode, post_shift - 1)
4124 + shift_cost (speed, compute_mode, 1)
4125 + 2 * add_cost (speed, compute_mode));
4126 t1 = expmed_mult_highpart
4127 (compute_mode, op0,
4128 gen_int_mode (ml, compute_mode),
4129 NULL_RTX, 1, max_cost - extra_cost);
4130 if (t1 == 0)
4131 goto fail1;
4132 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4133 op0, t1),
4134 NULL_RTX);
4135 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4136 t2, 1, NULL_RTX, 1);
4137 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4138 t1, t3),
4139 NULL_RTX);
4140 quotient = expand_shift
4141 (RSHIFT_EXPR, compute_mode, t4,
4142 post_shift - 1, tquotient, 1);
4143 }
4144 else
4145 {
4146 rtx t1, t2;
4147
4148 if (pre_shift >= BITS_PER_WORD
4149 || post_shift >= BITS_PER_WORD)
4150 goto fail1;
4151
4152 t1 = expand_shift
4153 (RSHIFT_EXPR, compute_mode, op0,
4154 pre_shift, NULL_RTX, 1);
4155 extra_cost
4156 = (shift_cost (speed, compute_mode, pre_shift)
4157 + shift_cost (speed, compute_mode, post_shift));
4158 t2 = expmed_mult_highpart
4159 (compute_mode, t1,
4160 gen_int_mode (ml, compute_mode),
4161 NULL_RTX, 1, max_cost - extra_cost);
4162 if (t2 == 0)
4163 goto fail1;
4164 quotient = expand_shift
4165 (RSHIFT_EXPR, compute_mode, t2,
4166 post_shift, tquotient, 1);
4167 }
4168 }
4169 }
4170 else /* Too wide mode to use tricky code */
4171 break;
4172
4173 insn = get_last_insn ();
4174 if (insn != last)
4175 set_dst_reg_note (insn, REG_EQUAL,
4176 gen_rtx_UDIV (compute_mode, op0, op1),
4177 quotient);
4178 }
4179 else /* TRUNC_DIV, signed */
4180 {
4181 unsigned HOST_WIDE_INT ml;
4182 int lgup, post_shift;
4183 rtx mlr;
4184 HOST_WIDE_INT d = INTVAL (op1);
4185 unsigned HOST_WIDE_INT abs_d;
4186
4187 /* Since d might be INT_MIN, we have to cast to
4188 unsigned HOST_WIDE_INT before negating to avoid
4189 undefined signed overflow. */
4190 abs_d = (d >= 0
4191 ? (unsigned HOST_WIDE_INT) d
4192 : - (unsigned HOST_WIDE_INT) d);
4193
4194 /* n rem d = n rem -d */
4195 if (rem_flag && d < 0)
4196 {
4197 d = abs_d;
4198 op1 = gen_int_mode (abs_d, compute_mode);
4199 }
4200
4201 if (d == 1)
4202 quotient = op0;
4203 else if (d == -1)
4204 quotient = expand_unop (compute_mode, neg_optab, op0,
4205 tquotient, 0);
4206 else if (HOST_BITS_PER_WIDE_INT >= size
4207 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4208 {
4209 /* This case is not handled correctly below. */
4210 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4211 compute_mode, 1, 1);
4212 if (quotient == 0)
4213 goto fail1;
4214 }
4215 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4216 && (rem_flag
4217 ? smod_pow2_cheap (speed, compute_mode)
4218 : sdiv_pow2_cheap (speed, compute_mode))
4219 /* We assume that the cheap metric is true if the
4220 optab has an expander for this mode. */
4221 && ((optab_handler ((rem_flag ? smod_optab
4222 : sdiv_optab),
4223 compute_mode)
4224 != CODE_FOR_nothing)
4225 || (optab_handler (sdivmod_optab,
4226 compute_mode)
4227 != CODE_FOR_nothing)))
4228 ;
4229 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4230 {
4231 if (rem_flag)
4232 {
4233 remainder = expand_smod_pow2 (compute_mode, op0, d);
4234 if (remainder)
4235 return gen_lowpart (mode, remainder);
4236 }
4237
4238 if (sdiv_pow2_cheap (speed, compute_mode)
4239 && ((optab_handler (sdiv_optab, compute_mode)
4240 != CODE_FOR_nothing)
4241 || (optab_handler (sdivmod_optab, compute_mode)
4242 != CODE_FOR_nothing)))
4243 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4244 compute_mode, op0,
4245 gen_int_mode (abs_d,
4246 compute_mode),
4247 NULL_RTX, 0);
4248 else
4249 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4250
4251 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4252 negate the quotient. */
4253 if (d < 0)
4254 {
4255 insn = get_last_insn ();
4256 if (insn != last
4257 && abs_d < ((unsigned HOST_WIDE_INT) 1
4258 << (HOST_BITS_PER_WIDE_INT - 1)))
4259 set_dst_reg_note (insn, REG_EQUAL,
4260 gen_rtx_DIV (compute_mode, op0,
4261 gen_int_mode
4262 (abs_d,
4263 compute_mode)),
4264 quotient);
4265
4266 quotient = expand_unop (compute_mode, neg_optab,
4267 quotient, quotient, 0);
4268 }
4269 }
4270 else if (size <= HOST_BITS_PER_WIDE_INT)
4271 {
4272 choose_multiplier (abs_d, size, size - 1,
4273 &ml, &post_shift, &lgup);
4274 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4275 {
4276 rtx t1, t2, t3;
4277
4278 if (post_shift >= BITS_PER_WORD
4279 || size - 1 >= BITS_PER_WORD)
4280 goto fail1;
4281
4282 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4283 + shift_cost (speed, compute_mode, size - 1)
4284 + add_cost (speed, compute_mode));
4285 t1 = expmed_mult_highpart
4286 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4287 NULL_RTX, 0, max_cost - extra_cost);
4288 if (t1 == 0)
4289 goto fail1;
4290 t2 = expand_shift
4291 (RSHIFT_EXPR, compute_mode, t1,
4292 post_shift, NULL_RTX, 0);
4293 t3 = expand_shift
4294 (RSHIFT_EXPR, compute_mode, op0,
4295 size - 1, NULL_RTX, 0);
4296 if (d < 0)
4297 quotient
4298 = force_operand (gen_rtx_MINUS (compute_mode,
4299 t3, t2),
4300 tquotient);
4301 else
4302 quotient
4303 = force_operand (gen_rtx_MINUS (compute_mode,
4304 t2, t3),
4305 tquotient);
4306 }
4307 else
4308 {
4309 rtx t1, t2, t3, t4;
4310
4311 if (post_shift >= BITS_PER_WORD
4312 || size - 1 >= BITS_PER_WORD)
4313 goto fail1;
4314
4315 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4316 mlr = gen_int_mode (ml, compute_mode);
4317 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4318 + shift_cost (speed, compute_mode, size - 1)
4319 + 2 * add_cost (speed, compute_mode));
4320 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4321 NULL_RTX, 0,
4322 max_cost - extra_cost);
4323 if (t1 == 0)
4324 goto fail1;
4325 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4326 t1, op0),
4327 NULL_RTX);
4328 t3 = expand_shift
4329 (RSHIFT_EXPR, compute_mode, t2,
4330 post_shift, NULL_RTX, 0);
4331 t4 = expand_shift
4332 (RSHIFT_EXPR, compute_mode, op0,
4333 size - 1, NULL_RTX, 0);
4334 if (d < 0)
4335 quotient
4336 = force_operand (gen_rtx_MINUS (compute_mode,
4337 t4, t3),
4338 tquotient);
4339 else
4340 quotient
4341 = force_operand (gen_rtx_MINUS (compute_mode,
4342 t3, t4),
4343 tquotient);
4344 }
4345 }
4346 else /* Too wide mode to use tricky code */
4347 break;
4348
4349 insn = get_last_insn ();
4350 if (insn != last)
4351 set_dst_reg_note (insn, REG_EQUAL,
4352 gen_rtx_DIV (compute_mode, op0, op1),
4353 quotient);
4354 }
4355 break;
4356 }
4357 fail1:
4358 delete_insns_since (last);
4359 break;
4360
4361 case FLOOR_DIV_EXPR:
4362 case FLOOR_MOD_EXPR:
4363 /* We will come here only for signed operations. */
4364 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4365 {
4366 unsigned HOST_WIDE_INT mh, ml;
4367 int pre_shift, lgup, post_shift;
4368 HOST_WIDE_INT d = INTVAL (op1);
4369
4370 if (d > 0)
4371 {
4372 /* We could just as easily deal with negative constants here,
4373 but it does not seem worth the trouble for GCC 2.6. */
4374 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4375 {
4376 pre_shift = floor_log2 (d);
4377 if (rem_flag)
4378 {
4379 unsigned HOST_WIDE_INT mask
4380 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4381 remainder = expand_binop
4382 (compute_mode, and_optab, op0,
4383 gen_int_mode (mask, compute_mode),
4384 remainder, 0, OPTAB_LIB_WIDEN);
4385 if (remainder)
4386 return gen_lowpart (mode, remainder);
4387 }
4388 quotient = expand_shift
4389 (RSHIFT_EXPR, compute_mode, op0,
4390 pre_shift, tquotient, 0);
4391 }
4392 else
4393 {
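/* Sketch of the trick used below (for illustration): with
   SMASK = OP0 >> (SIZE - 1), OP0 ^ SMASK is OP0 for OP0 >= 0 and
   -OP0 - 1 otherwise, so an unsigned division of that value by D
   followed by another XOR with SMASK yields floor (OP0 / D) for
   positive D.  */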
4394 rtx t1, t2, t3, t4;
4395
4396 mh = choose_multiplier (d, size, size - 1,
4397 &ml, &post_shift, &lgup);
4398 gcc_assert (!mh);
4399
4400 if (post_shift < BITS_PER_WORD
4401 && size - 1 < BITS_PER_WORD)
4402 {
4403 t1 = expand_shift
4404 (RSHIFT_EXPR, compute_mode, op0,
4405 size - 1, NULL_RTX, 0);
4406 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4407 NULL_RTX, 0, OPTAB_WIDEN);
4408 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4409 + shift_cost (speed, compute_mode, size - 1)
4410 + 2 * add_cost (speed, compute_mode));
4411 t3 = expmed_mult_highpart
4412 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4413 NULL_RTX, 1, max_cost - extra_cost);
4414 if (t3 != 0)
4415 {
4416 t4 = expand_shift
4417 (RSHIFT_EXPR, compute_mode, t3,
4418 post_shift, NULL_RTX, 1);
4419 quotient = expand_binop (compute_mode, xor_optab,
4420 t4, t1, tquotient, 0,
4421 OPTAB_WIDEN);
4422 }
4423 }
4424 }
4425 }
4426 else
4427 {
4428 rtx nsign, t1, t2, t3, t4;
4429 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4430 op0, constm1_rtx), NULL_RTX);
4431 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4432 0, OPTAB_WIDEN);
4433 nsign = expand_shift
4434 (RSHIFT_EXPR, compute_mode, t2,
4435 size - 1, NULL_RTX, 0);
4436 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4437 NULL_RTX);
4438 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4439 NULL_RTX, 0);
4440 if (t4)
4441 {
4442 rtx t5;
4443 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4444 NULL_RTX, 0);
4445 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4446 t4, t5),
4447 tquotient);
4448 }
4449 }
4450 }
4451
4452 if (quotient != 0)
4453 break;
4454 delete_insns_since (last);
4455
4456 /* Try using an instruction that produces both the quotient and
4457 remainder, using truncation. We can easily compensate the quotient
4458 or remainder to get floor rounding, once we have the remainder.
4459 Notice that we also compute the final remainder value here,
4460 and return the result right away. */
4461 if (target == 0 || GET_MODE (target) != compute_mode)
4462 target = gen_reg_rtx (compute_mode);
4463
4464 if (rem_flag)
4465 {
4466 remainder
4467 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4468 quotient = gen_reg_rtx (compute_mode);
4469 }
4470 else
4471 {
4472 quotient
4473 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4474 remainder = gen_reg_rtx (compute_mode);
4475 }
4476
4477 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4478 quotient, remainder, 0))
4479 {
4480 /* This could be computed with a branch-less sequence.
4481 Save that for later. */
4482 rtx tem;
4483 rtx label = gen_label_rtx ();
4484 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4485 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4486 NULL_RTX, 0, OPTAB_WIDEN);
4487 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4488 expand_dec (quotient, const1_rtx);
4489 expand_inc (remainder, op1);
4490 emit_label (label);
4491 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4492 }
4493
4494 /* No luck with division elimination or divmod. Have to do it
4495 by conditionally adjusting op0 *and* the result. */
4496 {
4497 rtx label1, label2, label3, label4, label5;
4498 rtx adjusted_op0;
4499 rtx tem;
4500
4501 quotient = gen_reg_rtx (compute_mode);
4502 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4503 label1 = gen_label_rtx ();
4504 label2 = gen_label_rtx ();
4505 label3 = gen_label_rtx ();
4506 label4 = gen_label_rtx ();
4507 label5 = gen_label_rtx ();
4508 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4509 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4510 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4511 quotient, 0, OPTAB_LIB_WIDEN);
4512 if (tem != quotient)
4513 emit_move_insn (quotient, tem);
4514 emit_jump_insn (gen_jump (label5));
4515 emit_barrier ();
4516 emit_label (label1);
4517 expand_inc (adjusted_op0, const1_rtx);
4518 emit_jump_insn (gen_jump (label4));
4519 emit_barrier ();
4520 emit_label (label2);
4521 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4522 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4523 quotient, 0, OPTAB_LIB_WIDEN);
4524 if (tem != quotient)
4525 emit_move_insn (quotient, tem);
4526 emit_jump_insn (gen_jump (label5));
4527 emit_barrier ();
4528 emit_label (label3);
4529 expand_dec (adjusted_op0, const1_rtx);
4530 emit_label (label4);
4531 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4532 quotient, 0, OPTAB_LIB_WIDEN);
4533 if (tem != quotient)
4534 emit_move_insn (quotient, tem);
4535 expand_dec (quotient, const1_rtx);
4536 emit_label (label5);
4537 }
4538 break;
4539
4540 case CEIL_DIV_EXPR:
4541 case CEIL_MOD_EXPR:
4542 if (unsignedp)
4543 {
4544 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4545 {
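/* For illustration: the sequence below computes
   (OP0 >> log2 (D)) + ((OP0 & (D - 1)) != 0), e.g.
   ceil (13 / 4) = 3 + 1 = 4.  */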
4546 rtx t1, t2, t3;
4547 unsigned HOST_WIDE_INT d = INTVAL (op1);
4548 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4549 floor_log2 (d), tquotient, 1);
4550 t2 = expand_binop (compute_mode, and_optab, op0,
4551 gen_int_mode (d - 1, compute_mode),
4552 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4553 t3 = gen_reg_rtx (compute_mode);
4554 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4555 compute_mode, 1, 1);
4556 if (t3 == 0)
4557 {
4558 rtx lab;
4559 lab = gen_label_rtx ();
4560 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4561 expand_inc (t1, const1_rtx);
4562 emit_label (lab);
4563 quotient = t1;
4564 }
4565 else
4566 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4567 t1, t3),
4568 tquotient);
4569 break;
4570 }
4571
4572 /* Try using an instruction that produces both the quotient and
4573 remainder, using truncation. We can easily compensate the
4574 quotient or remainder to get ceiling rounding, once we have the
4575 remainder. Notice that we also compute the final remainder
4576 value here, and return the result right away. */
4577 if (target == 0 || GET_MODE (target) != compute_mode)
4578 target = gen_reg_rtx (compute_mode);
4579
4580 if (rem_flag)
4581 {
4582 remainder = (REG_P (target)
4583 ? target : gen_reg_rtx (compute_mode));
4584 quotient = gen_reg_rtx (compute_mode);
4585 }
4586 else
4587 {
4588 quotient = (REG_P (target)
4589 ? target : gen_reg_rtx (compute_mode));
4590 remainder = gen_reg_rtx (compute_mode);
4591 }
4592
4593 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4594 remainder, 1))
4595 {
4596 /* This could be computed with a branch-less sequence.
4597 Save that for later. */
4598 rtx label = gen_label_rtx ();
4599 do_cmp_and_jump (remainder, const0_rtx, EQ,
4600 compute_mode, label);
4601 expand_inc (quotient, const1_rtx);
4602 expand_dec (remainder, op1);
4603 emit_label (label);
4604 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4605 }
4606
4607 /* No luck with division elimination or divmod. Have to do it
4608 by conditionally adjusting op0 *and* the result. */
4609 {
4610 rtx label1, label2;
4611 rtx adjusted_op0, tem;
4612
4613 quotient = gen_reg_rtx (compute_mode);
4614 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4615 label1 = gen_label_rtx ();
4616 label2 = gen_label_rtx ();
4617 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4618 compute_mode, label1);
4619 emit_move_insn (quotient, const0_rtx);
4620 emit_jump_insn (gen_jump (label2));
4621 emit_barrier ();
4622 emit_label (label1);
4623 expand_dec (adjusted_op0, const1_rtx);
4624 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4625 quotient, 1, OPTAB_LIB_WIDEN);
4626 if (tem != quotient)
4627 emit_move_insn (quotient, tem);
4628 expand_inc (quotient, const1_rtx);
4629 emit_label (label2);
4630 }
4631 }
4632 else /* signed */
4633 {
4634 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4635 && INTVAL (op1) >= 0)
4636 {
4637 /* This is extremely similar to the code for the unsigned case
4638 above. For 2.7 we should merge these variants, but for
4639 2.6.1 I don't want to touch the code for unsigned since that
4640 gets used in C. The signed case will only be used by other
4641 languages (Ada). */
4642
4643 rtx t1, t2, t3;
4644 unsigned HOST_WIDE_INT d = INTVAL (op1);
4645 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4646 floor_log2 (d), tquotient, 0);
4647 t2 = expand_binop (compute_mode, and_optab, op0,
4648 gen_int_mode (d - 1, compute_mode),
4649 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4650 t3 = gen_reg_rtx (compute_mode);
4651 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4652 compute_mode, 1, 1);
4653 if (t3 == 0)
4654 {
4655 rtx lab;
4656 lab = gen_label_rtx ();
4657 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4658 expand_inc (t1, const1_rtx);
4659 emit_label (lab);
4660 quotient = t1;
4661 }
4662 else
4663 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4664 t1, t3),
4665 tquotient);
4666 break;
4667 }
4668
4669 /* Try using an instruction that produces both the quotient and
4670 remainder, using truncation. We can easily compensate the
4671 quotient or remainder to get ceiling rounding, once we have the
4672 remainder. Notice that we also compute the final remainder
4673 value here, and return the result right away. */
4674 if (target == 0 || GET_MODE (target) != compute_mode)
4675 target = gen_reg_rtx (compute_mode);
4676 if (rem_flag)
4677 {
4678 remainder = (REG_P (target)
4679 ? target : gen_reg_rtx (compute_mode));
4680 quotient = gen_reg_rtx (compute_mode);
4681 }
4682 else
4683 {
4684 quotient = (REG_P (target)
4685 ? target : gen_reg_rtx (compute_mode));
4686 remainder = gen_reg_rtx (compute_mode);
4687 }
4688
4689 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4690 remainder, 0))
4691 {
4692 /* This could be computed with a branch-less sequence.
4693 Save that for later. */
4694 rtx tem;
4695 rtx label = gen_label_rtx ();
4696 do_cmp_and_jump (remainder, const0_rtx, EQ,
4697 compute_mode, label);
4698 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4699 NULL_RTX, 0, OPTAB_WIDEN);
4700 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4701 expand_inc (quotient, const1_rtx);
4702 expand_dec (remainder, op1);
4703 emit_label (label);
4704 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4705 }
4706
4707 /* No luck with division elimination or divmod. Have to do it
4708 by conditionally adjusting op0 *and* the result. */
4709 {
4710 rtx label1, label2, label3, label4, label5;
4711 rtx adjusted_op0;
4712 rtx tem;
4713
4714 quotient = gen_reg_rtx (compute_mode);
4715 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4716 label1 = gen_label_rtx ();
4717 label2 = gen_label_rtx ();
4718 label3 = gen_label_rtx ();
4719 label4 = gen_label_rtx ();
4720 label5 = gen_label_rtx ();
4721 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4722 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4723 compute_mode, label1);
4724 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4725 quotient, 0, OPTAB_LIB_WIDEN);
4726 if (tem != quotient)
4727 emit_move_insn (quotient, tem);
4728 emit_jump_insn (gen_jump (label5));
4729 emit_barrier ();
4730 emit_label (label1);
4731 expand_dec (adjusted_op0, const1_rtx);
4732 emit_jump_insn (gen_jump (label4));
4733 emit_barrier ();
4734 emit_label (label2);
4735 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4736 compute_mode, label3);
4737 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4738 quotient, 0, OPTAB_LIB_WIDEN);
4739 if (tem != quotient)
4740 emit_move_insn (quotient, tem);
4741 emit_jump_insn (gen_jump (label5));
4742 emit_barrier ();
4743 emit_label (label3);
4744 expand_inc (adjusted_op0, const1_rtx);
4745 emit_label (label4);
4746 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4747 quotient, 0, OPTAB_LIB_WIDEN);
4748 if (tem != quotient)
4749 emit_move_insn (quotient, tem);
4750 expand_inc (quotient, const1_rtx);
4751 emit_label (label5);
4752 }
4753 }
4754 break;
4755
4756 case EXACT_DIV_EXPR:
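/* For illustration: exact division by 6 strips the factor of 2 with a
   shift and multiplies by the inverse of 3 mod 2^SIZE; with a 32-bit
   mode, invert_mod2n (3, 32) = 0xAAAAAAAB and
   (18 >> 1) * 0xAAAAAAAB mod 2^32 = 3 = 18 / 6.  */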
4757 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4758 {
4759 HOST_WIDE_INT d = INTVAL (op1);
4760 unsigned HOST_WIDE_INT ml;
4761 int pre_shift;
4762 rtx t1;
4763
4764 pre_shift = floor_log2 (d & -d);
4765 ml = invert_mod2n (d >> pre_shift, size);
4766 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4767 pre_shift, NULL_RTX, unsignedp);
4768 quotient = expand_mult (compute_mode, t1,
4769 gen_int_mode (ml, compute_mode),
4770 NULL_RTX, 1);
4771
4772 insn = get_last_insn ();
4773 set_dst_reg_note (insn, REG_EQUAL,
4774 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4775 compute_mode, op0, op1),
4776 quotient);
4777 }
4778 break;
4779
4780 case ROUND_DIV_EXPR:
4781 case ROUND_MOD_EXPR:
4782 if (unsignedp)
4783 {
4784 rtx tem;
4785 rtx label;
4786 label = gen_label_rtx ();
4787 quotient = gen_reg_rtx (compute_mode);
4788 remainder = gen_reg_rtx (compute_mode);
4789 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4790 {
4791 rtx tem;
4792 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4793 quotient, 1, OPTAB_LIB_WIDEN);
4794 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4795 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4796 remainder, 1, OPTAB_LIB_WIDEN);
4797 }
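/* For illustration: the adjustment below turns the truncating result
   into round-to-nearest, e.g. for 10 / 4 the truncating pair is
   q = 2, r = 2; since r > (4 - 1) >> 1 = 1 the quotient becomes 3 and
   the remainder -2.  */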
4798 tem = plus_constant (compute_mode, op1, -1);
4799 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4800 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4801 expand_inc (quotient, const1_rtx);
4802 expand_dec (remainder, op1);
4803 emit_label (label);
4804 }
4805 else
4806 {
4807 rtx abs_rem, abs_op1, tem, mask;
4808 rtx label;
4809 label = gen_label_rtx ();
4810 quotient = gen_reg_rtx (compute_mode);
4811 remainder = gen_reg_rtx (compute_mode);
4812 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4813 {
4814 rtx tem;
4815 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4816 quotient, 0, OPTAB_LIB_WIDEN);
4817 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4818 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4819 remainder, 0, OPTAB_LIB_WIDEN);
4820 }
4821 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4822 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4823 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4824 1, NULL_RTX, 1);
4825 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4826 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4827 NULL_RTX, 0, OPTAB_WIDEN);
4828 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4829 size - 1, NULL_RTX, 0);
4830 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4831 NULL_RTX, 0, OPTAB_WIDEN);
4832 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4833 NULL_RTX, 0, OPTAB_WIDEN);
4834 expand_inc (quotient, tem);
4835 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4836 NULL_RTX, 0, OPTAB_WIDEN);
4837 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4838 NULL_RTX, 0, OPTAB_WIDEN);
4839 expand_dec (remainder, tem);
4840 emit_label (label);
4841 }
4842 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4843
4844 default:
4845 gcc_unreachable ();
4846 }
4847
4848 if (quotient == 0)
4849 {
4850 if (target && GET_MODE (target) != compute_mode)
4851 target = 0;
4852
4853 if (rem_flag)
4854 {
4855 /* Try to produce the remainder without producing the quotient.
4856 If we seem to have a divmod pattern that does not require widening,
4857 don't try widening here. We should really have a WIDEN argument
4858 to expand_twoval_binop, since what we'd really like to do here is
4859 1) try a mod insn in compute_mode
4860 2) try a divmod insn in compute_mode
4861 3) try a div insn in compute_mode and multiply-subtract to get
4862 remainder
4863 4) try the same things with widening allowed. */
4864 remainder
4865 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4866 op0, op1, target,
4867 unsignedp,
4868 ((optab_handler (optab2, compute_mode)
4869 != CODE_FOR_nothing)
4870 ? OPTAB_DIRECT : OPTAB_WIDEN));
4871 if (remainder == 0)
4872 {
4873 /* No luck there. Can we do remainder and divide at once
4874 without a library call? */
4875 remainder = gen_reg_rtx (compute_mode);
4876 if (! expand_twoval_binop ((unsignedp
4877 ? udivmod_optab
4878 : sdivmod_optab),
4879 op0, op1,
4880 NULL_RTX, remainder, unsignedp))
4881 remainder = 0;
4882 }
4883
4884 if (remainder)
4885 return gen_lowpart (mode, remainder);
4886 }
4887
4888 /* Produce the quotient. Try a quotient insn, but not a library call.
4889 If we have a divmod in this mode, use it in preference to widening
4890 the div (for this test we assume it will not fail). Note that optab2
4891 is set to the one of the two optabs that the call below will use. */
4892 quotient
4893 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4894 op0, op1, rem_flag ? NULL_RTX : target,
4895 unsignedp,
4896 ((optab_handler (optab2, compute_mode)
4897 != CODE_FOR_nothing)
4898 ? OPTAB_DIRECT : OPTAB_WIDEN));
4899
4900 if (quotient == 0)
4901 {
4902 /* No luck there. Try a quotient-and-remainder insn,
4903 keeping the quotient alone. */
4904 quotient = gen_reg_rtx (compute_mode);
4905 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4906 op0, op1,
4907 quotient, NULL_RTX, unsignedp))
4908 {
4909 quotient = 0;
4910 if (! rem_flag)
4911 /* Still no luck. If we are not computing the remainder,
4912 use a library call for the quotient. */
4913 quotient = sign_expand_binop (compute_mode,
4914 udiv_optab, sdiv_optab,
4915 op0, op1, target,
4916 unsignedp, OPTAB_LIB_WIDEN);
4917 }
4918 }
4919 }
4920
4921 if (rem_flag)
4922 {
4923 if (target && GET_MODE (target) != compute_mode)
4924 target = 0;
4925
4926 if (quotient == 0)
4927 {
4928 /* No divide instruction either. Use library for remainder. */
4929 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4930 op0, op1, target,
4931 unsignedp, OPTAB_LIB_WIDEN);
4932 /* No remainder function. Try a quotient-and-remainder
4933 function, keeping the remainder. */
4934 if (!remainder)
4935 {
4936 remainder = gen_reg_rtx (compute_mode);
4937 if (!expand_twoval_binop_libfunc
4938 (unsignedp ? udivmod_optab : sdivmod_optab,
4939 op0, op1,
4940 NULL_RTX, remainder,
4941 unsignedp ? UMOD : MOD))
4942 remainder = NULL_RTX;
4943 }
4944 }
4945 else
4946 {
4947 /* We divided. Now finish doing X - Y * (X / Y). */
4948 remainder = expand_mult (compute_mode, quotient, op1,
4949 NULL_RTX, unsignedp);
4950 remainder = expand_binop (compute_mode, sub_optab, op0,
4951 remainder, target, unsignedp,
4952 OPTAB_LIB_WIDEN);
4953 }
4954 }
4955
4956 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4957 }
4958 \f
4959 /* Return a tree node with data type TYPE, describing the value of X.
4960 Usually this is a VAR_DECL, if there is no obvious better choice.
4961 X may be an expression; however, we only support those expressions
4962 generated by loop.c. */
4963
4964 tree
4965 make_tree (tree type, rtx x)
4966 {
4967 tree t;
4968
4969 switch (GET_CODE (x))
4970 {
4971 case CONST_INT:
4972 case CONST_WIDE_INT:
4973 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4974 return t;
4975
4976 case CONST_DOUBLE:
4977 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4978 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4979 t = wide_int_to_tree (type,
4980 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4981 HOST_BITS_PER_WIDE_INT * 2));
4982 else
4983 {
4984 REAL_VALUE_TYPE d;
4985
4986 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4987 t = build_real (type, d);
4988 }
4989
4990 return t;
4991
4992 case CONST_VECTOR:
4993 {
4994 int units = CONST_VECTOR_NUNITS (x);
4995 tree itype = TREE_TYPE (type);
4996 tree *elts;
4997 int i;
4998
4999 /* Build a tree with vector elements. */
5000 elts = XALLOCAVEC (tree, units);
5001 for (i = units - 1; i >= 0; --i)
5002 {
5003 rtx elt = CONST_VECTOR_ELT (x, i);
5004 elts[i] = make_tree (itype, elt);
5005 }
5006
5007 return build_vector (type, elts);
5008 }
5009
5010 case PLUS:
5011 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5012 make_tree (type, XEXP (x, 1)));
5013
5014 case MINUS:
5015 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5016 make_tree (type, XEXP (x, 1)));
5017
5018 case NEG:
5019 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5020
5021 case MULT:
5022 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5023 make_tree (type, XEXP (x, 1)));
5024
5025 case ASHIFT:
5026 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5027 make_tree (type, XEXP (x, 1)));
5028
5029 case LSHIFTRT:
5030 t = unsigned_type_for (type);
5031 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5032 make_tree (t, XEXP (x, 0)),
5033 make_tree (type, XEXP (x, 1))));
5034
5035 case ASHIFTRT:
5036 t = signed_type_for (type);
5037 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5038 make_tree (t, XEXP (x, 0)),
5039 make_tree (type, XEXP (x, 1))));
5040
5041 case DIV:
5042 if (TREE_CODE (type) != REAL_TYPE)
5043 t = signed_type_for (type);
5044 else
5045 t = type;
5046
5047 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5048 make_tree (t, XEXP (x, 0)),
5049 make_tree (t, XEXP (x, 1))));
5050 case UDIV:
5051 t = unsigned_type_for (type);
5052 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5053 make_tree (t, XEXP (x, 0)),
5054 make_tree (t, XEXP (x, 1))));
5055
5056 case SIGN_EXTEND:
5057 case ZERO_EXTEND:
5058 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5059 GET_CODE (x) == ZERO_EXTEND);
5060 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5061
5062 case CONST:
5063 return make_tree (type, XEXP (x, 0));
5064
5065 case SYMBOL_REF:
5066 t = SYMBOL_REF_DECL (x);
5067 if (t)
5068 return fold_convert (type, build_fold_addr_expr (t));
5069 /* else fall through. */
5070
5071 default:
5072 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5073
5074 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5075 address mode to pointer mode. */
5076 if (POINTER_TYPE_P (type))
5077 x = convert_memory_address_addr_space
5078 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5079
5080 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5081 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5082 t->decl_with_rtl.rtl = x;
5083
5084 return t;
5085 }
5086 }
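/* A rough usage sketch, assuming some_reg is a pseudo obtained elsewhere
   (e.g. from gen_reg_rtx (SImode)):

     rtx x = gen_rtx_PLUS (SImode, some_reg, GEN_INT (4));
     tree t = make_tree (integer_type_node, x);

   This yields a PLUS_EXPR whose first operand is a VAR_DECL wrapping
   some_reg (the default case above) and whose second operand is the
   integer constant 4 (the CONST_INT case).  */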
5087 \f
5088 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5089 and returning TARGET.
5090
5091 If TARGET is 0, a pseudo-register or constant is returned. */
5092
5093 rtx
5094 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5095 {
5096 rtx tem = 0;
5097
5098 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5099 tem = simplify_binary_operation (AND, mode, op0, op1);
5100 if (tem == 0)
5101 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5102
5103 if (target == 0)
5104 target = tem;
5105 else if (tem != target)
5106 emit_move_insn (target, tem);
5107 return target;
5108 }
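/* A rough usage sketch, assuming op is an SImode rtx produced elsewhere:

     rtx low_byte = expand_and (SImode, op, GEN_INT (0xff), NULL_RTX);

   keeps only the low byte of op; with a null TARGET the result comes back
   in a fresh pseudo, or as a constant when both operands are constant.  */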
5109
5110 /* Helper function for emit_store_flag. */
5111 static rtx
5112 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5113 enum machine_mode mode, enum machine_mode compare_mode,
5114 int unsignedp, rtx x, rtx y, int normalizep,
5115 enum machine_mode target_mode)
5116 {
5117 struct expand_operand ops[4];
5118 rtx op0, last, comparison, subtarget;
5119 enum machine_mode result_mode = targetm.cstore_mode (icode);
5120
5121 last = get_last_insn ();
5122 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5123 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5124 if (!x || !y)
5125 {
5126 delete_insns_since (last);
5127 return NULL_RTX;
5128 }
5129
5130 if (target_mode == VOIDmode)
5131 target_mode = result_mode;
5132 if (!target)
5133 target = gen_reg_rtx (target_mode);
5134
5135 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5136
5137 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5138 create_fixed_operand (&ops[1], comparison);
5139 create_fixed_operand (&ops[2], x);
5140 create_fixed_operand (&ops[3], y);
5141 if (!maybe_expand_insn (icode, 4, ops))
5142 {
5143 delete_insns_since (last);
5144 return NULL_RTX;
5145 }
5146 subtarget = ops[0].value;
5147
5148 /* If we are converting to a wider mode, first convert to
5149 TARGET_MODE, then normalize. This produces better combining
5150 opportunities on machines that have a SIGN_EXTRACT when we are
5151 testing a single bit. This mostly benefits the 68k.
5152
5153 If STORE_FLAG_VALUE does not have the sign bit set when
5154 interpreted in MODE, we can do this conversion as unsigned, which
5155 is usually more efficient. */
5156 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5157 {
5158 convert_move (target, subtarget,
5159 val_signbit_known_clear_p (result_mode,
5160 STORE_FLAG_VALUE));
5161 op0 = target;
5162 result_mode = target_mode;
5163 }
5164 else
5165 op0 = subtarget;
5166
5167 /* If we want to keep subexpressions around, don't reuse our last
5168 target. */
5169 if (optimize)
5170 subtarget = 0;
5171
5172 /* Now normalize to the proper value in MODE. Sometimes we don't
5173 have to do anything. */
5174 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5175 ;
5176 /* STORE_FLAG_VALUE might be the most negative number, so write
5177 the comparison this way to avoid a compile-time warning. */
5178 else if (- normalizep == STORE_FLAG_VALUE)
5179 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5180
5181 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5182 it hard to use a value of just the sign bit due to ANSI integer
5183 constant typing rules. */
5184 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5185 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5186 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5187 normalizep == 1);
5188 else
5189 {
5190 gcc_assert (STORE_FLAG_VALUE & 1);
5191
5192 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5193 if (normalizep == -1)
5194 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5195 }
5196
5197 /* If we were converting to a smaller mode, do the conversion now. */
5198 if (target_mode != result_mode)
5199 {
5200 convert_move (target, op0, 0);
5201 return target;
5202 }
5203 else
5204 return op0;
5205 }
5206
5207
5208 /* A subroutine of emit_store_flag only including "tricks" that do not
5209 need a recursive call. These are kept separate to avoid infinite
5210 loops. */
5211
5212 static rtx
5213 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5214 enum machine_mode mode, int unsignedp, int normalizep,
5215 enum machine_mode target_mode)
5216 {
5217 rtx subtarget;
5218 enum insn_code icode;
5219 enum machine_mode compare_mode;
5220 enum mode_class mclass;
5221 enum rtx_code scode;
5222 rtx tem;
5223
5224 if (unsignedp)
5225 code = unsigned_condition (code);
5226 scode = swap_condition (code);
5227
5228 /* If one operand is constant, make it the second one. Only do this
5229 if the other operand is not constant as well. */
5230
5231 if (swap_commutative_operands_p (op0, op1))
5232 {
5233 tem = op0;
5234 op0 = op1;
5235 op1 = tem;
5236 code = swap_condition (code);
5237 }
5238
5239 if (mode == VOIDmode)
5240 mode = GET_MODE (op0);
5241
5242 /* For some comparisons with 1 and -1, we can convert this to
5243 comparisons with zero. This will often produce more opportunities for
5244 store-flag insns. */
5245
5246 switch (code)
5247 {
5248 case LT:
5249 if (op1 == const1_rtx)
5250 op1 = const0_rtx, code = LE;
5251 break;
5252 case LE:
5253 if (op1 == constm1_rtx)
5254 op1 = const0_rtx, code = LT;
5255 break;
5256 case GE:
5257 if (op1 == const1_rtx)
5258 op1 = const0_rtx, code = GT;
5259 break;
5260 case GT:
5261 if (op1 == constm1_rtx)
5262 op1 = const0_rtx, code = GE;
5263 break;
5264 case GEU:
5265 if (op1 == const1_rtx)
5266 op1 = const0_rtx, code = NE;
5267 break;
5268 case LTU:
5269 if (op1 == const1_rtx)
5270 op1 = const0_rtx, code = EQ;
5271 break;
5272 default:
5273 break;
5274 }
5275
5276 /* If we are comparing a double-word integer with zero or -1, we can
5277 convert the comparison into one involving a single word. */
5278 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5279 && GET_MODE_CLASS (mode) == MODE_INT
5280 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5281 {
5282 if ((code == EQ || code == NE)
5283 && (op1 == const0_rtx || op1 == constm1_rtx))
5284 {
5285 rtx op00, op01;
5286
5287 /* Do a logical OR or AND of the two words and compare the
5288 result. */
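/* For illustration: a double-word value is zero exactly when the IOR of
   its two words is zero, and it is all-ones (-1) exactly when the AND of
   its two words is all-ones, so either case reduces to a single-word
   comparison against OP1.  */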
5289 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5290 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5291 tem = expand_binop (word_mode,
5292 op1 == const0_rtx ? ior_optab : and_optab,
5293 op00, op01, NULL_RTX, unsignedp,
5294 OPTAB_DIRECT);
5295
5296 if (tem != 0)
5297 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5298 unsignedp, normalizep);
5299 }
5300 else if ((code == LT || code == GE) && op1 == const0_rtx)
5301 {
5302 rtx op0h;
5303
5304 /* If testing the sign bit, can just test on high word. */
5305 op0h = simplify_gen_subreg (word_mode, op0, mode,
5306 subreg_highpart_offset (word_mode,
5307 mode));
5308 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5309 unsignedp, normalizep);
5310 }
5311 else
5312 tem = NULL_RTX;
5313
5314 if (tem)
5315 {
5316 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5317 return tem;
5318 if (!target)
5319 target = gen_reg_rtx (target_mode);
5320
5321 convert_move (target, tem,
5322 !val_signbit_known_set_p (word_mode,
5323 (normalizep ? normalizep
5324 : STORE_FLAG_VALUE)));
5325 return target;
5326 }
5327 }
5328
5329 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5330 complement of A (for GE) and shifting the sign bit to the low bit. */
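/* For illustration: in a 32-bit mode, "A < 0" becomes the logical shift
   (unsigned) A >> 31 and "A >= 0" becomes (unsigned) ~A >> 31, each
   yielding 0 or 1; the arithmetic-shift variant yields -1 or 0 instead.  */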
5331 if (op1 == const0_rtx && (code == LT || code == GE)
5332 && GET_MODE_CLASS (mode) == MODE_INT
5333 && (normalizep || STORE_FLAG_VALUE == 1
5334 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5335 {
5336 subtarget = target;
5337
5338 if (!target)
5339 target_mode = mode;
5340
5341 /* If the result is to be wider than OP0, it is best to convert it
5342 first. If it is to be narrower, it is *incorrect* to convert it
5343 first. */
5344 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5345 {
5346 op0 = convert_modes (target_mode, mode, op0, 0);
5347 mode = target_mode;
5348 }
5349
5350 if (target_mode != mode)
5351 subtarget = 0;
5352
5353 if (code == GE)
5354 op0 = expand_unop (mode, one_cmpl_optab, op0,
5355 ((STORE_FLAG_VALUE == 1 || normalizep)
5356 ? 0 : subtarget), 0);
5357
5358 if (STORE_FLAG_VALUE == 1 || normalizep)
5359 /* If we are supposed to produce a 0/1 value, we want to do
5360 a logical shift from the sign bit to the low-order bit; for
5361 a -1/0 value, we do an arithmetic shift. */
5362 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5363 GET_MODE_BITSIZE (mode) - 1,
5364 subtarget, normalizep != -1);
5365
5366 if (mode != target_mode)
5367 op0 = convert_modes (target_mode, mode, op0, 0);
5368
5369 return op0;
5370 }
5371
5372 mclass = GET_MODE_CLASS (mode);
5373 for (compare_mode = mode; compare_mode != VOIDmode;
5374 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5375 {
5376 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5377 icode = optab_handler (cstore_optab, optab_mode);
5378 if (icode != CODE_FOR_nothing)
5379 {
5380 do_pending_stack_adjust ();
5381 tem = emit_cstore (target, icode, code, mode, compare_mode,
5382 unsignedp, op0, op1, normalizep, target_mode);
5383 if (tem)
5384 return tem;
5385
5386 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5387 {
5388 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5389 unsignedp, op1, op0, normalizep, target_mode);
5390 if (tem)
5391 return tem;
5392 }
5393 break;
5394 }
5395 }
5396
5397 return 0;
5398 }
5399
5400 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5401 and storing in TARGET. Normally return TARGET.
5402 Return 0 if that cannot be done.
5403
5404 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5405 it is VOIDmode, they cannot both be CONST_INT.
5406
5407 UNSIGNEDP is for the case where we have to widen the operands
5408 to perform the operation. It says to use zero-extension.
5409
5410 NORMALIZEP is 1 if we should convert the result to be either zero
5411 or one. NORMALIZEP is -1 if we should convert the result to be
5412 either zero or -1. If NORMALIZEP is zero, the result will be left
5413 "raw" out of the scc insn. */
5414
5415 rtx
5416 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5417 enum machine_mode mode, int unsignedp, int normalizep)
5418 {
5419 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5420 enum rtx_code rcode;
5421 rtx subtarget;
5422 rtx tem, last, trueval;
5423
5424 /* If we compare constants, we shouldn't use a store-flag operation,
5425 but a constant load. We can get there via the vanilla route that
5426 usually generates a compare-branch sequence, but will in this case
5427 fold the comparison to a constant, and thus elide the branch. */
5428 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5429 return NULL_RTX;
5430
5431 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5432 target_mode);
5433 if (tem)
5434 return tem;
5435
5436 /* If we reached here, we can't do this with a scc insn, however there
5437 are some comparisons that can be done in other ways. Don't do any
5438 of these cases if branches are very cheap. */
5439 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5440 return 0;
5441
5442 /* See what we need to return. We can only return a 1, -1, or the
5443 sign bit. */
5444
5445 if (normalizep == 0)
5446 {
5447 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5448 normalizep = STORE_FLAG_VALUE;
5449
5450 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5451 ;
5452 else
5453 return 0;
5454 }
5455
5456 last = get_last_insn ();
5457
5458 /* If optimizing, use different pseudo registers for each insn, instead
5459 of reusing the same pseudo. This leads to better CSE, but slows
5460 down the compiler, since there are more pseudos. */
5461 subtarget = (!optimize
5462 && (target_mode == mode)) ? target : NULL_RTX;
5463 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5464
5465 /* For floating-point comparisons, try the reverse comparison or try
5466 changing the "orderedness" of the comparison. */
5467 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5468 {
5469 enum rtx_code first_code;
5470 bool and_them;
5471
5472 rcode = reverse_condition_maybe_unordered (code);
5473 if (can_compare_p (rcode, mode, ccp_store_flag)
5474 && (code == ORDERED || code == UNORDERED
5475 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5476 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5477 {
5478 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5479 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5480
5481 /* For the reverse comparison, use either an addition or a XOR. */
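/* For illustration: with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the
   reversed comparison yields 1 when the original is false, and adding -1
   maps that 1/0 result to the wanted 0/-1.  The XOR variant likewise
   flips a NORMALIZEP/0 result into 0/NORMALIZEP.  */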
5482 if (want_add
5483 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5484 optimize_insn_for_speed_p ()) == 0)
5485 {
5486 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5487 STORE_FLAG_VALUE, target_mode);
5488 if (tem)
5489 return expand_binop (target_mode, add_optab, tem,
5490 gen_int_mode (normalizep, target_mode),
5491 target, 0, OPTAB_WIDEN);
5492 }
5493 else if (!want_add
5494 && rtx_cost (trueval, XOR, 1,
5495 optimize_insn_for_speed_p ()) == 0)
5496 {
5497 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5498 normalizep, target_mode);
5499 if (tem)
5500 return expand_binop (target_mode, xor_optab, tem, trueval,
5501 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5502 }
5503 }
5504
5505 delete_insns_since (last);
5506
5507 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5508 if (code == ORDERED || code == UNORDERED)
5509 return 0;
5510
5511 and_them = split_comparison (code, mode, &first_code, &code);
5512
5513 /* If there are no NaNs, the first comparison should always fall through.
5514 Effectively change the comparison to the other one. */
5515 if (!HONOR_NANS (mode))
5516 {
5517 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5518 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5519 target_mode);
5520 }
5521
5522 #ifdef HAVE_conditional_move
5523 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5524 conditional move. */
5525 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5526 normalizep, target_mode);
5527 if (tem == 0)
5528 return 0;
5529
5530 if (and_them)
5531 tem = emit_conditional_move (target, code, op0, op1, mode,
5532 tem, const0_rtx, GET_MODE (tem), 0);
5533 else
5534 tem = emit_conditional_move (target, code, op0, op1, mode,
5535 trueval, tem, GET_MODE (tem), 0);
5536
5537 if (tem == 0)
5538 delete_insns_since (last);
5539 return tem;
5540 #else
5541 return 0;
5542 #endif
5543 }
5544
5545 /* The remaining tricks only apply to integer comparisons. */
5546
5547 if (GET_MODE_CLASS (mode) != MODE_INT)
5548 return 0;
5549
5550 /* If this is an equality comparison of integers, we can try to exclusive-or
5551 (or subtract) the two operands and use a recursive call to try the
5552 comparison with zero. Don't do any of these cases if branches are
5553 very cheap. */
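/* For illustration: "A == B" is handled as "(A ^ B) == 0" (or as
   "(A - B) == 0" when no xor pattern exists), reducing an equality of
   two values to an equality against zero.  */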
5554
5555 if ((code == EQ || code == NE) && op1 != const0_rtx)
5556 {
5557 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5558 OPTAB_WIDEN);
5559
5560 if (tem == 0)
5561 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5562 OPTAB_WIDEN);
5563 if (tem != 0)
5564 tem = emit_store_flag (target, code, tem, const0_rtx,
5565 mode, unsignedp, normalizep);
5566 if (tem != 0)
5567 return tem;
5568
5569 delete_insns_since (last);
5570 }
5571
5572 /* For integer comparisons, try the reverse comparison. However, for
5573 small X, if we'd have to extend it anyway, implementing "X != 0"
5574 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5575 rcode = reverse_condition (code);
5576 if (can_compare_p (rcode, mode, ccp_store_flag)
5577 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5578 && code == NE
5579 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5580 && op1 == const0_rtx))
5581 {
5582 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5583 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5584
5585 /* Again, for the reverse comparison, use either an addition or a XOR. */
5586 if (want_add
5587 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5588 optimize_insn_for_speed_p ()) == 0)
5589 {
5590 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5591 STORE_FLAG_VALUE, target_mode);
5592 if (tem != 0)
5593 tem = expand_binop (target_mode, add_optab, tem,
5594 gen_int_mode (normalizep, target_mode),
5595 target, 0, OPTAB_WIDEN);
5596 }
5597 else if (!want_add
5598 && rtx_cost (trueval, XOR, 1,
5599 optimize_insn_for_speed_p ()) == 0)
5600 {
5601 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5602 normalizep, target_mode);
5603 if (tem != 0)
5604 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5605 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5606 }
5607
5608 if (tem != 0)
5609 return tem;
5610 delete_insns_since (last);
5611 }
5612
5613 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5614 the constant zero. Reject all other comparisons at this point. Only
5615 do LE and GT if branches are expensive since they are expensive on
5616 2-operand machines. */
5617
5618 if (op1 != const0_rtx
5619 || (code != EQ && code != NE
5620 && (BRANCH_COST (optimize_insn_for_speed_p (),
5621 false) <= 1 || (code != LE && code != GT))))
5622 return 0;
5623
5624 /* Try to put the result of the comparison in the sign bit. Assume we can't
5625 do the necessary operation below. */
5626
5627 tem = 0;
5628
5629 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5630 the sign bit set. */
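/* For illustration: A = 3 gives 3 | 2 = 3 (sign bit clear), A = 0 gives
   0 | -1 = -1 (sign bit set), and A = -5 gives -5 | -6 = -5 (sign bit
   set), matching A <= 0 in each case.  */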
5631
5632 if (code == LE)
5633 {
5634 /* This is destructive, so SUBTARGET can't be OP0. */
5635 if (rtx_equal_p (subtarget, op0))
5636 subtarget = 0;
5637
5638 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5639 OPTAB_WIDEN);
5640 if (tem)
5641 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5642 OPTAB_WIDEN);
5643 }
5644
5645 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5646 number of bits in the mode of OP0, minus one. */
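/* For illustration: with BITS = 31 in a 32-bit mode, A = 3 gives
   (3 >> 31) - 3 = -3 (sign bit set), A = 0 gives 0 - 0 = 0, and A = -5
   gives (-5 >> 31) - (-5) = -1 + 5 = 4 (sign bit clear), matching
   A > 0 in each case.  */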
5647
5648 if (code == GT)
5649 {
5650 if (rtx_equal_p (subtarget, op0))
5651 subtarget = 0;
5652
5653 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5654 GET_MODE_BITSIZE (mode) - 1,
5655 subtarget, 0);
5656 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5657 OPTAB_WIDEN);
5658 }
5659
5660 if (code == EQ || code == NE)
5661 {
5662 /* For EQ or NE, one way to do the comparison is to apply an operation
5663 that converts the operand into a positive number if it is nonzero
5664 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5665 for NE we negate. This puts the result in the sign bit. Then we
5666 normalize with a shift, if needed.
5667
5668 Two operations that can do the above actions are ABS and FFS, so try
5669 them. If that doesn't work, and MODE is smaller than a full word,
5670 we can use zero-extension to the wider mode (an unsigned conversion)
5671 as the operation. */
5672
5673 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5674 that is compensated by the subsequent overflow when subtracting
5675 one / negating. */
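/* For illustration, using FFS: a nonzero A gives ffs (A) >= 1, so
   negating sets the sign bit, while A == 0 gives ffs (0) == 0 and the
   sign bit stays clear; subtracting 1 instead gives the EQ sense.  */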
5676
5677 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5678 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5679 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5680 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5681 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5682 {
5683 tem = convert_modes (word_mode, mode, op0, 1);
5684 mode = word_mode;
5685 }
5686
5687 if (tem != 0)
5688 {
5689 if (code == EQ)
5690 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5691 0, OPTAB_WIDEN);
5692 else
5693 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5694 }
5695
5696 /* If we couldn't do it that way, for NE we can "or" the two's complement
5697 of the value with itself. For EQ, we take the one's complement of
5698 that "or", which is an extra insn, so we only handle EQ if branches
5699 are expensive. */
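/* For illustration: (-A) | A has the sign bit set exactly when A is
   nonzero (e.g. 5 | -5 = -1, 0 | 0 = 0), so the IOR itself realizes NE
   and its one's complement realizes EQ.  */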
5700
5701 if (tem == 0
5702 && (code == NE
5703 || BRANCH_COST (optimize_insn_for_speed_p (),
5704 false) > 1))
5705 {
5706 if (rtx_equal_p (subtarget, op0))
5707 subtarget = 0;
5708
5709 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5710 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5711 OPTAB_WIDEN);
5712
5713 if (tem && code == EQ)
5714 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5715 }
5716 }
5717
5718 if (tem && normalizep)
5719 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5720 GET_MODE_BITSIZE (mode) - 1,
5721 subtarget, normalizep == 1);
5722
5723 if (tem)
5724 {
5725 if (!target)
5726 ;
5727 else if (GET_MODE (tem) != target_mode)
5728 {
5729 convert_move (target, tem, 0);
5730 tem = target;
5731 }
5732 else if (!subtarget)
5733 {
5734 emit_move_insn (target, tem);
5735 tem = target;
5736 }
5737 }
5738 else
5739 delete_insns_since (last);
5740
5741 return tem;
5742 }
5743
5744 /* Like emit_store_flag, but always succeeds. */
5745
5746 rtx
5747 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5748 enum machine_mode mode, int unsignedp, int normalizep)
5749 {
5750 rtx tem, label;
5751 rtx trueval, falseval;
5752
5753 /* First see if emit_store_flag can do the job. */
5754 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5755 if (tem != 0)
5756 return tem;
5757
5758 if (!target)
5759 target = gen_reg_rtx (word_mode);
5760
5761 /* If this failed, we have to do this with set/compare/jump/set code.
5762 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
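/* For illustration, the shape emitted for that special case is roughly

       (compare TARGET with 0, jump to L if equal)
       TARGET = TRUEVAL
     L:

   so TARGET is overwritten only when it was nonzero, which is exactly
   the NE result.  */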
5763 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5764 if (code == NE
5765 && GET_MODE_CLASS (mode) == MODE_INT
5766 && REG_P (target)
5767 && op0 == target
5768 && op1 == const0_rtx)
5769 {
5770 label = gen_label_rtx ();
5771 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5772 mode, NULL_RTX, NULL_RTX, label, -1);
5773 emit_move_insn (target, trueval);
5774 emit_label (label);
5775 return target;
5776 }
5777
5778 if (!REG_P (target)
5779 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5780 target = gen_reg_rtx (GET_MODE (target));
5781
5782 /* Jump in the right direction if the target cannot implement CODE
5783 but can jump on its reverse condition. */
5784 falseval = const0_rtx;
5785 if (! can_compare_p (code, mode, ccp_jump)
5786 && (! FLOAT_MODE_P (mode)
5787 || code == ORDERED || code == UNORDERED
5788 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5789 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5790 {
5791 enum rtx_code rcode;
5792 if (FLOAT_MODE_P (mode))
5793 rcode = reverse_condition_maybe_unordered (code);
5794 else
5795 rcode = reverse_condition (code);
5796
5797 /* Canonicalize to UNORDERED for the libcall. */
5798 if (can_compare_p (rcode, mode, ccp_jump)
5799 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5800 {
5801 falseval = trueval;
5802 trueval = const0_rtx;
5803 code = rcode;
5804 }
5805 }
5806
5807 emit_move_insn (target, trueval);
5808 label = gen_label_rtx ();
5809 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5810 NULL_RTX, label, -1);
5811
5812 emit_move_insn (target, falseval);
5813 emit_label (label);
5814
5815 return target;
5816 }
5817 \f
5818 /* Perform possibly multi-word comparison and conditional jump to LABEL
5819 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5820 now a thin wrapper around do_compare_rtx_and_jump. */
5821
5822 static void
5823 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5824 rtx label)
5825 {
5826 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5827 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5828 NULL_RTX, NULL_RTX, label, -1);
5829 }