1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2014 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "recog.h"
36 #include "langhooks.h"
37 #include "df.h"
38 #include "target.h"
39 #include "expmed.h"
40
41 struct target_expmed default_target_expmed;
42 #if SWITCHABLE_TARGET
43 struct target_expmed *this_target_expmed = &default_target_expmed;
44 #endif
45
46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 rtx);
51 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT, rtx, int);
62 static rtx extract_fixed_bit_field_1 (enum machine_mode, rtx,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, rtx, int);
65 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
66 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
67 unsigned HOST_WIDE_INT, int);
68 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
69 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71
72 /* Return a constant integer mask value of mode MODE with BITSIZE ones
73 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
74 The mask is truncated if necessary to the width of mode MODE. The
75 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
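/* For example, mask_rtx (SImode, 4, 8, false) describes the SImode
   constant 0xff0 (eight ones followed by four zeros), and the same
   call with COMPLEMENT true describes its 32-bit complement.  */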
76
77 static inline rtx
78 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, bool complement)
79 {
80 return immed_wide_int_const
81 (wi::shifted_mask (bitpos, bitsize, complement,
82 GET_MODE_PRECISION (mode)), mode);
83 }
84
85 /* Test whether a value is zero or a power of two. */
86 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
87 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
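/* For example, 8 & 7 == 0 and 0 & -1 == 0, so 8 and 0 both satisfy the
   predicate, whereas 6 & 5 == 4, so 6 does not.  */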
88
89 struct init_expmed_rtl
90 {
91 struct rtx_def reg;
92 struct rtx_def plus;
93 struct rtx_def neg;
94 struct rtx_def mult;
95 struct rtx_def sdiv;
96 struct rtx_def udiv;
97 struct rtx_def sdiv_32;
98 struct rtx_def smod_32;
99 struct rtx_def wide_mult;
100 struct rtx_def wide_lshr;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift;
103 struct rtx_def shift_mult;
104 struct rtx_def shift_add;
105 struct rtx_def shift_sub0;
106 struct rtx_def shift_sub1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
109
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
112 };
113
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
117 {
118 int to_size, from_size;
119 rtx which;
120
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
128
129 /* Assume cost of zero-extend and sign-extend is the same. */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
131
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
134 }
135
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
139 {
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
142
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
144
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
161
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
167
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
172
173 set_shift_cost (speed, mode, 0, 0);
174 {
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
179 }
180
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
183 {
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
186
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
191 }
192
193 if (SCALAR_INT_MODE_P (mode))
194 {
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
198 }
199 if (GET_MODE_CLASS (mode) == MODE_INT)
200 {
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
203 {
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
208
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
213 }
214 }
215 }
216
217 void
218 init_expmed (void)
219 {
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
223
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
226 {
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
229 }
230
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
234
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
238
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
241
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
245
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
249
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
253
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
257
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
261
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
264
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
268
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
271
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
274
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
277
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
280
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
284
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
288
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
292
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
295
296 for (speed = 0; speed < 2; speed++)
297 {
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
300
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
304
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
309
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
314 }
315
316 if (alg_hash_used_p ())
317 {
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
320 }
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
324 }
325
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
329
330 rtx
331 negate_rtx (enum machine_mode mode, rtx x)
332 {
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
334
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
337
338 return result;
339 }
340
341 /* Adjust bitfield memory MEM so that it points to the first unit of mode
342 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
343 If MODE is BLKmode, return a reference to every byte in the bitfield.
344 Set *NEW_BITNUM to the bit position of the field within the new memory. */
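/* For example, narrowing to QImode for a field starting at bit 13
   yields *NEW_BITNUM == 5 and advances the address by
   (13 - 5) / BITS_PER_UNIT == 1 byte.  */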
345
346 static rtx
347 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
348 unsigned HOST_WIDE_INT bitsize,
349 unsigned HOST_WIDE_INT bitnum,
350 unsigned HOST_WIDE_INT *new_bitnum)
351 {
352 if (mode == BLKmode)
353 {
354 *new_bitnum = bitnum % BITS_PER_UNIT;
355 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
356 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
357 / BITS_PER_UNIT);
358 return adjust_bitfield_address_size (mem, mode, offset, size);
359 }
360 else
361 {
362 unsigned int unit = GET_MODE_BITSIZE (mode);
363 *new_bitnum = bitnum % unit;
364 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
365 return adjust_bitfield_address (mem, mode, offset);
366 }
367 }
368
369 /* The caller wants to perform insertion or extraction PATTERN on a
370 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
371 BITREGION_START and BITREGION_END are as for store_bit_field
372 and FIELDMODE is the natural mode of the field.
373
374 Search for a mode that is compatible with the memory access
375 restrictions and (where applicable) with a register insertion or
376 extraction. Return the new memory on success, storing the adjusted
377 bit position in *NEW_BITNUM. Return null otherwise. */
378
379 static rtx
380 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
381 rtx op0, HOST_WIDE_INT bitsize,
382 HOST_WIDE_INT bitnum,
383 unsigned HOST_WIDE_INT bitregion_start,
384 unsigned HOST_WIDE_INT bitregion_end,
385 enum machine_mode fieldmode,
386 unsigned HOST_WIDE_INT *new_bitnum)
387 {
388 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
389 bitregion_end, MEM_ALIGN (op0),
390 MEM_VOLATILE_P (op0));
391 enum machine_mode best_mode;
392 if (iter.next_mode (&best_mode))
393 {
394 /* We can use a memory in BEST_MODE. See whether this is true for
395 any wider modes. All other things being equal, we prefer to
396 use the widest mode possible because it tends to expose more
397 CSE opportunities. */
398 if (!iter.prefer_smaller_modes ())
399 {
400 /* Limit the search to the mode required by the corresponding
401 register insertion or extraction instruction, if any. */
402 enum machine_mode limit_mode = word_mode;
403 extraction_insn insn;
404 if (get_best_reg_extraction_insn (&insn, pattern,
405 GET_MODE_BITSIZE (best_mode),
406 fieldmode))
407 limit_mode = insn.field_mode;
408
409 enum machine_mode wider_mode;
410 while (iter.next_mode (&wider_mode)
411 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
412 best_mode = wider_mode;
413 }
414 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
415 new_bitnum);
416 }
417 return NULL_RTX;
418 }
419
420 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
421 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
422 offset is then BITNUM / BITS_PER_UNIT. */
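/* For example, on a little-endian target with 32-bit words, a 16-bit
   field at bit 32 of a DImode value satisfies this test, and the
   corresponding subreg offset is 32 / BITS_PER_UNIT == 4.  */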
423
424 static bool
425 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
426 unsigned HOST_WIDE_INT bitsize,
427 enum machine_mode struct_mode)
428 {
429 if (BYTES_BIG_ENDIAN)
430 return (bitnum % BITS_PER_UNIT == 0
431 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
432 || (bitnum + bitsize) % BITS_PER_WORD == 0));
433 else
434 return bitnum % BITS_PER_WORD == 0;
435 }
436
437 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
438 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
439 Return false if the access would touch memory outside the range
440 BITREGION_START to BITREGION_END for conformance to the C++ memory
441 model. */
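/* For example, with -fstrict-volatile-bitfields in effect, a volatile
   "int f : 8" field whose SImode container is aligned and lies within
   the bit region (if any) passes the checks below, so the access is
   performed in SImode rather than in a narrower mode.  */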
442
443 static bool
444 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
445 unsigned HOST_WIDE_INT bitnum,
446 enum machine_mode fieldmode,
447 unsigned HOST_WIDE_INT bitregion_start,
448 unsigned HOST_WIDE_INT bitregion_end)
449 {
450 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
451
452 /* -fstrict-volatile-bitfields must be enabled and we must have a
453 volatile MEM. */
454 if (!MEM_P (op0)
455 || !MEM_VOLATILE_P (op0)
456 || flag_strict_volatile_bitfields <= 0)
457 return false;
458
459 /* Non-integral modes likely only happen with packed structures.
460 Punt. */
461 if (!SCALAR_INT_MODE_P (fieldmode))
462 return false;
463
464 /* The bit size must not be larger than the field mode, and
465 the field mode must not be larger than a word. */
466 if (bitsize > modesize || modesize > BITS_PER_WORD)
467 return false;
468
469 /* Check for cases of unaligned fields that must be split. */
470 if (bitnum % BITS_PER_UNIT + bitsize > modesize
471 || (STRICT_ALIGNMENT
472 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
473 return false;
474
475 /* Check for cases where the C++ memory model applies. */
476 if (bitregion_end != 0
477 && (bitnum - bitnum % modesize < bitregion_start
478 || bitnum - bitnum % modesize + modesize > bitregion_end))
479 return false;
480
481 return true;
482 }
483
484 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
485 bit number BITNUM can be treated as a simple value of mode MODE. */
486
487 static bool
488 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
489 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
490 {
491 return (MEM_P (op0)
492 && bitnum % BITS_PER_UNIT == 0
493 && bitsize == GET_MODE_BITSIZE (mode)
494 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
495 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
496 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
497 }
498 \f
499 /* Try to use instruction INSV to store VALUE into a field of OP0.
500 BITSIZE and BITNUM are as for store_bit_field. */
501
502 static bool
503 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
504 unsigned HOST_WIDE_INT bitsize,
505 unsigned HOST_WIDE_INT bitnum,
506 rtx value)
507 {
508 struct expand_operand ops[4];
509 rtx value1;
510 rtx xop0 = op0;
511 rtx last = get_last_insn ();
512 bool copy_back = false;
513
514 enum machine_mode op_mode = insv->field_mode;
515 unsigned int unit = GET_MODE_BITSIZE (op_mode);
516 if (bitsize == 0 || bitsize > unit)
517 return false;
518
519 if (MEM_P (xop0))
520 /* Get a reference to the first byte of the field. */
521 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
522 &bitnum);
523 else
524 {
525 /* Convert from counting within OP0 to counting in OP_MODE. */
526 if (BYTES_BIG_ENDIAN)
527 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
528
529 /* If xop0 is a register, we need it in OP_MODE
530 to make it acceptable to the format of insv. */
531 if (GET_CODE (xop0) == SUBREG)
532 /* We can't just change the mode, because this might clobber op0,
533 and we will need the original value of op0 if insv fails. */
534 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
535 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
536 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
537 }
538
539 /* If the destination is a paradoxical subreg such that we need a
540 truncate to the inner mode, perform the insertion on a temporary and
541 truncate the result to the original destination. Note that we can't
542 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
543 X) 0)) is (reg:N X). */
544 if (GET_CODE (xop0) == SUBREG
545 && REG_P (SUBREG_REG (xop0))
546 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
547 op_mode))
548 {
549 rtx tem = gen_reg_rtx (op_mode);
550 emit_move_insn (tem, xop0);
551 xop0 = tem;
552 copy_back = true;
553 }
554
555 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
556 "backwards" from the size of the unit we are inserting into.
557 Otherwise, we count bits from the most significant on a
558 BYTES/BITS_BIG_ENDIAN machine. */
559
560 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
561 bitnum = unit - bitsize - bitnum;
562
563 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
564 value1 = value;
565 if (GET_MODE (value) != op_mode)
566 {
567 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
568 {
569 /* Optimization: Don't bother really extending VALUE
570 if it has all the bits we will actually use. However,
571 if we must narrow it, be sure we do it correctly. */
572
573 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
574 {
575 rtx tmp;
576
577 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
578 if (! tmp)
579 tmp = simplify_gen_subreg (op_mode,
580 force_reg (GET_MODE (value),
581 value1),
582 GET_MODE (value), 0);
583 value1 = tmp;
584 }
585 else
586 value1 = gen_lowpart (op_mode, value1);
587 }
588 else if (CONST_INT_P (value))
589 value1 = gen_int_mode (INTVAL (value), op_mode);
590 else
591 /* Parse phase is supposed to make VALUE's data type
592 match that of the component reference, which is a type
593 at least as wide as the field; so VALUE should have
594 a mode that corresponds to that type. */
595 gcc_assert (CONSTANT_P (value));
596 }
597
598 create_fixed_operand (&ops[0], xop0);
599 create_integer_operand (&ops[1], bitsize);
600 create_integer_operand (&ops[2], bitnum);
601 create_input_operand (&ops[3], value1, op_mode);
602 if (maybe_expand_insn (insv->icode, 4, ops))
603 {
604 if (copy_back)
605 convert_move (op0, xop0, true);
606 return true;
607 }
608 delete_insns_since (last);
609 return false;
610 }
611
612 /* A subroutine of store_bit_field, with the same arguments. Return true
613 if the operation could be implemented.
614
615 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
616 no other way of implementing the operation. If FALLBACK_P is false,
617 return false instead. */
618
619 static bool
620 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
621 unsigned HOST_WIDE_INT bitnum,
622 unsigned HOST_WIDE_INT bitregion_start,
623 unsigned HOST_WIDE_INT bitregion_end,
624 enum machine_mode fieldmode,
625 rtx value, bool fallback_p)
626 {
627 rtx op0 = str_rtx;
628 rtx orig_value;
629
630 while (GET_CODE (op0) == SUBREG)
631 {
632 /* The following line once was done only if WORDS_BIG_ENDIAN,
633 but I think that is a mistake. WORDS_BIG_ENDIAN is
634 meaningful at a much higher level; when structures are copied
635 between memory and regs, the higher-numbered regs
636 always get higher addresses. */
637 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
638 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
639 int byte_offset = 0;
640
641 /* Paradoxical subregs need special handling on big endian machines. */
642 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
643 {
644 int difference = inner_mode_size - outer_mode_size;
645
646 if (WORDS_BIG_ENDIAN)
647 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
648 if (BYTES_BIG_ENDIAN)
649 byte_offset += difference % UNITS_PER_WORD;
650 }
651 else
652 byte_offset = SUBREG_BYTE (op0);
653
654 bitnum += byte_offset * BITS_PER_UNIT;
655 op0 = SUBREG_REG (op0);
656 }
657
658 /* No action is needed if the target is a register and if the field
659 lies completely outside that register. This can occur if the source
660 code contains an out-of-bounds access to a small array. */
661 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
662 return true;
663
664 /* Use vec_set patterns for inserting parts of vectors whenever
665 available. */
666 if (VECTOR_MODE_P (GET_MODE (op0))
667 && !MEM_P (op0)
668 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
669 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
670 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
671 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
672 {
673 struct expand_operand ops[3];
674 enum machine_mode outermode = GET_MODE (op0);
675 enum machine_mode innermode = GET_MODE_INNER (outermode);
676 enum insn_code icode = optab_handler (vec_set_optab, outermode);
677 int pos = bitnum / GET_MODE_BITSIZE (innermode);
678
679 create_fixed_operand (&ops[0], op0);
680 create_input_operand (&ops[1], value, innermode);
681 create_integer_operand (&ops[2], pos);
682 if (maybe_expand_insn (icode, 3, ops))
683 return true;
684 }
685
686 /* If the target is a register, overwriting the entire object, or storing
687 a full-word or multi-word field can be done with just a SUBREG. */
688 if (!MEM_P (op0)
689 && bitsize == GET_MODE_BITSIZE (fieldmode)
690 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
691 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
692 {
693 /* Use the subreg machinery either to narrow OP0 to the required
694 words or to cope with mode punning between equal-sized modes.
695 In the latter case, use subreg on the rhs side, not lhs. */
696 rtx sub;
697
698 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
699 {
700 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
701 if (sub)
702 {
703 emit_move_insn (op0, sub);
704 return true;
705 }
706 }
707 else
708 {
709 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
710 bitnum / BITS_PER_UNIT);
711 if (sub)
712 {
713 emit_move_insn (sub, value);
714 return true;
715 }
716 }
717 }
718
719 /* If the target is memory, storing any naturally aligned field can be
720 done with a simple store. For targets that support fast unaligned
721 memory, any naturally sized, unit aligned field can be done directly. */
722 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
723 {
724 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
725 emit_move_insn (op0, value);
726 return true;
727 }
728
729 /* Make sure we are playing with integral modes. Pun with subregs
730 if we aren't. This must come after the entire register case above,
731 since that case is valid for any mode. The following cases are only
732 valid for integral modes. */
733 {
734 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
735 if (imode != GET_MODE (op0))
736 {
737 if (MEM_P (op0))
738 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
739 else
740 {
741 gcc_assert (imode != BLKmode);
742 op0 = gen_lowpart (imode, op0);
743 }
744 }
745 }
746
747 /* Storing an lsb-aligned field in a register
748 can be done with a movstrict instruction. */
749
750 if (!MEM_P (op0)
751 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
752 && bitsize == GET_MODE_BITSIZE (fieldmode)
753 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
754 {
755 struct expand_operand ops[2];
756 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
757 rtx arg0 = op0;
758 unsigned HOST_WIDE_INT subreg_off;
759
760 if (GET_CODE (arg0) == SUBREG)
761 {
762 /* Else we've got some float mode source being extracted into
763 a different float mode destination -- this combination of
764 subregs results in Severe Tire Damage. */
765 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
766 || GET_MODE_CLASS (fieldmode) == MODE_INT
767 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
768 arg0 = SUBREG_REG (arg0);
769 }
770
771 subreg_off = bitnum / BITS_PER_UNIT;
772 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
773 {
774 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
775
776 create_fixed_operand (&ops[0], arg0);
777 /* Shrink the source operand to FIELDMODE. */
778 create_convert_operand_to (&ops[1], value, fieldmode, false);
779 if (maybe_expand_insn (icode, 2, ops))
780 return true;
781 }
782 }
783
784 /* Handle fields bigger than a word. */
785
786 if (bitsize > BITS_PER_WORD)
787 {
788 /* Here we transfer the words of the field
789 in the order least significant first.
790 This is because the most significant word is the one which may
791 be less than full.
792 However, only do that if the value is not BLKmode. */
793
794 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
795 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
796 unsigned int i;
797 rtx last;
798
799 /* This is the mode we must force value to, so that there will be enough
800 subwords to extract. Note that fieldmode will often (always?) be
801 VOIDmode, because that is what store_field uses to indicate that this
802 is a bit field, but passing VOIDmode to operand_subword_force
803 is not allowed. */
804 fieldmode = GET_MODE (value);
805 if (fieldmode == VOIDmode)
806 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
807
808 last = get_last_insn ();
809 for (i = 0; i < nwords; i++)
810 {
811 /* If I is 0, use the low-order word in both field and target;
812 if I is 1, use the next to lowest word; and so on. */
813 unsigned int wordnum = (backwards
814 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
815 - i - 1
816 : i);
817 unsigned int bit_offset = (backwards
818 ? MAX ((int) bitsize - ((int) i + 1)
819 * BITS_PER_WORD,
820 0)
821 : (int) i * BITS_PER_WORD);
822 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
823 unsigned HOST_WIDE_INT new_bitsize =
824 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
825
826 /* If the remaining chunk doesn't have full wordsize we have
827 to make sure that for big endian machines the higher order
828 bits are used. */
829 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
830 value_word = simplify_expand_binop (word_mode, lshr_optab,
831 value_word,
832 GEN_INT (BITS_PER_WORD
833 - new_bitsize),
834 NULL_RTX, true,
835 OPTAB_LIB_WIDEN);
836
837 if (!store_bit_field_1 (op0, new_bitsize,
838 bitnum + bit_offset,
839 bitregion_start, bitregion_end,
840 word_mode,
841 value_word, fallback_p))
842 {
843 delete_insns_since (last);
844 return false;
845 }
846 }
847 return true;
848 }
849
850 /* If VALUE has a floating-point or complex mode, access it as an
851 integer of the corresponding size. This can occur on a machine
852 with 64 bit registers that uses SFmode for float. It can also
853 occur for unaligned float or complex fields. */
854 orig_value = value;
855 if (GET_MODE (value) != VOIDmode
856 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
857 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
858 {
859 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
860 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
861 }
862
863 /* If OP0 is a multi-word register, narrow it to the affected word.
864 If the region spans two words, defer to store_split_bit_field. */
865 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
866 {
867 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
868 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
869 gcc_assert (op0);
870 bitnum %= BITS_PER_WORD;
871 if (bitnum + bitsize > BITS_PER_WORD)
872 {
873 if (!fallback_p)
874 return false;
875
876 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
877 bitregion_end, value);
878 return true;
879 }
880 }
881
882 /* From here on we can assume that the field to be stored in fits
883 within a word. If the destination is a register, it too fits
884 in a word. */
885
886 extraction_insn insv;
887 if (!MEM_P (op0)
888 && get_best_reg_extraction_insn (&insv, EP_insv,
889 GET_MODE_BITSIZE (GET_MODE (op0)),
890 fieldmode)
891 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
892 return true;
893
894 /* If OP0 is a memory, try copying it to a register and seeing if a
895 cheap register alternative is available. */
896 if (MEM_P (op0))
897 {
898 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
899 fieldmode)
900 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
901 return true;
902
903 rtx last = get_last_insn ();
904
905 /* Try loading part of OP0 into a register, inserting the bitfield
906 into that, and then copying the result back to OP0. */
907 unsigned HOST_WIDE_INT bitpos;
908 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
909 bitregion_start, bitregion_end,
910 fieldmode, &bitpos);
911 if (xop0)
912 {
913 rtx tempreg = copy_to_reg (xop0);
914 if (store_bit_field_1 (tempreg, bitsize, bitpos,
915 bitregion_start, bitregion_end,
916 fieldmode, orig_value, false))
917 {
918 emit_move_insn (xop0, tempreg);
919 return true;
920 }
921 delete_insns_since (last);
922 }
923 }
924
925 if (!fallback_p)
926 return false;
927
928 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
929 bitregion_end, value);
930 return true;
931 }
932
933 /* Generate code to store value from rtx VALUE
934 into a bit-field within structure STR_RTX
935 containing BITSIZE bits starting at bit BITNUM.
936
937 BITREGION_START is bitpos of the first bitfield in this region.
938 BITREGION_END is the bitpos of the ending bitfield in this region.
939 These two fields are 0, if the C++ memory model does not apply,
940 or we are not interested in keeping track of bitfield regions.
941
942 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
943
944 void
945 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
946 unsigned HOST_WIDE_INT bitnum,
947 unsigned HOST_WIDE_INT bitregion_start,
948 unsigned HOST_WIDE_INT bitregion_end,
949 enum machine_mode fieldmode,
950 rtx value)
951 {
952 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
953 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
954 bitregion_start, bitregion_end))
955 {
956 /* Storing any naturally aligned field can be done with a simple
957 store. For targets that support fast unaligned memory, any
958 naturally sized, unit aligned field can be done directly. */
959 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
960 {
961 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
962 bitnum / BITS_PER_UNIT);
963 emit_move_insn (str_rtx, value);
964 }
965 else
966 {
967 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
968 &bitnum);
969 /* Explicitly override the C/C++ memory model; ignore the
970 bit range so that we can do the access in the mode mandated
971 by -fstrict-volatile-bitfields instead. */
972 store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
973 }
974
975 return;
976 }
977
978 /* Under the C++0x memory model, we must not touch bits outside the
979 bit region. Adjust the address to start at the beginning of the
980 bit region. */
981 if (MEM_P (str_rtx) && bitregion_start > 0)
982 {
983 enum machine_mode bestmode;
984 HOST_WIDE_INT offset, size;
985
986 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
987
988 offset = bitregion_start / BITS_PER_UNIT;
989 bitnum -= bitregion_start;
990 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
991 bitregion_end -= bitregion_start;
992 bitregion_start = 0;
993 bestmode = get_best_mode (bitsize, bitnum,
994 bitregion_start, bitregion_end,
995 MEM_ALIGN (str_rtx), VOIDmode,
996 MEM_VOLATILE_P (str_rtx));
997 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
998 }
999
1000 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1001 bitregion_start, bitregion_end,
1002 fieldmode, value, true))
1003 gcc_unreachable ();
1004 }
1005 \f
1006 /* Use shifts and boolean operations to store VALUE into a bit field of
1007 width BITSIZE in OP0, starting at bit BITNUM. */
1008
1009 static void
1010 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1011 unsigned HOST_WIDE_INT bitnum,
1012 unsigned HOST_WIDE_INT bitregion_start,
1013 unsigned HOST_WIDE_INT bitregion_end,
1014 rtx value)
1015 {
1016 /* There is a case not handled here:
1017 a structure with a known alignment of just a halfword
1018 and a field split across two aligned halfwords within the structure.
1019 Or likewise a structure with a known alignment of just a byte
1020 and a field split across two bytes.
1021 Such cases are not supposed to be able to occur. */
1022
1023 if (MEM_P (op0))
1024 {
1025 enum machine_mode mode = GET_MODE (op0);
1026 if (GET_MODE_BITSIZE (mode) == 0
1027 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1028 mode = word_mode;
1029 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1030 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1031
1032 if (mode == VOIDmode)
1033 {
1034 /* The only way this should occur is if the field spans word
1035 boundaries. */
1036 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1037 bitregion_end, value);
1038 return;
1039 }
1040
1041 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1042 }
1043
1044 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1045 }
1046
1047 /* Helper function for store_fixed_bit_field, stores
1048 the bit field always using the MODE of OP0. */
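/* For example, on a little-endian target, storing an 8-bit field at
   BITNUM == 4 of an SImode OP0 amounts to
   OP0 = (OP0 & ~0xff0) | ((VALUE & 0xff) << 4);
   the AND of OP0 is skipped when the field bits of VALUE are all ones
   and the IOR is skipped when they are all zero.  */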
1049
1050 static void
1051 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1052 unsigned HOST_WIDE_INT bitnum,
1053 rtx value)
1054 {
1055 enum machine_mode mode;
1056 rtx temp;
1057 int all_zero = 0;
1058 int all_one = 0;
1059
1060 mode = GET_MODE (op0);
1061 gcc_assert (SCALAR_INT_MODE_P (mode));
1062
1063 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1064 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1065
1066 if (BYTES_BIG_ENDIAN)
1067 /* BITNUM is the distance between our msb
1068 and that of the containing datum.
1069 Convert it to the distance from the lsb. */
1070 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1071
1072 /* Now BITNUM is always the distance between our lsb
1073 and that of OP0. */
1074
1075 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1076 we must first convert its mode to MODE. */
1077
1078 if (CONST_INT_P (value))
1079 {
1080 HOST_WIDE_INT v = INTVAL (value);
1081
1082 if (bitsize < HOST_BITS_PER_WIDE_INT)
1083 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1084
1085 if (v == 0)
1086 all_zero = 1;
1087 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1088 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1089 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1090 all_one = 1;
1091
1092 value = lshift_value (mode, v, bitnum);
1093 }
1094 else
1095 {
1096 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1097 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1098
1099 if (GET_MODE (value) != mode)
1100 value = convert_to_mode (mode, value, 1);
1101
1102 if (must_and)
1103 value = expand_binop (mode, and_optab, value,
1104 mask_rtx (mode, 0, bitsize, 0),
1105 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1106 if (bitnum > 0)
1107 value = expand_shift (LSHIFT_EXPR, mode, value,
1108 bitnum, NULL_RTX, 1);
1109 }
1110
1111 /* Now clear the chosen bits in OP0,
1112 except that if VALUE is -1 we need not bother. */
1113 /* We keep the intermediates in registers to allow CSE to combine
1114 consecutive bitfield assignments. */
1115
1116 temp = force_reg (mode, op0);
1117
1118 if (! all_one)
1119 {
1120 temp = expand_binop (mode, and_optab, temp,
1121 mask_rtx (mode, bitnum, bitsize, 1),
1122 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1123 temp = force_reg (mode, temp);
1124 }
1125
1126 /* Now logical-or VALUE into OP0, unless it is zero. */
1127
1128 if (! all_zero)
1129 {
1130 temp = expand_binop (mode, ior_optab, temp, value,
1131 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1132 temp = force_reg (mode, temp);
1133 }
1134
1135 if (op0 != temp)
1136 {
1137 op0 = copy_rtx (op0);
1138 emit_move_insn (op0, temp);
1139 }
1140 }
1141 \f
1142 /* Store a bit field that is split across multiple accessible memory objects.
1143
1144 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1145 BITSIZE is the field width; BITPOS the position of its first bit
1146 (within the word).
1147 VALUE is the value to store.
1148
1149 This does not yet handle fields wider than BITS_PER_WORD. */
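/* For example, a 16-bit field at bit position 24 in a register target
   with 32-bit words is stored as two pieces: 8 bits at bit 24 of the
   first word and the remaining 8 bits at bit 0 of the following word.  */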
1150
1151 static void
1152 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1153 unsigned HOST_WIDE_INT bitpos,
1154 unsigned HOST_WIDE_INT bitregion_start,
1155 unsigned HOST_WIDE_INT bitregion_end,
1156 rtx value)
1157 {
1158 unsigned int unit;
1159 unsigned int bitsdone = 0;
1160
1161 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1162 much at a time. */
1163 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1164 unit = BITS_PER_WORD;
1165 else
1166 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1167
1168 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1169 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1170 again, and we will mutually recurse forever. */
1171 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1172 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1173
1174 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1175 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1176 that VALUE might be a floating-point constant. */
1177 if (CONSTANT_P (value) && !CONST_INT_P (value))
1178 {
1179 rtx word = gen_lowpart_common (word_mode, value);
1180
1181 if (word && (value != word))
1182 value = word;
1183 else
1184 value = gen_lowpart_common (word_mode,
1185 force_reg (GET_MODE (value) != VOIDmode
1186 ? GET_MODE (value)
1187 : word_mode, value));
1188 }
1189
1190 while (bitsdone < bitsize)
1191 {
1192 unsigned HOST_WIDE_INT thissize;
1193 rtx part, word;
1194 unsigned HOST_WIDE_INT thispos;
1195 unsigned HOST_WIDE_INT offset;
1196
1197 offset = (bitpos + bitsdone) / unit;
1198 thispos = (bitpos + bitsdone) % unit;
1199
1200 /* When region of bytes we can touch is restricted, decrease
1201 UNIT close to the end of the region as needed. If op0 is a REG
1202 or SUBREG of REG, don't do this, as there can't be data races
1203 on a register and we can expand shorter code in some cases. */
1204 if (bitregion_end
1205 && unit > BITS_PER_UNIT
1206 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1207 && !REG_P (op0)
1208 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1209 {
1210 unit = unit / 2;
1211 continue;
1212 }
1213
1214 /* THISSIZE must not overrun a word boundary. Otherwise,
1215 store_fixed_bit_field will call us again, and we will mutually
1216 recurse forever. */
1217 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1218 thissize = MIN (thissize, unit - thispos);
1219
1220 if (BYTES_BIG_ENDIAN)
1221 {
1222 /* Fetch successively less significant portions. */
1223 if (CONST_INT_P (value))
1224 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1225 >> (bitsize - bitsdone - thissize))
1226 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1227 else
1228 {
1229 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1230 /* The args are chosen so that the last part includes the
1231 lsb. Give extract_bit_field the value it needs (with
1232 endianness compensation) to fetch the piece we want. */
1233 part = extract_fixed_bit_field (word_mode, value, thissize,
1234 total_bits - bitsize + bitsdone,
1235 NULL_RTX, 1);
1236 }
1237 }
1238 else
1239 {
1240 /* Fetch successively more significant portions. */
1241 if (CONST_INT_P (value))
1242 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1243 >> bitsdone)
1244 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1245 else
1246 part = extract_fixed_bit_field (word_mode, value, thissize,
1247 bitsdone, NULL_RTX, 1);
1248 }
1249
1250 /* If OP0 is a register, then handle OFFSET here.
1251
1252 When handling multiword bitfields, extract_bit_field may pass
1253 down a word_mode SUBREG of a larger REG for a bitfield that actually
1254 crosses a word boundary. Thus, for a SUBREG, we must find
1255 the current word starting from the base register. */
1256 if (GET_CODE (op0) == SUBREG)
1257 {
1258 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1259 + (offset * unit / BITS_PER_WORD);
1260 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1261 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1262 word = word_offset ? const0_rtx : op0;
1263 else
1264 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1265 GET_MODE (SUBREG_REG (op0)));
1266 offset &= BITS_PER_WORD / unit - 1;
1267 }
1268 else if (REG_P (op0))
1269 {
1270 enum machine_mode op0_mode = GET_MODE (op0);
1271 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1272 word = offset ? const0_rtx : op0;
1273 else
1274 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1275 GET_MODE (op0));
1276 offset &= BITS_PER_WORD / unit - 1;
1277 }
1278 else
1279 word = op0;
1280
1281 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1282 it is just an out-of-bounds access. Ignore it. */
1283 if (word != const0_rtx)
1284 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1285 bitregion_start, bitregion_end, part);
1286 bitsdone += thissize;
1287 }
1288 }
1289 \f
1290 /* A subroutine of extract_bit_field_1 that converts return value X
1291 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1292 to extract_bit_field. */
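/* For example, when TMODE is DFmode the extracted value is first
   converted to the integer mode of the same size (DImode on targets
   with 64-bit doubles), forced into a register, and then viewed as
   DFmode through gen_lowpart.  */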
1293
1294 static rtx
1295 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1296 enum machine_mode tmode, bool unsignedp)
1297 {
1298 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1299 return x;
1300
1301 /* If the x mode is not a scalar integral, first convert to the
1302 integer mode of that size and then access it as a floating-point
1303 value via a SUBREG. */
1304 if (!SCALAR_INT_MODE_P (tmode))
1305 {
1306 enum machine_mode smode;
1307
1308 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1309 x = convert_to_mode (smode, x, unsignedp);
1310 x = force_reg (smode, x);
1311 return gen_lowpart (tmode, x);
1312 }
1313
1314 return convert_to_mode (tmode, x, unsignedp);
1315 }
1316
1317 /* Try to use an ext(z)v pattern to extract a field from OP0.
1318 Return the extracted value on success, otherwise return null.
1319 EXT_MODE is the mode of the extraction and the other arguments
1320 are as for extract_bit_field. */
1321
1322 static rtx
1323 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1324 unsigned HOST_WIDE_INT bitsize,
1325 unsigned HOST_WIDE_INT bitnum,
1326 int unsignedp, rtx target,
1327 enum machine_mode mode, enum machine_mode tmode)
1328 {
1329 struct expand_operand ops[4];
1330 rtx spec_target = target;
1331 rtx spec_target_subreg = 0;
1332 enum machine_mode ext_mode = extv->field_mode;
1333 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1334
1335 if (bitsize == 0 || unit < bitsize)
1336 return NULL_RTX;
1337
1338 if (MEM_P (op0))
1339 /* Get a reference to the first byte of the field. */
1340 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1341 &bitnum);
1342 else
1343 {
1344 /* Convert from counting within OP0 to counting in EXT_MODE. */
1345 if (BYTES_BIG_ENDIAN)
1346 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1347
1348 /* If op0 is a register, we need it in EXT_MODE to make it
1349 acceptable to the format of ext(z)v. */
1350 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1351 return NULL_RTX;
1352 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1353 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1354 }
1355
1356 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1357 "backwards" from the size of the unit we are extracting from.
1358 Otherwise, we count bits from the most significant on a
1359 BYTES/BITS_BIG_ENDIAN machine. */
1360
1361 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1362 bitnum = unit - bitsize - bitnum;
1363
1364 if (target == 0)
1365 target = spec_target = gen_reg_rtx (tmode);
1366
1367 if (GET_MODE (target) != ext_mode)
1368 {
1369 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1370 between the mode of the extraction (word_mode) and the target
1371 mode. Instead, create a temporary and use convert_move to set
1372 the target. */
1373 if (REG_P (target)
1374 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1375 {
1376 target = gen_lowpart (ext_mode, target);
1377 if (GET_MODE_PRECISION (ext_mode)
1378 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1379 spec_target_subreg = target;
1380 }
1381 else
1382 target = gen_reg_rtx (ext_mode);
1383 }
1384
1385 create_output_operand (&ops[0], target, ext_mode);
1386 create_fixed_operand (&ops[1], op0);
1387 create_integer_operand (&ops[2], bitsize);
1388 create_integer_operand (&ops[3], bitnum);
1389 if (maybe_expand_insn (extv->icode, 4, ops))
1390 {
1391 target = ops[0].value;
1392 if (target == spec_target)
1393 return target;
1394 if (target == spec_target_subreg)
1395 return spec_target;
1396 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1397 }
1398 return NULL_RTX;
1399 }
1400
1401 /* A subroutine of extract_bit_field, with the same arguments.
1402 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1403 if we can find no other means of implementing the operation.
1404 If FALLBACK_P is false, return NULL instead. */
1405
1406 static rtx
1407 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1408 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1409 enum machine_mode mode, enum machine_mode tmode,
1410 bool fallback_p)
1411 {
1412 rtx op0 = str_rtx;
1413 enum machine_mode int_mode;
1414 enum machine_mode mode1;
1415
1416 if (tmode == VOIDmode)
1417 tmode = mode;
1418
1419 while (GET_CODE (op0) == SUBREG)
1420 {
1421 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1422 op0 = SUBREG_REG (op0);
1423 }
1424
1425 /* If we have an out-of-bounds access to a register, just return an
1426 uninitialized register of the required mode. This can occur if the
1427 source code contains an out-of-bounds access to a small array. */
1428 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1429 return gen_reg_rtx (tmode);
1430
1431 if (REG_P (op0)
1432 && mode == GET_MODE (op0)
1433 && bitnum == 0
1434 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1435 {
1436 /* We're trying to extract a full register from itself. */
1437 return op0;
1438 }
1439
1440 /* See if we can get a better vector mode before extracting. */
1441 if (VECTOR_MODE_P (GET_MODE (op0))
1442 && !MEM_P (op0)
1443 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1444 {
1445 enum machine_mode new_mode;
1446
1447 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1448 new_mode = MIN_MODE_VECTOR_FLOAT;
1449 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1450 new_mode = MIN_MODE_VECTOR_FRACT;
1451 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1452 new_mode = MIN_MODE_VECTOR_UFRACT;
1453 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1454 new_mode = MIN_MODE_VECTOR_ACCUM;
1455 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1456 new_mode = MIN_MODE_VECTOR_UACCUM;
1457 else
1458 new_mode = MIN_MODE_VECTOR_INT;
1459
1460 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1461 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1462 && targetm.vector_mode_supported_p (new_mode))
1463 break;
1464 if (new_mode != VOIDmode)
1465 op0 = gen_lowpart (new_mode, op0);
1466 }
1467
1468 /* Use vec_extract patterns for extracting parts of vectors whenever
1469 available. */
1470 if (VECTOR_MODE_P (GET_MODE (op0))
1471 && !MEM_P (op0)
1472 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1473 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1474 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1475 {
1476 struct expand_operand ops[3];
1477 enum machine_mode outermode = GET_MODE (op0);
1478 enum machine_mode innermode = GET_MODE_INNER (outermode);
1479 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1480 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1481
1482 create_output_operand (&ops[0], target, innermode);
1483 create_input_operand (&ops[1], op0, outermode);
1484 create_integer_operand (&ops[2], pos);
1485 if (maybe_expand_insn (icode, 3, ops))
1486 {
1487 target = ops[0].value;
1488 if (GET_MODE (target) != mode)
1489 return gen_lowpart (tmode, target);
1490 return target;
1491 }
1492 }
1493
1494 /* Make sure we are playing with integral modes. Pun with subregs
1495 if we aren't. */
1496 {
1497 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1498 if (imode != GET_MODE (op0))
1499 {
1500 if (MEM_P (op0))
1501 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1502 else if (imode != BLKmode)
1503 {
1504 op0 = gen_lowpart (imode, op0);
1505
1506 /* If we got a SUBREG, force it into a register since we
1507 aren't going to be able to do another SUBREG on it. */
1508 if (GET_CODE (op0) == SUBREG)
1509 op0 = force_reg (imode, op0);
1510 }
1511 else if (REG_P (op0))
1512 {
1513 rtx reg, subreg;
1514 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1515 MODE_INT);
1516 reg = gen_reg_rtx (imode);
1517 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1518 emit_move_insn (subreg, op0);
1519 op0 = reg;
1520 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1521 }
1522 else
1523 {
1524 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1525 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1526 emit_move_insn (mem, op0);
1527 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1528 }
1529 }
1530 }
1531
1532 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1533 If that's wrong, the solution is to test for it and set TARGET to 0
1534 if needed. */
1535
1536 /* Get the mode of the field to use for atomic access or subreg
1537 conversion. */
1538 mode1 = mode;
1539 if (SCALAR_INT_MODE_P (tmode))
1540 {
1541 enum machine_mode try_mode = mode_for_size (bitsize,
1542 GET_MODE_CLASS (tmode), 0);
1543 if (try_mode != BLKmode)
1544 mode1 = try_mode;
1545 }
1546 gcc_assert (mode1 != BLKmode);
1547
1548 /* Extraction of a full MODE1 value can be done with a subreg as long
1549 as the least significant bit of the value is the least significant
1550 bit of either OP0 or a word of OP0. */
1551 if (!MEM_P (op0)
1552 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1553 && bitsize == GET_MODE_BITSIZE (mode1)
1554 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1555 {
1556 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1557 bitnum / BITS_PER_UNIT);
1558 if (sub)
1559 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1560 }
1561
1562 /* Extraction of a full MODE1 value can be done with a load as long as
1563 the field is on a byte boundary and is sufficiently aligned. */
1564 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1565 {
1566 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1567 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1568 }
1569
1570 /* Handle fields bigger than a word. */
1571
1572 if (bitsize > BITS_PER_WORD)
1573 {
1574 /* Here we transfer the words of the field
1575 in the order least significant first.
1576 This is because the most significant word is the one which may
1577 be less than full. */
1578
1579 unsigned int backwards = WORDS_BIG_ENDIAN;
1580 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1581 unsigned int i;
1582 rtx last;
1583
1584 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1585 target = gen_reg_rtx (mode);
1586
1587 /* Indicate for flow that the entire target reg is being set. */
1588 emit_clobber (target);
1589
1590 last = get_last_insn ();
1591 for (i = 0; i < nwords; i++)
1592 {
1593 /* If I is 0, use the low-order word in both field and target;
1594 if I is 1, use the next to lowest word; and so on. */
1595 /* Word number in TARGET to use. */
1596 unsigned int wordnum
1597 = (backwards
1598 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1599 : i);
1600 /* Offset from start of field in OP0. */
1601 unsigned int bit_offset = (backwards
1602 ? MAX ((int) bitsize - ((int) i + 1)
1603 * BITS_PER_WORD,
1604 0)
1605 : (int) i * BITS_PER_WORD);
1606 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1607 rtx result_part
1608 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1609 bitsize - i * BITS_PER_WORD),
1610 bitnum + bit_offset, 1, target_part,
1611 mode, word_mode, fallback_p);
1612
1613 gcc_assert (target_part);
1614 if (!result_part)
1615 {
1616 delete_insns_since (last);
1617 return NULL;
1618 }
1619
1620 if (result_part != target_part)
1621 emit_move_insn (target_part, result_part);
1622 }
1623
1624 if (unsignedp)
1625 {
1626 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1627 need to be zero'd out. */
1628 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1629 {
1630 unsigned int i, total_words;
1631
1632 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1633 for (i = nwords; i < total_words; i++)
1634 emit_move_insn
1635 (operand_subword (target,
1636 backwards ? total_words - i - 1 : i,
1637 1, VOIDmode),
1638 const0_rtx);
1639 }
1640 return target;
1641 }
1642
1643 /* Signed bit field: sign-extend with two arithmetic shifts. */
1644 target = expand_shift (LSHIFT_EXPR, mode, target,
1645 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1646 return expand_shift (RSHIFT_EXPR, mode, target,
1647 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1648 }
1649
1650 /* If OP0 is a multi-word register, narrow it to the affected word.
1651 If the region spans two words, defer to extract_split_bit_field. */
1652 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1653 {
1654 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1655 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1656 bitnum %= BITS_PER_WORD;
1657 if (bitnum + bitsize > BITS_PER_WORD)
1658 {
1659 if (!fallback_p)
1660 return NULL_RTX;
1661 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1662 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1663 }
1664 }
1665
1666 /* From here on we know the desired field is smaller than a word.
1667 If OP0 is a register, it too fits within a word. */
1668 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1669 extraction_insn extv;
1670 if (!MEM_P (op0)
1671 /* ??? We could limit the structure size to the part of OP0 that
1672 contains the field, with appropriate checks for endianness
1673 and TRULY_NOOP_TRUNCATION. */
1674 && get_best_reg_extraction_insn (&extv, pattern,
1675 GET_MODE_BITSIZE (GET_MODE (op0)),
1676 tmode))
1677 {
1678 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1679 unsignedp, target, mode,
1680 tmode);
1681 if (result)
1682 return result;
1683 }
1684
1685 /* If OP0 is a memory, try copying it to a register and seeing if a
1686 cheap register alternative is available. */
1687 if (MEM_P (op0))
1688 {
1689 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1690 tmode))
1691 {
1692 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1693 bitnum, unsignedp,
1694 target, mode,
1695 tmode);
1696 if (result)
1697 return result;
1698 }
1699
1700 rtx last = get_last_insn ();
1701
1702 /* Try loading part of OP0 into a register and extracting the
1703 bitfield from that. */
1704 unsigned HOST_WIDE_INT bitpos;
1705 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1706 0, 0, tmode, &bitpos);
1707 if (xop0)
1708 {
1709 xop0 = copy_to_reg (xop0);
1710 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1711 unsignedp, target,
1712 mode, tmode, false);
1713 if (result)
1714 return result;
1715 delete_insns_since (last);
1716 }
1717 }
1718
1719 if (!fallback_p)
1720 return NULL;
1721
1722 /* Find a correspondingly-sized integer field, so we can apply
1723 shifts and masks to it. */
1724 int_mode = int_mode_for_mode (tmode);
1725 if (int_mode == BLKmode)
1726 int_mode = int_mode_for_mode (mode);
1727 /* Should probably push op0 out to memory and then do a load. */
1728 gcc_assert (int_mode != BLKmode);
1729
1730 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1731 target, unsignedp);
1732 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1733 }
1734
1735 /* Generate code to extract a bit-field from STR_RTX
1736 containing BITSIZE bits, starting at BITNUM,
1737 and put it in TARGET if possible (if TARGET is nonzero).
1738 Regardless of TARGET, we return the rtx for where the value is placed.
1739
1740 STR_RTX is the structure containing the byte (a REG or MEM).
1741 UNSIGNEDP is nonzero if this is an unsigned bit field.
1742 MODE is the natural mode of the field value once extracted.
1743 TMODE is the mode the caller would like the value to have;
1744 but the value may be returned with type MODE instead.
1745
1746 If a TARGET is specified and we can store in it at no extra cost,
1747 we do so, and return TARGET.
1748 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1749 if they are equally easy. */
1750
1751 rtx
1752 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1753 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1754 enum machine_mode mode, enum machine_mode tmode)
1755 {
1756 enum machine_mode mode1;
1757
1758 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1759 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1760 mode1 = GET_MODE (str_rtx);
1761 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1762 mode1 = GET_MODE (target);
1763 else
1764 mode1 = tmode;
1765
1766 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1767 {
1768 rtx result;
1769
1770 /* Extraction of a full MODE1 value can be done with a load as long as
1771 the field is on a byte boundary and is sufficiently aligned. */
1772 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1773 result = adjust_bitfield_address (str_rtx, mode1,
1774 bitnum / BITS_PER_UNIT);
1775 else
1776 {
1777 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1778 &bitnum);
1779 result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1780 target, unsignedp);
1781 }
1782
1783 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1784 }
1785
1786 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1787 target, mode, tmode, true);
1788 }
1789 \f
1790 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1791 from bit BITNUM of OP0.
1792
1793 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1794 If TARGET is nonzero, attempts to store the value there
1795 and return TARGET, but this is not guaranteed.
1796 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1797
1798 static rtx
1799 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1800 unsigned HOST_WIDE_INT bitsize,
1801 unsigned HOST_WIDE_INT bitnum, rtx target,
1802 int unsignedp)
1803 {
1804 if (MEM_P (op0))
1805 {
1806 enum machine_mode mode
1807 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1808 MEM_VOLATILE_P (op0));
1809
1810 if (mode == VOIDmode)
1811 /* The only way this should occur is if the field spans word
1812 boundaries. */
1813 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1814
1815 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1816 }
1817
1818 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1819 target, unsignedp);
1820 }
1821
1822 /* Helper function for extract_fixed_bit_field, extracts
1823 the bit field always using the MODE of OP0. */
1824
1825 static rtx
1826 extract_fixed_bit_field_1 (enum machine_mode tmode, rtx op0,
1827 unsigned HOST_WIDE_INT bitsize,
1828 unsigned HOST_WIDE_INT bitnum, rtx target,
1829 int unsignedp)
1830 {
1831 enum machine_mode mode = GET_MODE (op0);
1832 gcc_assert (SCALAR_INT_MODE_P (mode));
1833
1834 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1835 for invalid input, such as extracting the equivalent of f5 from
1836 gcc.dg/pr48335-2.c. */
1837
1838 if (BYTES_BIG_ENDIAN)
1839 /* BITNUM is the distance between our msb and that of OP0.
1840 Convert it to the distance from the lsb. */
1841 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1842
1843 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1844 We have reduced the big-endian case to the little-endian case. */
1845
1846 if (unsignedp)
1847 {
1848 if (bitnum)
1849 {
1850 /* If the field does not already start at the lsb,
1851 shift it so it does. */
1852 /* Maybe propagate the target for the shift. */
1853 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1854 if (tmode != mode)
1855 subtarget = 0;
1856 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1857 }
1858 /* Convert the value to the desired mode. */
1859 if (mode != tmode)
1860 op0 = convert_to_mode (tmode, op0, 1);
1861
1862 /* Unless the msb of the field used to be the msb when we shifted,
1863 mask out the upper bits. */
1864
1865 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1866 return expand_binop (GET_MODE (op0), and_optab, op0,
1867 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1868 target, 1, OPTAB_LIB_WIDEN);
1869 return op0;
1870 }
1871
1872 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1873 then arithmetic-shift its lsb to the lsb of the word. */
1874 op0 = force_reg (mode, op0);
1875
1876 /* Find the narrowest integer mode that contains the field. */
1877
1878 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1879 mode = GET_MODE_WIDER_MODE (mode))
1880 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1881 {
1882 op0 = convert_to_mode (mode, op0, 0);
1883 break;
1884 }
1885
1886 if (mode != tmode)
1887 target = 0;
1888
1889 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1890 {
1891 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1892 /* Maybe propagate the target for the shift. */
1893 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1894 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1895 }
1896
1897 return expand_shift (RSHIFT_EXPR, mode, op0,
1898 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1899 }
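/* Illustrative example (a sketch, not the RTL any particular target
   emits): for a BITSIZE == 5 field at BITNUM == 3 (little-endian bit
   numbering) the unsigned path above computes

     (x >> 3) & 0x1f

   while the signed path first narrows to the smallest integer mode
   covering bit 3 + 5 == 8, i.e. QImode, and then does

     ((signed char) x) >> 3

   (a left shift of 0 followed by an arithmetic right shift of
   8 - 5 == 3), so the field's sign bit is replicated into the upper
   bits of the result.  */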
1900
1901 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1902 VALUE << BITPOS. */
1903
1904 static rtx
1905 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1906 int bitpos)
1907 {
1908 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1909 }
1910 \f
1911 /* Extract a bit field that is split across two words
1912 and return an RTX for the result.
1913
1914 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1915 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1916 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1917
1918 static rtx
1919 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1920 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1921 {
1922 unsigned int unit;
1923 unsigned int bitsdone = 0;
1924 rtx result = NULL_RTX;
1925 int first = 1;
1926
1927 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1928 much at a time. */
1929 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1930 unit = BITS_PER_WORD;
1931 else
1932 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1933
1934 while (bitsdone < bitsize)
1935 {
1936 unsigned HOST_WIDE_INT thissize;
1937 rtx part, word;
1938 unsigned HOST_WIDE_INT thispos;
1939 unsigned HOST_WIDE_INT offset;
1940
1941 offset = (bitpos + bitsdone) / unit;
1942 thispos = (bitpos + bitsdone) % unit;
1943
1944 /* THISSIZE must not overrun a word boundary. Otherwise,
1945 extract_fixed_bit_field will call us again, and we will mutually
1946 recurse forever. */
1947 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1948 thissize = MIN (thissize, unit - thispos);
1949
1950 /* If OP0 is a register, then handle OFFSET here.
1951
1952 When handling multiword bitfields, extract_bit_field may pass
1953 down a word_mode SUBREG of a larger REG for a bitfield that actually
1954 crosses a word boundary. Thus, for a SUBREG, we must find
1955 the current word starting from the base register. */
1956 if (GET_CODE (op0) == SUBREG)
1957 {
1958 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1959 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1960 GET_MODE (SUBREG_REG (op0)));
1961 offset = 0;
1962 }
1963 else if (REG_P (op0))
1964 {
1965 word = operand_subword_force (op0, offset, GET_MODE (op0));
1966 offset = 0;
1967 }
1968 else
1969 word = op0;
1970
1971 /* Extract the parts in bit-counting order,
1972 whose meaning is determined by BYTES_PER_UNIT.
1973 OFFSET is in UNITs, and UNIT is in bits. */
1974 part = extract_fixed_bit_field (word_mode, word, thissize,
1975 offset * unit + thispos, 0, 1);
1976 bitsdone += thissize;
1977
1978 /* Shift this part into place for the result. */
1979 if (BYTES_BIG_ENDIAN)
1980 {
1981 if (bitsize != bitsdone)
1982 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1983 bitsize - bitsdone, 0, 1);
1984 }
1985 else
1986 {
1987 if (bitsdone != thissize)
1988 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1989 bitsdone - thissize, 0, 1);
1990 }
1991
1992 if (first)
1993 result = part;
1994 else
1995 /* Combine the parts with bitwise or. This works
1996 because we extracted each part as an unsigned bit field. */
1997 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1998 OPTAB_LIB_WIDEN);
1999
2000 first = 0;
2001 }
2002
2003 /* Unsigned bit field: we are done. */
2004 if (unsignedp)
2005 return result;
2006 /* Signed bit field: sign-extend with two arithmetic shifts. */
2007 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2008 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2009 return expand_shift (RSHIFT_EXPR, word_mode, result,
2010 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2011 }
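/* Worked example (illustrative, assuming a 32-bit word and
   !BYTES_BIG_ENDIAN): extracting a 10-bit field at BITPOS == 28 from a
   word-aligned MEM is done in two passes:

     pass 1: thispos == 28, thissize == 4  -> low 4 bits of the result
     pass 2: thispos ==  0, thissize == 6  -> shifted left by 4 and
                                              IORed in as bits 4..9

   For a signed field the combined value is then sign-extended by the
   final left/arithmetic-right shift pair of 32 - 10 == 22 bits.  */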
2012 \f
2013 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2014 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2015 MODE, fill the upper bits with zeros. Fail if the layout of either
2016 mode is unknown (as for CC modes) or if the extraction would involve
2017 unprofitable mode punning. Return the value on success, otherwise
2018 return null.
2019
2020 This is different from gen_lowpart* in these respects:
2021
2022 - the returned value must always be considered an rvalue
2023
2024 - when MODE is wider than SRC_MODE, the extraction involves
2025 a zero extension
2026
2027 - when MODE is smaller than SRC_MODE, the extraction involves
2028 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2029
2030 In other words, this routine performs a computation, whereas the
2031 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2032 operations. */
2033
2034 rtx
2035 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2036 {
2037 enum machine_mode int_mode, src_int_mode;
2038
2039 if (mode == src_mode)
2040 return src;
2041
2042 if (CONSTANT_P (src))
2043 {
2044 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2045 fails, it will happily create (subreg (symbol_ref)) or similar
2046 invalid SUBREGs. */
2047 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2048 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2049 if (ret)
2050 return ret;
2051
2052 if (GET_MODE (src) == VOIDmode
2053 || !validate_subreg (mode, src_mode, src, byte))
2054 return NULL_RTX;
2055
2056 src = force_reg (GET_MODE (src), src);
2057 return gen_rtx_SUBREG (mode, src, byte);
2058 }
2059
2060 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2061 return NULL_RTX;
2062
2063 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2064 && MODES_TIEABLE_P (mode, src_mode))
2065 {
2066 rtx x = gen_lowpart_common (mode, src);
2067 if (x)
2068 return x;
2069 }
2070
2071 src_int_mode = int_mode_for_mode (src_mode);
2072 int_mode = int_mode_for_mode (mode);
2073 if (src_int_mode == BLKmode || int_mode == BLKmode)
2074 return NULL_RTX;
2075
2076 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2077 return NULL_RTX;
2078 if (!MODES_TIEABLE_P (int_mode, mode))
2079 return NULL_RTX;
2080
2081 src = gen_lowpart (src_int_mode, src);
2082 src = convert_modes (int_mode, src_int_mode, src, true);
2083 src = gen_lowpart (mode, src);
2084 return src;
2085 }
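/* Illustrative use (a sketch, assuming DFmode and DImode are tieable
   on the target):

     rtx lo = extract_low_bits (SImode, DFmode, x);

   takes the integer-mode path above: the DFmode bits are reinterpreted
   as DImode, truncated to SImode, and the low 32 bits of the double's
   representation come back as an SImode rvalue.  If either
   MODES_TIEABLE_P test fails, NULL_RTX is returned instead.  */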
2086 \f
2087 /* Add INC into TARGET. */
2088
2089 void
2090 expand_inc (rtx target, rtx inc)
2091 {
2092 rtx value = expand_binop (GET_MODE (target), add_optab,
2093 target, inc,
2094 target, 0, OPTAB_LIB_WIDEN);
2095 if (value != target)
2096 emit_move_insn (target, value);
2097 }
2098
2099 /* Subtract DEC from TARGET. */
2100
2101 void
2102 expand_dec (rtx target, rtx dec)
2103 {
2104 rtx value = expand_binop (GET_MODE (target), sub_optab,
2105 target, dec,
2106 target, 0, OPTAB_LIB_WIDEN);
2107 if (value != target)
2108 emit_move_insn (target, value);
2109 }
2110 \f
2111 /* Output a shift instruction for expression code CODE,
2112 with SHIFTED being the rtx for the value to shift,
2113 and AMOUNT the rtx for the amount to shift by.
2114 Store the result in the rtx TARGET, if that is convenient.
2115 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2116 Return the rtx for where the value is. */
2117
2118 static rtx
2119 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2120 rtx amount, rtx target, int unsignedp)
2121 {
2122 rtx op1, temp = 0;
2123 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2124 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2125 optab lshift_optab = ashl_optab;
2126 optab rshift_arith_optab = ashr_optab;
2127 optab rshift_uns_optab = lshr_optab;
2128 optab lrotate_optab = rotl_optab;
2129 optab rrotate_optab = rotr_optab;
2130 enum machine_mode op1_mode;
2131 enum machine_mode scalar_mode = mode;
2132 int attempt;
2133 bool speed = optimize_insn_for_speed_p ();
2134
2135 if (VECTOR_MODE_P (mode))
2136 scalar_mode = GET_MODE_INNER (mode);
2137 op1 = amount;
2138 op1_mode = GET_MODE (op1);
2139
2140 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2141 shift amount is a vector, use the vector/vector shift patterns. */
2142 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2143 {
2144 lshift_optab = vashl_optab;
2145 rshift_arith_optab = vashr_optab;
2146 rshift_uns_optab = vlshr_optab;
2147 lrotate_optab = vrotl_optab;
2148 rrotate_optab = vrotr_optab;
2149 }
2150
2151 /* We used to detect shift counts computed by NEGATE_EXPR and shift
2152 in the other direction, but that does not work
2153 on all machines. */
2154
2155 if (SHIFT_COUNT_TRUNCATED)
2156 {
2157 if (CONST_INT_P (op1)
2158 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2159 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2160 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2161 % GET_MODE_BITSIZE (scalar_mode));
2162 else if (GET_CODE (op1) == SUBREG
2163 && subreg_lowpart_p (op1)
2164 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2165 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2166 op1 = SUBREG_REG (op1);
2167 }
2168
2169 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2170 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2171 bitsize - 1, rotate in the other direction with an amount of
2172 1 .. bitsize / 2 - 1 instead. */
2173 if (rotate
2174 && CONST_INT_P (op1)
2175 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2176 GET_MODE_BITSIZE (scalar_mode) - 1))
2177 {
2178 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2179 left = !left;
2180 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2181 }
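/* For example (illustrative, SImode): an LROTATE_EXPR by 29 falls in
   the range 17 .. 31 and is rewritten as an RROTATE_EXPR by
   32 - 29 == 3, while an RROTATE_EXPR by 16 becomes an LROTATE_EXPR
   by 16, so rotates by exactly half the width prefer the left
   form.  */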
2182
2183 if (op1 == const0_rtx)
2184 return shifted;
2185
2186 /* Check whether it's cheaper to implement a left shift by a constant
2187 bit count by a sequence of additions. */
2188 if (code == LSHIFT_EXPR
2189 && CONST_INT_P (op1)
2190 && INTVAL (op1) > 0
2191 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2192 && INTVAL (op1) < MAX_BITS_PER_WORD
2193 && (shift_cost (speed, mode, INTVAL (op1))
2194 > INTVAL (op1) * add_cost (speed, mode))
2195 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2196 {
2197 int i;
2198 for (i = 0; i < INTVAL (op1); i++)
2199 {
2200 temp = force_reg (mode, shifted);
2201 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2202 unsignedp, OPTAB_LIB_WIDEN);
2203 }
2204 return shifted;
2205 }
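/* Illustrative expansion (a sketch): on a target where an add is
   cheaper than a shift, x << 2 is emitted as two self-additions,

     t1 = x  + x;       -- x * 2
     t2 = t1 + t1;      -- x * 4 == x << 2

   i.e. the loop above performs INTVAL (op1) doublings.  */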
2206
2207 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2208 {
2209 enum optab_methods methods;
2210
2211 if (attempt == 0)
2212 methods = OPTAB_DIRECT;
2213 else if (attempt == 1)
2214 methods = OPTAB_WIDEN;
2215 else
2216 methods = OPTAB_LIB_WIDEN;
2217
2218 if (rotate)
2219 {
2220 /* Widening does not work for rotation. */
2221 if (methods == OPTAB_WIDEN)
2222 continue;
2223 else if (methods == OPTAB_LIB_WIDEN)
2224 {
2225 /* If we have been unable to open-code this by a rotation,
2226 do it as the IOR of two shifts. I.e., to rotate A
2227 by N bits, compute
2228 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2229 where C is the bitsize of A.
2230
2231 It is theoretically possible that the target machine might
2232 not be able to perform either shift and hence we would
2233 be making two libcalls rather than just the one for the
2234 shift (similarly if IOR could not be done). We will allow
2235 this extremely unlikely lossage to avoid complicating the
2236 code below. */
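   /* Illustrative sketch, e.g. for a 32-bit mode: rotating A left by a
      variable amount N becomes

        (A << N) | ((unsigned) A >> ((-N) & 31))

      The "& 31" keeps the right-shift count in 0 .. 31 (N == 0 maps to
      a right shift of 0), so the right shift never needs the full
      bitwidth.  For a constant N, say N == 8, the amounts are simply
      8 and 24.  */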
2237
2238 rtx subtarget = target == shifted ? 0 : target;
2239 rtx new_amount, other_amount;
2240 rtx temp1;
2241
2242 new_amount = op1;
2243 if (op1 == const0_rtx)
2244 return shifted;
2245 else if (CONST_INT_P (op1))
2246 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2247 - INTVAL (op1));
2248 else
2249 {
2250 other_amount
2251 = simplify_gen_unary (NEG, GET_MODE (op1),
2252 op1, GET_MODE (op1));
2253 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2254 other_amount
2255 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2256 gen_int_mode (mask, GET_MODE (op1)));
2257 }
2258
2259 shifted = force_reg (mode, shifted);
2260
2261 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2262 mode, shifted, new_amount, 0, 1);
2263 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2264 mode, shifted, other_amount,
2265 subtarget, 1);
2266 return expand_binop (mode, ior_optab, temp, temp1, target,
2267 unsignedp, methods);
2268 }
2269
2270 temp = expand_binop (mode,
2271 left ? lrotate_optab : rrotate_optab,
2272 shifted, op1, target, unsignedp, methods);
2273 }
2274 else if (unsignedp)
2275 temp = expand_binop (mode,
2276 left ? lshift_optab : rshift_uns_optab,
2277 shifted, op1, target, unsignedp, methods);
2278
2279 /* Do arithmetic shifts.
2280 Also, if we are going to widen the operand, we can just as well
2281 use an arithmetic right-shift instead of a logical one. */
2282 if (temp == 0 && ! rotate
2283 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2284 {
2285 enum optab_methods methods1 = methods;
2286
2287 /* If trying to widen a log shift to an arithmetic shift,
2288 don't accept an arithmetic shift of the same size. */
2289 if (unsignedp)
2290 methods1 = OPTAB_MUST_WIDEN;
2291
2292 /* Arithmetic shift */
2293
2294 temp = expand_binop (mode,
2295 left ? lshift_optab : rshift_arith_optab,
2296 shifted, op1, target, unsignedp, methods1);
2297 }
2298
2299 /* We used to try extzv here for logical right shifts, but that was
2300 only useful for one machine, the VAX, and caused poor code
2301 generation there for lshrdi3, so the code was deleted and a
2302 define_expand for lshrsi3 was added to vax.md. */
2303 }
2304
2305 gcc_assert (temp);
2306 return temp;
2307 }
2308
2309 /* Output a shift instruction for expression code CODE,
2310 with SHIFTED being the rtx for the value to shift,
2311 and AMOUNT the amount to shift by.
2312 Store the result in the rtx TARGET, if that is convenient.
2313 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2314 Return the rtx for where the value is. */
2315
2316 rtx
2317 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2318 int amount, rtx target, int unsignedp)
2319 {
2320 return expand_shift_1 (code, mode,
2321 shifted, GEN_INT (amount), target, unsignedp);
2322 }
2323
2324 /* Output a shift instruction for expression code CODE,
2325 with SHIFTED being the rtx for the value to shift,
2326 and AMOUNT the tree for the amount to shift by.
2327 Store the result in the rtx TARGET, if that is convenient.
2328 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2329 Return the rtx for where the value is. */
2330
2331 rtx
2332 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2333 tree amount, rtx target, int unsignedp)
2334 {
2335 return expand_shift_1 (code, mode,
2336 shifted, expand_normal (amount), target, unsignedp);
2337 }
2338
2339 \f
2340 /* Indicates the type of fixup needed after a constant multiplication.
2341 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2342 the result should be negated, and ADD_VARIANT means that the
2343 multiplicand should be added to the result. */
2344 enum mult_variant {basic_variant, negate_variant, add_variant};
2345
2346 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2347 const struct mult_cost *, enum machine_mode mode);
2348 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2349 struct algorithm *, enum mult_variant *, int);
2350 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2351 const struct algorithm *, enum mult_variant);
2352 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2353 static rtx extract_high_half (enum machine_mode, rtx);
2354 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2355 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2356 int, int);
2357 /* Compute and return the best algorithm for multiplying by T.
2358 The algorithm must cost less than COST_LIMIT.
2359 If retval.cost >= COST_LIMIT, no algorithm was found and all
2360 other fields of the returned struct are undefined.
2361 MODE is the machine mode of the multiplication. */
2362
2363 static void
2364 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2365 const struct mult_cost *cost_limit, enum machine_mode mode)
2366 {
2367 int m;
2368 struct algorithm *alg_in, *best_alg;
2369 struct mult_cost best_cost;
2370 struct mult_cost new_limit;
2371 int op_cost, op_latency;
2372 unsigned HOST_WIDE_INT orig_t = t;
2373 unsigned HOST_WIDE_INT q;
2374 int maxm, hash_index;
2375 bool cache_hit = false;
2376 enum alg_code cache_alg = alg_zero;
2377 bool speed = optimize_insn_for_speed_p ();
2378 enum machine_mode imode;
2379 struct alg_hash_entry *entry_ptr;
2380
2381 /* Indicate that no algorithm is yet found. If no algorithm
2382 is found, this value will be returned and indicate failure. */
2383 alg_out->cost.cost = cost_limit->cost + 1;
2384 alg_out->cost.latency = cost_limit->latency + 1;
2385
2386 if (cost_limit->cost < 0
2387 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2388 return;
2389
2390 /* Be prepared for vector modes. */
2391 imode = GET_MODE_INNER (mode);
2392 if (imode == VOIDmode)
2393 imode = mode;
2394
2395 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2396
2397 /* Restrict the bits of "t" to the multiplication's mode. */
2398 t &= GET_MODE_MASK (imode);
2399
2400 /* t == 1 can be done in zero cost. */
2401 if (t == 1)
2402 {
2403 alg_out->ops = 1;
2404 alg_out->cost.cost = 0;
2405 alg_out->cost.latency = 0;
2406 alg_out->op[0] = alg_m;
2407 return;
2408 }
2409
2410 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2411 fail now. */
2412 if (t == 0)
2413 {
2414 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2415 return;
2416 else
2417 {
2418 alg_out->ops = 1;
2419 alg_out->cost.cost = zero_cost (speed);
2420 alg_out->cost.latency = zero_cost (speed);
2421 alg_out->op[0] = alg_zero;
2422 return;
2423 }
2424 }
2425
2426 /* We'll be needing a couple extra algorithm structures now. */
2427
2428 alg_in = XALLOCA (struct algorithm);
2429 best_alg = XALLOCA (struct algorithm);
2430 best_cost = *cost_limit;
2431
2432 /* Compute the hash index. */
2433 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2434
2435 /* See if we already know what to do for T. */
2436 entry_ptr = alg_hash_entry_ptr (hash_index);
2437 if (entry_ptr->t == t
2438 && entry_ptr->mode == mode
2440 && entry_ptr->speed == speed
2441 && entry_ptr->alg != alg_unknown)
2442 {
2443 cache_alg = entry_ptr->alg;
2444
2445 if (cache_alg == alg_impossible)
2446 {
2447 /* The cache tells us that it's impossible to synthesize
2448 multiplication by T within entry_ptr->cost. */
2449 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2450 /* COST_LIMIT is at least as restrictive as the one
2451 recorded in the hash table, in which case we have no
2452 hope of synthesizing a multiplication. Just
2453 return. */
2454 return;
2455
2456 /* If we get here, COST_LIMIT is less restrictive than the
2457 one recorded in the hash table, so we may be able to
2458 synthesize a multiplication. Proceed as if we didn't
2459 have the cache entry. */
2460 }
2461 else
2462 {
2463 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2464 /* The cached algorithm shows that this multiplication
2465 requires more cost than COST_LIMIT. Just return. This
2466 way, we don't clobber this cache entry with
2467 alg_impossible but retain useful information. */
2468 return;
2469
2470 cache_hit = true;
2471
2472 switch (cache_alg)
2473 {
2474 case alg_shift:
2475 goto do_alg_shift;
2476
2477 case alg_add_t_m2:
2478 case alg_sub_t_m2:
2479 goto do_alg_addsub_t_m2;
2480
2481 case alg_add_factor:
2482 case alg_sub_factor:
2483 goto do_alg_addsub_factor;
2484
2485 case alg_add_t2_m:
2486 goto do_alg_add_t2_m;
2487
2488 case alg_sub_t2_m:
2489 goto do_alg_sub_t2_m;
2490
2491 default:
2492 gcc_unreachable ();
2493 }
2494 }
2495 }
2496
2497 /* If we have a group of zero bits at the low-order part of T, try
2498 multiplying by the remaining bits and then doing a shift. */
2499
2500 if ((t & 1) == 0)
2501 {
2502 do_alg_shift:
2503 m = floor_log2 (t & -t); /* m = number of low zero bits */
2504 if (m < maxm)
2505 {
2506 q = t >> m;
2507 /* The function expand_shift will choose between a shift and
2508 a sequence of additions, so the observed cost is given as
2509 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2510 op_cost = m * add_cost (speed, mode);
2511 if (shift_cost (speed, mode, m) < op_cost)
2512 op_cost = shift_cost (speed, mode, m);
2513 new_limit.cost = best_cost.cost - op_cost;
2514 new_limit.latency = best_cost.latency - op_cost;
2515 synth_mult (alg_in, q, &new_limit, mode);
2516
2517 alg_in->cost.cost += op_cost;
2518 alg_in->cost.latency += op_cost;
2519 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2520 {
2521 struct algorithm *x;
2522 best_cost = alg_in->cost;
2523 x = alg_in, alg_in = best_alg, best_alg = x;
2524 best_alg->log[best_alg->ops] = m;
2525 best_alg->op[best_alg->ops] = alg_shift;
2526 }
2527
2528 /* See if treating ORIG_T as a signed number yields a better
2529 sequence. Try this sequence only for a negative ORIG_T
2530 as it would be useless for a non-negative ORIG_T. */
2531 if ((HOST_WIDE_INT) orig_t < 0)
2532 {
2533 /* Shift ORIG_T as follows because a right shift of a
2534 negative-valued signed type is implementation
2535 defined. */
2536 q = ~(~orig_t >> m);
2537 /* The function expand_shift will choose between a shift
2538 and a sequence of additions, so the observed cost is
2539 given as MIN (m * add_cost(speed, mode),
2540 shift_cost(speed, mode, m)). */
2541 op_cost = m * add_cost (speed, mode);
2542 if (shift_cost (speed, mode, m) < op_cost)
2543 op_cost = shift_cost (speed, mode, m);
2544 new_limit.cost = best_cost.cost - op_cost;
2545 new_limit.latency = best_cost.latency - op_cost;
2546 synth_mult (alg_in, q, &new_limit, mode);
2547
2548 alg_in->cost.cost += op_cost;
2549 alg_in->cost.latency += op_cost;
2550 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2551 {
2552 struct algorithm *x;
2553 best_cost = alg_in->cost;
2554 x = alg_in, alg_in = best_alg, best_alg = x;
2555 best_alg->log[best_alg->ops] = m;
2556 best_alg->op[best_alg->ops] = alg_shift;
2557 }
2558 }
2559 }
2560 if (cache_hit)
2561 goto done;
2562 }
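/* For example (illustrative): T == 12 has two low zero bits, so
   m == 2 and Q == 3; the recursion synthesizes 3 * x as (x << 1) + x
   and the alg_shift step recorded here produces

     x * 12 == ((x << 1) + x) << 2  */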
2563
2564 /* If we have an odd number, add or subtract one. */
2565 if ((t & 1) != 0)
2566 {
2567 unsigned HOST_WIDE_INT w;
2568
2569 do_alg_addsub_t_m2:
2570 for (w = 1; (w & t) != 0; w <<= 1)
2571 ;
2572 /* If T was -1, then W will be zero after the loop. This is another
2573 case where T ends with ...111. Handling it by multiplying by (T + 1)
2574 and subtracting 1 produces slightly better code and makes algorithm
2575 selection much faster than treating it like the ...0111 case
2576 below. */
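/* For example (illustrative): T == 15 ends in four 1 bits, so W == 16
   here; the recursion synthesizes T + 1 == 16 as a single shift and
   the alg_sub_t_m2 step subtracts the operand once:

     x * 15 == (x << 4) - x

   which is cheaper than building 15 out of 8 + 4 + 2 + 1.  */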
2577 if (w == 0
2578 || (w > 2
2579 /* Reject the case where t is 3.
2580 Thus we prefer addition in that case. */
2581 && t != 3))
2582 {
2583 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2584
2585 op_cost = add_cost (speed, mode);
2586 new_limit.cost = best_cost.cost - op_cost;
2587 new_limit.latency = best_cost.latency - op_cost;
2588 synth_mult (alg_in, t + 1, &new_limit, mode);
2589
2590 alg_in->cost.cost += op_cost;
2591 alg_in->cost.latency += op_cost;
2592 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2593 {
2594 struct algorithm *x;
2595 best_cost = alg_in->cost;
2596 x = alg_in, alg_in = best_alg, best_alg = x;
2597 best_alg->log[best_alg->ops] = 0;
2598 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2599 }
2600 }
2601 else
2602 {
2603 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2604
2605 op_cost = add_cost (speed, mode);
2606 new_limit.cost = best_cost.cost - op_cost;
2607 new_limit.latency = best_cost.latency - op_cost;
2608 synth_mult (alg_in, t - 1, &new_limit, mode);
2609
2610 alg_in->cost.cost += op_cost;
2611 alg_in->cost.latency += op_cost;
2612 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2613 {
2614 struct algorithm *x;
2615 best_cost = alg_in->cost;
2616 x = alg_in, alg_in = best_alg, best_alg = x;
2617 best_alg->log[best_alg->ops] = 0;
2618 best_alg->op[best_alg->ops] = alg_add_t_m2;
2619 }
2620 }
2621
2622 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2623 quickly with a - a * n for some appropriate constant n. */
2624 m = exact_log2 (-orig_t + 1);
2625 if (m >= 0 && m < maxm)
2626 {
2627 op_cost = shiftsub1_cost (speed, mode, m);
2628 new_limit.cost = best_cost.cost - op_cost;
2629 new_limit.latency = best_cost.latency - op_cost;
2630 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2631 &new_limit, mode);
2632
2633 alg_in->cost.cost += op_cost;
2634 alg_in->cost.latency += op_cost;
2635 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2636 {
2637 struct algorithm *x;
2638 best_cost = alg_in->cost;
2639 x = alg_in, alg_in = best_alg, best_alg = x;
2640 best_alg->log[best_alg->ops] = m;
2641 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2642 }
2643 }
2644
2645 if (cache_hit)
2646 goto done;
2647 }
2648
2649 /* Look for factors of t of the form
2650 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2651 If we find such a factor, we can multiply by t using an algorithm that
2652 multiplies by q, shift the result by m and add/subtract it to itself.
2653
2654 We search for large factors first and loop down, even if large factors
2655 are less probable than small; if we find a large factor we will find a
2656 good sequence quickly, and therefore be able to prune (by decreasing
2657 COST_LIMIT) the search. */
2658
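/* Worked example (illustrative): for T == 45 the loop below starts at
   m == floor_log2 (44) == 5 and finds d == 2^3 + 1 == 9 at m == 3,
   since 45 % 9 == 0.  It then synthesizes 45 / 9 == 5 recursively
   (as (x << 2) + x) and records an alg_add_factor step:

     t1 = (x  << 2) + x;       -- 5 * x
     t2 = (t1 << 3) + t1;      -- 45 * x

   i.e. two shift-and-add operations in total.  */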
2659 do_alg_addsub_factor:
2660 for (m = floor_log2 (t - 1); m >= 2; m--)
2661 {
2662 unsigned HOST_WIDE_INT d;
2663
2664 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2665 if (t % d == 0 && t > d && m < maxm
2666 && (!cache_hit || cache_alg == alg_add_factor))
2667 {
2668 /* If the target has a cheap shift-and-add instruction use
2669 that in preference to a shift insn followed by an add insn.
2670 Assume that the shift-and-add is "atomic" with a latency
2671 equal to its cost, otherwise assume that on superscalar
2672 hardware the shift may be executed concurrently with the
2673 earlier steps in the algorithm. */
2674 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2675 if (shiftadd_cost (speed, mode, m) < op_cost)
2676 {
2677 op_cost = shiftadd_cost (speed, mode, m);
2678 op_latency = op_cost;
2679 }
2680 else
2681 op_latency = add_cost (speed, mode);
2682
2683 new_limit.cost = best_cost.cost - op_cost;
2684 new_limit.latency = best_cost.latency - op_latency;
2685 synth_mult (alg_in, t / d, &new_limit, mode);
2686
2687 alg_in->cost.cost += op_cost;
2688 alg_in->cost.latency += op_latency;
2689 if (alg_in->cost.latency < op_cost)
2690 alg_in->cost.latency = op_cost;
2691 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2692 {
2693 struct algorithm *x;
2694 best_cost = alg_in->cost;
2695 x = alg_in, alg_in = best_alg, best_alg = x;
2696 best_alg->log[best_alg->ops] = m;
2697 best_alg->op[best_alg->ops] = alg_add_factor;
2698 }
2699 /* Other factors will have been taken care of in the recursion. */
2700 break;
2701 }
2702
2703 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2704 if (t % d == 0 && t > d && m < maxm
2705 && (!cache_hit || cache_alg == alg_sub_factor))
2706 {
2707 /* If the target has a cheap shift-and-subtract insn use
2708 that in preference to a shift insn followed by a sub insn.
2709 Assume that the shift-and-sub is "atomic" with a latency
2710 equal to its cost, otherwise assume that on superscalar
2711 hardware the shift may be executed concurrently with the
2712 earlier steps in the algorithm. */
2713 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2714 if (shiftsub0_cost (speed, mode, m) < op_cost)
2715 {
2716 op_cost = shiftsub0_cost (speed, mode, m);
2717 op_latency = op_cost;
2718 }
2719 else
2720 op_latency = add_cost (speed, mode);
2721
2722 new_limit.cost = best_cost.cost - op_cost;
2723 new_limit.latency = best_cost.latency - op_latency;
2724 synth_mult (alg_in, t / d, &new_limit, mode);
2725
2726 alg_in->cost.cost += op_cost;
2727 alg_in->cost.latency += op_latency;
2728 if (alg_in->cost.latency < op_cost)
2729 alg_in->cost.latency = op_cost;
2730 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2731 {
2732 struct algorithm *x;
2733 best_cost = alg_in->cost;
2734 x = alg_in, alg_in = best_alg, best_alg = x;
2735 best_alg->log[best_alg->ops] = m;
2736 best_alg->op[best_alg->ops] = alg_sub_factor;
2737 }
2738 break;
2739 }
2740 }
2741 if (cache_hit)
2742 goto done;
2743
2744 /* Try shift-and-add (load effective address) instructions,
2745 i.e. do a*3, a*5, a*9. */
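/* For example (illustrative): T == 9 gives q == 8 and m == 3, so the
   whole multiplication becomes the single shift-and-add step
   (x << 3) + x, which targets with a scaled-index address mode
   (e.g. x86 "lea") can emit as one instruction.  */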
2746 if ((t & 1) != 0)
2747 {
2748 do_alg_add_t2_m:
2749 q = t - 1;
2750 q = q & -q;
2751 m = exact_log2 (q);
2752 if (m >= 0 && m < maxm)
2753 {
2754 op_cost = shiftadd_cost (speed, mode, m);
2755 new_limit.cost = best_cost.cost - op_cost;
2756 new_limit.latency = best_cost.latency - op_cost;
2757 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2758
2759 alg_in->cost.cost += op_cost;
2760 alg_in->cost.latency += op_cost;
2761 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2762 {
2763 struct algorithm *x;
2764 best_cost = alg_in->cost;
2765 x = alg_in, alg_in = best_alg, best_alg = x;
2766 best_alg->log[best_alg->ops] = m;
2767 best_alg->op[best_alg->ops] = alg_add_t2_m;
2768 }
2769 }
2770 if (cache_hit)
2771 goto done;
2772
2773 do_alg_sub_t2_m:
2774 q = t + 1;
2775 q = q & -q;
2776 m = exact_log2 (q);
2777 if (m >= 0 && m < maxm)
2778 {
2779 op_cost = shiftsub0_cost (speed, mode, m);
2780 new_limit.cost = best_cost.cost - op_cost;
2781 new_limit.latency = best_cost.latency - op_cost;
2782 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2783
2784 alg_in->cost.cost += op_cost;
2785 alg_in->cost.latency += op_cost;
2786 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2787 {
2788 struct algorithm *x;
2789 best_cost = alg_in->cost;
2790 x = alg_in, alg_in = best_alg, best_alg = x;
2791 best_alg->log[best_alg->ops] = m;
2792 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2793 }
2794 }
2795 if (cache_hit)
2796 goto done;
2797 }
2798
2799 done:
2800 /* If best_cost has not decreased, we have not found any algorithm. */
2801 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2802 {
2803 /* We failed to find an algorithm. Record alg_impossible for
2804 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2805 we are asked to find an algorithm for T within the same or
2806 lower COST_LIMIT, we can immediately return to the
2807 caller. */
2808 entry_ptr->t = t;
2809 entry_ptr->mode = mode;
2810 entry_ptr->speed = speed;
2811 entry_ptr->alg = alg_impossible;
2812 entry_ptr->cost = *cost_limit;
2813 return;
2814 }
2815
2816 /* Cache the result. */
2817 if (!cache_hit)
2818 {
2819 entry_ptr->t = t;
2820 entry_ptr->mode = mode;
2821 entry_ptr->speed = speed;
2822 entry_ptr->alg = best_alg->op[best_alg->ops];
2823 entry_ptr->cost.cost = best_cost.cost;
2824 entry_ptr->cost.latency = best_cost.latency;
2825 }
2826
2827 /* If the sequence is getting too long for `struct algorithm'
2828 to record, make this search fail.
2829 if (best_alg->ops == MAX_BITS_PER_WORD)
2830 return;
2831
2832 /* Copy the algorithm from temporary space to the space at alg_out.
2833 We avoid using structure assignment because the majority of
2834 best_alg is normally undefined, and this is a critical function. */
2835 alg_out->ops = best_alg->ops + 1;
2836 alg_out->cost = best_cost;
2837 memcpy (alg_out->op, best_alg->op,
2838 alg_out->ops * sizeof *alg_out->op);
2839 memcpy (alg_out->log, best_alg->log,
2840 alg_out->ops * sizeof *alg_out->log);
2841 }
2842 \f
2843 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2844 Try three variations:
2845
2846 - a shift/add sequence based on VAL itself
2847 - a shift/add sequence based on -VAL, followed by a negation
2848 - a shift/add sequence based on VAL - 1, followed by an addition.
2849
2850 Return true if the cheapest of these cost less than MULT_COST,
2851 describing the algorithm in *ALG and final fixup in *VARIANT. */
2852
2853 static bool
2854 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2855 struct algorithm *alg, enum mult_variant *variant,
2856 int mult_cost)
2857 {
2858 struct algorithm alg2;
2859 struct mult_cost limit;
2860 int op_cost;
2861 bool speed = optimize_insn_for_speed_p ();
2862
2863 /* Fail quickly for impossible bounds. */
2864 if (mult_cost < 0)
2865 return false;
2866
2867 /* Ensure that mult_cost provides a reasonable upper bound.
2868 Any constant multiplication can be performed with less
2869 than 2 * bits additions. */
2870 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2871 if (mult_cost > op_cost)
2872 mult_cost = op_cost;
2873
2874 *variant = basic_variant;
2875 limit.cost = mult_cost;
2876 limit.latency = mult_cost;
2877 synth_mult (alg, val, &limit, mode);
2878
2879 /* This works only if the inverted value actually fits in an
2880 `unsigned int'. */
2881 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2882 {
2883 op_cost = neg_cost (speed, mode);
2884 if (MULT_COST_LESS (&alg->cost, mult_cost))
2885 {
2886 limit.cost = alg->cost.cost - op_cost;
2887 limit.latency = alg->cost.latency - op_cost;
2888 }
2889 else
2890 {
2891 limit.cost = mult_cost - op_cost;
2892 limit.latency = mult_cost - op_cost;
2893 }
2894
2895 synth_mult (&alg2, -val, &limit, mode);
2896 alg2.cost.cost += op_cost;
2897 alg2.cost.latency += op_cost;
2898 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2899 *alg = alg2, *variant = negate_variant;
2900 }
2901
2902 /* This proves very useful for division-by-constant. */
2903 op_cost = add_cost (speed, mode);
2904 if (MULT_COST_LESS (&alg->cost, mult_cost))
2905 {
2906 limit.cost = alg->cost.cost - op_cost;
2907 limit.latency = alg->cost.latency - op_cost;
2908 }
2909 else
2910 {
2911 limit.cost = mult_cost - op_cost;
2912 limit.latency = mult_cost - op_cost;
2913 }
2914
2915 synth_mult (&alg2, val - 1, &limit, mode);
2916 alg2.cost.cost += op_cost;
2917 alg2.cost.latency += op_cost;
2918 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2919 *alg = alg2, *variant = add_variant;
2920
2921 return MULT_COST_LESS (&alg->cost, mult_cost);
2922 }
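/* Illustrative example (a sketch of typical results, not the output of
   any particular target): for VAL == -5, synthesizing the bit pattern
   of -5 directly needs several operations, while synthesizing 5 as
   (x << 2) + x and then negating needs only two, so *ALG describes the
   algorithm for 5 and *VARIANT becomes negate_variant.  add_variant is
   chosen analogously whenever VAL - 1 has a much cheaper sequence than
   VAL itself.  */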
2923
2924 /* A subroutine of expand_mult, used for constant multiplications.
2925 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2926 convenient. Use the shift/add sequence described by ALG and apply
2927 the final fixup specified by VARIANT. */
2928
2929 static rtx
2930 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2931 rtx target, const struct algorithm *alg,
2932 enum mult_variant variant)
2933 {
2934 HOST_WIDE_INT val_so_far;
2935 rtx insn, accum, tem;
2936 int opno;
2937 enum machine_mode nmode;
2938
2939 /* Avoid referencing memory over and over and invalid sharing
2940 on SUBREGs. */
2941 op0 = force_reg (mode, op0);
2942
2943 /* ACCUM starts out either as OP0 or as a zero, depending on
2944 the first operation. */
2945
2946 if (alg->op[0] == alg_zero)
2947 {
2948 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2949 val_so_far = 0;
2950 }
2951 else if (alg->op[0] == alg_m)
2952 {
2953 accum = copy_to_mode_reg (mode, op0);
2954 val_so_far = 1;
2955 }
2956 else
2957 gcc_unreachable ();
2958
2959 for (opno = 1; opno < alg->ops; opno++)
2960 {
2961 int log = alg->log[opno];
2962 rtx shift_subtarget = optimize ? 0 : accum;
2963 rtx add_target
2964 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2965 && !optimize)
2966 ? target : 0;
2967 rtx accum_target = optimize ? 0 : accum;
2968 rtx accum_inner;
2969
2970 switch (alg->op[opno])
2971 {
2972 case alg_shift:
2973 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2974 /* REG_EQUAL note will be attached to the following insn. */
2975 emit_move_insn (accum, tem);
2976 val_so_far <<= log;
2977 break;
2978
2979 case alg_add_t_m2:
2980 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2981 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2982 add_target ? add_target : accum_target);
2983 val_so_far += (HOST_WIDE_INT) 1 << log;
2984 break;
2985
2986 case alg_sub_t_m2:
2987 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2988 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2989 add_target ? add_target : accum_target);
2990 val_so_far -= (HOST_WIDE_INT) 1 << log;
2991 break;
2992
2993 case alg_add_t2_m:
2994 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2995 log, shift_subtarget, 0);
2996 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2997 add_target ? add_target : accum_target);
2998 val_so_far = (val_so_far << log) + 1;
2999 break;
3000
3001 case alg_sub_t2_m:
3002 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3003 log, shift_subtarget, 0);
3004 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3005 add_target ? add_target : accum_target);
3006 val_so_far = (val_so_far << log) - 1;
3007 break;
3008
3009 case alg_add_factor:
3010 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3011 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3012 add_target ? add_target : accum_target);
3013 val_so_far += val_so_far << log;
3014 break;
3015
3016 case alg_sub_factor:
3017 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3018 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3019 (add_target
3020 ? add_target : (optimize ? 0 : tem)));
3021 val_so_far = (val_so_far << log) - val_so_far;
3022 break;
3023
3024 default:
3025 gcc_unreachable ();
3026 }
3027
3028 if (SCALAR_INT_MODE_P (mode))
3029 {
3030 /* Write a REG_EQUAL note on the last insn so that we can cse
3031 multiplication sequences. Note that if ACCUM is a SUBREG,
3032 we've set the inner register and must properly indicate that. */
3033 tem = op0, nmode = mode;
3034 accum_inner = accum;
3035 if (GET_CODE (accum) == SUBREG)
3036 {
3037 accum_inner = SUBREG_REG (accum);
3038 nmode = GET_MODE (accum_inner);
3039 tem = gen_lowpart (nmode, op0);
3040 }
3041
3042 insn = get_last_insn ();
3043 set_dst_reg_note (insn, REG_EQUAL,
3044 gen_rtx_MULT (nmode, tem,
3045 gen_int_mode (val_so_far, nmode)),
3046 accum_inner);
3047 }
3048 }
3049
3050 if (variant == negate_variant)
3051 {
3052 val_so_far = -val_so_far;
3053 accum = expand_unop (mode, neg_optab, accum, target, 0);
3054 }
3055 else if (variant == add_variant)
3056 {
3057 val_so_far = val_so_far + 1;
3058 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3059 }
3060
3061 /* Compare only the bits of val and val_so_far that are significant
3062 in the result mode, to avoid sign-/zero-extension confusion. */
3063 nmode = GET_MODE_INNER (mode);
3064 if (nmode == VOIDmode)
3065 nmode = mode;
3066 val &= GET_MODE_MASK (nmode);
3067 val_so_far &= GET_MODE_MASK (nmode);
3068 gcc_assert (val == val_so_far);
3069
3070 return accum;
3071 }
3072
3073 /* Perform a multiplication and return an rtx for the result.
3074 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3075 TARGET is a suggestion for where to store the result (an rtx).
3076
3077 We check specially for a constant integer as OP1.
3078 If you want this check for OP0 as well, then before calling
3079 you should swap the two operands if OP0 would be constant. */
3080
3081 rtx
3082 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3083 int unsignedp)
3084 {
3085 enum mult_variant variant;
3086 struct algorithm algorithm;
3087 rtx scalar_op1;
3088 int max_cost;
3089 bool speed = optimize_insn_for_speed_p ();
3090 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3091
3092 if (CONSTANT_P (op0))
3093 {
3094 rtx temp = op0;
3095 op0 = op1;
3096 op1 = temp;
3097 }
3098
3099 /* For vectors, there are several simplifications that can be made if
3100 all elements of the vector constant are identical. */
3101 scalar_op1 = op1;
3102 if (GET_CODE (op1) == CONST_VECTOR)
3103 {
3104 int i, n = CONST_VECTOR_NUNITS (op1);
3105 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3106 for (i = 1; i < n; ++i)
3107 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3108 goto skip_scalar;
3109 }
3110
3111 if (INTEGRAL_MODE_P (mode))
3112 {
3113 rtx fake_reg;
3114 HOST_WIDE_INT coeff;
3115 bool is_neg;
3116 int mode_bitsize;
3117
3118 if (op1 == CONST0_RTX (mode))
3119 return op1;
3120 if (op1 == CONST1_RTX (mode))
3121 return op0;
3122 if (op1 == CONSTM1_RTX (mode))
3123 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3124 op0, target, 0);
3125
3126 if (do_trapv)
3127 goto skip_synth;
3128
3129 /* If mode is an integer vector mode, check if the backend supports
3130 vector lshift (by scalar or vector) at all. If not, we can't use
3131 a synthesized multiply. */
3132 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3133 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3134 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3135 goto skip_synth;
3136
3137 /* These are the operations that are potentially turned into
3138 a sequence of shifts and additions. */
3139 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3140
3141 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3142 less than or equal in size to `unsigned int' this doesn't matter.
3143 If the mode is larger than `unsigned int', then synth_mult works
3144 only if the constant value exactly fits in an `unsigned int' without
3145 any truncation. This means that multiplying by negative values does
3146 not work; results are off by 2^32 on a 32 bit machine. */
3147 if (CONST_INT_P (scalar_op1))
3148 {
3149 coeff = INTVAL (scalar_op1);
3150 is_neg = coeff < 0;
3151 }
3152 #if TARGET_SUPPORTS_WIDE_INT
3153 else if (CONST_WIDE_INT_P (scalar_op1))
3154 #else
3155 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3156 #endif
3157 {
3158 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3159 /* Perfect power of 2 (other than 1, which is handled above). */
3160 if (shift > 0)
3161 return expand_shift (LSHIFT_EXPR, mode, op0,
3162 shift, target, unsignedp);
3163 else
3164 goto skip_synth;
3165 }
3166 else
3167 goto skip_synth;
3168
3169 /* We used to test optimize here, on the grounds that it's better to
3170 produce a smaller program when -O is not used. But this causes
3171 such a terrible slowdown sometimes that it seems better to always
3172 use synth_mult. */
3173
3174 /* Special case powers of two. */
3175 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3176 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3177 return expand_shift (LSHIFT_EXPR, mode, op0,
3178 floor_log2 (coeff), target, unsignedp);
3179
3180 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3181
3182 /* Attempt to handle multiplication of DImode values by negative
3183 coefficients, by performing the multiplication by a positive
3184 multiplier and then inverting the result. */
3185 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3186 {
3187 /* It's safe to use -coeff even for INT_MIN, as the
3188 result is interpreted as an unsigned coefficient.
3189 Exclude cost of op0 from max_cost to match the cost
3190 calculation of the synth_mult. */
3191 coeff = -(unsigned HOST_WIDE_INT) coeff;
3192 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3193 - neg_cost (speed, mode));
3194 if (max_cost <= 0)
3195 goto skip_synth;
3196
3197 /* Special case powers of two. */
3198 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3199 {
3200 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3201 floor_log2 (coeff), target, unsignedp);
3202 return expand_unop (mode, neg_optab, temp, target, 0);
3203 }
3204
3205 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3206 max_cost))
3207 {
3208 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3209 &algorithm, variant);
3210 return expand_unop (mode, neg_optab, temp, target, 0);
3211 }
3212 goto skip_synth;
3213 }
3214
3215 /* Exclude cost of op0 from max_cost to match the cost
3216 calculation of the synth_mult. */
3217 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3218 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3219 return expand_mult_const (mode, op0, coeff, target,
3220 &algorithm, variant);
3221 }
3222 skip_synth:
3223
3224 /* Expand x*2.0 as x+x. */
3225 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3226 {
3227 REAL_VALUE_TYPE d;
3228 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3229
3230 if (REAL_VALUES_EQUAL (d, dconst2))
3231 {
3232 op0 = force_reg (GET_MODE (op0), op0);
3233 return expand_binop (mode, add_optab, op0, op0,
3234 target, unsignedp, OPTAB_LIB_WIDEN);
3235 }
3236 }
3237 skip_scalar:
3238
3239 /* This used to use umul_optab if unsigned, but for non-widening multiply
3240 there is no difference between signed and unsigned. */
3241 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3242 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3243 gcc_assert (op0);
3244 return op0;
3245 }
3246
3247 /* Return a cost estimate for multiplying a register by the given
3248 COEFFicient in the given MODE and SPEED. */
3249
3250 int
3251 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3252 {
3253 int max_cost;
3254 struct algorithm algorithm;
3255 enum mult_variant variant;
3256
3257 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3258 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3259 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3260 return algorithm.cost.cost;
3261 else
3262 return max_cost;
3263 }
3264
3265 /* Perform a widening multiplication and return an rtx for the result.
3266 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3267 TARGET is a suggestion for where to store the result (an rtx).
3268 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3269 or smul_widen_optab.
3270
3271 We check specially for a constant integer as OP1, comparing the
3272 cost of a widening multiply against the cost of a sequence of shifts
3273 and adds. */
3274
3275 rtx
3276 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3277 int unsignedp, optab this_optab)
3278 {
3279 bool speed = optimize_insn_for_speed_p ();
3280 rtx cop1;
3281
3282 if (CONST_INT_P (op1)
3283 && GET_MODE (op0) != VOIDmode
3284 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3285 this_optab == umul_widen_optab))
3286 && CONST_INT_P (cop1)
3287 && (INTVAL (cop1) >= 0
3288 || HWI_COMPUTABLE_MODE_P (mode)))
3289 {
3290 HOST_WIDE_INT coeff = INTVAL (cop1);
3291 int max_cost;
3292 enum mult_variant variant;
3293 struct algorithm algorithm;
3294
3295 /* Special case powers of two. */
3296 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3297 {
3298 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3299 return expand_shift (LSHIFT_EXPR, mode, op0,
3300 floor_log2 (coeff), target, unsignedp);
3301 }
3302
3303 /* Exclude cost of op0 from max_cost to match the cost
3304 calculation of the synth_mult. */
3305 max_cost = mul_widen_cost (speed, mode);
3306 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3307 max_cost))
3308 {
3309 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3310 return expand_mult_const (mode, op0, coeff, target,
3311 &algorithm, variant);
3312 }
3313 }
3314 return expand_binop (mode, this_optab, op0, op1, target,
3315 unsignedp, OPTAB_LIB_WIDEN);
3316 }
3317 \f
3318 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3319 replace division by D, and put the least significant N bits of the result
3320 in *MULTIPLIER_PTR and return the most significant bit.
3321
3322 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3323 needed precision is in PRECISION (should be <= N).
3324
3325 PRECISION should be as small as possible so this function can choose
3326 the multiplier more freely.
3327
3328 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3329 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3330
3331 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3332 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3333
3334 unsigned HOST_WIDE_INT
3335 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3336 unsigned HOST_WIDE_INT *multiplier_ptr,
3337 int *post_shift_ptr, int *lgup_ptr)
3338 {
3339 int lgup, post_shift;
3340 int pow, pow2;
3341
3342 /* lgup = ceil(log2(divisor)); */
3343 lgup = ceil_log2 (d);
3344
3345 gcc_assert (lgup <= n);
3346
3347 pow = n + lgup;
3348 pow2 = n + lgup - precision;
3349
3350 /* mlow = 2^(N + lgup)/d */
3351 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3352 wide_int mlow = wi::udiv_trunc (val, d);
3353
3354 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3355 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3356 wide_int mhigh = wi::udiv_trunc (val, d);
3357
3358 /* If precision == N, then mlow, mhigh exceed 2^N
3359 (but they do not exceed 2^(N+1)). */
3360
3361 /* Reduce to lowest terms. */
3362 for (post_shift = lgup; post_shift > 0; post_shift--)
3363 {
3364 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3365 HOST_BITS_PER_WIDE_INT);
3366 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3367 HOST_BITS_PER_WIDE_INT);
3368 if (ml_lo >= mh_lo)
3369 break;
3370
3371 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3372 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3373 }
3374
3375 *post_shift_ptr = post_shift;
3376 *lgup_ptr = lgup;
3377 if (n < HOST_BITS_PER_WIDE_INT)
3378 {
3379 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3380 *multiplier_ptr = mhigh.to_uhwi () & mask;
3381 return mhigh.to_uhwi () >= mask;
3382 }
3383 else
3384 {
3385 *multiplier_ptr = mhigh.to_uhwi ();
3386 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3387 }
3388 }
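/* A worked example: for D = 7 and N = PRECISION = 32, lgup = 3, so
   mlow  = 2^35 / 7         = 0x124924924 and
   mhigh = (2^35 + 2^3) / 7 = 0x124924925.
   The reduction loop stops at once because mlow and mhigh already agree
   when shifted right by one bit, leaving *POST_SHIFT_PTR == 3.  mhigh
   needs 33 bits, so (with a 64-bit HOST_WIDE_INT) its low 32 bits
   0x24924925 go into *MULTIPLIER_PTR and the function returns 1.  The
   caller then applies the N+1 bit multiplier with the add-and-shift
   fixup used for the mh != 0 case in expand_divmod, roughly
     t1 = ((unsigned long long) x * 0x24924925) >> 32;
     q  = (t1 + ((x - t1) >> 1)) >> 2;
   e.g. x == 100 gives t1 == 14 and q == (14 + 43) >> 2 == 14. */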
3389
3390 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3391 congruent to 1 (mod 2**N). */
3392
3393 static unsigned HOST_WIDE_INT
3394 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3395 {
3396 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3397
3398 /* The algorithm notes that the choice y = x satisfies
3399 x*y == 1 mod 2^3, since x is assumed odd.
3400 Each iteration doubles the number of bits of significance in y. */
3401
3402 unsigned HOST_WIDE_INT mask;
3403 unsigned HOST_WIDE_INT y = x;
3404 int nbit = 3;
3405
3406 mask = (n == HOST_BITS_PER_WIDE_INT
3407 ? ~(unsigned HOST_WIDE_INT) 0
3408 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3409
3410 while (nbit < n)
3411 {
3412 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3413 nbit *= 2;
3414 }
3415 return y;
3416 }
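/* For example, invert_mod2n (3, 8): the initial guess y = 3 already
   satisfies 3*3 == 1 (mod 2^3).  The first iteration gives
   y = 3 * (2 - 9) & 0xff = 235, an inverse modulo 2^6 only
   (3 * 235 == 705 == 11*64 + 1); the second gives
   y = 235 * (2 - 705) & 0xff = 171, and 3 * 171 == 513 == 2*256 + 1,
   so 171 is returned as the inverse of 3 modulo 2^8. */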
3417
3418 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong
3419 signedness flavor of OP0 and OP1. ADJ_OPERAND is already the high half
3420 of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
3421 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3422 product to become signed.
3423
3424 The result is put in TARGET if that is convenient.
3425
3426 MODE is the mode of operation. */
3427
3428 rtx
3429 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3430 rtx op1, rtx target, int unsignedp)
3431 {
3432 rtx tem;
3433 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3434
3435 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3436 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3437 tem = expand_and (mode, tem, op1, NULL_RTX);
3438 adj_operand
3439 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3440 adj_operand);
3441
3442 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3443 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3444 tem = expand_and (mode, tem, op0, NULL_RTX);
3445 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3446 target);
3447
3448 return target;
3449 }
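/* The adjustment relies on the identity (modulo 2^N, N being the mode's
   bitsize):
     highpart_unsigned (a, b) == highpart_signed (a, b)
                                 + (a < 0 ? b : 0) + (b < 0 ? a : 0)
   since reinterpreting a negative operand as unsigned adds 2^N to it,
   which contributes exactly the other operand to the high half of the
   product.  The arithmetic right shift by N-1 gives an all-ones or
   all-zeros mask, so the ANDs produce those conditional addends, which
   are added when UNSIGNEDP is nonzero and subtracted when it is zero. */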
3450
3451 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3452
3453 static rtx
3454 extract_high_half (enum machine_mode mode, rtx op)
3455 {
3456 enum machine_mode wider_mode;
3457
3458 if (mode == word_mode)
3459 return gen_highpart (mode, op);
3460
3461 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3462
3463 wider_mode = GET_MODE_WIDER_MODE (mode);
3464 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3465 GET_MODE_BITSIZE (mode), 0, 1);
3466 return convert_modes (mode, wider_mode, op, 0);
3467 }
3468
3469 /* Like expmed_mult_highpart, but only consider using a multiplication
3470 optab. OP1 is an rtx for the constant operand. */
3471
3472 static rtx
3473 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3474 rtx target, int unsignedp, int max_cost)
3475 {
3476 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3477 enum machine_mode wider_mode;
3478 optab moptab;
3479 rtx tem;
3480 int size;
3481 bool speed = optimize_insn_for_speed_p ();
3482
3483 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3484
3485 wider_mode = GET_MODE_WIDER_MODE (mode);
3486 size = GET_MODE_BITSIZE (mode);
3487
3488 /* Firstly, try using a multiplication insn that only generates the needed
3489 high part of the product, and in the sign flavor of unsignedp. */
3490 if (mul_highpart_cost (speed, mode) < max_cost)
3491 {
3492 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3493 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3494 unsignedp, OPTAB_DIRECT);
3495 if (tem)
3496 return tem;
3497 }
3498
3499 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3500 Need to adjust the result after the multiplication. */
3501 if (size - 1 < BITS_PER_WORD
3502 && (mul_highpart_cost (speed, mode)
3503 + 2 * shift_cost (speed, mode, size-1)
3504 + 4 * add_cost (speed, mode) < max_cost))
3505 {
3506 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3507 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3508 unsignedp, OPTAB_DIRECT);
3509 if (tem)
3510 /* We used the wrong signedness. Adjust the result. */
3511 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3512 tem, unsignedp);
3513 }
3514
3515 /* Try widening multiplication. */
3516 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3517 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3518 && mul_widen_cost (speed, wider_mode) < max_cost)
3519 {
3520 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3521 unsignedp, OPTAB_WIDEN);
3522 if (tem)
3523 return extract_high_half (mode, tem);
3524 }
3525
3526 /* Try widening the mode and performing a non-widening multiplication. */
3527 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3528 && size - 1 < BITS_PER_WORD
3529 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3530 < max_cost))
3531 {
3532 rtx insns, wop0, wop1;
3533
3534 /* We need to widen the operands, for example to ensure the
3535 constant multiplier is correctly sign or zero extended.
3536 Use a sequence to clean up any instructions emitted by
3537 the conversions if things don't work out. */
3538 start_sequence ();
3539 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3540 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3541 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3542 unsignedp, OPTAB_WIDEN);
3543 insns = get_insns ();
3544 end_sequence ();
3545
3546 if (tem)
3547 {
3548 emit_insn (insns);
3549 return extract_high_half (mode, tem);
3550 }
3551 }
3552
3553 /* Try widening multiplication of opposite signedness, and adjust. */
3554 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3555 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3556 && size - 1 < BITS_PER_WORD
3557 && (mul_widen_cost (speed, wider_mode)
3558 + 2 * shift_cost (speed, mode, size-1)
3559 + 4 * add_cost (speed, mode) < max_cost))
3560 {
3561 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3562 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3563 if (tem != 0)
3564 {
3565 tem = extract_high_half (mode, tem);
3566 /* We used the wrong signedness. Adjust the result. */
3567 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3568 target, unsignedp);
3569 }
3570 }
3571
3572 return 0;
3573 }
3574
3575 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3576 putting the high half of the result in TARGET if that is convenient,
3577 and return where the result is. If the operation can not be performed,
3578 0 is returned.
3579
3580 MODE is the mode of operation and result.
3581
3582 UNSIGNEDP nonzero means unsigned multiply.
3583
3584 MAX_COST is the total allowed cost for the expanded RTL. */
3585
3586 static rtx
3587 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3588 rtx target, int unsignedp, int max_cost)
3589 {
3590 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3591 unsigned HOST_WIDE_INT cnst1;
3592 int extra_cost;
3593 bool sign_adjust = false;
3594 enum mult_variant variant;
3595 struct algorithm alg;
3596 rtx tem;
3597 bool speed = optimize_insn_for_speed_p ();
3598
3599 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3600 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3601 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3602
3603 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3604
3605 /* We can't optimize modes wider than BITS_PER_WORD.
3606 ??? We might be able to perform double-word arithmetic if
3607 mode == word_mode, however all the cost calculations in
3608 synth_mult etc. assume single-word operations. */
3609 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3610 return expmed_mult_highpart_optab (mode, op0, op1, target,
3611 unsignedp, max_cost);
3612
3613 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3614
3615 /* Check whether we try to multiply by a negative constant. */
3616 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3617 {
3618 sign_adjust = true;
3619 extra_cost += add_cost (speed, mode);
3620 }
3621
3622 /* See whether shift/add multiplication is cheap enough. */
3623 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3624 max_cost - extra_cost))
3625 {
3626 /* See whether the specialized multiplication optabs are
3627 cheaper than the shift/add version. */
3628 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3629 alg.cost.cost + extra_cost);
3630 if (tem)
3631 return tem;
3632
3633 tem = convert_to_mode (wider_mode, op0, unsignedp);
3634 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3635 tem = extract_high_half (mode, tem);
3636
3637 /* Adjust result for signedness. */
3638 if (sign_adjust)
3639 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3640
3641 return tem;
3642 }
3643 return expmed_mult_highpart_optab (mode, op0, op1, target,
3644 unsignedp, max_cost);
3645 }
3646
3647
3648 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3649
3650 static rtx
3651 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3652 {
3653 rtx result, temp, shift, label;
3654 int logd;
3655 int prec = GET_MODE_PRECISION (mode);
3656
3657 logd = floor_log2 (d);
3658 result = gen_reg_rtx (mode);
3659
3660 /* Avoid conditional branches when they're expensive. */
3661 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3662 && optimize_insn_for_speed_p ())
3663 {
3664 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3665 mode, 0, -1);
3666 if (signmask)
3667 {
3668 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3669 signmask = force_reg (mode, signmask);
3670 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3671
3672 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3673 which instruction sequence to use. If logical right shifts
3674 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3675 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
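/* Both sequences compute the truncating signed remainder from the
   identity x % 2^logd == sign (x) * (abs (x) & (2^logd - 1)).  With
   signmask == (x < 0 ? -1 : 0), the XOR/SUB variant conditionally
   negates x ((x ^ signmask) - signmask == abs (x)), masks, and negates
   back.  The LSHIFTRT variant instead forms the bias
   signmask >> (bitsize - logd) == (x < 0 ? 2^logd - 1 : 0) and computes
   ((x + bias) & (2^logd - 1)) - bias, which equals the truncating
   remainder for every x. */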
3676
3677 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3678 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3679 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3680 > COSTS_N_INSNS (2)))
3681 {
3682 temp = expand_binop (mode, xor_optab, op0, signmask,
3683 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 temp = expand_binop (mode, sub_optab, temp, signmask,
3685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3686 temp = expand_binop (mode, and_optab, temp,
3687 gen_int_mode (masklow, mode),
3688 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689 temp = expand_binop (mode, xor_optab, temp, signmask,
3690 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3691 temp = expand_binop (mode, sub_optab, temp, signmask,
3692 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3693 }
3694 else
3695 {
3696 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3697 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3698 signmask = force_reg (mode, signmask);
3699
3700 temp = expand_binop (mode, add_optab, op0, signmask,
3701 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3702 temp = expand_binop (mode, and_optab, temp,
3703 gen_int_mode (masklow, mode),
3704 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3705 temp = expand_binop (mode, sub_optab, temp, signmask,
3706 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3707 }
3708 return temp;
3709 }
3710 }
3711
3712 /* Mask contains the mode's signbit and the significant bits of the
3713 modulus. By including the signbit in the operation, many targets
3714 can avoid an explicit compare operation in the following comparison
3715 against zero. */
3716 wide_int mask = wi::mask (logd, false, prec);
3717 mask = wi::set_bit (mask, prec - 1);
3718
3719 temp = expand_binop (mode, and_optab, op0,
3720 immed_wide_int_const (mask, mode),
3721 result, 1, OPTAB_LIB_WIDEN);
3722 if (temp != result)
3723 emit_move_insn (result, temp);
3724
3725 label = gen_label_rtx ();
3726 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3727
3728 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3729 0, OPTAB_LIB_WIDEN);
3730
3731 mask = wi::mask (logd, true, prec);
3732 temp = expand_binop (mode, ior_optab, temp,
3733 immed_wide_int_const (mask, mode),
3734 result, 1, OPTAB_LIB_WIDEN);
3735 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3736 0, OPTAB_LIB_WIDEN);
3737 if (temp != result)
3738 emit_move_insn (result, temp);
3739 emit_label (label);
3740 return result;
3741 }
3742
3743 /* Expand signed division of OP0 by a power of two D in mode MODE.
3744 This routine is only called for positive values of D. */
3745
3746 static rtx
3747 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3748 {
3749 rtx temp, label;
3750 int logd;
3751
3752 logd = floor_log2 (d);
3753
3754 if (d == 2
3755 && BRANCH_COST (optimize_insn_for_speed_p (),
3756 false) >= 1)
3757 {
3758 temp = gen_reg_rtx (mode);
3759 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3760 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3761 0, OPTAB_LIB_WIDEN);
3762 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3763 }
3764
3765 #ifdef HAVE_conditional_move
3766 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3767 >= 2)
3768 {
3769 rtx temp2;
3770
3771 start_sequence ();
3772 temp2 = copy_to_mode_reg (mode, op0);
3773 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3774 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3775 temp = force_reg (mode, temp);
3776
3777 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3778 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3779 mode, temp, temp2, mode, 0);
3780 if (temp2)
3781 {
3782 rtx seq = get_insns ();
3783 end_sequence ();
3784 emit_insn (seq);
3785 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3786 }
3787 end_sequence ();
3788 }
3789 #endif
3790
3791 if (BRANCH_COST (optimize_insn_for_speed_p (),
3792 false) >= 2)
3793 {
3794 int ushift = GET_MODE_BITSIZE (mode) - logd;
3795
3796 temp = gen_reg_rtx (mode);
3797 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3798 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3799 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3800 > COSTS_N_INSNS (1))
3801 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3802 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3803 else
3804 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3805 ushift, NULL_RTX, 1);
3806 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3807 0, OPTAB_LIB_WIDEN);
3808 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3809 }
3810
3811 label = gen_label_rtx ();
3812 temp = copy_to_mode_reg (mode, op0);
3813 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3814 expand_inc (temp, gen_int_mode (d - 1, mode));
3815 emit_label (label);
3816 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3817 }
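/* All of the branch-free sequences above implement the identity
     x / 2^logd == (x + (x < 0 ? 2^logd - 1 : 0)) >> logd
   for truncating signed division, e.g. -9 / 4 == (-9 + 3) >> 2 == -2.
   The d == 2 case adds just the sign bit (0 or 1), the store-flag
   sequence builds the 0 or 2^logd - 1 bias from an all-ones mask, and
   the conditional-move variant picks between x and x + 2^logd - 1
   directly; the final fallback branches to add the bias only when x is
   negative. */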
3818 \f
3819 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3820 if that is convenient, and returning where the result is.
3821 You may request either the quotient or the remainder as the result;
3822 specify REM_FLAG nonzero to get the remainder.
3823
3824 CODE is the expression code for which kind of division this is;
3825 it controls how rounding is done. MODE is the machine mode to use.
3826 UNSIGNEDP nonzero means do unsigned division. */
3827
3828 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3829 and then correct it by or'ing in missing high bits
3830 if result of ANDI is nonzero.
3831 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3832 This could optimize to a bfexts instruction.
3833 But C doesn't use these operations, so their optimizations are
3834 left for later. */
3835 /* ??? For modulo, we don't actually need the highpart of the first product,
3836 the low part will do nicely. And for small divisors, the second multiply
3837 can also be a low-part only multiply or even be completely left out.
3838 E.g. to calculate the remainder of a division by 3 with a 32 bit
3839 multiply, multiply with 0x55555556 and extract the upper two bits;
3840 the result is exact for inputs up to 0x1fffffff.
3841 The input range can be reduced by using cross-sum rules.
3842 For odd divisors >= 3, the following table gives right shift counts
3843 so that if a number is shifted by an integer multiple of the given
3844 amount, the remainder stays the same:
3845 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3846 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3847 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3848 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3849 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3850
3851 Cross-sum rules for even numbers can be derived by leaving as many bits
3852 to the right alone as the divisor has zeros to the right.
3853 E.g. if x is an unsigned 32 bit number:
3854 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3855 */
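/* A quick check of the mod-3 example above: 0x55555556 == (2^32 + 2) / 3,
   so for x == 3*q + r the low 32 bits of x * 0x55555556 are
   r * 0x55555556 + 2*q (mod 2^32), and the top two bits of that value
   encode r while 2*q is small enough, which holds for x up to
   0x1fffffff.  For instance 10 * 0x55555556 == 0x5555555C (mod 2^32),
   whose top two bits are 01, i.e. 10 % 3 == 1. */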
3856
3857 rtx
3858 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3859 rtx op0, rtx op1, rtx target, int unsignedp)
3860 {
3861 enum machine_mode compute_mode;
3862 rtx tquotient;
3863 rtx quotient = 0, remainder = 0;
3864 rtx last;
3865 int size;
3866 rtx insn;
3867 optab optab1, optab2;
3868 int op1_is_constant, op1_is_pow2 = 0;
3869 int max_cost, extra_cost;
3870 static HOST_WIDE_INT last_div_const = 0;
3871 bool speed = optimize_insn_for_speed_p ();
3872
3873 op1_is_constant = CONST_INT_P (op1);
3874 if (op1_is_constant)
3875 {
3876 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3877 if (unsignedp)
3878 ext_op1 &= GET_MODE_MASK (mode);
3879 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3880 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3881 }
3882
3883 /*
3884 This is the structure of expand_divmod:
3885
3886 First comes code to fix up the operands so we can perform the operations
3887 correctly and efficiently.
3888
3889 Second comes a switch statement with code specific for each rounding mode.
3890 For some special operands this code emits all RTL for the desired
3891 operation, for other cases, it generates only a quotient and stores it in
3892 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3893 to indicate that it has not done anything.
3894
3895 Last comes code that finishes the operation. If QUOTIENT is set and
3896 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3897 QUOTIENT is not set, it is computed using trunc rounding.
3898
3899 We try to generate special code for division and remainder when OP1 is a
3900 constant. If |OP1| = 2**n we can use shifts and some other fast
3901 operations. For other values of OP1, we compute a carefully selected
3902 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3903 by m.
3904
3905 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3906 half of the product. Different strategies for generating the product are
3907 implemented in expmed_mult_highpart.
3908
3909 If what we actually want is the remainder, we generate that by another
3910 by-constant multiplication and a subtraction. */
3911
3912 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3913 code below will malfunction if we are, so check here and handle
3914 the special case if so. */
3915 if (op1 == const1_rtx)
3916 return rem_flag ? const0_rtx : op0;
3917
3918 /* When dividing by -1, we could get an overflow.
3919 negv_optab can handle overflows. */
3920 if (! unsignedp && op1 == constm1_rtx)
3921 {
3922 if (rem_flag)
3923 return const0_rtx;
3924 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3925 ? negv_optab : neg_optab, op0, target, 0);
3926 }
3927
3928 if (target
3929 /* Don't use the function value register as a target
3930 since we have to read it as well as write it,
3931 and function-inlining gets confused by this. */
3932 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3933 /* Don't clobber an operand while doing a multi-step calculation. */
3934 || ((rem_flag || op1_is_constant)
3935 && (reg_mentioned_p (target, op0)
3936 || (MEM_P (op0) && MEM_P (target))))
3937 || reg_mentioned_p (target, op1)
3938 || (MEM_P (op1) && MEM_P (target))))
3939 target = 0;
3940
3941 /* Get the mode in which to perform this computation. Normally it will
3942 be MODE, but sometimes we can't do the desired operation in MODE.
3943 If so, pick a wider mode in which we can do the operation. Convert
3944 to that mode at the start to avoid repeated conversions.
3945
3946 First see what operations we need. These depend on the expression
3947 we are evaluating. (We assume that divxx3 insns exist under the
3948 same conditions as modxx3 insns and that these insns don't normally
3949 fail. If these assumptions are not correct, we may generate less
3950 efficient code in some cases.)
3951
3952 Then see if we find a mode in which we can open-code that operation
3953 (either a division, modulus, or shift). Finally, check for the smallest
3954 mode for which we can do the operation with a library call. */
3955
3956 /* We might want to refine this now that we have division-by-constant
3957 optimization. Since expmed_mult_highpart tries so many variants, it is
3958 not straightforward to generalize this. Maybe we should make an array
3959 of possible modes in init_expmed? Save this for GCC 2.7. */
3960
3961 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3962 ? (unsignedp ? lshr_optab : ashr_optab)
3963 : (unsignedp ? udiv_optab : sdiv_optab));
3964 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3965 ? optab1
3966 : (unsignedp ? udivmod_optab : sdivmod_optab));
3967
3968 for (compute_mode = mode; compute_mode != VOIDmode;
3969 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3970 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3971 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3972 break;
3973
3974 if (compute_mode == VOIDmode)
3975 for (compute_mode = mode; compute_mode != VOIDmode;
3976 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3977 if (optab_libfunc (optab1, compute_mode)
3978 || optab_libfunc (optab2, compute_mode))
3979 break;
3980
3981 /* If we still couldn't find a mode, use MODE, but expand_binop will
3982 probably die. */
3983 if (compute_mode == VOIDmode)
3984 compute_mode = mode;
3985
3986 if (target && GET_MODE (target) == compute_mode)
3987 tquotient = target;
3988 else
3989 tquotient = gen_reg_rtx (compute_mode);
3990
3991 size = GET_MODE_BITSIZE (compute_mode);
3992 #if 0
3993 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3994 (mode), and thereby get better code when OP1 is a constant. Do that
3995 later. It will require going over all usages of SIZE below. */
3996 size = GET_MODE_BITSIZE (mode);
3997 #endif
3998
3999 /* Only deduct something for a REM if the last divide done was
4000 for a different constant. Then set the constant of the last
4001 divide. */
4002 max_cost = (unsignedp
4003 ? udiv_cost (speed, compute_mode)
4004 : sdiv_cost (speed, compute_mode));
4005 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4006 && INTVAL (op1) == last_div_const))
4007 max_cost -= (mul_cost (speed, compute_mode)
4008 + add_cost (speed, compute_mode));
4009
4010 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4011
4012 /* Now convert to the best mode to use. */
4013 if (compute_mode != mode)
4014 {
4015 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4016 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4017
4018 /* convert_modes may have placed op1 into a register, so we
4019 must recompute the following. */
4020 op1_is_constant = CONST_INT_P (op1);
4021 op1_is_pow2 = (op1_is_constant
4022 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4023 || (! unsignedp
4024 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4025 }
4026
4027 /* If one of the operands is a volatile MEM, copy it into a register. */
4028
4029 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4030 op0 = force_reg (compute_mode, op0);
4031 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4032 op1 = force_reg (compute_mode, op1);
4033
4034 /* If we need the remainder or if OP1 is constant, we need to
4035 put OP0 in a register in case it has any queued subexpressions. */
4036 if (rem_flag || op1_is_constant)
4037 op0 = force_reg (compute_mode, op0);
4038
4039 last = get_last_insn ();
4040
4041 /* Promote floor rounding to trunc rounding for unsigned operations. */
4042 if (unsignedp)
4043 {
4044 if (code == FLOOR_DIV_EXPR)
4045 code = TRUNC_DIV_EXPR;
4046 if (code == FLOOR_MOD_EXPR)
4047 code = TRUNC_MOD_EXPR;
4048 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4049 code = TRUNC_DIV_EXPR;
4050 }
4051
4052 if (op1 != const0_rtx)
4053 switch (code)
4054 {
4055 case TRUNC_MOD_EXPR:
4056 case TRUNC_DIV_EXPR:
4057 if (op1_is_constant)
4058 {
4059 if (unsignedp)
4060 {
4061 unsigned HOST_WIDE_INT mh, ml;
4062 int pre_shift, post_shift;
4063 int dummy;
4064 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4065 & GET_MODE_MASK (compute_mode));
4066
4067 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4068 {
4069 pre_shift = floor_log2 (d);
4070 if (rem_flag)
4071 {
4072 unsigned HOST_WIDE_INT mask
4073 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4074 remainder
4075 = expand_binop (compute_mode, and_optab, op0,
4076 gen_int_mode (mask, compute_mode),
4077 remainder, 1,
4078 OPTAB_LIB_WIDEN);
4079 if (remainder)
4080 return gen_lowpart (mode, remainder);
4081 }
4082 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4083 pre_shift, tquotient, 1);
4084 }
4085 else if (size <= HOST_BITS_PER_WIDE_INT)
4086 {
4087 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4088 {
4089 /* Most significant bit of divisor is set; emit an scc
4090 insn. */
4091 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4092 compute_mode, 1, 1);
4093 }
4094 else
4095 {
4096 /* Find a suitable multiplier and right shift count
4097 instead of multiplying with D. */
4098
4099 mh = choose_multiplier (d, size, size,
4100 &ml, &post_shift, &dummy);
4101
4102 /* If the suggested multiplier is more than SIZE bits,
4103 we can do better for even divisors, using an
4104 initial right shift. */
4105 if (mh != 0 && (d & 1) == 0)
4106 {
4107 pre_shift = floor_log2 (d & -d);
4108 mh = choose_multiplier (d >> pre_shift, size,
4109 size - pre_shift,
4110 &ml, &post_shift, &dummy);
4111 gcc_assert (!mh);
4112 }
4113 else
4114 pre_shift = 0;
4115
4116 if (mh != 0)
4117 {
4118 rtx t1, t2, t3, t4;
4119
4120 if (post_shift - 1 >= BITS_PER_WORD)
4121 goto fail1;
4122
4123 extra_cost
4124 = (shift_cost (speed, compute_mode, post_shift - 1)
4125 + shift_cost (speed, compute_mode, 1)
4126 + 2 * add_cost (speed, compute_mode));
4127 t1 = expmed_mult_highpart
4128 (compute_mode, op0,
4129 gen_int_mode (ml, compute_mode),
4130 NULL_RTX, 1, max_cost - extra_cost);
4131 if (t1 == 0)
4132 goto fail1;
4133 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4134 op0, t1),
4135 NULL_RTX);
4136 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4137 t2, 1, NULL_RTX, 1);
4138 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4139 t1, t3),
4140 NULL_RTX);
4141 quotient = expand_shift
4142 (RSHIFT_EXPR, compute_mode, t4,
4143 post_shift - 1, tquotient, 1);
4144 }
4145 else
4146 {
4147 rtx t1, t2;
4148
4149 if (pre_shift >= BITS_PER_WORD
4150 || post_shift >= BITS_PER_WORD)
4151 goto fail1;
4152
4153 t1 = expand_shift
4154 (RSHIFT_EXPR, compute_mode, op0,
4155 pre_shift, NULL_RTX, 1);
4156 extra_cost
4157 = (shift_cost (speed, compute_mode, pre_shift)
4158 + shift_cost (speed, compute_mode, post_shift));
4159 t2 = expmed_mult_highpart
4160 (compute_mode, t1,
4161 gen_int_mode (ml, compute_mode),
4162 NULL_RTX, 1, max_cost - extra_cost);
4163 if (t2 == 0)
4164 goto fail1;
4165 quotient = expand_shift
4166 (RSHIFT_EXPR, compute_mode, t2,
4167 post_shift, tquotient, 1);
4168 }
4169 }
4170 }
4171 else /* Too wide mode to use tricky code */
4172 break;
4173
4174 insn = get_last_insn ();
4175 if (insn != last)
4176 set_dst_reg_note (insn, REG_EQUAL,
4177 gen_rtx_UDIV (compute_mode, op0, op1),
4178 quotient);
4179 }
4180 else /* TRUNC_DIV, signed */
4181 {
4182 unsigned HOST_WIDE_INT ml;
4183 int lgup, post_shift;
4184 rtx mlr;
4185 HOST_WIDE_INT d = INTVAL (op1);
4186 unsigned HOST_WIDE_INT abs_d;
4187
4188 /* Since d might be INT_MIN, we have to cast to
4189 unsigned HOST_WIDE_INT before negating to avoid
4190 undefined signed overflow. */
4191 abs_d = (d >= 0
4192 ? (unsigned HOST_WIDE_INT) d
4193 : - (unsigned HOST_WIDE_INT) d);
4194
4195 /* n rem d = n rem -d */
4196 if (rem_flag && d < 0)
4197 {
4198 d = abs_d;
4199 op1 = gen_int_mode (abs_d, compute_mode);
4200 }
4201
4202 if (d == 1)
4203 quotient = op0;
4204 else if (d == -1)
4205 quotient = expand_unop (compute_mode, neg_optab, op0,
4206 tquotient, 0);
4207 else if (HOST_BITS_PER_WIDE_INT >= size
4208 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4209 {
4210 /* This case is not handled correctly below. */
4211 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4212 compute_mode, 1, 1);
4213 if (quotient == 0)
4214 goto fail1;
4215 }
4216 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4217 && (rem_flag
4218 ? smod_pow2_cheap (speed, compute_mode)
4219 : sdiv_pow2_cheap (speed, compute_mode))
4220 /* We assume that the cheap metric is true if the
4221 optab has an expander for this mode. */
4222 && ((optab_handler ((rem_flag ? smod_optab
4223 : sdiv_optab),
4224 compute_mode)
4225 != CODE_FOR_nothing)
4226 || (optab_handler (sdivmod_optab,
4227 compute_mode)
4228 != CODE_FOR_nothing)))
4229 ;
4230 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4231 {
4232 if (rem_flag)
4233 {
4234 remainder = expand_smod_pow2 (compute_mode, op0, d);
4235 if (remainder)
4236 return gen_lowpart (mode, remainder);
4237 }
4238
4239 if (sdiv_pow2_cheap (speed, compute_mode)
4240 && ((optab_handler (sdiv_optab, compute_mode)
4241 != CODE_FOR_nothing)
4242 || (optab_handler (sdivmod_optab, compute_mode)
4243 != CODE_FOR_nothing)))
4244 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4245 compute_mode, op0,
4246 gen_int_mode (abs_d,
4247 compute_mode),
4248 NULL_RTX, 0);
4249 else
4250 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4251
4252 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4253 negate the quotient. */
4254 if (d < 0)
4255 {
4256 insn = get_last_insn ();
4257 if (insn != last
4258 && abs_d < ((unsigned HOST_WIDE_INT) 1
4259 << (HOST_BITS_PER_WIDE_INT - 1)))
4260 set_dst_reg_note (insn, REG_EQUAL,
4261 gen_rtx_DIV (compute_mode, op0,
4262 gen_int_mode
4263 (abs_d,
4264 compute_mode)),
4265 quotient);
4266
4267 quotient = expand_unop (compute_mode, neg_optab,
4268 quotient, quotient, 0);
4269 }
4270 }
4271 else if (size <= HOST_BITS_PER_WIDE_INT)
4272 {
4273 choose_multiplier (abs_d, size, size - 1,
4274 &ml, &post_shift, &lgup);
4275 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4276 {
4277 rtx t1, t2, t3;
4278
4279 if (post_shift >= BITS_PER_WORD
4280 || size - 1 >= BITS_PER_WORD)
4281 goto fail1;
4282
4283 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4284 + shift_cost (speed, compute_mode, size - 1)
4285 + add_cost (speed, compute_mode));
4286 t1 = expmed_mult_highpart
4287 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4288 NULL_RTX, 0, max_cost - extra_cost);
4289 if (t1 == 0)
4290 goto fail1;
4291 t2 = expand_shift
4292 (RSHIFT_EXPR, compute_mode, t1,
4293 post_shift, NULL_RTX, 0);
4294 t3 = expand_shift
4295 (RSHIFT_EXPR, compute_mode, op0,
4296 size - 1, NULL_RTX, 0);
4297 if (d < 0)
4298 quotient
4299 = force_operand (gen_rtx_MINUS (compute_mode,
4300 t3, t2),
4301 tquotient);
4302 else
4303 quotient
4304 = force_operand (gen_rtx_MINUS (compute_mode,
4305 t2, t3),
4306 tquotient);
4307 }
4308 else
4309 {
4310 rtx t1, t2, t3, t4;
4311
4312 if (post_shift >= BITS_PER_WORD
4313 || size - 1 >= BITS_PER_WORD)
4314 goto fail1;
4315
4316 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4317 mlr = gen_int_mode (ml, compute_mode);
4318 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4319 + shift_cost (speed, compute_mode, size - 1)
4320 + 2 * add_cost (speed, compute_mode));
4321 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4322 NULL_RTX, 0,
4323 max_cost - extra_cost);
4324 if (t1 == 0)
4325 goto fail1;
4326 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4327 t1, op0),
4328 NULL_RTX);
4329 t3 = expand_shift
4330 (RSHIFT_EXPR, compute_mode, t2,
4331 post_shift, NULL_RTX, 0);
4332 t4 = expand_shift
4333 (RSHIFT_EXPR, compute_mode, op0,
4334 size - 1, NULL_RTX, 0);
4335 if (d < 0)
4336 quotient
4337 = force_operand (gen_rtx_MINUS (compute_mode,
4338 t4, t3),
4339 tquotient);
4340 else
4341 quotient
4342 = force_operand (gen_rtx_MINUS (compute_mode,
4343 t3, t4),
4344 tquotient);
4345 }
4346 }
4347 else /* Too wide mode to use tricky code */
4348 break;
4349
4350 insn = get_last_insn ();
4351 if (insn != last)
4352 set_dst_reg_note (insn, REG_EQUAL,
4353 gen_rtx_DIV (compute_mode, op0, op1),
4354 quotient);
4355 }
4356 break;
4357 }
4358 fail1:
4359 delete_insns_since (last);
4360 break;
4361
4362 case FLOOR_DIV_EXPR:
4363 case FLOOR_MOD_EXPR:
4364 /* We will come here only for signed operations. */
4365 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4366 {
4367 unsigned HOST_WIDE_INT mh, ml;
4368 int pre_shift, lgup, post_shift;
4369 HOST_WIDE_INT d = INTVAL (op1);
4370
4371 if (d > 0)
4372 {
4373 /* We could just as easily deal with negative constants here,
4374 but it does not seem worth the trouble for GCC 2.6. */
4375 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4376 {
4377 pre_shift = floor_log2 (d);
4378 if (rem_flag)
4379 {
4380 unsigned HOST_WIDE_INT mask
4381 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4382 remainder = expand_binop
4383 (compute_mode, and_optab, op0,
4384 gen_int_mode (mask, compute_mode),
4385 remainder, 0, OPTAB_LIB_WIDEN);
4386 if (remainder)
4387 return gen_lowpart (mode, remainder);
4388 }
4389 quotient = expand_shift
4390 (RSHIFT_EXPR, compute_mode, op0,
4391 pre_shift, tquotient, 0);
4392 }
4393 else
4394 {
4395 rtx t1, t2, t3, t4;
4396
4397 mh = choose_multiplier (d, size, size - 1,
4398 &ml, &post_shift, &lgup);
4399 gcc_assert (!mh);
4400
4401 if (post_shift < BITS_PER_WORD
4402 && size - 1 < BITS_PER_WORD)
4403 {
4404 t1 = expand_shift
4405 (RSHIFT_EXPR, compute_mode, op0,
4406 size - 1, NULL_RTX, 0);
4407 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4408 NULL_RTX, 0, OPTAB_WIDEN);
4409 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4410 + shift_cost (speed, compute_mode, size - 1)
4411 + 2 * add_cost (speed, compute_mode));
4412 t3 = expmed_mult_highpart
4413 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4414 NULL_RTX, 1, max_cost - extra_cost);
4415 if (t3 != 0)
4416 {
4417 t4 = expand_shift
4418 (RSHIFT_EXPR, compute_mode, t3,
4419 post_shift, NULL_RTX, 1);
4420 quotient = expand_binop (compute_mode, xor_optab,
4421 t4, t1, tquotient, 0,
4422 OPTAB_WIDEN);
4423 }
4424 }
4425 }
4426 }
4427 else
4428 {
4429 rtx nsign, t1, t2, t3, t4;
4430 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4431 op0, constm1_rtx), NULL_RTX);
4432 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4433 0, OPTAB_WIDEN);
4434 nsign = expand_shift
4435 (RSHIFT_EXPR, compute_mode, t2,
4436 size - 1, NULL_RTX, 0);
4437 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4438 NULL_RTX);
4439 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4440 NULL_RTX, 0);
4441 if (t4)
4442 {
4443 rtx t5;
4444 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4445 NULL_RTX, 0);
4446 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4447 t4, t5),
4448 tquotient);
4449 }
4450 }
4451 }
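/* The else branch just above (reached only for a negative constant
   divisor) uses NSIGN == (op0 > 0 ? 0 : -1), derived from the sign of
   op0 | (op0 - 1), to reduce floor division to truncating division:
   for op0 > 0 it computes trunc_div (op0 - 1, op1) - 1, and for
   op0 <= 0 it computes trunc_div (op0, op1); both give the floor
   result, e.g. 5 / -2 == floor (-2.5) == -3 == trunc ((5 - 1) / -2) - 1. */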
4452
4453 if (quotient != 0)
4454 break;
4455 delete_insns_since (last);
4456
4457 /* Try using an instruction that produces both the quotient and
4458 remainder, using truncation. We can easily compensate the quotient
4459 or remainder to get floor rounding, once we have the remainder.
4460 Notice that we also compute the final remainder value here,
4461 and return the result right away. */
4462 if (target == 0 || GET_MODE (target) != compute_mode)
4463 target = gen_reg_rtx (compute_mode);
4464
4465 if (rem_flag)
4466 {
4467 remainder
4468 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4469 quotient = gen_reg_rtx (compute_mode);
4470 }
4471 else
4472 {
4473 quotient
4474 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4475 remainder = gen_reg_rtx (compute_mode);
4476 }
4477
4478 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4479 quotient, remainder, 0))
4480 {
4481 /* This could be computed with a branch-less sequence.
4482 Save that for later. */
4483 rtx tem;
4484 rtx label = gen_label_rtx ();
4485 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4486 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4487 NULL_RTX, 0, OPTAB_WIDEN);
4488 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4489 expand_dec (quotient, const1_rtx);
4490 expand_inc (remainder, op1);
4491 emit_label (label);
4492 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4493 }
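/* The fixup above turns the truncating quotient and remainder into
   floor results: when the remainder is nonzero and OP0 and OP1 have
   opposite signs (their XOR is negative), truncation rounded the
   negative quotient towards zero, so the quotient is decremented and
   OP1 is added back into the remainder.  For example -7 / 2 truncates
   to q == -3, r == -1 and is fixed up to q == -4, r == 1. */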
4494
4495 /* No luck with division elimination or divmod. Have to do it
4496 by conditionally adjusting op0 *and* the result. */
4497 {
4498 rtx label1, label2, label3, label4, label5;
4499 rtx adjusted_op0;
4500 rtx tem;
4501
4502 quotient = gen_reg_rtx (compute_mode);
4503 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4504 label1 = gen_label_rtx ();
4505 label2 = gen_label_rtx ();
4506 label3 = gen_label_rtx ();
4507 label4 = gen_label_rtx ();
4508 label5 = gen_label_rtx ();
4509 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4510 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4511 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4512 quotient, 0, OPTAB_LIB_WIDEN);
4513 if (tem != quotient)
4514 emit_move_insn (quotient, tem);
4515 emit_jump_insn (gen_jump (label5));
4516 emit_barrier ();
4517 emit_label (label1);
4518 expand_inc (adjusted_op0, const1_rtx);
4519 emit_jump_insn (gen_jump (label4));
4520 emit_barrier ();
4521 emit_label (label2);
4522 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4523 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4524 quotient, 0, OPTAB_LIB_WIDEN);
4525 if (tem != quotient)
4526 emit_move_insn (quotient, tem);
4527 emit_jump_insn (gen_jump (label5));
4528 emit_barrier ();
4529 emit_label (label3);
4530 expand_dec (adjusted_op0, const1_rtx);
4531 emit_label (label4);
4532 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4533 quotient, 0, OPTAB_LIB_WIDEN);
4534 if (tem != quotient)
4535 emit_move_insn (quotient, tem);
4536 expand_dec (quotient, const1_rtx);
4537 emit_label (label5);
4538 }
4539 break;
4540
4541 case CEIL_DIV_EXPR:
4542 case CEIL_MOD_EXPR:
4543 if (unsignedp)
4544 {
4545 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4546 {
4547 rtx t1, t2, t3;
4548 unsigned HOST_WIDE_INT d = INTVAL (op1);
4549 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4550 floor_log2 (d), tquotient, 1);
4551 t2 = expand_binop (compute_mode, and_optab, op0,
4552 gen_int_mode (d - 1, compute_mode),
4553 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4554 t3 = gen_reg_rtx (compute_mode);
4555 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4556 compute_mode, 1, 1);
4557 if (t3 == 0)
4558 {
4559 rtx lab;
4560 lab = gen_label_rtx ();
4561 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4562 expand_inc (t1, const1_rtx);
4563 emit_label (lab);
4564 quotient = t1;
4565 }
4566 else
4567 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4568 t1, t3),
4569 tquotient);
4570 break;
4571 }
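/* The power-of-two case above computes the unsigned ceiling quotient as
   (op0 >> floor_log2 (d)) + ((op0 & (d - 1)) != 0), using a store-flag
   instruction for the != 0 test when one is available and a conditional
   increment otherwise; e.g. 13 / 4 rounded up is (13 >> 2) + 1 == 4. */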
4572
4573 /* Try using an instruction that produces both the quotient and
4574 remainder, using truncation. We can easily compensate the
4575 quotient or remainder to get ceiling rounding, once we have the
4576 remainder. Notice that we also compute the final remainder
4577 value here, and return the result right away. */
4578 if (target == 0 || GET_MODE (target) != compute_mode)
4579 target = gen_reg_rtx (compute_mode);
4580
4581 if (rem_flag)
4582 {
4583 remainder = (REG_P (target)
4584 ? target : gen_reg_rtx (compute_mode));
4585 quotient = gen_reg_rtx (compute_mode);
4586 }
4587 else
4588 {
4589 quotient = (REG_P (target)
4590 ? target : gen_reg_rtx (compute_mode));
4591 remainder = gen_reg_rtx (compute_mode);
4592 }
4593
4594 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4595 remainder, 1))
4596 {
4597 /* This could be computed with a branch-less sequence.
4598 Save that for later. */
4599 rtx label = gen_label_rtx ();
4600 do_cmp_and_jump (remainder, const0_rtx, EQ,
4601 compute_mode, label);
4602 expand_inc (quotient, const1_rtx);
4603 expand_dec (remainder, op1);
4604 emit_label (label);
4605 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4606 }
4607
4608 /* No luck with division elimination or divmod. Have to do it
4609 by conditionally adjusting op0 *and* the result. */
4610 {
4611 rtx label1, label2;
4612 rtx adjusted_op0, tem;
4613
4614 quotient = gen_reg_rtx (compute_mode);
4615 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4616 label1 = gen_label_rtx ();
4617 label2 = gen_label_rtx ();
4618 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4619 compute_mode, label1);
4620 emit_move_insn (quotient, const0_rtx);
4621 emit_jump_insn (gen_jump (label2));
4622 emit_barrier ();
4623 emit_label (label1);
4624 expand_dec (adjusted_op0, const1_rtx);
4625 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4626 quotient, 1, OPTAB_LIB_WIDEN);
4627 if (tem != quotient)
4628 emit_move_insn (quotient, tem);
4629 expand_inc (quotient, const1_rtx);
4630 emit_label (label2);
4631 }
4632 }
4633 else /* signed */
4634 {
4635 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4636 && INTVAL (op1) >= 0)
4637 {
4638 /* This is extremely similar to the code for the unsigned case
4639 above. For 2.7 we should merge these variants, but for
4640 2.6.1 I don't want to touch the code for unsigned since that
4641 gets used in C. The signed case will only be used by other
4642 languages (Ada). */
4643
4644 rtx t1, t2, t3;
4645 unsigned HOST_WIDE_INT d = INTVAL (op1);
4646 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4647 floor_log2 (d), tquotient, 0);
4648 t2 = expand_binop (compute_mode, and_optab, op0,
4649 gen_int_mode (d - 1, compute_mode),
4650 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4651 t3 = gen_reg_rtx (compute_mode);
4652 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4653 compute_mode, 1, 1);
4654 if (t3 == 0)
4655 {
4656 rtx lab;
4657 lab = gen_label_rtx ();
4658 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4659 expand_inc (t1, const1_rtx);
4660 emit_label (lab);
4661 quotient = t1;
4662 }
4663 else
4664 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4665 t1, t3),
4666 tquotient);
4667 break;
4668 }
4669
4670 /* Try using an instruction that produces both the quotient and
4671 remainder, using truncation. We can easily compensate the
4672 quotient or remainder to get ceiling rounding, once we have the
4673 remainder. Notice that we also compute the final remainder
4674 value here, and return the result right away. */
4675 if (target == 0 || GET_MODE (target) != compute_mode)
4676 target = gen_reg_rtx (compute_mode);
4677 if (rem_flag)
4678 {
4679 remainder = (REG_P (target)
4680 ? target : gen_reg_rtx (compute_mode));
4681 quotient = gen_reg_rtx (compute_mode);
4682 }
4683 else
4684 {
4685 quotient = (REG_P (target)
4686 ? target : gen_reg_rtx (compute_mode));
4687 remainder = gen_reg_rtx (compute_mode);
4688 }
4689
4690 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4691 remainder, 0))
4692 {
4693 /* This could be computed with a branch-less sequence.
4694 Save that for later. */
4695 rtx tem;
4696 rtx label = gen_label_rtx ();
4697 do_cmp_and_jump (remainder, const0_rtx, EQ,
4698 compute_mode, label);
4699 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4700 NULL_RTX, 0, OPTAB_WIDEN);
4701 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4702 expand_inc (quotient, const1_rtx);
4703 expand_dec (remainder, op1);
4704 emit_label (label);
4705 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4706 }
4707
4708 /* No luck with division elimination or divmod. Have to do it
4709 by conditionally adjusting op0 *and* the result. */
4710 {
4711 rtx label1, label2, label3, label4, label5;
4712 rtx adjusted_op0;
4713 rtx tem;
4714
4715 quotient = gen_reg_rtx (compute_mode);
4716 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4717 label1 = gen_label_rtx ();
4718 label2 = gen_label_rtx ();
4719 label3 = gen_label_rtx ();
4720 label4 = gen_label_rtx ();
4721 label5 = gen_label_rtx ();
4722 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4723 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4724 compute_mode, label1);
4725 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4726 quotient, 0, OPTAB_LIB_WIDEN);
4727 if (tem != quotient)
4728 emit_move_insn (quotient, tem);
4729 emit_jump_insn (gen_jump (label5));
4730 emit_barrier ();
4731 emit_label (label1);
4732 expand_dec (adjusted_op0, const1_rtx);
4733 emit_jump_insn (gen_jump (label4));
4734 emit_barrier ();
4735 emit_label (label2);
4736 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4737 compute_mode, label3);
4738 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4739 quotient, 0, OPTAB_LIB_WIDEN);
4740 if (tem != quotient)
4741 emit_move_insn (quotient, tem);
4742 emit_jump_insn (gen_jump (label5));
4743 emit_barrier ();
4744 emit_label (label3);
4745 expand_inc (adjusted_op0, const1_rtx);
4746 emit_label (label4);
4747 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4748 quotient, 0, OPTAB_LIB_WIDEN);
4749 if (tem != quotient)
4750 emit_move_insn (quotient, tem);
4751 expand_inc (quotient, const1_rtx);
4752 emit_label (label5);
4753 }
4754 }
4755 break;
4756
4757 case EXACT_DIV_EXPR:
4758 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4759 {
4760 HOST_WIDE_INT d = INTVAL (op1);
4761 unsigned HOST_WIDE_INT ml;
4762 int pre_shift;
4763 rtx t1;
4764
4765 pre_shift = floor_log2 (d & -d);
4766 ml = invert_mod2n (d >> pre_shift, size);
4767 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4768 pre_shift, NULL_RTX, unsignedp);
4769 quotient = expand_mult (compute_mode, t1,
4770 gen_int_mode (ml, compute_mode),
4771 NULL_RTX, 1);
4772
4773 insn = get_last_insn ();
4774 set_dst_reg_note (insn, REG_EQUAL,
4775 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4776 compute_mode, op0, op1),
4777 quotient);
4778 }
4779 break;
4780
4781 case ROUND_DIV_EXPR:
4782 case ROUND_MOD_EXPR:
4783 if (unsignedp)
4784 {
4785 rtx tem;
4786 rtx label;
4787 label = gen_label_rtx ();
4788 quotient = gen_reg_rtx (compute_mode);
4789 remainder = gen_reg_rtx (compute_mode);
4790 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4791 {
4792 rtx tem;
4793 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4794 quotient, 1, OPTAB_LIB_WIDEN);
4795 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4796 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4797 remainder, 1, OPTAB_LIB_WIDEN);
4798 }
4799 tem = plus_constant (compute_mode, op1, -1);
4800 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4801 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4802 expand_inc (quotient, const1_rtx);
4803 expand_dec (remainder, op1);
4804 emit_label (label);
4805 }
4806 else
4807 {
4808 rtx abs_rem, abs_op1, tem, mask;
4809 rtx label;
4810 label = gen_label_rtx ();
4811 quotient = gen_reg_rtx (compute_mode);
4812 remainder = gen_reg_rtx (compute_mode);
4813 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4814 {
4815 rtx tem;
4816 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4817 quotient, 0, OPTAB_LIB_WIDEN);
4818 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4819 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4820 remainder, 0, OPTAB_LIB_WIDEN);
4821 }
4822 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4823 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4824 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4825 1, NULL_RTX, 1);
4826 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4827 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4828 NULL_RTX, 0, OPTAB_WIDEN);
4829 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4830 size - 1, NULL_RTX, 0);
4831 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4832 NULL_RTX, 0, OPTAB_WIDEN);
4833 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4834 NULL_RTX, 0, OPTAB_WIDEN);
4835 expand_inc (quotient, tem);
4836 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4837 NULL_RTX, 0, OPTAB_WIDEN);
4838 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4839 NULL_RTX, 0, OPTAB_WIDEN);
4840 expand_dec (remainder, tem);
4841 emit_label (label);
4842 }
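/* Both branches above start from the truncating quotient and remainder
   and then round to nearest.  The unsigned path increments the quotient
   (and subtracts OP1 from the remainder) when the remainder exceeds
   (OP1 - 1) / 2, so e.g. 8 / 3 gives q == 2, r == 2 and is adjusted to
   q == 3, r == -1.  The signed path compares 2 * abs (remainder) with
   abs (OP1) and, when it is not smaller, adds 1 to the quotient if OP0
   and OP1 have the same sign and subtracts 1 otherwise, correcting the
   remainder by the correspondingly signed OP1; halfway cases therefore
   round away from zero. */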
4843 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4844
4845 default:
4846 gcc_unreachable ();
4847 }
4848
4849 if (quotient == 0)
4850 {
4851 if (target && GET_MODE (target) != compute_mode)
4852 target = 0;
4853
4854 if (rem_flag)
4855 {
4856 /* Try to produce the remainder without producing the quotient.
4857 If we seem to have a divmod pattern that does not require widening,
4858 don't try widening here. We should really have a WIDEN argument
4859 to expand_twoval_binop, since what we'd really like to do here is
4860 1) try a mod insn in compute_mode
4861 2) try a divmod insn in compute_mode
4862 3) try a div insn in compute_mode and multiply-subtract to get
4863 remainder
4864 4) try the same things with widening allowed. */
4865 remainder
4866 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4867 op0, op1, target,
4868 unsignedp,
4869 ((optab_handler (optab2, compute_mode)
4870 != CODE_FOR_nothing)
4871 ? OPTAB_DIRECT : OPTAB_WIDEN));
4872 if (remainder == 0)
4873 {
4874 /* No luck there. Can we do remainder and divide at once
4875 without a library call? */
4876 remainder = gen_reg_rtx (compute_mode);
4877 if (! expand_twoval_binop ((unsignedp
4878 ? udivmod_optab
4879 : sdivmod_optab),
4880 op0, op1,
4881 NULL_RTX, remainder, unsignedp))
4882 remainder = 0;
4883 }
4884
4885 if (remainder)
4886 return gen_lowpart (mode, remainder);
4887 }
4888
4889 /* Produce the quotient. Try a quotient insn, but not a library call.
4890 If we have a divmod in this mode, use it in preference to widening
4891 the div (for this test we assume it will not fail). Note that optab2
4892 is set to the one of the two optabs that the call below will use. */
4893 quotient
4894 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4895 op0, op1, rem_flag ? NULL_RTX : target,
4896 unsignedp,
4897 ((optab_handler (optab2, compute_mode)
4898 != CODE_FOR_nothing)
4899 ? OPTAB_DIRECT : OPTAB_WIDEN));
4900
4901 if (quotient == 0)
4902 {
4903 /* No luck there. Try a quotient-and-remainder insn,
4904 keeping the quotient alone. */
4905 quotient = gen_reg_rtx (compute_mode);
4906 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4907 op0, op1,
4908 quotient, NULL_RTX, unsignedp))
4909 {
4910 quotient = 0;
4911 if (! rem_flag)
4912 /* Still no luck. If we are not computing the remainder,
4913 use a library call for the quotient. */
4914 quotient = sign_expand_binop (compute_mode,
4915 udiv_optab, sdiv_optab,
4916 op0, op1, target,
4917 unsignedp, OPTAB_LIB_WIDEN);
4918 }
4919 }
4920 }
4921
4922 if (rem_flag)
4923 {
4924 if (target && GET_MODE (target) != compute_mode)
4925 target = 0;
4926
4927 if (quotient == 0)
4928 {
4929 /* No divide instruction either. Use library for remainder. */
4930 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4931 op0, op1, target,
4932 unsignedp, OPTAB_LIB_WIDEN);
4933 /* No remainder function. Try a quotient-and-remainder
4934 function, keeping the remainder. */
4935 if (!remainder)
4936 {
4937 remainder = gen_reg_rtx (compute_mode);
4938 if (!expand_twoval_binop_libfunc
4939 (unsignedp ? udivmod_optab : sdivmod_optab,
4940 op0, op1,
4941 NULL_RTX, remainder,
4942 unsignedp ? UMOD : MOD))
4943 remainder = NULL_RTX;
4944 }
4945 }
4946 else
4947 {
4948 /* We divided. Now finish doing X - Y * (X / Y). */
4949 remainder = expand_mult (compute_mode, quotient, op1,
4950 NULL_RTX, unsignedp);
4951 remainder = expand_binop (compute_mode, sub_optab, op0,
4952 remainder, target, unsignedp,
4953 OPTAB_LIB_WIDEN);
4954 }
4955 }
4956
4957 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4958 }
4959 \f
4960 /* Return a tree node with data type TYPE, describing the value of X.
4961 Usually this is a VAR_DECL, if there is no obvious better choice.
4962 X may be an expression; however, we only support those expressions
4963 generated by loop.c. */
4964
4965 tree
4966 make_tree (tree type, rtx x)
4967 {
4968 tree t;
4969
4970 switch (GET_CODE (x))
4971 {
4972 case CONST_INT:
4973 case CONST_WIDE_INT:
4974 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4975 return t;
4976
4977 case CONST_DOUBLE:
4978 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4979 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4980 t = wide_int_to_tree (type,
4981 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4982 HOST_BITS_PER_WIDE_INT * 2));
4983 else
4984 {
4985 REAL_VALUE_TYPE d;
4986
4987 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4988 t = build_real (type, d);
4989 }
4990
4991 return t;
4992
4993 case CONST_VECTOR:
4994 {
4995 int units = CONST_VECTOR_NUNITS (x);
4996 tree itype = TREE_TYPE (type);
4997 tree *elts;
4998 int i;
4999
5000 /* Build a tree with vector elements. */
5001 elts = XALLOCAVEC (tree, units);
5002 for (i = units - 1; i >= 0; --i)
5003 {
5004 rtx elt = CONST_VECTOR_ELT (x, i);
5005 elts[i] = make_tree (itype, elt);
5006 }
5007
5008 return build_vector (type, elts);
5009 }
5010
5011 case PLUS:
5012 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5013 make_tree (type, XEXP (x, 1)));
5014
5015 case MINUS:
5016 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5017 make_tree (type, XEXP (x, 1)));
5018
5019 case NEG:
5020 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5021
5022 case MULT:
5023 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5024 make_tree (type, XEXP (x, 1)));
5025
5026 case ASHIFT:
5027 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5028 make_tree (type, XEXP (x, 1)));
5029
5030 case LSHIFTRT:
5031 t = unsigned_type_for (type);
5032 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5033 make_tree (t, XEXP (x, 0)),
5034 make_tree (type, XEXP (x, 1))));
5035
5036 case ASHIFTRT:
5037 t = signed_type_for (type);
5038 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5039 make_tree (t, XEXP (x, 0)),
5040 make_tree (type, XEXP (x, 1))));
5041
5042 case DIV:
5043 if (TREE_CODE (type) != REAL_TYPE)
5044 t = signed_type_for (type);
5045 else
5046 t = type;
5047
5048 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5049 make_tree (t, XEXP (x, 0)),
5050 make_tree (t, XEXP (x, 1))));
5051 case UDIV:
5052 t = unsigned_type_for (type);
5053 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5054 make_tree (t, XEXP (x, 0)),
5055 make_tree (t, XEXP (x, 1))));
5056
5057 case SIGN_EXTEND:
5058 case ZERO_EXTEND:
5059 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5060 GET_CODE (x) == ZERO_EXTEND);
5061 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5062
5063 case CONST:
5064 return make_tree (type, XEXP (x, 0));
5065
5066 case SYMBOL_REF:
5067 t = SYMBOL_REF_DECL (x);
5068 if (t)
5069 return fold_convert (type, build_fold_addr_expr (t));
5070 /* else fall through. */
5071
5072 default:
5073 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5074
5075 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5076 address mode to pointer mode. */
5077 if (POINTER_TYPE_P (type))
5078 x = convert_memory_address_addr_space
5079 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5080
5081 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5082 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5083 t->decl_with_rtl.rtl = x;
5084
5085 return t;
5086 }
5087 }
5088 \f
5089 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5090 and returning TARGET.
5091
5092 If TARGET is 0, a pseudo-register or constant is returned. */
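/* Illustrative use (hypothetical caller, not from this file): mask the
   low byte of OP0 into a fresh pseudo:

     rtx low = expand_and (SImode, op0, GEN_INT (0xff), NULL_RTX);

   With TARGET == 0 the result is whatever expand_binop (or constant
   folding, when both operands are VOIDmode constants) returns.  */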
5093
5094 rtx
5095 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5096 {
5097 rtx tem = 0;
5098
5099 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5100 tem = simplify_binary_operation (AND, mode, op0, op1);
5101 if (tem == 0)
5102 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5103
5104 if (target == 0)
5105 target = tem;
5106 else if (tem != target)
5107 emit_move_insn (target, tem);
5108 return target;
5109 }
5110
5111 /* Helper function for emit_store_flag. */
5112 static rtx
5113 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5114 enum machine_mode mode, enum machine_mode compare_mode,
5115 int unsignedp, rtx x, rtx y, int normalizep,
5116 enum machine_mode target_mode)
5117 {
5118 struct expand_operand ops[4];
5119 rtx op0, last, comparison, subtarget;
5120 enum machine_mode result_mode = targetm.cstore_mode (icode);
5121
5122 last = get_last_insn ();
5123 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5124 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5125 if (!x || !y)
5126 {
5127 delete_insns_since (last);
5128 return NULL_RTX;
5129 }
5130
5131 if (target_mode == VOIDmode)
5132 target_mode = result_mode;
5133 if (!target)
5134 target = gen_reg_rtx (target_mode);
5135
5136 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5137
5138 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5139 create_fixed_operand (&ops[1], comparison);
5140 create_fixed_operand (&ops[2], x);
5141 create_fixed_operand (&ops[3], y);
5142 if (!maybe_expand_insn (icode, 4, ops))
5143 {
5144 delete_insns_since (last);
5145 return NULL_RTX;
5146 }
5147 subtarget = ops[0].value;
5148
5149 /* If we are converting to a wider mode, first convert to
5150 TARGET_MODE, then normalize. This produces better combining
5151 opportunities on machines that have a SIGN_EXTRACT when we are
5152 testing a single bit. This mostly benefits the 68k.
5153
5154 If STORE_FLAG_VALUE does not have the sign bit set when
5155 interpreted in MODE, we can do this conversion as unsigned, which
5156 is usually more efficient. */
5157 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5158 {
5159 convert_move (target, subtarget,
5160 val_signbit_known_clear_p (result_mode,
5161 STORE_FLAG_VALUE));
5162 op0 = target;
5163 result_mode = target_mode;
5164 }
5165 else
5166 op0 = subtarget;
5167
5168 /* If we want to keep subexpressions around, don't reuse our last
5169 target. */
5170 if (optimize)
5171 subtarget = 0;
5172
5173 /* Now normalize to the proper value in MODE. Sometimes we don't
5174 have to do anything. */
5175 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5176 ;
5177 /* STORE_FLAG_VALUE might be the most negative number, so write
5178 the comparison this way to avoid a compile-time warning. */
5179 else if (- normalizep == STORE_FLAG_VALUE)
5180 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5181
5182 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5183 it hard to use a value of just the sign bit due to ANSI integer
5184 constant typing rules. */
5185 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5186 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5187 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5188 normalizep == 1);
5189 else
5190 {
5191 gcc_assert (STORE_FLAG_VALUE & 1);
5192
5193 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5194 if (normalizep == -1)
5195 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5196 }
5197
5198 /* If we were converting to a smaller mode, do the conversion now. */
5199 if (target_mode != result_mode)
5200 {
5201 convert_move (target, op0, 0);
5202 return target;
5203 }
5204 else
5205 return op0;
5206 }
5207
5208
5209 /* A subroutine of emit_store_flag only including "tricks" that do not
5210 need a recursive call. These are kept separate to avoid infinite
5211 loops. */
5212
5213 static rtx
5214 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5215 enum machine_mode mode, int unsignedp, int normalizep,
5216 enum machine_mode target_mode)
5217 {
5218 rtx subtarget;
5219 enum insn_code icode;
5220 enum machine_mode compare_mode;
5221 enum mode_class mclass;
5222 enum rtx_code scode;
5223 rtx tem;
5224
5225 if (unsignedp)
5226 code = unsigned_condition (code);
5227 scode = swap_condition (code);
5228
5229 /* If one operand is constant, make it the second one. Only do this
5230 if the other operand is not constant as well. */
5231
5232 if (swap_commutative_operands_p (op0, op1))
5233 {
5234 tem = op0;
5235 op0 = op1;
5236 op1 = tem;
5237 code = swap_condition (code);
5238 }
5239
5240 if (mode == VOIDmode)
5241 mode = GET_MODE (op0);
5242
5243 /* For some comparisons with 1 and -1, we can convert this to
5244 comparisons with zero. This will often produce more opportunities for
5245 store-flag insns. */
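/* Illustrative note: the rewrites below rely on simple integer
   identities, e.g.
     x < 1   is  x <= 0,      x <= -1  is  x < 0,
     x >= 1  is  x > 0,       x > -1   is  x >= 0,
   and, for unsigned operands,
     x >= 1  is  x != 0,      x < 1    is  x == 0.  */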
5246
5247 switch (code)
5248 {
5249 case LT:
5250 if (op1 == const1_rtx)
5251 op1 = const0_rtx, code = LE;
5252 break;
5253 case LE:
5254 if (op1 == constm1_rtx)
5255 op1 = const0_rtx, code = LT;
5256 break;
5257 case GE:
5258 if (op1 == const1_rtx)
5259 op1 = const0_rtx, code = GT;
5260 break;
5261 case GT:
5262 if (op1 == constm1_rtx)
5263 op1 = const0_rtx, code = GE;
5264 break;
5265 case GEU:
5266 if (op1 == const1_rtx)
5267 op1 = const0_rtx, code = NE;
5268 break;
5269 case LTU:
5270 if (op1 == const1_rtx)
5271 op1 = const0_rtx, code = EQ;
5272 break;
5273 default:
5274 break;
5275 }
5276
5277 /* If we are comparing a double-word integer with zero or -1, we can
5278 convert the comparison into one involving a single word. */
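/* Illustrative note: with 32-bit words and a 64-bit operand X split into
   high/low words H and L,
     X == 0  iff  (L | H) == 0,     X == -1  iff  (L & H) == -1,
     X <  0  iff  H < 0,            X >= 0   iff  H >= 0,
   which is what the word-mode rewrites below implement.  */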
5279 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5280 && GET_MODE_CLASS (mode) == MODE_INT
5281 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5282 {
5283 if ((code == EQ || code == NE)
5284 && (op1 == const0_rtx || op1 == constm1_rtx))
5285 {
5286 rtx op00, op01;
5287
5288 /* Do a logical OR or AND of the two words and compare the
5289 result. */
5290 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5291 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5292 tem = expand_binop (word_mode,
5293 op1 == const0_rtx ? ior_optab : and_optab,
5294 op00, op01, NULL_RTX, unsignedp,
5295 OPTAB_DIRECT);
5296
5297 if (tem != 0)
5298 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5299 unsignedp, normalizep);
5300 }
5301 else if ((code == LT || code == GE) && op1 == const0_rtx)
5302 {
5303 rtx op0h;
5304
5305 /* If testing the sign bit, we can just test the high word. */
5306 op0h = simplify_gen_subreg (word_mode, op0, mode,
5307 subreg_highpart_offset (word_mode,
5308 mode));
5309 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5310 unsignedp, normalizep);
5311 }
5312 else
5313 tem = NULL_RTX;
5314
5315 if (tem)
5316 {
5317 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5318 return tem;
5319 if (!target)
5320 target = gen_reg_rtx (target_mode);
5321
5322 convert_move (target, tem,
5323 !val_signbit_known_set_p (word_mode,
5324 (normalizep ? normalizep
5325 : STORE_FLAG_VALUE)));
5326 return target;
5327 }
5328 }
5329
5330 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5331 complement of A (for GE) and shifting the sign bit to the low bit. */
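/* Illustrative note: in an 8-bit mode, A < 0 iff bit 7 of A is set, so a
   logical right shift by 7 yields the 0/1 result directly (e.g. A = -3 =
   0xfd gives 1, A = 5 gives 0); for A >= 0 the same shift is applied to
   ~A.  An arithmetic shift instead produces the 0/-1 form.  */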
5332 if (op1 == const0_rtx && (code == LT || code == GE)
5333 && GET_MODE_CLASS (mode) == MODE_INT
5334 && (normalizep || STORE_FLAG_VALUE == 1
5335 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5336 {
5337 subtarget = target;
5338
5339 if (!target)
5340 target_mode = mode;
5341
5342 /* If the result is to be wider than OP0, it is best to convert it
5343 first. If it is to be narrower, it is *incorrect* to convert it
5344 first. */
5345 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5346 {
5347 op0 = convert_modes (target_mode, mode, op0, 0);
5348 mode = target_mode;
5349 }
5350
5351 if (target_mode != mode)
5352 subtarget = 0;
5353
5354 if (code == GE)
5355 op0 = expand_unop (mode, one_cmpl_optab, op0,
5356 ((STORE_FLAG_VALUE == 1 || normalizep)
5357 ? 0 : subtarget), 0);
5358
5359 if (STORE_FLAG_VALUE == 1 || normalizep)
5360 /* If we are supposed to produce a 0/1 value, we want to do
5361 a logical shift from the sign bit to the low-order bit; for
5362 a -1/0 value, we do an arithmetic shift. */
5363 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5364 GET_MODE_BITSIZE (mode) - 1,
5365 subtarget, normalizep != -1);
5366
5367 if (mode != target_mode)
5368 op0 = convert_modes (target_mode, mode, op0, 0);
5369
5370 return op0;
5371 }
5372
5373 mclass = GET_MODE_CLASS (mode);
5374 for (compare_mode = mode; compare_mode != VOIDmode;
5375 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5376 {
5377 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5378 icode = optab_handler (cstore_optab, optab_mode);
5379 if (icode != CODE_FOR_nothing)
5380 {
5381 do_pending_stack_adjust ();
5382 tem = emit_cstore (target, icode, code, mode, compare_mode,
5383 unsignedp, op0, op1, normalizep, target_mode);
5384 if (tem)
5385 return tem;
5386
5387 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5388 {
5389 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5390 unsignedp, op1, op0, normalizep, target_mode);
5391 if (tem)
5392 return tem;
5393 }
5394 break;
5395 }
5396 }
5397
5398 return 0;
5399 }
5400
5401 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5402 and storing in TARGET. Normally return TARGET.
5403 Return 0 if that cannot be done.
5404
5405 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5406 it is VOIDmode, they cannot both be CONST_INT.
5407
5408 UNSIGNEDP is for the case where we have to widen the operands
5409 to perform the operation. It says to use zero-extension.
5410
5411 NORMALIZEP is 1 if we should convert the result to be either zero
5412 or one. NORMALIZEP is -1 if we should convert the result to be
5413 either zero or -1. If NORMALIZEP is zero, the result will be left
5414 "raw" out of the scc insn. */
5415
5416 rtx
5417 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5418 enum machine_mode mode, int unsignedp, int normalizep)
5419 {
5420 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5421 enum rtx_code rcode;
5422 rtx subtarget;
5423 rtx tem, last, trueval;
5424
5425 /* If we compare constants, we shouldn't use a store-flag operation,
5426 but a constant load. We can get there via the vanilla route that
5427 usually generates a compare-branch sequence, but will in this case
5428 fold the comparison to a constant, and thus elide the branch. */
5429 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5430 return NULL_RTX;
5431
5432 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5433 target_mode);
5434 if (tem)
5435 return tem;
5436
5437 /* If we reached here, we can't do this with a scc insn; however, there
5438 are some comparisons that can be done in other ways. Don't do any
5439 of these cases if branches are very cheap. */
5440 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5441 return 0;
5442
5443 /* See what we need to return. We can only return a 1, -1, or the
5444 sign bit. */
5445
5446 if (normalizep == 0)
5447 {
5448 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5449 normalizep = STORE_FLAG_VALUE;
5450
5451 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5452 ;
5453 else
5454 return 0;
5455 }
5456
5457 last = get_last_insn ();
5458
5459 /* If optimizing, use different pseudo registers for each insn, instead
5460 of reusing the same pseudo. This leads to better CSE, but slows
5461 down the compiler, since there are more pseudos. */
5462 subtarget = (!optimize
5463 && (target_mode == mode)) ? target : NULL_RTX;
5464 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5465
5466 /* For floating-point comparisons, try the reverse comparison or try
5467 changing the "orderedness" of the comparison. */
5468 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5469 {
5470 enum rtx_code first_code;
5471 bool and_them;
5472
5473 rcode = reverse_condition_maybe_unordered (code);
5474 if (can_compare_p (rcode, mode, ccp_store_flag)
5475 && (code == ORDERED || code == UNORDERED
5476 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5477 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5478 {
5479 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5480 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5481
5482 /* For the reverse comparison, use either an addition or a XOR. */
5483 if (want_add
5484 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5485 optimize_insn_for_speed_p ()) == 0)
5486 {
5487 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5488 STORE_FLAG_VALUE, target_mode);
5489 if (tem)
5490 return expand_binop (target_mode, add_optab, tem,
5491 gen_int_mode (normalizep, target_mode),
5492 target, 0, OPTAB_WIDEN);
5493 }
5494 else if (!want_add
5495 && rtx_cost (trueval, XOR, 1,
5496 optimize_insn_for_speed_p ()) == 0)
5497 {
5498 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5499 normalizep, target_mode);
5500 if (tem)
5501 return expand_binop (target_mode, xor_optab, tem, trueval,
5502 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5503 }
5504 }
5505
5506 delete_insns_since (last);
5507
5508 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5509 if (code == ORDERED || code == UNORDERED)
5510 return 0;
5511
5512 and_them = split_comparison (code, mode, &first_code, &code);
5513
5514 /* If there are no NaNs, the first comparison should always fall through.
5515 Effectively change the comparison to the other one. */
5516 if (!HONOR_NANS (mode))
5517 {
5518 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5519 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5520 target_mode);
5521 }
5522
5523 #ifdef HAVE_conditional_move
5524 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5525 conditional move. */
5526 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5527 normalizep, target_mode);
5528 if (tem == 0)
5529 return 0;
5530
5531 if (and_them)
5532 tem = emit_conditional_move (target, code, op0, op1, mode,
5533 tem, const0_rtx, GET_MODE (tem), 0);
5534 else
5535 tem = emit_conditional_move (target, code, op0, op1, mode,
5536 trueval, tem, GET_MODE (tem), 0);
5537
5538 if (tem == 0)
5539 delete_insns_since (last);
5540 return tem;
5541 #else
5542 return 0;
5543 #endif
5544 }
5545
5546 /* The remaining tricks only apply to integer comparisons. */
5547
5548 if (GET_MODE_CLASS (mode) != MODE_INT)
5549 return 0;
5550
5551 /* If this is an equality comparison of integers, we can try to exclusive-or
5552 (or subtract) the two operands and use a recursive call to try the
5553 comparison with zero. Don't do any of these cases if branches are
5554 very cheap. */
5555
5556 if ((code == EQ || code == NE) && op1 != const0_rtx)
5557 {
5558 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5559 OPTAB_WIDEN);
5560
5561 if (tem == 0)
5562 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5563 OPTAB_WIDEN);
5564 if (tem != 0)
5565 tem = emit_store_flag (target, code, tem, const0_rtx,
5566 mode, unsignedp, normalizep);
5567 if (tem != 0)
5568 return tem;
5569
5570 delete_insns_since (last);
5571 }
5572
5573 /* For integer comparisons, try the reverse comparison. However, for
5574 small X and if we'd have to extend anyway, implementing "X != 0"
5575 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5576 rcode = reverse_condition (code);
5577 if (can_compare_p (rcode, mode, ccp_store_flag)
5578 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5579 && code == NE
5580 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5581 && op1 == const0_rtx))
5582 {
5583 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5584 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5585
5586 /* Again, for the reverse comparison, use either an addition or a XOR. */
5587 if (want_add
5588 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5589 optimize_insn_for_speed_p ()) == 0)
5590 {
5591 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5592 STORE_FLAG_VALUE, target_mode);
5593 if (tem != 0)
5594 tem = expand_binop (target_mode, add_optab, tem,
5595 gen_int_mode (normalizep, target_mode),
5596 target, 0, OPTAB_WIDEN);
5597 }
5598 else if (!want_add
5599 && rtx_cost (trueval, XOR, 1,
5600 optimize_insn_for_speed_p ()) == 0)
5601 {
5602 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5603 normalizep, target_mode);
5604 if (tem != 0)
5605 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5606 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5607 }
5608
5609 if (tem != 0)
5610 return tem;
5611 delete_insns_since (last);
5612 }
5613
5614 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5615 the constant zero. Reject all other comparisons at this point. Only
5616 do LE and GT if branches are expensive since they are expensive on
5617 2-operand machines. */
5618
5619 if (op1 != const0_rtx
5620 || (code != EQ && code != NE
5621 && (BRANCH_COST (optimize_insn_for_speed_p (),
5622 false) <= 1 || (code != LE && code != GT))))
5623 return 0;
5624
5625 /* Try to put the result of the comparison in the sign bit. Assume we can't
5626 do the necessary operation below. */
5627
5628 tem = 0;
5629
5630 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5631 the sign bit set. */
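/* Illustrative note (8-bit examples): A = 3 gives 3 | 2 = 3, sign bit
   clear; A = 0 gives 0 | -1 = -1, sign bit set; A = -2 gives
   0xfe | 0xfd = 0xff, sign bit set.  So the sign bit of A | (A - 1) is
   set exactly when A <= 0.  */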
5632
5633 if (code == LE)
5634 {
5635 /* This is destructive, so SUBTARGET can't be OP0. */
5636 if (rtx_equal_p (subtarget, op0))
5637 subtarget = 0;
5638
5639 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5640 OPTAB_WIDEN);
5641 if (tem)
5642 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5643 OPTAB_WIDEN);
5644 }
5645
5646 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5647 number of bits in the mode of OP0, minus one. */
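/* Illustrative note (8-bit, BITS = 7): A = 5 gives (5 >> 7) - 5 = -5,
   sign bit set; A = 0 gives 0 - 0 = 0; A = -3 gives (-3 >> 7) - (-3) =
   -1 + 3 = 2, sign bit clear.  So the sign bit of the result is set
   exactly when A > 0.  */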
5648
5649 if (code == GT)
5650 {
5651 if (rtx_equal_p (subtarget, op0))
5652 subtarget = 0;
5653
5654 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5655 GET_MODE_BITSIZE (mode) - 1,
5656 subtarget, 0);
5657 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5658 OPTAB_WIDEN);
5659 }
5660
5661 if (code == EQ || code == NE)
5662 {
5663 /* For EQ or NE, one way to do the comparison is to apply an operation
5664 that converts the operand into a positive number if it is nonzero
5665 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5666 for NE we negate. This puts the result in the sign bit. Then we
5667 normalize with a shift, if needed.
5668
5669 Two operations that can do the above actions are ABS and FFS, so try
5670 them. If that doesn't work, and MODE is smaller than a full word,
5671 we can use zero-extension to the wider mode (an unsigned conversion)
5672 as the operation. */
5673
5674 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5675 that is compensated by the subsequent overflow when subtracting
5676 one / negating. */
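/* Illustrative note: with ABS and EQ, A = 0 gives abs (0) - 1 = -1
   (sign bit set, comparison true) while A = 5 gives 5 - 1 = 4 (sign bit
   clear, false); with NE, -abs (5) = -5 has the sign bit set.  For
   A = INT_MIN, abs overflows back to INT_MIN, but INT_MIN - 1 wraps to
   INT_MAX (EQ false) and -INT_MIN wraps to INT_MIN (NE true), so the
   answers stay correct.  */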
5677
5678 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5679 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5680 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5681 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5682 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5683 {
5684 tem = convert_modes (word_mode, mode, op0, 1);
5685 mode = word_mode;
5686 }
5687
5688 if (tem != 0)
5689 {
5690 if (code == EQ)
5691 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5692 0, OPTAB_WIDEN);
5693 else
5694 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5695 }
5696
5697 /* If we couldn't do it that way, for NE we can "or" the two's complement
5698 of the value with itself. For EQ, we take the one's complement of
5699 that "or", which is an extra insn, so we only handle EQ if branches
5700 are expensive. */
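/* Illustrative note: the sign bit of (-A | A) is set exactly when
   A != 0: for A = 5, -5 | 5 has the sign bit set; for A = 0 the result
   is 0; for A = INT_MIN, -A is INT_MIN again and the sign bit is still
   set.  Complementing the result turns this into the EQ test.  */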
5701
5702 if (tem == 0
5703 && (code == NE
5704 || BRANCH_COST (optimize_insn_for_speed_p (),
5705 false) > 1))
5706 {
5707 if (rtx_equal_p (subtarget, op0))
5708 subtarget = 0;
5709
5710 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5711 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5712 OPTAB_WIDEN);
5713
5714 if (tem && code == EQ)
5715 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5716 }
5717 }
5718
5719 if (tem && normalizep)
5720 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5721 GET_MODE_BITSIZE (mode) - 1,
5722 subtarget, normalizep == 1);
5723
5724 if (tem)
5725 {
5726 if (!target)
5727 ;
5728 else if (GET_MODE (tem) != target_mode)
5729 {
5730 convert_move (target, tem, 0);
5731 tem = target;
5732 }
5733 else if (!subtarget)
5734 {
5735 emit_move_insn (target, tem);
5736 tem = target;
5737 }
5738 }
5739 else
5740 delete_insns_since (last);
5741
5742 return tem;
5743 }
5744
5745 /* Like emit_store_flag, but always succeeds. */
5746
5747 rtx
5748 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5749 enum machine_mode mode, int unsignedp, int normalizep)
5750 {
5751 rtx tem, label;
5752 rtx trueval, falseval;
5753
5754 /* First see if emit_store_flag can do the job. */
5755 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5756 if (tem != 0)
5757 return tem;
5758
5759 if (!target)
5760 target = gen_reg_rtx (word_mode);
5761
5762 /* If this failed, we have to do this with set/compare/jump/set code.
5763 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5764 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5765 if (code == NE
5766 && GET_MODE_CLASS (mode) == MODE_INT
5767 && REG_P (target)
5768 && op0 == target
5769 && op1 == const0_rtx)
5770 {
5771 label = gen_label_rtx ();
5772 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5773 mode, NULL_RTX, NULL_RTX, label, -1);
5774 emit_move_insn (target, trueval);
5775 emit_label (label);
5776 return target;
5777 }
5778
5779 if (!REG_P (target)
5780 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5781 target = gen_reg_rtx (GET_MODE (target));
5782
5783 /* Jump in the right direction if the target cannot implement CODE
5784 but can jump on its reverse condition. */
5785 falseval = const0_rtx;
5786 if (! can_compare_p (code, mode, ccp_jump)
5787 && (! FLOAT_MODE_P (mode)
5788 || code == ORDERED || code == UNORDERED
5789 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5790 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5791 {
5792 enum rtx_code rcode;
5793 if (FLOAT_MODE_P (mode))
5794 rcode = reverse_condition_maybe_unordered (code);
5795 else
5796 rcode = reverse_condition (code);
5797
5798 /* Canonicalize to UNORDERED for the libcall. */
5799 if (can_compare_p (rcode, mode, ccp_jump)
5800 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5801 {
5802 falseval = trueval;
5803 trueval = const0_rtx;
5804 code = rcode;
5805 }
5806 }
5807
5808 emit_move_insn (target, trueval);
5809 label = gen_label_rtx ();
5810 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5811 NULL_RTX, label, -1);
5812
5813 emit_move_insn (target, falseval);
5814 emit_label (label);
5815
5816 return target;
5817 }
5818 \f
5819 /* Perform possibly multi-word comparison and conditional jump to LABEL
5820 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5821 now a thin wrapper around do_compare_rtx_and_jump. */
5822
5823 static void
5824 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5825 rtx label)
5826 {
5827 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5828 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5829 NULL_RTX, NULL_RTX, label, -1);
5830 }